diff --git a/NEWS b/NEWS index 8ed39ceb9..e1557bfc7 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,15 @@ definitely unused---this is notably the case for modules that are only used at macro-expansion time, such as (srfi srfi-26). In those cases, the compiler reports it as "possibly unused". +** copy-file now supports copy-on-write + +The copy-file procedure now takes an additional keyword argument, +#:copy-on-write, specifying whether copy-on-write should be done, if the +underlying file-system supports it. Possible values are 'always, 'auto +and 'never, with 'auto being the default. + +This speeds up copying large files a lot while saving the disk space. + * Bug fixes ** (ice-9 suspendable-ports) incorrect UTF-8 decoding diff --git a/configure.ac b/configure.ac index d0a2dc79b..c46586e9b 100644 --- a/configure.ac +++ b/configure.ac @@ -418,6 +418,7 @@ AC_SUBST([SCM_I_GSC_HAVE_STRUCT_DIRENT64]) # sys/sendfile.h - non-POSIX, found in glibc # AC_CHECK_HEADERS([complex.h fenv.h io.h memory.h process.h \ +linux/fs.h \ sys/dir.h sys/ioctl.h sys/select.h \ sys/time.h sys/timeb.h sys/times.h sys/stdtypes.h sys/types.h \ sys/utime.h unistd.h utime.h pwd.h grp.h sys/utsname.h \ diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi index 8414c3e2d..bde9f6f75 100644 --- a/doc/ref/guile.texi +++ b/doc/ref/guile.texi @@ -15,7 +15,8 @@ This manual documents Guile version @value{VERSION}. Copyright (C) 1996-1997, 2000-2005, 2009-2023 Free Software Foundation, Inc. @* -Copyright (C) 2021 Maxime Devos +Copyright (C) 2021 Maxime Devos@* +Copyright (C) 2024 Tomas Volf@* Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or diff --git a/doc/ref/posix.texi b/doc/ref/posix.texi index fec42d061..d26808d91 100644 --- a/doc/ref/posix.texi +++ b/doc/ref/posix.texi @@ -896,10 +896,17 @@ of @code{delete-file}. Why doesn't POSIX have a @code{rmdirat} function for this instead? No idea! @end deffn -@deffn {Scheme Procedure} copy-file oldfile newfile +@deffn {Scheme Procedure} copy-file @var{oldfile} @var{newfile} @ + [#:copy-on-write='auto] @deffnx {C Function} scm_copy_file (oldfile, newfile) Copy the file specified by @var{oldfile} to @var{newfile}. The return value is unspecified. + +@code{#:copy-on-write} keyword argument determines whether copy-on-write +copy should be attempted and the behavior in case of failure. Possible +values are @code{'always} (attempt the copy-on-write, return error if it +fails), @code{'auto} (attempt the copy-on-write, fallback to regular +copy if it fails) and @code{'never} (perform the regular copy). @end deffn @deffn {Scheme Procedure} sendfile out in count [offset] diff --git a/libguile/filesys.c b/libguile/filesys.c index 1f0bba556..70a6ef6eb 100644 --- a/libguile/filesys.c +++ b/libguile/filesys.c @@ -1,6 +1,7 @@ /* Copyright 1996-2002,2004,2006,2009-2019,2021 Free Software Foundation, Inc. Copyright 2021 Maxime Devos + Copyright 2024 Tomas Volf <~@wolfsden.cz> This file is part of Guile. @@ -67,6 +68,11 @@ # include #endif +#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_LINUX_FS_H) +# include +# include +#endif + #include "async.h" #include "boolean.h" #include "dynwind.h" @@ -75,6 +81,7 @@ #include "fports.h" #include "gsubr.h" #include "iselect.h" +#include "keywords.h" #include "list.h" #include "load.h" /* for scm_i_mirror_backslashes */ #include "modules.h" @@ -1255,20 +1262,49 @@ SCM_DEFINE (scm_readlink, "readlink", 1, 0, 0, } #undef FUNC_NAME -SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0, - (SCM oldfile, SCM newfile), +static int +clone_file (int oldfd, int newfd) +{ +#ifdef FICLONE + return ioctl (newfd, FICLONE, oldfd); +#else + (void)oldfd; + (void)newfd; + errno = EOPNOTSUPP; + return -1; +#endif +} + +SCM_KEYWORD (k_copy_on_write, "copy-on-write"); +SCM_SYMBOL (sym_always, "always"); +SCM_SYMBOL (sym_auto, "auto"); +SCM_SYMBOL (sym_never, "never"); + +SCM_DEFINE (scm_copy_file2, "copy-file", 2, 0, 1, + (SCM oldfile, SCM newfile, SCM rest), "Copy the file specified by @var{oldfile} to @var{newfile}.\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_copy_file + "The return value is unspecified.\n" + "\n" + "@code{#:copy-on-write} keyword argument determines whether " + "copy-on-write copy should be attempted and the " + "behavior in case of failure. Possible values are " + "@code{'always} (attempt the copy-on-write, return error if " + "it fails), @code{'auto} (attempt the copy-on-write, " + "fallback to regular copy if it fails) and @code{'never} " + "(perform the regular copy)." + ) +#define FUNC_NAME s_scm_copy_file2 { char *c_oldfile, *c_newfile; int oldfd, newfd; int n, rv; + SCM cow = sym_auto; + int clone_res; char buf[BUFSIZ]; struct stat_or_stat64 oldstat; scm_dynwind_begin (0); - + c_oldfile = scm_to_locale_string (oldfile); scm_dynwind_free (c_oldfile); c_newfile = scm_to_locale_string (newfile); @@ -1292,13 +1328,30 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0, SCM_SYSERROR; } - while ((n = read (oldfd, buf, sizeof buf)) > 0) - if (write (newfd, buf, n) != n) - { - close (oldfd); - close (newfd); - SCM_SYSERROR; - } + scm_c_bind_keyword_arguments ("copy-file", rest, 0, + k_copy_on_write, &cow, + SCM_UNDEFINED); + + if (scm_is_eq (cow, sym_always) || scm_is_eq (cow, sym_auto)) + clone_res = clone_file(oldfd, newfd); + else if (scm_is_eq (cow, sym_never)) + clone_res = -1; + else + scm_misc_error ("copy-file", + "invalid value for #:copy-on-write: ~S", + scm_list_1 (cow)); + + if (scm_is_eq (cow, sym_always) && clone_res) + scm_syserror ("copy-file: copy-on-write failed"); + + if (clone_res) + while ((n = read (oldfd, buf, sizeof buf)) > 0) + if (write (newfd, buf, n) != n) + { + close (oldfd); + close (newfd); + SCM_SYSERROR; + } close (oldfd); if (close (newfd) == -1) SCM_SYSERROR; @@ -1308,6 +1361,12 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0, } #undef FUNC_NAME +SCM +scm_copy_file (SCM oldfile, SCM newfile) +{ + return scm_copy_file2 (oldfile, newfile, SCM_UNSPECIFIED); +} + SCM_DEFINE (scm_sendfile, "sendfile", 3, 1, 0, (SCM out, SCM in, SCM count, SCM offset), "Send @var{count} bytes from @var{in} to @var{out}, both of which " diff --git a/libguile/filesys.h b/libguile/filesys.h index 1ce50d30e..8e849fe7a 100644 --- a/libguile/filesys.h +++ b/libguile/filesys.h @@ -74,6 +74,7 @@ SCM_API SCM scm_symlinkat (SCM dir, SCM oldpath, SCM newpath); SCM_API SCM scm_readlink (SCM path); SCM_API SCM scm_lstat (SCM str); SCM_API SCM scm_copy_file (SCM oldfile, SCM newfile); +SCM_INTERNAL SCM scm_copy_file2 (SCM oldfile, SCM newfile, SCM rest); SCM_API SCM scm_mkstemp (SCM tmpl); SCM_API SCM scm_mkdtemp (SCM tmpl); SCM_API SCM scm_dirname (SCM filename);