diff --git a/NEWS b/NEWS index 474202336..81feccdfd 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,13 @@ and 'never, with 'auto being the default. This speeds up copying large files a lot while saving the disk space. +** 'seek' can now navigate holes in sparse files + +On systems that support it, such as GNU/Linux, the new SEEK_DATA and +SEEK_HOLE values can now be passed to the 'seek' procedure to change +file offset to the next piece of data or to the next hole in sparse +files. See "Random Access" in the manual for details. + * Bug fixes ** (ice-9 suspendable-ports) incorrect UTF-8 decoding diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi index e263e2985..3dd2b6fa0 100644 --- a/doc/ref/api-io.texi +++ b/doc/ref/api-io.texi @@ -823,8 +823,26 @@ Seek from the current position. @defvar SEEK_END Seek from the end of the file. @end defvar -If @var{fd_port} is a file descriptor, the underlying system -call is @code{lseek}. @var{port} may be a string port. + +On systems that support it, such as GNU/Linux, the following +constants can be used for @var{whence} to navigate ``holes'' in +sparse files: +@defvar SEEK_DATA +Seek to the next location in the file greater than or equal to +@var{offset} containing data. If @var{offset} points to data, +then the file offset is set to @var{offset}. +@end defvar +@defvar SEEK_HOLE +Seek to the next hole in the file greater than or equal to the +@var{offset}. If @var{offset} points into the middle of a hole, +then the file offset is set to @var{offset}. If there is no hole +past @var{offset}, then the file offset is adjusted to the end of +the file---i.e., there is an implicit hole at the end of any file. +@end defvar + +If @var{fd_port} is a file descriptor, the underlying system call +is @code{lseek} (@pxref{File Position Primitive,,, libc, The GNU C +Library Reference Manual}). @var{port} may be a string port. The value returned is the new position in @var{fd_port}. This means that the current position of a port can be obtained using: diff --git a/libguile/ports.c b/libguile/ports.c index c25c20709..d3f763400 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1,4 +1,4 @@ -/* Copyright 1995-2001,2003-2004,2006-2019,2021 +/* Copyright 1995-2001,2003-2004,2006-2019,2021,2024 Free Software Foundation, Inc. This file is part of Guile. @@ -3713,9 +3713,26 @@ SCM_DEFINE (scm_seek, "seek", 3, 0, 0, "@defvar SEEK_END\n" "Seek from the end of the file.\n" "@end defvar\n" - "If @var{fd_port} is a file descriptor, the underlying system\n" - "call is @code{lseek}. @var{port} may be a string port.\n" - "\n" + "On systems that support it, such as GNU/Linux, the following\n" + "constants can be used for @var{whence} to navigate ``holes'' in\n" + "sparse files:\n" + "@defvar SEEK_DATA\n" + "Seek to the next location in the file greater than or equal to\n" + "@var{offset} containing data. If @var{offset} points to data,\n" + "then the file offset is set to @var{offset}.\n" + "@end defvar\n" + "@defvar SEEK_HOLE\n" + "Seek to the next hole in the file greater than or equal to the\n" + "@var{offset}. If @var{offset} points into the middle of a hole,\n" + "then the file offset is set to @var{offset}. If there is no hole\n" + "past @var{offset}, then the file offset is adjusted to the end of\n" + "the file---i.e., there is an implicit hole at the end of any file.\n" + "@end defvar\n" + "\n" + "If @var{fd_port} is a file descriptor, the underlying system call\n" + "is @code{lseek} (@pxref{File Position Primitive,,, libc, The GNU C\n" + "Library Reference Manual}). @var{port} may be a string port.\n" + "\n" "The value returned is the new position in the file. This means\n" "that the current position of a port can be obtained using:\n" "@lisp\n" @@ -3728,7 +3745,14 @@ SCM_DEFINE (scm_seek, "seek", 3, 0, 0, fd_port = SCM_COERCE_OUTPORT (fd_port); how = scm_to_int (whence); - if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END) + if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END +#ifdef SEEK_DATA + && how != SEEK_DATA +#endif +#ifdef SEEK_HOLE + && how != SEEK_HOLE +#endif + ) SCM_OUT_OF_RANGE (3, whence); if (SCM_OPPORTP (fd_port)) @@ -4151,6 +4175,14 @@ scm_init_ice_9_ports (void) scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR)); scm_c_define ("SEEK_END", scm_from_int (SEEK_END)); + /* Support for sparse files (glibc). */ +#ifdef SEEK_DATA + scm_c_define ("SEEK_DATA", scm_from_int (SEEK_DATA)); +#endif +#ifdef SEEK_HOLE + scm_c_define ("SEEK_HOLE", scm_from_int (SEEK_HOLE)); +#endif + scm_c_define ("%current-input-port-fluid", cur_inport_fluid); scm_c_define ("%current-output-port-fluid", cur_outport_fluid); scm_c_define ("%current-error-port-fluid", cur_errport_fluid); diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm index bb05769a3..926dc5b0b 100644 --- a/module/ice-9/ports.scm +++ b/module/ice-9/ports.scm @@ -1,5 +1,5 @@ ;;; Ports -;;; Copyright (C) 2016,2019,2021 Free Software Foundation, Inc. +;;; Copyright (C) 2016,2019,2021,2024 Free Software Foundation, Inc. ;;; ;;; This library is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU Lesser General Public License as @@ -153,6 +153,12 @@ (load-extension (string-append "libguile-" (effective-version)) "scm_init_ice_9_ioext") +(eval-when (load eval expand) + (when (defined? 'SEEK_DATA) + (module-export! (current-module) '(SEEK_DATA))) + (when (defined? 'SEEK_HOLE) + (module-export! (current-module) '(SEEK_HOLE)))) + (define (port-encoding port) diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index 1b30e1a68..27acf13b4 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -1,8 +1,8 @@ ;;;; ports.test --- Guile I/O ports. -*- coding: utf-8; mode: scheme; -*- ;;;; Jim Blandy --- May 1999 ;;;; -;;;; Copyright (C) 1999, 2001, 2004, 2006, 2007, 2009, 2010, -;;;; 2011, 2012, 2013, 2014, 2015, 2017, 2019, 2020, 2021 Free Software Foundation, Inc. +;;;; Copyright (C) 1999, 2001, 2004, 2006-2007, 2009-2015, 2017, 2019-2021, +;;;; 2024 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -185,6 +185,52 @@ (close-port iport)) (delete-file filename)) +(let* ((file (test-file)) + (port (open-output-file file))) + (seek port 4096 SEEK_SET) + (display "bye." port) + (close-port port) + + (pass-if-equal "size of sparse file" + 4100 + (stat:size (stat file))) + + (pass-if-equal "SEEK_DATA while on data" + 4096 + (if (defined? 'SEEK_DATA) + (call-with-input-file file + (lambda (port) + (catch 'system-error + (lambda () + (seek port 4096 SEEK_DATA)) + (lambda _ + (throw 'unresolved))))) + (throw 'unresolved))) + + (pass-if-equal "SEEK_DATA while in hole" + 4096 + (if (defined? 'SEEK_DATA) + (call-with-input-file file + (lambda (port) + (catch 'system-error + (lambda () + (seek port 10 SEEK_DATA)) + (lambda _ + (throw 'unresolved))))) + (throw 'unresolved))) + + (pass-if-equal "SEEK_HOLE while in hole" + 10 + (if (defined? 'SEEK_HOLE) + (call-with-input-file file + (lambda (port) + (catch 'system-error + (lambda () + (seek port 10 SEEK_HOLE)) + (lambda _ + (throw 'unresolved))))) + (throw 'unresolved)))) + ;;; unusual characters. (let* ((filename (test-file)) (port (open-output-file filename)))