mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Allow read-line to handle "\r\n" as a line terminator
Adds CRLF as a recognized line ending. When a line ends in CRLF, %read-line returns the string "\r\n" as the line delimiter instead of the character #\newline. * libguile/rdelim.c (scm_read_line): handle CRLF line delimiter * module/ice-9/suspendable-ports.scm (%read-line): modify to handle CRLF line delimiter (read-line): use modified %read-line * test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test ("two long lines, split, CRLF"): new test * doc/ref/api-io.texi: update read-line documentation
This commit is contained in:
parent
8d388c97e7
commit
9bb8793a6c
5 changed files with 109 additions and 14 deletions
11
NEWS
11
NEWS
|
@ -109,6 +109,17 @@ eval-in-sandbox is modified so that #:time-limit accepts #f to disable
|
||||||
the time limit. Systems without SIGALRM can use eval-in-sandbox if the
|
the time limit. Systems without SIGALRM can use eval-in-sandbox if the
|
||||||
time limit is disabled.
|
time limit is disabled.
|
||||||
|
|
||||||
|
** 'read-line' detects '\r\n' as a line delimiter
|
||||||
|
|
||||||
|
read-line is updated to detect a carriage return / line feed pair as a
|
||||||
|
line delimiter. When CRLF is detected at the end of a line, the
|
||||||
|
returned line delimiter is "\r\n". Previously, when a line terminated
|
||||||
|
with '\r\n' was read, the carriage return was left at the end of the line,
|
||||||
|
and the returned delimiter was '\n'.
|
||||||
|
|
||||||
|
Carriage return / line feed is a common line delimiter for Windows
|
||||||
|
text files.
|
||||||
|
|
||||||
* Bug fixes
|
* Bug fixes
|
||||||
|
|
||||||
** `basename` now checks the suffix against the base name, not the full path
|
** `basename` now checks the suffix against the base name, not the full path
|
||||||
|
|
|
@ -894,8 +894,10 @@ a specified set of characters.
|
||||||
|
|
||||||
@deffn {Scheme Procedure} read-line [port] [handle-delim]
|
@deffn {Scheme Procedure} read-line [port] [handle-delim]
|
||||||
Return a line of text from @var{port} if specified, otherwise from the
|
Return a line of text from @var{port} if specified, otherwise from the
|
||||||
value returned by @code{(current-input-port)}. Under Unix, a line of text
|
value returned by @code{(current-input-port)}. A line of
|
||||||
is terminated by the first end-of-line character or by end-of-file.
|
text is terminated by a single linefeed character, a return followed
|
||||||
|
by a linefeed, or by end-of-file.
|
||||||
|
|
||||||
|
|
||||||
If @var{handle-delim} is specified, it should be one of the following
|
If @var{handle-delim} is specified, it should be one of the following
|
||||||
symbols:
|
symbols:
|
||||||
|
|
|
@ -127,6 +127,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
SCM line, strings, result;
|
SCM line, strings, result;
|
||||||
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
||||||
size_t index;
|
size_t index;
|
||||||
|
int cr = 0;
|
||||||
|
|
||||||
if (SCM_UNBNDP (port))
|
if (SCM_UNBNDP (port))
|
||||||
port = scm_current_input_port ();
|
port = scm_current_input_port ();
|
||||||
|
@ -152,12 +153,22 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
buf[index] = scm_getc (port);
|
buf[index] = scm_getc (port);
|
||||||
switch (buf[index])
|
switch (buf[index])
|
||||||
{
|
{
|
||||||
case EOF:
|
|
||||||
case '\n':
|
case '\n':
|
||||||
delim = buf[index];
|
delim = buf[index];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case EOF:
|
||||||
|
cr = 0;
|
||||||
|
delim = buf[index];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '\r':
|
||||||
|
cr = 1;
|
||||||
|
index ++;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
cr = 0;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -165,20 +176,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
while (delim == 0);
|
while (delim == 0);
|
||||||
|
|
||||||
if (SCM_LIKELY (scm_is_false (strings)))
|
if (SCM_LIKELY (scm_is_false (strings)))
|
||||||
/* The fast path. */
|
{
|
||||||
line = scm_from_utf32_stringn (buf, index);
|
/* The fast path. */
|
||||||
|
if (cr)
|
||||||
|
line = scm_from_utf32_stringn (buf, index - 1);
|
||||||
|
else
|
||||||
|
line = scm_from_utf32_stringn (buf, index);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Aggregate the intermediary results. */
|
/* Aggregate the intermediary results. */
|
||||||
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
if (cr)
|
||||||
|
strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
|
||||||
|
else
|
||||||
|
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
||||||
line = scm_string_concatenate (scm_reverse (strings));
|
line = scm_string_concatenate (scm_reverse (strings));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (delim == EOF && scm_i_string_length (line) == 0)
|
if (delim == EOF && scm_i_string_length (line) == 0)
|
||||||
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
||||||
else
|
else
|
||||||
result = scm_cons (line,
|
{
|
||||||
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
if (cr)
|
||||||
|
result = scm_cons (line, scm_from_latin1_string("\r\n"));
|
||||||
|
else
|
||||||
|
result = scm_cons (line,
|
||||||
|
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
#undef LINE_BUFFER_SIZE
|
#undef LINE_BUFFER_SIZE
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
;;; Ports, implemented in Scheme
|
;;; Ports, implemented in Scheme
|
||||||
;;; Copyright (C) 2016, 2019 Free Software Foundation, Inc.
|
;;; Copyright (C) 2016, 2018, 2019 Free Software Foundation, Inc.
|
||||||
;;;
|
;;;
|
||||||
;;; This library is free software: you can redistribute it and/or modify
|
;;; This library is free software: you can redistribute it and/or modify
|
||||||
;;; it under the terms of the GNU Lesser General Public License as
|
;;; it under the terms of the GNU Lesser General Public License as
|
||||||
|
@ -691,12 +691,48 @@
|
||||||
(or (eqv? char (string-ref delims i))
|
(or (eqv? char (string-ref delims i))
|
||||||
(lp (1+ i)))))))))))
|
(lp (1+ i)))))))))))
|
||||||
|
|
||||||
|
(define* (%read-line port)
|
||||||
|
(let* ((return-flag-delim-and-chars
|
||||||
|
(let loop ((cr #f)
|
||||||
|
(chars '())
|
||||||
|
(c (read-char port)))
|
||||||
|
(cond
|
||||||
|
((eof-object? c)
|
||||||
|
(list #f c chars))
|
||||||
|
((char=? c #\newline)
|
||||||
|
(list cr c chars))
|
||||||
|
((char=? c #\return)
|
||||||
|
(loop #t (cons c chars) (read-char port)))
|
||||||
|
(else
|
||||||
|
(loop #f (cons c chars) (read-char port))))))
|
||||||
|
(return-flag (car return-flag-delim-and-chars))
|
||||||
|
(delim (cadr return-flag-delim-and-chars))
|
||||||
|
(chars (caddr return-flag-delim-and-chars)))
|
||||||
|
|
||||||
|
(if (and (eof-object? delim)
|
||||||
|
(null? chars))
|
||||||
|
(cons the-eof-object the-eof-object)
|
||||||
|
;; Else
|
||||||
|
(if return-flag
|
||||||
|
(cons (list->string (reverse (cdr chars))) "\r\n")
|
||||||
|
(cons (list->string (reverse chars)) delim)))))
|
||||||
|
|
||||||
(define* (read-line #:optional (port (current-input-port))
|
(define* (read-line #:optional (port (current-input-port))
|
||||||
(handle-delim 'trim))
|
(handle-delim 'trim))
|
||||||
(read-delimited "\n" port handle-delim))
|
(let* ((line/delim (%read-line port))
|
||||||
|
(line (car line/delim))
|
||||||
(define* (%read-line port)
|
(delim (cdr line/delim)))
|
||||||
(read-line port 'split))
|
(case handle-delim
|
||||||
|
((trim) line)
|
||||||
|
((split) line/delim)
|
||||||
|
((concat) (if (and (string? line) (char? delim))
|
||||||
|
(string-append line (string delim))
|
||||||
|
line))
|
||||||
|
((peek) (if (char? delim)
|
||||||
|
(unread-char delim port))
|
||||||
|
line)
|
||||||
|
(else
|
||||||
|
(error "unexpected handle-delim value: " handle-delim)))))
|
||||||
|
|
||||||
(define* (put-string port str #:optional (start 0)
|
(define* (put-string port str #:optional (start 0)
|
||||||
(count (- (string-length str) start)))
|
(count (- (string-length str) start)))
|
||||||
|
|
|
@ -63,6 +63,28 @@
|
||||||
(read-line p 'split)))
|
(read-line p 'split)))
|
||||||
(eof-object? (read-line p)))))
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two lines, split, CRLF"
|
||||||
|
(let* ((s "foo\r\nbar\r\n")
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? '(("foo" . "\r\n")
|
||||||
|
("bar" . "\r\n"))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two long lines, split, CRLF"
|
||||||
|
;; Must be longer than 256 codepoints
|
||||||
|
(let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
(text1 (string-append text0 text0 text0 text0 text0))
|
||||||
|
(text2 (string-append text1 "\r\n" text1 "\r\n")))
|
||||||
|
(let* ((s text2)
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? `((,text1 . "\r\n")
|
||||||
|
(,text1 . "\r\n"))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p))))))
|
||||||
|
|
||||||
(pass-if "two Greek lines, trim"
|
(pass-if "two Greek lines, trim"
|
||||||
(let* ((s "λαμβδα\nμυ\n")
|
(let* ((s "λαμβδα\nμυ\n")
|
||||||
(p (open-input-string s)))
|
(p (open-input-string s)))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue