mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Revert "Handle CRLF and Unicode line endings in read-line"
This reverts commit 0f983e3db0
.
After discussing with Mike we are going to punt the read-line changes
for now. Open the port in O_TEXT mode if you want to chomp the CR in
CFLF sequences.
This commit is contained in:
parent
1c472fef54
commit
e30ee90478
6 changed files with 25 additions and 171 deletions
6
NEWS
6
NEWS
|
@ -166,12 +166,6 @@ See the newly reorganized "Foreign Function Interface", for details.
|
||||||
These new interfaces replace `dynamic-link', `dynamic-pointer' and
|
These new interfaces replace `dynamic-link', `dynamic-pointer' and
|
||||||
similar, which will eventually be deprecated.
|
similar, which will eventually be deprecated.
|
||||||
|
|
||||||
** `read-line'
|
|
||||||
|
|
||||||
This now accepts return + newline and the Unicode line separator and
|
|
||||||
paragraph separator as line separators, as well as the newline and <eof>
|
|
||||||
line separators it handled before.
|
|
||||||
|
|
||||||
** `read-syntax'
|
** `read-syntax'
|
||||||
|
|
||||||
See "Annotated Scheme Read" in the manual.
|
See "Annotated Scheme Read" in the manual.
|
||||||
|
|
|
@ -755,10 +755,8 @@ a specified set of characters.
|
||||||
|
|
||||||
@deffn {Scheme Procedure} read-line [port] [handle-delim]
|
@deffn {Scheme Procedure} read-line [port] [handle-delim]
|
||||||
Return a line of text from @var{port} if specified, otherwise from the
|
Return a line of text from @var{port} if specified, otherwise from the
|
||||||
value returned by @code{(current-input-port)}. Under Unix, a line of
|
value returned by @code{(current-input-port)}. Under Unix, a line of text
|
||||||
text is terminated by the first end-of-line character or by end-of-file.
|
is terminated by the first end-of-line character or by end-of-file.
|
||||||
The end-of-line characters handled are newline, carriage return plus
|
|
||||||
newline, or the Unicode line or paragraph separators.
|
|
||||||
|
|
||||||
If @var{handle-delim} is specified, it should be one of the following
|
If @var{handle-delim} is specified, it should be one of the following
|
||||||
symbols:
|
symbols:
|
||||||
|
@ -773,9 +771,7 @@ Append the terminating delimiter (if any) to the returned string.
|
||||||
Push the terminating delimiter (if any) back on to the port.
|
Push the terminating delimiter (if any) back on to the port.
|
||||||
@item split
|
@item split
|
||||||
Return a pair containing the string read from the port and the
|
Return a pair containing the string read from the port and the
|
||||||
terminating delimiter or end-of-file object. The delimiter will either
|
terminating delimiter or end-of-file object.
|
||||||
be a single character for newline or the Unicode line or paragraph
|
|
||||||
separators, or it will be the string @code{"\r\n"}.
|
|
||||||
@end table
|
@end table
|
||||||
@end deffn
|
@end deffn
|
||||||
|
|
||||||
|
|
|
@ -112,11 +112,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
|
||||||
|
|
||||||
SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
(SCM port),
|
(SCM port),
|
||||||
"Read a line from @var{port}, allocating storage as necessary.\n"
|
"Read a newline-terminated line from @var{port}, allocating storage as\n"
|
||||||
"The terminator (if any) is removed from the string,\n"
|
"necessary. The newline terminator (if any) is removed from the string,\n"
|
||||||
"and a pair consisting of the line and its delimiter is returned. The\n"
|
"and a pair consisting of the line and its delimiter is returned. The\n"
|
||||||
"delimiter may be either a newline, return + newline, the Unicode\n"
|
"delimiter may be either a newline or the @var{eof-object}; if\n"
|
||||||
"line or paragraph separators, or the @var{eof-object}; if\n"
|
|
||||||
"@code{%read-line} is called at the end of file, it returns the pair\n"
|
"@code{%read-line} is called at the end of file, it returns the pair\n"
|
||||||
"@code{(#<eof> . #<eof>)}.")
|
"@code{(#<eof> . #<eof>)}.")
|
||||||
#define FUNC_NAME s_scm_read_line
|
#define FUNC_NAME s_scm_read_line
|
||||||
|
@ -128,7 +127,6 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
SCM line, strings, result;
|
SCM line, strings, result;
|
||||||
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
||||||
size_t index;
|
size_t index;
|
||||||
int cr = 0;
|
|
||||||
|
|
||||||
if (SCM_UNBNDP (port))
|
if (SCM_UNBNDP (port))
|
||||||
port = scm_current_input_port ();
|
port = scm_current_input_port ();
|
||||||
|
@ -154,24 +152,12 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
buf[index] = scm_getc (port);
|
buf[index] = scm_getc (port);
|
||||||
switch (buf[index])
|
switch (buf[index])
|
||||||
{
|
{
|
||||||
|
case EOF:
|
||||||
case '\n':
|
case '\n':
|
||||||
delim = buf[index];
|
delim = buf[index];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EOF:
|
|
||||||
case 0x2028: /* U+2028 LINE SEPARATOR */
|
|
||||||
case 0x2029: /* U+2029 PARAGRAPH SEPARATOR */
|
|
||||||
cr = 0;
|
|
||||||
delim = buf[index];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '\r':
|
|
||||||
cr = 1;
|
|
||||||
index ++;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
cr = 0;
|
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,33 +165,20 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
while (delim == 0);
|
while (delim == 0);
|
||||||
|
|
||||||
if (SCM_LIKELY (scm_is_false (strings)))
|
if (SCM_LIKELY (scm_is_false (strings)))
|
||||||
{
|
|
||||||
/* The fast path. */
|
/* The fast path. */
|
||||||
if (cr)
|
|
||||||
line = scm_from_utf32_stringn (buf, index - 1);
|
|
||||||
else
|
|
||||||
line = scm_from_utf32_stringn (buf, index);
|
line = scm_from_utf32_stringn (buf, index);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Aggregate the intermediary results. */
|
/* Aggregate the intermediary results. */
|
||||||
if (cr)
|
|
||||||
strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
|
|
||||||
else
|
|
||||||
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
||||||
line = scm_string_concatenate (scm_reverse (strings));
|
line = scm_string_concatenate (scm_reverse (strings));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (delim == EOF && scm_i_string_length (line) == 0)
|
if (delim == EOF && scm_i_string_length (line) == 0)
|
||||||
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
||||||
else
|
|
||||||
{
|
|
||||||
if (cr)
|
|
||||||
result = scm_cons (line, scm_from_latin1_string("\r\n"));
|
|
||||||
else
|
else
|
||||||
result = scm_cons (line,
|
result = scm_cons (line,
|
||||||
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
#undef LINE_BUFFER_SIZE
|
#undef LINE_BUFFER_SIZE
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
;;; Ports, implemented in Scheme
|
;;; Ports, implemented in Scheme
|
||||||
;;; Copyright (C) 2016, 2018, 2021 Free Software Foundation, Inc.
|
;;; Copyright (C) 2016, 2019 Free Software Foundation, Inc.
|
||||||
;;;
|
;;;
|
||||||
;;; This library is free software: you can redistribute it and/or modify
|
;;; This library is free software: you can redistribute it and/or modify
|
||||||
;;; it under the terms of the GNU Lesser General Public License as
|
;;; it under the terms of the GNU Lesser General Public License as
|
||||||
|
@ -689,81 +689,10 @@
|
||||||
|
|
||||||
(define* (read-line #:optional (port (current-input-port))
|
(define* (read-line #:optional (port (current-input-port))
|
||||||
(handle-delim 'trim))
|
(handle-delim 'trim))
|
||||||
(let* ((line/delim (%read-line port))
|
(read-delimited "\n" port handle-delim))
|
||||||
(line (car line/delim))
|
|
||||||
(delim (cdr line/delim)))
|
|
||||||
(case handle-delim
|
|
||||||
((trim) line)
|
|
||||||
((split) line/delim)
|
|
||||||
((concat) (if (and (string? line) (char? delim))
|
|
||||||
(string-append line (string delim))
|
|
||||||
line))
|
|
||||||
((peek) (if (char? delim)
|
|
||||||
(unread-char delim port))
|
|
||||||
line)
|
|
||||||
(else
|
|
||||||
(error "unexpected handle-delim value: " handle-delim)))))
|
|
||||||
|
|
||||||
(define* (%read-line port)
|
(define* (%read-line port)
|
||||||
(let ((LINE_BUFFER_SIZE 256))
|
(read-line port 'split))
|
||||||
(let ((strings #f)
|
|
||||||
(result #f)
|
|
||||||
(buf (make-string LINE_BUFFER_SIZE #\nul))
|
|
||||||
(delim #f)
|
|
||||||
(index 0)
|
|
||||||
(cr #f)
|
|
||||||
(go #t))
|
|
||||||
(cond
|
|
||||||
((not (input-port? port))
|
|
||||||
(error "Not an input port." port))
|
|
||||||
(else
|
|
||||||
(while go
|
|
||||||
(cond
|
|
||||||
((>= index LINE_BUFFER_SIZE)
|
|
||||||
(set! strings (cons (substring buf 0 index)
|
|
||||||
(or strings '())))
|
|
||||||
(set! index 0))
|
|
||||||
(else
|
|
||||||
(let ((c (read-char port)))
|
|
||||||
(cond
|
|
||||||
((or (eof-object? c)
|
|
||||||
(char=? c #\x2028) ; U+2028 LINE SEPARATOR
|
|
||||||
(char=? c #\x2029)) ; U+2029 PARAGRAPH SEPARATOR
|
|
||||||
(set! cr #f)
|
|
||||||
(set! delim c))
|
|
||||||
((char=? c #\newline)
|
|
||||||
(set! delim c))
|
|
||||||
((char=? c #\return)
|
|
||||||
(set! cr #t)
|
|
||||||
(string-set! buf index c)
|
|
||||||
(set! index (1+ index)))
|
|
||||||
(else
|
|
||||||
(set! cr #f)
|
|
||||||
(string-set! buf index c)
|
|
||||||
(set! index (1+ index)))))))
|
|
||||||
|
|
||||||
(if (or (eof-object? delim)
|
|
||||||
(char? delim))
|
|
||||||
(set! go #f)))
|
|
||||||
(let ((line (if (not strings)
|
|
||||||
;; A short string.
|
|
||||||
(if cr
|
|
||||||
(substring buf 0 (1- index))
|
|
||||||
(substring buf 0 index))
|
|
||||||
;; Else, aggregate the intermediary results.
|
|
||||||
(begin
|
|
||||||
(if cr
|
|
||||||
(set! strings (cons (substring buf 0 (1- index)) strings))
|
|
||||||
(set! strings (cons (substring buf 0 index) strings)))
|
|
||||||
(apply string-append (reverse strings))))))
|
|
||||||
|
|
||||||
(if (and (eof-object? delim)
|
|
||||||
(zero? (string-length line)))
|
|
||||||
(cons the-eof-object the-eof-object)
|
|
||||||
;; Else
|
|
||||||
(if cr
|
|
||||||
(cons line "\r\n")
|
|
||||||
(cons line delim)))))))))
|
|
||||||
|
|
||||||
(define* (put-string port str #:optional (start 0)
|
(define* (put-string port str #:optional (start 0)
|
||||||
(count (- (string-length str) start)))
|
(count (- (string-length str) start)))
|
||||||
|
|
|
@ -157,12 +157,13 @@ The default writer will call ‘put-string’."
|
||||||
Raise a 'bad-header' exception if the line does not end in CRLF or LF,
|
Raise a 'bad-header' exception if the line does not end in CRLF or LF,
|
||||||
or if EOF is reached."
|
or if EOF is reached."
|
||||||
(match (%read-line port)
|
(match (%read-line port)
|
||||||
(((? string? line) . "\r\n")
|
|
||||||
line)
|
|
||||||
(((? string? line) . #\newline)
|
(((? string? line) . #\newline)
|
||||||
;; We are more tolerant than the RFC in that we tolerate LF-only
|
;; '%read-line' does not consider #\return a delimiter; so if it's
|
||||||
;; endings.
|
;; there, remove it. We are more tolerant than the RFC in that we
|
||||||
line)
|
;; tolerate LF-only endings.
|
||||||
|
(if (string-suffix? "\r" line)
|
||||||
|
(string-drop-right line 1)
|
||||||
|
line))
|
||||||
((line . _) ;EOF or missing delimiter
|
((line . _) ;EOF or missing delimiter
|
||||||
(bad-header 'read-header-line line))))
|
(bad-header 'read-header-line line))))
|
||||||
|
|
||||||
|
@ -183,7 +184,8 @@ was known but the value was invalid.
|
||||||
Returns the end-of-file object for both values if the end of the message
|
Returns the end-of-file object for both values if the end of the message
|
||||||
body was reached (i.e., a blank line)."
|
body was reached (i.e., a blank line)."
|
||||||
(let ((line (read-header-line port)))
|
(let ((line (read-header-line port)))
|
||||||
(if (string-null? line)
|
(if (or (string-null? line)
|
||||||
|
(string=? line "\r"))
|
||||||
(values *eof* *eof*)
|
(values *eof* *eof*)
|
||||||
(let* ((delim (or (string-index line #\:)
|
(let* ((delim (or (string-index line #\:)
|
||||||
(bad-header '%read line)))
|
(bad-header '%read line)))
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
;;;; rdelim.test --- Delimited I/O. -*- mode: scheme; coding: utf-8; -*-
|
;;;; rdelim.test --- Delimited I/O. -*- mode: scheme; coding: utf-8; -*-
|
||||||
;;;; Ludovic Courtès <ludo@gnu.org>
|
;;;; Ludovic Courtès <ludo@gnu.org>
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Copyright (C) 2011, 2013, 2014, 2021 Free Software Foundation, Inc.
|
;;;; Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
|
||||||
;;;;
|
;;;;
|
||||||
;;;; This library is free software; you can redistribute it and/or
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
;;;; modify it under the terms of the GNU Lesser General Public
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ -62,46 +62,6 @@
|
||||||
(read-line p 'split)))
|
(read-line p 'split)))
|
||||||
(eof-object? (read-line p)))))
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
(pass-if "two lines, split, CRLF"
|
|
||||||
(let* ((s "foo\r\nbar\r\n")
|
|
||||||
(p (open-input-string s)))
|
|
||||||
(and (equal? '(("foo" . "\r\n")
|
|
||||||
("bar" . "\r\n"))
|
|
||||||
(list (read-line p 'split)
|
|
||||||
(read-line p 'split)))
|
|
||||||
(eof-object? (read-line p)))))
|
|
||||||
|
|
||||||
(pass-if "two long lines, split, CRLF"
|
|
||||||
;; Must be longer than 256 codepoints
|
|
||||||
(let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
|
||||||
(text1 (string-append text0 text0 text0 text0 text0))
|
|
||||||
(text2 (string-append text1 "\r\n" text1 "\r\n")))
|
|
||||||
(let* ((s text2)
|
|
||||||
(p (open-input-string s)))
|
|
||||||
(and (equal? `((,text1 . "\r\n")
|
|
||||||
(,text1 . "\r\n"))
|
|
||||||
(list (read-line p 'split)
|
|
||||||
(read-line p 'split)))
|
|
||||||
(eof-object? (read-line p))))))
|
|
||||||
|
|
||||||
(pass-if "two lines, split, LS"
|
|
||||||
(let* ((s "foo\u2028bar\u2028")
|
|
||||||
(p (open-input-string s)))
|
|
||||||
(and (equal? '(("foo" . #\x2028)
|
|
||||||
("bar" . #\x2028))
|
|
||||||
(list (read-line p 'split)
|
|
||||||
(read-line p 'split)))
|
|
||||||
(eof-object? (read-line p)))))
|
|
||||||
|
|
||||||
(pass-if "two lines, split, PS"
|
|
||||||
(let* ((s "foo\u2029bar\u2029")
|
|
||||||
(p (open-input-string s)))
|
|
||||||
(and (equal? '(("foo" . #\x2029)
|
|
||||||
("bar" . #\x2029))
|
|
||||||
(list (read-line p 'split)
|
|
||||||
(read-line p 'split)))
|
|
||||||
(eof-object? (read-line p)))))
|
|
||||||
|
|
||||||
(pass-if "two Greek lines, trim"
|
(pass-if "two Greek lines, trim"
|
||||||
(let* ((s "λαμβδα\nμυ\n")
|
(let* ((s "λαμβδα\nμυ\n")
|
||||||
(p (open-input-string s)))
|
(p (open-input-string s)))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue