diff --git a/NEWS b/NEWS index 2ac599a1c..ec3d096d1 100644 --- a/NEWS +++ b/NEWS @@ -166,12 +166,6 @@ See the newly reorganized "Foreign Function Interface", for details. These new interfaces replace `dynamic-link', `dynamic-pointer' and similar, which will eventually be deprecated. -** `read-line' - -This now accepts return + newline and the Unicode line separator and -paragraph separator as line separators, as well as the newline and -line separators it handled before. - ** `read-syntax' See "Annotated Scheme Read" in the manual. diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi index 2345f043c..777f282e9 100644 --- a/doc/ref/api-io.texi +++ b/doc/ref/api-io.texi @@ -755,10 +755,8 @@ a specified set of characters. @deffn {Scheme Procedure} read-line [port] [handle-delim] Return a line of text from @var{port} if specified, otherwise from the -value returned by @code{(current-input-port)}. Under Unix, a line of -text is terminated by the first end-of-line character or by end-of-file. -The end-of-line characters handled are newline, carriage return plus -newline, or the Unicode line or paragraph separators. +value returned by @code{(current-input-port)}. Under Unix, a line of text +is terminated by the first end-of-line character or by end-of-file. If @var{handle-delim} is specified, it should be one of the following symbols: @@ -773,9 +771,7 @@ Append the terminating delimiter (if any) to the returned string. Push the terminating delimiter (if any) back on to the port. @item split Return a pair containing the string read from the port and the -terminating delimiter or end-of-file object. The delimiter will either -be a single character for newline or the Unicode line or paragraph -separators, or it will be the string @code{"\r\n"}. +terminating delimiter or end-of-file object. @end table @end deffn diff --git a/libguile/rdelim.c b/libguile/rdelim.c index c1b92023a..4a0b20954 100644 --- a/libguile/rdelim.c +++ b/libguile/rdelim.c @@ -112,11 +112,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0, SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, (SCM port), - "Read a line from @var{port}, allocating storage as necessary.\n" - "The terminator (if any) is removed from the string,\n" + "Read a newline-terminated line from @var{port}, allocating storage as\n" + "necessary. The newline terminator (if any) is removed from the string,\n" "and a pair consisting of the line and its delimiter is returned. The\n" - "delimiter may be either a newline, return + newline, the Unicode\n" - "line or paragraph separators, or the @var{eof-object}; if\n" + "delimiter may be either a newline or the @var{eof-object}; if\n" "@code{%read-line} is called at the end of file, it returns the pair\n" "@code{(# . #)}.") #define FUNC_NAME s_scm_read_line @@ -128,7 +127,6 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, SCM line, strings, result; scm_t_wchar buf[LINE_BUFFER_SIZE], delim; size_t index; - int cr = 0; if (SCM_UNBNDP (port)) port = scm_current_input_port (); @@ -154,24 +152,12 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, buf[index] = scm_getc (port); switch (buf[index]) { + case EOF: case '\n': delim = buf[index]; - break; - - case EOF: - case 0x2028: /* U+2028 LINE SEPARATOR */ - case 0x2029: /* U+2029 PARAGRAPH SEPARATOR */ - cr = 0; - delim = buf[index]; - break; - - case '\r': - cr = 1; - index ++; - break; + break; default: - cr = 0; index++; } } @@ -179,33 +165,20 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, while (delim == 0); if (SCM_LIKELY (scm_is_false (strings))) - { - /* The fast path. */ - if (cr) - line = scm_from_utf32_stringn (buf, index - 1); - else - line = scm_from_utf32_stringn (buf, index); - } + /* The fast path. */ + line = scm_from_utf32_stringn (buf, index); else { /* Aggregate the intermediary results. */ - if (cr) - strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings); - else - strings = scm_cons (scm_from_utf32_stringn (buf, index), strings); + strings = scm_cons (scm_from_utf32_stringn (buf, index), strings); line = scm_string_concatenate (scm_reverse (strings)); } if (delim == EOF && scm_i_string_length (line) == 0) result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL); else - { - if (cr) - result = scm_cons (line, scm_from_latin1_string("\r\n")); - else - result = scm_cons (line, - delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim)); - } + result = scm_cons (line, + delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim)); return result; #undef LINE_BUFFER_SIZE diff --git a/module/ice-9/suspendable-ports.scm b/module/ice-9/suspendable-ports.scm index ba8d225f5..f5f005cca 100644 --- a/module/ice-9/suspendable-ports.scm +++ b/module/ice-9/suspendable-ports.scm @@ -1,5 +1,5 @@ ;;; Ports, implemented in Scheme -;;; Copyright (C) 2016, 2018, 2021 Free Software Foundation, Inc. +;;; Copyright (C) 2016, 2019 Free Software Foundation, Inc. ;;; ;;; This library is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU Lesser General Public License as @@ -689,81 +689,10 @@ (define* (read-line #:optional (port (current-input-port)) (handle-delim 'trim)) - (let* ((line/delim (%read-line port)) - (line (car line/delim)) - (delim (cdr line/delim))) - (case handle-delim - ((trim) line) - ((split) line/delim) - ((concat) (if (and (string? line) (char? delim)) - (string-append line (string delim)) - line)) - ((peek) (if (char? delim) - (unread-char delim port)) - line) - (else - (error "unexpected handle-delim value: " handle-delim))))) + (read-delimited "\n" port handle-delim)) (define* (%read-line port) - (let ((LINE_BUFFER_SIZE 256)) - (let ((strings #f) - (result #f) - (buf (make-string LINE_BUFFER_SIZE #\nul)) - (delim #f) - (index 0) - (cr #f) - (go #t)) - (cond - ((not (input-port? port)) - (error "Not an input port." port)) - (else - (while go - (cond - ((>= index LINE_BUFFER_SIZE) - (set! strings (cons (substring buf 0 index) - (or strings '()))) - (set! index 0)) - (else - (let ((c (read-char port))) - (cond - ((or (eof-object? c) - (char=? c #\x2028) ; U+2028 LINE SEPARATOR - (char=? c #\x2029)) ; U+2029 PARAGRAPH SEPARATOR - (set! cr #f) - (set! delim c)) - ((char=? c #\newline) - (set! delim c)) - ((char=? c #\return) - (set! cr #t) - (string-set! buf index c) - (set! index (1+ index))) - (else - (set! cr #f) - (string-set! buf index c) - (set! index (1+ index))))))) - - (if (or (eof-object? delim) - (char? delim)) - (set! go #f))) - (let ((line (if (not strings) - ;; A short string. - (if cr - (substring buf 0 (1- index)) - (substring buf 0 index)) - ;; Else, aggregate the intermediary results. - (begin - (if cr - (set! strings (cons (substring buf 0 (1- index)) strings)) - (set! strings (cons (substring buf 0 index) strings))) - (apply string-append (reverse strings)))))) - - (if (and (eof-object? delim) - (zero? (string-length line))) - (cons the-eof-object the-eof-object) - ;; Else - (if cr - (cons line "\r\n") - (cons line delim))))))))) + (read-line port 'split)) (define* (put-string port str #:optional (start 0) (count (- (string-length str) start))) diff --git a/module/web/http.scm b/module/web/http.scm index 32a3093f1..4276e1744 100644 --- a/module/web/http.scm +++ b/module/web/http.scm @@ -157,12 +157,13 @@ The default writer will call ‘put-string’." Raise a 'bad-header' exception if the line does not end in CRLF or LF, or if EOF is reached." (match (%read-line port) - (((? string? line) . "\r\n") - line) (((? string? line) . #\newline) - ;; We are more tolerant than the RFC in that we tolerate LF-only - ;; endings. - line) + ;; '%read-line' does not consider #\return a delimiter; so if it's + ;; there, remove it. We are more tolerant than the RFC in that we + ;; tolerate LF-only endings. + (if (string-suffix? "\r" line) + (string-drop-right line 1) + line)) ((line . _) ;EOF or missing delimiter (bad-header 'read-header-line line)))) @@ -183,7 +184,8 @@ was known but the value was invalid. Returns the end-of-file object for both values if the end of the message body was reached (i.e., a blank line)." (let ((line (read-header-line port))) - (if (string-null? line) + (if (or (string-null? line) + (string=? line "\r")) (values *eof* *eof*) (let* ((delim (or (string-index line #\:) (bad-header '%read line))) diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test index 1060d5c17..3aaa0b253 100644 --- a/test-suite/tests/rdelim.test +++ b/test-suite/tests/rdelim.test @@ -1,7 +1,7 @@ ;;;; rdelim.test --- Delimited I/O. -*- mode: scheme; coding: utf-8; -*- ;;;; Ludovic Courtès ;;;; -;;;; Copyright (C) 2011, 2013, 2014, 2021 Free Software Foundation, Inc. +;;;; Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -62,46 +62,6 @@ (read-line p 'split))) (eof-object? (read-line p))))) - (pass-if "two lines, split, CRLF" - (let* ((s "foo\r\nbar\r\n") - (p (open-input-string s))) - (and (equal? '(("foo" . "\r\n") - ("bar" . "\r\n")) - (list (read-line p 'split) - (read-line p 'split))) - (eof-object? (read-line p))))) - - (pass-if "two long lines, split, CRLF" - ;; Must be longer than 256 codepoints - (let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") - (text1 (string-append text0 text0 text0 text0 text0)) - (text2 (string-append text1 "\r\n" text1 "\r\n"))) - (let* ((s text2) - (p (open-input-string s))) - (and (equal? `((,text1 . "\r\n") - (,text1 . "\r\n")) - (list (read-line p 'split) - (read-line p 'split))) - (eof-object? (read-line p)))))) - - (pass-if "two lines, split, LS" - (let* ((s "foo\u2028bar\u2028") - (p (open-input-string s))) - (and (equal? '(("foo" . #\x2028) - ("bar" . #\x2028)) - (list (read-line p 'split) - (read-line p 'split))) - (eof-object? (read-line p))))) - - (pass-if "two lines, split, PS" - (let* ((s "foo\u2029bar\u2029") - (p (open-input-string s))) - (and (equal? '(("foo" . #\x2029) - ("bar" . #\x2029)) - (list (read-line p 'split) - (read-line p 'split))) - (eof-object? (read-line p))))) - (pass-if "two Greek lines, trim" (let* ((s "λαμβδα\nμυ\n") (p (open-input-string s)))