1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-04 16:50:25 +02:00

Let read-line handle alternate line endings

Adds CRLF, NEL, PS and LS as line endings. %read-line will return
these. In the case of CRLF, %read-line will return a string "\r\n"
as the line ending.

* libguile/rdelim.c (scm_read_line): handle more line delimiters
* test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test
  ("two long lines, split, CRLF", "two lines, split, NEL"): new tests
  ("two lines, split, LS", "two lines, split, PS"): new tests
This commit is contained in:
Michael Gran 2018-04-17 08:22:18 -07:00
parent 34131e3ac5
commit 77b33170f4
2 changed files with 83 additions and 7 deletions

View file

@ -126,6 +126,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
SCM line, strings, result; SCM line, strings, result;
scm_t_wchar buf[LINE_BUFFER_SIZE], delim; scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
size_t index; size_t index;
int cr = 0;
if (SCM_UNBNDP (port)) if (SCM_UNBNDP (port))
port = scm_current_input_port (); port = scm_current_input_port ();
@ -151,12 +152,25 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
buf[index] = scm_getc (port); buf[index] = scm_getc (port);
switch (buf[index]) switch (buf[index])
{ {
case EOF:
case '\n': case '\n':
delim = buf[index]; delim = buf[index];
break; break;
case EOF:
case 0x85:
case 0x2028:
case 0x2029:
cr = 0;
delim = buf[index];
break;
case '\r':
cr = 1;
index ++;
break;
default: default:
cr = 0;
index++; index++;
} }
} }
@ -164,20 +178,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
while (delim == 0); while (delim == 0);
if (SCM_LIKELY (scm_is_false (strings))) if (SCM_LIKELY (scm_is_false (strings)))
/* The fast path. */ {
line = scm_from_utf32_stringn (buf, index); /* The fast path. */
if (cr)
line = scm_from_utf32_stringn (buf, index - 1);
else
line = scm_from_utf32_stringn (buf, index);
}
else else
{ {
/* Aggregate the intermediary results. */ /* Aggregate the intermediary results. */
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings); if (cr)
strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
else
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
line = scm_string_concatenate (scm_reverse (strings)); line = scm_string_concatenate (scm_reverse (strings));
} }
if (delim == EOF && scm_i_string_length (line) == 0) if (delim == EOF && scm_i_string_length (line) == 0)
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL); result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
else else
result = scm_cons (line, {
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim)); if (cr)
result = scm_cons (line, scm_from_latin1_string("\r\n"));
else
result = scm_cons (line,
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
}
return result; return result;
#undef LINE_BUFFER_SIZE #undef LINE_BUFFER_SIZE

View file

@ -62,6 +62,55 @@
(read-line p 'split))) (read-line p 'split)))
(eof-object? (read-line p))))) (eof-object? (read-line p)))))
(pass-if "two lines, split, CRLF"
(let* ((s "foo\r\nbar\r\n")
(p (open-input-string s)))
(and (equal? '(("foo" . "\r\n")
("bar" . "\r\n"))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p)))))
(pass-if "two long lines, split, CRLF"
;; Must be longer than 256 codepoints
(let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
(text1 (string-append text0 text0 text0 text0 text0))
(text2 (string-append text1 "\r\n" text1 "\r\n")))
(let* ((s text2)
(p (open-input-string s)))
(and (equal? `((,text1 . "\r\n")
(,text1 . "\r\n"))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p))))))
(pass-if "two lines, split, NEL"
(let* ((s "foo\x85bar\x85")
(p (open-input-string s)))
(and (equal? '(("foo" . #\x85)
("bar" . #\x85))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p)))))
(pass-if "two lines, split, LS"
(let* ((s "foo\u2028bar\u2028")
(p (open-input-string s)))
(and (equal? '(("foo" . #\x2028)
("bar" . #\x2028))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p)))))
(pass-if "two lines, split, PS"
(let* ((s "foo\u2029bar\u2029")
(p (open-input-string s)))
(and (equal? '(("foo" . #\x2029)
("bar" . #\x2029))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p)))))
(pass-if "two Greek lines, trim" (pass-if "two Greek lines, trim"
(let* ((s "λαμβδα\nμυ\n") (let* ((s "λαμβδα\nμυ\n")
(p (open-input-string s))) (p (open-input-string s)))