mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-07-03 16:20:39 +02:00
Let read-line handle alternate line endings
Add CRLF, NEL (U+0085), LS (U+2028) and PS (U+2029) as line endings recognized by %read-line. %read-line returns the delimiter it found: for NEL, LS and PS that is the delimiter character itself; for CRLF it is the string "\r\n". * libguile/rdelim.c (scm_read_line): Handle more line delimiters. * test-suite/tests/rdelim.test ("two lines, split, CRLF"): New test. ("two long lines, split, CRLF", "two lines, split, NEL"): New tests. ("two lines, split, LS", "two lines, split, PS"): New tests.
This commit is contained in:
parent
34131e3ac5
commit
77b33170f4
2 changed files with 83 additions and 7 deletions
|
@ -126,6 +126,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
|||
SCM line, strings, result;
|
||||
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
||||
size_t index;
|
||||
int cr = 0;
|
||||
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
|
@ -151,12 +152,25 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
|||
buf[index] = scm_getc (port);
|
||||
switch (buf[index])
|
||||
{
|
||||
case EOF:
|
||||
case '\n':
|
||||
delim = buf[index];
|
||||
break;
|
||||
|
||||
case EOF:
|
||||
case 0x85:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
cr = 0;
|
||||
delim = buf[index];
|
||||
break;
|
||||
|
||||
case '\r':
|
||||
cr = 1;
|
||||
index ++;
|
||||
break;
|
||||
|
||||
default:
|
||||
cr = 0;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
@ -164,20 +178,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
|||
while (delim == 0);
|
||||
|
||||
if (SCM_LIKELY (scm_is_false (strings)))
|
||||
{
|
||||
/* The fast path. */
|
||||
if (cr)
|
||||
line = scm_from_utf32_stringn (buf, index - 1);
|
||||
else
|
||||
line = scm_from_utf32_stringn (buf, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Aggregate the intermediary results. */
|
||||
if (cr)
|
||||
strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
|
||||
else
|
||||
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
||||
line = scm_string_concatenate (scm_reverse (strings));
|
||||
}
|
||||
|
||||
if (delim == EOF && scm_i_string_length (line) == 0)
|
||||
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
||||
else
|
||||
{
|
||||
if (cr)
|
||||
result = scm_cons (line, scm_from_latin1_string("\r\n"));
|
||||
else
|
||||
result = scm_cons (line,
|
||||
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
||||
}
|
||||
|
||||
return result;
|
||||
#undef LINE_BUFFER_SIZE
|
||||
|
|
|
@ -62,6 +62,55 @@
|
|||
(read-line p 'split)))
|
||||
(eof-object? (read-line p)))))
|
||||
|
||||
(pass-if "two lines, split, CRLF"
|
||||
(let* ((s "foo\r\nbar\r\n")
|
||||
(p (open-input-string s)))
|
||||
(and (equal? '(("foo" . "\r\n")
|
||||
("bar" . "\r\n"))
|
||||
(list (read-line p 'split)
|
||||
(read-line p 'split)))
|
||||
(eof-object? (read-line p)))))
|
||||
|
||||
(pass-if "two long lines, split, CRLF"
|
||||
;; Must be longer than 256 codepoints
|
||||
(let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
(text1 (string-append text0 text0 text0 text0 text0))
|
||||
(text2 (string-append text1 "\r\n" text1 "\r\n")))
|
||||
(let* ((s text2)
|
||||
(p (open-input-string s)))
|
||||
(and (equal? `((,text1 . "\r\n")
|
||||
(,text1 . "\r\n"))
|
||||
(list (read-line p 'split)
|
||||
(read-line p 'split)))
|
||||
(eof-object? (read-line p))))))
|
||||
|
||||
(pass-if "two lines, split, NEL"
|
||||
(let* ((s "foo\x85bar\x85")
|
||||
(p (open-input-string s)))
|
||||
(and (equal? '(("foo" . #\x85)
|
||||
("bar" . #\x85))
|
||||
(list (read-line p 'split)
|
||||
(read-line p 'split)))
|
||||
(eof-object? (read-line p)))))
|
||||
|
||||
(pass-if "two lines, split, LS"
|
||||
(let* ((s "foo\u2028bar\u2028")
|
||||
(p (open-input-string s)))
|
||||
(and (equal? '(("foo" . #\x2028)
|
||||
("bar" . #\x2028))
|
||||
(list (read-line p 'split)
|
||||
(read-line p 'split)))
|
||||
(eof-object? (read-line p)))))
|
||||
|
||||
(pass-if "two lines, split, PS"
|
||||
(let* ((s "foo\u2029bar\u2029")
|
||||
(p (open-input-string s)))
|
||||
(and (equal? '(("foo" . #\x2029)
|
||||
("bar" . #\x2029))
|
||||
(list (read-line p 'split)
|
||||
(read-line p 'split)))
|
||||
(eof-object? (read-line p)))))
|
||||
|
||||
(pass-if "two Greek lines, trim"
|
||||
(let* ((s "λαμβδα\nμυ\n")
|
||||
(p (open-input-string s)))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue