mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-07-04 16:50:25 +02:00
Let read-line handle alternate line endings
Adds CRLF, NEL, PS and LS as line endings. %read-line will return these. In the case of CRLF, %read-line will return a string "\r\n" as the line ending. * libguile/rdelim.c (scm_read_line): handle more line delimiters * test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test ("two long lines, split, CRLF", "two lines, split, NEL"): new tests ("two lines, split, LS", "two lines, split, PS"): new tests
This commit is contained in:
parent
34131e3ac5
commit
77b33170f4
2 changed files with 83 additions and 7 deletions
|
@ -126,6 +126,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
SCM line, strings, result;
|
SCM line, strings, result;
|
||||||
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
|
||||||
size_t index;
|
size_t index;
|
||||||
|
int cr = 0;
|
||||||
|
|
||||||
if (SCM_UNBNDP (port))
|
if (SCM_UNBNDP (port))
|
||||||
port = scm_current_input_port ();
|
port = scm_current_input_port ();
|
||||||
|
@ -151,12 +152,25 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
buf[index] = scm_getc (port);
|
buf[index] = scm_getc (port);
|
||||||
switch (buf[index])
|
switch (buf[index])
|
||||||
{
|
{
|
||||||
case EOF:
|
|
||||||
case '\n':
|
case '\n':
|
||||||
delim = buf[index];
|
delim = buf[index];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case EOF:
|
||||||
|
case 0x85:
|
||||||
|
case 0x2028:
|
||||||
|
case 0x2029:
|
||||||
|
cr = 0;
|
||||||
|
delim = buf[index];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '\r':
|
||||||
|
cr = 1;
|
||||||
|
index ++;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
cr = 0;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -164,20 +178,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
|
||||||
while (delim == 0);
|
while (delim == 0);
|
||||||
|
|
||||||
if (SCM_LIKELY (scm_is_false (strings)))
|
if (SCM_LIKELY (scm_is_false (strings)))
|
||||||
/* The fast path. */
|
{
|
||||||
line = scm_from_utf32_stringn (buf, index);
|
/* The fast path. */
|
||||||
|
if (cr)
|
||||||
|
line = scm_from_utf32_stringn (buf, index - 1);
|
||||||
|
else
|
||||||
|
line = scm_from_utf32_stringn (buf, index);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Aggregate the intermediary results. */
|
/* Aggregate the intermediary results. */
|
||||||
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
if (cr)
|
||||||
|
strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
|
||||||
|
else
|
||||||
|
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
|
||||||
line = scm_string_concatenate (scm_reverse (strings));
|
line = scm_string_concatenate (scm_reverse (strings));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (delim == EOF && scm_i_string_length (line) == 0)
|
if (delim == EOF && scm_i_string_length (line) == 0)
|
||||||
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
|
||||||
else
|
else
|
||||||
result = scm_cons (line,
|
{
|
||||||
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
if (cr)
|
||||||
|
result = scm_cons (line, scm_from_latin1_string("\r\n"));
|
||||||
|
else
|
||||||
|
result = scm_cons (line,
|
||||||
|
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
#undef LINE_BUFFER_SIZE
|
#undef LINE_BUFFER_SIZE
|
||||||
|
|
|
@ -62,6 +62,55 @@
|
||||||
(read-line p 'split)))
|
(read-line p 'split)))
|
||||||
(eof-object? (read-line p)))))
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two lines, split, CRLF"
|
||||||
|
(let* ((s "foo\r\nbar\r\n")
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? '(("foo" . "\r\n")
|
||||||
|
("bar" . "\r\n"))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two long lines, split, CRLF"
|
||||||
|
;; Must be longer than 256 codepoints
|
||||||
|
(let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
(text1 (string-append text0 text0 text0 text0 text0))
|
||||||
|
(text2 (string-append text1 "\r\n" text1 "\r\n")))
|
||||||
|
(let* ((s text2)
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? `((,text1 . "\r\n")
|
||||||
|
(,text1 . "\r\n"))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p))))))
|
||||||
|
|
||||||
|
(pass-if "two lines, split, NEL"
|
||||||
|
(let* ((s "foo\x85bar\x85")
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? '(("foo" . #\x85)
|
||||||
|
("bar" . #\x85))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two lines, split, LS"
|
||||||
|
(let* ((s "foo\u2028bar\u2028")
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? '(("foo" . #\x2028)
|
||||||
|
("bar" . #\x2028))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
|
(pass-if "two lines, split, PS"
|
||||||
|
(let* ((s "foo\u2029bar\u2029")
|
||||||
|
(p (open-input-string s)))
|
||||||
|
(and (equal? '(("foo" . #\x2029)
|
||||||
|
("bar" . #\x2029))
|
||||||
|
(list (read-line p 'split)
|
||||||
|
(read-line p 'split)))
|
||||||
|
(eof-object? (read-line p)))))
|
||||||
|
|
||||||
(pass-if "two Greek lines, trim"
|
(pass-if "two Greek lines, trim"
|
||||||
(let* ((s "λαμβδα\nμυ\n")
|
(let* ((s "λαμβδα\nμυ\n")
|
||||||
(p (open-input-string s)))
|
(p (open-input-string s)))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue