1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 03:30:27 +02:00

Rewrite read-line' in terms of scm_getc'.

As a result `read-line' handles decoding and decoding errors the same
way as `scm_getc'.  It's also simpler and free of `malloc' calls.

* libguile/rdelim.c (scm_do_read_line): Remove.
  (scm_read_line): Rewrite as a loop that calls `scm_getc'.

* test-suite/tests/rdelim.test: New file.
* test-suite/Makefile.am (SCM_TESTS): Add `tests/rdelim.test'.
This commit is contained in:
Ludovic Courtès 2011-01-26 00:24:34 +01:00
parent cc540d0bbd
commit a2c36371ce
3 changed files with 128 additions and 116 deletions

View file

@ -1,5 +1,6 @@
/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2006 Free Software Foundation, Inc.
*
/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2006,
* 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
@ -100,88 +101,6 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
}
#undef FUNC_NAME
static unsigned char *
scm_do_read_line (SCM port, size_t *len_p)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
unsigned char *end;
/* I thought reading lines was simple. Mercy me. */
/* The common case: the buffer contains a complete line.
This needs to be fast. */
if ((end = memchr (pt->read_pos, '\n', (pt->read_end - pt->read_pos)))
!= 0)
{
size_t buf_len = (end + 1) - pt->read_pos;
/* Allocate a buffer of the perfect size. */
unsigned char *buf = scm_malloc (buf_len + 1);
memcpy (buf, pt->read_pos, buf_len);
pt->read_pos += buf_len;
buf[buf_len] = '\0';
*len_p = buf_len;
return buf;
}
/* The buffer contains no newlines. */
{
/* When live, len is always the number of characters in the
current buffer that are part of the current line. */
size_t len = (pt->read_end - pt->read_pos);
size_t buf_size = (len < 50) ? 60 : len * 2;
/* Invariant: buf always has buf_size + 1 characters allocated;
the `+ 1' is for the final '\0'. */
unsigned char *buf = scm_malloc (buf_size + 1);
size_t buf_len = 0;
for (;;)
{
if (buf_len + len > buf_size)
{
size_t new_size = (buf_len + len) * 2;
buf = scm_realloc (buf, new_size + 1);
buf_size = new_size;
}
/* Copy what we've got out of the port, into our buffer. */
memcpy (buf + buf_len, pt->read_pos, len);
buf_len += len;
pt->read_pos += len;
/* If we had seen a newline, we're done now. */
if (end)
break;
/* Get more characters. */
if (scm_fill_input (port) == EOF)
{
/* If we're missing a final newline in the file, return
what we did get, sans newline. */
if (buf_len > 0)
break;
free (buf);
return 0;
}
/* Search the buffer for newlines. */
if ((end = memchr (pt->read_pos, '\n',
(len = (pt->read_end - pt->read_pos))))
!= 0)
len = (end - pt->read_pos) + 1;
}
/* I wonder how expensive this realloc is. */
buf = scm_realloc (buf, buf_len + 1);
buf[buf_len] = '\0';
*len_p = buf_len;
return buf;
}
}
/*
* %read-line
@ -201,52 +120,67 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
"@code{(#<eof> . #<eof>)}.")
#define FUNC_NAME s_scm_read_line
{
scm_t_port *pt;
char *s;
size_t slen = 0;
SCM line, term;
const char *enc;
scm_t_string_failed_conversion_handler hndl;
/* Threshold under which the only allocation performed is that of the
resulting string and pair. */
#define LINE_BUFFER_SIZE 1024
SCM line, strings, result;
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
size_t index;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
SCM_VALIDATE_OPINPORT (1,port);
pt = SCM_PTAB_ENTRY (port);
enc = pt->encoding;
hndl = pt->ilseq_handler;
if (pt->rw_active == SCM_PORT_WRITE)
scm_ptobs[SCM_PTOBNUM (port)].flush (port);
index = 0;
delim = 0;
strings = SCM_EOL;
s = (char *) scm_do_read_line (port, &slen);
if (s == NULL)
term = line = SCM_EOF_VAL;
else
do
{
if (s[slen - 1] == '\n')
if (index >= sizeof (buf))
{
term = SCM_MAKE_CHAR ('\n');
s[slen - 1] = '\0';
line = scm_from_stringn (s, slen - 1, enc, hndl);
free (s);
SCM_INCLINE (port);
/* The line is getting longer than BUF so store its current
contents in STRINGS. */
strings = scm_cons (scm_from_utf32_stringn (buf, index),
scm_is_false (strings) ? SCM_EOL : strings);
index = 0;
}
else
{
/* Fix: we should check for eof on the port before assuming this. */
term = SCM_EOF_VAL;
line = scm_from_stringn (s, slen, enc, hndl);
free (s);
SCM_COL (port) += scm_i_string_length (line);
buf[index] = scm_getc (port);
switch (buf[index])
{
case EOF:
case '\n':
delim = buf[index];
break;
default:
index++;
}
}
}
while (delim == 0);
if (pt->rw_random)
pt->rw_active = SCM_PORT_READ;
if (scm_is_false (strings))
line = scm_from_utf32_stringn (buf, index);
else
{
/* Aggregate the intermediary results. */
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
line = scm_string_concatenate (scm_reverse (strings));
}
return scm_cons (line, term);
if (delim == EOF && scm_i_string_length (line) == 0)
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
else
result = scm_cons (line,
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
return result;
#undef LINE_BUFFER_SIZE
}
#undef FUNC_NAME

View file

@ -1,6 +1,7 @@
## Process this file with automake to produce Makefile.in.
##
## Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Software Foundation, Inc.
## Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
## 2010, 2011 Software Foundation, Inc.
##
## This file is part of GUILE.
##
@ -100,6 +101,7 @@ SCM_TESTS = tests/00-initial-env.test \
tests/r6rs-unicode.test \
tests/rnrs-libraries.test \
tests/ramap.test \
tests/rdelim.test \
tests/reader.test \
tests/receive.test \
tests/regexp.test \

View file

@ -0,0 +1,76 @@
;;;; rdelim.test --- Delimited I/O. -*- mode: scheme; coding: utf-8; -*-
;;;; Ludovic Courtès <ludo@gnu.org>
;;;;
;;;; Copyright (C) 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
(define-module (test-suite test-rdelim)
#:use-module (ice-9 rdelim)
#:use-module (test-suite lib))
(with-fluids ((%default-port-encoding "UTF-8"))
(with-test-prefix "read-line"
(pass-if "one line"
(let* ((s "hello, world")
(p (open-input-string s)))
(and (string=? s (read-line p))
(eof-object? (read-line p)))))
(pass-if "two lines, trim"
(let* ((s "foo\nbar\n")
(p (open-input-string s)))
(and (equal? (string-tokenize s)
(list (read-line p) (read-line p)))
(eof-object? (read-line p)))))
(pass-if "two lines, concat"
(let* ((s "foo\nbar\n")
(p (open-input-string s)))
(and (equal? '("foo\n" "bar\n")
(list (read-line p 'concat)
(read-line p 'concat)))
(eof-object? (read-line p)))))
(pass-if "two lines, peek"
(let* ((s "foo\nbar\n")
(p (open-input-string s)))
(and (equal? '("foo" #\newline "bar" #\newline)
(list (read-line p 'peek) (read-char p)
(read-line p 'peek) (read-char p)))
(eof-object? (read-line p)))))
(pass-if "two lines, split"
(let* ((s "foo\nbar\n")
(p (open-input-string s)))
(and (equal? '(("foo" . #\newline)
("bar" . #\newline))
(list (read-line p 'split)
(read-line p 'split)))
(eof-object? (read-line p)))))
(pass-if "two Greek lines, trim"
(let* ((s "λαμβδα\nμυ\n")
(p (open-input-string s)))
(and (equal? (string-tokenize s)
(list (read-line p) (read-line p)))
(eof-object? (read-line p)))))))
;;; Local Variables:
;;; eval: (put 'with-test-prefix 'scheme-indent-function 1)
;;; eval: (put 'pass-if 'scheme-indent-function 1)
;;; End: