mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Implement efficient 'scm_unget_bytes' and 'unget-bytevector'.
* libguile/ports.c (scm_i_unget_bytes): New static function. (scm_unget_bytes): New API function. (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'. (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'. * libguile/ports.h: Add prototype for 'scm_unget_bytes'. * libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'. * libguile/r6rs-ports.c (scm_unget_bytevector): New procedure. * module/ice-9/binary-ports.scm (unget-bytevector): New export. * doc/ref/api-io.texi (R6RS Binary Input): Add documentation. (R6RS I/O Ports): Update brief description of (ice-9 binary-ports) to reflect the new reality: it is no longer a subset of (rnrs io ports). * test-suite/tests/ports.test ("unget-bytevector"): Add test.
This commit is contained in:
parent
e1966d0e21
commit
7f6c3f8f00
7 changed files with 160 additions and 60 deletions
|
@ -1223,9 +1223,10 @@ possible.
|
|||
* R6RS Textual Output:: Textual output.
|
||||
@end menu
|
||||
|
||||
A subset of the @code{(rnrs io ports)} module is provided by the
|
||||
@code{(ice-9 binary-ports)} module. It contains binary input/output
|
||||
procedures and does not rely on R6RS support.
|
||||
A subset of the @code{(rnrs io ports)} module, plus one non-standard
|
||||
procedure @code{unget-bytevector} (@pxref{R6RS Binary Input}), is
|
||||
provided by the @code{(ice-9 binary-ports)} module. It contains binary
|
||||
input/output procedures and does not rely on R6RS support.
|
||||
|
||||
@node R6RS File Names
|
||||
@subsubsection File Names
|
||||
|
@ -1855,6 +1856,18 @@ reached. Return either a new bytevector containing the data read or the
|
|||
end-of-file object (if no data were available).
|
||||
@end deffn
|
||||
|
||||
The @code{(ice-9 binary-ports)} module provides the following procedure
|
||||
as an extension to @code{(rnrs io ports)}:
|
||||
|
||||
@deffn {Scheme Procedure} unget-bytevector port bv [start [count]]
|
||||
@deffnx {C Function} scm_unget_bytevector (port, bv, start, count)
|
||||
Place the contents of @var{bv} in @var{port}, optionally starting at
|
||||
index @var{start} and limiting to @var{count} octets, so that its bytes
|
||||
will be read from left-to-right as the next bytes from @var{port} during
|
||||
subsequent read operations. If called multiple times, the unread bytes
|
||||
will be read again in last-in first-out order.
|
||||
@end deffn
|
||||
|
||||
@node R6RS Textual Input
|
||||
@subsubsection Textual Input
|
||||
|
||||
|
|
|
@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
|
|||
|
||||
if (ndrained > 0)
|
||||
/* Put DRAINED back to PORT. */
|
||||
while (ndrained-- > 0)
|
||||
scm_unget_byte (drained[ndrained], port);
|
||||
scm_unget_bytes ((unsigned char *) drained, ndrained, port);
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
|
130
libguile/ports.c
130
libguile/ports.c
|
@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
|
|||
|
||||
|
||||
|
||||
void
|
||||
scm_unget_byte (int c, SCM port)
|
||||
#define FUNC_NAME "scm_unget_byte"
|
||||
static void
|
||||
scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
|
||||
#define FUNC_NAME "scm_unget_bytes"
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
size_t old_len, new_len;
|
||||
|
||||
scm_i_clear_pending_eof (port);
|
||||
if (pt->read_buf == pt->putback_buf)
|
||||
/* already using the put-back buffer. */
|
||||
{
|
||||
/* enlarge putback_buf if necessary. */
|
||||
if (pt->read_end == pt->read_buf + pt->read_buf_size
|
||||
&& pt->read_buf == pt->read_pos)
|
||||
{
|
||||
size_t new_size = pt->read_buf_size * 2;
|
||||
unsigned char *tmp = (unsigned char *)
|
||||
scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
|
||||
"putback buffer");
|
||||
|
||||
pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
|
||||
pt->read_end = pt->read_buf + pt->read_buf_size;
|
||||
pt->read_buf_size = pt->putback_buf_size = new_size;
|
||||
}
|
||||
|
||||
/* shift any existing bytes to buffer + 1. */
|
||||
if (pt->read_pos == pt->read_end)
|
||||
pt->read_end = pt->read_buf + 1;
|
||||
else if (pt->read_pos != pt->read_buf + 1)
|
||||
{
|
||||
int count = pt->read_end - pt->read_pos;
|
||||
|
||||
memmove (pt->read_buf + 1, pt->read_pos, count);
|
||||
pt->read_end = pt->read_buf + 1 + count;
|
||||
}
|
||||
|
||||
pt->read_pos = pt->read_buf;
|
||||
}
|
||||
else
|
||||
if (pt->read_buf != pt->putback_buf)
|
||||
/* switch to the put-back buffer. */
|
||||
{
|
||||
if (pt->putback_buf == NULL)
|
||||
{
|
||||
pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
|
||||
? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
|
||||
pt->putback_buf
|
||||
= (unsigned char *) scm_gc_malloc_pointerless
|
||||
(SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
|
||||
pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
|
||||
(pt->putback_buf_size, "putback buffer");
|
||||
}
|
||||
|
||||
pt->saved_read_buf = pt->read_buf;
|
||||
|
@ -1842,18 +1815,80 @@ scm_unget_byte (int c, SCM port)
|
|||
pt->saved_read_end = pt->read_end;
|
||||
pt->saved_read_buf_size = pt->read_buf_size;
|
||||
|
||||
pt->read_pos = pt->read_buf = pt->putback_buf;
|
||||
pt->read_end = pt->read_buf + 1;
|
||||
/* Put read_pos at the end of the buffer, so that ungets will not
|
||||
have to shift the buffer contents each time. */
|
||||
pt->read_buf = pt->putback_buf;
|
||||
pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
|
||||
pt->read_buf_size = pt->putback_buf_size;
|
||||
}
|
||||
|
||||
*pt->read_buf = c;
|
||||
old_len = pt->read_end - pt->read_pos;
|
||||
new_len = old_len + len;
|
||||
|
||||
if (new_len > pt->read_buf_size)
|
||||
/* The putback buffer needs to be enlarged. */
|
||||
{
|
||||
size_t new_buf_size;
|
||||
unsigned char *new_buf, *new_end, *new_pos;
|
||||
|
||||
new_buf_size = pt->read_buf_size * 2;
|
||||
if (new_buf_size < new_len)
|
||||
new_buf_size = new_len;
|
||||
|
||||
new_buf = (unsigned char *)
|
||||
scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
|
||||
|
||||
/* Put the bytes at the end of the buffer, so that future
|
||||
ungets won't need to shift the buffer. */
|
||||
new_end = new_buf + new_buf_size;
|
||||
new_pos = new_end - old_len;
|
||||
memcpy (new_pos, pt->read_pos, old_len);
|
||||
|
||||
pt->read_buf = pt->putback_buf = new_buf;
|
||||
pt->read_pos = new_pos;
|
||||
pt->read_end = new_end;
|
||||
pt->read_buf_size = pt->putback_buf_size = new_buf_size;
|
||||
}
|
||||
else if (pt->read_buf + len < pt->read_pos)
|
||||
/* If needed, shift the existing buffer contents up.
|
||||
This should not happen unless some external code
|
||||
manipulates the putback buffer pointers. */
|
||||
{
|
||||
unsigned char *new_end = pt->read_buf + pt->read_buf_size;
|
||||
unsigned char *new_pos = new_end - old_len;
|
||||
|
||||
memmove (new_pos, pt->read_pos, old_len);
|
||||
pt->read_pos = new_pos;
|
||||
pt->read_end = new_end;
|
||||
}
|
||||
|
||||
/* Move read_pos back and copy the bytes there. */
|
||||
pt->read_pos -= len;
|
||||
memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
|
||||
|
||||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
scm_flush (port);
|
||||
|
||||
if (pt->rw_random)
|
||||
pt->rw_active = SCM_PORT_READ;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
void
|
||||
scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
|
||||
{
|
||||
scm_i_unget_bytes (buf, len, port);
|
||||
}
|
||||
|
||||
void
|
||||
scm_unget_byte (int c, SCM port)
|
||||
{
|
||||
unsigned char byte;
|
||||
|
||||
byte = c;
|
||||
scm_i_unget_bytes (&byte, 1, port);
|
||||
}
|
||||
|
||||
void
|
||||
scm_ungetc (scm_t_wchar c, SCM port)
|
||||
#define FUNC_NAME "scm_ungetc"
|
||||
|
@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
|
|||
char result_buf[10];
|
||||
const char *encoding;
|
||||
size_t len;
|
||||
int i;
|
||||
|
||||
if (pt->encoding != NULL)
|
||||
encoding = pt->encoding;
|
||||
|
@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
|
|||
"conversion to port encoding failed",
|
||||
SCM_BOOL_F, SCM_MAKE_CHAR (c));
|
||||
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
scm_unget_byte (result[i], port);
|
||||
scm_i_unget_bytes ((unsigned char *) result, len, port);
|
||||
|
||||
if (SCM_UNLIKELY (result != result_buf))
|
||||
free (result);
|
||||
|
@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
|||
SCM result;
|
||||
scm_t_wchar c;
|
||||
char bytes[SCM_MBCHAR_BUF_SIZE];
|
||||
long column, line, i;
|
||||
long column, line;
|
||||
size_t len;
|
||||
|
||||
if (SCM_UNBNDP (port))
|
||||
|
@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
|||
|
||||
err = get_codepoint (port, &c, bytes, &len);
|
||||
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
scm_unget_byte (bytes[i], port);
|
||||
scm_i_unget_bytes ((unsigned char *) bytes, len, port);
|
||||
|
||||
SCM_COL (port) = column;
|
||||
SCM_LINUM (port) = line;
|
||||
|
@ -2336,7 +2368,6 @@ static int
|
|||
looking_at_bytes (SCM port, const unsigned char *bytes, int len)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
int result;
|
||||
int i = 0;
|
||||
|
||||
while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
|
||||
|
@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
|
|||
pt->read_pos++;
|
||||
i++;
|
||||
}
|
||||
|
||||
result = (i == len);
|
||||
|
||||
while (i > 0)
|
||||
scm_unget_byte (bytes[--i], port);
|
||||
|
||||
return result;
|
||||
scm_i_unget_bytes (bytes, i, port);
|
||||
return (i == len);
|
||||
}
|
||||
|
||||
static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
|
||||
|
|
|
@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
|
|||
SCM_API void scm_flush (SCM port);
|
||||
SCM_API void scm_end_input (SCM port);
|
||||
SCM_API int scm_fill_input (SCM port);
|
||||
SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
|
||||
SCM_API void scm_unget_byte (int c, SCM port);
|
||||
SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
|
||||
SCM_API void scm_ungets (const char *s, int n, SCM port);
|
||||
|
|
|
@ -714,6 +714,49 @@ SCM_DEFINE (scm_put_bytevector, "put-bytevector", 2, 2, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_unget_bytevector, "unget-bytevector", 2, 2, 0,
|
||||
(SCM port, SCM bv, SCM start, SCM count),
|
||||
"Unget the contents of @var{bv} to @var{port}, optionally "
|
||||
"starting at index @var{start} and limiting to @var{count} "
|
||||
"octets.")
|
||||
#define FUNC_NAME s_scm_unget_bytevector
|
||||
{
|
||||
unsigned char *c_bv;
|
||||
size_t c_start, c_count, c_len;
|
||||
|
||||
SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
|
||||
SCM_VALIDATE_BYTEVECTOR (2, bv);
|
||||
|
||||
c_len = SCM_BYTEVECTOR_LENGTH (bv);
|
||||
c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
|
||||
|
||||
if (!scm_is_eq (start, SCM_UNDEFINED))
|
||||
{
|
||||
c_start = scm_to_size_t (start);
|
||||
|
||||
if (!scm_is_eq (count, SCM_UNDEFINED))
|
||||
{
|
||||
c_count = scm_to_size_t (count);
|
||||
if (SCM_UNLIKELY (c_start + c_count > c_len))
|
||||
scm_out_of_range (FUNC_NAME, count);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (SCM_UNLIKELY (c_start >= c_len))
|
||||
scm_out_of_range (FUNC_NAME, start);
|
||||
else
|
||||
c_count = c_len - c_start;
|
||||
}
|
||||
}
|
||||
else
|
||||
c_start = 0, c_count = c_len;
|
||||
|
||||
scm_unget_bytes (c_bv + c_start, c_count, port);
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
|
||||
/* Bytevector output port ("bop" for short). */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
;;;; binary-ports.scm --- Binary IO on ports
|
||||
|
||||
;;;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
;;;; Copyright (C) 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -40,6 +40,7 @@
|
|||
get-string-n!
|
||||
put-u8
|
||||
put-bytevector
|
||||
unget-bytevector
|
||||
open-bytevector-output-port
|
||||
make-custom-binary-output-port))
|
||||
|
||||
|
|
|
@ -24,8 +24,12 @@
|
|||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (rnrs bytevectors)
|
||||
#:use-module ((rnrs io ports) #:select (open-bytevector-input-port
|
||||
open-bytevector-output-port)))
|
||||
#:use-module ((ice-9 binary-ports) #:select (open-bytevector-input-port
|
||||
open-bytevector-output-port
|
||||
put-bytevector
|
||||
get-bytevector-n
|
||||
get-bytevector-all
|
||||
unget-bytevector)))
|
||||
|
||||
(define (display-line . args)
|
||||
(for-each display args)
|
||||
|
@ -1235,6 +1239,19 @@
|
|||
|
||||
|
||||
|
||||
(pass-if-equal "unget-bytevector"
|
||||
#vu8(10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 200 201 202 203
|
||||
1 2 3 4 251 253 254 255)
|
||||
(let ((port (open-bytevector-input-port #vu8(1 2 3 4 251 253 254 255))))
|
||||
(unget-bytevector port #vu8(200 201 202 203))
|
||||
(unget-bytevector port #vu8(20 21 22 23 24))
|
||||
(unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 4)
|
||||
(unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 2 2)
|
||||
(unget-bytevector port #vu8(10 11))
|
||||
(get-bytevector-all port)))
|
||||
|
||||
|
||||
|
||||
(with-test-prefix "unicode byte-order marks (BOMs)"
|
||||
|
||||
(define (bv-read-test* encoding bv proc)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue