mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 12:20:26 +02:00
Beginnings of shunting ports-in-scheme off to a module
* libguile/ports.c (scm_specialize_port_encoding_x): Add some sanity checks. (scm_unget_bytes): Use scm_expand_port_read_buffer_x. (port_clear_stream_start_for_bom_read): Use scm_specialize_port_encoding_x. (scm_fill_input): Use scm_expand_port_read_buffer_x. (scm_expand_port_read_buffer_x): Rename from scm_set_port_read_buffer_x and actually expand the buffer. * libguile/ports.h: Adapt to scm_expand_port_read_buffer_x change. * module/ice-9/ports.scm: Remove ports-in-scheme stuff, and instead expose the ports internals via an auxiliary module. This will let ports-in-scheme live in a module during Guile 2.2.
This commit is contained in:
parent
df0dade9b7
commit
d1bb400c3f
3 changed files with 105 additions and 394 deletions
|
@ -1147,6 +1147,21 @@ SCM_DEFINE (scm_specialize_port_encoding_x,
|
||||||
SCM_VALIDATE_PORT (1, port);
|
SCM_VALIDATE_PORT (1, port);
|
||||||
SCM_VALIDATE_SYMBOL (2, encoding);
|
SCM_VALIDATE_SYMBOL (2, encoding);
|
||||||
|
|
||||||
|
if (scm_is_eq (SCM_PTAB_ENTRY (port)->encoding, sym_UTF_16))
|
||||||
|
{
|
||||||
|
if (!scm_is_eq (encoding, sym_UTF_16LE)
|
||||||
|
&& !scm_is_eq (encoding, sym_UTF_16BE))
|
||||||
|
SCM_OUT_OF_RANGE (2, encoding);
|
||||||
|
}
|
||||||
|
else if (scm_is_eq (SCM_PTAB_ENTRY (port)->encoding, sym_UTF_32))
|
||||||
|
{
|
||||||
|
if (!scm_is_eq (encoding, sym_UTF_32LE)
|
||||||
|
&& !scm_is_eq (encoding, sym_UTF_32BE))
|
||||||
|
SCM_OUT_OF_RANGE (2, encoding);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
SCM_OUT_OF_RANGE (2, encoding);
|
||||||
|
|
||||||
prepare_iconv_descriptors (port, encoding);
|
prepare_iconv_descriptors (port, encoding);
|
||||||
|
|
||||||
return SCM_UNSPECIFIED;
|
return SCM_UNSPECIFIED;
|
||||||
|
@ -1898,19 +1913,11 @@ scm_unget_bytes (const scm_t_uint8 *buf, size_t len, SCM port)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Bah, have to expand the read_buf for the putback. */
|
/* Bah, have to expand the read_buf for the putback. */
|
||||||
SCM new_buf;
|
|
||||||
|
|
||||||
while (size < len + buffered)
|
while (size < len + buffered)
|
||||||
size *= 2;
|
size *= 2;
|
||||||
|
read_buf = scm_expand_port_read_buffer_x (port,
|
||||||
new_buf = scm_c_make_port_buffer (size);
|
scm_from_size_t (size),
|
||||||
scm_port_buffer_reset_end (new_buf);
|
SCM_BOOL_T);
|
||||||
scm_port_buffer_set_has_eof_p (new_buf,
|
|
||||||
scm_port_buffer_has_eof_p (read_buf));
|
|
||||||
scm_port_buffer_putback (new_buf,
|
|
||||||
scm_port_buffer_take_pointer (read_buf),
|
|
||||||
buffered);
|
|
||||||
pt->read_buf = read_buf = new_buf;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2323,16 +2330,16 @@ port_clear_stream_start_for_bom_read (SCM port, enum bom_io_mode io_mode)
|
||||||
{
|
{
|
||||||
if (maybe_consume_bom (port, scm_utf16le_bom, sizeof (scm_utf16le_bom)))
|
if (maybe_consume_bom (port, scm_utf16le_bom, sizeof (scm_utf16le_bom)))
|
||||||
{
|
{
|
||||||
prepare_iconv_descriptors (port, sym_UTF_16LE);
|
scm_specialize_port_encoding_x (port, sym_UTF_16LE);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
if (maybe_consume_bom (port, scm_utf16be_bom, sizeof (scm_utf16be_bom)))
|
if (maybe_consume_bom (port, scm_utf16be_bom, sizeof (scm_utf16be_bom)))
|
||||||
{
|
{
|
||||||
prepare_iconv_descriptors (port, sym_UTF_16BE);
|
scm_specialize_port_encoding_x (port, sym_UTF_16BE);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
/* Big-endian by default. */
|
/* Big-endian by default. */
|
||||||
prepare_iconv_descriptors (port, sym_UTF_16BE);
|
scm_specialize_port_encoding_x (port, sym_UTF_16BE);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2341,16 +2348,16 @@ port_clear_stream_start_for_bom_read (SCM port, enum bom_io_mode io_mode)
|
||||||
if (maybe_consume_bom (port, scm_utf32le_bom, sizeof (scm_utf32le_bom)))
|
if (maybe_consume_bom (port, scm_utf32le_bom, sizeof (scm_utf32le_bom)))
|
||||||
{
|
{
|
||||||
/* Big-endian by default. */
|
/* Big-endian by default. */
|
||||||
prepare_iconv_descriptors (port, sym_UTF_32LE);
|
scm_specialize_port_encoding_x (port, sym_UTF_32LE);
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
if (maybe_consume_bom (port, scm_utf32be_bom, sizeof (scm_utf32be_bom)))
|
if (maybe_consume_bom (port, scm_utf32be_bom, sizeof (scm_utf32be_bom)))
|
||||||
{
|
{
|
||||||
prepare_iconv_descriptors (port, sym_UTF_32BE);
|
scm_specialize_port_encoding_x (port, sym_UTF_32BE);
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
/* Big-endian by default. */
|
/* Big-endian by default. */
|
||||||
prepare_iconv_descriptors (port, sym_UTF_32BE);
|
scm_specialize_port_encoding_x (port, sym_UTF_32BE);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2441,15 +2448,10 @@ scm_fill_input (SCM port, size_t minimum_size)
|
||||||
minimum_size, and ensure that cur is zero so that we fill towards
|
minimum_size, and ensure that cur is zero so that we fill towards
|
||||||
the end of the buffer. */
|
the end of the buffer. */
|
||||||
if (minimum_size > scm_port_buffer_size (read_buf))
|
if (minimum_size > scm_port_buffer_size (read_buf))
|
||||||
{
|
|
||||||
/* Grow the read buffer. */
|
/* Grow the read buffer. */
|
||||||
SCM new_buf = scm_c_make_port_buffer (minimum_size);
|
read_buf = scm_expand_port_read_buffer_x (port,
|
||||||
scm_port_buffer_reset (new_buf);
|
scm_from_size_t (minimum_size),
|
||||||
scm_port_buffer_put (new_buf,
|
SCM_BOOL_F);
|
||||||
scm_port_buffer_take_pointer (read_buf),
|
|
||||||
buffered);
|
|
||||||
pt->read_buf = read_buf = new_buf;
|
|
||||||
}
|
|
||||||
else if (buffered == 0)
|
else if (buffered == 0)
|
||||||
scm_port_buffer_reset (read_buf);
|
scm_port_buffer_reset (read_buf);
|
||||||
else
|
else
|
||||||
|
@ -2501,16 +2503,45 @@ SCM_DEFINE (scm_port_read_buffering, "port-read-buffering", 1, 0, 0,
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
SCM_DEFINE (scm_set_port_read_buffer_x, "set-port-read-buffer!", 2, 0, 0,
|
SCM_DEFINE (scm_expand_port_read_buffer_x, "expand-port-read-buffer!", 2, 1, 0,
|
||||||
(SCM port, SCM buf),
|
(SCM port, SCM size, SCM putback_p),
|
||||||
"Reset the read buffer on an input port.")
|
"Expand the read buffer of @var{port} to @var{size}. Copy the\n"
|
||||||
#define FUNC_NAME s_scm_set_port_read_buffer_x
|
"old buffered data, if, any, to the beginning of the new\n"
|
||||||
|
"buffer, unless @var{putback_p} is true, in which case copy it\n"
|
||||||
|
"to the end instead. Return the new buffer.")
|
||||||
|
#define FUNC_NAME s_scm_expand_port_read_buffer_x
|
||||||
{
|
{
|
||||||
|
scm_t_port *pt;
|
||||||
|
size_t c_size;
|
||||||
|
SCM new_buf;
|
||||||
|
|
||||||
SCM_VALIDATE_OPINPORT (1, port);
|
SCM_VALIDATE_OPINPORT (1, port);
|
||||||
SCM_ASSERT_TYPE (scm_is_vector (buf) && scm_c_vector_length (buf) >= 4,
|
pt = SCM_PTAB_ENTRY (port);
|
||||||
buf, 2, FUNC_NAME, "port buffer");
|
c_size = scm_to_size_t (size);
|
||||||
SCM_PTAB_ENTRY (port)->read_buf = buf;
|
SCM_ASSERT_RANGE (2, size, c_size > scm_port_buffer_size (pt->read_buf));
|
||||||
return SCM_UNSPECIFIED;
|
if (SCM_UNBNDP (putback_p))
|
||||||
|
putback_p = SCM_BOOL_F;
|
||||||
|
|
||||||
|
new_buf = scm_c_make_port_buffer (c_size);
|
||||||
|
scm_port_buffer_set_has_eof_p (new_buf,
|
||||||
|
scm_port_buffer_has_eof_p (pt->read_buf));
|
||||||
|
if (scm_is_true (putback_p))
|
||||||
|
{
|
||||||
|
scm_port_buffer_reset_end (new_buf);
|
||||||
|
scm_port_buffer_putback (new_buf,
|
||||||
|
scm_port_buffer_take_pointer (pt->read_buf),
|
||||||
|
scm_port_buffer_can_take (pt->read_buf));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
scm_port_buffer_reset (new_buf);
|
||||||
|
scm_port_buffer_put (new_buf,
|
||||||
|
scm_port_buffer_take_pointer (pt->read_buf),
|
||||||
|
scm_port_buffer_can_take (pt->read_buf));
|
||||||
|
}
|
||||||
|
pt->read_buf = new_buf;
|
||||||
|
|
||||||
|
return new_buf;
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
|
|
|
@ -321,7 +321,8 @@ SCM_API void scm_flush (SCM port);
|
||||||
|
|
||||||
SCM_INTERNAL SCM scm_port_random_access_p (SCM port);
|
SCM_INTERNAL SCM scm_port_random_access_p (SCM port);
|
||||||
SCM_INTERNAL SCM scm_port_read_buffering (SCM port);
|
SCM_INTERNAL SCM scm_port_read_buffering (SCM port);
|
||||||
SCM_INTERNAL SCM scm_set_port_read_buffer_x (SCM port, SCM buf);
|
SCM_INTERNAL SCM scm_expand_port_read_buffer_x (SCM port, SCM size,
|
||||||
|
SCM putback_p);
|
||||||
SCM_INTERNAL SCM scm_port_read (SCM port);
|
SCM_INTERNAL SCM scm_port_read (SCM port);
|
||||||
SCM_INTERNAL SCM scm_port_write (SCM port);
|
SCM_INTERNAL SCM scm_port_write (SCM port);
|
||||||
SCM_INTERNAL SCM scm_port_read_buffer (SCM port);
|
SCM_INTERNAL SCM scm_port_read_buffer (SCM port);
|
||||||
|
|
|
@ -26,7 +26,6 @@
|
||||||
|
|
||||||
|
|
||||||
(define-module (ice-9 ports)
|
(define-module (ice-9 ports)
|
||||||
#:use-module (rnrs bytevectors)
|
|
||||||
#:export (;; Definitions from ports.c.
|
#:export (;; Definitions from ports.c.
|
||||||
%port-property
|
%port-property
|
||||||
%set-port-property!
|
%set-port-property!
|
||||||
|
@ -161,6 +160,26 @@ interpret its input and output."
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(define-module (ice-9 ports internal)
|
||||||
|
#:use-module (ice-9 ports)
|
||||||
|
#:export (port-read-buffer
|
||||||
|
port-write-buffer
|
||||||
|
expand-port-read-buffer!
|
||||||
|
port-buffer-bytevector
|
||||||
|
port-buffer-cur
|
||||||
|
port-buffer-end
|
||||||
|
port-buffer-has-eof?
|
||||||
|
set-port-buffer-cur!
|
||||||
|
set-port-buffer-end!
|
||||||
|
set-port-buffer-has-eof?!
|
||||||
|
port-read
|
||||||
|
port-write
|
||||||
|
port-clear-stream-start-for-bom-read
|
||||||
|
%port-encoding
|
||||||
|
specialize-port-encoding!
|
||||||
|
port-random-access?
|
||||||
|
port-read-buffering))
|
||||||
|
|
||||||
(define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
|
(define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
|
||||||
(define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
|
(define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
|
||||||
(define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))
|
(define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))
|
||||||
|
@ -173,366 +192,26 @@ interpret its input and output."
|
||||||
(define-syntax-rule (set-port-buffer-has-eof?! buf has-eof?)
|
(define-syntax-rule (set-port-buffer-has-eof?! buf has-eof?)
|
||||||
(vector-set! buf 3 has-eof?))
|
(vector-set! buf 3 has-eof?))
|
||||||
|
|
||||||
(define (make-port-buffer size)
|
(eval-when (expand)
|
||||||
(vector (make-bytevector size 0) 0 0 #f))
|
(define-syntax-rule (private-port-bindings binding ...)
|
||||||
|
|
||||||
(define (write-bytes port src start count)
|
|
||||||
(let ((written ((port-write port) port src start count)))
|
|
||||||
(unless (<= 0 written count)
|
|
||||||
(error "bad return from port write function" written))
|
|
||||||
(when (< written count)
|
|
||||||
(write-bytes port src (+ start written) (- count written)))))
|
|
||||||
|
|
||||||
(define (flush-output port)
|
|
||||||
(let* ((buf (port-write-buffer port))
|
|
||||||
(cur (port-buffer-cur buf))
|
|
||||||
(end (port-buffer-end buf)))
|
|
||||||
(when (< cur end)
|
|
||||||
;; Update cursors before attempting to write, assuming that I/O
|
|
||||||
;; errors are sticky. That way if the write throws an error,
|
|
||||||
;; causing the computation to abort, and possibly causing the port
|
|
||||||
;; to be collected by GC when it's open, any subsequent close-port
|
|
||||||
;; or force-output won't signal *another* error.
|
|
||||||
(set-port-buffer-cur! buf 0)
|
|
||||||
(set-port-buffer-end! buf 0)
|
|
||||||
(write-bytes port (port-buffer-bytevector buf) cur (- end cur)))))
|
|
||||||
|
|
||||||
(define (read-bytes port dst start count)
|
|
||||||
(let ((read ((port-read port) port dst start count)))
|
|
||||||
(unless (<= 0 read count)
|
|
||||||
(error "bad return from port read function" read))
|
|
||||||
read))
|
|
||||||
|
|
||||||
(define utf8-bom #vu8(#xEF #xBB #xBF))
|
|
||||||
(define utf16be-bom #vu8(#xFE #xFF))
|
|
||||||
(define utf16le-bom #vu8(#xFF #xFE))
|
|
||||||
(define utf32be-bom #vu8(#x00 #x00 #xFE #xFF))
|
|
||||||
(define utf32le-bom #vu8(#xFF #xFE #x00 #x00))
|
|
||||||
|
|
||||||
(define (clear-stream-start-for-bom-read port io-mode)
|
|
||||||
(define (maybe-consume-bom bom)
|
|
||||||
(and (eq? (peek-byte port) (bytevector-u8-ref bom 0))
|
|
||||||
(call-with-values (lambda ()
|
|
||||||
(fill-input port (bytevector-length bom)))
|
|
||||||
(lambda (buf buffered)
|
|
||||||
(and (<= (bytevector-length bom) buffered)
|
|
||||||
(let ((bv (port-buffer-bytevector buf))
|
|
||||||
(cur (port-buffer-cur buf)))
|
|
||||||
(let lp ((i 1))
|
|
||||||
(if (= i (bytevector-length bom))
|
|
||||||
(begin
|
(begin
|
||||||
(set-port-buffer-cur! buf (+ cur i))
|
(define binding (@@ (ice-9 ports) binding))
|
||||||
#t)
|
...)))
|
||||||
(and (eq? (bytevector-u8-ref bv (+ cur i))
|
|
||||||
(bytevector-u8-ref bom i))
|
|
||||||
(lp (1+ i)))))))))))
|
|
||||||
(when (and (port-clear-stream-start-for-bom-read port)
|
|
||||||
(eq? io-mode 'text))
|
|
||||||
(case (%port-encoding port)
|
|
||||||
((UTF-8)
|
|
||||||
(maybe-consume-bom utf8-bom))
|
|
||||||
((UTF-16)
|
|
||||||
(cond
|
|
||||||
((maybe-consume-bom utf16le-bom)
|
|
||||||
(specialize-port-encoding! port 'UTF-16LE))
|
|
||||||
(else
|
|
||||||
(maybe-consume-bom utf16be-bom)
|
|
||||||
(specialize-port-encoding! port 'UTF-16BE))))
|
|
||||||
((UTF-32)
|
|
||||||
(cond
|
|
||||||
((maybe-consume-bom utf32le-bom)
|
|
||||||
(specialize-port-encoding! port 'UTF-32LE))
|
|
||||||
(else
|
|
||||||
(maybe-consume-bom utf32be-bom)
|
|
||||||
(specialize-port-encoding! port 'UTF-32BE)))))))
|
|
||||||
|
|
||||||
(define* (fill-input port #:optional (minimum-buffering 1))
|
(private-port-bindings port-read-buffer
|
||||||
(clear-stream-start-for-bom-read port 'text)
|
port-write-buffer
|
||||||
(let* ((buf (port-read-buffer port))
|
expand-port-read-buffer!
|
||||||
(cur (port-buffer-cur buf))
|
port-read
|
||||||
(buffered (- (port-buffer-end buf) cur)))
|
port-write
|
||||||
(cond
|
port-clear-stream-start-for-bom-read
|
||||||
((or (<= minimum-buffering buffered) (port-buffer-has-eof? buf))
|
%port-encoding
|
||||||
(values buf buffered))
|
specialize-port-encoding!
|
||||||
(else
|
port-decode-char
|
||||||
(unless (input-port? port)
|
port-random-access?
|
||||||
(error "not an input port" port))
|
port-read-buffering)
|
||||||
(when (port-random-access? port)
|
|
||||||
(flush-output port))
|
|
||||||
(let ((bv (port-buffer-bytevector buf)))
|
|
||||||
(cond
|
|
||||||
((< (bytevector-length bv) minimum-buffering)
|
|
||||||
(let ((buf* (make-port-buffer minimum-buffering)))
|
|
||||||
(bytevector-copy! bv cur (port-buffer-bytevector buf*) 0 buffered)
|
|
||||||
(set-port-buffer-end! buf* buffered)
|
|
||||||
(set-port-read-buffer! port buf*)
|
|
||||||
(fill-input port minimum-buffering)))
|
|
||||||
(else
|
|
||||||
(when (< 0 cur)
|
|
||||||
(bytevector-copy! bv cur bv 0 buffered)
|
|
||||||
(set-port-buffer-cur! buf 0)
|
|
||||||
(set-port-buffer-end! buf buffered))
|
|
||||||
(let ((buffering (max (port-read-buffering port) minimum-buffering)))
|
|
||||||
(let lp ((buffered buffered))
|
|
||||||
(let* ((count (- buffering buffered))
|
|
||||||
(read (read-bytes port bv buffered count)))
|
|
||||||
(cond
|
|
||||||
((zero? read)
|
|
||||||
(set-port-buffer-has-eof?! buf #t)
|
|
||||||
(values buf buffered))
|
|
||||||
(else
|
|
||||||
(let ((buffered (+ buffered read)))
|
|
||||||
(set-port-buffer-end! buf buffered)
|
|
||||||
(if (< buffered minimum-buffering)
|
|
||||||
(lp buffered)
|
|
||||||
(values buf buffered)))))))))))))))
|
|
||||||
|
|
||||||
(define-inlinable (peek-bytes port count kfast kslow)
|
;; And we're back.
|
||||||
(let* ((buf (port-read-buffer port))
|
(define-module (ice-9 ports))
|
||||||
(cur (port-buffer-cur buf))
|
|
||||||
(buffered (- (port-buffer-end buf) cur)))
|
|
||||||
(if (<= count buffered)
|
|
||||||
(kfast buf (port-buffer-bytevector buf) cur buffered)
|
|
||||||
(call-with-values (lambda () (fill-input port count))
|
|
||||||
(lambda (buf buffered)
|
|
||||||
(kslow buf (port-buffer-bytevector buf) (port-buffer-cur buf)
|
|
||||||
buffered))))))
|
|
||||||
|
|
||||||
(define (peek-byte port)
|
|
||||||
(peek-bytes port 1
|
|
||||||
(lambda (buf bv cur buffered)
|
|
||||||
(bytevector-u8-ref bv cur))
|
|
||||||
(lambda (buf bv cur buffered)
|
|
||||||
(and (> buffered 0)
|
|
||||||
(bytevector-u8-ref bv cur)))))
|
|
||||||
|
|
||||||
(define* (%lookahead-u8 port)
|
|
||||||
(define (fast-path buf bv cur buffered)
|
|
||||||
(bytevector-u8-ref bv cur))
|
|
||||||
(define (slow-path buf bv cur buffered)
|
|
||||||
(if (zero? buffered)
|
|
||||||
the-eof-object
|
|
||||||
(fast-path buf bv cur buffered)))
|
|
||||||
(peek-bytes port 1 fast-path slow-path))
|
|
||||||
|
|
||||||
(define* (%get-u8 port)
|
|
||||||
(define (fast-path buf bv cur buffered)
|
|
||||||
(set-port-buffer-cur! buf (1+ cur))
|
|
||||||
(bytevector-u8-ref bv cur))
|
|
||||||
(define (slow-path buf bv cur buffered)
|
|
||||||
(if (zero? buffered)
|
|
||||||
(begin
|
|
||||||
(set-port-buffer-has-eof?! buf #f)
|
|
||||||
the-eof-object)
|
|
||||||
(fast-path buf bv cur buffered)))
|
|
||||||
(peek-bytes port 1 fast-path slow-path))
|
|
||||||
|
|
||||||
(define (decoding-error subr port)
|
|
||||||
;; GNU/Linux definition; fixme?
|
|
||||||
(define EILSEQ 84)
|
|
||||||
(throw 'decoding-error subr "input decoding error" EILSEQ port))
|
|
||||||
|
|
||||||
(define-inlinable (decode-utf8 bv start avail u8_0 kt kf)
|
|
||||||
(cond
|
|
||||||
((< u8_0 #x80)
|
|
||||||
(kt (integer->char u8_0) 1))
|
|
||||||
((and (<= #xc2 u8_0 #xdf) (<= 2 avail))
|
|
||||||
(let ((u8_1 (bytevector-u8-ref bv (1+ start))))
|
|
||||||
(if (= (logand u8_1 #xc0) #x80)
|
|
||||||
(kt (integer->char
|
|
||||||
(logior (ash (logand u8_0 #x1f) 6)
|
|
||||||
(logand u8_1 #x3f)))
|
|
||||||
2)
|
|
||||||
(kf))))
|
|
||||||
((and (= (logand u8_0 #xf0) #xe0) (<= 3 avail))
|
|
||||||
(let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
|
|
||||||
(u8_2 (bytevector-u8-ref bv (+ start 2))))
|
|
||||||
(if (and (= (logand u8_1 #xc0) #x80)
|
|
||||||
(= (logand u8_2 #xc0) #x80)
|
|
||||||
(case u8_0
|
|
||||||
((#xe0) (>= u8_1 #xa0))
|
|
||||||
((#xed) (>= u8_1 #x9f))
|
|
||||||
(else #t)))
|
|
||||||
(kt (integer->char
|
|
||||||
(logior (ash (logand u8_0 #x0f) 12)
|
|
||||||
(ash (logand u8_1 #x3f) 6)
|
|
||||||
(logand u8_2 #x3f)))
|
|
||||||
3)
|
|
||||||
(kf))))
|
|
||||||
((and (<= #xf0 u8_0 #xf4) (<= 4 avail))
|
|
||||||
(let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
|
|
||||||
(u8_2 (bytevector-u8-ref bv (+ start 2)))
|
|
||||||
(u8_3 (bytevector-u8-ref bv (+ start 3))))
|
|
||||||
(if (and (= (logand u8_1 #xc0) #x80)
|
|
||||||
(= (logand u8_2 #xc0) #x80)
|
|
||||||
(= (logand u8_3 #xc0) #x80)
|
|
||||||
(case u8_0
|
|
||||||
((#xf0) (>= u8_1 #x90))
|
|
||||||
((#xf4) (>= u8_1 #x8f))
|
|
||||||
(else #t)))
|
|
||||||
(kt (integer->char
|
|
||||||
(logior (ash (logand u8_0 #x07) 18)
|
|
||||||
(ash (logand u8_1 #x3f) 12)
|
|
||||||
(ash (logand u8_2 #x3f) 6)
|
|
||||||
(logand u8_3 #x3f)))
|
|
||||||
4)
|
|
||||||
(kf))))
|
|
||||||
(else (kf))))
|
|
||||||
|
|
||||||
(define (bad-utf8-len bv cur buffering first-byte)
|
|
||||||
(define (ref n)
|
|
||||||
(bytevector-u8-ref bv (+ cur n)))
|
|
||||||
(cond
|
|
||||||
((< first-byte #x80) 0)
|
|
||||||
((<= #xc2 first-byte #xdf)
|
|
||||||
(cond
|
|
||||||
((< buffering 2) 1)
|
|
||||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
|
||||||
(else 0)))
|
|
||||||
((= (logand first-byte #xf0) #xe0)
|
|
||||||
(cond
|
|
||||||
((< buffering 2) 1)
|
|
||||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
|
||||||
((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1)
|
|
||||||
((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1)
|
|
||||||
((< buffering 3) 2)
|
|
||||||
((not (= (logand (ref 2) #xc0) #x80)) 2)
|
|
||||||
(else 0)))
|
|
||||||
((<= #xf0 first-byte #xf4)
|
|
||||||
(cond
|
|
||||||
((< buffering 2) 1)
|
|
||||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
|
||||||
((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1)
|
|
||||||
((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1)
|
|
||||||
((< buffering 3) 2)
|
|
||||||
((not (= (logand (ref 2) #xc0) #x80)) 2)
|
|
||||||
((< buffering 4) 3)
|
|
||||||
((not (= (logand (ref 3) #xc0) #x80)) 3)
|
|
||||||
(else 0)))
|
|
||||||
(else 1)))
|
|
||||||
|
|
||||||
(define (peek-char-and-len/utf8 port first-byte)
|
|
||||||
(define (bad-utf8 len)
|
|
||||||
(if (eq? (port-conversion-strategy port) 'substitute)
|
|
||||||
(values #\? len)
|
|
||||||
(decoding-error "peek-char" port)))
|
|
||||||
(if (< first-byte #x80)
|
|
||||||
(values (integer->char first-byte) 1)
|
|
||||||
(call-with-values (lambda ()
|
|
||||||
(fill-input port
|
|
||||||
(cond
|
|
||||||
((<= #xc2 first-byte #xdf) 2)
|
|
||||||
((= (logand first-byte #xf0) #xe0) 3)
|
|
||||||
(else 4))))
|
|
||||||
(lambda (buf buffering)
|
|
||||||
(let* ((bv (port-buffer-bytevector buf))
|
|
||||||
(cur (port-buffer-cur buf)))
|
|
||||||
(define (bad-utf8)
|
|
||||||
(let ((len (bad-utf8-len bv cur buffering first-byte)))
|
|
||||||
(when (zero? len) (error "internal error"))
|
|
||||||
(if (eq? (port-conversion-strategy port) 'substitute)
|
|
||||||
(values #\? len)
|
|
||||||
(decoding-error "peek-char" port))))
|
|
||||||
(decode-utf8 bv cur buffering first-byte values bad-utf8))))))
|
|
||||||
|
|
||||||
(define (peek-char-and-len/iso-8859-1 port first-byte)
|
|
||||||
(values (integer->char first-byte) 1))
|
|
||||||
|
|
||||||
(define (peek-char-and-len/iconv port first-byte)
|
|
||||||
(let lp ((prev-input-size 0))
|
|
||||||
(let ((input-size (1+ prev-input-size)))
|
|
||||||
(call-with-values (lambda () (fill-input port input-size))
|
|
||||||
(lambda (buf buffered)
|
|
||||||
(cond
|
|
||||||
((< buffered input-size)
|
|
||||||
;; Buffer failed to fill; EOF, possibly premature.
|
|
||||||
(cond
|
|
||||||
((zero? prev-input-size)
|
|
||||||
(values the-eof-object 0))
|
|
||||||
((eq? (port-conversion-strategy port) 'substitute)
|
|
||||||
(values #\? prev-input-size))
|
|
||||||
(else
|
|
||||||
(decoding-error "peek-char" port))))
|
|
||||||
((port-decode-char port (port-buffer-bytevector buf)
|
|
||||||
(port-buffer-cur buf) input-size)
|
|
||||||
=> (lambda (char)
|
|
||||||
(values char input-size)))
|
|
||||||
(else
|
|
||||||
(lp input-size))))))))
|
|
||||||
|
|
||||||
(define (peek-char-and-len port)
|
|
||||||
(let ((first-byte (peek-byte port)))
|
|
||||||
(if (not first-byte)
|
|
||||||
(values the-eof-object 0)
|
|
||||||
(case (%port-encoding port)
|
|
||||||
((UTF-8)
|
|
||||||
(peek-char-and-len/utf8 port first-byte))
|
|
||||||
((ISO-8859-1)
|
|
||||||
(peek-char-and-len/iso-8859-1 port first-byte))
|
|
||||||
(else
|
|
||||||
(peek-char-and-len/iconv port first-byte))))))
|
|
||||||
|
|
||||||
(define* (%peek-char #:optional (port (current-input-port)))
|
|
||||||
(define (slow-path)
|
|
||||||
(call-with-values (lambda () (peek-char-and-len port))
|
|
||||||
(lambda (char len)
|
|
||||||
char)))
|
|
||||||
(define (fast-path buf bv cur buffered)
|
|
||||||
(let ((u8 (bytevector-u8-ref bv cur))
|
|
||||||
(enc (%port-encoding port)))
|
|
||||||
(case enc
|
|
||||||
((UTF-8) (decode-utf8 bv cur buffered u8 (lambda (char len) char)
|
|
||||||
slow-path))
|
|
||||||
((ISO-8859-1) (integer->char u8))
|
|
||||||
(else (slow-path)))))
|
|
||||||
(peek-bytes port 1 fast-path
|
|
||||||
(lambda (buf bv cur buffered) (slow-path))))
|
|
||||||
|
|
||||||
(define* (%read-char #:optional (port (current-input-port)))
|
|
||||||
(define (update-position! char)
|
|
||||||
(case char
|
|
||||||
((#\alarm) #t) ; No change.
|
|
||||||
((#\backspace)
|
|
||||||
(let ((col (port-column port)))
|
|
||||||
(when (> col 0)
|
|
||||||
(set-port-column! port (1- col)))))
|
|
||||||
((#\newline)
|
|
||||||
(set-port-line! port (1+ (port-line port)))
|
|
||||||
(set-port-column! port 0))
|
|
||||||
((#\return)
|
|
||||||
(set-port-column! port 0))
|
|
||||||
((#\tab)
|
|
||||||
(let ((col (port-column port)))
|
|
||||||
(set-port-column! port (- (+ col 8) (remainder col 8)))))
|
|
||||||
(else
|
|
||||||
(set-port-column! port (1+ (port-column port)))))
|
|
||||||
char)
|
|
||||||
(define (slow-path)
|
|
||||||
(call-with-values (lambda () (peek-char-and-len port))
|
|
||||||
(lambda (char len)
|
|
||||||
(let ((buf (port-read-buffer port)))
|
|
||||||
(set-port-buffer-cur! buf (+ (port-buffer-cur buf) len))
|
|
||||||
(if (eq? char the-eof-object)
|
|
||||||
(begin
|
|
||||||
(set-port-buffer-has-eof?! buf #f)
|
|
||||||
char)
|
|
||||||
(update-position! char))))))
|
|
||||||
(define (fast-path buf bv cur buffered)
|
|
||||||
(let ((u8 (bytevector-u8-ref bv cur))
|
|
||||||
(enc (%port-encoding port)))
|
|
||||||
(case enc
|
|
||||||
((UTF-8)
|
|
||||||
(decode-utf8 bv cur buffered u8
|
|
||||||
(lambda (char len)
|
|
||||||
(set-port-buffer-cur! buf (+ cur len))
|
|
||||||
(update-position! char))
|
|
||||||
slow-path))
|
|
||||||
((ISO-8859-1)
|
|
||||||
(set-port-buffer-cur! buf (+ cur 1))
|
|
||||||
(update-position! (integer->char u8)))
|
|
||||||
(else (slow-path)))))
|
|
||||||
(peek-bytes port 1 fast-path
|
|
||||||
(lambda (buf bv cur buffered) (slow-path))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue