diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm index 9774e46d2..0c4233198 100644 --- a/module/ice-9/ports.scm +++ b/module/ice-9/ports.scm @@ -243,7 +243,7 @@ interpret its input and output." (lp buffered) (values buf buffered))))))))))))))) -(define (peek-byte port) +(define-inlinable (peek-byte port) (let* ((buf (port-read-buffer port)) (cur (port-buffer-cur buf))) (if (< cur (port-buffer-end buf)) @@ -261,85 +261,79 @@ interpret its input and output." (define-syntax-rule (decoding-error subr port) (throw 'decoding-error subr "input decoding error" EILSEQ port)) -(define-inlinable (peek-char-and-len/utf8 port) +(define-inlinable (peek-char-and-len/utf8 port first-byte) (define (bad-utf8 len) (if (eq? (port-conversion-strategy port) 'substitute) (values #\? len) (decoding-error "peek-char" port))) - (let ((first-byte (peek-byte port))) - (cond - ((eq? first-byte the-eof-object) - (values first-byte 0)) - ((< first-byte #x80) - (values (integer->char first-byte) 1)) - ((<= #xc2 first-byte #xdf) - (call-with-values (lambda () (fill-input port 2)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80))) - (bad-utf8 1)) - (values (integer->char - (logior (ash (logand first-byte #x1f) 6) - (logand (ref 1) #x3f))) - 2))))) - ((= (logand first-byte #xf0) #xe0) - (call-with-values (lambda () (fill-input port 3)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80)) - (and (eq? first-byte #xe0) (< (ref 1) #xa0)) - (and (eq? first-byte #xed) (< (ref 1) #x9f))) - (bad-utf8 1)) - (when (or (< buffering 3) - (not (= (logand (ref 2) #xc0) #x80))) - (bad-utf8 2)) - (values (integer->char - (logior (ash (logand first-byte #x0f) 12) - (ash (logand (ref 1) #x3f) 6) - (logand (ref 2) #x3f))) - 3))))) - ((<= #xf0 first-byte #xf4) - (call-with-values (lambda () (fill-input port 4)) - (lambda (buf buffering) - (let ((bv (port-buffer-bytevector buf)) - (cur (port-buffer-cur buf))) - (define (ref n) - (bytevector-u8-ref bv (+ cur 1))) - (when (or (< buffering 2) - (not (= (logand (ref 1) #xc0) #x80)) - (and (eq? first-byte #xf0) (< (ref 1) #x90)) - (and (eq? first-byte #xf4) (< (ref 1) #x8f))) - (bad-utf8 1)) - (when (or (< buffering 3) - (not (= (logand (ref 2) #xc0) #x80))) - (bad-utf8 2)) - (when (or (< buffering 4) - (not (= (logand (ref 3) #xc0) #x80))) - (bad-utf8 3)) - (values (integer->char - (logior (ash (logand first-byte #x07) 18) - (ash (logand (ref 1) #x3f) 12) - (ash (logand (ref 2) #x3f) 6) - (logand (ref 3) #x3f))) - 4))))) - (else - (bad-utf8 1))))) + (cond + ((< first-byte #x80) + (values (integer->char first-byte) 1)) + ((<= #xc2 first-byte #xdf) + (call-with-values (lambda () (fill-input port 2)) + (lambda (buf buffering) + (let ((bv (port-buffer-bytevector buf)) + (cur (port-buffer-cur buf))) + (define (ref n) + (bytevector-u8-ref bv (+ cur 1))) + (when (or (< buffering 2) + (not (= (logand (ref 1) #xc0) #x80))) + (bad-utf8 1)) + (values (integer->char + (logior (ash (logand first-byte #x1f) 6) + (logand (ref 1) #x3f))) + 2))))) + ((= (logand first-byte #xf0) #xe0) + (call-with-values (lambda () (fill-input port 3)) + (lambda (buf buffering) + (let ((bv (port-buffer-bytevector buf)) + (cur (port-buffer-cur buf))) + (define (ref n) + (bytevector-u8-ref bv (+ cur 1))) + (when (or (< buffering 2) + (not (= (logand (ref 1) #xc0) #x80)) + (and (eq? first-byte #xe0) (< (ref 1) #xa0)) + (and (eq? first-byte #xed) (< (ref 1) #x9f))) + (bad-utf8 1)) + (when (or (< buffering 3) + (not (= (logand (ref 2) #xc0) #x80))) + (bad-utf8 2)) + (values (integer->char + (logior (ash (logand first-byte #x0f) 12) + (ash (logand (ref 1) #x3f) 6) + (logand (ref 2) #x3f))) + 3))))) + ((<= #xf0 first-byte #xf4) + (call-with-values (lambda () (fill-input port 4)) + (lambda (buf buffering) + (let ((bv (port-buffer-bytevector buf)) + (cur (port-buffer-cur buf))) + (define (ref n) + (bytevector-u8-ref bv (+ cur 1))) + (when (or (< buffering 2) + (not (= (logand (ref 1) #xc0) #x80)) + (and (eq? first-byte #xf0) (< (ref 1) #x90)) + (and (eq? first-byte #xf4) (< (ref 1) #x8f))) + (bad-utf8 1)) + (when (or (< buffering 3) + (not (= (logand (ref 2) #xc0) #x80))) + (bad-utf8 2)) + (when (or (< buffering 4) + (not (= (logand (ref 3) #xc0) #x80))) + (bad-utf8 3)) + (values (integer->char + (logior (ash (logand first-byte #x07) 18) + (ash (logand (ref 1) #x3f) 12) + (ash (logand (ref 2) #x3f) 6) + (logand (ref 3) #x3f))) + 4))))) + (else + (bad-utf8 1)))) -(define-inlinable (peek-char-and-len/iso-8859-1 port) - (let ((byte-or-eof (peek-byte port))) - (if (eof-object? byte-or-eof) - (values byte-or-eof 0) - (values (integer->char byte-or-eof) 1)))) +(define-inlinable (peek-char-and-len/iso-8859-1 port first-byte) + (values (integer->char first-byte) 1)) -(define (peek-char-and-len/iconv port) +(define (peek-char-and-len/iconv port first-byte) (define (bad-input len) (if (eq? (port-conversion-strategy port) 'substitute) (values #\? len) @@ -362,17 +356,23 @@ interpret its input and output." (lp input-size)))))) (define-inlinable (peek-char-and-len port) - (let ((enc (%port-encoding port))) - (call-with-values - (lambda () - (case enc - ((UTF-8) (peek-char-and-len/utf8 port)) - ((ISO-8859-1) (peek-char-and-len/iso-8859-1 port)) - (else (peek-char-and-len/iconv port)))) - (lambda (char len) - (if (port-maybe-consume-initial-byte-order-mark port char len) - (peek-char-and-len port) - (values char len)))))) + (let ((first-byte (peek-byte port))) + (if (eq? first-byte the-eof-object) + (values first-byte 0) + (let ((first-byte (logand first-byte #xff))) + (call-with-values + (lambda () + (case (%port-encoding port) + ((UTF-8) + (peek-char-and-len/utf8 port first-byte)) + ((ISO-8859-1) + (peek-char-and-len/iso-8859-1 port first-byte)) + (else + (peek-char-and-len/iconv port first-byte)))) + (lambda (char len) + (if (port-maybe-consume-initial-byte-order-mark port char len) + (peek-char-and-len port) + (values char len)))))))) (define (%peek-char port) (call-with-values (lambda () (peek-char-and-len port))