1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-21 20:20:24 +02:00

Spead tweaks to Scheme peek-char

* module/ice-9/ports.scm: Speed tweaks to %peek-char.  Ultimately
  somewhat fruitless; I can get 1.4s instead of 1.5s by only
  half-inlining the UTF-8 case though.
This commit is contained in:
Andy Wingo 2016-05-04 12:40:27 +02:00
parent f5b9a53bd0
commit d7a111b0ec

View file

@ -243,7 +243,7 @@ interpret its input and output."
(lp buffered) (lp buffered)
(values buf buffered))))))))))))))) (values buf buffered)))))))))))))))
(define (peek-byte port) (define-inlinable (peek-byte port)
(let* ((buf (port-read-buffer port)) (let* ((buf (port-read-buffer port))
(cur (port-buffer-cur buf))) (cur (port-buffer-cur buf)))
(if (< cur (port-buffer-end buf)) (if (< cur (port-buffer-end buf))
@ -261,85 +261,79 @@ interpret its input and output."
(define-syntax-rule (decoding-error subr port) (define-syntax-rule (decoding-error subr port)
(throw 'decoding-error subr "input decoding error" EILSEQ port)) (throw 'decoding-error subr "input decoding error" EILSEQ port))
(define-inlinable (peek-char-and-len/utf8 port) (define-inlinable (peek-char-and-len/utf8 port first-byte)
(define (bad-utf8 len) (define (bad-utf8 len)
(if (eq? (port-conversion-strategy port) 'substitute) (if (eq? (port-conversion-strategy port) 'substitute)
(values #\? len) (values #\? len)
(decoding-error "peek-char" port))) (decoding-error "peek-char" port)))
(let ((first-byte (peek-byte port))) (cond
(cond ((< first-byte #x80)
((eq? first-byte the-eof-object) (values (integer->char first-byte) 1))
(values first-byte 0)) ((<= #xc2 first-byte #xdf)
((< first-byte #x80) (call-with-values (lambda () (fill-input port 2))
(values (integer->char first-byte) 1)) (lambda (buf buffering)
((<= #xc2 first-byte #xdf) (let ((bv (port-buffer-bytevector buf))
(call-with-values (lambda () (fill-input port 2)) (cur (port-buffer-cur buf)))
(lambda (buf buffering) (define (ref n)
(let ((bv (port-buffer-bytevector buf)) (bytevector-u8-ref bv (+ cur 1)))
(cur (port-buffer-cur buf))) (when (or (< buffering 2)
(define (ref n) (not (= (logand (ref 1) #xc0) #x80)))
(bytevector-u8-ref bv (+ cur 1))) (bad-utf8 1))
(when (or (< buffering 2) (values (integer->char
(not (= (logand (ref 1) #xc0) #x80))) (logior (ash (logand first-byte #x1f) 6)
(bad-utf8 1)) (logand (ref 1) #x3f)))
(values (integer->char 2)))))
(logior (ash (logand first-byte #x1f) 6) ((= (logand first-byte #xf0) #xe0)
(logand (ref 1) #x3f))) (call-with-values (lambda () (fill-input port 3))
2))))) (lambda (buf buffering)
((= (logand first-byte #xf0) #xe0) (let ((bv (port-buffer-bytevector buf))
(call-with-values (lambda () (fill-input port 3)) (cur (port-buffer-cur buf)))
(lambda (buf buffering) (define (ref n)
(let ((bv (port-buffer-bytevector buf)) (bytevector-u8-ref bv (+ cur 1)))
(cur (port-buffer-cur buf))) (when (or (< buffering 2)
(define (ref n) (not (= (logand (ref 1) #xc0) #x80))
(bytevector-u8-ref bv (+ cur 1))) (and (eq? first-byte #xe0) (< (ref 1) #xa0))
(when (or (< buffering 2) (and (eq? first-byte #xed) (< (ref 1) #x9f)))
(not (= (logand (ref 1) #xc0) #x80)) (bad-utf8 1))
(and (eq? first-byte #xe0) (< (ref 1) #xa0)) (when (or (< buffering 3)
(and (eq? first-byte #xed) (< (ref 1) #x9f))) (not (= (logand (ref 2) #xc0) #x80)))
(bad-utf8 1)) (bad-utf8 2))
(when (or (< buffering 3) (values (integer->char
(not (= (logand (ref 2) #xc0) #x80))) (logior (ash (logand first-byte #x0f) 12)
(bad-utf8 2)) (ash (logand (ref 1) #x3f) 6)
(values (integer->char (logand (ref 2) #x3f)))
(logior (ash (logand first-byte #x0f) 12) 3)))))
(ash (logand (ref 1) #x3f) 6) ((<= #xf0 first-byte #xf4)
(logand (ref 2) #x3f))) (call-with-values (lambda () (fill-input port 4))
3))))) (lambda (buf buffering)
((<= #xf0 first-byte #xf4) (let ((bv (port-buffer-bytevector buf))
(call-with-values (lambda () (fill-input port 4)) (cur (port-buffer-cur buf)))
(lambda (buf buffering) (define (ref n)
(let ((bv (port-buffer-bytevector buf)) (bytevector-u8-ref bv (+ cur 1)))
(cur (port-buffer-cur buf))) (when (or (< buffering 2)
(define (ref n) (not (= (logand (ref 1) #xc0) #x80))
(bytevector-u8-ref bv (+ cur 1))) (and (eq? first-byte #xf0) (< (ref 1) #x90))
(when (or (< buffering 2) (and (eq? first-byte #xf4) (< (ref 1) #x8f)))
(not (= (logand (ref 1) #xc0) #x80)) (bad-utf8 1))
(and (eq? first-byte #xf0) (< (ref 1) #x90)) (when (or (< buffering 3)
(and (eq? first-byte #xf4) (< (ref 1) #x8f))) (not (= (logand (ref 2) #xc0) #x80)))
(bad-utf8 1)) (bad-utf8 2))
(when (or (< buffering 3) (when (or (< buffering 4)
(not (= (logand (ref 2) #xc0) #x80))) (not (= (logand (ref 3) #xc0) #x80)))
(bad-utf8 2)) (bad-utf8 3))
(when (or (< buffering 4) (values (integer->char
(not (= (logand (ref 3) #xc0) #x80))) (logior (ash (logand first-byte #x07) 18)
(bad-utf8 3)) (ash (logand (ref 1) #x3f) 12)
(values (integer->char (ash (logand (ref 2) #x3f) 6)
(logior (ash (logand first-byte #x07) 18) (logand (ref 3) #x3f)))
(ash (logand (ref 1) #x3f) 12) 4)))))
(ash (logand (ref 2) #x3f) 6) (else
(logand (ref 3) #x3f))) (bad-utf8 1))))
4)))))
(else
(bad-utf8 1)))))
(define-inlinable (peek-char-and-len/iso-8859-1 port) (define-inlinable (peek-char-and-len/iso-8859-1 port first-byte)
(let ((byte-or-eof (peek-byte port))) (values (integer->char first-byte) 1))
(if (eof-object? byte-or-eof)
(values byte-or-eof 0)
(values (integer->char byte-or-eof) 1))))
(define (peek-char-and-len/iconv port) (define (peek-char-and-len/iconv port first-byte)
(define (bad-input len) (define (bad-input len)
(if (eq? (port-conversion-strategy port) 'substitute) (if (eq? (port-conversion-strategy port) 'substitute)
(values #\? len) (values #\? len)
@ -362,17 +356,23 @@ interpret its input and output."
(lp input-size)))))) (lp input-size))))))
(define-inlinable (peek-char-and-len port) (define-inlinable (peek-char-and-len port)
(let ((enc (%port-encoding port))) (let ((first-byte (peek-byte port)))
(call-with-values (if (eq? first-byte the-eof-object)
(lambda () (values first-byte 0)
(case enc (let ((first-byte (logand first-byte #xff)))
((UTF-8) (peek-char-and-len/utf8 port)) (call-with-values
((ISO-8859-1) (peek-char-and-len/iso-8859-1 port)) (lambda ()
(else (peek-char-and-len/iconv port)))) (case (%port-encoding port)
(lambda (char len) ((UTF-8)
(if (port-maybe-consume-initial-byte-order-mark port char len) (peek-char-and-len/utf8 port first-byte))
(peek-char-and-len port) ((ISO-8859-1)
(values char len)))))) (peek-char-and-len/iso-8859-1 port first-byte))
(else
(peek-char-and-len/iconv port first-byte))))
(lambda (char len)
(if (port-maybe-consume-initial-byte-order-mark port char len)
(peek-char-and-len port)
(values char len))))))))
(define (%peek-char port) (define (%peek-char port)
(call-with-values (lambda () (peek-char-and-len port)) (call-with-values (lambda () (peek-char-and-len port))