mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-09 21:40:33 +02:00
Speed refactors to Scheme %peek-char
* module/ice-9/ports.scm (peek-bytes): New helper. (peek-byte): Use peek-bytes helper. (decoding-error): Don't inline; no need. (decode-utf8, bad-utf8-len): New helpers. (peek-char-and-len/utf8): Use new helpers. (peek-char-and-len): No fast paths, and not inline. Peek-char has its own fast path. (%peek-char): Use helpers to make fast path.
This commit is contained in:
parent
13f2128587
commit
d77b50476a
1 changed files with 140 additions and 92 deletions
|
@ -286,94 +286,134 @@ interpret its input and output."
|
|||
(lp buffered)
|
||||
(values buf buffered)))))))))))))))
|
||||
|
||||
(define-inlinable (peek-byte port)
|
||||
(define-inlinable (peek-bytes port count kfast kslow)
|
||||
(let* ((buf (port-read-buffer port))
|
||||
(cur (port-buffer-cur buf)))
|
||||
(if (< cur (port-buffer-end buf))
|
||||
(bytevector-u8-ref (port-buffer-bytevector buf) cur)
|
||||
(call-with-values (lambda () (fill-input port))
|
||||
(cur (port-buffer-cur buf))
|
||||
(buffered (- (port-buffer-end buf) cur)))
|
||||
(if (<= count buffered)
|
||||
(kfast buf (port-buffer-bytevector buf) cur buffered)
|
||||
(call-with-values (lambda () (fill-input port count))
|
||||
(lambda (buf buffered)
|
||||
(if (zero? buffered)
|
||||
the-eof-object
|
||||
(bytevector-u8-ref (port-buffer-bytevector buf)
|
||||
(port-buffer-cur buf))))))))
|
||||
(kslow buf (port-buffer-bytevector buf) (port-buffer-cur buf)
|
||||
buffered))))))
|
||||
|
||||
;; GNU/Linux definition; fixme?
|
||||
(define-syntax EILSEQ (identifier-syntax 84))
|
||||
(define (peek-byte port)
|
||||
(peek-bytes port 1
|
||||
(lambda (buf bv cur buffered)
|
||||
(bytevector-u8-ref bv cur))
|
||||
(lambda (buf bv cur buffered)
|
||||
(and (> buffered 0)
|
||||
(bytevector-u8-ref bv cur)))))
|
||||
|
||||
(define-syntax-rule (decoding-error subr port)
|
||||
(define (decoding-error subr port)
|
||||
;; GNU/Linux definition; fixme?
|
||||
(define EILSEQ 84)
|
||||
(throw 'decoding-error subr "input decoding error" EILSEQ port))
|
||||
|
||||
(define-inlinable (decode-utf8 bv start avail u8_0 kt kf)
|
||||
(cond
|
||||
((< u8_0 #x80)
|
||||
(kt (integer->char u8_0) 1))
|
||||
((and (<= #xc2 u8_0 #xdf) (<= 2 avail))
|
||||
(let ((u8_1 (bytevector-u8-ref bv (1+ start))))
|
||||
(if (= (logand u8_1 #xc0) #x80)
|
||||
(kt (integer->char
|
||||
(logior (ash (logand u8_0 #x1f) 6)
|
||||
(logand u8_1 #x3f)))
|
||||
2)
|
||||
(kf))))
|
||||
((and (= (logand u8_0 #xf0) #xe0) (<= 3 avail))
|
||||
(let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
|
||||
(u8_2 (bytevector-u8-ref bv (+ start 2))))
|
||||
(if (and (= (logand u8_1 #xc0) #x80)
|
||||
(= (logand u8_2 #xc0) #x80)
|
||||
(case u8_0
|
||||
((#xe0) (>= u8_1 #xa0))
|
||||
((#xed) (>= u8_1 #x9f))
|
||||
(else #t)))
|
||||
(kt (integer->char
|
||||
(logior (ash (logand u8_0 #x0f) 12)
|
||||
(ash (logand u8_1 #x3f) 6)
|
||||
(logand u8_2 #x3f)))
|
||||
3)
|
||||
(kf))))
|
||||
((and (<= #xf0 u8_0 #xf4) (<= 4 avail))
|
||||
(let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
|
||||
(u8_2 (bytevector-u8-ref bv (+ start 2)))
|
||||
(u8_3 (bytevector-u8-ref bv (+ start 3))))
|
||||
(if (and (= (logand u8_1 #xc0) #x80)
|
||||
(= (logand u8_2 #xc0) #x80)
|
||||
(= (logand u8_3 #xc0) #x80)
|
||||
(case u8_0
|
||||
((#xf0) (>= u8_1 #x90))
|
||||
((#xf4) (>= u8_1 #x8f))
|
||||
(else #t)))
|
||||
(kt (integer->char
|
||||
(logior (ash (logand u8_0 #x07) 18)
|
||||
(ash (logand u8_1 #x3f) 12)
|
||||
(ash (logand u8_2 #x3f) 6)
|
||||
(logand u8_3 #x3f)))
|
||||
4)
|
||||
(kf))))
|
||||
(else (kf))))
|
||||
|
||||
(define (bad-utf8-len bv cur buffering first-byte)
|
||||
(define (ref n)
|
||||
(bytevector-u8-ref bv (+ cur 1)))
|
||||
(cond
|
||||
((< first-byte #x80) 0)
|
||||
((<= #xc2 first-byte #xdf)
|
||||
(cond
|
||||
((< buffering 2) 1)
|
||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
||||
(else 0)))
|
||||
((= (logand first-byte #xf0) #xe0)
|
||||
(cond
|
||||
((< buffering 2) 1)
|
||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
||||
((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1)
|
||||
((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1)
|
||||
((< buffering 3) 2)
|
||||
((not (= (logand (ref 2) #xc0) #x80)) 2)
|
||||
(else 0)))
|
||||
((<= #xf0 first-byte #xf4)
|
||||
(cond
|
||||
((< buffering 2) 1)
|
||||
((not (= (logand (ref 1) #xc0) #x80)) 1)
|
||||
((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1)
|
||||
((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1)
|
||||
((< buffering 3) 2)
|
||||
((not (= (logand (ref 2) #xc0) #x80)) 2)
|
||||
((< buffering 4) 3)
|
||||
((not (= (logand (ref 3) #xc0) #x80)) 3)
|
||||
(else 0)))
|
||||
(else 1)))
|
||||
|
||||
(define (peek-char-and-len/utf8 port first-byte)
|
||||
(define (bad-utf8 len)
|
||||
(if (eq? (port-conversion-strategy port) 'substitute)
|
||||
(values #\? len)
|
||||
(decoding-error "peek-char" port)))
|
||||
(cond
|
||||
((< first-byte #x80)
|
||||
(values (integer->char first-byte) 1))
|
||||
((<= #xc2 first-byte #xdf)
|
||||
(call-with-values (lambda () (fill-input port 2))
|
||||
(lambda (buf buffering)
|
||||
(let ((bv (port-buffer-bytevector buf))
|
||||
(cur (port-buffer-cur buf)))
|
||||
(define (ref n)
|
||||
(bytevector-u8-ref bv (+ cur 1)))
|
||||
(when (or (< buffering 2)
|
||||
(not (= (logand (ref 1) #xc0) #x80)))
|
||||
(bad-utf8 1))
|
||||
(values (integer->char
|
||||
(logior (ash (logand first-byte #x1f) 6)
|
||||
(logand (ref 1) #x3f)))
|
||||
2)))))
|
||||
((= (logand first-byte #xf0) #xe0)
|
||||
(call-with-values (lambda () (fill-input port 3))
|
||||
(lambda (buf buffering)
|
||||
(let ((bv (port-buffer-bytevector buf))
|
||||
(cur (port-buffer-cur buf)))
|
||||
(define (ref n)
|
||||
(bytevector-u8-ref bv (+ cur 1)))
|
||||
(when (or (< buffering 2)
|
||||
(not (= (logand (ref 1) #xc0) #x80))
|
||||
(and (eq? first-byte #xe0) (< (ref 1) #xa0))
|
||||
(and (eq? first-byte #xed) (< (ref 1) #x9f)))
|
||||
(bad-utf8 1))
|
||||
(when (or (< buffering 3)
|
||||
(not (= (logand (ref 2) #xc0) #x80)))
|
||||
(bad-utf8 2))
|
||||
(values (integer->char
|
||||
(logior (ash (logand first-byte #x0f) 12)
|
||||
(ash (logand (ref 1) #x3f) 6)
|
||||
(logand (ref 2) #x3f)))
|
||||
3)))))
|
||||
((<= #xf0 first-byte #xf4)
|
||||
(call-with-values (lambda () (fill-input port 4))
|
||||
(lambda (buf buffering)
|
||||
(let ((bv (port-buffer-bytevector buf))
|
||||
(cur (port-buffer-cur buf)))
|
||||
(define (ref n)
|
||||
(bytevector-u8-ref bv (+ cur 1)))
|
||||
(when (or (< buffering 2)
|
||||
(not (= (logand (ref 1) #xc0) #x80))
|
||||
(and (eq? first-byte #xf0) (< (ref 1) #x90))
|
||||
(and (eq? first-byte #xf4) (< (ref 1) #x8f)))
|
||||
(bad-utf8 1))
|
||||
(when (or (< buffering 3)
|
||||
(not (= (logand (ref 2) #xc0) #x80)))
|
||||
(bad-utf8 2))
|
||||
(when (or (< buffering 4)
|
||||
(not (= (logand (ref 3) #xc0) #x80)))
|
||||
(bad-utf8 3))
|
||||
(values (integer->char
|
||||
(logior (ash (logand first-byte #x07) 18)
|
||||
(ash (logand (ref 1) #x3f) 12)
|
||||
(ash (logand (ref 2) #x3f) 6)
|
||||
(logand (ref 3) #x3f)))
|
||||
4)))))
|
||||
(else
|
||||
(bad-utf8 1))))
|
||||
(if (< first-byte #x80)
|
||||
(values (integer->char first-byte) 1)
|
||||
(call-with-values (lambda ()
|
||||
(fill-input port
|
||||
(cond
|
||||
((<= #xc2 first-byte #xdf) 2)
|
||||
((= (logand first-byte #xf0) #xe0) 3)
|
||||
(else 4))))
|
||||
(lambda (buf buffering)
|
||||
(let* ((bv (port-buffer-bytevector buf))
|
||||
(cur (port-buffer-cur buf)))
|
||||
(define (bad-utf8)
|
||||
(let ((len (bad-utf8-len bv cur buffering first-byte)))
|
||||
(when (zero? len) (error "internal error"))
|
||||
(if (eq? (port-conversion-strategy port) 'substitute)
|
||||
(values #\? len)
|
||||
(decoding-error "peek-char" port))))
|
||||
(decode-utf8 bv cur buffering first-byte values bad-utf8))))))
|
||||
|
||||
(define-inlinable (peek-char-and-len/iso-8859-1 port first-byte)
|
||||
(define (peek-char-and-len/iso-8859-1 port first-byte)
|
||||
(values (integer->char first-byte) 1))
|
||||
|
||||
(define (peek-char-and-len/iconv port first-byte)
|
||||
|
@ -398,25 +438,33 @@ interpret its input and output."
|
|||
(else
|
||||
(lp input-size))))))
|
||||
|
||||
(define-inlinable (peek-char-and-len port)
|
||||
(define (peek-char-and-len port)
|
||||
(let ((first-byte (peek-byte port)))
|
||||
(if (eq? first-byte the-eof-object)
|
||||
(values first-byte 0)
|
||||
(let ((first-byte (logand first-byte #xff)))
|
||||
(case (%port-encoding port)
|
||||
((UTF-8)
|
||||
(if (< first-byte #x80)
|
||||
(values (integer->char first-byte) 1)
|
||||
(peek-char-and-len/utf8 port first-byte)))
|
||||
((ISO-8859-1)
|
||||
(peek-char-and-len/iso-8859-1 port first-byte))
|
||||
(else
|
||||
(peek-char-and-len/iconv port first-byte)))))))
|
||||
(if (not first-byte)
|
||||
(values the-eof-object 0)
|
||||
(case (%port-encoding port)
|
||||
((UTF-8)
|
||||
(peek-char-and-len/utf8 port first-byte))
|
||||
((ISO-8859-1)
|
||||
(peek-char-and-len/iso-8859-1 port first-byte))
|
||||
(else
|
||||
(peek-char-and-len/iconv port first-byte))))))
|
||||
|
||||
(define (%peek-char port)
|
||||
(call-with-values (lambda () (peek-char-and-len port))
|
||||
(lambda (char len)
|
||||
char)))
|
||||
(define (slow-path)
|
||||
(call-with-values (lambda () (peek-char-and-len port))
|
||||
(lambda (char len)
|
||||
char)))
|
||||
(define (fast-path buf bv cur buffered)
|
||||
(let ((u8 (bytevector-u8-ref bv cur))
|
||||
(enc (%port-encoding port)))
|
||||
(case enc
|
||||
((UTF-8) (decode-utf8 bv cur buffered u8 (lambda (char len) char)
|
||||
slow-path))
|
||||
((ISO-8859-1) (integer->char u8))
|
||||
(else (slow-path)))))
|
||||
(peek-bytes port 1 fast-path
|
||||
(lambda (buf bv cur buffered) (slow-path))))
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue