1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 19:50:24 +02:00

Initial peek-char implementation in Scheme

* module/ice-9/ports.scm (EILSEQ, decoding-error, peek-char-and-len/utf8):
  (peek-char-and-len/iso-8859-1, peek-char-and-len/iconv):
  (peek-char-and-len, %peek-char): New definitions.  Missing iconv1 for
  peek-char, but enough to benchmark.
This commit is contained in:
Andy Wingo 2016-05-04 11:48:05 +02:00
parent 36e32138f8
commit 2ba638092f

View file

@ -255,6 +255,129 @@ interpret its input and output."
(bytevector-u8-ref (port-buffer-bytevector buf)
(port-buffer-cur buf))))))))
;; GNU/Linux definition; fixme?
(define-syntax EILSEQ (identifier-syntax 84))
(define-syntax-rule (decoding-error subr port)
(throw 'decoding-error subr "input decoding error" EILSEQ port))
(define-inlinable (peek-char-and-len/utf8 port)
(define (bad-utf8 len)
(if (eq? (port-conversion-strategy port) 'substitute)
(values #\? len)
(decoding-error "peek-char" port)))
(let ((first-byte (peek-byte port)))
(cond
((eq? first-byte the-eof-object)
(values first-byte 0))
((< first-byte #x80)
(values (integer->char first-byte) 1))
((<= #xc2 first-byte #xdf)
(call-with-values (lambda () (fill-input port 2))
(lambda (buf buffering)
(let ((bv (port-buffer-bytevector buf))
(cur (port-buffer-cur buf)))
(define (ref n)
(bytevector-u8-ref bv (+ cur 1)))
(when (or (< buffering 2)
(not (= (logand (ref 1) #xc0) #x80)))
(bad-utf8 1))
(values (integer->char
(logior (ash (logand first-byte #x1f) 6)
(logand (ref 1) #x3f)))
2)))))
((= (logand first-byte #xf0) #xe0)
(call-with-values (lambda () (fill-input port 3))
(lambda (buf buffering)
(let ((bv (port-buffer-bytevector buf))
(cur (port-buffer-cur buf)))
(define (ref n)
(bytevector-u8-ref bv (+ cur 1)))
(when (or (< buffering 2)
(not (= (logand (ref 1) #xc0) #x80))
(and (eq? first-byte #xe0) (< (ref 1) #xa0))
(and (eq? first-byte #xed) (< (ref 1) #x9f)))
(bad-utf8 1))
(when (or (< buffering 3)
(not (= (logand (ref 2) #xc0) #x80)))
(bad-utf8 2))
(values (integer->char
(logior (ash (logand first-byte #x0f) 12)
(ash (logand (ref 1) #x3f) 6)
(logand (ref 2) #x3f)))
3)))))
((<= #xf0 first-byte #xf4)
(call-with-values (lambda () (fill-input port 4))
(lambda (buf buffering)
(let ((bv (port-buffer-bytevector buf))
(cur (port-buffer-cur buf)))
(define (ref n)
(bytevector-u8-ref bv (+ cur 1)))
(when (or (< buffering 2)
(not (= (logand (ref 1) #xc0) #x80))
(and (eq? first-byte #xf0) (< (ref 1) #x90))
(and (eq? first-byte #xf4) (< (ref 1) #x8f)))
(bad-utf8 1))
(when (or (< buffering 3)
(not (= (logand (ref 2) #xc0) #x80)))
(bad-utf8 2))
(when (or (< buffering 4)
(not (= (logand (ref 3) #xc0) #x80)))
(bad-utf8 3))
(values (integer->char
(logior (ash (logand first-byte #x07) 18)
(ash (logand (ref 1) #x3f) 12)
(ash (logand (ref 2) #x3f) 6)
(logand (ref 3) #x3f)))
4)))))
(else
(bad-utf8 1)))))
(define-inlinable (peek-char-and-len/iso-8859-1 port)
(let ((byte-or-eof (peek-byte port)))
(if (eof-object? byte-or-eof)
(values byte-or-eof 0)
(values (integer->char byte-or-eof) 1))))
(define (peek-char-and-len/iconv port)
(define (bad-input len)
(if (eq? (port-conversion-strategy port) 'substitute)
(values #\? len)
(decoding-error "peek-char" port)))
(let lp ((prev-input-size 0))
(let* ((input-size (1+ prev-input-size))
(buf (fill-input port input-size))
(cur (port-buffer-cur buf)))
(cond
((<= (- (port-buffer-end buf) cur) prev-input-size)
(if (zero? prev-input-size)
(values the-eof-object 0)
(bad-input prev-input-size)))
;; fixme: takes port arg???
((iconv1 port (port-buffer-bytevector buf) cur input-size
(port-conversion-strategy port))
=> (lambda (char)
(values char input-size)))
(else
(lp input-size))))))
(define-inlinable (peek-char-and-len port)
(let ((enc (%port-encoding port)))
(call-with-values
(lambda ()
(case enc
((UTF-8) (peek-char-and-len/utf8 port))
((ISO-8859-1) (peek-char-and-len/iso-8859-1 port))
(else (peek-char-and-len/iconv port))))
(lambda (char len)
(if (port-maybe-consume-initial-byte-order-mark port char len)
(peek-char-and-len port)
(values char len))))))
(define (%peek-char port)
(call-with-values (lambda () (peek-char-and-len port))
(lambda (char len)
char)))