1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 19:50:24 +02:00

Port to Scheme of new BOM handling

* libguile/ports.c (scm_specialize_port_encoding_x)
  (scm_port_clear_stream_start_for_bom_read): New functions exported
  to (ice-9 ports).
* module/ice-9/ports.scm (clear-stream-start-for-bom-read):
  (fill-input, peek-char-and-len): Rework to handle BOM in fill-input
  instead of once per peek-char.
This commit is contained in:
Andy Wingo 2016-05-05 22:54:58 +02:00
parent 86267af8b3
commit 6d15a71e8f
2 changed files with 99 additions and 13 deletions

View file

@ -1137,6 +1137,22 @@ prepare_iconv_descriptors (SCM port, SCM encoding)
(encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port)); (encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
} }
SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
SCM_DEFINE (scm_specialize_port_encoding_x,
"specialize-port-encoding!", 2, 0, 0,
(SCM port, SCM encoding),
"")
#define FUNC_NAME s_scm_specialize_port_encoding_x
{
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_SYMBOL (2, encoding);
prepare_iconv_descriptors (port, encoding);
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
scm_t_iconv_descriptors * scm_t_iconv_descriptors *
scm_i_port_iconv_descriptors (SCM port) scm_i_port_iconv_descriptors (SCM port)
{ {
@ -2351,6 +2367,39 @@ port_clear_stream_start_for_bom_read (SCM port, enum bom_io_mode io_mode)
return 0; return 0;
} }
SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_read (SCM port);
SCM_DEFINE (scm_port_clear_stream_start_for_bom_read,
"port-clear-stream-start-for-bom-read", 1, 0, 0,
(SCM port),
"")
#define FUNC_NAME s_scm_port_clear_stream_start_for_bom_read
{
scm_t_port_internal *pti;
scm_t_port *pt;
SCM_VALIDATE_PORT (1, port);
pti = SCM_PORT_GET_INTERNAL (port);
if (!pti->at_stream_start_for_bom_read)
return 0;
/* Maybe slurp off a byte-order marker. */
pt = SCM_PTAB_ENTRY (port);
pti->at_stream_start_for_bom_read = 0;
if (!pti->at_stream_start_for_bom_read)
return SCM_BOOL_F;
/* Maybe slurp off a byte-order marker. */
pt = SCM_PTAB_ENTRY (port);
pti->at_stream_start_for_bom_read = 0;
if (pt->rw_random)
pti->at_stream_start_for_bom_write = 0;
return SCM_BOOL_T;
}
#undef FUNC_NAME
static void static void
port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode) port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode)
{ {

View file

@ -203,7 +203,50 @@ interpret its input and output."
(error "bad return from port read function" read)) (error "bad return from port read function" read))
read)) read))
(define utf8-bom #vu8(#xEF #xBB #xBF))
(define utf16be-bom #vu8(#xFE #xFF))
(define utf16le-bom #vu8(#xFF #xFE))
(define utf32be-bom #vu8(#x00 #x00 #xFE #xFF))
(define utf32le-bom #vu8(#xFF #xFE #x00 #x00))
(define (clear-stream-start-for-bom-read port io-mode)
(define (maybe-consume-bom bom)
(and (eq? (peek-byte port) (bytevector-u8-ref bom 0))
(let* ((buf (fill-input port (bytevector-length bom)))
(bv (port-buffer-bytevector buf))
(cur (port-buffer-cur bv)))
(and (<= (bytevector-length bv)
(- (port-buffer-end buf) cur))
(let lp ((i 1))
(if (= i (bytevector-length bom))
(begin
(set-port-buffer-cur! buf (+ cur i))
#t)
(and (eq? (bytevector-u8-ref bv (+ cur i))
(bytevector-u8-ref bom i))
(lp (1+ i)))))))))
(when (and (port-clear-stream-start-for-bom-read port)
(eq? io-mode 'text))
(case (port-encoding port)
((UTF-8)
(maybe-consume-bom utf8-bom))
((UTF-16)
(cond
((maybe-consume-bom utf16le-bom)
(specialize-port-encoding! port 'UTF-16LE))
(else
(maybe-consume-bom utf16be-bom)
(specialize-port-encoding! port 'UTF-16BE))))
((UTF-32)
(cond
((maybe-consume-bom utf32le-bom)
(specialize-port-encoding! port 'UTF-32LE))
(else
(maybe-consume-bom utf32be-bom)
(specialize-port-encoding! port 'UTF-32BE)))))))
(define* (fill-input port #:optional (minimum-buffering 1)) (define* (fill-input port #:optional (minimum-buffering 1))
(clear-stream-start-for-bom-read port 'text)
(let* ((buf (port-read-buffer port)) (let* ((buf (port-read-buffer port))
(cur (port-buffer-cur buf)) (cur (port-buffer-cur buf))
(buffered (- (port-buffer-end buf) cur))) (buffered (- (port-buffer-end buf) cur)))
@ -360,19 +403,13 @@ interpret its input and output."
(if (eq? first-byte the-eof-object) (if (eq? first-byte the-eof-object)
(values first-byte 0) (values first-byte 0)
(let ((first-byte (logand first-byte #xff))) (let ((first-byte (logand first-byte #xff)))
(call-with-values
(lambda ()
(case (%port-encoding port) (case (%port-encoding port)
((UTF-8) ((UTF-8)
(peek-char-and-len/utf8 port first-byte)) (peek-char-and-len/utf8 port first-byte))
((ISO-8859-1) ((ISO-8859-1)
(peek-char-and-len/iso-8859-1 port first-byte)) (peek-char-and-len/iso-8859-1 port first-byte))
(else (else
(peek-char-and-len/iconv port first-byte)))) (peek-char-and-len/iconv port first-byte)))))))
(lambda (char len)
(if (port-maybe-consume-initial-byte-order-mark port char len)
(peek-char-and-len port)
(values char len))))))))
(define (%peek-char port) (define (%peek-char port)
(call-with-values (lambda () (peek-char-and-len port)) (call-with-values (lambda () (peek-char-and-len port))