mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Port to Scheme of new BOM handling
* libguile/ports.c (scm_specialize_port_encoding_x) (scm_port_clear_stream_start_for_bom_read): New functions exported to (ice-9 ports). * module/ice-9/ports.scm (clear-stream-start-for-bom-read): (fill-input, peek-char-and-len): Rework to handle BOM in fill-input instead of once per peek-char.
This commit is contained in:
parent
86267af8b3
commit
6d15a71e8f
2 changed files with 99 additions and 13 deletions
|
@ -1137,6 +1137,22 @@ prepare_iconv_descriptors (SCM port, SCM encoding)
|
|||
(encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
|
||||
}
|
||||
|
||||
SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
|
||||
SCM_DEFINE (scm_specialize_port_encoding_x,
|
||||
"specialize-port-encoding!", 2, 0, 0,
|
||||
(SCM port, SCM encoding),
|
||||
"")
|
||||
#define FUNC_NAME s_scm_specialize_port_encoding_x
|
||||
{
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
SCM_VALIDATE_SYMBOL (2, encoding);
|
||||
|
||||
prepare_iconv_descriptors (port, encoding);
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
scm_t_iconv_descriptors *
|
||||
scm_i_port_iconv_descriptors (SCM port)
|
||||
{
|
||||
|
@ -2351,6 +2367,39 @@ port_clear_stream_start_for_bom_read (SCM port, enum bom_io_mode io_mode)
|
|||
return 0;
|
||||
}
|
||||
|
||||
SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_read (SCM port);
|
||||
SCM_DEFINE (scm_port_clear_stream_start_for_bom_read,
|
||||
"port-clear-stream-start-for-bom-read", 1, 0, 0,
|
||||
(SCM port),
|
||||
"")
|
||||
#define FUNC_NAME s_scm_port_clear_stream_start_for_bom_read
|
||||
{
|
||||
scm_t_port_internal *pti;
|
||||
scm_t_port *pt;
|
||||
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
|
||||
pti = SCM_PORT_GET_INTERNAL (port);
|
||||
if (!pti->at_stream_start_for_bom_read)
|
||||
return 0;
|
||||
|
||||
/* Maybe slurp off a byte-order marker. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
pti->at_stream_start_for_bom_read = 0;
|
||||
|
||||
if (!pti->at_stream_start_for_bom_read)
|
||||
return SCM_BOOL_F;
|
||||
|
||||
/* Maybe slurp off a byte-order marker. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
pti->at_stream_start_for_bom_read = 0;
|
||||
if (pt->rw_random)
|
||||
pti->at_stream_start_for_bom_write = 0;
|
||||
|
||||
return SCM_BOOL_T;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
static void
|
||||
port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode)
|
||||
{
|
||||
|
|
|
@ -203,7 +203,50 @@ interpret its input and output."
|
|||
(error "bad return from port read function" read))
|
||||
read))
|
||||
|
||||
(define utf8-bom #vu8(#xEF #xBB #xBF))
|
||||
(define utf16be-bom #vu8(#xFE #xFF))
|
||||
(define utf16le-bom #vu8(#xFF #xFE))
|
||||
(define utf32be-bom #vu8(#x00 #x00 #xFE #xFF))
|
||||
(define utf32le-bom #vu8(#xFF #xFE #x00 #x00))
|
||||
|
||||
(define (clear-stream-start-for-bom-read port io-mode)
|
||||
(define (maybe-consume-bom bom)
|
||||
(and (eq? (peek-byte port) (bytevector-u8-ref bom 0))
|
||||
(let* ((buf (fill-input port (bytevector-length bom)))
|
||||
(bv (port-buffer-bytevector buf))
|
||||
(cur (port-buffer-cur bv)))
|
||||
(and (<= (bytevector-length bv)
|
||||
(- (port-buffer-end buf) cur))
|
||||
(let lp ((i 1))
|
||||
(if (= i (bytevector-length bom))
|
||||
(begin
|
||||
(set-port-buffer-cur! buf (+ cur i))
|
||||
#t)
|
||||
(and (eq? (bytevector-u8-ref bv (+ cur i))
|
||||
(bytevector-u8-ref bom i))
|
||||
(lp (1+ i)))))))))
|
||||
(when (and (port-clear-stream-start-for-bom-read port)
|
||||
(eq? io-mode 'text))
|
||||
(case (port-encoding port)
|
||||
((UTF-8)
|
||||
(maybe-consume-bom utf8-bom))
|
||||
((UTF-16)
|
||||
(cond
|
||||
((maybe-consume-bom utf16le-bom)
|
||||
(specialize-port-encoding! port 'UTF-16LE))
|
||||
(else
|
||||
(maybe-consume-bom utf16be-bom)
|
||||
(specialize-port-encoding! port 'UTF-16BE))))
|
||||
((UTF-32)
|
||||
(cond
|
||||
((maybe-consume-bom utf32le-bom)
|
||||
(specialize-port-encoding! port 'UTF-32LE))
|
||||
(else
|
||||
(maybe-consume-bom utf32be-bom)
|
||||
(specialize-port-encoding! port 'UTF-32BE)))))))
|
||||
|
||||
(define* (fill-input port #:optional (minimum-buffering 1))
|
||||
(clear-stream-start-for-bom-read port 'text)
|
||||
(let* ((buf (port-read-buffer port))
|
||||
(cur (port-buffer-cur buf))
|
||||
(buffered (- (port-buffer-end buf) cur)))
|
||||
|
@ -360,19 +403,13 @@ interpret its input and output."
|
|||
(if (eq? first-byte the-eof-object)
|
||||
(values first-byte 0)
|
||||
(let ((first-byte (logand first-byte #xff)))
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(case (%port-encoding port)
|
||||
((UTF-8)
|
||||
(peek-char-and-len/utf8 port first-byte))
|
||||
((ISO-8859-1)
|
||||
(peek-char-and-len/iso-8859-1 port first-byte))
|
||||
(else
|
||||
(peek-char-and-len/iconv port first-byte))))
|
||||
(lambda (char len)
|
||||
(if (port-maybe-consume-initial-byte-order-mark port char len)
|
||||
(peek-char-and-len port)
|
||||
(values char len))))))))
|
||||
(peek-char-and-len/iconv port first-byte)))))))
|
||||
|
||||
(define (%peek-char port)
|
||||
(call-with-values (lambda () (peek-char-and-len port))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue