1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Port refactors to help Scheme peek-char

* libguile/ports.h (scm_sys_port_encoding, scm_sys_set_port_encoding):
  New functions, to expose port encodings as symbols directly to (ice-9
  ports).
  (scm_port_maybe_consume_initial_byte_order_mark): New function.
* libguile/ports.c (scm_port_encoding): Dispatch to %port-encoding.
  (scm_set_port_encoding_x): Dispatch to %set-port-encoding!.
  (port_maybe_consume_initial_byte_order_mark): New helper, factored out
  of peek_codepoint.
  (scm_port_maybe_consume_initial_byte_order_mark, peek_codepoint): Call
  port_maybe_consume_initial_byte_order_mark.
* module/ice-9/ports.scm (port-encoding): Implement in Scheme.
This commit is contained in:
Andy Wingo 2016-05-04 11:40:22 +02:00
parent 383df7976f
commit 36e32138f8
3 changed files with 77 additions and 33 deletions

View file

@ -1222,40 +1222,48 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
close_iconv_descriptors (prev);
}
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
(SCM port),
"Returns, as a string, the character encoding that @var{port}\n"
"Returns, as a symbol, the character encoding that @var{port}\n"
"uses to interpret its input and output.\n")
#define FUNC_NAME s_scm_port_encoding
#define FUNC_NAME s_scm_sys_port_encoding
{
SCM_VALIDATE_PORT (1, port);
return scm_symbol_to_string (SCM_PTAB_ENTRY (port)->encoding);
return SCM_PTAB_ENTRY (port)->encoding;
}
#undef FUNC_NAME
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
SCM
scm_port_encoding (SCM port)
{
return scm_symbol_to_string (scm_sys_port_encoding (port));
}
SCM_DEFINE (scm_sys_set_port_encoding_x, "%set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"port I/O. New ports are created with the encoding\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
#define FUNC_NAME s_scm_set_port_encoding_x
#define FUNC_NAME s_scm_sys_set_port_encoding_x
{
char *enc_str;
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
SCM_VALIDATE_SYMBOL (2, enc);
enc_str = scm_to_latin1_string (enc);
scm_i_set_port_encoding_x (port, enc_str);
free (enc_str);
scm_i_set_port_encoding_x (port, scm_i_symbol_chars (enc));
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
SCM
scm_set_port_encoding_x (SCM port, SCM enc)
{
return scm_sys_set_port_encoding_x (port, scm_string_to_symbol (enc));
}
scm_t_string_failed_conversion_handler
scm_i_string_failed_conversion_handler (SCM conversion_strategy)
{
@ -1545,6 +1553,50 @@ scm_c_read (SCM port, void *buffer, size_t size)
}
#undef FUNC_NAME
static int
port_maybe_consume_initial_byte_order_mark (SCM port, scm_t_wchar codepoint,
size_t len)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
if (!pti->at_stream_start_for_bom_read) return 0;
/* Record that we're no longer at stream start. */
pti->at_stream_start_for_bom_read = 0;
if (pt->rw_random)
pti->at_stream_start_for_bom_write = 0;
if (codepoint != SCM_UNICODE_BOM) return 0;
/* If we just read a BOM in an encoding that recognizes them, then
silently consume it. */
if (scm_is_eq (pt->encoding, sym_UTF_8)
|| scm_is_eq (pt->encoding, sym_UTF_16)
|| scm_is_eq (pt->encoding, sym_UTF_32))
{
scm_port_buffer_did_take (pt->read_buf, len);
return 1;
}
return 0;
}
SCM_DEFINE (scm_port_maybe_consume_initial_byte_order_mark,
"port-maybe-consume-initial-byte-order-mark", 3, 0, 0,
(SCM port, SCM codepoint, SCM len),
"")
#define FUNC_NAME s_scm_port_maybe_consume_initial_byte_order_mark
{
SCM_VALIDATE_PORT (1, port);
return scm_from_bool
(port_maybe_consume_initial_byte_order_mark
(port,
SCM_CHARP (codepoint) ? SCM_CHAR (codepoint) : EOF,
scm_to_size_t (len)));
}
#undef FUNC_NAME
/* Update the line and column number of PORT after consumption of C. */
static inline void
update_port_lf (scm_t_wchar c, SCM port)
@ -1807,7 +1859,6 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
{
int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
if (scm_is_eq (pt->encoding, sym_UTF_8))
err = peek_utf8_codepoint (port, codepoint, len);
@ -1818,25 +1869,8 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
if (SCM_LIKELY (err == 0))
{
if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
{
/* Record that we're no longer at stream start. */
pti->at_stream_start_for_bom_read = 0;
if (pt->rw_random)
pti->at_stream_start_for_bom_write = 0;
/* If we just read a BOM in an encoding that recognizes them,
then silently consume it and read another code point. */
if (SCM_UNLIKELY
(*codepoint == SCM_UNICODE_BOM
&& (scm_is_eq (pt->encoding, sym_UTF_8)
|| scm_is_eq (pt->encoding, sym_UTF_16)
|| scm_is_eq (pt->encoding, sym_UTF_32))))
{
scm_port_buffer_did_take (pt->read_buf, *len);
return peek_codepoint (port, codepoint, len);
}
}
if (port_maybe_consume_initial_byte_order_mark (port, *codepoint, *len))
return peek_codepoint (port, codepoint, len);
}
else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
{
@ -3162,7 +3196,7 @@ scm_init_ports (void)
/* The following bindings are used early in boot-9.scm. */
/* Used by `include'. */
scm_c_define_gsubr (s_scm_set_port_encoding_x, 2, 0, 0,
scm_c_define_gsubr ("set-port-encoding!", 2, 0, 0,
(scm_t_subr) scm_set_port_encoding_x);
scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0,
(scm_t_subr) scm_eof_object_p);

View file

@ -285,12 +285,15 @@ SCM_INTERNAL void scm_i_set_default_port_encoding (const char *encoding);
SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void);
SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy);
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
SCM_INTERNAL SCM scm_sys_port_encoding (SCM port);
SCM_INTERNAL SCM scm_sys_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_encoding (SCM port);
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_conversion_strategy (SCM port);
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
/* Input. */
SCM_INTERNAL SCM scm_port_maybe_consume_initial_byte_order_mark (SCM, SCM, SCM);
SCM_API int scm_get_byte_or_eof (SCM port);
SCM_API int scm_peek_byte_or_eof (SCM port);
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);

View file

@ -154,6 +154,13 @@
(define (port-encoding port)
"Return, as a string, the character encoding that @var{port} uses to
interpret its input and output."
(symbol->string (%port-encoding port)))
(define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
(define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
(define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))