1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Port refactors to help Scheme peek-char

* libguile/ports.h (scm_sys_port_encoding, scm_sys_set_port_encoding):
  New functions, to expose port encodings as symbols directly to (ice-9
  ports).
  (scm_port_maybe_consume_initial_byte_order_mark): New function.
* libguile/ports.c (scm_port_encoding): Dispatch to %port-encoding.
  (scm_set_port_encoding_x): Dispatch to %set-port-encoding!.
  (port_maybe_consume_initial_byte_order_mark): New helper, factored out
  of peek_codepoint.
  (scm_port_maybe_consume_initial_byte_order_mark, peek_codepoint): Call
  port_maybe_consume_initial_byte_order_mark.
* module/ice-9/ports.scm (port-encoding): Implement in Scheme.
This commit is contained in:
Andy Wingo 2016-05-04 11:40:22 +02:00
parent 383df7976f
commit 36e32138f8
3 changed files with 77 additions and 33 deletions

View file

@ -1222,40 +1222,48 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
close_iconv_descriptors (prev); close_iconv_descriptors (prev);
} }
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
(SCM port), (SCM port),
"Returns, as a string, the character encoding that @var{port}\n" "Returns, as a symbol, the character encoding that @var{port}\n"
"uses to interpret its input and output.\n") "uses to interpret its input and output.\n")
#define FUNC_NAME s_scm_port_encoding #define FUNC_NAME s_scm_sys_port_encoding
{ {
SCM_VALIDATE_PORT (1, port); SCM_VALIDATE_PORT (1, port);
return scm_symbol_to_string (SCM_PTAB_ENTRY (port)->encoding); return SCM_PTAB_ENTRY (port)->encoding;
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, SCM
scm_port_encoding (SCM port)
{
return scm_symbol_to_string (scm_sys_port_encoding (port));
}
SCM_DEFINE (scm_sys_set_port_encoding_x, "%set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc), (SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n" "Sets the character encoding that will be used to interpret all\n"
"port I/O. New ports are created with the encoding\n" "port I/O. New ports are created with the encoding\n"
"appropriate for the current locale if @code{setlocale} has \n" "appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n" "been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n") "and this procedure can be used to modify that encoding.\n")
#define FUNC_NAME s_scm_set_port_encoding_x #define FUNC_NAME s_scm_sys_set_port_encoding_x
{ {
char *enc_str;
SCM_VALIDATE_PORT (1, port); SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc); SCM_VALIDATE_SYMBOL (2, enc);
enc_str = scm_to_latin1_string (enc); scm_i_set_port_encoding_x (port, scm_i_symbol_chars (enc));
scm_i_set_port_encoding_x (port, enc_str);
free (enc_str);
return SCM_UNSPECIFIED; return SCM_UNSPECIFIED;
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM
scm_set_port_encoding_x (SCM port, SCM enc)
{
return scm_sys_set_port_encoding_x (port, scm_string_to_symbol (enc));
}
scm_t_string_failed_conversion_handler scm_t_string_failed_conversion_handler
scm_i_string_failed_conversion_handler (SCM conversion_strategy) scm_i_string_failed_conversion_handler (SCM conversion_strategy)
{ {
@ -1545,6 +1553,50 @@ scm_c_read (SCM port, void *buffer, size_t size)
} }
#undef FUNC_NAME #undef FUNC_NAME
static int
port_maybe_consume_initial_byte_order_mark (SCM port, scm_t_wchar codepoint,
size_t len)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
if (!pti->at_stream_start_for_bom_read) return 0;
/* Record that we're no longer at stream start. */
pti->at_stream_start_for_bom_read = 0;
if (pt->rw_random)
pti->at_stream_start_for_bom_write = 0;
if (codepoint != SCM_UNICODE_BOM) return 0;
/* If we just read a BOM in an encoding that recognizes them, then
silently consume it. */
if (scm_is_eq (pt->encoding, sym_UTF_8)
|| scm_is_eq (pt->encoding, sym_UTF_16)
|| scm_is_eq (pt->encoding, sym_UTF_32))
{
scm_port_buffer_did_take (pt->read_buf, len);
return 1;
}
return 0;
}
SCM_DEFINE (scm_port_maybe_consume_initial_byte_order_mark,
"port-maybe-consume-initial-byte-order-mark", 3, 0, 0,
(SCM port, SCM codepoint, SCM len),
"")
#define FUNC_NAME s_scm_port_maybe_consume_initial_byte_order_mark
{
SCM_VALIDATE_PORT (1, port);
return scm_from_bool
(port_maybe_consume_initial_byte_order_mark
(port,
SCM_CHARP (codepoint) ? SCM_CHAR (codepoint) : EOF,
scm_to_size_t (len)));
}
#undef FUNC_NAME
/* Update the line and column number of PORT after consumption of C. */ /* Update the line and column number of PORT after consumption of C. */
static inline void static inline void
update_port_lf (scm_t_wchar c, SCM port) update_port_lf (scm_t_wchar c, SCM port)
@ -1807,7 +1859,6 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
{ {
int err; int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port); scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
if (scm_is_eq (pt->encoding, sym_UTF_8)) if (scm_is_eq (pt->encoding, sym_UTF_8))
err = peek_utf8_codepoint (port, codepoint, len); err = peek_utf8_codepoint (port, codepoint, len);
@ -1818,25 +1869,8 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
if (SCM_LIKELY (err == 0)) if (SCM_LIKELY (err == 0))
{ {
if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read)) if (port_maybe_consume_initial_byte_order_mark (port, *codepoint, *len))
{ return peek_codepoint (port, codepoint, len);
/* Record that we're no longer at stream start. */
pti->at_stream_start_for_bom_read = 0;
if (pt->rw_random)
pti->at_stream_start_for_bom_write = 0;
/* If we just read a BOM in an encoding that recognizes them,
then silently consume it and read another code point. */
if (SCM_UNLIKELY
(*codepoint == SCM_UNICODE_BOM
&& (scm_is_eq (pt->encoding, sym_UTF_8)
|| scm_is_eq (pt->encoding, sym_UTF_16)
|| scm_is_eq (pt->encoding, sym_UTF_32))))
{
scm_port_buffer_did_take (pt->read_buf, *len);
return peek_codepoint (port, codepoint, len);
}
}
} }
else if (scm_is_eq (pt->conversion_strategy, sym_substitute)) else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
{ {
@ -3162,7 +3196,7 @@ scm_init_ports (void)
/* The following bindings are used early in boot-9.scm. */ /* The following bindings are used early in boot-9.scm. */
/* Used by `include'. */ /* Used by `include'. */
scm_c_define_gsubr (s_scm_set_port_encoding_x, 2, 0, 0, scm_c_define_gsubr ("set-port-encoding!", 2, 0, 0,
(scm_t_subr) scm_set_port_encoding_x); (scm_t_subr) scm_set_port_encoding_x);
scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0, scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0,
(scm_t_subr) scm_eof_object_p); (scm_t_subr) scm_eof_object_p);

View file

@ -285,12 +285,15 @@ SCM_INTERNAL void scm_i_set_default_port_encoding (const char *encoding);
SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void); SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void);
SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy); SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy);
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str); SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
SCM_INTERNAL SCM scm_sys_port_encoding (SCM port);
SCM_INTERNAL SCM scm_sys_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_encoding (SCM port); SCM_API SCM scm_port_encoding (SCM port);
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding); SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_conversion_strategy (SCM port); SCM_API SCM scm_port_conversion_strategy (SCM port);
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior); SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
/* Input. */ /* Input. */
SCM_INTERNAL SCM scm_port_maybe_consume_initial_byte_order_mark (SCM, SCM, SCM);
SCM_API int scm_get_byte_or_eof (SCM port); SCM_API int scm_get_byte_or_eof (SCM port);
SCM_API int scm_peek_byte_or_eof (SCM port); SCM_API int scm_peek_byte_or_eof (SCM port);
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size); SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);

View file

@ -154,6 +154,13 @@
(define (port-encoding port)
"Return, as a string, the character encoding that @var{port} uses to
interpret its input and output."
(symbol->string (%port-encoding port)))
(define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0)) (define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
(define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1)) (define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
(define-syntax-rule (port-buffer-end buf) (vector-ref buf 2)) (define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))