mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
port i/o optimizations for iso-8859-1
* libguile/ports.h (scm_t_port_encoding_mode):
* libguile/ports.c (scm_c_make_port_with_encoding):
(scm_i_set_port_encoding_x): Add special treatment for latin1 encoding.
(get_latin1_codepoint, get_codepoint): Add latin1 fast-path.
* libguile/print.c (display_string_as_latin1): Add latin1 fast-path.
This commit is contained in:
parent
2dcf6b5965
commit
79eb47ea47
3 changed files with 83 additions and 6 deletions
|
@ -605,6 +605,8 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
|
|||
entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
|
||||
if (encoding && strcmp (encoding, "UTF-8") == 0)
|
||||
entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
|
||||
else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
|
||||
entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
|
||||
else
|
||||
entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
|
||||
entry->ilseq_handler = handler;
|
||||
|
@ -941,15 +943,18 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
|
|||
pt = SCM_PTAB_ENTRY (port);
|
||||
prev = pt->iconv_descriptors;
|
||||
|
||||
if (encoding == NULL)
|
||||
encoding = "ISO-8859-1";
|
||||
|
||||
if (strcmp (encoding, "UTF-8") == 0)
|
||||
if (encoding && strcmp (encoding, "UTF-8") == 0)
|
||||
{
|
||||
pt->encoding = "UTF-8";
|
||||
pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
|
||||
pt->iconv_descriptors = NULL;
|
||||
}
|
||||
else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
|
||||
{
|
||||
pt->encoding = "ISO-8859-1";
|
||||
pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
|
||||
pt->iconv_descriptors = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Open descriptors before mutating the port. */
|
||||
|
@ -1582,6 +1587,26 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
|
|||
#undef ASSERT_NOT_EOF
|
||||
}
|
||||
|
||||
/* Read one ISO-8859-1 character (a single byte) from PORT and store the
|
||||
result of scm_get_byte_or_eof_unlocked in *CODEPOINT. If that result
|
||||
is EOF, set *LEN to 0 and leave BUF untouched; otherwise copy the byte
|
||||
unchanged into BUF[0] and set *LEN to 1. Always return 0. */
|
||||
static int
|
||||
get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
|
||||
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
||||
{
|
||||
*codepoint = scm_get_byte_or_eof_unlocked (port);
|
||||
|
||||
if (*codepoint == EOF)
|
||||
*len = 0; /* Nothing was read. */
|
||||
else
|
||||
{
|
||||
*len = 1;
|
||||
buf[0] = *codepoint; /* Store the byte exactly as read. */
|
||||
}
|
||||
return 0; /* Unconditional: neither branch above reports an error. */
|
||||
}
|
||||
|
||||
/* Likewise, read a byte sequence from PORT, passing it through its
|
||||
input conversion descriptor. */
|
||||
static int
|
||||
|
@ -1662,6 +1687,8 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
|
|||
|
||||
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
|
||||
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
|
||||
else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
|
||||
err = get_latin1_codepoint (port, codepoint, buf, len);
|
||||
else
|
||||
err = get_iconv_codepoint (port, codepoint, buf, len);
|
||||
|
||||
|
|
|
@ -50,6 +50,7 @@ typedef enum scm_t_port_rw_active {
|
|||
|
||||
/* Conversion strategy for a port's text I/O, chosen from its encoding name. */
typedef enum scm_t_port_encoding_mode {
|
||||
SCM_PORT_ENCODING_MODE_UTF8, /* Encoding name is "UTF-8". */
|
||||
SCM_PORT_ENCODING_MODE_LATIN1, /* Encoding name is "ISO-8859-1", or no encoding was given. */
|
||||
SCM_PORT_ENCODING_MODE_ICONV /* Any other encoding name. */
|
||||
} scm_t_port_encoding_mode;
|
||||
|
||||
|
|
|
@ -853,6 +853,54 @@ display_string_as_utf8 (const void *str, int narrow_p, size_t len,
|
|||
return len;
|
||||
}
|
||||
|
||||
/* Write STR to PORT as ISO-8859-1. STR is a LEN-codepoint string; it
|
||||
is narrow if NARROW_P is true, wide otherwise. A narrow string is
|
||||
written as is. In a wide string, a codepoint that is not below 256 is
|
||||
handled according to STRATEGY: it is written as an escape sequence,
|
||||
replaced by "?", or, for SCM_FAILED_CONVERSION_ERROR, output stops
|
||||
there. Return the number of codepoints handled: LEN, or the index of
|
||||
the offending codepoint when output stopped early. */
|
||||
static size_t
|
||||
display_string_as_latin1 (const void *str, int narrow_p, size_t len,
|
||||
SCM port,
|
||||
scm_t_string_failed_conversion_handler strategy)
|
||||
{
|
||||
size_t printed = 0; /* Index of the next codepoint of STR to handle. */
|
||||
|
||||
if (narrow_p)
|
||||
{
|
||||
scm_lfwrite_unlocked (str, len, port); /* Passed through without any per-character check. */
|
||||
return len;
|
||||
}
|
||||
|
||||
while (printed < len)
|
||||
{
|
||||
char buf[256]; /* Staging area for one batch of single-byte characters. */
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < sizeof(buf) && printed < len; i++, printed++)
|
||||
{
|
||||
scm_t_wchar c = STR_REF (str, printed);
|
||||
|
||||
if (c < 256)
|
||||
buf[i] = c;
|
||||
else
|
||||
break; /* Leave PRINTED pointing at the codepoint that does not fit. */
|
||||
}
|
||||
|
||||
scm_lfwrite_unlocked (buf, i, port); /* Flush the batch gathered so far. */
|
||||
|
||||
if (i < sizeof(buf) && printed < len) /* True only when the loop above ended via `break'. */
|
||||
{
|
||||
if (strategy == SCM_FAILED_CONVERSION_ERROR)
|
||||
break; /* Stop early; the return value is then less than LEN. */
|
||||
else if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
write_character_escaped (STR_REF (str, printed), 1, port);
|
||||
else
|
||||
/* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
|
||||
display_string ("?", 1, 1, port, strategy);
|
||||
printed++; /* The substituted codepoint counts as handled. */
|
||||
}
|
||||
}
|
||||
|
||||
return printed;
|
||||
}
|
||||
|
||||
/* Convert STR through PORT's output conversion descriptor and write the
|
||||
output to PORT. Return the number of codepoints written. */
|
||||
static size_t
|
||||
|
@ -968,9 +1016,10 @@ display_string (const void *str, int narrow_p,
|
|||
|
||||
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
|
||||
return display_string_as_utf8 (str, narrow_p, len, port);
|
||||
else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
|
||||
return display_string_as_latin1 (str, narrow_p, len, port, strategy);
|
||||
else
|
||||
return display_string_using_iconv (str, narrow_p, len,
|
||||
port, strategy);
|
||||
return display_string_using_iconv (str, narrow_p, len, port, strategy);
|
||||
}
|
||||
|
||||
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue