1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-10 14:00:21 +02:00

port i/o optimizations for iso-8859-1

* libguile/ports.h (scm_t_port_encoding_mode):
* libguile/ports.c (scm_c_make_port_with_encoding):
  (scm_i_set_port_encoding_x): Add special treatment for latin1
  encoding.
  (get_latin1_codepoint, get_codepoint): Add latin1 fast-path.

* libguile/print.c (display_string_as_latin1): New function.
  (display_string): Use it as the latin1 fast-path.
This commit is contained in:
Andy Wingo 2012-02-24 23:05:02 +01:00
parent 2dcf6b5965
commit 79eb47ea47
3 changed files with 83 additions and 6 deletions

View file

@ -605,6 +605,8 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
if (encoding && strcmp (encoding, "UTF-8") == 0)
entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
else
entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
entry->ilseq_handler = handler;
@ -941,15 +943,18 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
pt = SCM_PTAB_ENTRY (port);
prev = pt->iconv_descriptors;
if (encoding == NULL)
encoding = "ISO-8859-1";
if (strcmp (encoding, "UTF-8") == 0)
if (encoding && strcmp (encoding, "UTF-8") == 0)
{
pt->encoding = "UTF-8";
pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
pt->iconv_descriptors = NULL;
}
else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
{
pt->encoding = "ISO-8859-1";
pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
pt->iconv_descriptors = NULL;
}
else
{
/* Open descriptors before mutating the port. */
@ -1582,6 +1587,26 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
#undef ASSERT_NOT_EOF
}
/* Read one ISO-8859-1 character, i.e. a single byte, from PORT and
   store the value read in *CODEPOINT.  At end of file *CODEPOINT is
   `EOF' and *LEN is set to 0; otherwise the byte is copied verbatim to
   BUF[0] and *LEN is set to 1.  This reader always returns 0.  */
static int
get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
                      char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
  scm_t_wchar byte = scm_get_byte_or_eof_unlocked (port);

  *codepoint = byte;

  if (byte != EOF)
    {
      /* A Latin-1 character occupies exactly one byte.  */
      buf[0] = byte;
      *len = 1;
      return 0;
    }

  /* Nothing was read.  */
  *len = 0;
  return 0;
}
/* Likewise, read a byte sequence from PORT, passing it through its
input conversion descriptor. */
static int
@ -1662,6 +1687,8 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
err = get_latin1_codepoint (port, codepoint, buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);

View file

@ -50,6 +50,7 @@ typedef enum scm_t_port_rw_active {
typedef enum scm_t_port_encoding_mode {
SCM_PORT_ENCODING_MODE_UTF8,
SCM_PORT_ENCODING_MODE_LATIN1,
SCM_PORT_ENCODING_MODE_ICONV
} scm_t_port_encoding_mode;

View file

@ -853,6 +853,54 @@ display_string_as_utf8 (const void *str, int narrow_p, size_t len,
return len;
}
/* Display STR, a string of LEN codepoints, on PORT as ISO-8859-1
   bytes.  STR is narrow if NARROW_P is true, wide otherwise.  A narrow
   string is written as is.  In a wide string, codepoints of 256 and
   above have no ISO-8859-1 encoding and are handled according to
   STRATEGY: stop, write an escape sequence, or write `?'.  Return the
   number of codepoints consumed; this is less than LEN only when
   STRATEGY is `SCM_FAILED_CONVERSION_ERROR' and an unencodable
   codepoint was found.  */
static size_t
display_string_as_latin1 (const void *str, int narrow_p, size_t len,
                          SCM port,
                          scm_t_string_failed_conversion_handler strategy)
{
  size_t done;

  if (narrow_p)
    {
      scm_lfwrite_unlocked (str, len, port);
      return len;
    }

  done = 0;
  while (done < len)
    {
      char chunk[256];
      size_t fill = 0;
      int unencodable = 0;

      /* Gather as many encodable codepoints as fit in CHUNK.  */
      while (fill < sizeof (chunk) && done < len)
        {
          scm_t_wchar ch = STR_REF (str, done);

          if (ch >= 256)
            {
              unencodable = 1;
              break;
            }

          chunk[fill] = ch;
          fill++;
          done++;
        }

      /* Flush what was gathered, even if that is nothing.  */
      scm_lfwrite_unlocked (chunk, fill, port);

      if (!unencodable)
        continue;

      /* The codepoint at index DONE cannot be written as one byte.  */
      if (strategy == SCM_FAILED_CONVERSION_ERROR)
        return done;

      if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
        write_character_escaped (STR_REF (str, done), 1, port);
      else
        /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'.  */
        display_string ("?", 1, 1, port, strategy);

      done++;
    }

  return done;
}
/* Convert STR through PORT's output conversion descriptor and write the
output to PORT. Return the number of codepoints written. */
static size_t
@ -968,9 +1016,10 @@ display_string (const void *str, int narrow_p,
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
return display_string_as_utf8 (str, narrow_p, len, port);
else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
return display_string_as_latin1 (str, narrow_p, len, port, strategy);
else
return display_string_using_iconv (str, narrow_p, len,
port, strategy);
return display_string_using_iconv (str, narrow_p, len, port, strategy);
}
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero