1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

Fix encoding errors with strings returned by string ports

String ports, being 8-bit, store strings using the character encoding
of the port.  This fixes a bug where the default character encoding, and
not the port's encoding, was being used to convert the string port data
back to a string.

* libguile/strports.c: extra comments
  (scm_strport_to_string): Use the port's encoding when converting port
  data to a string.

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn;
  no longer static, now declared SCM_INTERNAL so that other libguile
  files can call it.  All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn
This commit is contained in:
Michael Gran 2009-08-30 15:41:56 -07:00
parent f84c500d2e
commit fac32b518e
3 changed files with 34 additions and 11 deletions

View file

@ -1477,15 +1477,18 @@ scm_is_string (SCM obj)
return IS_STRING (obj);
}
static SCM
scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler)
SCM
scm_i_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler)
{
size_t u32len, i;
scm_t_wchar *u32;
int wide = 0;
SCM res;
if (len == 0)
return scm_nullstr;
if (encoding == NULL)
{
/* If encoding is null, use Latin-1. */
@ -1575,7 +1578,7 @@ scm_from_locale_stringn (const char *str, size_t len)
hndl = SCM_FAILED_CONVERSION_ERROR;
}
return scm_from_stringn (str, len, enc, hndl);
return scm_i_from_stringn (str, len, enc, hndl);
}
SCM
@ -1590,7 +1593,7 @@ scm_from_locale_string (const char *str)
SCM
scm_i_from_utf8_string (const scm_t_uint8 *str)
{
return scm_from_stringn ((const char *) str,
return scm_i_from_stringn ((const char *) str,
strlen ((char *) str), "UTF-8",
SCM_FAILED_CONVERSION_ERROR);
}
@ -1681,7 +1684,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
}
char *
scm_to_locale_stringn (SCM str, size_t * lenp)
scm_to_locale_stringn (SCM str, size_t *lenp)
{
SCM outport;
scm_t_port *pt;

View file

@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym);

View file

@ -301,9 +301,9 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, co
to a locale representation for storage. But, since string ports
rely on string functionality for their memory management, we need
to create a new string that has the 8-bit locale representation
of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode
codepoints. */
of the underlying string.
locale_str is already in the locale of the port. */
str = scm_i_make_string (str_len, &buf);
memcpy (buf, locale_str, str_len);
@ -348,13 +348,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode
codepoints. */
/* Ports are initialized with the thread-default values for encoding and
invalid sequence handling. */
buf = scm_to_locale_stringn (str, &str_len);
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
free (buf);
return z;
}
/* create a new string from a string port's buffer. */
/* Create a new string from a string port's buffer, converting from
the port's 8-bit locale-specific representation to the standard
string representation. */
SCM scm_strport_to_string (SCM port)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
@ -363,7 +368,18 @@ SCM scm_strport_to_string (SCM port)
if (pt->rw_active == SCM_PORT_WRITE)
st_flush (port);
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
if (pt->read_buf_size == 0)
return scm_nullstr;
if (pt->encoding == NULL)
{
char *buf;
str = scm_i_make_string (pt->read_buf_size, &buf);
memcpy (buf, pt->read_buf, pt->read_buf_size);
}
else
str = scm_i_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
pt->encoding, pt->ilseq_handler);
scm_remember_upto_here_1 (port);
return str;
}