1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-14 15:40:19 +02:00

Fix encoding errors with strings returned by string ports

String ports are 8-bit: they store their contents using the character
encoding of the port.  This commit fixes a bug where the default character
encoding, rather than the port's encoding, was used to convert the string
port data back to a string.

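* libguile/strports.c: add and update comments
  (scm_strport_to_string):  use port's encoding when converting port data
  to a string.  Return scm_nullstr for an empty buffer, and copy the
  bytes unconverted when the port has no encoding.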

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn
  and changed from static to library-internal (SCM_INTERNAL) linkage.
  Return scm_nullstr for zero-length input.  All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn
This commit is contained in:
Michael Gran 2009-08-30 15:41:56 -07:00
parent f84c500d2e
commit fac32b518e
3 changed files with 34 additions and 11 deletions

View file

@ -1477,15 +1477,18 @@ scm_is_string (SCM obj)
return IS_STRING (obj); return IS_STRING (obj);
} }
static SCM SCM
scm_from_stringn (const char *str, size_t len, const char *encoding, scm_i_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler) scm_t_string_failed_conversion_handler handler)
{ {
size_t u32len, i; size_t u32len, i;
scm_t_wchar *u32; scm_t_wchar *u32;
int wide = 0; int wide = 0;
SCM res; SCM res;
if (len == 0)
return scm_nullstr;
if (encoding == NULL) if (encoding == NULL)
{ {
/* If encoding is null, use Latin-1. */ /* If encoding is null, use Latin-1. */
@ -1575,7 +1578,7 @@ scm_from_locale_stringn (const char *str, size_t len)
hndl = SCM_FAILED_CONVERSION_ERROR; hndl = SCM_FAILED_CONVERSION_ERROR;
} }
return scm_from_stringn (str, len, enc, hndl); return scm_i_from_stringn (str, len, enc, hndl);
} }
SCM SCM
@ -1590,7 +1593,7 @@ scm_from_locale_string (const char *str)
SCM SCM
scm_i_from_utf8_string (const scm_t_uint8 *str) scm_i_from_utf8_string (const scm_t_uint8 *str)
{ {
return scm_from_stringn ((const char *) str, return scm_i_from_stringn ((const char *) str,
strlen ((char *) str), "UTF-8", strlen ((char *) str), "UTF-8",
SCM_FAILED_CONVERSION_ERROR); SCM_FAILED_CONVERSION_ERROR);
} }
@ -1681,7 +1684,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
} }
char * char *
scm_to_locale_stringn (SCM str, size_t * lenp) scm_to_locale_stringn (SCM str, size_t *lenp)
{ {
SCM outport; SCM outport;
scm_t_port *pt; scm_t_port *pt;

View file

@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end); SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args); SCM_API SCM scm_string_append (SCM args);
SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr); SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str); SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym); SCM_API size_t scm_c_symbol_length (SCM sym);

View file

@ -301,9 +301,9 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, co
to a locale representation for storage. But, since string ports to a locale representation for storage. But, since string ports
rely on string functionality for their memory management, we need rely on string functionality for their memory management, we need
to create a new string that has the 8-bit locale representation to create a new string that has the 8-bit locale representation
of the underlying string. This violates the guideline that the of the underlying string.
internal encoding of characters in strings is in unicode
codepoints. */ locale_str is already in the locale of the port. */
str = scm_i_make_string (str_len, &buf); str = scm_i_make_string (str_len, &buf);
memcpy (buf, locale_str, str_len); memcpy (buf, locale_str, str_len);
@ -348,13 +348,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
of the underlying string. This violates the guideline that the of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode internal encoding of characters in strings is in unicode
codepoints. */ codepoints. */
/* Ports are initialized with the thread-default values for encoding and
invalid sequence handling. */
buf = scm_to_locale_stringn (str, &str_len); buf = scm_to_locale_stringn (str, &str_len);
z = scm_i_mkstrport (pos, buf, str_len, modes, caller); z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
free (buf); free (buf);
return z; return z;
} }
/* create a new string from a string port's buffer. */ /* Create a new string from a string port's buffer, converting from
the port's 8-bit locale-specific representation to the standard
string representation. */
SCM scm_strport_to_string (SCM port) SCM scm_strport_to_string (SCM port)
{ {
scm_t_port *pt = SCM_PTAB_ENTRY (port); scm_t_port *pt = SCM_PTAB_ENTRY (port);
@ -363,7 +368,18 @@ SCM scm_strport_to_string (SCM port)
if (pt->rw_active == SCM_PORT_WRITE) if (pt->rw_active == SCM_PORT_WRITE)
st_flush (port); st_flush (port);
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size); if (pt->read_buf_size == 0)
return scm_nullstr;
if (pt->encoding == NULL)
{
char *buf;
str = scm_i_make_string (pt->read_buf_size, &buf);
memcpy (buf, pt->read_buf, pt->read_buf_size);
}
else
str = scm_i_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
pt->encoding, pt->ilseq_handler);
scm_remember_upto_here_1 (port); scm_remember_upto_here_1 (port);
return str; return str;
} }