mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Fix encoding errors with strings returned by string ports
String ports, being 8-bit, store their contents in the character encoding of the port. This fixes a bug where the default character encoding, rather than the port's own encoding, was used to convert string port data back into a string.

* libguile/strports.c: Add extra comments.
  (scm_strport_to_string): Use the port's encoding when converting port data to a string.
* libguile/strings.c (scm_i_from_stringn): Renamed from scm_from_stringn and made internal. All callers changed.
  (scm_from_stringn): Renamed to scm_i_from_stringn.
* libguile/strings.h: Add declaration for scm_i_from_stringn.
This commit is contained in:
parent
f84c500d2e
commit
fac32b518e
3 changed files with 34 additions and 11 deletions
|
@ -1477,15 +1477,18 @@ scm_is_string (SCM obj)
|
|||
return IS_STRING (obj);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
SCM
|
||||
scm_i_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
size_t u32len, i;
|
||||
scm_t_wchar *u32;
|
||||
int wide = 0;
|
||||
SCM res;
|
||||
|
||||
if (len == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
if (encoding == NULL)
|
||||
{
|
||||
/* If encoding is null, use Latin-1. */
|
||||
|
@ -1575,7 +1578,7 @@ scm_from_locale_stringn (const char *str, size_t len)
|
|||
hndl = SCM_FAILED_CONVERSION_ERROR;
|
||||
}
|
||||
|
||||
return scm_from_stringn (str, len, enc, hndl);
|
||||
return scm_i_from_stringn (str, len, enc, hndl);
|
||||
}
|
||||
|
||||
SCM
|
||||
|
@ -1590,7 +1593,7 @@ scm_from_locale_string (const char *str)
|
|||
SCM
|
||||
scm_i_from_utf8_string (const scm_t_uint8 *str)
|
||||
{
|
||||
return scm_from_stringn ((const char *) str,
|
||||
return scm_i_from_stringn ((const char *) str,
|
||||
strlen ((char *) str), "UTF-8",
|
||||
SCM_FAILED_CONVERSION_ERROR);
|
||||
}
|
||||
|
@ -1681,7 +1684,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
|
|||
}
|
||||
|
||||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t * lenp)
|
||||
scm_to_locale_stringn (SCM str, size_t *lenp)
|
||||
{
|
||||
SCM outport;
|
||||
scm_t_port *pt;
|
||||
|
|
|
@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
|
|||
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
|
||||
SCM_API SCM scm_string_append (SCM args);
|
||||
|
||||
SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
|
||||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
|
||||
SCM_API size_t scm_c_string_length (SCM str);
|
||||
SCM_API size_t scm_c_symbol_length (SCM sym);
|
||||
|
|
|
@ -301,9 +301,9 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, co
|
|||
to a locale representation for storage. But, since string ports
|
||||
rely on string functionality for their memory management, we need
|
||||
to create a new string that has the 8-bit locale representation
|
||||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
of the underlying string.
|
||||
|
||||
locale_str is already in the locale of the port. */
|
||||
str = scm_i_make_string (str_len, &buf);
|
||||
memcpy (buf, locale_str, str_len);
|
||||
|
||||
|
@ -348,13 +348,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
|||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
|
||||
/* Ports are initialized with the thread-default values for encoding and
|
||||
invalid sequence handling. */
|
||||
buf = scm_to_locale_stringn (str, &str_len);
|
||||
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
|
||||
free (buf);
|
||||
return z;
|
||||
}
|
||||
|
||||
/* create a new string from a string port's buffer. */
|
||||
/* Create a new string from a string port's buffer, converting from
|
||||
the port's 8-bit locale-specific representation to the standard
|
||||
string representation. */
|
||||
SCM scm_strport_to_string (SCM port)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
@ -363,7 +368,18 @@ SCM scm_strport_to_string (SCM port)
|
|||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
st_flush (port);
|
||||
|
||||
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
|
||||
if (pt->read_buf_size == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
if (pt->encoding == NULL)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (pt->read_buf_size, &buf);
|
||||
memcpy (buf, pt->read_buf, pt->read_buf_size);
|
||||
}
|
||||
else
|
||||
str = scm_i_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
|
||||
pt->encoding, pt->ilseq_handler);
|
||||
scm_remember_upto_here_1 (port);
|
||||
return str;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue