1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Add `scm_{to,from}_utf32_string'.

* libguile/strings.c (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New functions.

* libguile/strings.h (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New declarations.

* doc/ref/api-data.texi (Conversion to/from C): Document
  `scm_{to,from}_{utf8,utf32}_stringn'.
This commit is contained in:
Ludovic Courtès 2011-01-25 23:36:35 +01:00
parent e9a35a965b
commit 647dc1ac23
3 changed files with 76 additions and 6 deletions

View file

@ -4040,14 +4040,21 @@ conversion functions are provided to convert between Latin-1 C strings
and Guile strings. and Guile strings.
@deftypefn {C Function} SCM scm_from_latin1_stringn (const char *str, size_t len) @deftypefn {C Function} SCM scm_from_latin1_stringn (const char *str, size_t len)
This function returns a scheme string from an ISO-8859-1-encoded C @deftypefnx {C Function} SCM scm_from_utf8_stringn (const char *str, size_t len)
string @var{str} of length @var{len}. @deftypefnx {C Function} SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
Return a scheme string from C string @var{str}, which is ISO-8859-1-,
UTF-8-, or UTF-32-encoded, of length @var{len}. @var{len} is the number
of bytes pointed to by @var{str} for @code{scm_from_latin1_stringn} and
@code{scm_from_utf8_stringn}; it is the number of elements (code points)
in @var{str} in the case of @code{scm_from_utf32_stringn}.
@end deftypefn @end deftypefn
@deftypefn {C function} char * scm_to_latin1_stringn (SCM str, size_t *lenp) @deftypefn {C function} char *scm_to_latin1_stringn (SCM str, size_t *lenp)
This function returns a newly allocated, ISO-8859-1-encoded C string @deftypefnx {C function} char *scm_to_utf8_stringn (SCM str, size_t *lenp)
from the scheme string @var{str}. An error will be thrown if the scheme @deftypefnx {C function} scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp)
string cannot be converted to the ISO-8859-1 encoding. If @var{lenp} is Return a newly allocated, ISO-8859-1-, UTF-8-, or UTF-32-encoded C string
from Scheme string @var{str}. An error is thrown when @var{str}
cannot be converted to the specified encoding. If @var{lenp} is
@code{NULL}, the returned C string will be null terminated, and an error @code{NULL}, the returned C string will be null terminated, and an error
will be thrown if the C string would otherwise contain null will be thrown if the C string would otherwise contain null
characters. If @var{lenp} is not NULL, the length of the string is characters. If @var{lenp} is not NULL, the length of the string is

View file

@ -1570,6 +1570,28 @@ scm_from_utf8_stringn (const char *str, size_t len)
return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR); return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
} }
/* Return a new Scheme string built from the zero-terminated UTF-32 C
   string STR.  The length is not known here, so (size_t) -1 is passed
   to scm_from_utf32_stringn, which then measures STR itself.  */
SCM
scm_from_utf32_string (const scm_t_wchar *str)
{
  const size_t unknown_length = (size_t) -1;

  return scm_from_utf32_stringn (str, unknown_length);
}
/* Return a new Scheme string holding the LEN code points stored at STR.
   When LEN is (size_t) -1, STR is taken to be zero-terminated and its
   length is computed with u32_strlen.  */
SCM
scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
{
  scm_t_wchar *dst;
  SCM string;
  size_t count = len;

  if (count == (size_t) -1)
    count = u32_strlen ((const uint32_t *) str);

  /* Allocate a wide string, copy the code points verbatim, then hand
     the result to scm_i_try_narrow_string.  */
  string = scm_i_make_wide_string (count, &dst);
  memcpy (dst, str, count * sizeof (scm_t_wchar));
  scm_i_try_narrow_string (string);

  return string;
}
/* Create a new scheme string from the C string STR. The memory of /* Create a new scheme string from the C string STR. The memory of
STR may be used directly as storage for the new string. */ STR may be used directly as storage for the new string. */
/* FIXME: GC-wise, the only way to use the memory area pointed to by STR /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
@ -1795,6 +1817,42 @@ scm_to_utf8_stringn (SCM str, size_t *lenp)
return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR); return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
} }
/* Return a newly allocated UTF-32 copy of the Scheme string STR.  The
   caller does not ask for the length, so a NULL LENP is handed to
   scm_to_utf32_stringn.  */
scm_t_wchar *
scm_to_utf32_string (SCM str)
{
  size_t *no_length = NULL;

  return scm_to_utf32_stringn (str, no_length);
}
/* Return a newly allocated array of UTF-32 code points holding the
   contents of the Scheme string STR, followed by one terminating zero
   element.  If LENP is non-NULL, *LENP is set to the number of code
   points, not counting the terminator.  STR is checked with
   SCM_VALIDATE_STRING.  The buffer is obtained from scm_malloc.

   NOTE(review): unlike scm_to_stringn, no check for embedded zero code
   points is made when LENP is NULL -- confirm whether callers rely on
   the documented error in that case.  */
scm_t_wchar *
scm_to_utf32_stringn (SCM str, size_t *lenp)
#define FUNC_NAME "scm_to_utf32_stringn"
{
  scm_t_wchar *result;
  size_t len;

  SCM_VALIDATE_STRING (1, str);

  /* Both representations share length reporting, allocation and
     termination, so narrow and wide strings give the same contract.  */
  len = scm_i_string_length (str);
  if (lenp)
    *lenp = len;

  result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));

  if (scm_i_is_narrow_string (str))
    {
      /* Widen each byte of the narrow string to one code point.  Going
         through scm_to_stringn (..., "UTF-32", ...) instead would
         report the length in bytes rather than code points, and the
         converter may add a byte-order mark or use a byte order other
         than the host's.
         NOTE(review): assumes narrow strings store one Latin-1 byte per
         character, readable via scm_i_string_chars -- confirm.  */
      const unsigned char *bytes =
        (const unsigned char *) scm_i_string_chars (str);
      size_t i;

      for (i = 0; i < len; i++)
        result[i] = bytes[i];
    }
  else
    memcpy (result, scm_i_string_wide_chars (str),
            len * sizeof (scm_t_wchar));

  result[len] = 0;

  return result;
}
#undef FUNC_NAME
/* Return a malloc(3)-allocated buffer containing the contents of STR encoded /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
according to ENCODING. If LENP is non-NULL, set it to the size in bytes of according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
the returned buffer. If the conversion to ENCODING fails, apply the strategy the returned buffer. If the conversion to ENCODING fails, apply the strategy

View file

@ -148,6 +148,11 @@ SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf8_string (const char *str); SCM_API SCM scm_from_utf8_string (const char *str);
SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len); SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len);
/* Conversions between Scheme strings and C arrays of scm_t_wchar
   (UTF-32).  For scm_from_utf32_stringn, LEN is a number of scm_t_wchar
   elements, not bytes; (size_t) -1 means STR is zero-terminated.  The
   scm_to_* variants return a newly allocated buffer.  */
SCM_API scm_t_wchar *scm_to_utf32_string (SCM str);
SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str);
SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len);
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler); scm_t_string_failed_conversion_handler handler);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);