diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 17b32bbd8..e9d40bfb5 100755 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -4040,14 +4040,21 @@ conversion functions are provided to convert between Latin-1 C strings and Guile strings. @deftypefn {C Function} SCM scm_from_latin1_stringn (const char *str, size_t len) -This function returns a scheme string from an ISO-8859-1-encoded C -string @var{str} of length @var{len}. +@deftypefnx {C Function} SCM scm_from_utf8_stringn (const char *str, size_t len) +@deftypefnx {C Function} SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len) +Return a Scheme string from C string @var{str}, which is ISO-8859-1-, +UTF-8-, or UTF-32-encoded, of length @var{len}. @var{len} is the number +of bytes pointed to by @var{str} for @code{scm_from_latin1_stringn} and +@code{scm_from_utf8_stringn}; it is the number of elements (code points) +in @var{str} in the case of @code{scm_from_utf32_stringn}. @end deftypefn -@deftypefn {C function} char * scm_to_latin1_stringn (SCM str, size_t *lenp) -This function returns a newly allocated, ISO-8859-1-encoded C string -from the scheme string @var{str}. An error will be thrown if the scheme -string cannot be converted to the ISO-8859-1 encoding. If @var{lenp} is +@deftypefn {C Function} char *scm_to_latin1_stringn (SCM str, size_t *lenp) +@deftypefnx {C Function} char *scm_to_utf8_stringn (SCM str, size_t *lenp) +@deftypefnx {C Function} scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp) +Return a newly allocated, ISO-8859-1-, UTF-8-, or UTF-32-encoded C string +from Scheme string @var{str}. An error is thrown when @var{str} +cannot be converted to the specified encoding. If @var{lenp} is @code{NULL}, the returned C string will be null terminated, and an error will be thrown if the C string would otherwise contain null characters. 
If @var{lenp} is not NULL, the length of the string is
diff --git a/libguile/strings.c b/libguile/strings.c
index 188bf0a81..41998a92b 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1570,6 +1570,38 @@ scm_from_utf8_stringn (const char *str, size_t len)
   return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
 }
 
+/* Return a new Scheme string holding the code points of the
+   zero-terminated UTF-32 C string STR.  */
+SCM
+scm_from_utf32_string (const scm_t_wchar *str)
+{
+  /* (size_t) -1 is the "measure it yourself" marker checked by
+     scm_from_utf32_stringn.  */
+  return scm_from_utf32_stringn (str, (size_t) -1);
+}
+
+/* Return a new Scheme string holding the LEN code points stored at STR.
+   When LEN is (size_t) -1, STR must be zero-terminated and its length is
+   computed with u32_strlen.  */
+SCM
+scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
+{
+  SCM result;
+  scm_t_wchar *buf;
+
+  if (len == (size_t) -1)
+    len = u32_strlen ((const uint32_t *) str);
+
+  result = scm_i_make_wide_string (len, &buf);
+  memcpy (buf, str, len * sizeof (scm_t_wchar));
+
+  /* NOTE(review): presumably switches RESULT to the narrow representation
+     when every character fits -- confirm against scm_i_try_narrow_string.  */
+  scm_i_try_narrow_string (result);
+
+  return result;
+}
+
 /* Create a new scheme string from the C string STR. The memory of STR may be
    used directly as storage for the new string. */
 /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
@@ -1795,6 +1827,54 @@ scm_to_utf8_stringn (SCM str, size_t *lenp)
   return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
 }
 
+/* Return a newly allocated, zero-terminated copy of STR as an array of
+   code points.  Equivalent to scm_to_utf32_stringn (STR, NULL).  */
+scm_t_wchar *
+scm_to_utf32_string (SCM str)
+{
+  return scm_to_utf32_stringn (str, NULL);
+}
+
+/* Return a newly allocated (scm_malloc) array holding the code points of
+   STR, followed by a terminating zero element.  If LENP is non-NULL, store
+   the number of code points (not bytes, and not counting the terminator)
+   in *LENP.  */
+scm_t_wchar *
+scm_to_utf32_stringn (SCM str, size_t *lenp)
+#define FUNC_NAME "scm_to_utf32_stringn"
+{
+  scm_t_wchar *result;
+  size_t len;
+
+  SCM_VALIDATE_STRING (1, str);
+
+  len = scm_i_string_length (str);
+  result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+
+  if (scm_i_is_narrow_string (str))
+    {
+      /* Widen each character by hand.  Going through
+         scm_to_stringn (..., "UTF-32", ...) would report the length in
+         bytes rather than code points, unlike the wide branch below.
+         NOTE(review): assumes a narrow string stores one code point per
+         byte -- confirm against scm_i_string_chars.  */
+      const unsigned char *narrow;
+      size_t i;
+
+      narrow = (const unsigned char *) scm_i_string_chars (str);
+      for (i = 0; i < len; i++)
+        result[i] = narrow[i];
+    }
+  else
+    memcpy (result, scm_i_string_wide_chars (str),
+            len * sizeof (scm_t_wchar));
+
+  result[len] = 0;
+  if (lenp)
+    *lenp = len;
+
+  return result;
+}
+#undef FUNC_NAME
+
 /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
    according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
    the returned buffer.
If the conversion to ENCODING fails, apply the strategy diff --git a/libguile/strings.h b/libguile/strings.h index 168fcb7d1..b9e901b6c 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -148,6 +148,11 @@ SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp); SCM_API SCM scm_from_utf8_string (const char *str); SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len); +SCM_API scm_t_wchar *scm_to_utf32_string (SCM str); +SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp); +SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str); +SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len); + SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, scm_t_string_failed_conversion_handler handler); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);