1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-10 14:00:21 +02:00

Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function
This commit is contained in:
Michael Gran 2010-09-12 08:29:31 -07:00
parent 58228cc689
commit cf313a947b
3 changed files with 87 additions and 8 deletions

View file

@ -3969,6 +3969,74 @@ is larger than @var{max_len}, only @var{max_len} bytes have been
stored and you probably need to try again with a larger buffer.
@end deftypefn
For most situations, string conversion should occur using the current
locale, such as with the functions above. But there may be cases where
one wants to convert strings from a character encoding other than the
locale's character encoding. For these cases, the lower-level functions
@code{scm_to_stringn} and @code{scm_from_stringn} are provided. These
functions should seldom be necessary if one is properly using locales.
@deftp {C Type} scm_t_string_failed_conversion_handler
This is an enumerated type that can take one of three values:
@code{SCM_FAILED_CONVERSION_ERROR},
@code{SCM_FAILED_CONVERSION_QUESTION_MARK}, and
@code{SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE}. They are used to indicate
a strategy for handling characters that cannot be converted to or from a
given character encoding. @code{SCM_FAILED_CONVERSION_ERROR} indicates
that a conversion should throw an error if some characters cannot be
converted. @code{SCM_FAILED_CONVERSION_QUESTION_MARK} indicates that a
conversion should replace unconvertible characters with the question
mark character. Finally, @code{SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE}
requests that a conversion replace each unconvertible character
with an escape sequence.
While all three strategies apply when converting Scheme strings to C,
only @code{SCM_FAILED_CONVERSION_ERROR} and
@code{SCM_FAILED_CONVERSION_QUESTION_MARK} can be used when converting C
strings to Scheme.
@end deftp
@deftypefn {C Function} char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, scm_t_string_failed_conversion_handler handler)
This function returns a newly allocated C string from the Guile string
@var{str}. The length of the string will be returned in @var{lenp}.
The character encoding of the C string is passed as the ASCII,
null-terminated C string @var{encoding}. The @var{handler} parameter
gives a strategy for dealing with characters that cannot be converted
into @var{encoding}.
If @var{lenp} is @code{NULL}, this function will return a
null-terminated C string. In that case, it will throw an error if the
string contains a null character.
@end deftypefn
@deftypefn {C Function} SCM scm_from_stringn (const char *str, size_t len, const char *encoding, scm_t_string_failed_conversion_handler handler)
This function returns a Scheme string from the C string @var{str}. The
length of the C string is input as @var{len}. The encoding of the C
string is passed as the ASCII, null-terminated C string @var{encoding}.
The @var{handler} parameter specifies a strategy for dealing with
unconvertible characters.
@end deftypefn
ISO-8859-1 is the most common 8-bit character encoding. This encoding
is also referred to as the Latin-1 encoding. The following two
conversion functions are provided to convert between Latin-1 C strings
and Guile strings.
@deftypefn {C Function} SCM scm_from_latin1_stringn (const char *str, size_t len)
This function returns a Scheme string from an ISO-8859-1-encoded C
string @var{str} of length @var{len}.
@end deftypefn
@deftypefn {C Function} char *scm_to_latin1_stringn (SCM str, size_t *lenp)
This function returns a newly allocated, ISO-8859-1-encoded C string
from the Scheme string @var{str}. An error will be thrown if the Scheme
string cannot be converted to the ISO-8859-1 encoding. If @var{lenp} is
@code{NULL}, the returned C string will be null terminated, and an error
will be thrown if the C string would otherwise contain null
characters. If @var{lenp} is not @code{NULL}, the length of the string is
returned in @var{lenp}, and the string is not null terminated.
@end deftypefn
@node String Internals
@subsubsection String Internals

View file

@ -1501,6 +1501,12 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
return res;
}
/* Return a Scheme string built from the LEN bytes at STR, which the
   caller supplies as ISO-8859-1 (Latin-1) text.

   This is a thin wrapper around scm_from_stringn: it passes a NULL
   encoding and SCM_FAILED_CONVERSION_ERROR as the failed-conversion
   handler.
   NOTE(review): this relies on scm_from_stringn treating a NULL
   encoding as Latin-1 -- confirm against its definition.  */
SCM
scm_from_latin1_stringn (const char *str, size_t len)
{
  SCM result;

  result = scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
  return result;
}
SCM
scm_from_locale_stringn (const char *str, size_t len)
{
@ -1697,6 +1703,11 @@ unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
memcpy (before, after, j);
}
/* Convert the Scheme string STR to a C string and return it.  LENP is
   forwarded unchanged to scm_to_stringn.

   This is a thin wrapper around scm_to_stringn: it passes a NULL
   encoding and SCM_FAILED_CONVERSION_ERROR as the failed-conversion
   handler.
   NOTE(review): this relies on scm_to_stringn treating a NULL encoding
   as Latin-1 (ISO-8859-1) -- confirm against its definition.  */
char *
scm_to_latin1_stringn (SCM str, size_t *lenp)
{
  char *latin1;

  latin1 = scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
  return latin1;
}
char *
scm_to_locale_stringn (SCM str, size_t *lenp)

View file

@ -113,10 +113,8 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
SCM_API SCM scm_from_stringn (const char *str, size_t len,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym);
@ -128,17 +126,17 @@ SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
SCM_API int scm_is_string (SCM x);
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
SCM_API SCM scm_from_locale_string (const char *str);
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler);
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
@ -215,6 +213,8 @@ SCM_API SCM scm_sys_symbol_dump (SCM);
SCM_API SCM scm_sys_stringbuf_hist (void);
#endif
/* deprecated stuff */
#if SCM_ENABLE_DEPRECATED