mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
Provide non-locale C/Scheme string conversion functions
* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn, scm_to_latin1_stringn, and scm_from_latin1_stringn * libguile/strings.h (scm_to_stringn): make public (scm_to_latin1_stringn): new declaration (scm_from_latin1_stringn): new declaration * libguile/strings.c (scm_to_latin1_stringn): new function (scm_from_latin1_stringn): new function
This commit is contained in:
parent
58228cc689
commit
cf313a947b
3 changed files with 87 additions and 8 deletions
|
@ -3969,6 +3969,74 @@ is larger than @var{max_len}, only @var{max_len} bytes have been
|
|||
stored and you probably need to try again with a larger buffer.
|
||||
@end deftypefn
|
||||
|
||||
For most situations, string conversion should occur using the current
|
||||
locale, such as with the functions above. But there may be cases where
|
||||
one wants to convert strings from a character encoding other than the
|
||||
locale's character encoding. For these cases, the lower-level functions
|
||||
@code{scm_to_stringn} and @code{scm_from_stringn} are provided. These
|
||||
functions should seldom be necessary if one is properly using locales.
|
||||
|
||||
@deftp {C Type} scm_t_string_failed_conversion_handler
|
||||
This is an enumerated type that can take one of three values:
|
||||
@code{SCM_FAILED_CONVERSION_ERROR},
|
||||
@code{SCM_FAILED_CONVERSION_QUESTION_MARK}, and
|
||||
@code{SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE}. They are used to indicate
|
||||
a strategy for handling characters that cannot be converted to or from a
|
||||
given character encoding. @code{SCM_FAILED_CONVERSION_ERROR} indicates
|
||||
that a conversion should throw an error if some characters cannot be
|
||||
converted. @code{SCM_FAILED_CONVERSION_QUESTION_MARK} indicates that a
|
||||
conversion should replace unconvertible characters with the question
|
||||
mark character. Finally, @code{SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE}
|
||||
requests that a conversion should replace an unconvertible character
|
||||
with an escape sequence.
|
||||
|
||||
While all three strategies apply when converting Scheme strings to C,
|
||||
only @code{SCM_FAILED_CONVERSION_ERROR} and
|
||||
@code{SCM_FAILED_CONVERSION_QUESTION_MARK} can be used when converting C
|
||||
strings to Scheme.
|
||||
@end deftp
|
||||
|
||||
@deftypefn {C Function} char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, scm_t_string_failed_conversion_handler handler)
|
||||
This function returns a newly allocated C string from the Guile string
|
||||
@var{str}. The length of the string will be returned in @var{lenp}.
|
||||
The character encoding of the C string is passed as the ASCII,
|
||||
null-terminated C string @var{encoding}. The @var{handler} parameter
|
||||
gives a strategy for dealing with characters that cannot be converted
|
||||
into @var{encoding}.
|
||||
|
||||
If @var{lenp} is @code{NULL}, this function will return a null-terminated C
|
||||
string. In that case, it will throw an error if the string contains a null
|
||||
character.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {C Function} SCM scm_from_stringn (const char *str, size_t len, const char *encoding, scm_t_string_failed_conversion_handler handler)
|
||||
This function returns a Scheme string from the C string @var{str}. The
|
||||
length of the C string is input as @var{len}. The encoding of the C
|
||||
string is passed as the ASCII, null-terminated C string @var{encoding}.
|
||||
The @var{handler} parameter gives a strategy for dealing with
|
||||
unconvertible characters.
|
||||
@end deftypefn
|
||||
|
||||
ISO-8859-1 is the most common 8-bit character encoding. This encoding
|
||||
is also referred to as the Latin-1 encoding. The following two
|
||||
conversion functions are provided to convert between Latin-1 C strings
|
||||
and Guile strings.
|
||||
|
||||
@deftypefn {C Function} SCM scm_from_latin1_stringn (const char *str, size_t len)
|
||||
This function returns a Scheme string from an ISO-8859-1-encoded C
|
||||
string @var{str} of length @var{len}.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {C Function} char *scm_to_latin1_stringn (SCM str, size_t *lenp)
|
||||
This function returns a newly allocated, ISO-8859-1-encoded C string
|
||||
from the Scheme string @var{str}. An error will be thrown if the Scheme
|
||||
string cannot be converted to the ISO-8859-1 encoding. If @var{lenp} is
|
||||
@code{NULL}, the returned C string will be null terminated, and an error
|
||||
will be thrown if the C string would otherwise contain null
|
||||
characters. If @var{lenp} is not @code{NULL}, the length of the string is
|
||||
returned in @var{lenp}, and the string is not null terminated.
|
||||
@end deftypefn
|
||||
|
||||
@node String Internals
|
||||
@subsubsection String Internals
|
||||
|
||||
|
|
|
@ -1501,6 +1501,12 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
|||
return res;
|
||||
}
|
||||
|
||||
/* Return a Scheme string made from the C string STR of length LEN.
   This is a thin wrapper: it forwards STR and LEN to scm_from_stringn
   with a NULL encoding and SCM_FAILED_CONVERSION_ERROR as the
   failed-conversion handler, and returns that call's result unchanged.

   NOTE(review): the NULL encoding is presumably treated by
   scm_from_stringn as ISO-8859-1 (Latin-1); its definition is not
   visible here, so confirm before relying on it.  */
SCM
|
||||
scm_from_latin1_stringn (const char *str, size_t len)
|
||||
{
|
||||
return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_stringn (const char *str, size_t len)
|
||||
{
|
||||
|
@ -1697,6 +1703,11 @@ unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
|
|||
memcpy (before, after, j);
|
||||
}
|
||||
|
||||
/* Return a C string converted from the Scheme string STR.  This is a
   thin wrapper: it forwards STR and LENP to scm_to_stringn with a NULL
   encoding and SCM_FAILED_CONVERSION_ERROR as the failed-conversion
   handler, and returns that call's result unchanged.

   NOTE(review): the NULL encoding is presumably treated by
   scm_to_stringn as ISO-8859-1 (Latin-1), and the handling of a NULL
   LENP (null termination) is decided entirely by scm_to_stringn; its
   definition is not visible here, so confirm both.  */
char *
|
||||
scm_to_latin1_stringn (SCM str, size_t *lenp)
|
||||
{
|
||||
return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
|
||||
}
|
||||
|
||||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t *lenp)
|
||||
|
|
|
@ -113,10 +113,8 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
|
|||
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
|
||||
SCM_API SCM scm_string_append (SCM args);
|
||||
|
||||
SCM_API SCM scm_from_stringn (const char *str, size_t len,
|
||||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler);
|
||||
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
|
||||
SCM_API size_t scm_c_string_length (SCM str);
|
||||
SCM_API size_t scm_c_symbol_length (SCM sym);
|
||||
|
@ -128,17 +126,17 @@ SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
|
|||
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
|
||||
|
||||
SCM_API int scm_is_string (SCM x);
|
||||
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
|
||||
SCM_API SCM scm_from_locale_string (const char *str);
|
||||
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
|
||||
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
|
||||
SCM_API SCM scm_take_locale_string (char *str);
|
||||
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
||||
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
|
||||
SCM_API char *scm_to_locale_string (SCM str);
|
||||
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
|
||||
SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
|
||||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler);
|
||||
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
|
||||
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
||||
|
||||
|
@ -215,6 +213,8 @@ SCM_API SCM scm_sys_symbol_dump (SCM);
|
|||
SCM_API SCM scm_sys_stringbuf_hist (void);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* deprecated stuff */
|
||||
|
||||
#if SCM_ENABLE_DEPRECATED
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue