mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
add scm_{to,from}_{utf8,latin1}_string{n,}
* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error. The null pointer is not the same as the
  empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.
This commit is contained in:
parent
929ccf48fc
commit
d40e1ca893
3 changed files with 82 additions and 55 deletions
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -625,11 +625,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
||||||
{
|
{
|
||||||
struct tm t;
|
struct tm t;
|
||||||
|
|
||||||
scm_t_uint8 *tbuf;
|
char *tbuf;
|
||||||
int size = 50;
|
int size = 50;
|
||||||
scm_t_uint8 *fmt;
|
char *fmt;
|
||||||
scm_t_uint8 *myfmt;
|
char *myfmt;
|
||||||
int len;
|
size_t len;
|
||||||
SCM result;
|
SCM result;
|
||||||
|
|
||||||
SCM_VALIDATE_STRING (1, format);
|
SCM_VALIDATE_STRING (1, format);
|
||||||
|
@ -637,8 +637,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
||||||
|
|
||||||
/* Convert string to UTF-8 so that non-ASCII characters in the
|
/* Convert string to UTF-8 so that non-ASCII characters in the
|
||||||
format are passed through unchanged. */
|
format are passed through unchanged. */
|
||||||
fmt = scm_i_to_utf8_string (format);
|
fmt = scm_to_utf8_stringn (format, &len);
|
||||||
len = strlen ((const char *) fmt);
|
|
||||||
|
|
||||||
/* Ugly hack: strftime can return 0 if its buffer is too small,
|
/* Ugly hack: strftime can return 0 if its buffer is too small,
|
||||||
but some valid time strings (e.g. "%p") can sometimes produce
|
but some valid time strings (e.g. "%p") can sometimes produce
|
||||||
|
@ -647,7 +646,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
||||||
nonzero. */
|
nonzero. */
|
||||||
myfmt = scm_malloc (len+2);
|
myfmt = scm_malloc (len+2);
|
||||||
*myfmt = (scm_t_uint8) 'x';
|
*myfmt = (scm_t_uint8) 'x';
|
||||||
strncpy ((char *) myfmt + 1, (const char *) fmt, len);
|
strncpy (myfmt + 1, fmt, len);
|
||||||
myfmt[len + 1] = 0;
|
myfmt[len + 1] = 0;
|
||||||
scm_remember_upto_here_1 (format);
|
scm_remember_upto_here_1 (format);
|
||||||
free (fmt);
|
free (fmt);
|
||||||
|
@ -685,8 +684,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
||||||
|
|
||||||
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
|
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
|
||||||
supported by glibc. */
|
supported by glibc. */
|
||||||
while ((len = nstrftime ((char *) tbuf, size,
|
while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0)
|
||||||
(const char *) myfmt, &t, 0, 0)) == 0)
|
|
||||||
{
|
{
|
||||||
free (tbuf);
|
free (tbuf);
|
||||||
size *= 2;
|
size *= 2;
|
||||||
|
@ -702,7 +700,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1);
|
result = scm_from_utf8_string (tbuf + 1);
|
||||||
free (tbuf);
|
free (tbuf);
|
||||||
free (myfmt);
|
free (myfmt);
|
||||||
#if HAVE_STRUCT_TM_TM_ZONE
|
#if HAVE_STRUCT_TM_TM_ZONE
|
||||||
|
@ -728,7 +726,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
||||||
#define FUNC_NAME s_scm_strptime
|
#define FUNC_NAME s_scm_strptime
|
||||||
{
|
{
|
||||||
struct tm t;
|
struct tm t;
|
||||||
scm_t_uint8 *fmt, *str, *rest;
|
char *fmt, *str, *rest;
|
||||||
size_t used_len;
|
size_t used_len;
|
||||||
long zoff;
|
long zoff;
|
||||||
|
|
||||||
|
@ -737,8 +735,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
||||||
|
|
||||||
/* Convert strings to UTF-8 so that non-ASCII characters are passed
|
/* Convert strings to UTF-8 so that non-ASCII characters are passed
|
||||||
through unchanged. */
|
through unchanged. */
|
||||||
fmt = scm_i_to_utf8_string (format);
|
fmt = scm_to_utf8_string (format);
|
||||||
str = scm_i_to_utf8_string (string);
|
str = scm_to_utf8_string (string);
|
||||||
|
|
||||||
/* initialize the struct tm */
|
/* initialize the struct tm */
|
||||||
#define tm_init(field) t.field = 0
|
#define tm_init(field) t.field = 0
|
||||||
|
@ -760,8 +758,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
||||||
fields, hence the use of SCM_CRITICAL_SECTION_START. */
|
fields, hence the use of SCM_CRITICAL_SECTION_START. */
|
||||||
t.tm_isdst = -1;
|
t.tm_isdst = -1;
|
||||||
SCM_CRITICAL_SECTION_START;
|
SCM_CRITICAL_SECTION_START;
|
||||||
rest = (scm_t_uint8 *) strptime ((const char *) str,
|
rest = strptime (str, fmt, &t);
|
||||||
(const char *) fmt, &t);
|
|
||||||
SCM_CRITICAL_SECTION_END;
|
SCM_CRITICAL_SECTION_END;
|
||||||
if (rest == NULL)
|
if (rest == NULL)
|
||||||
{
|
{
|
||||||
|
@ -784,7 +781,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Compute the number of UTF-8 characters. */
|
/* Compute the number of UTF-8 characters. */
|
||||||
used_len = u8_strnlen (str, rest-str);
|
used_len = u8_strnlen ((scm_t_uint8*) str, rest-str);
|
||||||
scm_remember_upto_here_2 (format, string);
|
scm_remember_upto_here_2 (format, string);
|
||||||
free (str);
|
free (str);
|
||||||
free (fmt);
|
free (fmt);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
|
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -1437,8 +1437,13 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||||
int wide = 0;
|
int wide = 0;
|
||||||
SCM res;
|
SCM res;
|
||||||
|
|
||||||
|
/* The order of these checks is important. */
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
return scm_nullstr;
|
return scm_nullstr;
|
||||||
|
if (!str)
|
||||||
|
scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
|
||||||
|
if (len == (size_t) -1)
|
||||||
|
len = strlen (str);
|
||||||
|
|
||||||
if (encoding == NULL)
|
if (encoding == NULL)
|
||||||
{
|
{
|
||||||
|
@ -1502,9 +1507,9 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_from_latin1_stringn (const char *str, size_t len)
|
scm_from_locale_string (const char *str)
|
||||||
{
|
{
|
||||||
return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
|
return scm_from_locale_stringn (str, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
|
@ -1515,11 +1520,6 @@ scm_from_locale_stringn (const char *str, size_t len)
|
||||||
SCM inport;
|
SCM inport;
|
||||||
scm_t_port *pt;
|
scm_t_port *pt;
|
||||||
|
|
||||||
if (len == (size_t) -1)
|
|
||||||
len = strlen (str);
|
|
||||||
if (len == 0)
|
|
||||||
return scm_nullstr;
|
|
||||||
|
|
||||||
inport = scm_current_input_port ();
|
inport = scm_current_input_port ();
|
||||||
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
|
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
|
||||||
{
|
{
|
||||||
|
@ -1537,20 +1537,27 @@ scm_from_locale_stringn (const char *str, size_t len)
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_from_locale_string (const char *str)
|
scm_from_latin1_string (const char *str)
|
||||||
{
|
{
|
||||||
if (str == NULL)
|
return scm_from_latin1_stringn (str, -1);
|
||||||
return scm_nullstr;
|
|
||||||
|
|
||||||
return scm_from_locale_stringn (str, -1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_i_from_utf8_string (const scm_t_uint8 *str)
|
scm_from_latin1_stringn (const char *str, size_t len)
|
||||||
{
|
{
|
||||||
return scm_from_stringn ((const char *) str,
|
return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
|
||||||
strlen ((char *) str), "UTF-8",
|
}
|
||||||
SCM_FAILED_CONVERSION_ERROR);
|
|
||||||
|
SCM
|
||||||
|
scm_from_utf8_string (const char *str)
|
||||||
|
{
|
||||||
|
return scm_from_utf8_stringn (str, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
SCM
|
||||||
|
scm_from_utf8_stringn (const char *str, size_t len)
|
||||||
|
{
|
||||||
|
return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a new scheme string from the C string STR. The memory of
|
/* Create a new scheme string from the C string STR. The memory of
|
||||||
|
@ -1707,9 +1714,9 @@ scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
char *
|
||||||
scm_to_latin1_stringn (SCM str, size_t *lenp)
|
scm_to_locale_string (SCM str)
|
||||||
{
|
{
|
||||||
return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
|
return scm_to_locale_stringn (str, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
char *
|
||||||
|
@ -1733,6 +1740,30 @@ scm_to_locale_stringn (SCM str, size_t *lenp)
|
||||||
scm_i_get_conversion_strategy (SCM_BOOL_F));
|
scm_i_get_conversion_strategy (SCM_BOOL_F));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
scm_to_latin1_string (SCM str)
|
||||||
|
{
|
||||||
|
return scm_to_latin1_stringn (str, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
scm_to_latin1_stringn (SCM str, size_t *lenp)
|
||||||
|
{
|
||||||
|
return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
scm_to_utf8_string (SCM str)
|
||||||
|
{
|
||||||
|
return scm_to_utf8_stringn (str, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *
|
||||||
|
scm_to_utf8_stringn (SCM str, size_t *lenp)
|
||||||
|
{
|
||||||
|
return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
/* Return a malloc(3)-allocated buffer containing the contents of STR encoded
|
/* Return a malloc(3)-allocated buffer containing the contents of STR encoded
|
||||||
according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
|
according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
|
||||||
the returned buffer. If the conversion to ENCODING fails, apply the strategy
|
the returned buffer. If the conversion to ENCODING fails, apply the strategy
|
||||||
|
@ -1845,20 +1876,6 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
|
||||||
scm_to_locale_string (SCM str)
|
|
||||||
{
|
|
||||||
return scm_to_locale_stringn (str, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
scm_t_uint8 *
|
|
||||||
scm_i_to_utf8_string (SCM str)
|
|
||||||
{
|
|
||||||
char *u8str;
|
|
||||||
u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
|
||||||
return (scm_t_uint8 *) u8str;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
|
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
|
||||||
{
|
{
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#ifndef SCM_STRINGS_H
|
#ifndef SCM_STRINGS_H
|
||||||
#define SCM_STRINGS_H
|
#define SCM_STRINGS_H
|
||||||
|
|
||||||
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
|
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -125,18 +125,31 @@ SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end);
|
||||||
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
|
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
|
||||||
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
|
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
|
||||||
|
|
||||||
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
|
/* Use locale encoding for user input, user output, or interacting with
|
||||||
|
the C library. Use latin1 for ASCII, and for literals in source
|
||||||
|
code. Use utf8 for interaction with modern libraries which deal in
|
||||||
|
UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a
|
||||||
|
specific encoding. */
|
||||||
|
|
||||||
SCM_API SCM scm_from_locale_string (const char *str);
|
SCM_API SCM scm_from_locale_string (const char *str);
|
||||||
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
|
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
|
||||||
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
|
|
||||||
SCM_API SCM scm_take_locale_string (char *str);
|
SCM_API SCM scm_take_locale_string (char *str);
|
||||||
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
||||||
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
|
|
||||||
SCM_API char *scm_to_locale_string (SCM str);
|
SCM_API char *scm_to_locale_string (SCM str);
|
||||||
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
|
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
|
||||||
|
|
||||||
|
SCM_API SCM scm_from_latin1_string (const char *str);
|
||||||
|
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
|
||||||
|
SCM_API char *scm_to_latin1_string (SCM str);
|
||||||
|
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
|
||||||
|
|
||||||
|
SCM_API char *scm_to_utf8_string (SCM str);
|
||||||
|
SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp);
|
||||||
|
SCM_API SCM scm_from_utf8_string (const char *str);
|
||||||
|
SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len);
|
||||||
|
|
||||||
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
||||||
scm_t_string_failed_conversion_handler handler);
|
scm_t_string_failed_conversion_handler handler);
|
||||||
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
|
|
||||||
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
||||||
|
|
||||||
SCM_API SCM scm_string_normalize_nfd (SCM str);
|
SCM_API SCM scm_string_normalize_nfd (SCM str);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue