1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.
This commit is contained in:
Andy Wingo 2011-01-05 18:21:54 -06:00
parent 929ccf48fc
commit d40e1ca893
3 changed files with 82 additions and 55 deletions

View file

@ -1,4 +1,4 @@
/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. /* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -625,11 +625,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
{ {
struct tm t; struct tm t;
scm_t_uint8 *tbuf; char *tbuf;
int size = 50; int size = 50;
scm_t_uint8 *fmt; char *fmt;
scm_t_uint8 *myfmt; char *myfmt;
int len; size_t len;
SCM result; SCM result;
SCM_VALIDATE_STRING (1, format); SCM_VALIDATE_STRING (1, format);
@ -637,8 +637,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
/* Convert string to UTF-8 so that non-ASCII characters in the /* Convert string to UTF-8 so that non-ASCII characters in the
format are passed through unchanged. */ format are passed through unchanged. */
fmt = scm_i_to_utf8_string (format); fmt = scm_to_utf8_stringn (format, &len);
len = strlen ((const char *) fmt);
/* Ugly hack: strftime can return 0 if its buffer is too small, /* Ugly hack: strftime can return 0 if its buffer is too small,
but some valid time strings (e.g. "%p") can sometimes produce but some valid time strings (e.g. "%p") can sometimes produce
@ -647,7 +646,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
nonzero. */ nonzero. */
myfmt = scm_malloc (len+2); myfmt = scm_malloc (len+2);
*myfmt = (scm_t_uint8) 'x'; *myfmt = (scm_t_uint8) 'x';
strncpy ((char *) myfmt + 1, (const char *) fmt, len); strncpy (myfmt + 1, fmt, len);
myfmt[len + 1] = 0; myfmt[len + 1] = 0;
scm_remember_upto_here_1 (format); scm_remember_upto_here_1 (format);
free (fmt); free (fmt);
@ -685,8 +684,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions /* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
supported by glibc. */ supported by glibc. */
while ((len = nstrftime ((char *) tbuf, size, while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0)
(const char *) myfmt, &t, 0, 0)) == 0)
{ {
free (tbuf); free (tbuf);
size *= 2; size *= 2;
@ -702,7 +700,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
#endif #endif
} }
result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1); result = scm_from_utf8_string (tbuf + 1);
free (tbuf); free (tbuf);
free (myfmt); free (myfmt);
#if HAVE_STRUCT_TM_TM_ZONE #if HAVE_STRUCT_TM_TM_ZONE
@ -728,7 +726,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
#define FUNC_NAME s_scm_strptime #define FUNC_NAME s_scm_strptime
{ {
struct tm t; struct tm t;
scm_t_uint8 *fmt, *str, *rest; char *fmt, *str, *rest;
size_t used_len; size_t used_len;
long zoff; long zoff;
@ -737,8 +735,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
/* Convert strings to UTF-8 so that non-ASCII characters are passed /* Convert strings to UTF-8 so that non-ASCII characters are passed
through unchanged. */ through unchanged. */
fmt = scm_i_to_utf8_string (format); fmt = scm_to_utf8_string (format);
str = scm_i_to_utf8_string (string); str = scm_to_utf8_string (string);
/* initialize the struct tm */ /* initialize the struct tm */
#define tm_init(field) t.field = 0 #define tm_init(field) t.field = 0
@ -760,8 +758,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
fields, hence the use of SCM_CRITICAL_SECTION_START. */ fields, hence the use of SCM_CRITICAL_SECTION_START. */
t.tm_isdst = -1; t.tm_isdst = -1;
SCM_CRITICAL_SECTION_START; SCM_CRITICAL_SECTION_START;
rest = (scm_t_uint8 *) strptime ((const char *) str, rest = strptime (str, fmt, &t);
(const char *) fmt, &t);
SCM_CRITICAL_SECTION_END; SCM_CRITICAL_SECTION_END;
if (rest == NULL) if (rest == NULL)
{ {
@ -784,7 +781,7 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
#endif #endif
/* Compute the number of UTF-8 characters. */ /* Compute the number of UTF-8 characters. */
used_len = u8_strnlen (str, rest-str); used_len = u8_strnlen ((scm_t_uint8*) str, rest-str);
scm_remember_upto_here_2 (format, string); scm_remember_upto_here_2 (format, string);
free (str); free (str);
free (fmt); free (fmt);

View file

@ -1,4 +1,4 @@
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -1437,8 +1437,13 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
int wide = 0; int wide = 0;
SCM res; SCM res;
/* The order of these checks is important. */
if (len == 0) if (len == 0)
return scm_nullstr; return scm_nullstr;
if (!str)
scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
if (len == (size_t) -1)
len = strlen (str);
if (encoding == NULL) if (encoding == NULL)
{ {
@ -1502,9 +1507,9 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
} }
SCM SCM
scm_from_latin1_stringn (const char *str, size_t len) scm_from_locale_string (const char *str)
{ {
return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR); return scm_from_locale_stringn (str, -1);
} }
SCM SCM
@ -1515,11 +1520,6 @@ scm_from_locale_stringn (const char *str, size_t len)
SCM inport; SCM inport;
scm_t_port *pt; scm_t_port *pt;
if (len == (size_t) -1)
len = strlen (str);
if (len == 0)
return scm_nullstr;
inport = scm_current_input_port (); inport = scm_current_input_port ();
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport)) if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
{ {
@ -1537,20 +1537,27 @@ scm_from_locale_stringn (const char *str, size_t len)
} }
SCM SCM
scm_from_locale_string (const char *str) scm_from_latin1_string (const char *str)
{ {
if (str == NULL) return scm_from_latin1_stringn (str, -1);
return scm_nullstr;
return scm_from_locale_stringn (str, -1);
} }
SCM SCM
scm_i_from_utf8_string (const scm_t_uint8 *str) scm_from_latin1_stringn (const char *str, size_t len)
{ {
return scm_from_stringn ((const char *) str, return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
strlen ((char *) str), "UTF-8", }
SCM_FAILED_CONVERSION_ERROR);
SCM
scm_from_utf8_string (const char *str)
{
  /* Build a Scheme string from the NUL-terminated UTF-8 C string STR.

     This is the length-less convenience form of scm_from_utf8_stringn:
     it passes (size_t) -1 as the length, which scm_from_stringn treats
     as a request to measure STR with strlen.  A NULL STR combined with
     that length makes scm_from_stringn raise a "NULL string pointer"
     error.  */
  const size_t measure_with_strlen = (size_t) -1;

  return scm_from_utf8_stringn (str, measure_with_strlen);
}
SCM
scm_from_utf8_stringn (const char *str, size_t len)
{
return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
} }
/* Create a new scheme string from the C string STR. The memory of /* Create a new scheme string from the C string STR. The memory of
@ -1707,9 +1714,9 @@ scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
} }
char * char *
scm_to_latin1_stringn (SCM str, size_t *lenp) scm_to_locale_string (SCM str)
{ {
return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR); return scm_to_locale_stringn (str, NULL);
} }
char * char *
@ -1733,6 +1740,30 @@ scm_to_locale_stringn (SCM str, size_t *lenp)
scm_i_get_conversion_strategy (SCM_BOOL_F)); scm_i_get_conversion_strategy (SCM_BOOL_F));
} }
char *
scm_to_latin1_string (SCM str)
{
  /* Convenience form of scm_to_latin1_stringn for callers that do not
     want the byte length back: the length out-parameter is passed as
     NULL.  The result is whatever scm_to_latin1_stringn returns, i.e.
     the buffer produced by scm_to_stringn.
     NOTE(review): how a NULL length pointer affects termination and
     embedded NULs is decided inside scm_to_stringn -- confirm there.  */
  char *converted;

  converted = scm_to_latin1_stringn (str, NULL);
  return converted;
}
char *
scm_to_latin1_stringn (SCM str, size_t *lenp)
{
  /* Convert STR through scm_to_stringn, passing NULL as the encoding
     and SCM_FAILED_CONVERSION_ERROR as the failure handler.  LENP is
     forwarded untouched; scm_to_stringn stores the buffer size in
     bytes there when it is non-NULL.
     NOTE(review): a NULL encoding is presumably scm_to_stringn's
     spelling of Latin-1 -- confirm against its definition.  */
  const char *no_named_encoding = NULL;

  return scm_to_stringn (str, lenp, no_named_encoding,
                         SCM_FAILED_CONVERSION_ERROR);
}
char *
scm_to_utf8_string (SCM str)
{
  /* Convenience form of scm_to_utf8_stringn for callers that do not
     want the byte length back: the length out-parameter is passed as
     NULL.  The result is whatever scm_to_utf8_stringn returns, i.e.
     the buffer produced by scm_to_stringn.
     NOTE(review): how a NULL length pointer affects termination and
     embedded NULs is decided inside scm_to_stringn -- confirm there.  */
  char *utf8_copy;

  utf8_copy = scm_to_utf8_stringn (str, NULL);
  return utf8_copy;
}
char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
{
  /* Convert STR through scm_to_stringn with the encoding fixed to
     "UTF-8" and SCM_FAILED_CONVERSION_ERROR as the failure handler.
     LENP is forwarded untouched; scm_to_stringn stores the buffer size
     in bytes there when it is non-NULL.  */
  const char *target_encoding = "UTF-8";

  return scm_to_stringn (str, lenp, target_encoding,
                         SCM_FAILED_CONVERSION_ERROR);
}
/* Return a malloc(3)-allocated buffer containing the contents of STR encoded /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
according to ENCODING. If LENP is non-NULL, set it to the size in bytes of according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
the returned buffer. If the conversion to ENCODING fails, apply the strategy the returned buffer. If the conversion to ENCODING fails, apply the strategy
@ -1845,20 +1876,6 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
return buf; return buf;
} }
char *
scm_to_locale_string (SCM str)
{
return scm_to_locale_stringn (str, NULL);
}
scm_t_uint8 *
scm_i_to_utf8_string (SCM str)
{
char *u8str;
u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
return (scm_t_uint8 *) u8str;
}
size_t size_t
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len) scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
{ {

View file

@ -3,7 +3,7 @@
#ifndef SCM_STRINGS_H #ifndef SCM_STRINGS_H
#define SCM_STRINGS_H #define SCM_STRINGS_H
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. /* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -125,18 +125,31 @@ SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end); SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end); SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len); /* Use locale encoding for user input, user output, or interacting with
the C library. Use latin1 for ASCII, and for literals in source
code. Use utf8 for interaction with modern libraries which deal in
UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a
specific encoding. */
SCM_API SCM scm_from_locale_string (const char *str); SCM_API SCM scm_from_locale_string (const char *str);
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len); SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
SCM_API SCM scm_take_locale_string (char *str); SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len); SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
SCM_API char *scm_to_locale_string (SCM str); SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_latin1_string (const char *str);
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
SCM_API char *scm_to_latin1_string (SCM str);
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
SCM_API char *scm_to_utf8_string (SCM str);
SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf8_string (const char *str);
SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len);
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler); scm_t_string_failed_conversion_handler handler);
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
SCM_API SCM scm_string_normalize_nfd (SCM str); SCM_API SCM scm_string_normalize_nfd (SCM str);