mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
Modify socket and time functions for wide strings
* libguile/socket.c (scm_recv): receive the message without holding the stringbuf writing lock (scm_send): try to narrow a string before using it * libguile/stime.c (strftime): convert string to UTF-8 so that it can be safely passed to strftime (strptime): convert input string to UTF-8 so that it can be safely passed through strptime * libguile/strings.c (narrow_stringbuf): new function (scm_i_try_narrow_string): new function * libguile/strings.h: new declaration for scm_i_try_narrow_string
This commit is contained in:
parent
27646f414e
commit
587a33556f
5 changed files with 200 additions and 33 deletions
|
@ -33,6 +33,7 @@
|
|||
#include "libguile/strings.h"
|
||||
#include "libguile/vectors.h"
|
||||
#include "libguile/dynwind.h"
|
||||
#include "libguile/srfi-13.h"
|
||||
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/socket.h"
|
||||
|
@ -1414,6 +1415,8 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
"protocols, if a packet larger than this limit is encountered\n"
|
||||
"then some data\n"
|
||||
"will be irrevocably lost.\n\n"
|
||||
"The data is assumed to be binary, and there is no decoding of\n"
|
||||
"of locale-encoded strings.\n\n"
|
||||
"The optional @var{flags} argument is a value or\n"
|
||||
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
|
||||
"The value returned is the number of bytes read from the\n"
|
||||
|
@ -1428,6 +1431,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
int flg;
|
||||
char *dest;
|
||||
size_t len;
|
||||
SCM msg;
|
||||
|
||||
SCM_VALIDATE_OPFPORT (1, sock);
|
||||
SCM_VALIDATE_STRING (2, buf);
|
||||
|
@ -1437,16 +1441,16 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
flg = scm_to_int (flags);
|
||||
fd = SCM_FPORT_FDES (sock);
|
||||
|
||||
len = scm_i_string_length (buf);
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
dest = scm_i_string_writable_chars (buf);
|
||||
len = scm_i_string_length (buf);
|
||||
msg = scm_i_make_string (len, &dest);
|
||||
SCM_SYSCALL (rv = recv (fd, dest, len, flg));
|
||||
scm_i_string_stop_writing ();
|
||||
scm_string_copy_x (buf, scm_from_int (0),
|
||||
msg, scm_from_int (0), scm_from_size_t (len));
|
||||
|
||||
if (rv == -1)
|
||||
SCM_SYSERROR;
|
||||
|
||||
scm_remember_upto_here_1 (buf);
|
||||
scm_remember_upto_here_2 (buf, msg);
|
||||
return scm_from_int (rv);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
@ -1464,18 +1468,28 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0,
|
|||
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
|
||||
"Note that the data is written directly to the socket\n"
|
||||
"file descriptor:\n"
|
||||
"any unflushed buffered port data is ignored.")
|
||||
"any unflushed buffered port data is ignored.\n\n"
|
||||
"This operation is defined only for strings containing codepoints\n"
|
||||
"zero to 255.")
|
||||
#define FUNC_NAME s_scm_send
|
||||
{
|
||||
int rv;
|
||||
int fd;
|
||||
int flg;
|
||||
const char *src;
|
||||
char *src;
|
||||
size_t len;
|
||||
|
||||
sock = SCM_COERCE_OUTPORT (sock);
|
||||
SCM_VALIDATE_OPFPORT (1, sock);
|
||||
SCM_VALIDATE_STRING (2, message);
|
||||
|
||||
/* If the string is wide, see if it can be coerced into
|
||||
a narrow string. */
|
||||
if (!scm_i_is_narrow_string (message)
|
||||
|| scm_i_try_narrow_string (message))
|
||||
SCM_MISC_ERROR ("the message string is not 8-bit: ~s",
|
||||
scm_list_1 (message));
|
||||
|
||||
if (SCM_UNBNDP (flags))
|
||||
flg = 0;
|
||||
else
|
||||
|
@ -1592,7 +1606,9 @@ SCM_DEFINE (scm_sendto, "sendto", 3, 1, 1,
|
|||
"set to be non-blocking.\n"
|
||||
"Note that the data is written directly to the socket\n"
|
||||
"file descriptor:\n"
|
||||
"any unflushed buffered port data is ignored.")
|
||||
"any unflushed buffered port data is ignored.\n"
|
||||
"This operation is defined only for strings containing codepoints\n"
|
||||
"zero to 255.")
|
||||
#define FUNC_NAME s_scm_sendto
|
||||
{
|
||||
int rv;
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <strftime.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/async.h"
|
||||
|
@ -53,6 +54,7 @@
|
|||
#include "libguile/strings.h"
|
||||
#include "libguile/vectors.h"
|
||||
#include "libguile/dynwind.h"
|
||||
#include "libguile/strings.h"
|
||||
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/stime.h"
|
||||
|
@ -624,18 +626,20 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
{
|
||||
struct tm t;
|
||||
|
||||
char *tbuf;
|
||||
scm_t_uint8 *tbuf;
|
||||
int size = 50;
|
||||
const char *fmt;
|
||||
char *myfmt;
|
||||
scm_t_uint8 *fmt;
|
||||
scm_t_uint8 *myfmt;
|
||||
int len;
|
||||
SCM result;
|
||||
|
||||
SCM_VALIDATE_STRING (1, format);
|
||||
bdtime2c (stime, &t, SCM_ARG2, FUNC_NAME);
|
||||
|
||||
fmt = scm_i_string_chars (format);
|
||||
len = scm_i_string_length (format);
|
||||
/* Convert string to UTF-8 so that non-ASCII characters in the
|
||||
format are passed through unchanged. */
|
||||
fmt = scm_i_to_utf8_string (format);
|
||||
len = strlen ((const char *) fmt);
|
||||
|
||||
/* Ugly hack: strftime can return 0 if its buffer is too small,
|
||||
but some valid time strings (e.g. "%p") can sometimes produce
|
||||
|
@ -643,9 +647,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
character to the format string, so that valid returns are always
|
||||
nonzero. */
|
||||
myfmt = scm_malloc (len+2);
|
||||
*myfmt = 'x';
|
||||
strncpy(myfmt+1, fmt, len);
|
||||
myfmt[len+1] = 0;
|
||||
*myfmt = (scm_t_uint8) 'x';
|
||||
strncpy ((char *) myfmt + 1, (const char *) fmt, len);
|
||||
myfmt[len + 1] = 0;
|
||||
scm_remember_upto_here_1 (format);
|
||||
free (fmt);
|
||||
|
||||
tbuf = scm_malloc (size);
|
||||
{
|
||||
|
@ -680,7 +686,8 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
|
||||
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
|
||||
supported by glibc. */
|
||||
while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0)
|
||||
while ((len = nstrftime ((char *) tbuf, size,
|
||||
(const char *) myfmt, &t, 0, 0)) == 0)
|
||||
{
|
||||
free (tbuf);
|
||||
size *= 2;
|
||||
|
@ -696,7 +703,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
#endif
|
||||
}
|
||||
|
||||
result = scm_from_locale_stringn (tbuf + 1, len - 1);
|
||||
result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1);
|
||||
free (tbuf);
|
||||
free (myfmt);
|
||||
#if HAVE_STRUCT_TM_TM_ZONE
|
||||
|
@ -722,14 +729,17 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
#define FUNC_NAME s_scm_strptime
|
||||
{
|
||||
struct tm t;
|
||||
const char *fmt, *str, *rest;
|
||||
scm_t_uint8 *fmt, *str, *rest;
|
||||
size_t used_len;
|
||||
long zoff;
|
||||
|
||||
SCM_VALIDATE_STRING (1, format);
|
||||
SCM_VALIDATE_STRING (2, string);
|
||||
|
||||
fmt = scm_i_string_chars (format);
|
||||
str = scm_i_string_chars (string);
|
||||
/* Convert strings to UTF-8 so that non-ASCII characters are passed
|
||||
through unchanged. */
|
||||
fmt = scm_i_to_utf8_string (format);
|
||||
str = scm_i_to_utf8_string (string);
|
||||
|
||||
/* initialize the struct tm */
|
||||
#define tm_init(field) t.field = 0
|
||||
|
@ -751,7 +761,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
fields, hence the use of SCM_CRITICAL_SECTION_START. */
|
||||
t.tm_isdst = -1;
|
||||
SCM_CRITICAL_SECTION_START;
|
||||
rest = strptime (str, fmt, &t);
|
||||
rest = (scm_t_uint8 *) strptime ((const char *) str,
|
||||
(const char *) fmt, &t);
|
||||
SCM_CRITICAL_SECTION_END;
|
||||
if (rest == NULL)
|
||||
{
|
||||
|
@ -759,6 +770,9 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
instance it doesn't. Force a sensible value for our error
|
||||
message. */
|
||||
errno = EINVAL;
|
||||
scm_remember_upto_here_2 (format, string);
|
||||
free (str);
|
||||
free (fmt);
|
||||
SCM_SYSERROR;
|
||||
}
|
||||
|
||||
|
@ -770,8 +784,14 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
zoff = 0;
|
||||
#endif
|
||||
|
||||
/* Compute the number of UTF-8 characters. */
|
||||
used_len = u8_strnlen (str, rest-str);
|
||||
scm_remember_upto_here_2 (format, string);
|
||||
free (str);
|
||||
free (fmt);
|
||||
|
||||
return scm_cons (filltime (&t, zoff, NULL),
|
||||
scm_from_signed_integer (rest - str));
|
||||
scm_from_signed_integer (used_len));
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
#endif /* HAVE_STRPTIME */
|
||||
|
|
|
@ -239,6 +239,36 @@ widen_stringbuf (SCM buf)
|
|||
}
|
||||
}
|
||||
|
||||
/* Convert a stringbuf of 32-bit UCS-4-encoded characters to one
|
||||
containing 8-bit Latin-1-encoded characters, if possible. */
|
||||
static void
|
||||
narrow_stringbuf (SCM buf)
|
||||
{
|
||||
size_t i, len;
|
||||
scm_t_wchar *wmem;
|
||||
char *mem;
|
||||
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
return;
|
||||
|
||||
len = STRINGBUF_OUTLINE_LENGTH (buf);
|
||||
i = 0;
|
||||
wmem = STRINGBUF_WIDE_CHARS (buf);
|
||||
while (i < len)
|
||||
if (wmem[i++] > 0xFF)
|
||||
return;
|
||||
|
||||
mem = scm_gc_malloc (sizeof (char) * (len + 1), "string");
|
||||
for (i = 0; i < len; i++)
|
||||
mem[i] = (unsigned char) wmem[i];
|
||||
|
||||
scm_gc_free (wmem, sizeof (scm_t_wchar) * (len + 1), "string");
|
||||
|
||||
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_WIDE);
|
||||
SCM_SET_CELL_WORD_1 (buf, mem);
|
||||
SCM_SET_CELL_WORD_2 (buf, len);
|
||||
}
|
||||
|
||||
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
/* Copy-on-write strings.
|
||||
|
@ -459,6 +489,18 @@ scm_i_is_narrow_string (SCM str)
|
|||
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
|
||||
}
|
||||
|
||||
/* Try to coerce a string to be narrow. It if is narrow already, do
|
||||
nothing. If it is wide, shrink it to narrow if none of its
|
||||
characters are above 0xFF. Return true if the string is narrow or
|
||||
was made to be narrow. */
|
||||
int
|
||||
scm_i_try_narrow_string (SCM str)
|
||||
{
|
||||
narrow_stringbuf (STRING_STRINGBUF (str));
|
||||
|
||||
return scm_i_is_narrow_string (str);
|
||||
}
|
||||
|
||||
/* Returns a pointer to the 8-bit Latin-1 encoded character array of
|
||||
STR. */
|
||||
const char *
|
||||
|
@ -623,7 +665,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
|
|||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
char *dst = scm_i_string_writable_chars (str);
|
||||
dst[p] = (char) (unsigned char) chr;
|
||||
dst[p] = chr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -633,7 +675,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
|
|||
}
|
||||
|
||||
/* Symbols.
|
||||
|
||||
|
||||
Basic symbol creation and accessing is done here, the rest is in
|
||||
symbols.[hc]. This has been done to keep stringbufs and the
|
||||
internals of strings and string-like objects confined to this file.
|
||||
|
@ -866,7 +908,7 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
|
|||
else
|
||||
e5 = scm_cons (scm_from_locale_symbol ("read-only"),
|
||||
SCM_BOOL_F);
|
||||
|
||||
|
||||
/* Stringbuf info */
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
{
|
||||
|
@ -1426,6 +1468,80 @@ scm_from_locale_string (const char *str)
|
|||
return scm_from_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
size_t u32len, i;
|
||||
scm_t_wchar *u32;
|
||||
int wide = 0;
|
||||
SCM res;
|
||||
|
||||
u32len = 0;
|
||||
u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
|
||||
(enum iconv_ilseq_handler)
|
||||
handler,
|
||||
str, len,
|
||||
NULL,
|
||||
NULL, &u32len);
|
||||
|
||||
if (u32 == NULL)
|
||||
{
|
||||
if (errno == ENOMEM)
|
||||
scm_memory_error ("locale string conversion");
|
||||
else
|
||||
{
|
||||
/* There are invalid sequences in the input string. Since
|
||||
it is partially nonsense, what is the best strategy for
|
||||
printing it in the error message? */
|
||||
SCM errstr;
|
||||
char *dst;
|
||||
/* We'll just print it unconverted and hope for the best. */
|
||||
errstr = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
|
||||
scm_list_2 (scm_from_locale_string (encoding),
|
||||
errstr));
|
||||
scm_remember_upto_here_1 (errstr);
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < u32len)
|
||||
if (u32[i++] > 0xFF)
|
||||
{
|
||||
wide = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!wide)
|
||||
{
|
||||
char *dst;
|
||||
res = scm_i_make_string (u32len, &dst);
|
||||
for (i = 0; i < u32len; i ++)
|
||||
dst[i] = (unsigned char) u32[i];
|
||||
dst[u32len] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_wchar *wdst;
|
||||
res = scm_i_make_wide_string (u32len, &wdst);
|
||||
u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
|
||||
wdst[u32len] = 0;
|
||||
}
|
||||
|
||||
free (u32);
|
||||
return res;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_i_from_utf8_string (const scm_t_uint8 *str)
|
||||
{
|
||||
return scm_from_stringn ((const char *) str,
|
||||
strlen ((char *) str), "UTF-8",
|
||||
SCM_FAILED_CONVERSION_ERROR);
|
||||
}
|
||||
|
||||
/* Create a new scheme string from the C string STR. The memory of
|
||||
STR may be used directly as storage for the new string. */
|
||||
SCM
|
||||
|
@ -1519,16 +1635,15 @@ scm_to_locale_stringn (SCM str, size_t * lenp)
|
|||
/* In the future, enc will hold the port's encoding. */
|
||||
enc = NULL;
|
||||
|
||||
return scm_to_stringn (str, lenp, enc,
|
||||
return scm_to_stringn (str, lenp, enc,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
}
|
||||
|
||||
/* Low-level scheme to C string conversion function. */
|
||||
char *
|
||||
scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
||||
scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
static const char iso[11] = "ISO-8859-1";
|
||||
char *buf;
|
||||
size_t ilen, len, i;
|
||||
|
||||
|
@ -1544,7 +1659,7 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
|||
*lenp = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
if (lenp == NULL)
|
||||
for (i = 0; i < ilen; i++)
|
||||
if (scm_i_string_ref (str, i) == '\0')
|
||||
|
@ -1570,16 +1685,16 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
buf = NULL;
|
||||
len = 0;
|
||||
buf = u32_conv_to_encoding (iso,
|
||||
buf = u32_conv_to_encoding (encoding ? encoding : "ISO-8859-1",
|
||||
(enum iconv_ilseq_handler) handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen, NULL, NULL, &len);
|
||||
if (buf == NULL)
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (iso), str));
|
||||
scm_list_2 (scm_from_locale_string (encoding), str));
|
||||
|
||||
if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
|
@ -1602,6 +1717,14 @@ scm_to_locale_string (SCM str)
|
|||
return scm_to_locale_stringn (str, NULL);
|
||||
}
|
||||
|
||||
scm_t_uint8 *
|
||||
scm_i_to_utf8_string (SCM str)
|
||||
{
|
||||
char *u8str;
|
||||
u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
||||
return (scm_t_uint8 *) u8str;
|
||||
}
|
||||
|
||||
size_t
|
||||
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
|
||||
{
|
||||
|
|
|
@ -124,6 +124,7 @@ SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
|
|||
SCM_API int scm_is_string (SCM x);
|
||||
SCM_API SCM scm_from_locale_string (const char *str);
|
||||
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
|
||||
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
|
||||
SCM_API SCM scm_take_locale_string (char *str);
|
||||
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
||||
SCM_API char *scm_to_locale_string (SCM str);
|
||||
|
@ -132,6 +133,7 @@ SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
|
|||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
|
||||
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
||||
|
||||
SCM_API SCM scm_makfromstrs (int argc, char **argv);
|
||||
|
@ -168,6 +170,7 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
|
|||
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
|
||||
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
|
||||
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
|
||||
SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
|
||||
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
|
||||
|
||||
|
|
|
@ -202,6 +202,11 @@
|
|||
(string=? (strftime "%Z" t)
|
||||
"ZOW")))
|
||||
|
||||
(pass-if "strftime passes wide characters"
|
||||
(let ((t (localtime (current-time))))
|
||||
(string=? (substring (strftime "\u0100%Z" t) 0 1)
|
||||
"\u0100")))
|
||||
|
||||
(with-test-prefix "C99 %z format"
|
||||
|
||||
;; %z here is quite possibly affected by the same tm:gmtoff vs current
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue