mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.
* libguile/i18n.c (SCM_MAX_ALLOCA): New macro.
(SCM_STRING_TO_U32_BUF): Accept an additional variable to remember whether we used malloc to allocate the buffer. Use malloc if the allocation size is greater than SCM_MAX_ALLOCA.
(SCM_CLEANUP_U32_BUF): New macro.
(compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt.
* libguile/strings.c (SCM_MAX_ALLOCA): New macro.
(normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the allocation size is greater than SCM_MAX_ALLOCA.
* test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests.
This commit is contained in:
parent
2bfa4f73f1
commit
bd50407d1f
4 changed files with 107 additions and 37 deletions
|
@ -51,6 +51,10 @@
|
||||||
|
|
||||||
#include "i18n.h"
|
#include "i18n.h"
|
||||||
|
|
||||||
|
#ifndef SCM_MAX_ALLOCA
|
||||||
|
# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
|
#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
|
||||||
/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
|
/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
|
||||||
Model, a Proposal'', by Ulrich Drepper:
|
Model, a Proposal'', by Ulrich Drepper:
|
||||||
|
@ -752,23 +756,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
|
||||||
A similar API can be found in MzScheme starting from version 200:
|
A similar API can be found in MzScheme starting from version 200:
|
||||||
http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
|
http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
|
||||||
|
|
||||||
#define SCM_STRING_TO_U32_BUF(s1, c_s1) \
|
#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \
|
||||||
do \
|
do \
|
||||||
{ \
|
{ \
|
||||||
if (scm_i_is_narrow_string (s1)) \
|
if (scm_i_is_narrow_string (str)) \
|
||||||
{ \
|
{ \
|
||||||
size_t i, len; \
|
size_t i, len, bytes; \
|
||||||
const char *buf = scm_i_string_chars (s1); \
|
const char *buf = scm_i_string_chars (str); \
|
||||||
\
|
\
|
||||||
len = scm_i_string_length (s1); \
|
len = scm_i_string_length (str); \
|
||||||
c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
|
bytes = (len + 1) * sizeof (scm_t_wchar); \
|
||||||
\
|
c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \
|
||||||
for (i = 0; i < len; i ++) \
|
c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \
|
||||||
c_s1[i] = (unsigned char ) buf[i]; \
|
\
|
||||||
c_s1[len] = 0; \
|
for (i = 0; i < len; i ++) \
|
||||||
} \
|
c_str[i] = (unsigned char ) buf[i]; \
|
||||||
else \
|
c_str[len] = 0; \
|
||||||
c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
c_str_malloc_p = 0; \
|
||||||
|
c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (c_str_malloc_p) \
|
||||||
|
free (c_str); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
@ -782,10 +798,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
|
||||||
int result;
|
int result;
|
||||||
scm_t_locale c_locale;
|
scm_t_locale c_locale;
|
||||||
scm_t_wchar *c_s1, *c_s2;
|
scm_t_wchar *c_s1, *c_s2;
|
||||||
|
int c_s1_malloc_p, c_s2_malloc_p;
|
||||||
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
|
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
|
||||||
|
|
||||||
SCM_STRING_TO_U32_BUF (s1, c_s1);
|
SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
|
||||||
SCM_STRING_TO_U32_BUF (s2, c_s2);
|
SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
|
||||||
|
|
||||||
if (c_locale)
|
if (c_locale)
|
||||||
RUN_IN_LOCALE_SECTION (c_locale,
|
RUN_IN_LOCALE_SECTION (c_locale,
|
||||||
|
@ -795,6 +812,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
|
||||||
result = u32_strcoll ((const uint32_t *) c_s1,
|
result = u32_strcoll ((const uint32_t *) c_s1,
|
||||||
(const uint32_t *) c_s2);
|
(const uint32_t *) c_s2);
|
||||||
|
|
||||||
|
SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
|
||||||
|
SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
|
||||||
|
|
||||||
scm_remember_upto_here_2 (s1, s2);
|
scm_remember_upto_here_2 (s1, s2);
|
||||||
scm_remember_upto_here (locale);
|
scm_remember_upto_here (locale);
|
||||||
return result;
|
return result;
|
||||||
|
@ -837,10 +857,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
|
||||||
int result, ret = 0;
|
int result, ret = 0;
|
||||||
scm_t_locale c_locale;
|
scm_t_locale c_locale;
|
||||||
scm_t_wchar *c_s1, *c_s2;
|
scm_t_wchar *c_s1, *c_s2;
|
||||||
|
int c_s1_malloc_p, c_s2_malloc_p;
|
||||||
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
|
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
|
||||||
|
|
||||||
SCM_STRING_TO_U32_BUF (s1, c_s1);
|
SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
|
||||||
SCM_STRING_TO_U32_BUF (s2, c_s2);
|
SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
|
||||||
|
|
||||||
if (c_locale)
|
if (c_locale)
|
||||||
RUN_IN_LOCALE_SECTION
|
RUN_IN_LOCALE_SECTION
|
||||||
|
@ -855,6 +876,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
|
||||||
(const uint32_t *) c_s2,
|
(const uint32_t *) c_s2,
|
||||||
&result);
|
&result);
|
||||||
|
|
||||||
|
SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
|
||||||
|
SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
|
||||||
|
|
||||||
if (SCM_UNLIKELY (ret != 0))
|
if (SCM_UNLIKELY (ret != 0))
|
||||||
{
|
{
|
||||||
errno = ret;
|
errno = ret;
|
||||||
|
@ -1221,13 +1245,13 @@ str_to_case (SCM str, scm_t_locale c_locale,
|
||||||
scm_t_wchar *c_str, *c_buf;
|
scm_t_wchar *c_str, *c_buf;
|
||||||
uint32_t *c_convstr;
|
uint32_t *c_convstr;
|
||||||
size_t len, convlen;
|
size_t len, convlen;
|
||||||
int ret;
|
int ret, c_str_malloc_p;
|
||||||
SCM convstr;
|
SCM convstr;
|
||||||
|
|
||||||
len = scm_i_string_length (str);
|
len = scm_i_string_length (str);
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
return scm_nullstr;
|
return scm_nullstr;
|
||||||
SCM_STRING_TO_U32_BUF (str, c_str);
|
SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
|
||||||
|
|
||||||
if (c_locale)
|
if (c_locale)
|
||||||
RUN_IN_LOCALE_SECTION (c_locale, ret =
|
RUN_IN_LOCALE_SECTION (c_locale, ret =
|
||||||
|
@ -1239,6 +1263,8 @@ str_to_case (SCM str, scm_t_locale c_locale,
|
||||||
u32_locale_tocase ((uint32_t *) c_str, len,
|
u32_locale_tocase ((uint32_t *) c_str, len,
|
||||||
&c_convstr, &convlen, func);
|
&c_convstr, &convlen, func);
|
||||||
|
|
||||||
|
SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
|
||||||
|
|
||||||
scm_remember_upto_here (str);
|
scm_remember_upto_here (str);
|
||||||
|
|
||||||
if (SCM_UNLIKELY (ret != 0))
|
if (SCM_UNLIKELY (ret != 0))
|
||||||
|
|
|
@ -50,6 +50,10 @@
|
||||||
#include "strings.h"
|
#include "strings.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef SCM_MAX_ALLOCA
|
||||||
|
# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* {Strings}
|
/* {Strings}
|
||||||
|
@ -1813,6 +1817,7 @@ static void
|
||||||
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
|
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
|
||||||
{
|
{
|
||||||
char *before, *after;
|
char *before, *after;
|
||||||
|
int malloc_p;
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
/* The worst case is if the input string contains all 4-digit hex escapes.
|
/* The worst case is if the input string contains all 4-digit hex escapes.
|
||||||
"\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
|
"\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
|
||||||
|
@ -1820,7 +1825,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
|
||||||
size_t nzeros, ndigits;
|
size_t nzeros, ndigits;
|
||||||
|
|
||||||
before = buf;
|
before = buf;
|
||||||
after = alloca (max_out_len);
|
malloc_p = (max_out_len > SCM_MAX_ALLOCA);
|
||||||
|
after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
|
||||||
i = 0;
|
i = 0;
|
||||||
j = 0;
|
j = 0;
|
||||||
while (i < *lenp)
|
while (i < *lenp)
|
||||||
|
@ -1878,6 +1884,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
|
||||||
}
|
}
|
||||||
*lenp = j;
|
*lenp = j;
|
||||||
memcpy (before, after, j);
|
memcpy (before, after, j);
|
||||||
|
if (malloc_p)
|
||||||
|
free (after);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *
|
char *
|
||||||
|
@ -2318,28 +2326,37 @@ normalize_str (SCM string, uninorm_t form)
|
||||||
{
|
{
|
||||||
SCM ret;
|
SCM ret;
|
||||||
uint32_t *w_str;
|
uint32_t *w_str;
|
||||||
|
uint32_t *w_norm_str;
|
||||||
scm_t_wchar *cbuf;
|
scm_t_wchar *cbuf;
|
||||||
size_t rlen, len = scm_i_string_length (string);
|
int malloc_p;
|
||||||
|
size_t norm_len, len = scm_i_string_length (string);
|
||||||
|
|
||||||
if (scm_i_is_narrow_string (string))
|
if (scm_i_is_narrow_string (string))
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i, bytes;
|
||||||
const char *buf = scm_i_string_chars (string);
|
const char *buf = scm_i_string_chars (string);
|
||||||
|
|
||||||
w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
|
bytes = (len + 1) * sizeof (scm_t_wchar);
|
||||||
|
malloc_p = (bytes > SCM_MAX_ALLOCA);
|
||||||
|
w_str = malloc_p ? malloc (bytes) : alloca (bytes);
|
||||||
|
|
||||||
for (i = 0; i < len; i ++)
|
for (i = 0; i < len; i ++)
|
||||||
w_str[i] = (unsigned char) buf[i];
|
w_str[i] = (unsigned char) buf[i];
|
||||||
w_str[len] = 0;
|
w_str[len] = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
w_str = (uint32_t *) scm_i_string_wide_chars (string);
|
{
|
||||||
|
malloc_p = 0;
|
||||||
|
w_str = (uint32_t *) scm_i_string_wide_chars (string);
|
||||||
|
}
|
||||||
|
|
||||||
w_str = u32_normalize (form, w_str, len, NULL, &rlen);
|
w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
|
||||||
|
|
||||||
ret = scm_i_make_wide_string (rlen, &cbuf, 0);
|
ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
|
||||||
u32_cpy ((uint32_t *) cbuf, w_str, rlen);
|
u32_cpy ((uint32_t *) cbuf, w_norm_str, norm_len);
|
||||||
free (w_str);
|
free (w_norm_str);
|
||||||
|
if (malloc_p)
|
||||||
|
free (w_str);
|
||||||
|
|
||||||
scm_i_try_narrow_string (ret);
|
scm_i_try_narrow_string (ret);
|
||||||
|
|
||||||
|
|
|
@ -78,7 +78,13 @@
|
||||||
(pass-if "string-locale-ci<?"
|
(pass-if "string-locale-ci<?"
|
||||||
(and (string-locale-ci<? "hello" "WORLD")
|
(and (string-locale-ci<? "hello" "WORLD")
|
||||||
(string-locale-ci<? "hello" "WORLD"
|
(string-locale-ci<? "hello" "WORLD"
|
||||||
(make-locale (list LC_COLLATE) "C")))))
|
(make-locale (list LC_COLLATE) "C"))))
|
||||||
|
(pass-if "large strings"
|
||||||
|
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
|
||||||
|
(let ((large (make-string 4000000 #\a)))
|
||||||
|
(and (string-locale-ci=? large large)
|
||||||
|
(not (string-locale-ci<? large large))
|
||||||
|
(not (string-locale<? large large))))))
|
||||||
|
|
||||||
|
|
||||||
(define mingw?
|
(define mingw?
|
||||||
|
@ -333,6 +339,15 @@
|
||||||
(string=? "Hello, World" (string-locale-titlecase
|
(string=? "Hello, World" (string-locale-titlecase
|
||||||
"hello, world" (make-locale LC_ALL "C")))))
|
"hello, world" (make-locale LC_ALL "C")))))
|
||||||
|
|
||||||
|
(pass-if "large strings"
|
||||||
|
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
|
||||||
|
(let ((hellos (string-join (make-list 700000 "hello")))
|
||||||
|
(HELLOs (string-join (make-list 700000 "HELLO")))
|
||||||
|
(Hellos (string-join (make-list 700000 "Hello"))))
|
||||||
|
(and (string=? hellos (string-locale-downcase Hellos))
|
||||||
|
(string=? HELLOs (string-locale-upcase Hellos))
|
||||||
|
(string=? Hellos (string-locale-titlecase hellos)))))
|
||||||
|
|
||||||
(pass-if "string-locale-upcase German"
|
(pass-if "string-locale-upcase German"
|
||||||
(under-german-utf8-locale-or-unresolved
|
(under-german-utf8-locale-or-unresolved
|
||||||
(lambda ()
|
(lambda ()
|
||||||
|
|
|
@ -471,6 +471,18 @@
|
||||||
(pass-if "compatibility composition is equal?"
|
(pass-if "compatibility composition is equal?"
|
||||||
(equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
|
(equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; normalizing large strings
|
||||||
|
;;
|
||||||
|
|
||||||
|
(pass-if "string-normalize-{nfd,nfc,nfkd,nfkc} on large strings"
|
||||||
|
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
|
||||||
|
(let ((large (make-string 4000000 #\a)))
|
||||||
|
(and (string=? large (string-normalize-nfd large))
|
||||||
|
(string=? large (string-normalize-nfc large))
|
||||||
|
(string=? large (string-normalize-nfkd large))
|
||||||
|
(string=? large (string-normalize-nfkc large)))))
|
||||||
|
|
||||||
;;
|
;;
|
||||||
;; string-utf8-length
|
;; string-utf8-length
|
||||||
;;
|
;;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue