1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

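Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.

SCM_MAX_ALLOCA caps each converted string buffer at 4096 bytes; the
locale comparison functions convert two strings per call, so a single
comparison places at most about 8 kilobytes of these buffers on the
stack.  Larger buffers are obtained with malloc and freed afterwards.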

* libguile/i18n.c (SCM_MAX_ALLOCA): New macro.
(SCM_STRING_TO_U32_BUF): Accept an additional variable to remember
whether we used malloc to allocate the buffer.  Use malloc if the
allocation size is greater than SCM_MAX_ALLOCA.
(SCM_CLEANUP_U32_BUF): New macro.
(compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt.
* libguile/strings.c (SCM_MAX_ALLOCA): New macro.
(normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the
allocation size is greater than SCM_MAX_ALLOCA.
* test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests.
This commit is contained in:
Mark H Weaver 2019-05-06 21:11:26 -04:00 committed by Andy Wingo
parent 2bfa4f73f1
commit bd50407d1f
4 changed files with 107 additions and 37 deletions

View file

@ -51,6 +51,10 @@
#include "i18n.h" #include "i18n.h"
#ifndef SCM_MAX_ALLOCA
# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
#endif
#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE #if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale /* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
Model, a Proposal'', by Ulrich Drepper: Model, a Proposal'', by Ulrich Drepper:
@ -752,23 +756,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
A similar API can be found in MzScheme starting from version 200: A similar API can be found in MzScheme starting from version 200:
http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */ http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
#define SCM_STRING_TO_U32_BUF(s1, c_s1) \ #define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \
do \ do \
{ \ { \
if (scm_i_is_narrow_string (s1)) \ if (scm_i_is_narrow_string (str)) \
{ \ { \
size_t i, len; \ size_t i, len, bytes; \
const char *buf = scm_i_string_chars (s1); \ const char *buf = scm_i_string_chars (str); \
\ \
len = scm_i_string_length (s1); \ len = scm_i_string_length (str); \
c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \ bytes = (len + 1) * sizeof (scm_t_wchar); \
\ c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \
for (i = 0; i < len; i ++) \ c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \
c_s1[i] = (unsigned char ) buf[i]; \ \
c_s1[len] = 0; \ for (i = 0; i < len; i ++) \
} \ c_str[i] = (unsigned char ) buf[i]; \
else \ c_str[len] = 0; \
c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \ } \
else \
{ \
c_str_malloc_p = 0; \
c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \
} \
} while (0)
#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \
do \
{ \
if (c_str_malloc_p) \
free (c_str); \
} while (0) } while (0)
@ -782,10 +798,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
int result; int result;
scm_t_locale c_locale; scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2; scm_t_wchar *c_s1, *c_s2;
int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
SCM_STRING_TO_U32_BUF (s1, c_s1); SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
SCM_STRING_TO_U32_BUF (s2, c_s2); SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale) if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, RUN_IN_LOCALE_SECTION (c_locale,
@ -795,6 +812,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
result = u32_strcoll ((const uint32_t *) c_s1, result = u32_strcoll ((const uint32_t *) c_s1,
(const uint32_t *) c_s2); (const uint32_t *) c_s2);
SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
scm_remember_upto_here_2 (s1, s2); scm_remember_upto_here_2 (s1, s2);
scm_remember_upto_here (locale); scm_remember_upto_here (locale);
return result; return result;
@ -837,10 +857,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
int result, ret = 0; int result, ret = 0;
scm_t_locale c_locale; scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2; scm_t_wchar *c_s1, *c_s2;
int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
SCM_STRING_TO_U32_BUF (s1, c_s1); SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
SCM_STRING_TO_U32_BUF (s2, c_s2); SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale) if (c_locale)
RUN_IN_LOCALE_SECTION RUN_IN_LOCALE_SECTION
@ -855,6 +876,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
(const uint32_t *) c_s2, (const uint32_t *) c_s2,
&result); &result);
SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
if (SCM_UNLIKELY (ret != 0)) if (SCM_UNLIKELY (ret != 0))
{ {
errno = ret; errno = ret;
@ -1221,13 +1245,13 @@ str_to_case (SCM str, scm_t_locale c_locale,
scm_t_wchar *c_str, *c_buf; scm_t_wchar *c_str, *c_buf;
uint32_t *c_convstr; uint32_t *c_convstr;
size_t len, convlen; size_t len, convlen;
int ret; int ret, c_str_malloc_p;
SCM convstr; SCM convstr;
len = scm_i_string_length (str); len = scm_i_string_length (str);
if (len == 0) if (len == 0)
return scm_nullstr; return scm_nullstr;
SCM_STRING_TO_U32_BUF (str, c_str); SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
if (c_locale) if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, ret = RUN_IN_LOCALE_SECTION (c_locale, ret =
@ -1239,6 +1263,8 @@ str_to_case (SCM str, scm_t_locale c_locale,
u32_locale_tocase ((uint32_t *) c_str, len, u32_locale_tocase ((uint32_t *) c_str, len,
&c_convstr, &convlen, func); &c_convstr, &convlen, func);
SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
scm_remember_upto_here (str); scm_remember_upto_here (str);
if (SCM_UNLIKELY (ret != 0)) if (SCM_UNLIKELY (ret != 0))

View file

@ -50,6 +50,10 @@
#include "strings.h" #include "strings.h"
#ifndef SCM_MAX_ALLOCA
# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
#endif
/* {Strings} /* {Strings}
@ -1813,6 +1817,7 @@ static void
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp) unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
{ {
char *before, *after; char *before, *after;
int malloc_p;
size_t i, j; size_t i, j;
/* The worst case is if the input string contains all 4-digit hex escapes. /* The worst case is if the input string contains all 4-digit hex escapes.
"\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */ "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
@ -1820,7 +1825,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
size_t nzeros, ndigits; size_t nzeros, ndigits;
before = buf; before = buf;
after = alloca (max_out_len); malloc_p = (max_out_len > SCM_MAX_ALLOCA);
after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
i = 0; i = 0;
j = 0; j = 0;
while (i < *lenp) while (i < *lenp)
@ -1878,6 +1884,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
} }
*lenp = j; *lenp = j;
memcpy (before, after, j); memcpy (before, after, j);
if (malloc_p)
free (after);
} }
char * char *
@ -2318,28 +2326,37 @@ normalize_str (SCM string, uninorm_t form)
{ {
SCM ret; SCM ret;
uint32_t *w_str; uint32_t *w_str;
uint32_t *w_norm_str;
scm_t_wchar *cbuf; scm_t_wchar *cbuf;
size_t rlen, len = scm_i_string_length (string); int malloc_p;
size_t norm_len, len = scm_i_string_length (string);
if (scm_i_is_narrow_string (string)) if (scm_i_is_narrow_string (string))
{ {
size_t i; size_t i, bytes;
const char *buf = scm_i_string_chars (string); const char *buf = scm_i_string_chars (string);
w_str = alloca (sizeof (scm_t_wchar) * (len + 1)); bytes = (len + 1) * sizeof (scm_t_wchar);
malloc_p = (bytes > SCM_MAX_ALLOCA);
w_str = malloc_p ? malloc (bytes) : alloca (bytes);
for (i = 0; i < len; i ++) for (i = 0; i < len; i ++)
w_str[i] = (unsigned char) buf[i]; w_str[i] = (unsigned char) buf[i];
w_str[len] = 0; w_str[len] = 0;
} }
else else
w_str = (uint32_t *) scm_i_string_wide_chars (string); {
malloc_p = 0;
w_str = (uint32_t *) scm_i_string_wide_chars (string);
}
w_str = u32_normalize (form, w_str, len, NULL, &rlen); w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
ret = scm_i_make_wide_string (rlen, &cbuf, 0); ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
u32_cpy ((uint32_t *) cbuf, w_str, rlen); u32_cpy ((uint32_t *) cbuf, w_norm_str, norm_len);
free (w_str); free (w_norm_str);
if (malloc_p)
free (w_str);
scm_i_try_narrow_string (ret); scm_i_try_narrow_string (ret);

View file

@ -78,7 +78,13 @@
(pass-if "string-locale-ci<?" (pass-if "string-locale-ci<?"
(and (string-locale-ci<? "hello" "WORLD") (and (string-locale-ci<? "hello" "WORLD")
(string-locale-ci<? "hello" "WORLD" (string-locale-ci<? "hello" "WORLD"
(make-locale (list LC_COLLATE) "C"))))) (make-locale (list LC_COLLATE) "C"))))
(pass-if "large strings"
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
(let ((large (make-string 4000000 #\a)))
(and (string-locale-ci=? large large)
(not (string-locale-ci<? large large))
(not (string-locale<? large large))))))
(define mingw? (define mingw?
@ -333,6 +339,15 @@
(string=? "Hello, World" (string-locale-titlecase (string=? "Hello, World" (string-locale-titlecase
"hello, world" (make-locale LC_ALL "C"))))) "hello, world" (make-locale LC_ALL "C")))))
(pass-if "large strings"
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
(let ((hellos (string-join (make-list 700000 "hello")))
(HELLOs (string-join (make-list 700000 "HELLO")))
(Hellos (string-join (make-list 700000 "Hello"))))
(and (string=? hellos (string-locale-downcase Hellos))
(string=? HELLOs (string-locale-upcase Hellos))
(string=? Hellos (string-locale-titlecase hellos)))))
(pass-if "string-locale-upcase German" (pass-if "string-locale-upcase German"
(under-german-utf8-locale-or-unresolved (under-german-utf8-locale-or-unresolved
(lambda () (lambda ()

View file

@ -471,6 +471,18 @@
(pass-if "compatibility composition is equal?" (pass-if "compatibility composition is equal?"
(equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69"))) (equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
;;
;; normalizing large strings
;;
(pass-if "string-normalize-{nfd,nfc,nfkd,nfkc} on large strings"
;; In Guile <= 2.2.4, these would overflow the C stack and crash.
(let ((large (make-string 4000000 #\a)))
(and (string=? large (string-normalize-nfd large))
(string=? large (string-normalize-nfc large))
(string=? large (string-normalize-nfkd large))
(string=? large (string-normalize-nfkc large)))))
;; ;;
;; string-utf8-length ;; string-utf8-length
;; ;;