1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

scm_i_utf8_string_hash: compute u8 chars not bytes

Noticed while investigating a migration to utf-8 strings.  After making
changes that routed non-ascii symbol hashing through this function,
encoding-iso88597.test began intermittently failing because it would
traverse trailing garbage when u8_strnlen reported 8 chars instead of 4.

Change the scm_i_str2symbol and scm_i_str2uninterned_symbol internal
hash type to unsigned long to explicitly match the scm_i_string_hash
result type.

* libguile/hash.c (scm_i_utf8_string_hash): Call u8_mbsnlen not u8_strnlen.
* libguile/symbols.c (scm_i_str2symbol, scm_i_str2uninterned_symbol):
Use unsigned long for scm_i_string_hash result.
* test-suite/standalone/.gitignore: Add test-hashing.
* test-suite/standalone/Makefile.am: Add test-hashing.
* test-suite/standalone/test-hashing.c: Add.
This commit is contained in:
Rob Browning 2023-03-12 14:26:10 -05:00
parent f0df1ed0fd
commit ffb95239aa
6 changed files with 86 additions and 3 deletions

View file

@ -239,7 +239,7 @@ static SCM
scm_i_str2symbol (SCM str)
{
SCM symbol;
size_t raw_hash = scm_i_string_hash (str);
unsigned long raw_hash = scm_i_string_hash (str);
symbol = lookup_interned_symbol (str, raw_hash);
if (scm_is_true (symbol))
@ -261,7 +261,7 @@ scm_i_str2symbol (SCM str)
/* Build an uninterned symbol from STR.

   The raw hash of STR is computed with scm_i_string_hash and handed,
   together with the SCM_I_F_SYMBOL_UNINTERNED flag, to
   scm_i_make_symbol, whose result is returned unchanged.  */
static SCM
scm_i_str2uninterned_symbol (SCM str)
{
  /* Declared exactly once, as unsigned long, so the local explicitly
     matches the result type of scm_i_string_hash.  A second, stale
     size_t declaration of the same name in this scope would be a
     redeclaration error and would hash STR twice.  */
  unsigned long raw_hash = scm_i_string_hash (str);

  return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash);
}