mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-29 19:30:36 +02:00
scm_i_utf8_string_hash: don't overrun when len is zero
When the length is zero, the previous code would include the byte after the end of the string in the hash. Fix that (the wide and narrow hashers also guard against it via "case 0"), and don't bother mutating length for the trailing bytes. Since we already compute the character length, use that to detect all-ASCII strings and follow the same narrow string path that we do for Latin-1. libguile/hash.c (scm_i_utf8_string_hash): avoid overrun when len == 0.
This commit is contained in:
parent
c45e1f57ec
commit
35f13806af
1 changed files with 16 additions and 14 deletions
|
@ -169,25 +169,26 @@ scm_i_latin1_string_hash (const char *str, size_t len)
|
|||
unsigned long
|
||||
scm_i_utf8_string_hash (const char *str, size_t len)
|
||||
{
|
||||
const uint8_t *end, *ustr = (const uint8_t *) str;
|
||||
unsigned long ret;
|
||||
|
||||
/* The length of the string in characters. This name corresponds to
|
||||
Jenkins' original name. */
|
||||
size_t length;
|
||||
|
||||
uint32_t a, b, c, u32;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
|
||||
end = ustr + len;
|
||||
// FIXME: eventually make fewer passes over str
|
||||
|
||||
const uint8_t *ustr = (const uint8_t *) str;
|
||||
if (u8_check (ustr, len) != NULL)
|
||||
/* Invalid UTF-8; punt. */
|
||||
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
|
||||
|
||||
length = u8_mbsnlen (ustr, len);
|
||||
/* The length of the string in characters. This name corresponds to
|
||||
Jenkins' original name. */
|
||||
size_t length = u8_mbsnlen (ustr, len);
|
||||
|
||||
if (len == length) // ascii, same as narrow_string_hash above
|
||||
return narrow_string_hash ((uint8_t *) str, len);
|
||||
|
||||
const uint8_t * const end = ustr + len;
|
||||
uint32_t a, b, c, u32;
|
||||
unsigned long ret;
|
||||
|
||||
/* Set up the internal state. */
|
||||
a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47;
|
||||
|
@ -205,14 +206,15 @@ scm_i_utf8_string_hash (const char *str, size_t len)
|
|||
length -= 3;
|
||||
}
|
||||
|
||||
/* Handle the last 3 elements. */
|
||||
// Similar to narrow_string_hash(). Handle the last 3 chars; length
|
||||
// cannot be zero because len != length above.
|
||||
ustr += u8_mbtouc (&u32, ustr, end - ustr);
|
||||
a += u32;
|
||||
if (--length)
|
||||
if (length > 1)
|
||||
{
|
||||
ustr += u8_mbtouc (&u32, ustr, end - ustr);
|
||||
b += u32;
|
||||
if (--length)
|
||||
if (length > 2)
|
||||
{
|
||||
ustr += u8_mbtouc (&u32, ustr, end - ustr);
|
||||
c += u32;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue