1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-10 14:00:21 +02:00

add hash functions for locale, latin1, and utf8 strings

* libguile/hash.c (scm_i_locale_string_hash)
  (scm_i_latin1_string_hash, scm_i_utf8_string_hash): New functions.
This commit is contained in:
Andy Wingo 2011-01-05 18:43:28 -08:00
parent f756cd3076
commit 622415380c
2 changed files with 87 additions and 1 deletions

View file

@ -22,6 +22,12 @@
# include <config.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#include <unistr.h>
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/ports.h"
@ -64,6 +70,79 @@ scm_i_string_hash (SCM str)
return h;
}
/* Hash the first LEN bytes of STR, a string in the current locale's
   encoding.  Each decoded character C is folded into the hash as
   h = C + h * 37.  A LEN of (size_t) -1 means STR is NUL-terminated and
   its length is taken with strlen.

   If STR cannot be decoded with mbrtowc, or if <wchar.h> is not
   available, the bytes are converted with scm_from_locale_stringn and
   the result is hashed with scm_i_string_hash instead.  */
unsigned long
scm_i_locale_string_hash (const char *str, size_t len)
{
#ifdef HAVE_WCHAR_H
  mbstate_t state;
  wchar_t c;
  size_t byte_idx = 0, nbytes;
  unsigned long h = 0;

  if (len == (size_t) -1)
    len = strlen (str);

  /* mbrtowc reads the conversion state, so it has to start out as the
     initial state; passing an uninitialized mbstate_t is undefined
     behavior.  */
  memset (&state, 0, sizeof (state));

  /* Stop as soon as every byte has been consumed.  Calling mbrtowc
     with zero bytes remaining returns (size_t) -2, which the check
     below treats as a decoding failure; without this bound every
     string lacking an embedded NUL would end up on the slow path.  */
  while (byte_idx < len)
    {
      nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state);

      if (nbytes == 0)
        /* Decoded a NUL wide character; stop hashing here.  */
        break;

      if (nbytes >= (size_t) -2)
        /* Invalid or truncated input string; punt.  */
        return scm_i_string_hash (scm_from_locale_stringn (str, len));

      h = (unsigned long) c + h * 37;
      byte_idx += nbytes;
    }

  return h;
#else
  return scm_i_string_hash (scm_from_locale_stringn (str, len));
#endif
}
/* Hash the first LEN bytes of STR, treating each byte as one
   character whose value is the unsigned byte itself.  Each byte B is
   folded into the hash as hash = B + hash * 37.  A LEN of (size_t) -1
   means STR is NUL-terminated and its length is taken with strlen.  */
unsigned long
scm_i_latin1_string_hash (const char *str, size_t len)
{
  const scm_t_uint8 *cursor = (const scm_t_uint8 *) str;
  const scm_t_uint8 *end;
  unsigned long hash = 0;

  if (len == (size_t) -1)
    len = strlen (str);

  /* Walk the bytes with a pointer instead of an index.  */
  end = cursor + len;
  while (cursor != end)
    {
      hash = hash * 37 + (unsigned long) *cursor;
      cursor++;
    }

  return hash;
}
/* Hash the first LEN bytes of STR, a UTF-8 encoded string.  Each
   decoded code point C is folded into the hash as h = C + h * 37.
   A LEN of (size_t) -1 means STR is NUL-terminated and its length is
   taken with strlen.

   If STR is not well-formed UTF-8, the bytes are converted with
   scm_from_utf8_stringn and the result is hashed with
   scm_i_string_hash instead.  */
unsigned long
scm_i_utf8_string_hash (const char *str, size_t len)
{
  const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
  size_t byte_idx = 0;
  unsigned long h = 0;

  if (len == (size_t) -1)
    len = strlen (str);

  while (byte_idx < len)
    {
      ucs4_t c;
      int nbytes;

      /* Use u8_mbtoucr, not u8_mbtouc: the latter never reports an
         error (it stores U+FFFD and returns a positive length), which
         would make the "punt" branch below unreachable and hash
         malformed input as replacement characters.  u8_mbtoucr returns
         -1 for an invalid sequence and -2 for an incomplete one.  */
      nbytes = u8_mbtoucr (&c, ustr + byte_idx, len - byte_idx);

      if (nbytes == 0)
        /* Defensive: never loop without making progress.  */
        break;
      else if (nbytes < 0)
        /* Bad UTF-8; punt.  */
        return scm_i_string_hash (scm_from_utf8_stringn (str, len));

      h = (unsigned long) c + h * 37;
      byte_idx += nbytes;
    }

  return h;
}
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
/* Dirk:FIXME:: scm_hasher could be made static. */

View file

@ -3,7 +3,7 @@
#ifndef SCM_HASH_H
#define SCM_HASH_H
/* Copyright (C) 1995,1996,2000, 2006, 2008 Free Software Foundation, Inc.
/* Copyright (C) 1995,1996,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@ -28,6 +28,13 @@
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
/* Internal hashers for C strings in a given encoding.  Each takes a
   byte pointer STR and a byte count LEN; a LEN of (size_t) -1 means
   STR is NUL-terminated and its length is computed with strlen.  */
/* STR is in the current locale's encoding.  */
SCM_INTERNAL unsigned long scm_i_locale_string_hash (const char *str,
size_t len);
/* Every byte of STR is hashed as one character.  */
SCM_INTERNAL unsigned long scm_i_latin1_string_hash (const char *str,
size_t len);
/* STR is UTF-8 encoded.  */
SCM_INTERNAL unsigned long scm_i_utf8_string_hash (const char *str,
size_t len);
SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);