mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
add hash functions for locale, latin1, and utf8 strings
* libguile/hash.c (scm_i_locale_string_hash) (scm_i_latin1_string_hash, scm_i_utf8_string_hash): New functions.
This commit is contained in:
parent
f756cd3076
commit
622415380c
2 changed files with 87 additions and 1 deletions
|
@ -22,6 +22,12 @@
|
|||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
#include <wchar.h>
|
||||
#endif
|
||||
|
||||
#include <unistr.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/chars.h"
|
||||
#include "libguile/ports.h"
|
||||
|
@ -64,6 +70,79 @@ scm_i_string_hash (SCM str)
|
|||
return h;
|
||||
}
|
||||
|
||||
/* Return a hash of the LEN-byte, locale-encoded string STR, folding in
   one decoded wide character at a time as h = c + h * 37.  If LEN is
   (size_t) -1, STR must be NUL-terminated and is measured with strlen.

   Input that cannot be walked here -- an invalid or truncated
   multibyte sequence, or an embedded NUL -- is handed to
   scm_i_string_hash via scm_from_locale_stringn instead.  The same
   fallback is the whole implementation when <wchar.h> is missing.  */
unsigned long
scm_i_locale_string_hash (const char *str, size_t len)
{
#ifdef HAVE_WCHAR_H
  mbstate_t state;
  wchar_t c;
  size_t byte_idx = 0, nbytes;
  unsigned long h = 0;

  if (len == (size_t) -1)
    len = strlen (str);

  /* mbrtowc reads STATE before writing it, so it has to start out as
     the zeroed initial conversion state rather than stack garbage.  */
  memset (&state, 0, sizeof state);

  /* Bound the loop on the byte count: calling mbrtowc with zero bytes
     left yields (size_t) -2, which would be mistaken for a truncated
     character and force the fallback on every well-formed string.  */
  while (byte_idx < len)
    {
      nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state);

      if (nbytes == 0 || nbytes >= (size_t) -2)
        /* Embedded NUL (mbrtowc does not report how many bytes it
           consumed for it) or invalid/incomplete input; punt so the
           result still covers all LEN bytes.  */
        return scm_i_string_hash (scm_from_locale_stringn (str, len));

      h = (unsigned long) c + h * 37;
      byte_idx += nbytes;
    }

  return h;
#else
  return scm_i_string_hash (scm_from_locale_stringn (str, len));
#endif
}
|
||||
|
||||
unsigned long
|
||||
scm_i_latin1_string_hash (const char *str, size_t len)
|
||||
{
|
||||
const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
|
||||
size_t i = 0;
|
||||
unsigned long h = 0;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
|
||||
for (; i < len; i++)
|
||||
h = (unsigned long) ustr[i] + h * 37;
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
scm_i_utf8_string_hash (const char *str, size_t len)
|
||||
{
|
||||
const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
|
||||
size_t byte_idx = 0;
|
||||
unsigned long h = 0;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
|
||||
while (byte_idx < len)
|
||||
{
|
||||
ucs4_t c;
|
||||
int nbytes;
|
||||
|
||||
nbytes = u8_mbtouc (&c, ustr + byte_idx, len - byte_idx);
|
||||
if (nbytes == 0)
|
||||
break;
|
||||
else if (nbytes < 0)
|
||||
/* Bad UTF-8; punt. */
|
||||
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
|
||||
|
||||
h = (unsigned long) c + h * 37;
|
||||
byte_idx += nbytes;
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
|
||||
/* Dirk:FIXME:: scm_hasher could be made static. */
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#ifndef SCM_HASH_H
|
||||
#define SCM_HASH_H
|
||||
|
||||
/* Copyright (C) 1995,1996,2000, 2006, 2008 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995,1996,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -28,6 +28,13 @@
|
|||
|
||||
|
||||
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
|
||||
/* Hash a raw C string STR of LEN bytes, interpreted as locale-encoded,
   Latin-1, or UTF-8 text respectively.  Passing (size_t) -1 as LEN
   means STR is NUL-terminated and its length is taken with strlen.  */
SCM_INTERNAL unsigned long scm_i_locale_string_hash (const char *str,
|
||||
size_t len);
|
||||
SCM_INTERNAL unsigned long scm_i_latin1_string_hash (const char *str,
|
||||
size_t len);
|
||||
|
||||
SCM_INTERNAL unsigned long scm_i_utf8_string_hash (const char *str,
|
||||
size_t len);
|
||||
|
||||
SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
|
||||
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
|
||||
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue