mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-12 06:41:13 +02:00
add hash functions for locale, latin1, and utf8 strings
* libguile/hash.c (scm_i_locale_string_hash) (scm_i_latin1_string_hash, scm_i_utf8_string_hash): New functions.
This commit is contained in:
parent
f756cd3076
commit
622415380c
2 changed files with 87 additions and 1 deletions
|
@ -22,6 +22,12 @@
|
||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_WCHAR_H
|
||||||
|
#include <wchar.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unistr.h>
|
||||||
|
|
||||||
#include "libguile/_scm.h"
|
#include "libguile/_scm.h"
|
||||||
#include "libguile/chars.h"
|
#include "libguile/chars.h"
|
||||||
#include "libguile/ports.h"
|
#include "libguile/ports.h"
|
||||||
|
@ -64,6 +70,79 @@ scm_i_string_hash (SCM str)
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return a hash value for the LEN-byte string STR, which is encoded in
   the current locale's multibyte encoding.  If LEN is (size_t) -1, STR
   must be NUL-terminated and its length is measured with strlen.

   When <wchar.h> is available, STR is decoded one character at a time
   with mbrtowc and each wide character is folded into the hash as
   h = c + h * 37, so no Scheme string needs to be allocated.  If the
   input cannot be decoded, or if <wchar.h> is unavailable, the work is
   delegated to scm_i_string_hash on a Scheme string built with
   scm_from_locale_stringn.  */
unsigned long
scm_i_locale_string_hash (const char *str, size_t len)
{
#ifdef HAVE_WCHAR_H
  mbstate_t state;
  wchar_t c;
  size_t byte_idx = 0, nbytes;
  unsigned long h = 0;

  if (len == (size_t) -1)
    len = strlen (str);

  /* mbrtowc needs a conversion state that starts out in the initial
     state; passing an uninitialized mbstate_t is undefined behavior.  */
  memset (&state, 0, sizeof (state));

  /* Stop explicitly once every byte has been consumed.  Calling mbrtowc
     with a zero-length remainder cannot produce a character and is
     reported as incomplete input, which would send every fully decoded
     string down the allocating fallback below.  */
  while (byte_idx < len)
    {
      nbytes = mbrtowc (&c, str + byte_idx, len - byte_idx, &state);

      if (nbytes == 0)
        /* Decoded a null wide character; stop hashing here.  */
        break;

      if (nbytes >= (size_t) -2)
        /* Invalid or truncated input string; punt.  */
        return scm_i_string_hash (scm_from_locale_stringn (str, len));

      h = (unsigned long) c + h * 37;
      byte_idx += nbytes;
    }

  return h;
#else
  return scm_i_string_hash (scm_from_locale_stringn (str, len));
#endif
}
|
||||||
|
|
||||||
|
/* Return a hash value for the LEN-byte Latin-1 string STR.  If LEN is
   (size_t) -1, STR must be NUL-terminated and its length is measured
   with strlen.  Every byte is read as an unsigned value and folded
   into the running hash as hash = byte + hash * 37.  */
unsigned long
scm_i_latin1_string_hash (const char *str, size_t len)
{
  const scm_t_uint8 *cursor = (const scm_t_uint8 *) str;
  unsigned long hash = 0;
  size_t remaining;

  if (len == (size_t) -1)
    len = strlen (str);

  /* Walk the bytes front to back, counting down the ones left.  */
  remaining = len;
  while (remaining > 0)
    {
      hash = hash * 37 + (unsigned long) *cursor;
      cursor++;
      remaining--;
    }

  return hash;
}
|
||||||
|
|
||||||
|
unsigned long
|
||||||
|
scm_i_utf8_string_hash (const char *str, size_t len)
|
||||||
|
{
|
||||||
|
const scm_t_uint8 *ustr = (const scm_t_uint8 *) str;
|
||||||
|
size_t byte_idx = 0;
|
||||||
|
unsigned long h = 0;
|
||||||
|
|
||||||
|
if (len == (size_t) -1)
|
||||||
|
len = strlen (str);
|
||||||
|
|
||||||
|
while (byte_idx < len)
|
||||||
|
{
|
||||||
|
ucs4_t c;
|
||||||
|
int nbytes;
|
||||||
|
|
||||||
|
nbytes = u8_mbtouc (&c, ustr + byte_idx, len - byte_idx);
|
||||||
|
if (nbytes == 0)
|
||||||
|
break;
|
||||||
|
else if (nbytes < 0)
|
||||||
|
/* Bad UTF-8; punt. */
|
||||||
|
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
|
||||||
|
|
||||||
|
h = (unsigned long) c + h * 37;
|
||||||
|
byte_idx += nbytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
|
/* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
|
||||||
/* Dirk:FIXME:: scm_hasher could be made static. */
|
/* Dirk:FIXME:: scm_hasher could be made static. */
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#ifndef SCM_HASH_H
|
#ifndef SCM_HASH_H
|
||||||
#define SCM_HASH_H
|
#define SCM_HASH_H
|
||||||
|
|
||||||
/* Copyright (C) 1995,1996,2000, 2006, 2008 Free Software Foundation, Inc.
|
/* Copyright (C) 1995,1996,2000, 2006, 2008, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -28,6 +28,13 @@
|
||||||
|
|
||||||
|
|
||||||
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
|
SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
|
||||||
|
/* Hash functions for C strings in the locale, Latin-1 and UTF-8
   encodings.  LEN is the length of STR in bytes; the definitions in
   hash.c treat (size_t) -1 as "STR is NUL-terminated" and measure it
   with strlen.  */
SCM_INTERNAL unsigned long scm_i_locale_string_hash (const char *str,
|
||||||
|
size_t len);
|
||||||
|
SCM_INTERNAL unsigned long scm_i_latin1_string_hash (const char *str,
|
||||||
|
size_t len);
|
||||||
|
SCM_INTERNAL unsigned long scm_i_utf8_string_hash (const char *str,
|
||||||
|
size_t len);
|
||||||
|
|
||||||
SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
|
SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
|
||||||
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
|
SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
|
||||||
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);
|
SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue