1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

optimize utf8 symbol lookup

* libguile/symbols.c (utf8_string_equals_wide_string)
  (utf8_lookup_predicate_fn, lookup_interned_utf8_symbol): Optimize
  utf8 symbol lookup.
This commit is contained in:
Andy Wingo 2011-10-25 17:32:50 +02:00
parent 8b33752be7
commit f80d15c59e

View file

@ -23,6 +23,8 @@
# include <config.h>
#endif
#include <unistr.h>
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/eval.h"
@ -144,6 +146,73 @@ lookup_interned_latin1_symbol (const char *str, size_t len,
&data, SCM_BOOL_F);
}
/* Closure handed to utf8_lookup_predicate_fn while probing the symbol
   set: the UTF-8 name being looked up plus its precomputed hash.  */
struct utf8_lookup_data
{
/* Bytes of the candidate name; decoded as UTF-8 by the predicate.  */
const char *str;
/* Length of STR, counted in bytes (not characters).  */
size_t len;
/* Hash of the name; compared against each candidate symbol's hash
   before any character comparison is attempted.  */
unsigned long string_hash;
};
/* Return non-zero iff the NLEN bytes of UTF-8 starting at NARROW decode
   to exactly the WLEN code points stored at WIDE.

   Malformed or truncated UTF-8 never compares equal.  u8_mbtouc does
   not signal bad input through its return value: it stores U+FFFD in
   the output character and returns a positive length.  We therefore
   treat a decoded U+FFFD as "no match".  A name that legitimately
   contains U+FFFD is rejected too, which is safe for a fast-path
   lookup: the caller falls back to the slower, fully validating
   path.  */
static int
utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
                                const scm_t_wchar *wide, size_t wlen)
{
  size_t byte_idx = 0, char_idx = 0;

  while (byte_idx < nlen && char_idx < wlen)
    {
      ucs4_t c;
      int nbytes;

      nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
      if (nbytes <= 0)
        /* No progress is possible; bytes remain, so the strings
           cannot be equal.  */
        return 0;
      else if (c == 0xfffd)
        /* Bad UTF-8 (or a literal replacement character).  */
        return 0;
      else if (c != wide[char_idx])
        return 0;

      byte_idx += nbytes;
      char_idx++;
    }

  /* Equal only if both sequences were consumed completely.  */
  return byte_idx == nlen && char_idx == wlen;
}
/* Weak-set lookup predicate: return non-zero iff SYM names the same
   string as the UTF-8 bytes described by CLOSURE, which must point to
   a struct utf8_lookup_data.

   The stored hash is compared first so that most non-matching
   candidates are rejected without touching their characters.  */
static int
utf8_lookup_predicate_fn (SCM sym, void *closure)
{
  struct utf8_lookup_data *data = closure;

  if (scm_i_symbol_hash (sym) != data->string_hash)
    return 0;

  if (scm_i_is_narrow_symbol (sym))
    /* Both lengths are known, so compare the full byte range with
       memcmp; strncmp would stop at an embedded NUL and report names
       that differ after it as equal.
       NOTE(review): this byte-wise comparison presumably only matches
       when the narrow representation and the UTF-8 input are
       byte-identical (e.g. pure ASCII names); other inputs are expected
       to miss here and take the caller's slow path -- confirm against
       the narrow symbol encoding.  */
    return (scm_i_symbol_length (sym) == data->len
            && memcmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
  else
    return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
                                           data->len,
                                           scm_i_symbol_wide_chars (sym),
                                           scm_i_symbol_length (sym));
}
static SCM
lookup_interned_utf8_symbol (const char *str, size_t len,
unsigned long raw_hash)
{
struct utf8_lookup_data data;
data.str = str;
data.len = len;
data.string_hash = raw_hash;
return scm_c_weak_set_lookup (symbols, raw_hash,
utf8_lookup_predicate_fn,
&data, SCM_BOOL_F);
}
static int
symbol_lookup_predicate_fn (SCM sym, void *closure)
{
@ -459,8 +528,21 @@ scm_from_utf8_symbol (const char *sym)
/* Return the symbol whose name is given by the LEN bytes of UTF-8 at
   SYM.  If LEN is (size_t) -1, SYM is taken to be NUL-terminated and
   its length is computed with strlen.

   The symbol set is probed directly with the UTF-8 bytes first, so
   that an already-interned symbol is returned without building an
   intermediate string.  Only on a miss is a string created from the
   bytes and turned into a symbol via scm_i_str2symbol.  */
SCM
scm_from_utf8_symboln (const char *sym, size_t len)
{
  unsigned long hash;
  SCM ret;

  if (len == (size_t) -1)
    len = strlen (sym);

  hash = scm_i_utf8_string_hash (sym, len);
  ret = lookup_interned_utf8_symbol (sym, len, hash);
  if (scm_is_false (ret))
    {
      /* Slow path: no interned match was found by the direct lookup.  */
      SCM str = scm_from_utf8_stringn (sym, len);
      ret = scm_i_str2symbol (str);
    }

  return ret;
}
void