mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 12:20:26 +02:00
optimize utf8 symbol lookup
* libguile/symbols.c (utf8_string_equals_wide_string) (utf8_lookup_predicate_fn, lookup_interned_utf8_symbol): Optimize utf8 symbol lookup.
This commit is contained in:
parent
8b33752be7
commit
f80d15c59e
1 changed file with 84 additions and 2 deletions
|
@ -23,6 +23,8 @@
|
||||||
# include <config.h>
|
# include <config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <unistr.h>
|
||||||
|
|
||||||
#include "libguile/_scm.h"
|
#include "libguile/_scm.h"
|
||||||
#include "libguile/chars.h"
|
#include "libguile/chars.h"
|
||||||
#include "libguile/eval.h"
|
#include "libguile/eval.h"
|
||||||
|
@ -144,6 +146,73 @@ lookup_interned_latin1_symbol (const char *str, size_t len,
|
||||||
&data, SCM_BOOL_F);
|
&data, SCM_BOOL_F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Closure handed to utf8_lookup_predicate_fn while probing the symbol
   table: the UTF-8 bytes being looked up, their byte count, and the
   hash that each candidate symbol's hash is compared against.  */
struct utf8_lookup_data
{
  const char *str;            /* UTF-8 bytes; extent is given by LEN.  */
  size_t len;                 /* Number of bytes at STR.  */
  unsigned long string_hash;  /* Hash checked against each candidate.  */
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
|
||||||
|
const scm_t_wchar *wide, size_t wlen)
|
||||||
|
{
|
||||||
|
size_t byte_idx = 0, char_idx = 0;
|
||||||
|
|
||||||
|
while (byte_idx < nlen && char_idx < wlen)
|
||||||
|
{
|
||||||
|
ucs4_t c;
|
||||||
|
int nbytes;
|
||||||
|
|
||||||
|
nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
|
||||||
|
if (nbytes == 0)
|
||||||
|
break;
|
||||||
|
else if (nbytes < 0)
|
||||||
|
/* Bad UTF-8. */
|
||||||
|
return 0;
|
||||||
|
else if (c != wide[char_idx])
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
byte_idx += nbytes;
|
||||||
|
char_idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return byte_idx == nlen && char_idx == wlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
utf8_lookup_predicate_fn (SCM sym, void *closure)
|
||||||
|
{
|
||||||
|
struct utf8_lookup_data *data = closure;
|
||||||
|
|
||||||
|
if (scm_i_symbol_hash (sym) != data->string_hash)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (scm_i_is_narrow_symbol (sym))
|
||||||
|
return (scm_i_symbol_length (sym) == data->len
|
||||||
|
&& strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
|
||||||
|
else
|
||||||
|
return utf8_string_equals_wide_string ((const scm_t_uint8 *) data->str,
|
||||||
|
data->len,
|
||||||
|
scm_i_symbol_wide_chars (sym),
|
||||||
|
scm_i_symbol_length (sym));
|
||||||
|
}
|
||||||
|
|
||||||
|
static SCM
|
||||||
|
lookup_interned_utf8_symbol (const char *str, size_t len,
|
||||||
|
unsigned long raw_hash)
|
||||||
|
{
|
||||||
|
struct utf8_lookup_data data;
|
||||||
|
|
||||||
|
data.str = str;
|
||||||
|
data.len = len;
|
||||||
|
data.string_hash = raw_hash;
|
||||||
|
|
||||||
|
return scm_c_weak_set_lookup (symbols, raw_hash,
|
||||||
|
utf8_lookup_predicate_fn,
|
||||||
|
&data, SCM_BOOL_F);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
symbol_lookup_predicate_fn (SCM sym, void *closure)
|
symbol_lookup_predicate_fn (SCM sym, void *closure)
|
||||||
{
|
{
|
||||||
|
@ -459,8 +528,21 @@ scm_from_utf8_symbol (const char *sym)
|
||||||
SCM
|
SCM
|
||||||
scm_from_utf8_symboln (const char *sym, size_t len)
|
scm_from_utf8_symboln (const char *sym, size_t len)
|
||||||
{
|
{
|
||||||
SCM str = scm_from_utf8_stringn (sym, len);
|
unsigned long hash;
|
||||||
return scm_i_str2symbol (str);
|
SCM ret;
|
||||||
|
|
||||||
|
if (len == (size_t) -1)
|
||||||
|
len = strlen (sym);
|
||||||
|
hash = scm_i_utf8_string_hash (sym, len);
|
||||||
|
|
||||||
|
ret = lookup_interned_utf8_symbol (sym, len, hash);
|
||||||
|
if (scm_is_false (ret))
|
||||||
|
{
|
||||||
|
SCM str = scm_from_utf8_stringn (sym, len);
|
||||||
|
ret = scm_i_str2symbol (str);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue