mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-03 18:50:19 +02:00
The symbol table is an ephemeron table
* libguile/symbols.c: Rework the symbol table to be an ephemeron table instead of a weak set. It is no longer resizeable; getting that to work will involve some GC cooperation.
This commit is contained in:
parent
dbc384a6ba
commit
4138d3c646
1 changed files with 120 additions and 136 deletions
|
@ -1,4 +1,4 @@
|
|||
/* Copyright 1995-1998,2000-2001,2003-2004,2006,2009,2011,2013,2015,2018,2022,2023
|
||||
/* Copyright 1995-1998,2000-2001,2003-2004,2006,2009,2011,2013,2015,2018,2022,2023,2025
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of Guile.
|
||||
|
@ -29,6 +29,7 @@
|
|||
#include "alist.h"
|
||||
#include "boolean.h"
|
||||
#include "chars.h"
|
||||
#include "ephemerons.h"
|
||||
#include "eval.h"
|
||||
#include "fluids.h"
|
||||
#include "gsubr.h"
|
||||
|
@ -46,14 +47,13 @@
|
|||
#include "threads.h"
|
||||
#include "variable.h"
|
||||
#include "vectors.h"
|
||||
#include "weak-set.h"
|
||||
|
||||
#include "symbols.h"
|
||||
|
||||
|
||||
|
||||
|
||||
static SCM symbols;
|
||||
static struct scm_ephemeron_table *symbols;
|
||||
|
||||
#ifdef GUILE_DEBUG
|
||||
SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
|
||||
|
@ -61,7 +61,7 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
|
|||
"Return the system symbol obarray.")
|
||||
#define FUNC_NAME s_scm_sys_symbols
|
||||
{
|
||||
return symbols;
|
||||
return scm_from_ephemeron_table (symbols);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
#endif
|
||||
|
@ -77,187 +77,171 @@ scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
|
|||
return scm_i_symbol_hash (obj) % n;
|
||||
}
|
||||
|
||||
struct string_lookup_data
|
||||
{
|
||||
SCM string;
|
||||
unsigned long string_hash;
|
||||
};
|
||||
|
||||
static int
|
||||
string_lookup_predicate_fn (SCM sym, void *closure)
|
||||
symbol_equals_string (SCM sym, SCM str, size_t len, unsigned long hash)
|
||||
{
|
||||
struct string_lookup_data *data = closure;
|
||||
|
||||
if (scm_i_symbol_hash (sym) == data->string_hash
|
||||
&& scm_i_symbol_length (sym) == scm_i_string_length (data->string))
|
||||
{
|
||||
size_t n = scm_i_symbol_length (sym);
|
||||
while (n--)
|
||||
if (scm_i_symbol_ref (sym, n) != scm_i_string_ref (data->string, n))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
if (scm_i_symbol_hash (sym) != hash)
|
||||
return 0;
|
||||
if (scm_i_symbol_length (sym) != len)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static SCM
|
||||
lookup_interned_symbol (SCM name, unsigned long raw_hash)
|
||||
{
|
||||
struct string_lookup_data data;
|
||||
|
||||
data.string = name;
|
||||
data.string_hash = raw_hash;
|
||||
|
||||
return scm_c_weak_set_lookup (symbols, raw_hash,
|
||||
string_lookup_predicate_fn,
|
||||
&data, SCM_BOOL_F);
|
||||
}
|
||||
for (size_t i = 0; i < len; i++)
|
||||
if (scm_i_symbol_ref (sym, i) != scm_i_string_ref (str, i))
|
||||
return 0;
|
||||
|
||||
struct latin1_lookup_data
|
||||
{
|
||||
const char *str;
|
||||
size_t len;
|
||||
unsigned long string_hash;
|
||||
};
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
latin1_lookup_predicate_fn (SCM sym, void *closure)
|
||||
symbol_equals_latin1_string (SCM sym, const char *str, size_t len,
|
||||
unsigned long hash)
|
||||
{
|
||||
struct latin1_lookup_data *data = closure;
|
||||
|
||||
return scm_i_symbol_hash (sym) == data->string_hash
|
||||
&& scm_i_is_narrow_symbol (sym)
|
||||
&& scm_i_symbol_length (sym) == data->len
|
||||
&& strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0;
|
||||
if (scm_i_symbol_hash (sym) != hash)
|
||||
return 0;
|
||||
if (scm_i_symbol_length (sym) != len)
|
||||
return 0;
|
||||
if (!scm_i_is_narrow_symbol (sym))
|
||||
return 0;
|
||||
|
||||
return strncmp (scm_i_symbol_chars (sym), str, len) == 0;
|
||||
}
|
||||
|
||||
static SCM
|
||||
lookup_interned_latin1_symbol (const char *str, size_t len,
|
||||
unsigned long raw_hash)
|
||||
{
|
||||
struct latin1_lookup_data data;
|
||||
|
||||
data.str = str;
|
||||
data.len = len;
|
||||
data.string_hash = raw_hash;
|
||||
|
||||
return scm_c_weak_set_lookup (symbols, raw_hash,
|
||||
latin1_lookup_predicate_fn,
|
||||
&data, SCM_BOOL_F);
|
||||
size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols);
|
||||
for (struct gc_ephemeron *e = scm_c_ephemeron_table_ref (symbols, bucket);
|
||||
e;
|
||||
e = scm_c_ephemeron_next (e))
|
||||
{
|
||||
SCM sym = scm_c_ephemeron_key (e);
|
||||
if (scm_is_true (sym)
|
||||
&& symbol_equals_latin1_string (sym, str, len, raw_hash))
|
||||
return sym;
|
||||
}
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
|
||||
struct utf8_lookup_data
|
||||
{
|
||||
const char *str;
|
||||
size_t len;
|
||||
unsigned long string_hash;
|
||||
};
|
||||
|
||||
static int
|
||||
utf8_string_equals_wide_string (const uint8_t *narrow, size_t nlen,
|
||||
const scm_t_wchar *wide, size_t wlen)
|
||||
utf8_string_equals_narrow_string (const uint8_t *utf8, size_t ulen,
|
||||
const char *narrow)
|
||||
{
|
||||
size_t byte_idx = 0, char_idx = 0;
|
||||
/* Precondition: utf8,ulen is valid UTF-8. */
|
||||
size_t byte_idx = 0;
|
||||
|
||||
while (byte_idx < nlen && char_idx < wlen)
|
||||
while (byte_idx < ulen)
|
||||
{
|
||||
ucs4_t c;
|
||||
int nbytes;
|
||||
|
||||
nbytes = u8_mbtoucr (&c, narrow + byte_idx, nlen - byte_idx);
|
||||
if (nbytes == 0)
|
||||
break;
|
||||
else if (nbytes < 0)
|
||||
/* Bad UTF-8. */
|
||||
ucs4_t c = -1;
|
||||
byte_idx += u8_mbtoucr (&c, utf8 + byte_idx, ulen - byte_idx);
|
||||
if (c != *narrow)
|
||||
return 0;
|
||||
else if (c != wide[char_idx])
|
||||
return 0;
|
||||
|
||||
byte_idx += nbytes;
|
||||
char_idx++;
|
||||
narrow++;
|
||||
}
|
||||
|
||||
return byte_idx == nlen && char_idx == wlen;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_lookup_predicate_fn (SCM sym, void *closure)
|
||||
utf8_string_equals_wide_string (const uint8_t *utf8, size_t ulen,
|
||||
const scm_t_wchar *wide)
|
||||
{
|
||||
struct utf8_lookup_data *data = closure;
|
||||
/* Precondition: utf8,ulen is valid UTF-8. */
|
||||
size_t byte_idx = 0;
|
||||
|
||||
while (byte_idx < ulen)
|
||||
{
|
||||
ucs4_t c = -1;
|
||||
byte_idx += u8_mbtoucr (&c, utf8 + byte_idx, ulen - byte_idx);
|
||||
if (c != *wide)
|
||||
return 0;
|
||||
wide++;
|
||||
}
|
||||
|
||||
if (scm_i_symbol_hash (sym) != data->string_hash)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
symbol_equals_utf8_string (SCM sym, const uint8_t *str, size_t len,
|
||||
unsigned long hash, int codepoint_count)
|
||||
{
|
||||
if (scm_i_symbol_hash (sym) != hash)
|
||||
return 0;
|
||||
if (scm_i_symbol_length (sym) != codepoint_count)
|
||||
return 0;
|
||||
|
||||
if (scm_i_is_narrow_symbol (sym))
|
||||
return (scm_i_symbol_length (sym) == data->len
|
||||
&& strncmp (scm_i_symbol_chars (sym), data->str, data->len) == 0);
|
||||
return utf8_string_equals_narrow_string (str, len,
|
||||
scm_i_symbol_chars (sym));
|
||||
else
|
||||
return utf8_string_equals_wide_string ((const uint8_t *) data->str,
|
||||
data->len,
|
||||
scm_i_symbol_wide_chars (sym),
|
||||
scm_i_symbol_length (sym));
|
||||
return utf8_string_equals_wide_string (str, len,
|
||||
scm_i_symbol_wide_chars (sym));
|
||||
}
|
||||
|
||||
static SCM
|
||||
lookup_interned_utf8_symbol (const char *str, size_t len,
|
||||
lookup_interned_utf8_symbol (const uint8_t *str, size_t len,
|
||||
unsigned long raw_hash)
|
||||
{
|
||||
struct utf8_lookup_data data;
|
||||
int codepoint_count = u8_mbsnlen (str, len);
|
||||
if (codepoint_count == -1)
|
||||
/* Bad UTF-8. */
|
||||
return SCM_BOOL_F;
|
||||
|
||||
data.str = str;
|
||||
data.len = len;
|
||||
data.string_hash = raw_hash;
|
||||
|
||||
return scm_c_weak_set_lookup (symbols, raw_hash,
|
||||
utf8_lookup_predicate_fn,
|
||||
&data, SCM_BOOL_F);
|
||||
}
|
||||
if (codepoint_count == len)
|
||||
return lookup_interned_latin1_symbol ((const char *) str, len, raw_hash);
|
||||
|
||||
static int
|
||||
symbol_lookup_predicate_fn (SCM sym, void *closure)
|
||||
{
|
||||
SCM other = SCM_PACK_POINTER (closure);
|
||||
|
||||
if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (other)
|
||||
&& scm_i_symbol_length (sym) == scm_i_symbol_length (other))
|
||||
size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols);
|
||||
for (struct gc_ephemeron *e = scm_c_ephemeron_table_ref (symbols, bucket);
|
||||
e;
|
||||
e = scm_c_ephemeron_next (e))
|
||||
{
|
||||
if (scm_i_is_narrow_symbol (sym))
|
||||
return scm_i_is_narrow_symbol (other)
|
||||
&& (strncmp (scm_i_symbol_chars (sym),
|
||||
scm_i_symbol_chars (other),
|
||||
scm_i_symbol_length (other)) == 0);
|
||||
else
|
||||
return scm_is_true
|
||||
(scm_string_equal_p (scm_symbol_to_string (sym),
|
||||
scm_symbol_to_string (other)));
|
||||
SCM sym = scm_c_ephemeron_key (e);
|
||||
if (scm_is_true (sym)
|
||||
&& symbol_equals_utf8_string (sym, str, len, raw_hash,
|
||||
codepoint_count))
|
||||
return sym;
|
||||
}
|
||||
return 0;
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
|
||||
|
||||
static SCM
|
||||
scm_i_str2symbol (SCM str)
|
||||
{
|
||||
SCM symbol;
|
||||
unsigned long raw_hash = scm_i_string_hash (str);
|
||||
size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols);
|
||||
size_t len = scm_i_string_length (str);
|
||||
|
||||
symbol = lookup_interned_symbol (str, raw_hash);
|
||||
if (scm_is_true (symbol))
|
||||
return symbol;
|
||||
else
|
||||
struct gc_ephemeron *chain = scm_c_ephemeron_table_ref (symbols, bucket);
|
||||
/* First see if a symbol with this name is already interned. */
|
||||
for (struct gc_ephemeron *e = chain; e; e = scm_c_ephemeron_next (e))
|
||||
{
|
||||
/* The symbol was not found, create it. */
|
||||
symbol = scm_i_make_symbol (str, 0, raw_hash);
|
||||
|
||||
/* Might return a different symbol, if another one was interned at
|
||||
the same time. */
|
||||
return scm_c_weak_set_add_x (symbols, raw_hash,
|
||||
symbol_lookup_predicate_fn,
|
||||
SCM_UNPACK_POINTER (symbol), symbol);
|
||||
SCM sym = scm_c_ephemeron_key (e);
|
||||
if (scm_is_true (sym) && symbol_equals_string (sym, str, len, raw_hash))
|
||||
return sym;
|
||||
}
|
||||
|
||||
/* The symbol was not found, create it. */
|
||||
SCM sym = scm_i_make_symbol (str, 0, raw_hash);
|
||||
struct gc_ephemeron *link = scm_c_make_ephemeron (sym, SCM_BOOL_T);
|
||||
while (1)
|
||||
{
|
||||
struct gc_ephemeron *prev =
|
||||
scm_c_ephemeron_table_try_push_x (symbols, bucket, link, chain);
|
||||
if (prev == chain)
|
||||
return sym;
|
||||
/* Lost a race, someone else added a symbol in this bucket. Check
|
||||
the chain and try again. */
|
||||
chain = prev;
|
||||
for (struct gc_ephemeron *e = chain; e; e = scm_c_ephemeron_next (e))
|
||||
{
|
||||
SCM sym = scm_c_ephemeron_key (e);
|
||||
if (scm_is_true (sym)
|
||||
&& symbol_equals_string (sym, str, len, raw_hash))
|
||||
return sym;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static SCM
|
||||
scm_i_str2uninterned_symbol (SCM str)
|
||||
{
|
||||
|
@ -449,7 +433,7 @@ scm_from_utf8_symboln (const char *sym, size_t len)
|
|||
len = strlen (sym);
|
||||
hash = scm_i_utf8_string_hash (sym, len);
|
||||
|
||||
ret = lookup_interned_utf8_symbol (sym, len, hash);
|
||||
ret = lookup_interned_utf8_symbol ((const uint8_t *)sym, len, hash);
|
||||
if (scm_is_false (ret))
|
||||
{
|
||||
SCM str = scm_from_utf8_stringn (sym, len);
|
||||
|
@ -462,7 +446,7 @@ scm_from_utf8_symboln (const char *sym, size_t len)
|
|||
void
|
||||
scm_symbols_prehistory ()
|
||||
{
|
||||
symbols = scm_c_make_weak_set (5000);
|
||||
symbols = scm_c_make_ephemeron_table (5000);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue