/* Copyright 1995-1998,2000-2001,2003-2004,2006,2009,2011,2013,2015,2018,2022,2023,2025 Free Software Foundation, Inc. This file is part of Guile. Guile is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Guile is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Guile. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include "alist.h" #include "boolean.h" #include "chars.h" #include "ephemerons.h" #include "eval.h" #include "fluids.h" #include "gsubr.h" #include "hash.h" #include "list.h" #include "modules.h" #include "numbers.h" #include "pairs.h" #include "private-options.h" #include "read.h" #include "smob.h" #include "srfi-13.h" #include "strings.h" #include "strorder.h" #include "threads.h" #include "variable.h" #include "vectors.h" #include "symbols.h" static struct scm_ephemeron_table *symbols; #ifdef GUILE_DEBUG SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0, (), "Return the system symbol obarray.") #define FUNC_NAME s_scm_sys_symbols { return scm_from_ephemeron_table (symbols); } #undef FUNC_NAME #endif /* {Symbols} */ unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) { return scm_i_symbol_hash (obj) % n; } static int symbol_equals_string (SCM sym, SCM str, size_t len, unsigned long hash) { if (scm_i_symbol_hash (sym) != hash) return 0; if (scm_i_symbol_length (sym) != len) return 0; for (size_t i = 0; i < len; i++) if (scm_i_symbol_ref (sym, i) != scm_i_string_ref (str, i)) return 0; return 1; } static int symbol_equals_latin1_string (SCM sym, const char *str, size_t len, unsigned long hash) { if (scm_i_symbol_hash (sym) != hash) return 0; if (scm_i_symbol_length (sym) != len) return 0; if (!scm_i_is_narrow_symbol (sym)) return 0; return strncmp (scm_i_symbol_chars (sym), str, len) == 0; } static SCM lookup_interned_latin1_symbol (const char *str, size_t len, unsigned long raw_hash) { size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols); for (struct gc_ephemeron *e = scm_c_ephemeron_table_ref (symbols, bucket); e; e = scm_c_ephemeron_next (e)) { SCM sym = scm_c_ephemeron_key (e); if (scm_is_true (sym) && symbol_equals_latin1_string (sym, str, len, raw_hash)) return sym; } return SCM_BOOL_F; } static int utf8_string_equals_narrow_string (const uint8_t *utf8, size_t ulen, const char *narrow) { /* Precondition: utf8,ulen is valid UTF-8. */ size_t byte_idx = 0; while (byte_idx < ulen) { ucs4_t c = -1; byte_idx += u8_mbtoucr (&c, utf8 + byte_idx, ulen - byte_idx); if (c != *narrow) return 0; narrow++; } return 1; } static int utf8_string_equals_wide_string (const uint8_t *utf8, size_t ulen, const scm_t_wchar *wide) { /* Precondition: utf8,ulen is valid UTF-8. */ size_t byte_idx = 0; while (byte_idx < ulen) { ucs4_t c = -1; byte_idx += u8_mbtoucr (&c, utf8 + byte_idx, ulen - byte_idx); if (c != *wide) return 0; wide++; } return 1; } static int symbol_equals_utf8_string (SCM sym, const uint8_t *str, size_t len, unsigned long hash, int codepoint_count) { if (scm_i_symbol_hash (sym) != hash) return 0; if (scm_i_symbol_length (sym) != codepoint_count) return 0; if (scm_i_is_narrow_symbol (sym)) return utf8_string_equals_narrow_string (str, len, scm_i_symbol_chars (sym)); else return utf8_string_equals_wide_string (str, len, scm_i_symbol_wide_chars (sym)); } static SCM lookup_interned_utf8_symbol (const uint8_t *str, size_t len, unsigned long raw_hash) { int codepoint_count = u8_mbsnlen (str, len); if (codepoint_count == -1) /* Bad UTF-8. */ return SCM_BOOL_F; if (codepoint_count == len) return lookup_interned_latin1_symbol ((const char *) str, len, raw_hash); size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols); for (struct gc_ephemeron *e = scm_c_ephemeron_table_ref (symbols, bucket); e; e = scm_c_ephemeron_next (e)) { SCM sym = scm_c_ephemeron_key (e); if (scm_is_true (sym) && symbol_equals_utf8_string (sym, str, len, raw_hash, codepoint_count)) return sym; } return SCM_BOOL_F; } static SCM scm_i_str2symbol (SCM str) { unsigned long raw_hash = scm_i_string_hash (str); size_t bucket = raw_hash % scm_c_ephemeron_table_length (symbols); size_t len = scm_i_string_length (str); struct gc_ephemeron *chain = scm_c_ephemeron_table_ref (symbols, bucket); /* First see if a symbol with this name is already interned. */ for (struct gc_ephemeron *e = chain; e; e = scm_c_ephemeron_next (e)) { SCM sym = scm_c_ephemeron_key (e); if (scm_is_true (sym) && symbol_equals_string (sym, str, len, raw_hash)) return sym; } /* The symbol was not found, create it. */ SCM sym = scm_i_make_symbol (str, 0, raw_hash); struct gc_ephemeron *link = scm_c_make_ephemeron (sym, SCM_BOOL_T); while (1) { struct gc_ephemeron *prev = scm_c_ephemeron_table_try_push_x (symbols, bucket, link, chain); if (prev == chain) return sym; /* Lost a race, someone else added a symbol in this bucket. Check the chain and try again. */ chain = prev; for (struct gc_ephemeron *e = chain; e; e = scm_c_ephemeron_next (e)) { SCM sym = scm_c_ephemeron_key (e); if (scm_is_true (sym) && symbol_equals_string (sym, str, len, raw_hash)) return sym; } } } static SCM scm_i_str2uninterned_symbol (SCM str) { unsigned long raw_hash = scm_i_string_hash (str); return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash); } SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0, (SCM obj), "Return @code{#t} if @var{obj} is a symbol, otherwise return\n" "@code{#f}.") #define FUNC_NAME s_scm_symbol_p { return scm_from_bool (scm_is_symbol (obj)); } #undef FUNC_NAME SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0, (SCM symbol), "Return @code{#t} if @var{symbol} is interned, otherwise return\n" "@code{#f}.") #define FUNC_NAME s_scm_symbol_interned_p { SCM_VALIDATE_SYMBOL (1, symbol); return scm_from_bool (scm_i_symbol_is_interned (symbol)); } #undef FUNC_NAME SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0, (SCM name), "Return a new uninterned symbol with the name @var{name}. " "The returned symbol is guaranteed to be unique and future " "calls to @code{string->symbol} will not return it.") #define FUNC_NAME s_scm_make_symbol { SCM_VALIDATE_STRING (1, name); return scm_i_str2uninterned_symbol (name); } #undef FUNC_NAME SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0, (SCM s), "Return the name of @var{symbol} as a string. The resulting\n" "string is immutable.") #define FUNC_NAME s_scm_symbol_to_string { SCM_VALIDATE_SYMBOL (1, s); return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s)); } #undef FUNC_NAME SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0, (SCM string), "Return the symbol whose name is @var{string}.") #define FUNC_NAME s_scm_string_to_symbol { SCM_VALIDATE_STRING (1, string); return scm_i_str2symbol (string); } #undef FUNC_NAME SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0, (SCM str), "Return the symbol whose name is @var{str}. @var{str} is\n" "converted to lowercase before the conversion is done, if Guile\n" "is currently reading symbols case-insensitively.") #define FUNC_NAME s_scm_string_ci_to_symbol { return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P ? scm_string_downcase(str) : str); } #undef FUNC_NAME /* The default prefix for `gensym'd symbols. */ static SCM default_gensym_prefix; #define MAX_PREFIX_LENGTH 30 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0, (SCM prefix), "Create a new symbol with a name constructed from a prefix and\n" "a counter value. The string @var{prefix} can be specified as\n" "an optional argument. Default prefix is @code{ g}. The counter\n" "is increased by 1 at each call. There is no provision for\n" "resetting the counter.") #define FUNC_NAME s_scm_gensym { static int gensym_counter = 0; SCM suffix, name; int n, n_digits; char buf[SCM_INTBUFLEN]; if (SCM_UNBNDP (prefix)) prefix = default_gensym_prefix; /* mutex in case another thread looks and incs at the exact same moment */ scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex); n = gensym_counter++; scm_i_pthread_mutex_unlock (&scm_i_misc_mutex); n_digits = scm_iint2str (n, 10, buf); suffix = scm_from_latin1_stringn (buf, n_digits); name = scm_string_append (scm_list_2 (prefix, suffix)); return scm_string_to_symbol (name); } #undef FUNC_NAME SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0, (SCM symbol), "Return a hash value for @var{symbol}.") #define FUNC_NAME s_scm_symbol_hash { SCM_VALIDATE_SYMBOL (1, symbol); return scm_from_ulong (scm_i_symbol_hash (symbol)); } #undef FUNC_NAME SCM scm_from_locale_symbol (const char *sym) { return scm_from_locale_symboln (sym, -1); } SCM scm_from_locale_symboln (const char *sym, size_t len) { SCM str = scm_from_locale_stringn (sym, len); return scm_i_str2symbol (str); } SCM scm_take_locale_symboln (char *sym, size_t len) { SCM str; str = scm_take_locale_stringn (sym, len); return scm_i_str2symbol (str); } SCM scm_take_locale_symbol (char *sym) { return scm_take_locale_symboln (sym, (size_t)-1); } SCM scm_from_latin1_symbol (const char *sym) { return scm_from_latin1_symboln (sym, -1); } SCM scm_from_latin1_symboln (const char *sym, size_t len) { unsigned long hash; SCM ret; if (len == (size_t) -1) len = strlen (sym); hash = scm_i_latin1_string_hash (sym, len); ret = lookup_interned_latin1_symbol (sym, len, hash); if (scm_is_false (ret)) { SCM str = scm_from_latin1_stringn (sym, len); ret = scm_i_str2symbol (str); } return ret; } SCM scm_from_utf8_symbol (const char *sym) { return scm_from_utf8_symboln (sym, -1); } SCM scm_from_utf8_symboln (const char *sym, size_t len) { unsigned long hash; SCM ret; if (len == (size_t) -1) len = strlen (sym); hash = scm_i_utf8_string_hash (sym, len); ret = lookup_interned_utf8_symbol ((const uint8_t *)sym, len, hash); if (scm_is_false (ret)) { SCM str = scm_from_utf8_stringn (sym, len); ret = scm_i_str2symbol (str); } return ret; } void scm_symbols_prehistory () { symbols = scm_c_make_ephemeron_table (5000); } void scm_init_symbols () { #include "symbols.x" default_gensym_prefix = scm_from_latin1_string (" g"); }