From c35738c1ae80f3ed9ae7c54532f6477cf7cc8bde Mon Sep 17 00:00:00 2001 From: Mikael Djurfeldt Date: Wed, 19 Feb 2003 15:04:51 +0000 Subject: [PATCH] * hooks.c (scm_c_hook_add): Fixed bug in append mode. * environments.c (obarray_enter, obarray_retrieve, obarray_remove, leaf_environment_fold, obarray_remove_all): Use hashtable accessors. * gc.c (scm_init_storage): Moved hook initialization to scm_storage_prehistory. (scm_storage_prehistory): New function. (scm_igc): Added commentary about placement of scm_after_sweep_c_hook. * gc-mark.c (scm_mark_all): Use hashtable accessors. (scm_gc_mark_dependencies): Use SCM_WVECT_WEAK_KEY_P and SCM_WVECT_WEAK_VALUE_P. * hashtab.c, hashtab.h (scm_hash_for_each, scm_hash_map): New functions. (scm_vector_to_hash_table, scm_c_make_resizing_hash_table): Removed. (scm_make_weak_key_hash_table, scm_make_weak_value_hash_table, scm_make_doubly_weak_hash_table): Moved here from weaks.c. * init.c (scm_init_guile_1): Removed call to scm_init_weaks; Added calls to scm_storage_prehistory and scm_hashtab_prehistory. * modules.c (module-reverse-lookup): Use hashtable accessors. * symbols.c, symbols.h (scm_i_hash_symbol): New function. * weaks.c, weaks.h (scm_make_weak_key_alist_vector, scm_make_weak_value_alist_vector, scm_make_doubly_weak_alist_vector): New functions. * weaks.c (scm_init_weaks_builtins): New function. * weaks.h (SCM_WVECTF_WEAK_KEY, SCM_WVECTF_WEAK_VALUE, SCM_WVECTF_NOSCAN, SCM_WVECT_WEAK_KEY_P, SCM_WVECT_WEAK_VALUE_P, SCM_WVECT_NOSCAN_P): New macros. * weaks.c (scm_scan_weak_vectors): Use SCM_WVECT_WEAK_KEY_P and SCM_WVECT_WEAK_VALUE_P. * weaks.c, weaks.h (scm_i_allocate_weak_vector): Renamed from allocate_weak_vector and exported. * Makefile.am (ice9_sources): Added weak-vector.scm. * weak-vector.scm: New file. * boot-9.scm (module-clear!): Use hash-clear!. (module-for-each): Use hash-for-each. (module-map): Use hash-map. --- ice-9/ChangeLog | 10 + ice-9/Makefile.am | 3 +- ice-9/boot-9.scm | 29 +-- ice-9/weak-vector.scm | 56 +++++ libguile/ChangeLog | 56 +++++ libguile/environments.c | 48 ++-- libguile/gc-mark.c | 10 +- libguile/gc.c | 37 ++- libguile/gc.h | 3 +- libguile/hashtab.c | 531 ++++++++++++++++++++++++++++++---------- libguile/hashtab.h | 65 ++++- libguile/hooks.c | 4 +- libguile/init.c | 8 +- libguile/modules.c | 6 +- libguile/symbols.c | 36 ++- libguile/symbols.h | 3 +- libguile/weaks.c | 84 ++++--- libguile/weaks.h | 23 +- 18 files changed, 761 insertions(+), 251 deletions(-) create mode 100644 ice-9/weak-vector.scm diff --git a/ice-9/ChangeLog b/ice-9/ChangeLog index 4892a19a7..a71953333 100644 --- a/ice-9/ChangeLog +++ b/ice-9/ChangeLog @@ -1,3 +1,13 @@ +2003-02-19 Mikael Djurfeldt + + * Makefile.am (ice9_sources): Added weak-vector.scm. + + * weak-vector.scm: New file. + + * boot-9.scm (module-clear!): Use hash-clear!. + (module-for-each): Use hash-for-each. + (module-map): Use hash-map. + 2003-02-11 Mikael Djurfeldt * boot-9.scm (make-hash-table): Turned primitive. diff --git a/ice-9/Makefile.am b/ice-9/Makefile.am index e1b82c5ab..13adb5897 100644 --- a/ice-9/Makefile.am +++ b/ice-9/Makefile.am @@ -34,7 +34,8 @@ ice9_sources = \ safe-r5rs.scm safe.scm session.scm slib.scm stack-catch.scm \ streams.scm string-fun.scm syncase.scm threads.scm \ buffered-input.scm time.scm history.scm channel.scm \ - pretty-print.scm ftw.scm gap-buffer.scm occam-channel.scm + pretty-print.scm ftw.scm gap-buffer.scm occam-channel.scm \ + weak-vector.scm subpkgdatadir = $(pkgdatadir)/${GUILE_EFFECTIVE_VERSION}/ice-9 subpkgdata_DATA = $(ice9_sources) diff --git a/ice-9/boot-9.scm b/ice-9/boot-9.scm index 11f45d30b..246caf543 100644 --- a/ice-9/boot-9.scm +++ b/ice-9/boot-9.scm @@ -1003,7 +1003,7 @@ (error "Lazy-binder expected to be a procedure or #f." binder)) - (let ((module (module-constructor (make-vector size '()) + (let ((module (module-constructor (make-hash-table size) uses binder #f #f #f #f '() (make-weak-value-hash-table 31) @@ -1307,11 +1307,11 @@ ;; make sure that a symbol is undefined in the local namespace of M. ;; (define (module-remove! m v) - (module-obarray-remove! (module-obarray m) v) + (module-obarray-remove! (module-obarray m) v) (module-modified m)) (define (module-clear! m) - (vector-fill! (module-obarray m) '()) + (hash-clear! (module-obarray m)) (module-modified m)) ;; MODULE-FOR-EACH -- exported @@ -1319,30 +1319,11 @@ ;; Call PROC on each symbol in MODULE, with arguments of (SYMBOL VARIABLE). ;; (define (module-for-each proc module) - (let ((obarray (module-obarray module))) - (do ((index 0 (+ index 1)) - (end (vector-length obarray))) - ((= index end)) - (for-each - (lambda (bucket) - (proc (car bucket) (cdr bucket))) - (vector-ref obarray index))))) - + (hash-for-each proc (module-obarray module))) (define (module-map proc module) - (let* ((obarray (module-obarray module)) - (end (vector-length obarray))) + (hash-map proc (module-obarray module))) - (let loop ((i 0) - (answer '())) - (if (= i end) - answer - (loop (+ 1 i) - (append! - (map (lambda (bucket) - (proc (car bucket) (cdr bucket))) - (vector-ref obarray i)) - answer)))))) ;;; {Low Level Bootstrapping} diff --git a/ice-9/weak-vector.scm b/ice-9/weak-vector.scm new file mode 100644 index 000000000..fb841af74 --- /dev/null +++ b/ice-9/weak-vector.scm @@ -0,0 +1,56 @@ +;;; installed-scm-file + +;;;; Copyright (C) 2003 Free Software Foundation, Inc. +;;;; +;;;; This program is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 2, or (at your option) +;;;; any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this software; see the file COPYING. If not, write to +;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330, +;;;; Boston, MA 02111-1307 USA +;;;; +;;;; As a special exception, the Free Software Foundation gives permission +;;;; for additional uses of the text contained in its release of GUILE. +;;;; +;;;; The exception is that, if you link the GUILE library with other files +;;;; to produce an executable, this does not by itself cause the +;;;; resulting executable to be covered by the GNU General Public License. +;;;; Your use of that executable is in no way restricted on account of +;;;; linking the GUILE library code into it. +;;;; +;;;; This exception does not however invalidate any other reasons why +;;;; the executable file might be covered by the GNU General Public License. +;;;; +;;;; This exception applies only to the code released by the +;;;; Free Software Foundation under the name GUILE. If you copy +;;;; code from other Free Software Foundation releases into a copy of +;;;; GUILE, as the General Public License permits, the exception does +;;;; not apply to the code that you add in this way. To avoid misleading +;;;; anyone as to the status of such modified files, you must delete +;;;; this exception notice from them. +;;;; +;;;; If you write modifications of your own for GUILE, it is your choice +;;;; whether to permit this exception to apply to your modifications. +;;;; If you do not wish that, delete this exception notice. +;;;; + + +(define-module (ice-9 weak-vector) + :export (make-weak-vector list->weak-vector weak-vector? + make-weak-key-alist-vector + make-weak-value-alist-vector + make-doubly-weak-alist-vector + weak-key-alist-vector? + weak-value-alist-vector? + doubly-weak-alist-vector?) ; C + ) + +(%init-weaks-builtins) ; defined in libguile/weaks.c diff --git a/libguile/ChangeLog b/libguile/ChangeLog index 87247b265..df6d335e6 100644 --- a/libguile/ChangeLog +++ b/libguile/ChangeLog @@ -1,3 +1,59 @@ +2003-02-19 Mikael Djurfeldt + + * hooks.c (scm_c_hook_add): Fixed bug in append mode. + + The following changes introduce the use of resizable hash tables + throughout Guile. It also renames the old *-hash-table* functions + to *-alist-vector* and places them, together with the rest of the + weak vector support, in the module (ice-9 weak-vector). We should + probably introduce a new, better, API for weak references, for + example "weak pairs" a la MIT-Scheme. (In Chez scheme, they even + look like and are used like ordinary pairs.) + + * environments.c (obarray_enter, obarray_retrieve, obarray_remove, + leaf_environment_fold, obarray_remove_all): Use hashtable + accessors. + + * gc.c (scm_init_storage): Moved hook initialization to + scm_storage_prehistory. + (scm_storage_prehistory): New function. + (scm_igc): Added commentary about placement of + scm_after_sweep_c_hook. + + * gc-mark.c (scm_mark_all): Use hashtable accessors. + (scm_gc_mark_dependencies): Use SCM_WVECT_WEAK_KEY_P and + SCM_WVECT_WEAK_VALUE_P. + + * hashtab.c, hashtab.h (scm_hash_for_each, scm_hash_map): New + functions. + (scm_vector_to_hash_table, scm_c_make_resizing_hash_table): + Removed. + (scm_make_weak_key_hash_table, scm_make_weak_value_hash_table, + scm_make_doubly_weak_hash_table): Moved here from weaks.c. + + * init.c (scm_init_guile_1): Removed call to scm_init_weaks; Added + calls to scm_storage_prehistory and scm_hashtab_prehistory. + + * modules.c (module-reverse-lookup): Use hashtable accessors. + + * symbols.c, symbols.h (scm_i_hash_symbol): New function. + + * weaks.c, weaks.h (scm_make_weak_key_alist_vector, + scm_make_weak_value_alist_vector, + scm_make_doubly_weak_alist_vector): New functions. + + * weaks.c (scm_init_weaks_builtins): New function. + + * weaks.h (SCM_WVECTF_WEAK_KEY, SCM_WVECTF_WEAK_VALUE, + SCM_WVECTF_NOSCAN, SCM_WVECT_WEAK_KEY_P, SCM_WVECT_WEAK_VALUE_P, + SCM_WVECT_NOSCAN_P): New macros. + + * weaks.c (scm_scan_weak_vectors): Use SCM_WVECT_WEAK_KEY_P + and SCM_WVECT_WEAK_VALUE_P. + + * weaks.c, weaks.h (scm_i_allocate_weak_vector): Renamed from + allocate_weak_vector and exported. + 2003-02-13 Mikael Djurfeldt * hashtab.c: Undid thread safety. (We decided that it's better to diff --git a/libguile/environments.c b/libguile/environments.c index fadfbc6de..074c49cd2 100644 --- a/libguile/environments.c +++ b/libguile/environments.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1999,2000,2001, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -531,10 +531,12 @@ observer_print (SCM type, SCM port, scm_print_state *pstate SCM_UNUSED) static SCM obarray_enter (SCM obarray, SCM symbol, SCM data) { - size_t hash = SCM_SYMBOL_HASH (symbol) % SCM_VECTOR_LENGTH (obarray); + size_t hash = SCM_SYMBOL_HASH (symbol) % SCM_HASHTABLE_N_BUCKETS (obarray); SCM entry = scm_cons (symbol, data); - SCM slot = scm_cons (entry, SCM_VELTS (obarray)[hash]); - SCM_VECTOR_SET (obarray, hash, slot); + SCM slot = scm_cons (entry, SCM_HASHTABLE_BUCKETS (obarray)[hash]); + SCM_SET_HASHTABLE_BUCKET (obarray, hash, slot); + if (SCM_HASHTABLE_N_ITEMS (obarray) > SCM_HASHTABLE_UPPER (obarray)) + scm_i_rehash (obarray, scm_i_hash_symbol, 0, "obarray_enter"); return entry; } @@ -547,12 +549,14 @@ obarray_enter (SCM obarray, SCM symbol, SCM data) static SCM obarray_replace (SCM obarray, SCM symbol, SCM data) { - size_t hash = SCM_SYMBOL_HASH (symbol) % SCM_VECTOR_LENGTH (obarray); + size_t hash = SCM_SYMBOL_HASH (symbol) % SCM_HASHTABLE_N_BUCKETS (obarray); SCM new_entry = scm_cons (symbol, data); SCM lsym; SCM slot; - for (lsym = SCM_VELTS (obarray)[hash]; !SCM_NULLP (lsym); lsym = SCM_CDR (lsym)) + for (lsym = SCM_HASHTABLE_BUCKETS (obarray)[hash]; + !SCM_NULLP (lsym); + lsym = SCM_CDR (lsym)) { SCM old_entry = SCM_CAR (lsym); if (SCM_EQ_P (SCM_CAR (old_entry), symbol)) @@ -562,8 +566,10 @@ obarray_replace (SCM obarray, SCM symbol, SCM data) } } - slot = scm_cons (new_entry, SCM_VELTS (obarray)[hash]); - SCM_VECTOR_SET (obarray, hash, slot); + slot = scm_cons (new_entry, SCM_HASHTABLE_BUCKETS (obarray)[hash]); + SCM_SET_HASHTABLE_BUCKET (obarray, hash, slot); + if (SCM_HASHTABLE_N_ITEMS (obarray) > SCM_HASHTABLE_UPPER (obarray)) + scm_i_rehash (obarray, scm_i_hash_symbol, 0, "obarray_replace"); return SCM_BOOL_F; } @@ -575,10 +581,12 @@ obarray_replace (SCM obarray, SCM symbol, SCM data) static SCM obarray_retrieve (SCM obarray, SCM sym) { - size_t hash = SCM_SYMBOL_HASH (sym) % SCM_VECTOR_LENGTH (obarray); + size_t hash = SCM_SYMBOL_HASH (sym) % SCM_HASHTABLE_N_BUCKETS (obarray); SCM lsym; - for (lsym = SCM_VELTS (obarray)[hash]; !SCM_NULLP (lsym); lsym = SCM_CDR (lsym)) + for (lsym = SCM_HASHTABLE_BUCKETS (obarray)[hash]; + !SCM_NULLP (lsym); + lsym = SCM_CDR (lsym)) { SCM entry = SCM_CAR (lsym); if (SCM_EQ_P (SCM_CAR (entry), sym)) @@ -596,14 +604,15 @@ obarray_retrieve (SCM obarray, SCM sym) static SCM obarray_remove (SCM obarray, SCM sym) { - size_t hash = SCM_SYMBOL_HASH (sym) % SCM_VECTOR_LENGTH (obarray); - SCM table_entry = SCM_VELTS (obarray)[hash]; + size_t hash = SCM_SYMBOL_HASH (sym) % SCM_HASHTABLE_N_BUCKETS (obarray); + SCM table_entry = SCM_HASHTABLE_BUCKETS (obarray)[hash]; SCM handle = scm_sloppy_assq (sym, table_entry); if (SCM_CONSP (handle)) { SCM new_table_entry = scm_delq1_x (handle, table_entry); - SCM_VECTOR_SET (obarray, hash, new_table_entry); + SCM_SET_HASHTABLE_BUCKET (obarray, hash, new_table_entry); + SCM_HASHTABLE_DECREMENT (obarray); } return handle; @@ -613,13 +622,14 @@ obarray_remove (SCM obarray, SCM sym) static void obarray_remove_all (SCM obarray) { - size_t size = SCM_VECTOR_LENGTH (obarray); + size_t size = SCM_HASHTABLE_N_BUCKETS (obarray); size_t i; for (i = 0; i < size; i++) { - SCM_VECTOR_SET (obarray, i, SCM_EOL); + SCM_SET_HASHTABLE_BUCKET (obarray, i, SCM_EOL); } + SCM_SET_HASHTABLE_N_ITEMS (obarray, 0); } @@ -759,7 +769,7 @@ core_environments_init (struct core_environments_base *body, { body->funcs = funcs; body->observers = SCM_EOL; - body->weak_observers = scm_make_weak_value_hash_table (SCM_MAKINUM (1)); + body->weak_observers = scm_make_weak_value_alist_vector (SCM_MAKINUM (1)); } @@ -897,10 +907,12 @@ leaf_environment_fold (SCM env, scm_environment_folder proc, SCM data, SCM init) SCM result = init; SCM obarray = LEAF_ENVIRONMENT (env)->obarray; - for (i = 0; i < SCM_VECTOR_LENGTH (obarray); i++) + for (i = 0; i < SCM_HASHTABLE_N_BUCKETS (obarray); i++) { SCM l; - for (l = SCM_VELTS (obarray)[i]; !SCM_NULLP (l); l = SCM_CDR (l)) + for (l = SCM_HASHTABLE_BUCKETS (obarray)[i]; + !SCM_NULLP (l); + l = SCM_CDR (l)) { SCM binding = SCM_CAR (l); SCM symbol = SCM_CAR (binding); diff --git a/libguile/gc-mark.c b/libguile/gc-mark.c index b65ab7aa8..c29ec3613 100644 --- a/libguile/gc-mark.c +++ b/libguile/gc-mark.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -120,9 +120,9 @@ scm_mark_all (void) /* mark the registered roots */ { size_t i; - for (i = 0; i < SCM_VECTOR_LENGTH (scm_gc_registered_roots); ++i) + for (i = 0; i < SCM_HASHTABLE_N_BUCKETS (scm_gc_registered_roots); ++i) { - SCM l = SCM_VELTS (scm_gc_registered_roots)[i]; + SCM l = SCM_HASHTABLE_BUCKETS (scm_gc_registered_roots)[i]; for (; !SCM_NULLP (l); l = SCM_CDR (l)) { SCM *p = (SCM *) (scm_num2long (SCM_CAAR (l), 0, NULL)); @@ -313,8 +313,8 @@ scm_gc_mark_dependencies (SCM p) int weak_values; len = SCM_VECTOR_LENGTH (ptr); - weak_keys = SCM_IS_WHVEC (ptr) || SCM_IS_WHVEC_B (ptr); - weak_values = SCM_IS_WHVEC_V (ptr) || SCM_IS_WHVEC_B (ptr); + weak_keys = SCM_WVECT_WEAK_KEY_P (ptr); + weak_values = SCM_WVECT_WEAK_VALUE_P (ptr); for (x = 0; x < len; ++x) { diff --git a/libguile/gc.c b/libguile/gc.c index ba43df315..e912beaa6 100644 --- a/libguile/gc.c +++ b/libguile/gc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -608,6 +608,25 @@ scm_igc (const char *what) /* TODO: this hook should probably be moved to just before the mark, since that's where the sweep is finished in lazy sweeping. + + MDJ 030219 : No, probably not. The + original meaning implied at least two things: that it would be + called when + + 1. the freelist is re-initialized (no evaluation possible, though) + + and + + 2. the heap is "fresh" + (it is well-defined what data is used and what is not) + + Neither of these conditions would hold just before the mark phase. + + Of course, the lazy sweeping has muddled the distinction between + scm_before_sweep_c_hook and scm_after_sweep_c_hook, but even if + there were no difference, it would still be useful to have two + distinct classes of hook functions since this can prevent some + bad interference when several modules adds gc hooks. */ scm_c_hook_run (&scm_after_sweep_c_hook, 0); gc_end_stats (); @@ -873,6 +892,15 @@ scm_getenv_int (const char *var, int def) return res; } +void +scm_storage_prehistory () +{ + scm_c_hook_init (&scm_before_gc_c_hook, 0, SCM_C_HOOK_NORMAL); + scm_c_hook_init (&scm_before_mark_c_hook, 0, SCM_C_HOOK_NORMAL); + scm_c_hook_init (&scm_before_sweep_c_hook, 0, SCM_C_HOOK_NORMAL); + scm_c_hook_init (&scm_after_sweep_c_hook, 0, SCM_C_HOOK_NORMAL); + scm_c_hook_init (&scm_after_gc_c_hook, 0, SCM_C_HOOK_NORMAL); +} int scm_init_storage () @@ -893,13 +921,6 @@ scm_init_storage () j = SCM_HEAP_SEG_SIZE; - - scm_c_hook_init (&scm_before_gc_c_hook, 0, SCM_C_HOOK_NORMAL); - scm_c_hook_init (&scm_before_mark_c_hook, 0, SCM_C_HOOK_NORMAL); - scm_c_hook_init (&scm_before_sweep_c_hook, 0, SCM_C_HOOK_NORMAL); - scm_c_hook_init (&scm_after_sweep_c_hook, 0, SCM_C_HOOK_NORMAL); - scm_c_hook_init (&scm_after_gc_c_hook, 0, SCM_C_HOOK_NORMAL); - /* Initialise the list of ports. */ scm_i_port_table = (scm_t_port **) malloc (sizeof (scm_t_port *) * scm_i_port_table_room); diff --git a/libguile/gc.h b/libguile/gc.h index ca102af11..13f1af854 100644 --- a/libguile/gc.h +++ b/libguile/gc.h @@ -3,7 +3,7 @@ #ifndef SCM_GC_H #define SCM_GC_H -/* Copyright (C) 1995,1996,1998,1999,2000,2001, 2002 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,1999,2000,2001, 2002, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -373,6 +373,7 @@ SCM_API void scm_gc_register_root (SCM *p); SCM_API void scm_gc_unregister_root (SCM *p); SCM_API void scm_gc_register_roots (SCM *b, unsigned long n); SCM_API void scm_gc_unregister_roots (SCM *b, unsigned long n); +SCM_API void scm_storage_prehistory (void); SCM_API int scm_init_storage (void); SCM_API void *scm_get_stack_base (void); SCM_API void scm_init_gc (void); diff --git a/libguile/hashtab.c b/libguile/hashtab.c index e35cec0ad..b6a7fbc93 100644 --- a/libguile/hashtab.c +++ b/libguile/hashtab.c @@ -49,55 +49,43 @@ #include "libguile/root.h" #include "libguile/vectors.h" #include "libguile/ports.h" -#include "libguile/weaks.h" #include "libguile/validate.h" #include "libguile/hashtab.h" +/* NOTES + * + * 1. The current hash table implementation uses weak alist vectors + * (implementation in weaks.c) internally, but we do the scanning + * ourselves (in scan_weak_hashtables) because we need to update the + * hash table structure when items are dropped during GC. + * + * 2. All hash table operations still work on alist vectors. + * + */ + /* Hash tables are either vectors of association lists or smobs - containing such vectors. Currently, the vector version represents - constant size tables while those wrapped in a smob represents - resizing tables. - - Growing or shrinking, with following rehashing, is triggered when - the load factor - - L = N / S (N: number of items in table, S: bucket vector length) - - passes an upper limit of 0.9 or a lower limit of 0.25. - - The implementation stores the upper and lower number of items which - trigger a resize in the hashtable object. - - Possible hash table sizes (primes) are stored in the array - hashtable_size. + * containing such vectors. Currently, the vector version represents + * constant size tables while those wrapped in a smob represents + * resizing tables. + * + * Growing or shrinking, with following rehashing, is triggered when + * the load factor + * + * L = N / S (N: number of items in table, S: bucket vector length) + * + * passes an upper limit of 0.9 or a lower limit of 0.25. + * + * The implementation stores the upper and lower number of items which + * trigger a resize in the hashtable object. + * + * Possible hash table sizes (primes) are stored in the array + * hashtable_size. */ -/*fixme* Update n_items correctly for weak tables. This can be done - by representing such tables with ordinary vectors and adding a scan - function to the before sweep hook similarly to what is done in weaks.c. - */ - -#define SCM_HASHTABLE_P(x) SCM_TYP16_PREDICATE (scm_tc16_hashtable, x) -#define SCM_HASHTABLE_VECTOR(x) SCM_CELL_OBJECT_1 (x) -#define SCM_SET_HASHTABLE_VECTOR(x, v) SCM_SET_CELL_OBJECT_1 (x, v) -#define SCM_HASHTABLE(x) ((scm_t_hashtable *) SCM_CELL_WORD_2 (x)) -#define SCM_HASHTABLE_N_ITEMS(x) (SCM_HASHTABLE (x)->n_items) -#define SCM_HASHTABLE_INCREMENT(x) (SCM_HASHTABLE_N_ITEMS(x)++) -#define SCM_HASHTABLE_DECREMENT(x) (SCM_HASHTABLE_N_ITEMS(x)--) -#define SCM_HASHTABLE_UPPER(x) (SCM_HASHTABLE (x)->upper) -#define SCM_HASHTABLE_LOWER(x) (SCM_HASHTABLE (x)->lower) - scm_t_bits scm_tc16_hashtable; -typedef struct scm_t_hashtable { - unsigned long n_items; /* number of items in table */ - unsigned long lower; /* when to shrink */ - unsigned long upper; /* when to grow */ - int size_index; /* index into hashtable_size */ -} scm_t_hashtable; - #define HASHTABLE_SIZE_N 25 static unsigned long hashtable_size[] = { @@ -111,32 +99,126 @@ static unsigned long hashtable_size[] = { static char *s_hashtable = "hashtable"; -SCM -scm_vector_to_hash_table (SCM vector) { - SCM table; +SCM weak_hashtables = SCM_EOL; + +static SCM +make_hash_table (int flags, unsigned long k, const char *func_name) { + SCM table, vector; + int i, n = k ? k : 31; + if (flags) + /* The SCM_WVECTF_NOSCAN flag informs the weak vector code not to + perform the final scan for broken references. Instead we do + that ourselves in scan_weak_hashtables. */ + vector = scm_i_allocate_weak_vector (flags | SCM_WVECTF_NOSCAN, + SCM_MAKINUM (n), + SCM_EOL, + func_name); + else + vector = scm_c_make_vector (n, SCM_EOL); scm_t_hashtable *t = scm_gc_malloc (sizeof (*t), s_hashtable); - int i = 0, len = SCM_VECTOR_LENGTH (vector); - while (i < HASHTABLE_SIZE_N && len > hashtable_size[i]) + i = 0; + while (i < HASHTABLE_SIZE_N && n > hashtable_size[i]) ++i; if (i > 0) i = i - 1; - t->size_index = i; + t->min_size_index = t->size_index = i; t->n_items = 0; - if (i == 0) - t->lower = 0; - else - t->lower = hashtable_size[i] / 4; + t->lower = 0; t->upper = 9 * hashtable_size[i] / 10; - SCM_NEWSMOB2 (table, scm_tc16_hashtable, vector, t); + t->flags = flags; + if (flags) + { + SCM_NEWSMOB3 (table, scm_tc16_hashtable, vector, t, weak_hashtables); + weak_hashtables = table; + } + else + SCM_NEWSMOB3 (table, scm_tc16_hashtable, vector, t, SCM_EOL); return table; } + +void +scm_i_rehash (SCM table, + unsigned long (*hash_fn)(), + void *closure, + const char* func_name) +{ + SCM buckets, new_buckets; + int i; + unsigned long old_size; + unsigned long new_size; + + if (SCM_HASHTABLE_N_ITEMS (table) < SCM_HASHTABLE_LOWER (table)) + { + /* rehashing is not triggered when i <= min_size */ + i = SCM_HASHTABLE (table)->size_index; + do + --i; + while (i > SCM_HASHTABLE (table)->min_size_index + && SCM_HASHTABLE_N_ITEMS (table) < hashtable_size[i] / 4); + } + else + { + i = SCM_HASHTABLE (table)->size_index + 1; + if (i >= HASHTABLE_SIZE_N) + /* don't rehash */ + return; + /* store for use in rehash_after_gc */ + SCM_HASHTABLE (table)->hash_fn = hash_fn; + SCM_HASHTABLE (table)->closure = closure; + } + SCM_HASHTABLE (table)->size_index = i; + + new_size = hashtable_size[i]; + if (i <= SCM_HASHTABLE (table)->min_size_index) + SCM_HASHTABLE (table)->lower = 0; + else + SCM_HASHTABLE (table)->lower = new_size / 4; + SCM_HASHTABLE (table)->upper = 9 * new_size / 10; + buckets = SCM_HASHTABLE_VECTOR (table); + + if (SCM_HASHTABLE_WEAK_P (table)) + new_buckets = scm_i_allocate_weak_vector (SCM_HASHTABLE_FLAGS (table) + | SCM_WVECTF_NOSCAN, + SCM_MAKINUM (new_size), + SCM_EOL, + func_name); + else + new_buckets = scm_c_make_vector (new_size, SCM_EOL); + + old_size = SCM_VECTOR_LENGTH (buckets); + for (i = 0; i < old_size; ++i) + { + SCM ls = SCM_VELTS (buckets)[i], handle; + while (!SCM_NULLP (ls)) + { + unsigned long h; + handle = SCM_CAR (ls); + h = hash_fn (SCM_CAR (handle), new_size, closure); + if (h >= new_size) + scm_out_of_range (func_name, scm_ulong2num (h)); + SCM_VECTOR_SET (new_buckets, h, + scm_cons (handle, SCM_VELTS (new_buckets)[h])); + ls = SCM_CDR (ls); + } + } + SCM_SET_HASHTABLE_VECTOR (table, new_buckets); +} + + static int hashtable_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED) { scm_t_hashtable *t = SCM_HASHTABLE (exp); - scm_puts ("#n_items, 10, port); + scm_puts ("#<", port); + if (SCM_HASHTABLE_WEAK_KEY_P (exp)) + scm_puts ("weak-key-", port); + else if (SCM_HASHTABLE_WEAK_VALUE_P (exp)) + scm_puts ("weak-value-", port); + else if (SCM_HASHTABLE_DOUBLY_WEAK_P (exp)) + scm_puts ("doubly-weak-", port); + scm_puts ("hash-table ", port); + scm_intprint ((unsigned long) t->n_items, 10, port); scm_putc ('/', port); scm_intprint ((unsigned long) SCM_VECTOR_LENGTH (SCM_HASHTABLE_VECTOR (exp)), 10, port); @@ -144,6 +226,92 @@ hashtable_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED) return 1; } +#define UNMARKED_CELL_P(x) (SCM_NIMP(x) && !SCM_GC_MARK_P (x)) + +/* keep track of hash tables that need to shrink after scan */ +static SCM to_rehash = SCM_EOL; + +/* scan hash tables for broken references, remove them, and update + hash tables item count */ +static void * +scan_weak_hashtables (void *dummy1 SCM_UNUSED, + void *dummy2 SCM_UNUSED, + void *dummy3 SCM_UNUSED) +{ + SCM *next = &weak_hashtables; + SCM h = *next; + while (!SCM_NULLP (h)) + { + if (!SCM_GC_MARK_P (h)) + *next = h = SCM_HASHTABLE_NEXT (h); + else + { + SCM alist; + int i, n = SCM_HASHTABLE_N_BUCKETS (h); + int weak_car = SCM_HASHTABLE_FLAGS (h) & SCM_HASHTABLEF_WEAK_CAR; + int weak_cdr = SCM_HASHTABLE_FLAGS (h) & SCM_HASHTABLEF_WEAK_CDR; + int check_size_p = 0; + for (i = 0; i < n; ++i) + { + SCM *next_spine = (SCM *) &SCM_HASHTABLE_BUCKETS (h)[i]; + for (alist = *next_spine; + !SCM_NULLP (alist); + alist = SCM_CDR (alist)) + if ((weak_car && UNMARKED_CELL_P (SCM_CAAR (alist))) + || (weak_cdr && UNMARKED_CELL_P (SCM_CDAR (alist)))) + { + *next_spine = SCM_CDR (alist); + SCM_HASHTABLE_DECREMENT (h); + check_size_p = 1; + } + else + next_spine = SCM_CDRLOC (alist); + } + if (check_size_p + && SCM_HASHTABLE_N_ITEMS (h) < SCM_HASHTABLE_LOWER (h)) + { + SCM tmp = SCM_HASHTABLE_NEXT (h); + /* temporarily move table from weak_hashtables to to_rehash */ + SCM_SET_HASHTABLE_NEXT (h, to_rehash); + to_rehash = h; + *next = h = tmp; + } + else + { + next = SCM_HASHTABLE_NEXTLOC (h); + h = SCM_HASHTABLE_NEXT (h); + } + } + } + return 0; +} + +static void * +rehash_after_gc (void *dummy1 SCM_UNUSED, + void *dummy2 SCM_UNUSED, + void *dummy3 SCM_UNUSED) +{ + if (!SCM_NULLP (to_rehash)) + { + SCM h = to_rehash, last; + do + { + scm_i_rehash (h, + /* use same hash_fn and closure as last time */ + SCM_HASHTABLE (h)->hash_fn, + SCM_HASHTABLE (h)->closure, + "rehash_after_gc"); + last = h; + h = SCM_HASHTABLE_NEXT (h); + } while (!SCM_NULLP (h)); + /* move tables back to weak_hashtables */ + SCM_SET_HASHTABLE_NEXT (last, weak_hashtables); + weak_hashtables = to_rehash; + to_rehash = SCM_EOL; + } + return 0; +} + static size_t hashtable_free (SCM obj) { @@ -155,100 +323,132 @@ hashtable_free (SCM obj) SCM scm_c_make_hash_table (unsigned long k) { - return scm_c_make_vector (k, SCM_EOL); -} - -SCM -scm_c_make_resizing_hash_table () -{ - return scm_vector_to_hash_table (scm_c_make_vector (31, SCM_EOL)); + return make_hash_table (0, k, "scm_c_make_hash_table"); } SCM_DEFINE (scm_make_hash_table, "make-hash-table", 0, 1, 0, (SCM n), - "Make a hash table with constant number of buckets @var{n}\n" - "If called with zero arguments, create a resizing hash table.") + "Make a hash table with optional minimum number of buckets @var{n}\n") #define FUNC_NAME s_scm_make_hash_table { if (SCM_UNBNDP (n)) - return scm_c_make_resizing_hash_table (); + return make_hash_table (0, 0, FUNC_NAME); else { int k; SCM_VALIDATE_INUM_COPY (1, n, k); - return scm_c_make_hash_table (k); + return make_hash_table (0, k, FUNC_NAME); } } #undef FUNC_NAME -static void -rehash (SCM table, unsigned long (*hash_fn)(), void *closure) +SCM_DEFINE (scm_make_weak_key_hash_table, "make-weak-key-hash-table", 0, 1, 0, + (SCM n), + "@deffnx {Scheme Procedure} make-weak-value-hash-table size\n" + "@deffnx {Scheme Procedure} make-doubly-weak-hash-table size\n" + "Return a weak hash table with @var{size} buckets. As with any\n" + "hash table, choosing a good size for the table requires some\n" + "caution.\n" + "\n" + "You can modify weak hash tables in exactly the same way you\n" + "would modify regular hash tables. (@pxref{Hash Tables})") +#define FUNC_NAME s_scm_make_weak_key_hash_table { - SCM buckets, new_buckets; - int i; - unsigned long old_size; - unsigned long new_size; - - if (SCM_HASHTABLE_N_ITEMS (table) < SCM_HASHTABLE_LOWER (table)) - /* rehashing is never triggered when i == 0 */ - i = --SCM_HASHTABLE (table)->size_index; + if (SCM_UNBNDP (n)) + return make_hash_table (SCM_HASHTABLEF_WEAK_CAR, 0, FUNC_NAME); else { - i = SCM_HASHTABLE (table)->size_index + 1; - if (i < HASHTABLE_SIZE_N) - SCM_HASHTABLE (table)->size_index = i; - else - /* don't rehash */ - return; + int k; + SCM_VALIDATE_INUM_COPY (1, n, k); + return make_hash_table (SCM_HASHTABLEF_WEAK_CAR, k, FUNC_NAME); } - - new_size = hashtable_size[i]; - if (i == 0) - SCM_HASHTABLE (table)->lower = 0; - else - SCM_HASHTABLE (table)->lower = new_size / 4; - SCM_HASHTABLE (table)->upper = 9 * new_size / 10; - buckets = SCM_HASHTABLE_VECTOR (table); - - if (SCM_VECTORP (buckets)) - new_buckets = scm_c_make_vector (new_size, SCM_EOL); - else - switch (SCM_WVECT_TYPE (buckets)) { - case 1: - new_buckets = scm_make_weak_key_hash_table (SCM_MAKINUM (new_size)); - break; - case 2: - new_buckets = scm_make_weak_value_hash_table (SCM_MAKINUM (new_size)); - break; - case 3: - new_buckets = scm_make_doubly_weak_hash_table (SCM_MAKINUM (new_size)); - break; - default: - abort (); /* never reached */ - } - - old_size = SCM_VECTOR_LENGTH (buckets); - for (i = 0; i < old_size; ++i) - { - SCM ls = SCM_VELTS (buckets)[i], handle; - while (!SCM_NULLP (ls)) - { - unsigned long h; - if (!SCM_CONSP (ls)) - break; - handle = SCM_CAR (ls); - if (!SCM_CONSP (handle)) - continue; - h = hash_fn (SCM_CAR (handle), new_size, closure); - if (h >= new_size) - scm_out_of_range ("hash_fn_create_handle_x", scm_ulong2num (h)); - SCM_VECTOR_SET (new_buckets, h, - scm_cons (handle, SCM_VELTS (new_buckets)[h])); - ls = SCM_CDR (ls); - } - } - SCM_SET_HASHTABLE_VECTOR (table, new_buckets); } +#undef FUNC_NAME + + +SCM_DEFINE (scm_make_weak_value_hash_table, "make-weak-value-hash-table", 0, 1, 0, + (SCM n), + "Return a hash table with weak values with @var{size} buckets.\n" + "(@pxref{Hash Tables})") +#define FUNC_NAME s_scm_make_weak_value_hash_table +{ + if (SCM_UNBNDP (n)) + return make_hash_table (SCM_HASHTABLEF_WEAK_CDR, 0, FUNC_NAME); + else + { + int k; + SCM_VALIDATE_INUM_COPY (1, n, k); + return make_hash_table (SCM_HASHTABLEF_WEAK_CDR, k, FUNC_NAME); + } +} +#undef FUNC_NAME + + +SCM_DEFINE (scm_make_doubly_weak_hash_table, "make-doubly-weak-hash-table", 1, 0, 0, + (SCM n), + "Return a hash table with weak keys and values with @var{size}\n" + "buckets. (@pxref{Hash Tables})") +#define FUNC_NAME s_scm_make_doubly_weak_hash_table +{ + if (SCM_UNBNDP (n)) + return make_hash_table (SCM_HASHTABLEF_WEAK_CAR | SCM_HASHTABLEF_WEAK_CDR, + 0, + FUNC_NAME); + else + { + int k; + SCM_VALIDATE_INUM_COPY (1, n, k); + return make_hash_table (SCM_HASHTABLEF_WEAK_CAR | SCM_HASHTABLEF_WEAK_CDR, + k, + FUNC_NAME); + } +} +#undef FUNC_NAME + + +SCM_DEFINE (scm_hash_table_p, "hash-table?", 1, 0, 0, + (SCM obj), + "Return @code{#t} if @var{obj} is a hash table.") +#define FUNC_NAME s_scm_hash_table_p +{ + return SCM_BOOL (SCM_HASHTABLE_P (obj)); +} +#undef FUNC_NAME + + +SCM_DEFINE (scm_weak_key_hash_table_p, "weak-key-hash-table?", 1, 0, 0, + (SCM obj), + "@deffnx {Scheme Procedure} weak-value-hash-table? obj\n" + "@deffnx {Scheme Procedure} doubly-weak-hash-table? obj\n" + "Return @code{#t} if @var{obj} is the specified weak hash\n" + "table. Note that a doubly weak hash table is neither a weak key\n" + "nor a weak value hash table.") +#define FUNC_NAME s_scm_weak_key_hash_table_p +{ + return SCM_BOOL (SCM_HASHTABLE_P (obj) && SCM_HASHTABLE_WEAK_KEY_P (obj)); +} +#undef FUNC_NAME + + +SCM_DEFINE (scm_weak_value_hash_table_p, "weak-value-hash-table?", 1, 0, 0, + (SCM obj), + "Return @code{#t} if @var{obj} is a weak value hash table.") +#define FUNC_NAME s_scm_weak_value_hash_table_p +{ + return SCM_BOOL (SCM_HASHTABLE_P (obj) && SCM_HASHTABLE_WEAK_VALUE_P (obj)); +} +#undef FUNC_NAME + + +SCM_DEFINE (scm_doubly_weak_hash_table_p, "doubly-weak-hash-table?", 1, 0, 0, + (SCM obj), + "Return @code{#t} if @var{obj} is a doubly weak hash table.") +#define FUNC_NAME s_scm_doubly_weak_hash_table_p +{ + return SCM_BOOL (SCM_HASHTABLE_P (obj) && SCM_HASHTABLE_DOUBLY_WEAK_P (obj)); +} +#undef FUNC_NAME + SCM scm_hash_fn_get_handle (SCM table, SCM obj, unsigned long (*hash_fn)(), SCM (*assoc_fn)(), void * closure) @@ -306,13 +506,7 @@ scm_hash_fn_create_handle_x (SCM table, SCM obj, SCM init, unsigned long (*hash_ { SCM_HASHTABLE_INCREMENT (table); if (SCM_HASHTABLE_N_ITEMS (table) > SCM_HASHTABLE_UPPER (table)) - { - rehash (table, hash_fn, closure); - buckets = SCM_HASHTABLE_VECTOR (table); - k = hash_fn (obj, SCM_VECTOR_LENGTH (buckets), closure); - if (k >= SCM_VECTOR_LENGTH (buckets)) - scm_out_of_range ("hash_fn_create_handle_x", scm_ulong2num (k)); - } + scm_i_rehash (table, hash_fn, closure, FUNC_NAME); } return SCM_CAR (new_bucket); } @@ -377,12 +571,23 @@ scm_hash_fn_remove_x (SCM table, SCM obj, unsigned long (*hash_fn)(), SCM (*asso { SCM_HASHTABLE_DECREMENT (table); if (SCM_HASHTABLE_N_ITEMS (table) < SCM_HASHTABLE_LOWER (table)) - rehash (table, hash_fn, closure); + scm_i_rehash (table, hash_fn, closure, "scm_hash_fn_remove_x"); } } return h; } +SCM_DEFINE (scm_hash_clear_x, "hash-clear!", 1, 0, 0, + (SCM table), + "Remove all items from TABLE (without triggering a resize).") +#define FUNC_NAME s_scm_hash_clear_x +{ + SCM_VALIDATE_HASHTABLE (1, table); + scm_vector_fill_x (SCM_HASHTABLE_VECTOR (table), SCM_EOL); + SCM_SET_HASHTABLE_N_ITEMS (table, 0); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME @@ -777,16 +982,72 @@ scm_internal_hash_fold (SCM (*fn) (), void *closure, SCM init, SCM table) return result; } +static SCM +for_each_proc (void *proc, SCM key, SCM data, SCM value) +{ + return scm_call_2 (SCM_PACK (proc), key, data); +} + +SCM_DEFINE (scm_hash_for_each, "hash-for-each", 2, 0, 0, + (SCM proc, SCM table), + "An iterator over hash-table elements.\n" + "Applies PROC successively on all hash table items.\n" + "The arguments to PROC are \"(key value)\" where key\n" + "and value are successive pairs from the hash table TABLE.") +#define FUNC_NAME s_scm_hash_for_each +{ + SCM_VALIDATE_PROC (1, proc); + if (!SCM_HASHTABLE_P (table)) + SCM_VALIDATE_VECTOR (2, table); + scm_internal_hash_fold (for_each_proc, + (void *) SCM_UNPACK (proc), + SCM_BOOL_F, + table); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +static SCM +map_proc (void *proc, SCM key, SCM data, SCM value) +{ + return scm_cons (scm_call_2 (SCM_PACK (proc), key, data), value); +} + +SCM_DEFINE (scm_hash_map, "hash-map", 2, 0, 0, + (SCM proc, SCM table), + "An iterator over hash-table elements.\n" + "Accumulates and returns as a list the results of applying PROC successively.\n" + "The arguments to PROC are \"(key value)\" where key\n" + "and value are successive pairs from the hash table TABLE.") +#define FUNC_NAME s_scm_hash_map +{ + SCM_VALIDATE_PROC (1, proc); + if (!SCM_HASHTABLE_P (table)) + SCM_VALIDATE_VECTOR (2, table); + return scm_internal_hash_fold (map_proc, + (void *) SCM_UNPACK (proc), + SCM_EOL, + table); +} +#undef FUNC_NAME + void -scm_init_hashtab () +scm_hashtab_prehistory () { scm_tc16_hashtable = scm_make_smob_type (s_hashtable, 0); scm_set_smob_mark (scm_tc16_hashtable, scm_markcdr); scm_set_smob_print (scm_tc16_hashtable, hashtable_print); scm_set_smob_free (scm_tc16_hashtable, hashtable_free); + scm_c_hook_add (&scm_after_sweep_c_hook, scan_weak_hashtables, 0, 0); + scm_c_hook_add (&scm_after_gc_c_hook, rehash_after_gc, 0, 0); +} + +void +scm_init_hashtab () +{ #include "libguile/hashtab.x" } diff --git a/libguile/hashtab.h b/libguile/hashtab.h index bc98a02c2..ff2f157a3 100644 --- a/libguile/hashtab.h +++ b/libguile/hashtab.h @@ -48,6 +48,56 @@ #include "libguile/__scm.h" +#include "weaks.h" + + + +#define SCM_HASHTABLEF_WEAK_CAR SCM_WVECTF_WEAK_KEY +#define SCM_HASHTABLEF_WEAK_CDR SCM_WVECTF_WEAK_VALUE + +#define SCM_HASHTABLE_P(x) SCM_TYP16_PREDICATE (scm_tc16_hashtable, x) +#define SCM_VALIDATE_HASHTABLE(pos, arg) \ + SCM_MAKE_VALIDATE_MSG (pos, arg, HASHTABLE_P, "hash-table") +#define SCM_HASHTABLE_VECTOR(h) SCM_CELL_OBJECT_1 (h) +#define SCM_SET_HASHTABLE_VECTOR(x, v) SCM_SET_CELL_OBJECT_1 (x, v) +#define SCM_HASHTABLE(x) ((scm_t_hashtable *) SCM_CELL_WORD_2 (x)) +#define SCM_HASHTABLE_NEXT(x) SCM_CELL_OBJECT_3 (x) +#define SCM_HASHTABLE_NEXTLOC(x) ((SCM *) SCM_CELL_WORD_LOC (x, 3)) +#define SCM_SET_HASHTABLE_NEXT(x, n) SCM_SET_CELL_OBJECT_3 (x, n) +#define SCM_HASHTABLE_FLAGS(x) (SCM_HASHTABLE (x)->flags) +#define SCM_HASHTABLE_WEAK_KEY_P(x) \ + (SCM_HASHTABLE_FLAGS (x) & SCM_HASHTABLEF_WEAK_CAR) +#define SCM_HASHTABLE_WEAK_VALUE_P(x) \ + (SCM_HASHTABLE_FLAGS (x) & SCM_HASHTABLEF_WEAK_CDR) +#define SCM_HASHTABLE_DOUBLY_WEAK_P(x) \ + ((SCM_HASHTABLE_FLAGS (x) \ + & (SCM_HASHTABLEF_WEAK_CAR | SCM_HASHTABLEF_WEAK_CDR)) \ + == (SCM_HASHTABLEF_WEAK_CAR | SCM_HASHTABLEF_WEAK_CDR)) +#define SCM_HASHTABLE_WEAK_P(x) SCM_HASHTABLE_FLAGS (x) +#define SCM_HASHTABLE_N_ITEMS(x) (SCM_HASHTABLE (x)->n_items) +#define SCM_SET_HASHTABLE_N_ITEMS(x, n) (SCM_HASHTABLE (x)->n_items = n) +#define SCM_HASHTABLE_INCREMENT(x) (SCM_HASHTABLE_N_ITEMS(x)++) +#define SCM_HASHTABLE_DECREMENT(x) (SCM_HASHTABLE_N_ITEMS(x)--) +#define SCM_HASHTABLE_UPPER(x) (SCM_HASHTABLE (x)->upper) +#define SCM_HASHTABLE_LOWER(x) (SCM_HASHTABLE (x)->lower) + +#define SCM_HASHTABLE_N_BUCKETS(h) \ + SCM_VECTOR_LENGTH (SCM_HASHTABLE_VECTOR (h)) +#define SCM_HASHTABLE_BUCKETS(h) SCM_VELTS (SCM_HASHTABLE_VECTOR (h)) +#define SCM_SET_HASHTABLE_BUCKET(h, i, x) \ + SCM_VECTOR_SET (SCM_HASHTABLE_VECTOR (h), i, x) + +typedef struct scm_t_hashtable { + int flags; /* properties of table */ + unsigned long n_items; /* number of items in table */ + unsigned long lower; /* when to shrink */ + unsigned long upper; /* when to grow */ + int size_index; /* index into hashtable_size */ + int min_size_index; /* minimum size_index */ + unsigned long (*hash_fn) (); + void *closure; +} scm_t_hashtable; + #if 0 @@ -58,8 +108,17 @@ typedef SCM scm_t_delete_fn (SCM elt, SCM list); SCM_API SCM scm_vector_to_hash_table (SCM vector); SCM_API SCM scm_c_make_hash_table (unsigned long k); -SCM_API SCM scm_c_make_resizing_hash_table (void); SCM_API SCM scm_make_hash_table (SCM n); +SCM_API SCM scm_make_weak_key_hash_table (SCM k); +SCM_API SCM scm_make_weak_value_hash_table (SCM k); +SCM_API SCM scm_make_doubly_weak_hash_table (SCM k); + +SCM_API SCM scm_hash_table_p (SCM h); +SCM_API SCM scm_weak_key_hash_table_p (SCM h); +SCM_API SCM scm_weak_value_hash_table_p (SCM h); +SCM_API SCM scm_doubly_weak_hash_table_p (SCM h); + +SCM_API void scm_i_rehash (SCM table, unsigned long (*hash_fn)(), void *closure, const char*func_name); SCM_API SCM scm_hash_fn_get_handle (SCM table, SCM obj, unsigned long (*hash_fn) (), SCM (*assoc_fn) (), void * closure); SCM_API SCM scm_hash_fn_create_handle_x (SCM table, SCM obj, SCM init, unsigned long (*hash_fn) (), SCM (*assoc_fn) (), void * closure); @@ -67,6 +126,7 @@ SCM_API SCM scm_hash_fn_ref (SCM table, SCM obj, SCM dflt, unsigned long (*hash_ SCM_API SCM scm_hash_fn_set_x (SCM table, SCM obj, SCM val, unsigned long (*hash_fn) (), SCM (*assoc_fn) (), void * closure); SCM_API SCM scm_hash_fn_remove_x (SCM table, SCM obj, unsigned long (*hash_fn) (), SCM (*assoc_fn) (), SCM (*delete_fn) (), void * closure); SCM_API SCM scm_internal_hash_fold (SCM (*fn) (), void *closure, SCM init, SCM table); +SCM_API SCM scm_hash_clear_x (SCM table); SCM_API SCM scm_hashq_get_handle (SCM table, SCM obj); SCM_API SCM scm_hashq_create_handle_x (SCM table, SCM obj, SCM init); @@ -89,6 +149,9 @@ SCM_API SCM scm_hashx_ref (SCM hash, SCM assoc, SCM table, SCM obj, SCM dflt); SCM_API SCM scm_hashx_set_x (SCM hash, SCM assoc, SCM table, SCM obj, SCM val); SCM_API SCM scm_hashx_remove_x (SCM hash, SCM assoc, SCM del, SCM table, SCM obj); SCM_API SCM scm_hash_fold (SCM proc, SCM init, SCM hash); +SCM_API SCM scm_hash_for_each (SCM proc, SCM hash); +SCM_API SCM scm_hash_map (SCM proc, SCM hash); +SCM_API void scm_hashtab_prehistory (void); SCM_API void scm_init_hashtab (void); #endif /* SCM_HASHTAB_H */ diff --git a/libguile/hooks.c b/libguile/hooks.c index 3d01de1c8..ec1ad3971 100644 --- a/libguile/hooks.c +++ b/libguile/hooks.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1998,1999,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,1999,2000,2001, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,7 +81,7 @@ scm_c_hook_add (scm_t_c_hook *hook, scm_t_c_hook_entry **loc = &hook->first; if (appendp) while (*loc) - *loc = (*loc)->next; + loc = &(*loc)->next; entry->next = *loc; entry->func = func; entry->data = func_data; diff --git a/libguile/init.c b/libguile/init.c index cd98511e6..92568c0e3 100644 --- a/libguile/init.c +++ b/libguile/init.c @@ -445,14 +445,18 @@ scm_init_guile_1 (SCM_STACKITEM *base) scm_block_gc = 1; + scm_storage_prehistory (); scm_threads_prehistory (); scm_ports_prehistory (); scm_smob_prehistory (); + scm_hashtab_prehistory (); /* requires storage_prehistory */ scm_tables_prehistory (); #ifdef GUILE_DEBUG_MALLOC scm_debug_malloc_prehistory (); #endif - if (scm_init_storage ()) /* requires threads and smob_prehistory */ + if (scm_init_storage ()) /* requires threads_prehistory, + smob_prehistory and + hashtab_prehistory */ abort (); scm_struct_prehistory (); /* requires storage */ @@ -460,7 +464,7 @@ scm_init_guile_1 (SCM_STACKITEM *base) scm_weaks_prehistory (); /* requires storage */ scm_init_subr_table (); scm_environments_prehistory (); /* requires storage */ - scm_modules_prehistory (); /* requires storage */ + scm_modules_prehistory (); /* requires storage and hash tables */ scm_init_variable (); /* all bindings need variables */ scm_init_continuations (); scm_init_root (); /* requires continuations */ diff --git a/libguile/modules.c b/libguile/modules.c index d54ac5fca..79a2192af 100644 --- a/libguile/modules.c +++ b/libguile/modules.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1998,2000,2001,2002 Free Software Foundation, Inc. +/* Copyright (C) 1998,2000,2001,2002, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -558,10 +558,10 @@ scm_module_reverse_lookup (SCM module, SCM variable) /* XXX - We do not use scm_hash_fold here to avoid searching the whole obarray. We should have a scm_hash_find procedure. */ - n = SCM_VECTOR_LENGTH (obarray); + n = SCM_HASHTABLE_N_BUCKETS (obarray); for (i = 0; i < n; ++i) { - SCM ls = SCM_VELTS (obarray)[i], handle; + SCM ls = SCM_HASHTABLE_BUCKETS (obarray)[i], handle; while (!SCM_NULLP (ls)) { handle = SCM_CAR (ls); diff --git a/libguile/symbols.c b/libguile/symbols.c index 021fb7fd5..1c1a86bed 100644 --- a/libguile/symbols.c +++ b/libguile/symbols.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -83,19 +83,40 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0, /* {Symbols} */ +/* In order to optimize reading speed, this function breaks part of + * the hashtable abstraction. The optimizations are: + * + * 1. The argument string can be compared directly to symbol objects + * without first creating an SCM string object. (This would have + * been necessary if we had used the hashtable API in hashtab.h.) + * + * 2. We can use the raw hash value stored in SCM_SYMBOL_HASH (sym) + * to speed up lookup. + * + * Both optimizations might be possible without breaking the + * abstraction if the API in hashtab.c is improved. + */ + +unsigned long +scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) +{ + return SCM_SYMBOL_HASH (obj) % n; +} SCM scm_mem2symbol (const char *name, size_t len) { - size_t raw_hash = scm_string_hash ((const unsigned char *) name, len)/2; - size_t hash = raw_hash % SCM_VECTOR_LENGTH (symbols); + size_t raw_hash = scm_string_hash ((const unsigned char *) name, len) / 2; + size_t hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols); { /* Try to find the symbol in the symbols table */ SCM l; - for (l = SCM_VELTS (symbols) [hash]; !SCM_NULLP (l); l = SCM_CDR (l)) + for (l = SCM_HASHTABLE_BUCKETS (symbols) [hash]; + !SCM_NULLP (l); + l = SCM_CDR (l)) { SCM sym = SCM_CAAR (l); if (SCM_SYMBOL_HASH (sym) == raw_hash @@ -126,9 +147,12 @@ scm_mem2symbol (const char *name, size_t len) raw_hash, SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL))); - SCM slot = SCM_VELTS (symbols) [hash]; + SCM slot = SCM_HASHTABLE_BUCKETS (symbols) [hash]; SCM cell = scm_cons (symbol, SCM_UNDEFINED); - SCM_VECTOR_SET (symbols, hash, scm_cons (cell, slot)); + SCM_SET_HASHTABLE_BUCKET (symbols, hash, scm_cons (cell, slot)); + SCM_HASHTABLE_INCREMENT (symbols); + if (SCM_HASHTABLE_N_ITEMS (symbols) > SCM_HASHTABLE_UPPER (symbols)) + scm_i_rehash (symbols, scm_i_hash_symbol, 0, "scm_mem2symbol"); return symbol; } diff --git a/libguile/symbols.h b/libguile/symbols.h index 22dc5cc25..81638f4a7 100644 --- a/libguile/symbols.h +++ b/libguile/symbols.h @@ -3,7 +3,7 @@ #ifndef SCM_SYMBOLS_H #define SCM_SYMBOLS_H -/* Copyright (C) 1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -78,6 +78,7 @@ #ifdef GUILE_DEBUG SCM_API SCM scm_sys_symbols (void); #endif +SCM_API unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, void *closure); SCM_API SCM scm_mem2symbol (const char*, size_t); SCM_API SCM scm_mem2uninterned_symbol (const char *name, size_t len); SCM_API SCM scm_str2symbol (const char*); diff --git a/libguile/weaks.c b/libguile/weaks.c index 7f02d3c68..a39b587ff 100644 --- a/libguile/weaks.c +++ b/libguile/weaks.c @@ -52,6 +52,16 @@ +/* 1. The current hash table implementation in hashtab.c uses weak alist + * vectors (formerly called weak hash tables) internally. + * + * 2. All hash table operations still work on alist vectors. + * + * 3. The weak vector and alist vector Scheme API is accessed through + * the module (ice-9 weak-vector). + */ + + /* {Weak Vectors} */ @@ -61,8 +71,8 @@ * elements which are initialized with the 'fill' object, or, if 'fill' is * undefined, with an unspecified object. */ -static SCM -allocate_weak_vector (scm_t_bits type, SCM size, SCM fill, const char* caller) +SCM +scm_i_allocate_weak_vector (scm_t_bits type, SCM size, SCM fill, const char* caller) #define FUNC_NAME caller { if (SCM_INUMP (size)) @@ -108,7 +118,6 @@ allocate_weak_vector (scm_t_bits type, SCM size, SCM fill, const char* caller) } #undef FUNC_NAME - SCM_DEFINE (scm_make_weak_vector, "make-weak-vector", 1, 1, 0, (SCM size, SCM fill), "Return a weak vector with @var{size} elements. If the optional\n" @@ -117,7 +126,7 @@ SCM_DEFINE (scm_make_weak_vector, "make-weak-vector", 1, 1, 0, "empty list.") #define FUNC_NAME s_scm_make_weak_vector { - return allocate_weak_vector (0, size, fill, FUNC_NAME); + return scm_i_allocate_weak_vector (0, size, fill, FUNC_NAME); } #undef FUNC_NAME @@ -170,85 +179,76 @@ SCM_DEFINE (scm_weak_vector_p, "weak-vector?", 1, 0, 0, -SCM_DEFINE (scm_make_weak_key_hash_table, "make-weak-key-hash-table", 0, 1, 0, +SCM_DEFINE (scm_make_weak_key_alist_vector, "make-weak-key-alist-vector", 0, 1, 0, (SCM size), - "@deffnx {Scheme Procedure} make-weak-value-hash-table size\n" - "@deffnx {Scheme Procedure} make-doubly-weak-hash-table size\n" + "@deffnx {Scheme Procedure} make-weak-value-alist-vector size\n" + "@deffnx {Scheme Procedure} make-doubly-weak-alist-vector size\n" "Return a weak hash table with @var{size} buckets. As with any\n" "hash table, choosing a good size for the table requires some\n" "caution.\n" "\n" "You can modify weak hash tables in exactly the same way you\n" "would modify regular hash tables. (@pxref{Hash Tables})") -#define FUNC_NAME s_scm_make_weak_key_hash_table +#define FUNC_NAME s_scm_make_weak_key_alist_vector { - if (SCM_UNBNDP (size)) - return scm_vector_to_hash_table (allocate_weak_vector (1, SCM_MAKINUM (31), - SCM_EOL, FUNC_NAME)); - else - return allocate_weak_vector (1, size, SCM_EOL, FUNC_NAME); + return scm_i_allocate_weak_vector + (1, SCM_UNBNDP (size) ? SCM_MAKINUM (31) : size, SCM_EOL, FUNC_NAME); } #undef FUNC_NAME -SCM_DEFINE (scm_make_weak_value_hash_table, "make-weak-value-hash-table", 0, 1, 0, +SCM_DEFINE (scm_make_weak_value_alist_vector, "make-weak-value-alist-vector", 0, 1, 0, (SCM size), "Return a hash table with weak values with @var{size} buckets.\n" "(@pxref{Hash Tables})") -#define FUNC_NAME s_scm_make_weak_value_hash_table +#define FUNC_NAME s_scm_make_weak_value_alist_vector { - if (SCM_UNBNDP (size)) - return scm_vector_to_hash_table (allocate_weak_vector (2, SCM_MAKINUM (31), - SCM_EOL, FUNC_NAME)); - else - return allocate_weak_vector (2, size, SCM_EOL, FUNC_NAME); + return scm_i_allocate_weak_vector + (2, SCM_UNBNDP (size) ? SCM_MAKINUM (31) : size, SCM_EOL, FUNC_NAME); } #undef FUNC_NAME -SCM_DEFINE (scm_make_doubly_weak_hash_table, "make-doubly-weak-hash-table", 1, 0, 0, +SCM_DEFINE (scm_make_doubly_weak_alist_vector, "make-doubly-weak-alist-vector", 1, 0, 0, (SCM size), "Return a hash table with weak keys and values with @var{size}\n" "buckets. (@pxref{Hash Tables})") -#define FUNC_NAME s_scm_make_doubly_weak_hash_table +#define FUNC_NAME s_scm_make_doubly_weak_alist_vector { - if (SCM_UNBNDP (size)) - return scm_vector_to_hash_table (allocate_weak_vector (3, SCM_MAKINUM (31), - SCM_EOL, FUNC_NAME)); - else - return allocate_weak_vector (3, size, SCM_EOL, FUNC_NAME); + return scm_i_allocate_weak_vector + (3, SCM_UNBNDP (size) ? SCM_MAKINUM (31) : size, SCM_EOL, FUNC_NAME); } #undef FUNC_NAME -SCM_DEFINE (scm_weak_key_hash_table_p, "weak-key-hash-table?", 1, 0, 0, +SCM_DEFINE (scm_weak_key_alist_vector_p, "weak-key-alist-vector?", 1, 0, 0, (SCM obj), - "@deffnx {Scheme Procedure} weak-value-hash-table? obj\n" - "@deffnx {Scheme Procedure} doubly-weak-hash-table? obj\n" + "@deffnx {Scheme Procedure} weak-value-alist-vector? obj\n" + "@deffnx {Scheme Procedure} doubly-weak-alist-vector? obj\n" "Return @code{#t} if @var{obj} is the specified weak hash\n" "table. Note that a doubly weak hash table is neither a weak key\n" "nor a weak value hash table.") -#define FUNC_NAME s_scm_weak_key_hash_table_p +#define FUNC_NAME s_scm_weak_key_alist_vector_p { return SCM_BOOL (SCM_WVECTP (obj) && SCM_IS_WHVEC (obj)); } #undef FUNC_NAME -SCM_DEFINE (scm_weak_value_hash_table_p, "weak-value-hash-table?", 1, 0, 0, +SCM_DEFINE (scm_weak_value_alist_vector_p, "weak-value-alist-vector?", 1, 0, 0, (SCM obj), "Return @code{#t} if @var{obj} is a weak value hash table.") -#define FUNC_NAME s_scm_weak_value_hash_table_p +#define FUNC_NAME s_scm_weak_value_alist_vector_p { return SCM_BOOL (SCM_WVECTP (obj) && SCM_IS_WHVEC_V (obj)); } #undef FUNC_NAME -SCM_DEFINE (scm_doubly_weak_hash_table_p, "doubly-weak-hash-table?", 1, 0, 0, +SCM_DEFINE (scm_doubly_weak_alist_vector_p, "doubly-weak-alist-vector?", 1, 0, 0, (SCM obj), "Return @code{#t} if @var{obj} is a doubly weak hash table.") -#define FUNC_NAME s_scm_doubly_weak_hash_table_p +#define FUNC_NAME s_scm_doubly_weak_alist_vector_p { return SCM_BOOL (SCM_WVECTP (obj) && SCM_IS_WHVEC_B (obj)); } @@ -325,7 +325,9 @@ scm_scan_weak_vectors (void *dummy1 SCM_UNUSED, if (UNMARKED_CELL_P (ptr[j])) ptr[j] = SCM_BOOL_F; } - else /* if (SCM_IS_WHVEC_ANY (scm_weak_vectors[i])) */ + /* check if we should scan the alist vector here (hashtables + have their own scan function in hashtab.c). */ + else if (!SCM_WVECT_NOSCAN_P (w)) { SCM obj = w; register long n = SCM_VECTOR_LENGTH (w); @@ -378,10 +380,18 @@ scm_weaks_prehistory () } +SCM +scm_init_weaks_builtins () +{ +#include "libguile/weaks.x" + return SCM_UNSPECIFIED; +} + void scm_init_weaks () { -#include "libguile/weaks.x" + scm_c_define_gsubr ("%init-weaks-builtins", 0, 0, 0, + scm_init_weaks_builtins); } diff --git a/libguile/weaks.h b/libguile/weaks.h index 690ba6c41..31c87099a 100644 --- a/libguile/weaks.h +++ b/libguile/weaks.h @@ -3,7 +3,7 @@ #ifndef SCM_WEAKS_H #define SCM_WEAKS_H -/* Copyright (C) 1995,1996,2000,2001 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,2000,2001, 2003 Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,9 +50,16 @@ +#define SCM_WVECTF_WEAK_KEY 1 +#define SCM_WVECTF_WEAK_VALUE 2 +#define SCM_WVECTF_NOSCAN 4 + #define SCM_WVECTP(x) (!SCM_IMP (x) && SCM_TYP7 (x) == scm_tc7_wvect) #define SCM_WVECT_TYPE(x) (SCM_CELL_WORD_2 (x)) #define SCM_SET_WVECT_TYPE(x, t) (SCM_SET_CELL_WORD_2 ((x), (t))) +#define SCM_WVECT_WEAK_KEY_P(x) (SCM_WVECT_TYPE (x) & SCM_WVECTF_WEAK_KEY) +#define SCM_WVECT_WEAK_VALUE_P(x) (SCM_WVECT_TYPE (x) & SCM_WVECTF_WEAK_VALUE) +#define SCM_WVECT_NOSCAN_P(x) (SCM_WVECT_TYPE (x) & SCM_WVECTF_NOSCAN) #define SCM_IS_WHVEC(X) (SCM_WVECT_TYPE (X) == 1) #define SCM_IS_WHVEC_V(X) (SCM_WVECT_TYPE (X) == 2) #define SCM_IS_WHVEC_B(X) (SCM_WVECT_TYPE (X) == 3) @@ -64,16 +71,18 @@ SCM_API SCM scm_weak_vectors; +SCM_API SCM scm_i_allocate_weak_vector (scm_t_bits type, SCM size, SCM fill, const char* caller); SCM_API SCM scm_make_weak_vector (SCM k, SCM fill); SCM_API SCM scm_weak_vector (SCM l); SCM_API SCM scm_weak_vector_p (SCM x); -SCM_API SCM scm_make_weak_key_hash_table (SCM k); -SCM_API SCM scm_make_weak_value_hash_table (SCM k); -SCM_API SCM scm_make_doubly_weak_hash_table (SCM k); -SCM_API SCM scm_weak_key_hash_table_p (SCM x); -SCM_API SCM scm_weak_value_hash_table_p (SCM x); -SCM_API SCM scm_doubly_weak_hash_table_p (SCM x); +SCM_API SCM scm_make_weak_key_alist_vector (SCM k); +SCM_API SCM scm_make_weak_value_alist_vector (SCM k); +SCM_API SCM scm_make_doubly_weak_alist_vector (SCM k); +SCM_API SCM scm_weak_key_alist_vector_p (SCM x); +SCM_API SCM scm_weak_value_alist_vector_p (SCM x); +SCM_API SCM scm_doubly_weak_alist_vector_p (SCM x); SCM_API void scm_weaks_prehistory (void); +SCM_API SCM scm_init_weaks_builtins (void); SCM_API void scm_init_weaks (void); #endif /* SCM_WEAKS_H */