1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-03 08:10:31 +02:00

Change charsets to use bytevector to store char ranges

This doesn't fundamentally change how charsets are represented, but it
will eventually allow us to migrate more functionality to scheme, as the
charsets have a Scheme-legible representation.  Also, and this is really
the point, give charsets their own type code, so that they can be
traced precisely.

* libguile/eq.c:
* libguile/evalext.c:
* libguile/goops.c:
* libguile/print.c:
* module/oop/goops.scm: Adjust to new tc16.
* libguile/srfi-14.h: Make private things private.
* libguile/srfi-14.c: Change to use bytevectors for the ranges.  No
functional change.
This commit is contained in:
Andy Wingo 2025-06-12 16:49:27 +02:00
parent ec92d6a96e
commit c794c086d5
8 changed files with 577 additions and 550 deletions

View file

@ -39,6 +39,7 @@
#include "pairs.h"
#include "private-options.h"
#include "smob.h"
#include "srfi-14.h"
#include "stackchk.h"
#include "strorder.h"
#include "struct.h"
@ -382,6 +383,15 @@ scm_equal_p (SCM x, SCM y)
x = scm_syntax_expression (x);
y = scm_syntax_expression (y);
goto tailrecurse;
case scm_tc7_ext:
switch (SCM_TYP16 (x))
{
case scm_tc16_charset:
return scm_from_bool (scm_i_char_sets_equal (x, y));
default:
abort ();
}
break;
}
/* Otherwise just return false. Dispatching to the generic is the wrong thing

View file

@ -97,6 +97,7 @@ SCM_DEFINE (scm_self_evaluating_p, "self-evaluating?", 1, 0, 0,
case scm_tc7_ephemeron_table:
case scm_tc7_thread:
case scm_tcs_struct:
case scm_tc7_ext:
return SCM_BOOL_T;
default:
return SCM_BOOL_F;

View file

@ -138,6 +138,7 @@ static SCM class_bitvector;
static SCM class_finalizer;
static SCM class_ephemeron;
static SCM class_ephemeron_table;
static SCM class_character_set;
static struct scm_ephemeron_table *vtable_class_map;
static SCM pre_goops_vtables = SCM_EOL;
@ -338,6 +339,16 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0,
/* A non-GOOPS struct. */
return scm_i_define_class_for_vtable (vtable);
}
case scm_tc7_ext:
{
switch (SCM_TYP16 (x))
{
case scm_tc16_charset:
return class_character_set;
default:
abort ();
}
}
default:
if (scm_is_pair (x))
return class_pair;
@ -968,6 +979,7 @@ SCM_DEFINE (scm_sys_goops_early_init, "%goops-early-init", 0, 0, 0,
class_input_output_port = scm_variable_ref (scm_c_lookup ("<input-output-port>"));
class_ephemeron = scm_variable_ref (scm_c_lookup ("<ephemeron>"));
class_ephemeron_table = scm_variable_ref (scm_c_lookup ("<ephemeron-table>"));
class_character_set = scm_variable_ref (scm_c_lookup ("<character-set>"));
create_smob_classes ();
create_struct_classes ();

View file

@ -59,6 +59,7 @@
#include "programs.h"
#include "read.h"
#include "smob.h"
#include "srfi-14.h"
#include "strings.h"
#include "strports.h"
#include "struct.h"
@ -785,6 +786,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
SCM_SMOB_DESCRIPTOR (exp).print (exp, port, pstate);
EXIT_NESTED_DATA (pstate);
break;
case scm_tc7_ext:
switch (SCM_TYP16 (exp))
{
case scm_tc16_charset:
scm_i_print_char_set (exp, port, pstate);
break;
default:
abort ();
}
break;
default:
/* case scm_tcs_closures: */
punk:

View file

@ -413,10 +413,10 @@ typedef uintptr_t scm_t_bits;
interest: numbers, ports and smobs in fact each represent
collections of types, which are subdivided using tc16-codes.
tc16 (for tc7==scm_tc7_smob):
The largest part of the space of smob types is not subdivided in a
predefined way, since smobs can be added arbitrarily by user C
code. */
tc16 (for tc7 in {scm_tc7_smob, scm_tc7_port, scm_tc7_ext}): Port
and smob types can be defined by the user and are allocated
dynamically. scm_tc7_ext tags are allocated statically, and are
for Guile-internal objects. */
@ -505,8 +505,10 @@ typedef uintptr_t scm_t_bits;
#define scm_tc7_unused_75 0x75
#define scm_tc7_smob 0x77
#define scm_tc7_port 0x7d
#define scm_tc7_unused_7f 0x7f
#define scm_tc7_ext 0x7f
/* Objects with scm_tc7_ext. */
#define scm_tc16_charset 0x007f
/* Definitions for tc16: */
#define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x))

File diff suppressed because it is too large Load diff

View file

@ -24,34 +24,12 @@
#include "libguile/chars.h"
typedef struct
static inline int
scm_is_char_set (SCM x)
{
scm_t_wchar lo;
scm_t_wchar hi;
} scm_t_char_range;
typedef struct
{
size_t len;
scm_t_char_range *ranges;
} scm_t_char_set;
typedef struct
{
size_t range;
scm_t_wchar n;
} scm_t_char_set_cursor;
#define SCM_CHARSET_GET(cs,idx) \
scm_i_charset_get((scm_t_char_set *)SCM_SMOB_DATA(cs),idx)
#define SCM_CHARSETP(x) (SCM_HAS_TYP16 (x, scm_tc16_charset))
/* Smob type code for character sets. */
SCM_API int scm_tc16_charset;
SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n);
SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n);
SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n);
return SCM_HAS_TYP16 (x, scm_tc16_charset);
}
#define SCM_CHARSETP(x) (scm_is_char_set (x))
SCM_API SCM scm_char_set_p (SCM obj);
SCM_API SCM scm_char_set_eq (SCM char_sets);
@ -120,6 +98,9 @@ SCM_API SCM scm_char_set_ascii;
SCM_API SCM scm_char_set_empty;
SCM_API SCM scm_char_set_full;
SCM_INTERNAL int scm_i_char_sets_equal (SCM a, SCM b);
SCM_INTERNAL int scm_i_print_char_set (SCM charset, SCM port,
scm_print_state *pstate);
SCM_INTERNAL void scm_init_srfi_14 (void);
#endif /* SCM_SRFI_14_H */

View file

@ -70,7 +70,7 @@
<vector> <bytevector> <uvec> <foreign> <hashtable>
<fluid> <dynamic-state> <frame> <vm> <vm-continuation>
<keyword> <syntax> <atomic-box> <thread> <bitvector>
<finalizer> <ephemeron> <ephemeron-table>
<finalizer> <ephemeron> <ephemeron-table> <character-set>
;; Numbers.
<number> <complex> <real> <integer> <fraction>
@ -84,7 +84,7 @@
;; smob-type-name->class procedure.
<promise> <mutex> <condition-variable>
<regexp> <hook> <random-state>
<directory> <array> <character-set>
<directory> <array>
<dynamic-object> <macro>
;; Modules.
@ -1082,6 +1082,7 @@ slots as we go."
(define-standard-class <finalizer> (<top>))
(define-standard-class <ephemeron> (<top>))
(define-standard-class <ephemeron-table> (<top>))
(define-standard-class <character-set> (<top>))
(define-standard-class <thread> (<top>))
(define-standard-class <number> (<top>))
(define-standard-class <complex> (<number>))
@ -3538,7 +3539,6 @@ var{initargs}."
(define <random-state> (find-subclass <top> '<random-state>))
(define <directory> (find-subclass <top> '<directory>))
(define <array> (find-subclass <top> '<array>))
(define <character-set> (find-subclass <top> '<character-set>))
(define <macro> (find-subclass <top> '<macro>))
;; <dynamic-object> used to be a SMOB type, albeit not exported even to