From c794c086d5ced7044d3eda00833af6c1c1b35df4 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 12 Jun 2025 16:49:27 +0200 Subject: [PATCH] Change charsets to use bytevector to store char ranges This doesn't fundamentally change how charsets are represented, but it will eventually allow us to migrate more functionality to scheme, as the charsets have a Scheme-legible representation. Also, and this is really the point, give charsets their own type code, so that they can be traced precisely. * libguile/eq.c: * libguile/evalext.c: * libguile/goops.c: * libguile/print.c: * module/oop/goops.scm: Adjust to new tc16. * libguile/srfi-14.h: Make private things private. * libguile/srfi-14.c: Change to use bytevectors for the ranges. No functional change. --- libguile/eq.c | 10 + libguile/evalext.c | 1 + libguile/goops.c | 12 + libguile/print.c | 11 + libguile/scm.h | 12 +- libguile/srfi-14.c | 1040 +++++++++++++++++++++--------------------- libguile/srfi-14.h | 35 +- module/oop/goops.scm | 6 +- 8 files changed, 577 insertions(+), 550 deletions(-) diff --git a/libguile/eq.c b/libguile/eq.c index 57e4b931a..059954620 100644 --- a/libguile/eq.c +++ b/libguile/eq.c @@ -39,6 +39,7 @@ #include "pairs.h" #include "private-options.h" #include "smob.h" +#include "srfi-14.h" #include "stackchk.h" #include "strorder.h" #include "struct.h" @@ -382,6 +383,15 @@ scm_equal_p (SCM x, SCM y) x = scm_syntax_expression (x); y = scm_syntax_expression (y); goto tailrecurse; + case scm_tc7_ext: + switch (SCM_TYP16 (x)) + { + case scm_tc16_charset: + return scm_from_bool (scm_i_char_sets_equal (x, y)); + default: + abort (); + } + break; } /* Otherwise just return false. Dispatching to the generic is the wrong thing diff --git a/libguile/evalext.c b/libguile/evalext.c index a816623f8..d61778f98 100644 --- a/libguile/evalext.c +++ b/libguile/evalext.c @@ -97,6 +97,7 @@ SCM_DEFINE (scm_self_evaluating_p, "self-evaluating?", 1, 0, 0, case scm_tc7_ephemeron_table: case scm_tc7_thread: case scm_tcs_struct: + case scm_tc7_ext: return SCM_BOOL_T; default: return SCM_BOOL_F; diff --git a/libguile/goops.c b/libguile/goops.c index e93f2e99e..4a4fbad99 100644 --- a/libguile/goops.c +++ b/libguile/goops.c @@ -138,6 +138,7 @@ static SCM class_bitvector; static SCM class_finalizer; static SCM class_ephemeron; static SCM class_ephemeron_table; +static SCM class_character_set; static struct scm_ephemeron_table *vtable_class_map; static SCM pre_goops_vtables = SCM_EOL; @@ -338,6 +339,16 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0, /* A non-GOOPS struct. */ return scm_i_define_class_for_vtable (vtable); } + case scm_tc7_ext: + { + switch (SCM_TYP16 (x)) + { + case scm_tc16_charset: + return class_character_set; + default: + abort (); + } + } default: if (scm_is_pair (x)) return class_pair; @@ -968,6 +979,7 @@ SCM_DEFINE (scm_sys_goops_early_init, "%goops-early-init", 0, 0, 0, class_input_output_port = scm_variable_ref (scm_c_lookup ("")); class_ephemeron = scm_variable_ref (scm_c_lookup ("")); class_ephemeron_table = scm_variable_ref (scm_c_lookup ("")); + class_character_set = scm_variable_ref (scm_c_lookup ("")); create_smob_classes (); create_struct_classes (); diff --git a/libguile/print.c b/libguile/print.c index 8721fbf43..1ab762058 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -59,6 +59,7 @@ #include "programs.h" #include "read.h" #include "smob.h" +#include "srfi-14.h" #include "strings.h" #include "strports.h" #include "struct.h" @@ -785,6 +786,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) SCM_SMOB_DESCRIPTOR (exp).print (exp, port, pstate); EXIT_NESTED_DATA (pstate); break; + case scm_tc7_ext: + switch (SCM_TYP16 (exp)) + { + case scm_tc16_charset: + scm_i_print_char_set (exp, port, pstate); + break; + default: + abort (); + } + break; default: /* case scm_tcs_closures: */ punk: diff --git a/libguile/scm.h b/libguile/scm.h index 2f657f0e1..0ef18a139 100644 --- a/libguile/scm.h +++ b/libguile/scm.h @@ -413,10 +413,10 @@ typedef uintptr_t scm_t_bits; interest: numbers, ports and smobs in fact each represent collections of types, which are subdivided using tc16-codes. - tc16 (for tc7==scm_tc7_smob): - The largest part of the space of smob types is not subdivided in a - predefined way, since smobs can be added arbitrarily by user C - code. */ + tc16 (for tc7 in {scm_tc7_smob, scm_tc7_port, scm_tc7_ext}): Port + and smob types can be defined by the user and are allocated + dynamically. scm_tc7_ext tags are allocated statically, and are + for Guile-internal objects. */ @@ -505,8 +505,10 @@ typedef uintptr_t scm_t_bits; #define scm_tc7_unused_75 0x75 #define scm_tc7_smob 0x77 #define scm_tc7_port 0x7d -#define scm_tc7_unused_7f 0x7f +#define scm_tc7_ext 0x7f +/* Objects with scm_tc7_ext. */ +#define scm_tc16_charset 0x007f /* Definitions for tc16: */ #define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x)) diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c index 8cd999cc1..7b11bc9f4 100644 --- a/libguile/srfi-14.c +++ b/libguile/srfi-14.c @@ -26,6 +26,7 @@ #include #include "boolean.h" +#include "bytevectors-internal.h" #include "chars.h" #include "eval.h" #include "gsubr.h" @@ -43,20 +44,113 @@ #include "srfi-14.h" +typedef struct scm_char_range +{ + scm_t_wchar lo; + scm_t_wchar hi; +} scm_t_char_range; + /* Include the pre-computed standard charset data. */ #include "srfi-14.i.c" +/* We call this "charset" instead of "char_set" to avoid confusion with + "set" as a verb. */ +struct scm_charset +{ + scm_t_bits tag_and_flags; + struct scm_bytevector *ranges; +}; + +typedef struct +{ + size_t range; + scm_t_wchar n; +} scm_t_char_set_cursor; + +static inline struct scm_charset* +scm_to_charset (SCM scm) +{ + if (!scm_is_char_set (scm)) abort(); + return (struct scm_charset *) SCM_UNPACK_POINTER (scm); +} + +static inline SCM +scm_from_charset (struct scm_charset *s) +{ + return SCM_PACK_POINTER (s); +} + +static inline scm_t_char_range* +char_ranges (struct scm_bytevector *bv) +{ + return (scm_t_char_range *) scm_bytevector_contents (bv); +} + +static inline size_t +char_ranges_len (struct scm_bytevector *bv) +{ + return bv->length / sizeof (scm_t_char_range); +} + +static inline size_t +charset_len (struct scm_charset *s) +{ + return char_ranges_len (s->ranges); +} + +static inline scm_t_char_range* +charset_range (struct scm_charset *s, size_t i) +{ + return char_ranges (s->ranges) + i; +} + +static inline scm_t_wchar +charset_range_lo (struct scm_charset *s, size_t i) +{ + return charset_range (s, i)->lo; +} + +static inline scm_t_wchar +charset_range_hi (struct scm_charset *s, size_t i) +{ + return charset_range (s, i)->hi; +} + +static inline void +charset_range_set_lo (struct scm_charset *s, size_t i, scm_t_wchar ch) +{ + charset_range (s, i)->lo = ch; +} + +static inline void +charset_range_set_hi (struct scm_charset *s, size_t i, scm_t_wchar ch) +{ + charset_range (s, i)->hi = ch; +} + +static inline int +charset_range_contains (struct scm_charset *s, size_t idx, scm_t_wchar c) +{ + scm_t_char_range *range = charset_range (s, idx); + return range->lo <= c && c <= range->hi; +} + static const scm_t_bits SCM_CHARSET_F_IMMUTABLE = 1 << 16; static inline int -scm_charset_is_immutable (SCM charset) +charset_is_immutable (SCM charset) { - return SCM_SMOB_DATA_0 (charset) & SCM_CHARSET_F_IMMUTABLE; + return scm_to_charset (charset)->tag_and_flags & SCM_CHARSET_F_IMMUTABLE; } +#define SCM_VALIDATE_CHARSET(pos, x) \ + do { \ + SCM_ASSERT_TYPE (SCM_CHARSETP (x), x, pos, FUNC_NAME, "charset"); \ + } while (0) + #define SCM_VALIDATE_MUTABLE_CHARSET(pos, x) \ do { \ - SCM_ASSERT_TYPE (SCM_CHARSETP (x) && !scm_charset_is_immutable (x), \ + SCM_ASSERT_TYPE (SCM_CHARSETP (x) && !charset_is_immutable (x), \ x, pos, FUNC_NAME, "mutable charset"); \ } while (0) @@ -66,30 +160,74 @@ static const scm_t_char_range cs_full_ranges[] = { }; static const size_t cs_full_len = 2; -#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) +#define SCM_CHARSET_GET(cs,idx) charset_get (scm_to_charset (cs), idx) +#define SCM_CHARSET_SET(cs, idx) charset_set (scm_to_charset (cs), idx) +#define SCM_CHARSET_UNSET(cs, idx) charset_unset (scm_to_charset (cs), idx) -#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) +static struct scm_bytevector *empty_charset_ranges; -#define SCM_CHARSET_SET(cs, idx) \ - scm_i_charset_set (SCM_CHARSET_DATA (cs), idx) +static struct scm_charset * +make_charset (struct scm_bytevector *ranges) +{ + struct scm_charset *p = scm_gc_malloc (sizeof (struct scm_charset), + "charset"); + p->tag_and_flags = scm_tc16_charset; + p->ranges = ranges; + return p; +} -#define SCM_CHARSET_UNSET(cs, idx) \ - scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx) +static struct scm_charset * +make_static_charset (size_t len, const scm_t_char_range *ranges) +{ + size_t u32_count = len * sizeof (*ranges) / sizeof(uint32_t); + SCM bv = scm_c_take_typed_bytevector ((signed char*) ranges, u32_count, + SCM_ARRAY_ELEMENT_TYPE_U32, + SCM_BOOL_F); + struct scm_charset *ret = make_charset (scm_to_bytevector (bv)); + ret->tag_and_flags |= SCM_CHARSET_F_IMMUTABLE; + return ret; +} -/* Smob type code for character sets. */ -int scm_tc16_charset = 0; -int scm_tc16_charset_cursor = 0; +static struct scm_bytevector * +make_ranges (size_t n) +{ + return n + ? scm_i_make_typed_bytevector (n * 2, SCM_ARRAY_ELEMENT_TYPE_U32) + : empty_charset_ranges; +} + +static void +copy_ranges (scm_t_char_range *dst, scm_t_char_range *src, size_t count) +{ + if (count) + memcpy (dst, src, count * sizeof(*dst)); +} + +static struct scm_bytevector * +clone_ranges (struct scm_bytevector *ranges) +{ + size_t len = char_ranges_len (ranges); + struct scm_bytevector *ret = make_ranges (len); + copy_ranges (char_ranges (ret), char_ranges (ranges), len); + return ret; +} + +static struct scm_charset* +clone_charset (struct scm_charset *src) +{ + return make_charset (clone_ranges (src->ranges)); +} /* True if N exists in charset CS. */ -int -scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) +static int +charset_get (struct scm_charset *cs, scm_t_wchar n) { size_t i; i = 0; - while (i < cs->len) + while (i < charset_len (cs)) { - if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) + if (charset_range_contains (cs, i, n)) return 1; i++; } @@ -97,62 +235,59 @@ scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) return 0; } -static scm_t_char_range* -char_ranges_insert (scm_t_char_range *ranges, size_t len, size_t idx, +static struct scm_bytevector * +char_ranges_insert (struct scm_bytevector *ranges, size_t idx, scm_t_wchar lo, scm_t_wchar hi) { - scm_t_char_range *new_ranges = - scm_gc_malloc_pointerless (sizeof (scm_t_char_range) * (len + 1), - "charset"); - if (idx) - memcpy (new_ranges, ranges, sizeof (scm_t_char_range) * idx); - new_ranges[idx].lo = lo; - new_ranges[idx].hi = hi; - if (idx < len) - memcpy (new_ranges + idx + 1, ranges + idx, - sizeof (scm_t_char_range) * (len - idx)); + size_t len = char_ranges_len (ranges); + struct scm_bytevector *new_ranges = make_ranges (len + 1); + struct scm_char_range inserted = { lo, hi }; + + copy_ranges (char_ranges (new_ranges), char_ranges (ranges), idx); + char_ranges (new_ranges)[idx] = inserted; + copy_ranges (char_ranges (new_ranges) + idx + 1, + char_ranges (ranges) + idx, + len - idx); + return new_ranges; } -static scm_t_char_range* -char_ranges_delete (scm_t_char_range *ranges, size_t len, size_t idx) +static struct scm_bytevector * +char_ranges_delete (struct scm_bytevector *ranges, size_t idx) { - if (len == 1) - return NULL; + size_t len = char_ranges_len (ranges); + struct scm_bytevector *new_ranges = make_ranges (len - 1); + + copy_ranges (char_ranges (new_ranges), char_ranges (ranges), idx); + copy_ranges (char_ranges (new_ranges) + idx, + char_ranges (ranges) + idx + 1, + len - idx - 1); - scm_t_char_range *new_ranges = - scm_gc_malloc_pointerless (sizeof (scm_t_char_range) * (len - 1), - "charset"); - if (idx) - memcpy (new_ranges, ranges, sizeof (scm_t_char_range) * idx); - if (idx + 1 < len) - memcpy (new_ranges + idx, ranges + idx + 1, - sizeof (scm_t_char_range) * (len - idx - 1)); return new_ranges; } /* Put N into charset CS. */ -void -scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) +static void +charset_set (struct scm_charset *cs, scm_t_wchar n) { size_t i; size_t len; - len = cs->len; + len = charset_len (cs); i = 0; while (i < len) { /* Already in this range */ - if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) + if (charset_range_contains (cs, i, n)) { return; } - if (n == cs->ranges[i].lo - 1) + if (n == charset_range_lo (cs, i) - 1) { /* This char is one below the current range. */ - if (i > 0 && cs->ranges[i - 1].hi + 1 == n) + if (i > 0 && charset_range_hi (cs, i - 1) + 1 == n) { /* It is also one above the previous range. */ /* This is an impossible condition: in the previous @@ -163,33 +298,31 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) else { /* Expand the range down by one. */ - cs->ranges[i].lo = n; + charset_range_set_lo (cs, i, n); return; } } - else if (n == cs->ranges[i].hi + 1) + else if (n == charset_range_hi (cs, i) + 1) { /* This char is one above the current range. */ - if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n) + if (i < len - 1 && charset_range_lo (cs, i + 1) - 1 == n) { /* It is also one below the next range, so combine them. */ - cs->ranges[i].hi = cs->ranges[i + 1].hi; - cs->ranges = char_ranges_delete (cs->ranges, len, i + 1); - cs->len = len - 1; + charset_range_set_hi (cs, i, charset_range_hi (cs, i + 1)); + cs->ranges = char_ranges_delete (cs->ranges, i + 1); return; } else { /* Expand the range up by one. */ - cs->ranges[i].hi = n; + charset_range_set_hi (cs, i, n); return; } } - else if (n < cs->ranges[i].lo - 1) + else if (n < charset_range_lo (cs, i) - 1) { /* This is a new range below the current one. */ - cs->ranges = char_ranges_insert (cs->ranges, len, i, n, n); - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, i, n, n); return; } @@ -197,71 +330,68 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) } /* This is a new range above all previous ranges. */ - cs->ranges = char_ranges_insert (cs->ranges, len, len, n, n); - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, len, n, n); return; } /* Put LO to HI inclusive into charset CS. */ static void -scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi) +charset_set_range (struct scm_charset *cs, scm_t_wchar lo, scm_t_wchar hi) { size_t i; i = 0; - while (i < cs->len) + while (i < charset_len (cs)) { /* Already in this range */ - if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi) + if (charset_range_lo (cs, i) <= lo && charset_range_hi (cs, i) >= hi) return; /* cur: +---+ new: +---+ */ - if (cs->ranges[i].lo - 1 > hi) + if (charset_range_lo (cs, i) - 1 > hi) { /* Add a new range below the current one. */ - cs->ranges = char_ranges_insert (cs->ranges, cs->len, i, lo, hi); - cs->len += 1; + cs->ranges = char_ranges_insert (cs->ranges, i, lo, hi); return; } /* cur: +---+ or +---+ or +---+ new: +---+ +---+ +---+ */ - if (cs->ranges[i].lo > lo - && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi)) + if (charset_range_lo (cs, i) > lo + && (charset_range_lo (cs, i) - 1 <= hi && charset_range_hi (cs, i) >= hi)) { - cs->ranges[i].lo = lo; + charset_range_set_lo (cs, i, lo); return; } /* cur: +---+ or +---+ or +---+ new: +---+ +---+ +---+ */ - else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi) + else if (charset_range_hi (cs, i) + 1 >= lo && charset_range_hi (cs, i) < hi) { - if (cs->ranges[i].lo > lo) - cs->ranges[i].lo = lo; - if (cs->ranges[i].hi < hi) - cs->ranges[i].hi = hi; - while (i < cs->len - 1) + if (charset_range_lo (cs, i) > lo) + charset_range_set_lo (cs, i, lo); + if (charset_range_hi (cs, i) < hi) + charset_range_set_hi (cs, i, hi); + while (i < charset_len (cs) - 1) { /* cur: --+ +---+ new: -----+ */ - if (cs->ranges[i + 1].lo - 1 > hi) + if (charset_range_lo (cs, i + 1) - 1 > hi) break; /* cur: --+ +---+ or --+ +---+ or --+ +--+ new: -----+ ------+ ---------+ */ /* Combine this range with the previous one. */ - if (cs->ranges[i + 1].hi > hi) - cs->ranges[i].hi = cs->ranges[i + 1].hi; - cs->ranges = char_ranges_delete (cs->ranges, cs->len, i + 1); - cs->len -= 1; + if (charset_range_hi (cs, i + 1) > hi) + charset_range_set_hi (cs, i, charset_range_hi (cs, i + 1)); + cs->ranges = char_ranges_delete (cs->ranges, i + 1); } return; } @@ -269,54 +399,51 @@ scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi) } /* This is a new range above all previous ranges. */ - cs->ranges = char_ranges_insert (cs->ranges, cs->len, cs->len, lo, hi); - cs->len += 1; + cs->ranges = char_ranges_insert (cs->ranges, charset_len (cs), lo, hi); return; } /* If N is in charset CS, remove it. */ -void -scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) +static void +charset_unset (struct scm_charset *cs, scm_t_wchar n) { size_t i; size_t len; - len = cs->len; + len = charset_len (cs); i = 0; while (i < len) { - if (n < cs->ranges[i].lo) + if (n < charset_range_lo (cs, i)) /* Not in this set. */ return; - if (n == cs->ranges[i].lo && n == cs->ranges[i].hi) + if (n == charset_range_lo (cs, i) && n == charset_range_hi (cs, i)) { /* Remove this one-character range. */ - cs->ranges = char_ranges_delete (cs->ranges, len, i); - cs->len = len - 1; + cs->ranges = char_ranges_delete (cs->ranges, i); return; } - else if (n == cs->ranges[i].lo) + else if (n == charset_range_lo (cs, i)) { /* Shrink this range from the left. */ - cs->ranges[i].lo = n + 1; + charset_range_set_lo (cs, i, n + 1); return; } - else if (n == cs->ranges[i].hi) + else if (n == charset_range_hi (cs, i)) { /* Shrink this range from the right. */ - cs->ranges[i].hi = n - 1; + charset_range_set_hi (cs, i, n - 1); return; } - else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi) + else if (n > charset_range_lo (cs, i) && n < charset_range_hi (cs, i)) { /* Split this range into two pieces. */ - cs->ranges = char_ranges_insert (cs->ranges, len, i + 1, - n + 1, cs->ranges[i].hi); - cs->ranges[i].hi = n - 1; - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, i + 1, + n + 1, charset_range_hi (cs, i)); + charset_range_set_hi (cs, i, n - 1); return; } @@ -328,46 +455,35 @@ scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) } static int -charsets_equal (scm_t_char_set *a, scm_t_char_set *b) +charsets_equal (struct scm_charset *a, struct scm_charset *b) { - if (a->len != b->len) - return 0; - - /* Empty charsets may have ranges == NULL. We must avoid passing - NULL to memcmp, even if the length is zero, to avoid undefined - behavior. */ - if (a->len == 0) - return 1; - - if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0) - return 0; - - return 1; + return scm_is_true (scm_bytevector_eq_p (scm_from_bytevector (a->ranges), + scm_from_bytevector (b->ranges))); } /* Return true if every character in A is also in B. */ static int -charsets_leq (scm_t_char_set *a, scm_t_char_set *b) +charsets_leq (struct scm_charset *a, struct scm_charset *b) { size_t i = 0, j = 0; scm_t_wchar alo, ahi; - if (a->len == 0) + if (charset_len (a) == 0) return 1; - if (b->len == 0) + if (charset_len (b) == 0) return 0; - while (i < a->len) + while (i < charset_len (a)) { - alo = a->ranges[i].lo; - ahi = a->ranges[i].hi; - while (b->ranges[j].hi < alo) + alo = charset_range_lo (a, i); + ahi = charset_range_hi (a, i); + while (charset_range_hi (b, j) < alo) { - if (j < b->len - 1) + if (j < charset_len (b) - 1) j++; else return 0; } - if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi) + if (alo < charset_range_lo (b, j) || ahi > charset_range_hi (b, j)) return 0; i++; } @@ -377,28 +493,22 @@ charsets_leq (scm_t_char_set *a, scm_t_char_set *b) /* Merge B into A. */ static void -charsets_union (scm_t_char_set *a, scm_t_char_set *b) +charsets_union (struct scm_charset *a, struct scm_charset *b) { size_t i = 0; scm_t_wchar blo, bhi; - if (b->len == 0) - return; - - if (a->len == 0) + if (charset_len (a) == 0) { - a->len = b->len; - a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len, - "character-set"); - memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len); + a->ranges = clone_ranges (b->ranges); return; } - while (i < b->len) + while (i < charset_len (b)) { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; - scm_i_charset_set_range (a, blo, bhi); + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); + charset_set_range (a, blo, bhi); i++; } @@ -408,107 +518,93 @@ charsets_union (scm_t_char_set *a, scm_t_char_set *b) /* Remove elements not both in A and B from A. */ static void -charsets_intersection (scm_t_char_set *a, scm_t_char_set *b) +charsets_intersection (struct scm_charset *a, struct scm_charset *b) { - size_t i = 0; scm_t_wchar blo, bhi, n; - scm_t_char_set *c; - if (a->len == 0) + if (charset_len (a) == 0) return; - if (b->len == 0) + if (charset_len (b) == 0) { - a->len = 0; + a->ranges = empty_charset_ranges; return; } - c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set)); - c->len = 0; - c->ranges = NULL; + struct scm_charset *c = make_charset (empty_charset_ranges); - while (i < b->len) + size_t i = 0; + while (i < charset_len (b)) { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); for (n = blo; n <= bhi; n++) - if (scm_i_charset_get (a, n)) - scm_i_charset_set (c, n); + if (charset_get (a, n)) + charset_set (c, n); i++; } - a->len = c->len; - if (c->len != 0) - a->ranges = c->ranges; - else - a->ranges = NULL; - free (c); - return; + a->ranges = c->ranges; } #define SCM_ADD_RANGE(low, high) \ do { \ - p->ranges[idx].lo = (low); \ - p->ranges[idx++].hi = (high); \ + charset_range_set_lo (p, idx,low); \ + charset_range_set_hi (p, idx++, high); \ } while (0) -#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \ - do { \ - p->ranges[idx].lo = (low); \ - p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \ - p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \ - p->ranges[idx++].hi = (high); \ +#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \ + do { \ + charset_range_set_lo (p, idx, low); \ + charset_range_set_hi (p, idx++, SCM_CODEPOINT_SURROGATE_START - 1); \ + charset_range_set_lo (p, idx, SCM_CODEPOINT_SURROGATE_END + 1); \ + charset_range_set_hi (p, idx++, high); \ } while (0) /* Make P the compelement of Q. */ static void -charsets_complement (scm_t_char_set *p, scm_t_char_set *q) +charsets_complement (struct scm_charset *p, struct scm_charset *q) { int k, idx; idx = 0; - if (q->len == 0) + if (charset_len (q) == 0) { /* Fill with all valid codepoints. */ - p->len = 2; - p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2, - "character-set"); + p->ranges = make_ranges (2); SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX); return; } /* Count the number of ranges needed for the output. */ - p->len = 0; - if (q->ranges[0].lo > 0) - p->len++; - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) - p->len++; - p->len += q->len; - p->ranges = - (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len, - "character-set"); - if (q->ranges[0].lo > 0) + size_t len = charset_len (q); + if (charset_range_lo (q, 0) > 0) + len++; + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_MAX) + len++; + p->ranges = make_ranges (len); + if (charset_range_lo (q, 0) > 0) { - if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END) - SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1); + if (charset_range_lo (q, 0) > SCM_CODEPOINT_SURROGATE_END) + SCM_ADD_RANGE_SKIP_SURROGATES (0, charset_range_lo (q, 0) - 1); else - SCM_ADD_RANGE (0, q->ranges[0].lo - 1); + SCM_ADD_RANGE (0, charset_range_lo (q, 0) - 1); } - for (k = 1; k < q->len; k++) + for (k = 1; k < charset_len (q); k++) { - if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START - && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END) - SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); + if (charset_range_hi (q, k - 1) < SCM_CODEPOINT_SURROGATE_START + && charset_range_lo (q, k) - 1 > SCM_CODEPOINT_SURROGATE_END) + SCM_ADD_RANGE_SKIP_SURROGATES (charset_range_hi (q, k - 1) + 1, charset_range_lo (q, k) - 1); else - SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); + SCM_ADD_RANGE (charset_range_hi (q, k - 1) + 1, charset_range_lo (q, k) - 1); } - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_MAX) { - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START) - SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_SURROGATE_START) + SCM_ADD_RANGE_SKIP_SURROGATES (charset_range_hi (q, charset_len (q) - 1) + 1, SCM_CODEPOINT_MAX); else - SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); + SCM_ADD_RANGE (charset_range_hi (q, charset_len (q) - 1) + 1, SCM_CODEPOINT_MAX); } return; } @@ -517,36 +613,30 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q) /* Replace A with elements only found in one of A or B. */ static void -charsets_xor (scm_t_char_set *a, scm_t_char_set *b) +charsets_xor (struct scm_charset *a, struct scm_charset *b) { size_t i = 0; scm_t_wchar blo, bhi, n; - if (b->len == 0) + if (charset_len (b) == 0) + return; + + if (charset_len (a) == 0) { + a->ranges = clone_ranges (b->ranges); return; } - if (a->len == 0) + while (i < charset_len (b)) { - a->ranges = - (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * - b->len, "character-set"); - a->len = b->len; - memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len); - return; - } - - while (i < b->len) - { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); for (n = blo; n <= bhi; n++) { - if (scm_i_charset_get (a, n)) - scm_i_charset_unset (a, n); + if (charset_get (a, n)) + charset_unset (a, n); else - scm_i_charset_set (a, n); + charset_set (a, n); } i++; @@ -554,29 +644,28 @@ charsets_xor (scm_t_char_set *a, scm_t_char_set *b) return; } -/* Smob print hook for character sets. */ -static int -charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) +int +scm_i_print_char_set (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) { size_t i; int first = 1; - scm_t_char_set *p; + struct scm_charset *p; const size_t max_ranges_to_print = 50; - p = SCM_CHARSET_DATA (charset); + p = scm_to_charset (charset); scm_puts ("#len; i++) + for (i = 0; i < charset_len (p); i++) { if (first) first = 0; else scm_puts (" ", port); - scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port); - if (p->ranges[i].lo != p->ranges[i].hi) + scm_write (SCM_MAKE_CHAR (charset_range_lo (p, i)), port); + if (charset_range_lo (p, i) != charset_range_hi (p, i)) { scm_puts ("..", port); - scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port); + scm_write (SCM_MAKE_CHAR (charset_range_hi (p, i)), port); } if (i >= max_ranges_to_print) { @@ -590,6 +679,7 @@ charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) } /* Smob print hook for character sets cursors. */ +int scm_tc16_charset_cursor = 0; static int charset_cursor_print (SCM cursor, SCM port, scm_print_state *pstate SCM_UNUSED) @@ -611,48 +701,41 @@ charset_cursor_print (SCM cursor, SCM port, return 1; } - -/* Create a new, empty character set. */ -static SCM -make_char_set (const char *func_name) -{ - scm_t_char_set *p; - - p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set"); - memset (p, 0, sizeof (scm_t_char_set)); - SCM_RETURN_NEWSMOB (scm_tc16_charset, p); -} - - SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0, (SCM obj), "Return @code{#t} if @var{obj} is a character set, @code{#f}\n" "otherwise.") #define FUNC_NAME s_scm_char_set_p { - return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj)); + return scm_from_bool (scm_is_char_set (obj)); } #undef FUNC_NAME +int +scm_i_char_sets_equal (SCM a, SCM b) +{ + return charsets_equal (scm_to_charset (a), scm_to_charset (b)); +} + SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1, (SCM char_sets), "Return @code{#t} if all given character sets are equal.") #define FUNC_NAME s_scm_char_set_eq { int argnum = 1; - scm_t_char_set *cs1_data = NULL; + struct scm_charset *cs1_data = NULL; SCM_VALIDATE_REST_ARGUMENT (char_sets); while (!scm_is_null (char_sets)) { SCM csi = SCM_CAR (char_sets); - scm_t_char_set *csi_data; + struct scm_charset *csi_data; - SCM_VALIDATE_SMOB (argnum, csi, charset); + SCM_VALIDATE_CHARSET (argnum, csi); argnum++; - csi_data = SCM_CHARSET_DATA (csi); + csi_data = scm_to_charset (csi); if (cs1_data == NULL) cs1_data = csi_data; else if (!charsets_equal (cs1_data, csi_data)) @@ -671,18 +754,18 @@ SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1, #define FUNC_NAME s_scm_char_set_leq { int argnum = 1; - scm_t_char_set *prev_data = NULL; + struct scm_charset *prev_data = NULL; SCM_VALIDATE_REST_ARGUMENT (char_sets); while (!scm_is_null (char_sets)) { SCM csi = SCM_CAR (char_sets); - scm_t_char_set *csi_data; + struct scm_charset *csi_data; - SCM_VALIDATE_SMOB (argnum, csi, charset); + SCM_VALIDATE_CHARSET (argnum, csi); argnum++; - csi_data = SCM_CHARSET_DATA (csi); + csi_data = scm_to_charset (csi); if (prev_data) { if (!charsets_leq (prev_data, csi_data)) @@ -705,12 +788,12 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, { const unsigned long default_bnd = 871; unsigned long bnd; - scm_t_char_set *p; + struct scm_charset *p; unsigned long val = 0; int k; scm_t_wchar c; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); if (SCM_UNBNDP (bound)) bnd = default_bnd; @@ -721,10 +804,10 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, bnd = default_bnd; } - p = SCM_CHARSET_DATA (cs); - for (k = 0; k < p->len; k++) + p = scm_to_charset (cs); + for (k = 0; k < charset_len (p); k++) { - for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++) + for (c = charset_range_lo (p, k); c <= charset_range_hi (p, k); c++) val = c + (val << 1); } return scm_from_ulong (val % bnd); @@ -736,15 +819,15 @@ SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, (SCM cs), "Return a cursor into the character set @var{cs}.") #define FUNC_NAME s_scm_char_set_cursor { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor), "charset-cursor"); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) { cur_data->range = (size_t) (-1); cur_data->n = 0; @@ -752,7 +835,7 @@ SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, else { cur_data->range = 0; - cur_data->n = cs_data->ranges[0].lo; + cur_data->n = charset_range_lo (cs_data, 0); } SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data); } @@ -766,22 +849,22 @@ SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0, "pass a cursor for which @code{end-of-char-set?} returns true.") #define FUNC_NAME s_scm_char_set_ref { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; size_t i; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_SMOB (2, cursor, charset_cursor); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); /* Validate that this cursor is still true. */ i = cur_data->range; if (i == (size_t) (-1) - || i >= cs_data->len - || cur_data->n < cs_data->ranges[i].lo - || cur_data->n > cs_data->ranges[i].hi) + || i >= charset_len (cs_data) + || cur_data->n < charset_range_lo (cs_data, i) + || cur_data->n > charset_range_hi (cs_data, i)) SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); return SCM_MAKE_CHAR (cur_data->n); } @@ -795,30 +878,29 @@ SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0, "cursor given satisfies @code{end-of-char-set?}.") #define FUNC_NAME s_scm_char_set_cursor_next { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; size_t i; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_SMOB (2, cursor, charset_cursor); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); /* Validate that this cursor is still true. */ i = cur_data->range; if (i == (size_t) (-1) - || i >= cs_data->len - || cur_data->n < cs_data->ranges[i].lo - || cur_data->n > cs_data->ranges[i].hi) + || i >= charset_len (cs_data) + || !charset_range_contains (cs_data, i, cur_data->n)) SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); /* Increment the cursor. */ - if (cur_data->n == cs_data->ranges[i].hi) + if (cur_data->n == charset_range_hi (cs_data, i)) { - if (i + 1 < cs_data->len) + if (i + 1 < charset_len (cs_data)) { cur_data->range = i + 1; - cur_data->n = cs_data->ranges[i + 1].lo; + cur_data->n = charset_range_lo (cs_data, i + 1); } else { @@ -861,20 +943,20 @@ SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0, "initializing it with @var{knil}.") #define FUNC_NAME s_scm_char_set_fold { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; int k; scm_t_wchar n; SCM_VALIDATE_PROC (1, kons); - SCM_VALIDATE_SMOB (3, cs, charset); + SCM_VALIDATE_CHARSET (3, cs); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return knil; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil); } @@ -898,18 +980,19 @@ SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, "@end itemize") #define FUNC_NAME s_scm_char_set_unfold { - SCM result, tmp; + SCM tmp; + struct scm_charset *result; SCM_VALIDATE_PROC (1, p); SCM_VALIDATE_PROC (2, f); SCM_VALIDATE_PROC (3, g); if (!SCM_UNBNDP (base_cs)) { - SCM_VALIDATE_SMOB (5, base_cs, charset); - result = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (5, base_cs); + result = scm_to_charset (scm_char_set_copy (base_cs)); } else - result = make_char_set (FUNC_NAME); + result = make_charset (empty_charset_ranges); tmp = scm_call_1 (p, seed); while (scm_is_false (tmp)) @@ -917,12 +1000,12 @@ SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, SCM ch = scm_call_1 (f, seed); if (!SCM_CHARP (ch)) SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); - SCM_CHARSET_SET (result, SCM_CHAR (ch)); + charset_set (result, SCM_CHAR (ch)); seed = scm_call_1 (g, seed); tmp = scm_call_1 (p, seed); } - return result; + return scm_from_charset (result); } #undef FUNC_NAME @@ -971,20 +1054,20 @@ SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0, "@var{cs}. The return value is not specified.") #define FUNC_NAME s_scm_char_set_for_each { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; int k; scm_t_wchar n; SCM_VALIDATE_PROC (1, proc); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return SCM_UNSPECIFIED; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { scm_call_1 (proc, SCM_MAKE_CHAR (n)); } @@ -1000,30 +1083,25 @@ SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0, "@var{proc} must be a character -> character procedure.") #define FUNC_NAME s_scm_char_set_map { - SCM result; int k; - scm_t_char_set *cs_data; scm_t_wchar n; SCM_VALIDATE_PROC (1, proc); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - result = make_char_set (FUNC_NAME); - cs_data = SCM_CHARSET_DATA (cs); + struct scm_charset *result = make_charset (empty_charset_ranges); + struct scm_charset *cs_data = scm_to_charset (cs); - if (cs_data->len == 0) - return result; - - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n)); if (!SCM_CHARP (ch)) SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc)); - SCM_CHARSET_SET (result, SCM_CHAR (ch)); + charset_set (result, SCM_CHAR (ch)); } - return result; + return scm_from_charset (result); } #undef FUNC_NAME @@ -1034,25 +1112,8 @@ SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0, "characters in @var{cs}.") #define FUNC_NAME s_scm_char_set_copy { - SCM ret; - scm_t_char_set *p1, *p2; - - SCM_VALIDATE_SMOB (1, cs, charset); - ret = make_char_set (FUNC_NAME); - p1 = SCM_CHARSET_DATA (cs); - p2 = SCM_CHARSET_DATA (ret); - p2->len = p1->len; - - if (p1->len == 0) - p2->ranges = NULL; - else - { - p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len, - "character-set"); - memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len); - } - - return ret; + SCM_VALIDATE_CHARSET (1, cs); + return scm_from_charset (clone_charset (scm_to_charset (cs))); } #undef FUNC_NAME @@ -1062,11 +1123,10 @@ SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, "Return a character set containing all given characters.") #define FUNC_NAME s_scm_char_set { - SCM cs; int argnum = 1; SCM_VALIDATE_REST_ARGUMENT (rest); - cs = make_char_set (FUNC_NAME); + struct scm_charset *cs = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { scm_t_wchar c; @@ -1074,9 +1134,9 @@ SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c); argnum++; rest = SCM_CDR (rest); - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1088,15 +1148,15 @@ SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, "set are also included in the result.") #define FUNC_NAME s_scm_list_to_char_set { - SCM cs; - SCM_VALIDATE_LIST (1, list); + + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { - SCM_VALIDATE_SMOB (2, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (2, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } while (!scm_is_null (list)) { @@ -1106,10 +1166,9 @@ SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, SCM_VALIDATE_CHAR_COPY (0, chr, c); list = SCM_CDR (list); - - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1123,6 +1182,7 @@ SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, { SCM_VALIDATE_LIST (1, list); SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs); + struct scm_charset *cs = scm_to_charset (base_cs); while (!scm_is_null (list)) { SCM chr = SCM_CAR (list); @@ -1131,7 +1191,7 @@ SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, SCM_VALIDATE_CHAR_COPY (0, chr, c); list = SCM_CDR (list); - SCM_CHARSET_SET (base_cs, c); + charset_set (cs, c); } return base_cs; } @@ -1145,25 +1205,25 @@ SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, "set are also included in the result.") #define FUNC_NAME s_scm_string_to_char_set { - SCM cs; size_t k = 0, len; SCM_VALIDATE_STRING (1, str); + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { - SCM_VALIDATE_SMOB (2, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (2, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } len = scm_i_string_length (str); while (k < len) { scm_t_wchar c = scm_i_string_ref (str, k++); - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } scm_remember_upto_here_1 (str); - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1179,11 +1239,12 @@ SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs); + struct scm_charset *cs = scm_to_charset (base_cs); len = scm_i_string_length (str); while (k < len) { scm_t_wchar c = scm_i_string_ref (str, k++); - SCM_CHARSET_SET (base_cs, c); + charset_set (cs, c); } scm_remember_upto_here_1 (str); return base_cs; @@ -1198,35 +1259,31 @@ SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0, "from @var{base_cs} are added to the result.") #define FUNC_NAME s_scm_char_set_filter { - SCM ret; + struct scm_charset *ret; int k; scm_t_wchar n; - scm_t_char_set *p; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); if (!SCM_UNBNDP (base_cs)) { - SCM_VALIDATE_SMOB (3, base_cs, charset); - ret = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (3, base_cs); + ret = clone_charset (scm_to_charset (base_cs)); } else - ret = make_char_set (FUNC_NAME); + ret = make_charset (empty_charset_ranges); - p = SCM_CHARSET_DATA (cs); + struct scm_charset *p = scm_to_charset (cs); - if (p->len == 0) - return ret; - - for (k = 0; k < p->len; k++) - for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) + for (k = 0; k < charset_len (p); k++) + for (n = charset_range_lo (p, k); n <= charset_range_hi (p, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) - SCM_CHARSET_SET (ret, n); + charset_set (ret, n); } - return ret; + return scm_from_charset (ret); } #undef FUNC_NAME @@ -1240,22 +1297,20 @@ SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0, { int k; scm_t_wchar n; - scm_t_char_set *p; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs); - p = SCM_CHARSET_DATA (cs); - if (p->len == 0) - return base_cs; + struct scm_charset *src = scm_to_charset (cs); + struct scm_charset *dst = scm_to_charset (base_cs); - for (k = 0; k < p->len; k++) - for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) + for (k = 0; k < charset_len (src); k++) + for (n = charset_range_lo (src, k); n <= charset_range_hi (src, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) - SCM_CHARSET_SET (base_cs, n); + charset_set (dst, n); } return base_cs; } @@ -1269,7 +1324,6 @@ static SCM scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, SCM error, SCM base_cs, int reuse) { - SCM cs; size_t clower, cupper; clower = scm_to_size_t (lower); @@ -1289,25 +1343,26 @@ scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, } } + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { if (reuse) { SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs); - cs = base_cs; + cs = scm_to_charset (base_cs); } else { - SCM_VALIDATE_SMOB (3, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (3, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } } if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END) && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)) - return cs; + return scm_from_charset (cs); if (clower > SCM_CODEPOINT_MAX) clower = SCM_CODEPOINT_MAX; @@ -1319,12 +1374,12 @@ scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, cupper = SCM_CODEPOINT_SURROGATE_START - 1; if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END) { - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1); - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper); + charset_set_range (cs, clower, SCM_CODEPOINT_SURROGATE_START - 1); + charset_set_range (cs, SCM_CODEPOINT_SURROGATE_END + 1, cupper); } else - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper); - return cs; + charset_set_range (cs, clower, cupper); + return scm_from_charset (cs); } SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0, @@ -1365,7 +1420,7 @@ SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, "returned.") #define FUNC_NAME s_scm_ucs_range_to_char_set_x { - SCM_VALIDATE_SMOB (4, base_cs, charset); + SCM_VALIDATE_CHARSET (4, base_cs); return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, error, base_cs, 1); } @@ -1374,13 +1429,13 @@ SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0, (SCM x), "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.") -#define FUNC_NAME s_scm_to_char_set +#define FUNC_NAME s_scm_to_charset { if (scm_is_string (x)) return scm_string_to_char_set (x, SCM_UNDEFINED); else if (SCM_CHARP (x)) return scm_char_set (scm_list_1 (x)); - else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x)) + else if (scm_is_char_set (x)) return x; else scm_wrong_type_arg (NULL, 0, x); @@ -1393,16 +1448,16 @@ SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0, #define FUNC_NAME s_scm_char_set_size { int k, count = 0; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return scm_from_int (0); - for (k = 0; k < cs_data->len; k++) - count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1; + for (k = 0; k < charset_len (cs_data); k++) + count += charset_range_hi (cs_data, k) - charset_range_lo (cs_data, k) + 1; return scm_from_int (count); } @@ -1417,16 +1472,16 @@ SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0, { int k, count = 0; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + SCM_VALIDATE_CHARSET (2, cs); + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return scm_from_int (0); - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) @@ -1446,15 +1501,15 @@ SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0, int k; scm_t_wchar n; SCM result = SCM_EOL; - scm_t_char_set *p; + struct scm_charset *p; - SCM_VALIDATE_SMOB (1, cs, charset); - p = SCM_CHARSET_DATA (cs); - if (p->len == 0) + SCM_VALIDATE_CHARSET (1, cs); + p = scm_to_charset (cs); + if (charset_len (p) == 0) return SCM_EOL; - for (k = p->len - 1; k >= 0; k--) - for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--) + for (k = charset_len (p) - 1; k >= 0; k--) + for (n = charset_range_hi (p, k); n >= charset_range_lo (p, k); n--) result = scm_cons (SCM_MAKE_CHAR (n), result); return result; } @@ -1474,16 +1529,16 @@ SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, int wide = 0; SCM result; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; char *buf; scm_t_wchar *wbuf; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return scm_nullstr; - if (cs_data->ranges[cs_data->len - 1].hi > 255) + if (charset_range_hi (cs_data, charset_len (cs_data) - 1) > 255) wide = 1; count = scm_to_int (scm_char_set_size (cs)); @@ -1492,8 +1547,8 @@ SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, else result = scm_i_make_string (count, &buf, 0); - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { if (wide) wbuf[idx++] = n; @@ -1511,9 +1566,9 @@ SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0, "character set @var{cs}.") #define FUNC_NAME s_scm_char_set_contains_p { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_CHAR (2, ch); - return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch))); + return scm_from_bool (charset_get (scm_to_charset (cs), SCM_CHAR (ch))); } #undef FUNC_NAME @@ -1527,17 +1582,17 @@ SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0, int k; scm_t_wchar n; SCM res = SCM_BOOL_T; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return SCM_BOOL_T; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_false (res)) @@ -1556,17 +1611,17 @@ SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0, { int k; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return SCM_BOOL_T; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) @@ -1583,7 +1638,7 @@ SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1, "be a character set.") #define FUNC_NAME s_scm_char_set_adjoin { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_REST_ARGUMENT (rest); cs = scm_char_set_copy (cs); @@ -1608,7 +1663,7 @@ SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1, "must be a character set.") #define FUNC_NAME s_scm_char_set_delete { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_REST_ARGUMENT (rest); cs = scm_char_set_copy (cs); @@ -1679,17 +1734,15 @@ SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0, (SCM cs), "Return the complement of the character set @var{cs}.") #define FUNC_NAME s_scm_char_set_complement { - SCM res; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); - res = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res); - q = SCM_CHARSET_DATA (cs); + p = make_charset (empty_charset_ranges); + q = scm_to_charset (cs); charsets_complement (p, q); - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1699,25 +1752,20 @@ SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1, "Return the union of all argument character sets.") #define FUNC_NAME s_scm_char_set_union { - int c = 1; - SCM res; - scm_t_char_set *p; - SCM_VALIDATE_REST_ARGUMENT (rest); - res = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res); + struct scm_charset *p = make_charset (empty_charset_ranges); + int argnum = 1; while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - SCM_VALIDATE_SMOB (c, cs, charset); - c++; + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; rest = SCM_CDR (rest); - - charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs)); + charsets_union (p, scm_to_charset (cs)); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1727,35 +1775,26 @@ SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1, "Return the intersection of all argument character sets.") #define FUNC_NAME s_scm_char_set_intersection { - SCM res; - SCM_VALIDATE_REST_ARGUMENT (rest); if (scm_is_null (rest)) - res = make_char_set (FUNC_NAME); - else - { - scm_t_char_set *p; - int argnum = 2; + return scm_from_charset (make_charset (empty_charset_ranges)); - res = scm_char_set_copy (SCM_CAR (rest)); - p = SCM_CHARSET_DATA (res); + int argnum = 2; + struct scm_charset *res = clone_charset (scm_to_charset (SCM_CAR (rest))); + + rest = SCM_CDR (rest); + while (scm_is_pair (rest)) + { + SCM cs = SCM_CAR (rest); rest = SCM_CDR (rest); - while (scm_is_pair (rest)) - { - SCM cs = SCM_CAR (rest); - scm_t_char_set *cs_data; - - SCM_VALIDATE_SMOB (argnum, cs, charset); - argnum++; - cs_data = SCM_CHARSET_DATA (cs); - rest = SCM_CDR (rest); - charsets_intersection (p, cs_data); - } + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; + charsets_intersection (res, scm_to_charset (cs)); } - return res; + return scm_from_charset (res); } #undef FUNC_NAME @@ -1766,27 +1805,24 @@ SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1, #define FUNC_NAME s_scm_char_set_difference { int c = 2; - SCM res, compl; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs1, charset); + SCM_VALIDATE_CHARSET (1, cs1); SCM_VALIDATE_REST_ARGUMENT (rest); - res = scm_char_set_copy (cs1); - p = SCM_CHARSET_DATA (res); - compl = make_char_set (FUNC_NAME); - q = SCM_CHARSET_DATA (compl); + p = clone_charset (scm_to_charset (cs1)); + q = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - SCM_VALIDATE_SMOB (c, cs, charset); - c++; rest = SCM_CDR (rest); - charsets_complement (q, SCM_CHARSET_DATA (cs)); + SCM_VALIDATE_CHARSET (c, cs); + c++; + charsets_complement (q, scm_to_charset (cs)); charsets_intersection (p, q); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1796,35 +1832,19 @@ SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1, "Return the exclusive-or of all argument character sets.") #define FUNC_NAME s_scm_char_set_xor { - SCM res; - SCM_VALIDATE_REST_ARGUMENT (rest); - if (scm_is_null (rest)) - res = make_char_set (FUNC_NAME); - else + struct scm_charset *p = make_charset (empty_charset_ranges); + int argnum = 1; + while (scm_is_pair (rest)) { - int argnum = 2; - scm_t_char_set *p; - - res = scm_char_set_copy (SCM_CAR (rest)); - p = SCM_CHARSET_DATA (res); + SCM cs = SCM_CAR (rest); rest = SCM_CDR (rest); - - while (scm_is_pair (rest)) - { - SCM cs = SCM_CAR (rest); - scm_t_char_set *cs_data; - - SCM_VALIDATE_SMOB (argnum, cs, charset); - argnum++; - cs_data = SCM_CHARSET_DATA (cs); - rest = SCM_CDR (rest); - - charsets_xor (p, cs_data); - } + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; + charsets_xor (p, scm_to_charset (cs)); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1836,30 +1856,27 @@ SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1 #define FUNC_NAME s_scm_char_set_diff_plus_intersection { int c = 2; - SCM res1, res2; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs1, charset); + SCM_VALIDATE_CHARSET (1, cs1); SCM_VALIDATE_REST_ARGUMENT (rest); - res1 = scm_char_set_copy (cs1); - res2 = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res1); - q = SCM_CHARSET_DATA (res2); + p = clone_charset (scm_to_charset (cs1)); + q = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - scm_t_char_set *r; + struct scm_charset *r; - SCM_VALIDATE_SMOB (c, cs, charset); + SCM_VALIDATE_CHARSET (c, cs); c++; - r = SCM_CHARSET_DATA (cs); + r = scm_to_charset (cs); charsets_union (q, r); charsets_intersection (p, r); rest = SCM_CDR (rest); } - return scm_values_2 (res1, res2); + return scm_values_2 (scm_from_charset (p), scm_from_charset (q)); } #undef FUNC_NAME @@ -1980,14 +1997,7 @@ SCM scm_char_set_full; static inline SCM define_charset (const char *name, size_t len, const scm_t_char_range *ranges) { - SCM cs; - - scm_t_char_set *p = scm_gc_malloc_pointerless (sizeof (scm_t_char_set), - "charset"); - p->len = len; - /* Strip const qualifier but add immutable flag on SCM. */ - p->ranges = (scm_t_char_range *) ranges; - SCM_NEWSMOB (cs, scm_tc16_charset | SCM_CHARSET_F_IMMUTABLE, p); + SCM cs = scm_from_charset (make_static_charset (len, ranges)); scm_c_define (name, cs); return cs; } @@ -2009,30 +2019,30 @@ SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), SCM e1, e2, e3; SCM ranges = SCM_EOL, elt; size_t i; - scm_t_char_set *cs; + struct scm_charset *cs; char codepoint_string_lo[13], codepoint_string_hi[13]; - SCM_VALIDATE_SMOB (1, charset, charset); - cs = SCM_CHARSET_DATA (charset); + SCM_VALIDATE_CHARSET (1, charset); + cs = scm_to_charset (charset); e1 = scm_cons (scm_from_latin1_symbol ("char-set"), charset); e2 = scm_cons (scm_from_latin1_symbol ("n"), - scm_from_size_t (cs->len)); + scm_from_size_t (charset_len (cs))); - for (i = 0; i < cs->len; i++) + for (i = 0; i < charset_len (cs); i++) { - if (cs->ranges[i].lo > 0xFFFF) - sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo); + if (charset_range_lo (cs, i) > 0xFFFF) + sprintf (codepoint_string_lo, "U+%06x", charset_range_lo (cs, i)); else - sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo); - if (cs->ranges[i].hi > 0xFFFF) - sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi); + sprintf (codepoint_string_lo, "U+%04x", charset_range_lo (cs, i)); + if (charset_range_hi (cs, i) > 0xFFFF) + sprintf (codepoint_string_hi, "U+%06x", charset_range_hi (cs, i)); else - sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi); + sprintf (codepoint_string_hi, "U+%04x", charset_range_hi (cs, i)); - elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo), - SCM_MAKE_CHAR (cs->ranges[i].hi), + elt = scm_list_4 (SCM_MAKE_CHAR (charset_range_lo (cs, i)), + SCM_MAKE_CHAR (charset_range_hi (cs, i)), scm_from_locale_string (codepoint_string_lo), scm_from_locale_string (codepoint_string_hi)); ranges = scm_append (scm_list_2 (ranges, @@ -2054,8 +2064,8 @@ SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), void scm_init_srfi_14 (void) { - scm_tc16_charset = scm_make_smob_type ("character-set", 0); - scm_set_smob_print (scm_tc16_charset, charset_print); + empty_charset_ranges = + scm_i_make_typed_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_U32); scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0); scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print); diff --git a/libguile/srfi-14.h b/libguile/srfi-14.h index 69b09aef7..c3cb8f427 100644 --- a/libguile/srfi-14.h +++ b/libguile/srfi-14.h @@ -24,34 +24,12 @@ #include "libguile/chars.h" -typedef struct +static inline int +scm_is_char_set (SCM x) { - scm_t_wchar lo; - scm_t_wchar hi; -} scm_t_char_range; - -typedef struct -{ - size_t len; - scm_t_char_range *ranges; -} scm_t_char_set; - -typedef struct -{ - size_t range; - scm_t_wchar n; -} scm_t_char_set_cursor; - -#define SCM_CHARSET_GET(cs,idx) \ - scm_i_charset_get((scm_t_char_set *)SCM_SMOB_DATA(cs),idx) - -#define SCM_CHARSETP(x) (SCM_HAS_TYP16 (x, scm_tc16_charset)) - -/* Smob type code for character sets. */ -SCM_API int scm_tc16_charset; -SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n); -SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n); -SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n); + return SCM_HAS_TYP16 (x, scm_tc16_charset); +} +#define SCM_CHARSETP(x) (scm_is_char_set (x)) SCM_API SCM scm_char_set_p (SCM obj); SCM_API SCM scm_char_set_eq (SCM char_sets); @@ -120,6 +98,9 @@ SCM_API SCM scm_char_set_ascii; SCM_API SCM scm_char_set_empty; SCM_API SCM scm_char_set_full; +SCM_INTERNAL int scm_i_char_sets_equal (SCM a, SCM b); +SCM_INTERNAL int scm_i_print_char_set (SCM charset, SCM port, + scm_print_state *pstate); SCM_INTERNAL void scm_init_srfi_14 (void); #endif /* SCM_SRFI_14_H */ diff --git a/module/oop/goops.scm b/module/oop/goops.scm index 19eb7362b..203c7fc0e 100644 --- a/module/oop/goops.scm +++ b/module/oop/goops.scm @@ -70,7 +70,7 @@ - + ;; Numbers. @@ -84,7 +84,7 @@ ;; smob-type-name->class procedure. - + ;; Modules. @@ -1082,6 +1082,7 @@ slots as we go." (define-standard-class ()) (define-standard-class ()) (define-standard-class ()) +(define-standard-class ()) (define-standard-class ()) (define-standard-class ()) (define-standard-class ()) @@ -3538,7 +3539,6 @@ var{initargs}." (define (find-subclass ')) (define (find-subclass ')) (define (find-subclass ')) -(define (find-subclass ')) (define (find-subclass ')) ;; used to be a SMOB type, albeit not exported even to