diff --git a/libguile/eq.c b/libguile/eq.c index 57e4b931a..059954620 100644 --- a/libguile/eq.c +++ b/libguile/eq.c @@ -39,6 +39,7 @@ #include "pairs.h" #include "private-options.h" #include "smob.h" +#include "srfi-14.h" #include "stackchk.h" #include "strorder.h" #include "struct.h" @@ -382,6 +383,15 @@ scm_equal_p (SCM x, SCM y) x = scm_syntax_expression (x); y = scm_syntax_expression (y); goto tailrecurse; + case scm_tc7_ext: + switch (SCM_TYP16 (x)) + { + case scm_tc16_charset: + return scm_from_bool (scm_i_char_sets_equal (x, y)); + default: + abort (); + } + break; } /* Otherwise just return false. Dispatching to the generic is the wrong thing diff --git a/libguile/evalext.c b/libguile/evalext.c index a816623f8..d61778f98 100644 --- a/libguile/evalext.c +++ b/libguile/evalext.c @@ -97,6 +97,7 @@ SCM_DEFINE (scm_self_evaluating_p, "self-evaluating?", 1, 0, 0, case scm_tc7_ephemeron_table: case scm_tc7_thread: case scm_tcs_struct: + case scm_tc7_ext: return SCM_BOOL_T; default: return SCM_BOOL_F; diff --git a/libguile/goops.c b/libguile/goops.c index e93f2e99e..4a4fbad99 100644 --- a/libguile/goops.c +++ b/libguile/goops.c @@ -138,6 +138,7 @@ static SCM class_bitvector; static SCM class_finalizer; static SCM class_ephemeron; static SCM class_ephemeron_table; +static SCM class_character_set; static struct scm_ephemeron_table *vtable_class_map; static SCM pre_goops_vtables = SCM_EOL; @@ -338,6 +339,16 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0, /* A non-GOOPS struct. */ return scm_i_define_class_for_vtable (vtable); } + case scm_tc7_ext: + { + switch (SCM_TYP16 (x)) + { + case scm_tc16_charset: + return class_character_set; + default: + abort (); + } + } default: if (scm_is_pair (x)) return class_pair; @@ -968,6 +979,7 @@ SCM_DEFINE (scm_sys_goops_early_init, "%goops-early-init", 0, 0, 0, class_input_output_port = scm_variable_ref (scm_c_lookup ("")); class_ephemeron = scm_variable_ref (scm_c_lookup ("")); class_ephemeron_table = scm_variable_ref (scm_c_lookup ("")); + class_character_set = scm_variable_ref (scm_c_lookup ("")); create_smob_classes (); create_struct_classes (); diff --git a/libguile/print.c b/libguile/print.c index 8721fbf43..1ab762058 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -59,6 +59,7 @@ #include "programs.h" #include "read.h" #include "smob.h" +#include "srfi-14.h" #include "strings.h" #include "strports.h" #include "struct.h" @@ -785,6 +786,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) SCM_SMOB_DESCRIPTOR (exp).print (exp, port, pstate); EXIT_NESTED_DATA (pstate); break; + case scm_tc7_ext: + switch (SCM_TYP16 (exp)) + { + case scm_tc16_charset: + scm_i_print_char_set (exp, port, pstate); + break; + default: + abort (); + } + break; default: /* case scm_tcs_closures: */ punk: diff --git a/libguile/scm.h b/libguile/scm.h index 2f657f0e1..0ef18a139 100644 --- a/libguile/scm.h +++ b/libguile/scm.h @@ -413,10 +413,10 @@ typedef uintptr_t scm_t_bits; interest: numbers, ports and smobs in fact each represent collections of types, which are subdivided using tc16-codes. - tc16 (for tc7==scm_tc7_smob): - The largest part of the space of smob types is not subdivided in a - predefined way, since smobs can be added arbitrarily by user C - code. */ + tc16 (for tc7 in {scm_tc7_smob, scm_tc7_port, scm_tc7_ext}): Port + and smob types can be defined by the user and are allocated + dynamically. scm_tc7_ext tags are allocated statically, and are + for Guile-internal objects. */ @@ -505,8 +505,10 @@ typedef uintptr_t scm_t_bits; #define scm_tc7_unused_75 0x75 #define scm_tc7_smob 0x77 #define scm_tc7_port 0x7d -#define scm_tc7_unused_7f 0x7f +#define scm_tc7_ext 0x7f +/* Objects with scm_tc7_ext. */ +#define scm_tc16_charset 0x007f /* Definitions for tc16: */ #define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x)) diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c index 8cd999cc1..7b11bc9f4 100644 --- a/libguile/srfi-14.c +++ b/libguile/srfi-14.c @@ -26,6 +26,7 @@ #include #include "boolean.h" +#include "bytevectors-internal.h" #include "chars.h" #include "eval.h" #include "gsubr.h" @@ -43,20 +44,113 @@ #include "srfi-14.h" +typedef struct scm_char_range +{ + scm_t_wchar lo; + scm_t_wchar hi; +} scm_t_char_range; + /* Include the pre-computed standard charset data. */ #include "srfi-14.i.c" +/* We call this "charset" instead of "char_set" to avoid confusion with + "set" as a verb. */ +struct scm_charset +{ + scm_t_bits tag_and_flags; + struct scm_bytevector *ranges; +}; + +typedef struct +{ + size_t range; + scm_t_wchar n; +} scm_t_char_set_cursor; + +static inline struct scm_charset* +scm_to_charset (SCM scm) +{ + if (!scm_is_char_set (scm)) abort(); + return (struct scm_charset *) SCM_UNPACK_POINTER (scm); +} + +static inline SCM +scm_from_charset (struct scm_charset *s) +{ + return SCM_PACK_POINTER (s); +} + +static inline scm_t_char_range* +char_ranges (struct scm_bytevector *bv) +{ + return (scm_t_char_range *) scm_bytevector_contents (bv); +} + +static inline size_t +char_ranges_len (struct scm_bytevector *bv) +{ + return bv->length / sizeof (scm_t_char_range); +} + +static inline size_t +charset_len (struct scm_charset *s) +{ + return char_ranges_len (s->ranges); +} + +static inline scm_t_char_range* +charset_range (struct scm_charset *s, size_t i) +{ + return char_ranges (s->ranges) + i; +} + +static inline scm_t_wchar +charset_range_lo (struct scm_charset *s, size_t i) +{ + return charset_range (s, i)->lo; +} + +static inline scm_t_wchar +charset_range_hi (struct scm_charset *s, size_t i) +{ + return charset_range (s, i)->hi; +} + +static inline void +charset_range_set_lo (struct scm_charset *s, size_t i, scm_t_wchar ch) +{ + charset_range (s, i)->lo = ch; +} + +static inline void +charset_range_set_hi (struct scm_charset *s, size_t i, scm_t_wchar ch) +{ + charset_range (s, i)->hi = ch; +} + +static inline int +charset_range_contains (struct scm_charset *s, size_t idx, scm_t_wchar c) +{ + scm_t_char_range *range = charset_range (s, idx); + return range->lo <= c && c <= range->hi; +} + static const scm_t_bits SCM_CHARSET_F_IMMUTABLE = 1 << 16; static inline int -scm_charset_is_immutable (SCM charset) +charset_is_immutable (SCM charset) { - return SCM_SMOB_DATA_0 (charset) & SCM_CHARSET_F_IMMUTABLE; + return scm_to_charset (charset)->tag_and_flags & SCM_CHARSET_F_IMMUTABLE; } +#define SCM_VALIDATE_CHARSET(pos, x) \ + do { \ + SCM_ASSERT_TYPE (SCM_CHARSETP (x), x, pos, FUNC_NAME, "charset"); \ + } while (0) + #define SCM_VALIDATE_MUTABLE_CHARSET(pos, x) \ do { \ - SCM_ASSERT_TYPE (SCM_CHARSETP (x) && !scm_charset_is_immutable (x), \ + SCM_ASSERT_TYPE (SCM_CHARSETP (x) && !charset_is_immutable (x), \ x, pos, FUNC_NAME, "mutable charset"); \ } while (0) @@ -66,30 +160,74 @@ static const scm_t_char_range cs_full_ranges[] = { }; static const size_t cs_full_len = 2; -#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) +#define SCM_CHARSET_GET(cs,idx) charset_get (scm_to_charset (cs), idx) +#define SCM_CHARSET_SET(cs, idx) charset_set (scm_to_charset (cs), idx) +#define SCM_CHARSET_UNSET(cs, idx) charset_unset (scm_to_charset (cs), idx) -#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset)) +static struct scm_bytevector *empty_charset_ranges; -#define SCM_CHARSET_SET(cs, idx) \ - scm_i_charset_set (SCM_CHARSET_DATA (cs), idx) +static struct scm_charset * +make_charset (struct scm_bytevector *ranges) +{ + struct scm_charset *p = scm_gc_malloc (sizeof (struct scm_charset), + "charset"); + p->tag_and_flags = scm_tc16_charset; + p->ranges = ranges; + return p; +} -#define SCM_CHARSET_UNSET(cs, idx) \ - scm_i_charset_unset (SCM_CHARSET_DATA (cs), idx) +static struct scm_charset * +make_static_charset (size_t len, const scm_t_char_range *ranges) +{ + size_t u32_count = len * sizeof (*ranges) / sizeof(uint32_t); + SCM bv = scm_c_take_typed_bytevector ((signed char*) ranges, u32_count, + SCM_ARRAY_ELEMENT_TYPE_U32, + SCM_BOOL_F); + struct scm_charset *ret = make_charset (scm_to_bytevector (bv)); + ret->tag_and_flags |= SCM_CHARSET_F_IMMUTABLE; + return ret; +} -/* Smob type code for character sets. */ -int scm_tc16_charset = 0; -int scm_tc16_charset_cursor = 0; +static struct scm_bytevector * +make_ranges (size_t n) +{ + return n + ? scm_i_make_typed_bytevector (n * 2, SCM_ARRAY_ELEMENT_TYPE_U32) + : empty_charset_ranges; +} + +static void +copy_ranges (scm_t_char_range *dst, scm_t_char_range *src, size_t count) +{ + if (count) + memcpy (dst, src, count * sizeof(*dst)); +} + +static struct scm_bytevector * +clone_ranges (struct scm_bytevector *ranges) +{ + size_t len = char_ranges_len (ranges); + struct scm_bytevector *ret = make_ranges (len); + copy_ranges (char_ranges (ret), char_ranges (ranges), len); + return ret; +} + +static struct scm_charset* +clone_charset (struct scm_charset *src) +{ + return make_charset (clone_ranges (src->ranges)); +} /* True if N exists in charset CS. */ -int -scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) +static int +charset_get (struct scm_charset *cs, scm_t_wchar n) { size_t i; i = 0; - while (i < cs->len) + while (i < charset_len (cs)) { - if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) + if (charset_range_contains (cs, i, n)) return 1; i++; } @@ -97,62 +235,59 @@ scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n) return 0; } -static scm_t_char_range* -char_ranges_insert (scm_t_char_range *ranges, size_t len, size_t idx, +static struct scm_bytevector * +char_ranges_insert (struct scm_bytevector *ranges, size_t idx, scm_t_wchar lo, scm_t_wchar hi) { - scm_t_char_range *new_ranges = - scm_gc_malloc_pointerless (sizeof (scm_t_char_range) * (len + 1), - "charset"); - if (idx) - memcpy (new_ranges, ranges, sizeof (scm_t_char_range) * idx); - new_ranges[idx].lo = lo; - new_ranges[idx].hi = hi; - if (idx < len) - memcpy (new_ranges + idx + 1, ranges + idx, - sizeof (scm_t_char_range) * (len - idx)); + size_t len = char_ranges_len (ranges); + struct scm_bytevector *new_ranges = make_ranges (len + 1); + struct scm_char_range inserted = { lo, hi }; + + copy_ranges (char_ranges (new_ranges), char_ranges (ranges), idx); + char_ranges (new_ranges)[idx] = inserted; + copy_ranges (char_ranges (new_ranges) + idx + 1, + char_ranges (ranges) + idx, + len - idx); + return new_ranges; } -static scm_t_char_range* -char_ranges_delete (scm_t_char_range *ranges, size_t len, size_t idx) +static struct scm_bytevector * +char_ranges_delete (struct scm_bytevector *ranges, size_t idx) { - if (len == 1) - return NULL; + size_t len = char_ranges_len (ranges); + struct scm_bytevector *new_ranges = make_ranges (len - 1); + + copy_ranges (char_ranges (new_ranges), char_ranges (ranges), idx); + copy_ranges (char_ranges (new_ranges) + idx, + char_ranges (ranges) + idx + 1, + len - idx - 1); - scm_t_char_range *new_ranges = - scm_gc_malloc_pointerless (sizeof (scm_t_char_range) * (len - 1), - "charset"); - if (idx) - memcpy (new_ranges, ranges, sizeof (scm_t_char_range) * idx); - if (idx + 1 < len) - memcpy (new_ranges + idx, ranges + idx + 1, - sizeof (scm_t_char_range) * (len - idx - 1)); return new_ranges; } /* Put N into charset CS. */ -void -scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) +static void +charset_set (struct scm_charset *cs, scm_t_wchar n) { size_t i; size_t len; - len = cs->len; + len = charset_len (cs); i = 0; while (i < len) { /* Already in this range */ - if (cs->ranges[i].lo <= n && n <= cs->ranges[i].hi) + if (charset_range_contains (cs, i, n)) { return; } - if (n == cs->ranges[i].lo - 1) + if (n == charset_range_lo (cs, i) - 1) { /* This char is one below the current range. */ - if (i > 0 && cs->ranges[i - 1].hi + 1 == n) + if (i > 0 && charset_range_hi (cs, i - 1) + 1 == n) { /* It is also one above the previous range. */ /* This is an impossible condition: in the previous @@ -163,33 +298,31 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) else { /* Expand the range down by one. */ - cs->ranges[i].lo = n; + charset_range_set_lo (cs, i, n); return; } } - else if (n == cs->ranges[i].hi + 1) + else if (n == charset_range_hi (cs, i) + 1) { /* This char is one above the current range. */ - if (i < len - 1 && cs->ranges[i + 1].lo - 1 == n) + if (i < len - 1 && charset_range_lo (cs, i + 1) - 1 == n) { /* It is also one below the next range, so combine them. */ - cs->ranges[i].hi = cs->ranges[i + 1].hi; - cs->ranges = char_ranges_delete (cs->ranges, len, i + 1); - cs->len = len - 1; + charset_range_set_hi (cs, i, charset_range_hi (cs, i + 1)); + cs->ranges = char_ranges_delete (cs->ranges, i + 1); return; } else { /* Expand the range up by one. */ - cs->ranges[i].hi = n; + charset_range_set_hi (cs, i, n); return; } } - else if (n < cs->ranges[i].lo - 1) + else if (n < charset_range_lo (cs, i) - 1) { /* This is a new range below the current one. */ - cs->ranges = char_ranges_insert (cs->ranges, len, i, n, n); - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, i, n, n); return; } @@ -197,71 +330,68 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n) } /* This is a new range above all previous ranges. */ - cs->ranges = char_ranges_insert (cs->ranges, len, len, n, n); - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, len, n, n); return; } /* Put LO to HI inclusive into charset CS. */ static void -scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi) +charset_set_range (struct scm_charset *cs, scm_t_wchar lo, scm_t_wchar hi) { size_t i; i = 0; - while (i < cs->len) + while (i < charset_len (cs)) { /* Already in this range */ - if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi) + if (charset_range_lo (cs, i) <= lo && charset_range_hi (cs, i) >= hi) return; /* cur: +---+ new: +---+ */ - if (cs->ranges[i].lo - 1 > hi) + if (charset_range_lo (cs, i) - 1 > hi) { /* Add a new range below the current one. */ - cs->ranges = char_ranges_insert (cs->ranges, cs->len, i, lo, hi); - cs->len += 1; + cs->ranges = char_ranges_insert (cs->ranges, i, lo, hi); return; } /* cur: +---+ or +---+ or +---+ new: +---+ +---+ +---+ */ - if (cs->ranges[i].lo > lo - && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi)) + if (charset_range_lo (cs, i) > lo + && (charset_range_lo (cs, i) - 1 <= hi && charset_range_hi (cs, i) >= hi)) { - cs->ranges[i].lo = lo; + charset_range_set_lo (cs, i, lo); return; } /* cur: +---+ or +---+ or +---+ new: +---+ +---+ +---+ */ - else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi) + else if (charset_range_hi (cs, i) + 1 >= lo && charset_range_hi (cs, i) < hi) { - if (cs->ranges[i].lo > lo) - cs->ranges[i].lo = lo; - if (cs->ranges[i].hi < hi) - cs->ranges[i].hi = hi; - while (i < cs->len - 1) + if (charset_range_lo (cs, i) > lo) + charset_range_set_lo (cs, i, lo); + if (charset_range_hi (cs, i) < hi) + charset_range_set_hi (cs, i, hi); + while (i < charset_len (cs) - 1) { /* cur: --+ +---+ new: -----+ */ - if (cs->ranges[i + 1].lo - 1 > hi) + if (charset_range_lo (cs, i + 1) - 1 > hi) break; /* cur: --+ +---+ or --+ +---+ or --+ +--+ new: -----+ ------+ ---------+ */ /* Combine this range with the previous one. */ - if (cs->ranges[i + 1].hi > hi) - cs->ranges[i].hi = cs->ranges[i + 1].hi; - cs->ranges = char_ranges_delete (cs->ranges, cs->len, i + 1); - cs->len -= 1; + if (charset_range_hi (cs, i + 1) > hi) + charset_range_set_hi (cs, i, charset_range_hi (cs, i + 1)); + cs->ranges = char_ranges_delete (cs->ranges, i + 1); } return; } @@ -269,54 +399,51 @@ scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi) } /* This is a new range above all previous ranges. */ - cs->ranges = char_ranges_insert (cs->ranges, cs->len, cs->len, lo, hi); - cs->len += 1; + cs->ranges = char_ranges_insert (cs->ranges, charset_len (cs), lo, hi); return; } /* If N is in charset CS, remove it. */ -void -scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) +static void +charset_unset (struct scm_charset *cs, scm_t_wchar n) { size_t i; size_t len; - len = cs->len; + len = charset_len (cs); i = 0; while (i < len) { - if (n < cs->ranges[i].lo) + if (n < charset_range_lo (cs, i)) /* Not in this set. */ return; - if (n == cs->ranges[i].lo && n == cs->ranges[i].hi) + if (n == charset_range_lo (cs, i) && n == charset_range_hi (cs, i)) { /* Remove this one-character range. */ - cs->ranges = char_ranges_delete (cs->ranges, len, i); - cs->len = len - 1; + cs->ranges = char_ranges_delete (cs->ranges, i); return; } - else if (n == cs->ranges[i].lo) + else if (n == charset_range_lo (cs, i)) { /* Shrink this range from the left. */ - cs->ranges[i].lo = n + 1; + charset_range_set_lo (cs, i, n + 1); return; } - else if (n == cs->ranges[i].hi) + else if (n == charset_range_hi (cs, i)) { /* Shrink this range from the right. */ - cs->ranges[i].hi = n - 1; + charset_range_set_hi (cs, i, n - 1); return; } - else if (n > cs->ranges[i].lo && n < cs->ranges[i].hi) + else if (n > charset_range_lo (cs, i) && n < charset_range_hi (cs, i)) { /* Split this range into two pieces. */ - cs->ranges = char_ranges_insert (cs->ranges, len, i + 1, - n + 1, cs->ranges[i].hi); - cs->ranges[i].hi = n - 1; - cs->len = len + 1; + cs->ranges = char_ranges_insert (cs->ranges, i + 1, + n + 1, charset_range_hi (cs, i)); + charset_range_set_hi (cs, i, n - 1); return; } @@ -328,46 +455,35 @@ scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n) } static int -charsets_equal (scm_t_char_set *a, scm_t_char_set *b) +charsets_equal (struct scm_charset *a, struct scm_charset *b) { - if (a->len != b->len) - return 0; - - /* Empty charsets may have ranges == NULL. We must avoid passing - NULL to memcmp, even if the length is zero, to avoid undefined - behavior. */ - if (a->len == 0) - return 1; - - if (memcmp (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len) != 0) - return 0; - - return 1; + return scm_is_true (scm_bytevector_eq_p (scm_from_bytevector (a->ranges), + scm_from_bytevector (b->ranges))); } /* Return true if every character in A is also in B. */ static int -charsets_leq (scm_t_char_set *a, scm_t_char_set *b) +charsets_leq (struct scm_charset *a, struct scm_charset *b) { size_t i = 0, j = 0; scm_t_wchar alo, ahi; - if (a->len == 0) + if (charset_len (a) == 0) return 1; - if (b->len == 0) + if (charset_len (b) == 0) return 0; - while (i < a->len) + while (i < charset_len (a)) { - alo = a->ranges[i].lo; - ahi = a->ranges[i].hi; - while (b->ranges[j].hi < alo) + alo = charset_range_lo (a, i); + ahi = charset_range_hi (a, i); + while (charset_range_hi (b, j) < alo) { - if (j < b->len - 1) + if (j < charset_len (b) - 1) j++; else return 0; } - if (alo < b->ranges[j].lo || ahi > b->ranges[j].hi) + if (alo < charset_range_lo (b, j) || ahi > charset_range_hi (b, j)) return 0; i++; } @@ -377,28 +493,22 @@ charsets_leq (scm_t_char_set *a, scm_t_char_set *b) /* Merge B into A. */ static void -charsets_union (scm_t_char_set *a, scm_t_char_set *b) +charsets_union (struct scm_charset *a, struct scm_charset *b) { size_t i = 0; scm_t_wchar blo, bhi; - if (b->len == 0) - return; - - if (a->len == 0) + if (charset_len (a) == 0) { - a->len = b->len; - a->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * b->len, - "character-set"); - memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * b->len); + a->ranges = clone_ranges (b->ranges); return; } - while (i < b->len) + while (i < charset_len (b)) { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; - scm_i_charset_set_range (a, blo, bhi); + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); + charset_set_range (a, blo, bhi); i++; } @@ -408,107 +518,93 @@ charsets_union (scm_t_char_set *a, scm_t_char_set *b) /* Remove elements not both in A and B from A. */ static void -charsets_intersection (scm_t_char_set *a, scm_t_char_set *b) +charsets_intersection (struct scm_charset *a, struct scm_charset *b) { - size_t i = 0; scm_t_wchar blo, bhi, n; - scm_t_char_set *c; - if (a->len == 0) + if (charset_len (a) == 0) return; - if (b->len == 0) + if (charset_len (b) == 0) { - a->len = 0; + a->ranges = empty_charset_ranges; return; } - c = (scm_t_char_set *) scm_malloc (sizeof (scm_t_char_set)); - c->len = 0; - c->ranges = NULL; + struct scm_charset *c = make_charset (empty_charset_ranges); - while (i < b->len) + size_t i = 0; + while (i < charset_len (b)) { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); for (n = blo; n <= bhi; n++) - if (scm_i_charset_get (a, n)) - scm_i_charset_set (c, n); + if (charset_get (a, n)) + charset_set (c, n); i++; } - a->len = c->len; - if (c->len != 0) - a->ranges = c->ranges; - else - a->ranges = NULL; - free (c); - return; + a->ranges = c->ranges; } #define SCM_ADD_RANGE(low, high) \ do { \ - p->ranges[idx].lo = (low); \ - p->ranges[idx++].hi = (high); \ + charset_range_set_lo (p, idx,low); \ + charset_range_set_hi (p, idx++, high); \ } while (0) -#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \ - do { \ - p->ranges[idx].lo = (low); \ - p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \ - p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \ - p->ranges[idx++].hi = (high); \ +#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \ + do { \ + charset_range_set_lo (p, idx, low); \ + charset_range_set_hi (p, idx++, SCM_CODEPOINT_SURROGATE_START - 1); \ + charset_range_set_lo (p, idx, SCM_CODEPOINT_SURROGATE_END + 1); \ + charset_range_set_hi (p, idx++, high); \ } while (0) /* Make P the compelement of Q. */ static void -charsets_complement (scm_t_char_set *p, scm_t_char_set *q) +charsets_complement (struct scm_charset *p, struct scm_charset *q) { int k, idx; idx = 0; - if (q->len == 0) + if (charset_len (q) == 0) { /* Fill with all valid codepoints. */ - p->len = 2; - p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2, - "character-set"); + p->ranges = make_ranges (2); SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX); return; } /* Count the number of ranges needed for the output. */ - p->len = 0; - if (q->ranges[0].lo > 0) - p->len++; - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) - p->len++; - p->len += q->len; - p->ranges = - (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len, - "character-set"); - if (q->ranges[0].lo > 0) + size_t len = charset_len (q); + if (charset_range_lo (q, 0) > 0) + len++; + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_MAX) + len++; + p->ranges = make_ranges (len); + if (charset_range_lo (q, 0) > 0) { - if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END) - SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1); + if (charset_range_lo (q, 0) > SCM_CODEPOINT_SURROGATE_END) + SCM_ADD_RANGE_SKIP_SURROGATES (0, charset_range_lo (q, 0) - 1); else - SCM_ADD_RANGE (0, q->ranges[0].lo - 1); + SCM_ADD_RANGE (0, charset_range_lo (q, 0) - 1); } - for (k = 1; k < q->len; k++) + for (k = 1; k < charset_len (q); k++) { - if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START - && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END) - SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); + if (charset_range_hi (q, k - 1) < SCM_CODEPOINT_SURROGATE_START + && charset_range_lo (q, k) - 1 > SCM_CODEPOINT_SURROGATE_END) + SCM_ADD_RANGE_SKIP_SURROGATES (charset_range_hi (q, k - 1) + 1, charset_range_lo (q, k) - 1); else - SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1); + SCM_ADD_RANGE (charset_range_hi (q, k - 1) + 1, charset_range_lo (q, k) - 1); } - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX) + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_MAX) { - if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START) - SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); + if (charset_range_hi (q, charset_len (q) - 1) < SCM_CODEPOINT_SURROGATE_START) + SCM_ADD_RANGE_SKIP_SURROGATES (charset_range_hi (q, charset_len (q) - 1) + 1, SCM_CODEPOINT_MAX); else - SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX); + SCM_ADD_RANGE (charset_range_hi (q, charset_len (q) - 1) + 1, SCM_CODEPOINT_MAX); } return; } @@ -517,36 +613,30 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q) /* Replace A with elements only found in one of A or B. */ static void -charsets_xor (scm_t_char_set *a, scm_t_char_set *b) +charsets_xor (struct scm_charset *a, struct scm_charset *b) { size_t i = 0; scm_t_wchar blo, bhi, n; - if (b->len == 0) + if (charset_len (b) == 0) + return; + + if (charset_len (a) == 0) { + a->ranges = clone_ranges (b->ranges); return; } - if (a->len == 0) + while (i < charset_len (b)) { - a->ranges = - (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * - b->len, "character-set"); - a->len = b->len; - memcpy (a->ranges, b->ranges, sizeof (scm_t_char_range) * a->len); - return; - } - - while (i < b->len) - { - blo = b->ranges[i].lo; - bhi = b->ranges[i].hi; + blo = charset_range_lo (b, i); + bhi = charset_range_hi (b, i); for (n = blo; n <= bhi; n++) { - if (scm_i_charset_get (a, n)) - scm_i_charset_unset (a, n); + if (charset_get (a, n)) + charset_unset (a, n); else - scm_i_charset_set (a, n); + charset_set (a, n); } i++; @@ -554,29 +644,28 @@ charsets_xor (scm_t_char_set *a, scm_t_char_set *b) return; } -/* Smob print hook for character sets. */ -static int -charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) +int +scm_i_print_char_set (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) { size_t i; int first = 1; - scm_t_char_set *p; + struct scm_charset *p; const size_t max_ranges_to_print = 50; - p = SCM_CHARSET_DATA (charset); + p = scm_to_charset (charset); scm_puts ("#len; i++) + for (i = 0; i < charset_len (p); i++) { if (first) first = 0; else scm_puts (" ", port); - scm_write (SCM_MAKE_CHAR (p->ranges[i].lo), port); - if (p->ranges[i].lo != p->ranges[i].hi) + scm_write (SCM_MAKE_CHAR (charset_range_lo (p, i)), port); + if (charset_range_lo (p, i) != charset_range_hi (p, i)) { scm_puts ("..", port); - scm_write (SCM_MAKE_CHAR (p->ranges[i].hi), port); + scm_write (SCM_MAKE_CHAR (charset_range_hi (p, i)), port); } if (i >= max_ranges_to_print) { @@ -590,6 +679,7 @@ charset_print (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED) } /* Smob print hook for character sets cursors. */ +int scm_tc16_charset_cursor = 0; static int charset_cursor_print (SCM cursor, SCM port, scm_print_state *pstate SCM_UNUSED) @@ -611,48 +701,41 @@ charset_cursor_print (SCM cursor, SCM port, return 1; } - -/* Create a new, empty character set. */ -static SCM -make_char_set (const char *func_name) -{ - scm_t_char_set *p; - - p = scm_gc_malloc (sizeof (scm_t_char_set), "character-set"); - memset (p, 0, sizeof (scm_t_char_set)); - SCM_RETURN_NEWSMOB (scm_tc16_charset, p); -} - - SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0, (SCM obj), "Return @code{#t} if @var{obj} is a character set, @code{#f}\n" "otherwise.") #define FUNC_NAME s_scm_char_set_p { - return scm_from_bool (SCM_SMOB_PREDICATE (scm_tc16_charset, obj)); + return scm_from_bool (scm_is_char_set (obj)); } #undef FUNC_NAME +int +scm_i_char_sets_equal (SCM a, SCM b) +{ + return charsets_equal (scm_to_charset (a), scm_to_charset (b)); +} + SCM_DEFINE (scm_char_set_eq, "char-set=", 0, 0, 1, (SCM char_sets), "Return @code{#t} if all given character sets are equal.") #define FUNC_NAME s_scm_char_set_eq { int argnum = 1; - scm_t_char_set *cs1_data = NULL; + struct scm_charset *cs1_data = NULL; SCM_VALIDATE_REST_ARGUMENT (char_sets); while (!scm_is_null (char_sets)) { SCM csi = SCM_CAR (char_sets); - scm_t_char_set *csi_data; + struct scm_charset *csi_data; - SCM_VALIDATE_SMOB (argnum, csi, charset); + SCM_VALIDATE_CHARSET (argnum, csi); argnum++; - csi_data = SCM_CHARSET_DATA (csi); + csi_data = scm_to_charset (csi); if (cs1_data == NULL) cs1_data = csi_data; else if (!charsets_equal (cs1_data, csi_data)) @@ -671,18 +754,18 @@ SCM_DEFINE (scm_char_set_leq, "char-set<=", 0, 0, 1, #define FUNC_NAME s_scm_char_set_leq { int argnum = 1; - scm_t_char_set *prev_data = NULL; + struct scm_charset *prev_data = NULL; SCM_VALIDATE_REST_ARGUMENT (char_sets); while (!scm_is_null (char_sets)) { SCM csi = SCM_CAR (char_sets); - scm_t_char_set *csi_data; + struct scm_charset *csi_data; - SCM_VALIDATE_SMOB (argnum, csi, charset); + SCM_VALIDATE_CHARSET (argnum, csi); argnum++; - csi_data = SCM_CHARSET_DATA (csi); + csi_data = scm_to_charset (csi); if (prev_data) { if (!charsets_leq (prev_data, csi_data)) @@ -705,12 +788,12 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, { const unsigned long default_bnd = 871; unsigned long bnd; - scm_t_char_set *p; + struct scm_charset *p; unsigned long val = 0; int k; scm_t_wchar c; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); if (SCM_UNBNDP (bound)) bnd = default_bnd; @@ -721,10 +804,10 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0, bnd = default_bnd; } - p = SCM_CHARSET_DATA (cs); - for (k = 0; k < p->len; k++) + p = scm_to_charset (cs); + for (k = 0; k < charset_len (p); k++) { - for (c = p->ranges[k].lo; c <= p->ranges[k].hi; c++) + for (c = charset_range_lo (p, k); c <= charset_range_hi (p, k); c++) val = c + (val << 1); } return scm_from_ulong (val % bnd); @@ -736,15 +819,15 @@ SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, (SCM cs), "Return a cursor into the character set @var{cs}.") #define FUNC_NAME s_scm_char_set_cursor { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor), "charset-cursor"); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) { cur_data->range = (size_t) (-1); cur_data->n = 0; @@ -752,7 +835,7 @@ SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0, else { cur_data->range = 0; - cur_data->n = cs_data->ranges[0].lo; + cur_data->n = charset_range_lo (cs_data, 0); } SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data); } @@ -766,22 +849,22 @@ SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0, "pass a cursor for which @code{end-of-char-set?} returns true.") #define FUNC_NAME s_scm_char_set_ref { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; size_t i; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_SMOB (2, cursor, charset_cursor); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); /* Validate that this cursor is still true. */ i = cur_data->range; if (i == (size_t) (-1) - || i >= cs_data->len - || cur_data->n < cs_data->ranges[i].lo - || cur_data->n > cs_data->ranges[i].hi) + || i >= charset_len (cs_data) + || cur_data->n < charset_range_lo (cs_data, i) + || cur_data->n > charset_range_hi (cs_data, i)) SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); return SCM_MAKE_CHAR (cur_data->n); } @@ -795,30 +878,29 @@ SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0, "cursor given satisfies @code{end-of-char-set?}.") #define FUNC_NAME s_scm_char_set_cursor_next { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; scm_t_char_set_cursor *cur_data; size_t i; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_SMOB (2, cursor, charset_cursor); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor); /* Validate that this cursor is still true. */ i = cur_data->range; if (i == (size_t) (-1) - || i >= cs_data->len - || cur_data->n < cs_data->ranges[i].lo - || cur_data->n > cs_data->ranges[i].hi) + || i >= charset_len (cs_data) + || !charset_range_contains (cs_data, i, cur_data->n)) SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor)); /* Increment the cursor. */ - if (cur_data->n == cs_data->ranges[i].hi) + if (cur_data->n == charset_range_hi (cs_data, i)) { - if (i + 1 < cs_data->len) + if (i + 1 < charset_len (cs_data)) { cur_data->range = i + 1; - cur_data->n = cs_data->ranges[i + 1].lo; + cur_data->n = charset_range_lo (cs_data, i + 1); } else { @@ -861,20 +943,20 @@ SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0, "initializing it with @var{knil}.") #define FUNC_NAME s_scm_char_set_fold { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; int k; scm_t_wchar n; SCM_VALIDATE_PROC (1, kons); - SCM_VALIDATE_SMOB (3, cs, charset); + SCM_VALIDATE_CHARSET (3, cs); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return knil; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { knil = scm_call_2 (kons, SCM_MAKE_CHAR (n), knil); } @@ -898,18 +980,19 @@ SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, "@end itemize") #define FUNC_NAME s_scm_char_set_unfold { - SCM result, tmp; + SCM tmp; + struct scm_charset *result; SCM_VALIDATE_PROC (1, p); SCM_VALIDATE_PROC (2, f); SCM_VALIDATE_PROC (3, g); if (!SCM_UNBNDP (base_cs)) { - SCM_VALIDATE_SMOB (5, base_cs, charset); - result = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (5, base_cs); + result = scm_to_charset (scm_char_set_copy (base_cs)); } else - result = make_char_set (FUNC_NAME); + result = make_charset (empty_charset_ranges); tmp = scm_call_1 (p, seed); while (scm_is_false (tmp)) @@ -917,12 +1000,12 @@ SCM_DEFINE (scm_char_set_unfold, "char-set-unfold", 4, 1, 0, SCM ch = scm_call_1 (f, seed); if (!SCM_CHARP (ch)) SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f)); - SCM_CHARSET_SET (result, SCM_CHAR (ch)); + charset_set (result, SCM_CHAR (ch)); seed = scm_call_1 (g, seed); tmp = scm_call_1 (p, seed); } - return result; + return scm_from_charset (result); } #undef FUNC_NAME @@ -971,20 +1054,20 @@ SCM_DEFINE (scm_char_set_for_each, "char-set-for-each", 2, 0, 0, "@var{cs}. The return value is not specified.") #define FUNC_NAME s_scm_char_set_for_each { - scm_t_char_set *cs_data; + struct scm_charset *cs_data; int k; scm_t_wchar n; SCM_VALIDATE_PROC (1, proc); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return SCM_UNSPECIFIED; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { scm_call_1 (proc, SCM_MAKE_CHAR (n)); } @@ -1000,30 +1083,25 @@ SCM_DEFINE (scm_char_set_map, "char-set-map", 2, 0, 0, "@var{proc} must be a character -> character procedure.") #define FUNC_NAME s_scm_char_set_map { - SCM result; int k; - scm_t_char_set *cs_data; scm_t_wchar n; SCM_VALIDATE_PROC (1, proc); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - result = make_char_set (FUNC_NAME); - cs_data = SCM_CHARSET_DATA (cs); + struct scm_charset *result = make_charset (empty_charset_ranges); + struct scm_charset *cs_data = scm_to_charset (cs); - if (cs_data->len == 0) - return result; - - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM ch = scm_call_1 (proc, SCM_MAKE_CHAR (n)); if (!SCM_CHARP (ch)) SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (proc)); - SCM_CHARSET_SET (result, SCM_CHAR (ch)); + charset_set (result, SCM_CHAR (ch)); } - return result; + return scm_from_charset (result); } #undef FUNC_NAME @@ -1034,25 +1112,8 @@ SCM_DEFINE (scm_char_set_copy, "char-set-copy", 1, 0, 0, "characters in @var{cs}.") #define FUNC_NAME s_scm_char_set_copy { - SCM ret; - scm_t_char_set *p1, *p2; - - SCM_VALIDATE_SMOB (1, cs, charset); - ret = make_char_set (FUNC_NAME); - p1 = SCM_CHARSET_DATA (cs); - p2 = SCM_CHARSET_DATA (ret); - p2->len = p1->len; - - if (p1->len == 0) - p2->ranges = NULL; - else - { - p2->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * p1->len, - "character-set"); - memcpy (p2->ranges, p1->ranges, sizeof (scm_t_char_range) * p1->len); - } - - return ret; + SCM_VALIDATE_CHARSET (1, cs); + return scm_from_charset (clone_charset (scm_to_charset (cs))); } #undef FUNC_NAME @@ -1062,11 +1123,10 @@ SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, "Return a character set containing all given characters.") #define FUNC_NAME s_scm_char_set { - SCM cs; int argnum = 1; SCM_VALIDATE_REST_ARGUMENT (rest); - cs = make_char_set (FUNC_NAME); + struct scm_charset *cs = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { scm_t_wchar c; @@ -1074,9 +1134,9 @@ SCM_DEFINE (scm_char_set, "char-set", 0, 0, 1, SCM_VALIDATE_CHAR_COPY (argnum, SCM_CAR (rest), c); argnum++; rest = SCM_CDR (rest); - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1088,15 +1148,15 @@ SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, "set are also included in the result.") #define FUNC_NAME s_scm_list_to_char_set { - SCM cs; - SCM_VALIDATE_LIST (1, list); + + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { - SCM_VALIDATE_SMOB (2, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (2, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } while (!scm_is_null (list)) { @@ -1106,10 +1166,9 @@ SCM_DEFINE (scm_list_to_char_set, "list->char-set", 1, 1, 0, SCM_VALIDATE_CHAR_COPY (0, chr, c); list = SCM_CDR (list); - - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1123,6 +1182,7 @@ SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, { SCM_VALIDATE_LIST (1, list); SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs); + struct scm_charset *cs = scm_to_charset (base_cs); while (!scm_is_null (list)) { SCM chr = SCM_CAR (list); @@ -1131,7 +1191,7 @@ SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0, SCM_VALIDATE_CHAR_COPY (0, chr, c); list = SCM_CDR (list); - SCM_CHARSET_SET (base_cs, c); + charset_set (cs, c); } return base_cs; } @@ -1145,25 +1205,25 @@ SCM_DEFINE (scm_string_to_char_set, "string->char-set", 1, 1, 0, "set are also included in the result.") #define FUNC_NAME s_scm_string_to_char_set { - SCM cs; size_t k = 0, len; SCM_VALIDATE_STRING (1, str); + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { - SCM_VALIDATE_SMOB (2, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (2, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } len = scm_i_string_length (str); while (k < len) { scm_t_wchar c = scm_i_string_ref (str, k++); - SCM_CHARSET_SET (cs, c); + charset_set (cs, c); } scm_remember_upto_here_1 (str); - return cs; + return scm_from_charset (cs); } #undef FUNC_NAME @@ -1179,11 +1239,12 @@ SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0, SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs); + struct scm_charset *cs = scm_to_charset (base_cs); len = scm_i_string_length (str); while (k < len) { scm_t_wchar c = scm_i_string_ref (str, k++); - SCM_CHARSET_SET (base_cs, c); + charset_set (cs, c); } scm_remember_upto_here_1 (str); return base_cs; @@ -1198,35 +1259,31 @@ SCM_DEFINE (scm_char_set_filter, "char-set-filter", 2, 1, 0, "from @var{base_cs} are added to the result.") #define FUNC_NAME s_scm_char_set_filter { - SCM ret; + struct scm_charset *ret; int k; scm_t_wchar n; - scm_t_char_set *p; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); if (!SCM_UNBNDP (base_cs)) { - SCM_VALIDATE_SMOB (3, base_cs, charset); - ret = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (3, base_cs); + ret = clone_charset (scm_to_charset (base_cs)); } else - ret = make_char_set (FUNC_NAME); + ret = make_charset (empty_charset_ranges); - p = SCM_CHARSET_DATA (cs); + struct scm_charset *p = scm_to_charset (cs); - if (p->len == 0) - return ret; - - for (k = 0; k < p->len; k++) - for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) + for (k = 0; k < charset_len (p); k++) + for (n = charset_range_lo (p, k); n <= charset_range_hi (p, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) - SCM_CHARSET_SET (ret, n); + charset_set (ret, n); } - return ret; + return scm_from_charset (ret); } #undef FUNC_NAME @@ -1240,22 +1297,20 @@ SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0, { int k; scm_t_wchar n; - scm_t_char_set *p; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs); - p = SCM_CHARSET_DATA (cs); - if (p->len == 0) - return base_cs; + struct scm_charset *src = scm_to_charset (cs); + struct scm_charset *dst = scm_to_charset (base_cs); - for (k = 0; k < p->len; k++) - for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++) + for (k = 0; k < charset_len (src); k++) + for (n = charset_range_lo (src, k); n <= charset_range_hi (src, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) - SCM_CHARSET_SET (base_cs, n); + charset_set (dst, n); } return base_cs; } @@ -1269,7 +1324,6 @@ static SCM scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, SCM error, SCM base_cs, int reuse) { - SCM cs; size_t clower, cupper; clower = scm_to_size_t (lower); @@ -1289,25 +1343,26 @@ scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, } } + struct scm_charset *cs; if (SCM_UNBNDP (base_cs)) - cs = make_char_set (FUNC_NAME); + cs = make_charset (empty_charset_ranges); else { if (reuse) { SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs); - cs = base_cs; + cs = scm_to_charset (base_cs); } else { - SCM_VALIDATE_SMOB (3, base_cs, charset); - cs = scm_char_set_copy (base_cs); + SCM_VALIDATE_CHARSET (3, base_cs); + cs = clone_charset (scm_to_charset (base_cs)); } } if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END) && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END)) - return cs; + return scm_from_charset (cs); if (clower > SCM_CODEPOINT_MAX) clower = SCM_CODEPOINT_MAX; @@ -1319,12 +1374,12 @@ scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, cupper = SCM_CODEPOINT_SURROGATE_START - 1; if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > SCM_CODEPOINT_SURROGATE_END) { - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, SCM_CODEPOINT_SURROGATE_START - 1); - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), SCM_CODEPOINT_SURROGATE_END + 1, cupper); + charset_set_range (cs, clower, SCM_CODEPOINT_SURROGATE_START - 1); + charset_set_range (cs, SCM_CODEPOINT_SURROGATE_END + 1, cupper); } else - scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper); - return cs; + charset_set_range (cs, clower, cupper); + return scm_from_charset (cs); } SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0, @@ -1365,7 +1420,7 @@ SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, "returned.") #define FUNC_NAME s_scm_ucs_range_to_char_set_x { - SCM_VALIDATE_SMOB (4, base_cs, charset); + SCM_VALIDATE_CHARSET (4, base_cs); return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, error, base_cs, 1); } @@ -1374,13 +1429,13 @@ SCM_DEFINE (scm_ucs_range_to_char_set_x, "ucs-range->char-set!", 4, 0, 0, SCM_DEFINE (scm_to_char_set, "->char-set", 1, 0, 0, (SCM x), "Coerces x into a char-set. @var{x} may be a string, character or char-set. A string is converted to the set of its constituent characters; a character is converted to a singleton set; a char-set is returned as-is.") -#define FUNC_NAME s_scm_to_char_set +#define FUNC_NAME s_scm_to_charset { if (scm_is_string (x)) return scm_string_to_char_set (x, SCM_UNDEFINED); else if (SCM_CHARP (x)) return scm_char_set (scm_list_1 (x)); - else if (SCM_SMOB_PREDICATE (scm_tc16_charset, x)) + else if (scm_is_char_set (x)) return x; else scm_wrong_type_arg (NULL, 0, x); @@ -1393,16 +1448,16 @@ SCM_DEFINE (scm_char_set_size, "char-set-size", 1, 0, 0, #define FUNC_NAME s_scm_char_set_size { int k, count = 0; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); - if (cs_data->len == 0) + if (charset_len (cs_data) == 0) return scm_from_int (0); - for (k = 0; k < cs_data->len; k++) - count += cs_data->ranges[k].hi - cs_data->ranges[k].lo + 1; + for (k = 0; k < charset_len (cs_data); k++) + count += charset_range_hi (cs_data, k) - charset_range_lo (cs_data, k) + 1; return scm_from_int (count); } @@ -1417,16 +1472,16 @@ SCM_DEFINE (scm_char_set_count, "char-set-count", 2, 0, 0, { int k, count = 0; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + SCM_VALIDATE_CHARSET (2, cs); + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return scm_from_int (0); - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) @@ -1446,15 +1501,15 @@ SCM_DEFINE (scm_char_set_to_list, "char-set->list", 1, 0, 0, int k; scm_t_wchar n; SCM result = SCM_EOL; - scm_t_char_set *p; + struct scm_charset *p; - SCM_VALIDATE_SMOB (1, cs, charset); - p = SCM_CHARSET_DATA (cs); - if (p->len == 0) + SCM_VALIDATE_CHARSET (1, cs); + p = scm_to_charset (cs); + if (charset_len (p) == 0) return SCM_EOL; - for (k = p->len - 1; k >= 0; k--) - for (n = p->ranges[k].hi; n >= p->ranges[k].lo; n--) + for (k = charset_len (p) - 1; k >= 0; k--) + for (n = charset_range_hi (p, k); n >= charset_range_lo (p, k); n--) result = scm_cons (SCM_MAKE_CHAR (n), result); return result; } @@ -1474,16 +1529,16 @@ SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, int wide = 0; SCM result; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; char *buf; scm_t_wchar *wbuf; - SCM_VALIDATE_SMOB (1, cs, charset); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + SCM_VALIDATE_CHARSET (1, cs); + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return scm_nullstr; - if (cs_data->ranges[cs_data->len - 1].hi > 255) + if (charset_range_hi (cs_data, charset_len (cs_data) - 1) > 255) wide = 1; count = scm_to_int (scm_char_set_size (cs)); @@ -1492,8 +1547,8 @@ SCM_DEFINE (scm_char_set_to_string, "char-set->string", 1, 0, 0, else result = scm_i_make_string (count, &buf, 0); - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { if (wide) wbuf[idx++] = n; @@ -1511,9 +1566,9 @@ SCM_DEFINE (scm_char_set_contains_p, "char-set-contains?", 2, 0, 0, "character set @var{cs}.") #define FUNC_NAME s_scm_char_set_contains_p { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_CHAR (2, ch); - return scm_from_bool (SCM_CHARSET_GET (cs, SCM_CHAR (ch))); + return scm_from_bool (charset_get (scm_to_charset (cs), SCM_CHAR (ch))); } #undef FUNC_NAME @@ -1527,17 +1582,17 @@ SCM_DEFINE (scm_char_set_every, "char-set-every", 2, 0, 0, int k; scm_t_wchar n; SCM res = SCM_BOOL_T; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return SCM_BOOL_T; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_false (res)) @@ -1556,17 +1611,17 @@ SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0, { int k; scm_t_wchar n; - scm_t_char_set *cs_data; + struct scm_charset *cs_data; SCM_VALIDATE_PROC (1, pred); - SCM_VALIDATE_SMOB (2, cs, charset); + SCM_VALIDATE_CHARSET (2, cs); - cs_data = SCM_CHARSET_DATA (cs); - if (cs_data->len == 0) + cs_data = scm_to_charset (cs); + if (charset_len (cs_data) == 0) return SCM_BOOL_T; - for (k = 0; k < cs_data->len; k++) - for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++) + for (k = 0; k < charset_len (cs_data); k++) + for (n = charset_range_lo (cs_data, k); n <= charset_range_hi (cs_data, k); n++) { SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n)); if (scm_is_true (res)) @@ -1583,7 +1638,7 @@ SCM_DEFINE (scm_char_set_adjoin, "char-set-adjoin", 1, 0, 1, "be a character set.") #define FUNC_NAME s_scm_char_set_adjoin { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_REST_ARGUMENT (rest); cs = scm_char_set_copy (cs); @@ -1608,7 +1663,7 @@ SCM_DEFINE (scm_char_set_delete, "char-set-delete", 1, 0, 1, "must be a character set.") #define FUNC_NAME s_scm_char_set_delete { - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); SCM_VALIDATE_REST_ARGUMENT (rest); cs = scm_char_set_copy (cs); @@ -1679,17 +1734,15 @@ SCM_DEFINE (scm_char_set_complement, "char-set-complement", 1, 0, 0, (SCM cs), "Return the complement of the character set @var{cs}.") #define FUNC_NAME s_scm_char_set_complement { - SCM res; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs, charset); + SCM_VALIDATE_CHARSET (1, cs); - res = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res); - q = SCM_CHARSET_DATA (cs); + p = make_charset (empty_charset_ranges); + q = scm_to_charset (cs); charsets_complement (p, q); - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1699,25 +1752,20 @@ SCM_DEFINE (scm_char_set_union, "char-set-union", 0, 0, 1, "Return the union of all argument character sets.") #define FUNC_NAME s_scm_char_set_union { - int c = 1; - SCM res; - scm_t_char_set *p; - SCM_VALIDATE_REST_ARGUMENT (rest); - res = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res); + struct scm_charset *p = make_charset (empty_charset_ranges); + int argnum = 1; while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - SCM_VALIDATE_SMOB (c, cs, charset); - c++; + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; rest = SCM_CDR (rest); - - charsets_union (p, (scm_t_char_set *) SCM_SMOB_DATA (cs)); + charsets_union (p, scm_to_charset (cs)); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1727,35 +1775,26 @@ SCM_DEFINE (scm_char_set_intersection, "char-set-intersection", 0, 0, 1, "Return the intersection of all argument character sets.") #define FUNC_NAME s_scm_char_set_intersection { - SCM res; - SCM_VALIDATE_REST_ARGUMENT (rest); if (scm_is_null (rest)) - res = make_char_set (FUNC_NAME); - else - { - scm_t_char_set *p; - int argnum = 2; + return scm_from_charset (make_charset (empty_charset_ranges)); - res = scm_char_set_copy (SCM_CAR (rest)); - p = SCM_CHARSET_DATA (res); + int argnum = 2; + struct scm_charset *res = clone_charset (scm_to_charset (SCM_CAR (rest))); + + rest = SCM_CDR (rest); + while (scm_is_pair (rest)) + { + SCM cs = SCM_CAR (rest); rest = SCM_CDR (rest); - while (scm_is_pair (rest)) - { - SCM cs = SCM_CAR (rest); - scm_t_char_set *cs_data; - - SCM_VALIDATE_SMOB (argnum, cs, charset); - argnum++; - cs_data = SCM_CHARSET_DATA (cs); - rest = SCM_CDR (rest); - charsets_intersection (p, cs_data); - } + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; + charsets_intersection (res, scm_to_charset (cs)); } - return res; + return scm_from_charset (res); } #undef FUNC_NAME @@ -1766,27 +1805,24 @@ SCM_DEFINE (scm_char_set_difference, "char-set-difference", 1, 0, 1, #define FUNC_NAME s_scm_char_set_difference { int c = 2; - SCM res, compl; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs1, charset); + SCM_VALIDATE_CHARSET (1, cs1); SCM_VALIDATE_REST_ARGUMENT (rest); - res = scm_char_set_copy (cs1); - p = SCM_CHARSET_DATA (res); - compl = make_char_set (FUNC_NAME); - q = SCM_CHARSET_DATA (compl); + p = clone_charset (scm_to_charset (cs1)); + q = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - SCM_VALIDATE_SMOB (c, cs, charset); - c++; rest = SCM_CDR (rest); - charsets_complement (q, SCM_CHARSET_DATA (cs)); + SCM_VALIDATE_CHARSET (c, cs); + c++; + charsets_complement (q, scm_to_charset (cs)); charsets_intersection (p, q); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1796,35 +1832,19 @@ SCM_DEFINE (scm_char_set_xor, "char-set-xor", 0, 0, 1, "Return the exclusive-or of all argument character sets.") #define FUNC_NAME s_scm_char_set_xor { - SCM res; - SCM_VALIDATE_REST_ARGUMENT (rest); - if (scm_is_null (rest)) - res = make_char_set (FUNC_NAME); - else + struct scm_charset *p = make_charset (empty_charset_ranges); + int argnum = 1; + while (scm_is_pair (rest)) { - int argnum = 2; - scm_t_char_set *p; - - res = scm_char_set_copy (SCM_CAR (rest)); - p = SCM_CHARSET_DATA (res); + SCM cs = SCM_CAR (rest); rest = SCM_CDR (rest); - - while (scm_is_pair (rest)) - { - SCM cs = SCM_CAR (rest); - scm_t_char_set *cs_data; - - SCM_VALIDATE_SMOB (argnum, cs, charset); - argnum++; - cs_data = SCM_CHARSET_DATA (cs); - rest = SCM_CDR (rest); - - charsets_xor (p, cs_data); - } + SCM_VALIDATE_CHARSET (argnum, cs); + argnum++; + charsets_xor (p, scm_to_charset (cs)); } - return res; + return scm_from_charset (p); } #undef FUNC_NAME @@ -1836,30 +1856,27 @@ SCM_DEFINE (scm_char_set_diff_plus_intersection, "char-set-diff+intersection", 1 #define FUNC_NAME s_scm_char_set_diff_plus_intersection { int c = 2; - SCM res1, res2; - scm_t_char_set *p, *q; + struct scm_charset *p, *q; - SCM_VALIDATE_SMOB (1, cs1, charset); + SCM_VALIDATE_CHARSET (1, cs1); SCM_VALIDATE_REST_ARGUMENT (rest); - res1 = scm_char_set_copy (cs1); - res2 = make_char_set (FUNC_NAME); - p = SCM_CHARSET_DATA (res1); - q = SCM_CHARSET_DATA (res2); + p = clone_charset (scm_to_charset (cs1)); + q = make_charset (empty_charset_ranges); while (!scm_is_null (rest)) { SCM cs = SCM_CAR (rest); - scm_t_char_set *r; + struct scm_charset *r; - SCM_VALIDATE_SMOB (c, cs, charset); + SCM_VALIDATE_CHARSET (c, cs); c++; - r = SCM_CHARSET_DATA (cs); + r = scm_to_charset (cs); charsets_union (q, r); charsets_intersection (p, r); rest = SCM_CDR (rest); } - return scm_values_2 (res1, res2); + return scm_values_2 (scm_from_charset (p), scm_from_charset (q)); } #undef FUNC_NAME @@ -1980,14 +1997,7 @@ SCM scm_char_set_full; static inline SCM define_charset (const char *name, size_t len, const scm_t_char_range *ranges) { - SCM cs; - - scm_t_char_set *p = scm_gc_malloc_pointerless (sizeof (scm_t_char_set), - "charset"); - p->len = len; - /* Strip const qualifier but add immutable flag on SCM. */ - p->ranges = (scm_t_char_range *) ranges; - SCM_NEWSMOB (cs, scm_tc16_charset | SCM_CHARSET_F_IMMUTABLE, p); + SCM cs = scm_from_charset (make_static_charset (len, ranges)); scm_c_define (name, cs); return cs; } @@ -2009,30 +2019,30 @@ SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), SCM e1, e2, e3; SCM ranges = SCM_EOL, elt; size_t i; - scm_t_char_set *cs; + struct scm_charset *cs; char codepoint_string_lo[13], codepoint_string_hi[13]; - SCM_VALIDATE_SMOB (1, charset, charset); - cs = SCM_CHARSET_DATA (charset); + SCM_VALIDATE_CHARSET (1, charset); + cs = scm_to_charset (charset); e1 = scm_cons (scm_from_latin1_symbol ("char-set"), charset); e2 = scm_cons (scm_from_latin1_symbol ("n"), - scm_from_size_t (cs->len)); + scm_from_size_t (charset_len (cs))); - for (i = 0; i < cs->len; i++) + for (i = 0; i < charset_len (cs); i++) { - if (cs->ranges[i].lo > 0xFFFF) - sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo); + if (charset_range_lo (cs, i) > 0xFFFF) + sprintf (codepoint_string_lo, "U+%06x", charset_range_lo (cs, i)); else - sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo); - if (cs->ranges[i].hi > 0xFFFF) - sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi); + sprintf (codepoint_string_lo, "U+%04x", charset_range_lo (cs, i)); + if (charset_range_hi (cs, i) > 0xFFFF) + sprintf (codepoint_string_hi, "U+%06x", charset_range_hi (cs, i)); else - sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi); + sprintf (codepoint_string_hi, "U+%04x", charset_range_hi (cs, i)); - elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo), - SCM_MAKE_CHAR (cs->ranges[i].hi), + elt = scm_list_4 (SCM_MAKE_CHAR (charset_range_lo (cs, i)), + SCM_MAKE_CHAR (charset_range_hi (cs, i)), scm_from_locale_string (codepoint_string_lo), scm_from_locale_string (codepoint_string_hi)); ranges = scm_append (scm_list_2 (ranges, @@ -2054,8 +2064,8 @@ SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), void scm_init_srfi_14 (void) { - scm_tc16_charset = scm_make_smob_type ("character-set", 0); - scm_set_smob_print (scm_tc16_charset, charset_print); + empty_charset_ranges = + scm_i_make_typed_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_U32); scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0); scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print); diff --git a/libguile/srfi-14.h b/libguile/srfi-14.h index 69b09aef7..c3cb8f427 100644 --- a/libguile/srfi-14.h +++ b/libguile/srfi-14.h @@ -24,34 +24,12 @@ #include "libguile/chars.h" -typedef struct +static inline int +scm_is_char_set (SCM x) { - scm_t_wchar lo; - scm_t_wchar hi; -} scm_t_char_range; - -typedef struct -{ - size_t len; - scm_t_char_range *ranges; -} scm_t_char_set; - -typedef struct -{ - size_t range; - scm_t_wchar n; -} scm_t_char_set_cursor; - -#define SCM_CHARSET_GET(cs,idx) \ - scm_i_charset_get((scm_t_char_set *)SCM_SMOB_DATA(cs),idx) - -#define SCM_CHARSETP(x) (SCM_HAS_TYP16 (x, scm_tc16_charset)) - -/* Smob type code for character sets. */ -SCM_API int scm_tc16_charset; -SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n); -SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n); -SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n); + return SCM_HAS_TYP16 (x, scm_tc16_charset); +} +#define SCM_CHARSETP(x) (scm_is_char_set (x)) SCM_API SCM scm_char_set_p (SCM obj); SCM_API SCM scm_char_set_eq (SCM char_sets); @@ -120,6 +98,9 @@ SCM_API SCM scm_char_set_ascii; SCM_API SCM scm_char_set_empty; SCM_API SCM scm_char_set_full; +SCM_INTERNAL int scm_i_char_sets_equal (SCM a, SCM b); +SCM_INTERNAL int scm_i_print_char_set (SCM charset, SCM port, + scm_print_state *pstate); SCM_INTERNAL void scm_init_srfi_14 (void); #endif /* SCM_SRFI_14_H */ diff --git a/module/oop/goops.scm b/module/oop/goops.scm index 19eb7362b..203c7fc0e 100644 --- a/module/oop/goops.scm +++ b/module/oop/goops.scm @@ -70,7 +70,7 @@ - + ;; Numbers. @@ -84,7 +84,7 @@ ;; smob-type-name->class procedure. - + ;; Modules. @@ -1082,6 +1082,7 @@ slots as we go." (define-standard-class ()) (define-standard-class ()) (define-standard-class ()) +(define-standard-class ()) (define-standard-class ()) (define-standard-class ()) (define-standard-class ()) @@ -3538,7 +3539,6 @@ var{initargs}." (define (find-subclass ')) (define (find-subclass ')) (define (find-subclass ')) -(define (find-subclass ')) (define (find-subclass ')) ;; used to be a SMOB type, albeit not exported even to