mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Update unicode tables to Unicode 14.0.0; initial charsets immutable
* libguile/srfi-14.i.c: Update from Unicode 14.0.0. * libguile/unidata_to_charset.pl (compute): Write arrays as static const data, to avoid polluting the namespace and to avoid adding these to the GC root set. * libguile/srfi-14.c (SCM_CODEPOINT_F_IMMUTABLE): New flag. (scm_charset_is_immutable): New internal predicate. (SCM_VALIDATE_MUTABLE_CHARSET): New internal validator. (cs_full_ranges, cs_full_ranges_len): Re-express as separate ranges and len, because the ranges pointer in scm_t_char_set is mutable. (scm_char_set_unfold_x, scm_list_to_char_set_x) (scm_string_to_char_set_x, scm_char_set_filter_x) (scm_i_ucs_range_to_char_set, scm_char_set_adjoin_x) (scm_char_set_delete_x, scm_char_set_complement_x) (scm_char_set_unfold_x, scm_char_set_intersection_x) (scm_char_set_difference_x, scm_char_set_xor_x): Require mutable charsets. (define_charset): Add immutable flag. (scm_init_srfi_14): Adapt initial charset definitions.
This commit is contained in:
parent
7e3470343a
commit
2f9bc7fe61
3 changed files with 5183 additions and 9994 deletions
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright 2001,2004,2006-2007,2009,2011,2018-2019
|
/* Copyright 2001,2004,2006-2007,2009,2011,2018-2019,2022
|
||||||
Free Software Foundation, Inc.
|
Free Software Foundation, Inc.
|
||||||
|
|
||||||
This file is part of Guile.
|
This file is part of Guile.
|
||||||
|
@ -46,17 +46,27 @@
|
||||||
/* Include the pre-computed standard charset data. */
|
/* Include the pre-computed standard charset data. */
|
||||||
#include "srfi-14.i.c"
|
#include "srfi-14.i.c"
|
||||||
|
|
||||||
scm_t_char_range cs_full_ranges[] = {
|
static const scm_t_bits SCM_CHARSET_F_IMMUTABLE = 1 << 16;
|
||||||
{0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
|
|
||||||
,
|
static inline int
|
||||||
|
scm_charset_is_immutable (SCM charset)
|
||||||
|
{
|
||||||
|
return SCM_SMOB_DATA_0 (charset) & SCM_CHARSET_F_IMMUTABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SCM_VALIDATE_MUTABLE_CHARSET(pos, x) \
|
||||||
|
do { \
|
||||||
|
SCM_ASSERT_TYPE (SCM_CHARSETP (x) && !scm_charset_is_immutable (x), \
|
||||||
|
x, pos, FUNC_NAME, "mutable charset"); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static const scm_t_char_range cs_full_ranges[] = {
|
||||||
|
{0x0000, SCM_CODEPOINT_SURROGATE_START - 1},
|
||||||
{SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
|
{SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
|
||||||
};
|
};
|
||||||
|
static const size_t cs_full_len = 2;
|
||||||
|
|
||||||
scm_t_char_set cs_full = {
|
#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
|
||||||
2,
|
|
||||||
cs_full_ranges
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
|
#define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
|
||||||
|
|
||||||
|
@ -997,7 +1007,7 @@ SCM_DEFINE (scm_char_set_unfold_x, "char-set-unfold!", 5, 0, 0,
|
||||||
SCM_VALIDATE_PROC (1, p);
|
SCM_VALIDATE_PROC (1, p);
|
||||||
SCM_VALIDATE_PROC (2, f);
|
SCM_VALIDATE_PROC (2, f);
|
||||||
SCM_VALIDATE_PROC (3, g);
|
SCM_VALIDATE_PROC (3, g);
|
||||||
SCM_VALIDATE_SMOB (5, base_cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (5, base_cs);
|
||||||
|
|
||||||
tmp = scm_call_1 (p, seed);
|
tmp = scm_call_1 (p, seed);
|
||||||
while (scm_is_false (tmp))
|
while (scm_is_false (tmp))
|
||||||
|
@ -1172,7 +1182,7 @@ SCM_DEFINE (scm_list_to_char_set_x, "list->char-set!", 2, 0, 0,
|
||||||
#define FUNC_NAME s_scm_list_to_char_set_x
|
#define FUNC_NAME s_scm_list_to_char_set_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_LIST (1, list);
|
SCM_VALIDATE_LIST (1, list);
|
||||||
SCM_VALIDATE_SMOB (2, base_cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs);
|
||||||
while (!scm_is_null (list))
|
while (!scm_is_null (list))
|
||||||
{
|
{
|
||||||
SCM chr = SCM_CAR (list);
|
SCM chr = SCM_CAR (list);
|
||||||
|
@ -1228,7 +1238,7 @@ SCM_DEFINE (scm_string_to_char_set_x, "string->char-set!", 2, 0, 0,
|
||||||
size_t k = 0, len;
|
size_t k = 0, len;
|
||||||
|
|
||||||
SCM_VALIDATE_STRING (1, str);
|
SCM_VALIDATE_STRING (1, str);
|
||||||
SCM_VALIDATE_SMOB (2, base_cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (2, base_cs);
|
||||||
len = scm_i_string_length (str);
|
len = scm_i_string_length (str);
|
||||||
while (k < len)
|
while (k < len)
|
||||||
{
|
{
|
||||||
|
@ -1294,7 +1304,7 @@ SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 0, 0,
|
||||||
|
|
||||||
SCM_VALIDATE_PROC (1, pred);
|
SCM_VALIDATE_PROC (1, pred);
|
||||||
SCM_VALIDATE_SMOB (2, cs, charset);
|
SCM_VALIDATE_SMOB (2, cs, charset);
|
||||||
SCM_VALIDATE_SMOB (3, base_cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs);
|
||||||
p = SCM_CHARSET_DATA (cs);
|
p = SCM_CHARSET_DATA (cs);
|
||||||
if (p->len == 0)
|
if (p->len == 0)
|
||||||
return base_cs;
|
return base_cs;
|
||||||
|
@ -1343,12 +1353,17 @@ scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper,
|
||||||
cs = make_char_set (FUNC_NAME);
|
cs = make_char_set (FUNC_NAME);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (3, base_cs, charset);
|
|
||||||
if (reuse)
|
if (reuse)
|
||||||
|
{
|
||||||
|
SCM_VALIDATE_MUTABLE_CHARSET (3, base_cs);
|
||||||
cs = base_cs;
|
cs = base_cs;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
SCM_VALIDATE_SMOB (3, base_cs, charset);
|
||||||
cs = scm_char_set_copy (base_cs);
|
cs = scm_char_set_copy (base_cs);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
|
if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= SCM_CODEPOINT_SURROGATE_END)
|
||||||
&& (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
|
&& (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= SCM_CODEPOINT_SURROGATE_END))
|
||||||
|
@ -1678,7 +1693,7 @@ SCM_DEFINE (scm_char_set_adjoin_x, "char-set-adjoin!", 1, 0, 1,
|
||||||
"be a character set.")
|
"be a character set.")
|
||||||
#define FUNC_NAME s_scm_char_set_adjoin_x
|
#define FUNC_NAME s_scm_char_set_adjoin_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs);
|
||||||
SCM_VALIDATE_REST_ARGUMENT (rest);
|
SCM_VALIDATE_REST_ARGUMENT (rest);
|
||||||
|
|
||||||
while (!scm_is_null (rest))
|
while (!scm_is_null (rest))
|
||||||
|
@ -1702,7 +1717,7 @@ SCM_DEFINE (scm_char_set_delete_x, "char-set-delete!", 1, 0, 1,
|
||||||
"must be a character set.")
|
"must be a character set.")
|
||||||
#define FUNC_NAME s_scm_char_set_delete_x
|
#define FUNC_NAME s_scm_char_set_delete_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs);
|
||||||
SCM_VALIDATE_REST_ARGUMENT (rest);
|
SCM_VALIDATE_REST_ARGUMENT (rest);
|
||||||
|
|
||||||
while (!scm_is_null (rest))
|
while (!scm_is_null (rest))
|
||||||
|
@ -1913,7 +1928,7 @@ SCM_DEFINE (scm_char_set_complement_x, "char-set-complement!", 1, 0, 0,
|
||||||
(SCM cs), "Return the complement of the character set @var{cs}.")
|
(SCM cs), "Return the complement of the character set @var{cs}.")
|
||||||
#define FUNC_NAME s_scm_char_set_complement_x
|
#define FUNC_NAME s_scm_char_set_complement_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs);
|
||||||
cs = scm_char_set_complement (cs);
|
cs = scm_char_set_complement (cs);
|
||||||
return cs;
|
return cs;
|
||||||
}
|
}
|
||||||
|
@ -1925,7 +1940,7 @@ SCM_DEFINE (scm_char_set_union_x, "char-set-union!", 1, 0, 1,
|
||||||
"Return the union of all argument character sets.")
|
"Return the union of all argument character sets.")
|
||||||
#define FUNC_NAME s_scm_char_set_union_x
|
#define FUNC_NAME s_scm_char_set_union_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs1, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs1);
|
||||||
SCM_VALIDATE_REST_ARGUMENT (rest);
|
SCM_VALIDATE_REST_ARGUMENT (rest);
|
||||||
|
|
||||||
cs1 = scm_char_set_union (scm_cons (cs1, rest));
|
cs1 = scm_char_set_union (scm_cons (cs1, rest));
|
||||||
|
@ -1939,7 +1954,7 @@ SCM_DEFINE (scm_char_set_intersection_x, "char-set-intersection!", 1, 0, 1,
|
||||||
"Return the intersection of all argument character sets.")
|
"Return the intersection of all argument character sets.")
|
||||||
#define FUNC_NAME s_scm_char_set_intersection_x
|
#define FUNC_NAME s_scm_char_set_intersection_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs1, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs1);
|
||||||
SCM_VALIDATE_REST_ARGUMENT (rest);
|
SCM_VALIDATE_REST_ARGUMENT (rest);
|
||||||
|
|
||||||
cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
|
cs1 = scm_char_set_intersection (scm_cons (cs1, rest));
|
||||||
|
@ -1953,7 +1968,7 @@ SCM_DEFINE (scm_char_set_difference_x, "char-set-difference!", 1, 0, 1,
|
||||||
"Return the difference of all argument character sets.")
|
"Return the difference of all argument character sets.")
|
||||||
#define FUNC_NAME s_scm_char_set_difference_x
|
#define FUNC_NAME s_scm_char_set_difference_x
|
||||||
{
|
{
|
||||||
SCM_VALIDATE_SMOB (1, cs1, charset);
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs1);
|
||||||
SCM_VALIDATE_REST_ARGUMENT (rest);
|
SCM_VALIDATE_REST_ARGUMENT (rest);
|
||||||
|
|
||||||
cs1 = scm_char_set_difference (cs1, rest);
|
cs1 = scm_char_set_difference (cs1, rest);
|
||||||
|
@ -1967,6 +1982,7 @@ SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
|
||||||
"Return the exclusive-or of all argument character sets.")
|
"Return the exclusive-or of all argument character sets.")
|
||||||
#define FUNC_NAME s_scm_char_set_xor_x
|
#define FUNC_NAME s_scm_char_set_xor_x
|
||||||
{
|
{
|
||||||
|
SCM_VALIDATE_MUTABLE_CHARSET (1, cs1);
|
||||||
/* a side-effecting variant should presumably give consistent results:
|
/* a side-effecting variant should presumably give consistent results:
|
||||||
(define a (char-set #\a))
|
(define a (char-set #\a))
|
||||||
(char-set-xor a a a) -> char set #\a
|
(char-set-xor a a a) -> char set #\a
|
||||||
|
@ -2022,11 +2038,16 @@ SCM scm_char_set_full;
|
||||||
|
|
||||||
/* Create an empty character set and return it after binding it to NAME. */
|
/* Create an empty character set and return it after binding it to NAME. */
|
||||||
static inline SCM
|
static inline SCM
|
||||||
define_charset (const char *name, const scm_t_char_set *p)
|
define_charset (const char *name, size_t len, const scm_t_char_range *ranges)
|
||||||
{
|
{
|
||||||
SCM cs;
|
SCM cs;
|
||||||
|
|
||||||
SCM_NEWSMOB (cs, scm_tc16_charset, p);
|
scm_t_char_set *p = scm_gc_malloc_pointerless (sizeof (scm_t_char_set),
|
||||||
|
"charset");
|
||||||
|
p->len = len;
|
||||||
|
/* Strip const qualifier but add immutable flag on SCM. */
|
||||||
|
p->ranges = (scm_t_char_range *) ranges;
|
||||||
|
SCM_NEWSMOB (cs, scm_tc16_charset | SCM_CHARSET_F_IMMUTABLE, p);
|
||||||
scm_c_define (name, cs);
|
scm_c_define (name, cs);
|
||||||
return cs;
|
return cs;
|
||||||
}
|
}
|
||||||
|
@ -2087,6 +2108,9 @@ SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define DEFINE_CHARSET(name, stem) \
|
||||||
|
define_charset ("char-set:" name, cs_##stem##_len, cs_##stem##_ranges)
|
||||||
|
|
||||||
void
|
void
|
||||||
scm_init_srfi_14 (void)
|
scm_init_srfi_14 (void)
|
||||||
{
|
{
|
||||||
|
@ -2096,32 +2120,25 @@ scm_init_srfi_14 (void)
|
||||||
scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
|
scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
|
||||||
scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
|
scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
|
||||||
|
|
||||||
scm_char_set_upper_case =
|
scm_char_set_upper_case = DEFINE_CHARSET ("upper-case", upper_case);
|
||||||
define_charset ("char-set:upper-case", &cs_upper_case);
|
scm_char_set_lower_case = DEFINE_CHARSET ("lower-case", lower_case);
|
||||||
scm_char_set_lower_case =
|
scm_char_set_title_case = DEFINE_CHARSET ("title-case", title_case);
|
||||||
define_charset ("char-set:lower-case", &cs_lower_case);
|
scm_char_set_letter = DEFINE_CHARSET ("letter", letter);
|
||||||
scm_char_set_title_case =
|
scm_char_set_digit = DEFINE_CHARSET ("digit", digit);
|
||||||
define_charset ("char-set:title-case", &cs_title_case);
|
scm_char_set_letter_and_digit = DEFINE_CHARSET ("letter+digit",
|
||||||
scm_char_set_letter = define_charset ("char-set:letter", &cs_letter);
|
letter_plus_digit);
|
||||||
scm_char_set_digit = define_charset ("char-set:digit", &cs_digit);
|
scm_char_set_graphic = DEFINE_CHARSET ("graphic", graphic);
|
||||||
scm_char_set_letter_and_digit =
|
scm_char_set_printing = DEFINE_CHARSET ("printing", printing);
|
||||||
define_charset ("char-set:letter+digit", &cs_letter_plus_digit);
|
scm_char_set_whitespace = DEFINE_CHARSET ("whitespace", whitespace);
|
||||||
scm_char_set_graphic = define_charset ("char-set:graphic", &cs_graphic);
|
scm_char_set_iso_control = DEFINE_CHARSET ("iso-control", iso_control);
|
||||||
scm_char_set_printing = define_charset ("char-set:printing", &cs_printing);
|
scm_char_set_punctuation = DEFINE_CHARSET ("punctuation", punctuation);
|
||||||
scm_char_set_whitespace =
|
scm_char_set_symbol = DEFINE_CHARSET ("symbol", symbol);
|
||||||
define_charset ("char-set:whitespace", &cs_whitespace);
|
scm_char_set_hex_digit = DEFINE_CHARSET ("hex-digit", hex_digit);
|
||||||
scm_char_set_iso_control =
|
scm_char_set_blank = DEFINE_CHARSET ("blank", blank);
|
||||||
define_charset ("char-set:iso-control", &cs_iso_control);
|
scm_char_set_ascii = DEFINE_CHARSET ("ascii", ascii);
|
||||||
scm_char_set_punctuation =
|
scm_char_set_empty = DEFINE_CHARSET ("empty", empty);
|
||||||
define_charset ("char-set:punctuation", &cs_punctuation);
|
scm_char_set_designated = DEFINE_CHARSET ("designated", designated);
|
||||||
scm_char_set_symbol = define_charset ("char-set:symbol", &cs_symbol);
|
scm_char_set_full = DEFINE_CHARSET ("full", full);
|
||||||
scm_char_set_hex_digit =
|
|
||||||
define_charset ("char-set:hex-digit", &cs_hex_digit);
|
|
||||||
scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
|
|
||||||
scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
|
|
||||||
scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
|
|
||||||
scm_char_set_designated = define_charset ("char-set:designated", &cs_designated);
|
|
||||||
scm_char_set_full = define_charset ("char-set:full", &cs_full);
|
|
||||||
|
|
||||||
#include "srfi-14.x"
|
#include "srfi-14.x"
|
||||||
}
|
}
|
||||||
|
|
15051
libguile/srfi-14.i.c
15051
libguile/srfi-14.i.c
File diff suppressed because it is too large
Load diff
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/perl
|
||||||
# unidata_to_charset.pl --- Compute SRFI-14 charsets from UnicodeData.txt
|
# unidata_to_charset.pl --- Compute SRFI-14 charsets from UnicodeData.txt
|
||||||
#
|
#
|
||||||
# Copyright (C) 2009, 2010 Free Software Foundation, Inc.
|
# Copyright (C) 2009, 2010, 2022 Free Software Foundation, Inc.
|
||||||
#
|
#
|
||||||
# This library is free software; you can redistribute it and/or
|
# This library is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ -347,7 +347,7 @@ sub compute {
|
||||||
}
|
}
|
||||||
|
|
||||||
# Print the C struct that contains the range list.
|
# Print the C struct that contains the range list.
|
||||||
print $out "scm_t_char_range cs_" . $f . "_ranges[] = {\n";
|
print $out "static const scm_t_char_range cs_" . $f . "_ranges[] = {\n";
|
||||||
if ($rstart[0] != -1) {
|
if ($rstart[0] != -1) {
|
||||||
for (my $i=0; $i<@rstart-1; $i++) {
|
for (my $i=0; $i<@rstart-1; $i++) {
|
||||||
printf $out " {0x%04x, 0x%04x},\n", $rstart[$i], $rend[$i];
|
printf $out " {0x%04x, 0x%04x},\n", $rstart[$i], $rend[$i];
|
||||||
|
@ -358,10 +358,7 @@ sub compute {
|
||||||
|
|
||||||
# Print the C struct that contains the range list length and
|
# Print the C struct that contains the range list length and
|
||||||
# pointer to the range list.
|
# pointer to the range list.
|
||||||
print $out "scm_t_char_set cs_${f} = {\n";
|
print $out "static const size_t cs_${f}_len = $len;\n\n";
|
||||||
print $out " $len,\n";
|
|
||||||
print $out " cs_" . $f . "_ranges\n";
|
|
||||||
print $out "};\n\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Write a bit of a header
|
# Write a bit of a header
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue