1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

Surrogate characters shouldn't be in charsets

* libguile/srfi-14.c (charsets_complement): use surrogate #defines instead
  of hardcoded numbers

* libguile/srfi-14.i.c (cs_full_ranges): remove surrogates from full
  charset

* libguile/unidata_to_charset.pl (full): test for surrogates
This commit is contained in:
Michael Gran 2009-08-28 23:47:42 -07:00
parent 526ee76ac3
commit 24d23822ee
3 changed files with 13 additions and 7 deletions

View file

@ -29,6 +29,7 @@
#include "libguile.h"
#include "libguile/srfi-14.h"
#include "libguile/strings.h"
#include "libguile/chars.h"
/* Include the pre-computed standard charset data. */
#include "libguile/srfi-14.i.c"
@ -386,8 +387,8 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
"character-set");
p->ranges[0].lo = 0;
p->ranges[0].hi = 0xd7ff;
p->ranges[1].lo = 0xe000;
p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
p->ranges[1].hi = SCM_CODEPOINT_MAX;
return;
}

View file

@ -2,7 +2,8 @@
/* This file is #include'd by srfi-14.c. */
/* This file was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt
/* This file was generated from
http://unicode.org/Public/UNIDATA/UnicodeData.txt
with the unidata_to_charset.pl script. */
scm_t_char_range cs_lower_case_ranges[] = {
@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
,
{0xac00, 0xd7a3}
,
{0xd800, 0xfa2d}
{0xe000, 0xfa2d}
,
{0xfa30, 0xfa6a}
,

View file

@ -254,10 +254,14 @@ sub empty {
return 0;
}
# Full -- All characters except for the surrogates.
#
# Arguments: ($codepoint, $name, $category, $uppercase, $lowercase).
# Only $category is examined; the other arguments are accepted so the
# signature stays the same but are not used here.
#
# Returns 0 when $category matches "Cs", and 1 for every other category.
sub full {
    my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
    # Surrogates must not be members of a charset, so test the category
    # before reporting membership.
    if ($category =~ (/Cs/)) {
        return 0;
    } else {
        return 1;
    }
}
@ -362,7 +366,7 @@ sub compute {
# Write a bit of a header
print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
print $out "/* This file is #include'd by srfi-14.c. */\n\n";
print $out "/* This file was generated from\n"
print $out "/* This file was generated from\n";
print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
print $out " with the unidata_to_charset.pl script. */\n\n";