1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 04:10:18 +02:00

Surrogate characters shouldn't be in charsets

* libguile/srfi-14.c (charsets_complement): use surrogate #defines instead
  of hardcoded numbers

* libguile/srfi-14.i.c (cs_full_ranges): remove surrogates from full
  charset

* libguile/unidata_to_charset.pl (full): test for surrogates
This commit is contained in:
Michael Gran 2009-08-28 23:47:42 -07:00
parent 526ee76ac3
commit 24d23822ee
3 changed files with 13 additions and 7 deletions

View file

@ -29,6 +29,7 @@
#include "libguile.h" #include "libguile.h"
#include "libguile/srfi-14.h" #include "libguile/srfi-14.h"
#include "libguile/strings.h" #include "libguile/strings.h"
#include "libguile/chars.h"
/* Include the pre-computed standard charset data. */ /* Include the pre-computed standard charset data. */
#include "libguile/srfi-14.i.c" #include "libguile/srfi-14.i.c"
@ -386,8 +387,8 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2, p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
"character-set"); "character-set");
p->ranges[0].lo = 0; p->ranges[0].lo = 0;
p->ranges[0].hi = 0xd7ff; p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
p->ranges[1].lo = 0xe000; p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
p->ranges[1].hi = SCM_CODEPOINT_MAX; p->ranges[1].hi = SCM_CODEPOINT_MAX;
return; return;
} }

View file

@ -2,7 +2,8 @@
/* This file is #include'd by srfi-14.c. */ /* This file is #include'd by srfi-14.c. */
/* This file was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt /* This file was generated from
http://unicode.org/Public/UNIDATA/UnicodeData.txt
with the unidata_to_charset.pl script. */ with the unidata_to_charset.pl script. */
scm_t_char_range cs_lower_case_ranges[] = { scm_t_char_range cs_lower_case_ranges[] = {
@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
, ,
{0xac00, 0xd7a3} {0xac00, 0xd7a3}
, ,
{0xd800, 0xfa2d} {0xe000, 0xfa2d}
, ,
{0xfa30, 0xfa6a} {0xfa30, 0xfa6a}
, ,

View file

@ -254,11 +254,15 @@ sub empty {
return 0; return 0;
} }
# Full -- All characters. # Full -- All characters except for the surrogates
sub full { sub full {
my($codepoint, $name, $category, $uppercase, $lowercase)= @_; my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
if ($category =~ (/Cs/)) {
return 0;
} else {
return 1; return 1;
} }
}
# The procedure generates the two C structures necessary to describe a # The procedure generates the two C structures necessary to describe a
@ -362,7 +366,7 @@ sub compute {
# Write a bit of a header # Write a bit of a header
print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n"; print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
print $out "/* This file is #include'd by srfi-14.c. */\n\n"; print $out "/* This file is #include'd by srfi-14.c. */\n\n";
print $out "/* This file was generated from\n" print $out "/* This file was generated from\n";
print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n"; print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
print $out " with the unidata_to_charset.pl script. */\n\n"; print $out " with the unidata_to_charset.pl script. */\n\n";