mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 04:10:18 +02:00
Surrogate characters shouldn't be in charsets
* libguile/srfi-14.c (charsets_complement): use surrogate #defines instead of hardcoded numbers
* libguile/srfi-14.i.c (cs_full_ranges): remove surrogates from full charset
* libguile/unidata_to_charset.pl (full): test for surrogates
This commit is contained in:
parent
526ee76ac3
commit
24d23822ee
3 changed files with 13 additions and 7 deletions
|
@ -29,6 +29,7 @@
|
||||||
#include "libguile.h"
|
#include "libguile.h"
|
||||||
#include "libguile/srfi-14.h"
|
#include "libguile/srfi-14.h"
|
||||||
#include "libguile/strings.h"
|
#include "libguile/strings.h"
|
||||||
|
#include "libguile/chars.h"
|
||||||
|
|
||||||
/* Include the pre-computed standard charset data. */
|
/* Include the pre-computed standard charset data. */
|
||||||
#include "libguile/srfi-14.i.c"
|
#include "libguile/srfi-14.i.c"
|
||||||
|
@ -386,8 +387,8 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
|
||||||
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
|
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
|
||||||
"character-set");
|
"character-set");
|
||||||
p->ranges[0].lo = 0;
|
p->ranges[0].lo = 0;
|
||||||
p->ranges[0].hi = 0xd7ff;
|
p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
|
||||||
p->ranges[1].lo = 0xe000;
|
p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
|
||||||
p->ranges[1].hi = SCM_CODEPOINT_MAX;
|
p->ranges[1].hi = SCM_CODEPOINT_MAX;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
|
|
||||||
/* This file is #include'd by srfi-14.c. */
|
/* This file is #include'd by srfi-14.c. */
|
||||||
|
|
||||||
/* This file was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt
|
/* This file was generated from
|
||||||
|
http://unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||||
with the unidata_to_charset.pl script. */
|
with the unidata_to_charset.pl script. */
|
||||||
|
|
||||||
scm_t_char_range cs_lower_case_ranges[] = {
|
scm_t_char_range cs_lower_case_ranges[] = {
|
||||||
|
@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
|
||||||
,
|
,
|
||||||
{0xac00, 0xd7a3}
|
{0xac00, 0xd7a3}
|
||||||
,
|
,
|
||||||
{0xd800, 0xfa2d}
|
{0xe000, 0xfa2d}
|
||||||
,
|
,
|
||||||
{0xfa30, 0xfa6a}
|
{0xfa30, 0xfa6a}
|
||||||
,
|
,
|
||||||
|
|
|
@ -254,11 +254,15 @@ sub empty {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Full -- All characters.
|
# Full -- All characters except for the surrogates
|
||||||
sub full {
|
sub full {
|
||||||
my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
|
my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
|
||||||
|
if ($category =~ (/Cs/)) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# The procedure generates the two C structures necessary to describe a
|
# The procedure generates the two C structures necessary to describe a
|
||||||
|
@ -362,7 +366,7 @@ sub compute {
|
||||||
# Write a bit of a header
|
# Write a bit of a header
|
||||||
print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
|
print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
|
||||||
print $out "/* This file is #include'd by srfi-14.c. */\n\n";
|
print $out "/* This file is #include'd by srfi-14.c. */\n\n";
|
||||||
print $out "/* This file was generated from\n"
|
print $out "/* This file was generated from\n";
|
||||||
print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
|
print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
|
||||||
print $out " with the unidata_to_charset.pl script. */\n\n";
|
print $out " with the unidata_to_charset.pl script. */\n\n";
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue