Surrogate characters shouldn't be in charsets

* libguile/srfi-14.c (charsets_complement): use surrogate #defines instead of hardcoded numbers
* libguile/srfi-14.i.c (cs_full_ranges): remove surrogates from full charset
* libguile/unidata_to_charset.pl (full): test for surrogates
2025-07-12 12:10:30 +02:00 · 2009-08-28 23:47:42 -07:00 · 2009-08-28 23:47:42 -07:00 · 24d23822ee
commit 24d23822ee
parent 526ee76ac3
3 changed files with 13 additions and 7 deletions
--- a/libguile/srfi-14.c
+++ b/libguile/srfi-14.c
@ -29,6 +29,7 @@
 #include "libguile.h"
 #include "libguile/srfi-14.h"
 #include "libguile/strings.h"
 #include "libguile/chars.h"
 /* Include the pre-computed standard charset data.  */
 #include "libguile/srfi-14.i.c"
@ -386,8 +387,8 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
      p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
                                 "character-set");
      p->ranges[0].lo = 0;
-      p->ranges[0].hi = 0xd7ff;
+      p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
-      p->ranges[1].lo = 0xe000;
+      p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
      p->ranges[1].hi = SCM_CODEPOINT_MAX;
      return;
    }
--- a/libguile/srfi-14.i.c
+++ b/libguile/srfi-14.i.c
@ -2,7 +2,8 @@
 /* This file is #include'd by srfi-14.c.  */
-/* This file was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt
+/* This file was generated from
   http://unicode.org/Public/UNIDATA/UnicodeData.txt
   with the unidata_to_charset.pl script.  */
 scm_t_char_range cs_lower_case_ranges[] = {
@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
  ,
  {0xac00, 0xd7a3}
  ,
-  {0xd800, 0xfa2d}
+  {0xe000, 0xfa2d}
  ,
  {0xfa30, 0xfa6a}
  ,
--- a/libguile/unidata_to_charset.pl
+++ b/libguile/unidata_to_charset.pl
@ -254,11 +254,15 @@ sub empty {
    return 0;
 }
-# Full -- All characters.  
+# Full -- All characters except for the surrogates
 sub full {
    # Membership predicate for the "full" charset.  Arguments arrive in the
    # order (codepoint, name, category, uppercase, lowercase); only the
    # category is consulted.  Returns 0 to leave a character out of the
    # set and 1 to include it.
    my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
    # Surrogates carry the category Cs and are the only exclusion.
    return 0 if ($category =~ (/Cs/));
    return 1;
 }
 # The procedure generates the two C structures necessary to describe a
@ -362,7 +366,7 @@ sub compute {
 # Write a bit of a header
 print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
 print $out "/* This file is #include'd by srfi-14.c.  */\n\n";
-print $out "/* This file was generated from\n"
+print $out "/* This file was generated from\n";
 print $out "   http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
 print $out "   with the unidata_to_charset.pl script.  */\n\n";