mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-02 13:00:26 +02:00
The charset complement operator should not include surrogates
* libguile/srfi-14.c (charsets_complement): skip over surrogates when making a charset complement
This commit is contained in:
parent
bde543e88b
commit
f4cdfe6140
1 changed file with 34 additions and 12 deletions
|
@ -471,22 +471,35 @@ charsets_intersection (scm_t_char_set *a, scm_t_char_set *b)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define SCM_ADD_RANGE(low, high) \
|
||||||
|
do { \
|
||||||
|
p->ranges[idx].lo = (low); \
|
||||||
|
p->ranges[idx++].hi = (high); \
|
||||||
|
} while (0)
|
||||||
|
#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high) \
|
||||||
|
do { \
|
||||||
|
p->ranges[idx].lo = (low); \
|
||||||
|
p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1; \
|
||||||
|
p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1; \
|
||||||
|
p->ranges[idx++].hi = (high); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Make P the complement of Q. */
|
/* Make P the complement of Q. */
|
||||||
static void
|
static void
|
||||||
charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
|
charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
|
||||||
{
|
{
|
||||||
int k, idx;
|
int k, idx;
|
||||||
|
|
||||||
|
idx = 0;
|
||||||
if (q->len == 0)
|
if (q->len == 0)
|
||||||
{
|
{
|
||||||
/* Fill with all valid codepoints. */
|
/* Fill with all valid codepoints. */
|
||||||
p->len = 2;
|
p->len = 2;
|
||||||
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
|
p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
|
||||||
"character-set");
|
"character-set");
|
||||||
p->ranges[0].lo = 0;
|
SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
|
||||||
p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
|
|
||||||
p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
|
|
||||||
p->ranges[1].hi = SCM_CODEPOINT_MAX;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -494,33 +507,42 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
|
||||||
scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
|
scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
|
||||||
"character-set");
|
"character-set");
|
||||||
|
|
||||||
|
/* Count the number of ranges needed for the output. */
|
||||||
p->len = 0;
|
p->len = 0;
|
||||||
if (q->ranges[0].lo > 0)
|
if (q->ranges[0].lo > 0)
|
||||||
p->len++;
|
p->len++;
|
||||||
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
|
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
|
||||||
p->len++;
|
p->len++;
|
||||||
p->len += q->len - 1;
|
p->len += q->len;
|
||||||
p->ranges =
|
p->ranges =
|
||||||
(scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
|
(scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
|
||||||
"character-set");
|
"character-set");
|
||||||
idx = 0;
|
|
||||||
if (q->ranges[0].lo > 0)
|
if (q->ranges[0].lo > 0)
|
||||||
{
|
{
|
||||||
p->ranges[idx].lo = 0;
|
if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
|
||||||
p->ranges[idx++].hi = q->ranges[0].lo - 1;
|
SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
|
||||||
|
else
|
||||||
|
SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
|
||||||
}
|
}
|
||||||
for (k = 1; k < q->len; k++)
|
for (k = 1; k < q->len; k++)
|
||||||
{
|
{
|
||||||
p->ranges[idx].lo = q->ranges[k - 1].hi + 1;
|
if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
|
||||||
p->ranges[idx++].hi = q->ranges[k].lo - 1;
|
&& q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
|
||||||
|
SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
|
||||||
|
else
|
||||||
|
SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
|
||||||
}
|
}
|
||||||
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
|
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
|
||||||
{
|
{
|
||||||
p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1;
|
if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
|
||||||
p->ranges[idx].hi = SCM_CODEPOINT_MAX;
|
SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
|
||||||
|
else
|
||||||
|
SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#undef SCM_ADD_RANGE
|
||||||
|
#undef SCM_ADD_RANGE_SKIP_SURROGATES
|
||||||
|
|
||||||
/* Replace A with elements only found in one of A or B. */
|
/* Replace A with elements only found in one of A or B. */
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue