diff --git a/NEWS b/NEWS index 4797b0cb7..97b55e99c 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,12 @@ prerelease, and a full NEWS corresponding to 1.8 -> 2.0.) Changes in 1.9.3 (since the 1.9.2 prerelease): +** Character functions operate on Unicode characters + +char-upcase and char-downcase use default Unicode casing rules. +Character comparisons such as char? @deffn {Scheme Procedure} char>? x y -Return @code{#t} iff @var{x} is greater than @var{y} in the @acronym{ASCII} -sequence, else @code{#f}. +Return @code{#t} iff the code point of @var{x} is greater than the +code point of @var{y}, else @code{#f}. @end deffn @rnindex char>=? @deffn {Scheme Procedure} char>=? x y -Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the -@acronym{ASCII} sequence, else @code{#f}. +Return @code{#t} iff the code point of @var{x} is greater than or +equal to the code point of @var{y}, else @code{#f}. @end deffn +Case-insensitive character comparisons use @emph{Unicode +case folding}. In case folding comparisons, if a character is +lowercase and has an uppercase form that can be expressed as a single +character, it is converted to uppercase before comparison. Unicode +case folding is language independent: it uses rules that are generally +true, but it cannot cover all cases for all languages. + @rnindex char-ci=? @deffn {Scheme Procedure} char-ci=? x y -Return @code{#t} iff @var{x} is the same character as @var{y} ignoring -case, else @code{#f}. +Return @code{#t} iff the case-folded code point of @var{x} is the same +as the case-folded code point of @var{y}, else @code{#f}. @end deffn @rnindex char-ci? @deffn {Scheme Procedure} char-ci>? x y -Return @code{#t} iff @var{x} is greater than @var{y} in the @acronym{ASCII} -sequence ignoring case, else @code{#f}. +Return @code{#t} iff the case-folded code point of @var{x} is greater +than the case-folded code point of @var{y}, else @code{#f}. @end deffn @rnindex char-ci>=? 
@deffn {Scheme Procedure} char-ci>=? x y -Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the -@acronym{ASCII} sequence ignoring case, else @code{#f}. +Return @code{#t} iff the case-folded code point of @var{x} is greater +than or equal to the case-folded code point of @var{y}, else +@code{#f}. @end deffn @rnindex char-alphabetic? @@ -1946,14 +1982,15 @@ Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @rnindex char->integer @deffn {Scheme Procedure} char->integer chr @deffnx {C Function} scm_char_to_integer (chr) -Return the number corresponding to ordinal position of @var{chr} in the -@acronym{ASCII} sequence. +Return the code point of @var{chr}. @end deffn @rnindex integer->char @deffn {Scheme Procedure} integer->char n @deffnx {C Function} scm_integer_to_char (n) -Return the character at position @var{n} in the @acronym{ASCII} sequence. +Return the character that has code point @var{n}. The integer @var{n} +must be a valid code point. Valid code points are in the ranges 0 to +@code{#xD7FF} inclusive or @code{#xE000} to @code{#x10FFFF} inclusive. 
@end deffn @rnindex char-upcase diff --git a/libguile/chars.c b/libguile/chars.c index c7cb09c47..c2feaa69e 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -45,7 +45,8 @@ SCM_DEFINE (scm_char_p, "char?", 1, 0, 0, SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.") + "Return @code{#t} iff code point of @var{x} is equal to the code point\n" + "of @var{y}, else @code{#f}.\n") #define FUNC_NAME s_scm_char_eq_p { SCM_VALIDATE_CHAR (1, x); @@ -57,8 +58,8 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_less_p, "char?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n" - "sequence, else @code{#f}.") + "Return @code{#t} iff the code point of @var{x} is greater than the\n" + "code point of @var{y}, else @code{#f}.") #define FUNC_NAME s_scm_char_gr_p { SCM_VALIDATE_CHAR (1, x); @@ -93,8 +94,8 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n" - "Unicode sequence, else @code{#f}.") + "Return @code{#t} iff the code point of @var{x} is greater than or\n" + "equal to the code point of @var{y}, else @code{#f}.") #define FUNC_NAME s_scm_char_geq_p { SCM_VALIDATE_CHAR (1, x); @@ -103,10 +104,17 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr, } #undef FUNC_NAME +/* FIXME?: R6RS specifies that these comparisons are case-folded. + This is the same thing as comparing the uppercase characters in + practice, but not in theory. Unicode has a table containing its + definition of case-folded character mappings. A more correct + implementation would be to use that table and make a char-foldcase + function. 
*/ + SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n" - "case, else @code{#f}. Case is locale free and not context sensitive.") + "Return @code{#t} iff the case-folded code point of @var{x} is the same\n" + "as the case-folded code point of @var{y}, else @code{#f}.") #define FUNC_NAME s_scm_char_ci_eq_p { SCM_VALIDATE_CHAR (1, x); @@ -117,9 +125,8 @@ SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_ci_less_p, "char-ci?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n" - "than the Unicode uppercase form of @var{y} in the Unicode\n" - "sequence, else @code{#f}.") + "Return @code{#t} iff the case-folded code point of @var{x} is greater\n" + "than the case-folded code point of @var{y}, else @code{#f}.") #define FUNC_NAME s_scm_char_ci_gr_p { SCM_VALIDATE_CHAR (1, x); @@ -156,9 +162,9 @@ SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n" - "than or equal to the Unicode uppercase form of @var{y} in the\n" - "Unicode sequence, else @code{#f}.") + "Return @code{#t} iff the case-folded code point of @var{x} is greater\n" + "than or equal to the case-folded code point of @var{y}, else\n" + "@code{#f}.") #define FUNC_NAME s_scm_char_ci_geq_p { SCM_VALIDATE_CHAR (1, x); @@ -196,7 +202,6 @@ SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 0, 0, #undef FUNC_NAME - SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0, (SCM chr), "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n") @@ -217,7 +222,6 @@ SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0, #undef FUNC_NAME - SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0, (SCM chr), "Return @code{#t} iff @var{chr} is either 
uppercase or lowercase, else @code{#f}.\n") @@ -230,12 +234,9 @@ SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0, #undef FUNC_NAME - - SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0, (SCM chr), - "Return the number corresponding to ordinal position of @var{chr} in the\n" - "ASCII sequence.") + "Return the code point of @var{chr}.") #define FUNC_NAME s_scm_char_to_integer { SCM_VALIDATE_CHAR (1, chr); @@ -244,10 +245,11 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0, #undef FUNC_NAME - SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0, (SCM n), - "Return the character at position @var{n} in the ASCII sequence.") + "Return the character that has code point @var{n}. The integer @var{n}\n" + "must be a valid code point. Valid code points are in the ranges 0 to\n" + "@code{#xD7FF} inclusive or @code{#xE000} to @code{#x10FFFF} inclusive.") #define FUNC_NAME s_scm_integer_to_char { scm_t_wchar cn;