1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 20:00:19 +02:00

Improved support for Unicode title case in Guile's string and character APIs.

* doc/ref/api-data.texi (Characters): Documentation for `char-titlecase'.
* doc/ref/api-i18n.texi (Character Case Mapping): Documentation for
  `char-locale-titlecase' and `string-locale-titlecase'.

* libguile/chars.c, libguile/chars.h (scm_char_titlecase, scm_c_titlecase): New
  functions.

* libguile/i18n.c, libguile/i18n.h (chr_to_case, scm_char_locale_titlecase,
  str_to_case, scm_string_locale_titlecase): New functions.
* libguile/i18n.c (scm_char_locale_downcase, scm_char_locale_upcase,
  scm_string_locale_downcase, scm_string_locale_upcase): Refactor to share code
  via chr_to_case and str_to_case, as appropriate.
* module/ice-9/i18n.scm (char-locale-title-case, string-locale-titlecase): New
  functions.

* libguile/srfi-13.c (string_titlecase_x): Use uc_totitle instead of uc_toupper.

* test-suite/tests/chars.test: Tests for `char-titlecase'.
* test-suite/tests/i18n.test: Tests for `char-locale-titlecase' and
  `string-locale-titlecase'.
* test-suite/tests/srfi-13.test: Tests for `string-titlecase'.
This commit is contained in:
Julian Graham 2009-12-22 00:19:56 -05:00
parent 9b5a0d8460
commit 820f33aaed
11 changed files with 227 additions and 103 deletions

View file

@ -1901,6 +1901,19 @@ Return the uppercase character version of @var{chr}.
Return the lowercase character version of @var{chr}.
@end deffn
@rnindex char-titlecase
@deffn {Scheme Procedure} char-titlecase chr
@deffnx {C Function} scm_char_titlecase (chr)
Return the titlecase character version of @var{chr} if one exists;
otherwise return the uppercase version.
For most characters these will be the same, but the Unicode Standard
includes certain digraph compatibility characters, such as @code{U+01F3}
``dz'', for which the uppercase and titlecase characters are different
(@code{U+01F1} ``DZ'' and @code{U+01F2} ``Dz'' in this case,
respectively).
@end deffn
@node Character Sets
@subsection Character Sets

View file

@ -197,6 +197,12 @@ Return the uppercase character that corresponds to @var{chr} according
to either @var{locale} or the current locale.
@end deffn
@deffn {Scheme Procedure} char-locale-titlecase chr [locale]
@deffnx {C Function} scm_char_locale_titlecase (chr, locale)
Return the titlecase character that corresponds to @var{chr} according
to either @var{locale} or the current locale.
@end deffn
@deffn {Scheme Procedure} string-locale-upcase str [locale]
@deffnx {C Function} scm_string_locale_upcase (str, locale)
Return a new string that is the uppercase version of @var{str}
@ -209,6 +215,12 @@ Return a new string that is the down-case version of @var{str}
according to either @var{locale} or the current locale.
@end deffn
@deffn {Scheme Procedure} string-locale-titlecase str [locale]
@deffnx {C Function} scm_string_locale_titlecase (str, locale)
Return a new string that is the titlecase version of @var{str}
according to either @var{locale} or the current locale.
@end deffn
Note that in the current implementation Guile has no notion of
multibyte characters and in a multibyte locale characters may not be
converted correctly.

View file

@ -391,7 +391,6 @@ SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0,
}
#undef FUNC_NAME
SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
(SCM chr),
"Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n"
@ -458,6 +457,16 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
}
#undef FUNC_NAME
SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
(SCM chr),
"Return the titlecase character version of @var{chr}.")
#define FUNC_NAME s_scm_char_titlecase
{
SCM_VALIDATE_CHAR (1, chr);
return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr)));
}
#undef FUNC_NAME
@ -480,6 +489,12 @@ scm_c_downcase (scm_t_wchar c)
return uc_tolower ((int) c);
}
scm_t_wchar
scm_c_titlecase (scm_t_wchar c)
{
return uc_totitle ((int) c);
}
/* There are a few sets of character names: R5RS, Guile

View file

@ -80,8 +80,10 @@ SCM_API SCM scm_char_to_integer (SCM chr);
SCM_API SCM scm_integer_to_char (SCM n);
SCM_API SCM scm_char_upcase (SCM chr);
SCM_API SCM scm_char_downcase (SCM chr);
SCM_API SCM scm_char_titlecase (SCM chr);
SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c);
SCM_INTERNAL const char *scm_i_charname (SCM chr);
SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname,
size_t charname_len);

View file

@ -1098,22 +1098,17 @@ u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len,
}
SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
(SCM chr, SCM locale),
"Return the lowercase character that corresponds to @var{chr} "
"according to either @var{locale} or the current locale.")
#define FUNC_NAME s_scm_char_locale_downcase
static SCM
chr_to_case (SCM chr, scm_t_locale c_locale,
scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
uninorm_t, scm_t_uint32 *, size_t *),
int *err)
{
int ret;
scm_t_locale c_locale;
scm_t_wchar *buf;
scm_t_uint32 *downbuf;
size_t downlen;
SCM str, downchar;
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
scm_t_uint32 *convbuf;
size_t convlen;
SCM str, convchar;
str = scm_i_make_wide_string (1, &buf);
buf[0] = SCM_CHAR (chr);
@ -1121,26 +1116,49 @@ SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
if (c_locale != NULL)
RUN_IN_LOCALE_SECTION (c_locale, ret =
u32_locale_tocase ((scm_t_uint32 *) buf, 1,
&downbuf,
&downlen, u32_tolower));
&convbuf,
&convlen, func));
else
ret =
u32_locale_tocase ((scm_t_uint32 *) buf, 1, &downbuf,
&downlen, u32_tolower);
u32_locale_tocase ((scm_t_uint32 *) buf, 1, &convbuf,
&convlen, func);
if (SCM_UNLIKELY (ret != 0))
{
errno = ret;
scm_syserror (FUNC_NAME);
*err = ret;
return NULL;
}
if (downlen == 1)
downchar = SCM_MAKE_CHAR ((scm_t_wchar) downbuf[0]);
if (convlen == 1)
convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]);
else
downchar = chr;
free (downbuf);
convchar = chr;
free (convbuf);
return downchar;
return convchar;
}
SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0,
(SCM chr, SCM locale),
"Return the lowercase character that corresponds to @var{chr} "
"according to either @var{locale} or the current locale.")
#define FUNC_NAME s_scm_char_locale_downcase
{
scm_t_locale c_locale;
SCM ret;
int err = 0;
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
ret = chr_to_case (chr, c_locale, u32_tolower, &err);
if (err != 0)
{
errno = err;
scm_syserror (FUNC_NAME);
}
return ret;
}
#undef FUNC_NAME
@ -1150,59 +1168,60 @@ SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0,
"according to either @var{locale} or the current locale.")
#define FUNC_NAME s_scm_char_locale_upcase
{
int ret;
scm_t_locale c_locale;
scm_t_wchar *buf;
scm_t_uint32 *upbuf;
size_t uplen;
SCM str, upchar;
SCM ret;
int err = 0;
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
str = scm_i_make_wide_string (1, &buf);
buf[0] = SCM_CHAR (chr);
ret = chr_to_case (chr, c_locale, u32_toupper, &err);
if (c_locale != NULL)
RUN_IN_LOCALE_SECTION (c_locale, ret =
u32_locale_tocase ((scm_t_uint32 *) buf, 1,
&upbuf,
&uplen, u32_toupper));
else
ret =
u32_locale_tocase ((scm_t_uint32 *) buf, 1, &upbuf,
&uplen, u32_toupper);
if (SCM_UNLIKELY (ret != 0))
if (err != 0)
{
errno = ret;
errno = err;
scm_syserror (FUNC_NAME);
}
if (uplen == 1)
upchar = SCM_MAKE_CHAR ((scm_t_wchar) upbuf[0]);
else
upchar = chr;
free (upbuf);
return upchar;
return ret;
}
#undef FUNC_NAME
SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
(SCM str, SCM locale),
"Return a new string that is the uppercase version of "
"@var{str} according to either @var{locale} or the current "
"locale.")
#define FUNC_NAME s_scm_string_locale_upcase
SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0,
(SCM chr, SCM locale),
"Return the titlecase character that corresponds to @var{chr} "
"according to either @var{locale} or the current locale.")
#define FUNC_NAME s_scm_char_locale_titlecase
{
scm_t_locale c_locale;
SCM ret;
int err = 0;
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
ret = chr_to_case (chr, c_locale, u32_totitle, &err);
if (err != 0)
{
errno = err;
scm_syserror (FUNC_NAME);
}
return ret;
}
#undef FUNC_NAME
static SCM
str_to_case (SCM str, scm_t_locale c_locale,
scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *,
uninorm_t, scm_t_uint32 *, size_t *),
int *err)
{
scm_t_wchar *c_str, *c_buf;
scm_t_uint32 *c_upstr;
size_t len, uplen;
scm_t_uint32 *c_convstr;
size_t len, convlen;
int ret;
scm_t_locale c_locale;
SCM upstr;
SCM convstr;
SCM_VALIDATE_STRING (1, str);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
len = scm_i_string_length (str);
if (len == 0)
return scm_nullstr;
@ -1211,28 +1230,52 @@ SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, ret =
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_upstr,
&uplen, u32_toupper));
&c_convstr,
&convlen, func));
else
ret =
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_upstr, &uplen, u32_toupper);
&c_convstr, &convlen, func);
scm_remember_upto_here (str);
if (SCM_UNLIKELY (ret != 0))
{
errno = ret;
scm_syserror (FUNC_NAME);
*err = ret;
return NULL;
}
upstr = scm_i_make_wide_string (uplen, &c_buf);
memcpy (c_buf, c_upstr, uplen * sizeof (scm_t_wchar));
free (c_upstr);
convstr = scm_i_make_wide_string (convlen, &c_buf);
memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar));
free (c_convstr);
scm_i_try_narrow_string (upstr);
scm_i_try_narrow_string (convstr);
return upstr;
return convstr;
}
SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0,
(SCM str, SCM locale),
"Return a new string that is the uppercase version of "
"@var{str} according to either @var{locale} or the current "
"locale.")
#define FUNC_NAME s_scm_string_locale_upcase
{
scm_t_locale c_locale;
SCM ret;
int err = 0;
SCM_VALIDATE_STRING (1, str);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
ret = str_to_case (str, c_locale, u32_toupper, &err);
if (err != 0)
{
errno = err;
scm_syserror (FUNC_NAME);
}
return ret;
}
#undef FUNC_NAME
@ -1243,45 +1286,46 @@ SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0,
"locale.")
#define FUNC_NAME s_scm_string_locale_downcase
{
scm_t_wchar *c_str, *c_buf;
scm_t_uint32 *c_downstr;
size_t len, downlen;
int ret;
scm_t_locale c_locale;
SCM downstr;
SCM ret;
int err = 0;
SCM_VALIDATE_STRING (1, str);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
len = scm_i_string_length (str);
if (len == 0)
return scm_nullstr;
SCM_STRING_TO_U32_BUF (str, c_str);
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, ret =
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_downstr,
&downlen, u32_tolower));
else
ret =
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_downstr, &downlen, u32_tolower);
ret = str_to_case (str, c_locale, u32_tolower, &err);
scm_remember_upto_here (str);
if (SCM_UNLIKELY (ret != 0))
if (err != 0)
{
errno = ret;
errno = err;
scm_syserror (FUNC_NAME);
}
return ret;
}
#undef FUNC_NAME
downstr = scm_i_make_wide_string (downlen, &c_buf);
memcpy (c_buf, c_downstr, downlen * sizeof (scm_t_wchar));
free (c_downstr);
SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0,
(SCM str, SCM locale),
"Return a new string that is the title-case version of "
"@var{str} according to either @var{locale} or the current "
"locale.")
#define FUNC_NAME s_scm_string_locale_titlecase
{
scm_t_locale c_locale;
SCM ret;
int err = 0;
scm_i_try_narrow_string (downstr);
SCM_VALIDATE_STRING (1, str);
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale);
return downstr;
ret = str_to_case (str, c_locale, u32_totitle, &err);
if (err != 0)
{
errno = err;
scm_syserror (FUNC_NAME);
}
return ret;
}
#undef FUNC_NAME

View file

@ -38,8 +38,10 @@ SCM_API SCM scm_char_locale_ci_gt (SCM c1, SCM c2, SCM locale);
SCM_API SCM scm_char_locale_ci_eq (SCM c1, SCM c2, SCM locale);
SCM_API SCM scm_char_locale_upcase (SCM chr, SCM locale);
SCM_API SCM scm_char_locale_downcase (SCM chr, SCM locale);
SCM_API SCM scm_char_locale_titlecase (SCM chr, SCM locale);
SCM_API SCM scm_string_locale_upcase (SCM chr, SCM locale);
SCM_API SCM scm_string_locale_downcase (SCM chr, SCM locale);
SCM_API SCM scm_string_locale_titlecase (SCM chr, SCM locale);
SCM_API SCM scm_locale_string_to_integer (SCM str, SCM base, SCM locale);
SCM_API SCM scm_locale_string_to_inexact (SCM str, SCM locale);

View file

@ -2198,7 +2198,7 @@ string_titlecase_x (SCM str, size_t start, size_t end)
{
if (!in_word)
{
scm_i_string_set_x (str, i, uc_toupper (SCM_CHAR (ch)));
scm_i_string_set_x (str, i, uc_totitle (SCM_CHAR (ch)));
in_word = 1;
}
else

View file

@ -42,8 +42,8 @@
char-locale-ci<? char-locale-ci>? char-locale-ci=?
;; character mapping
char-locale-downcase char-locale-upcase
string-locale-downcase string-locale-upcase
char-locale-downcase char-locale-upcase char-locale-titlecase
string-locale-downcase string-locale-upcase string-locale-titlecase
;; reading numbers
locale-string->integer locale-string->inexact

View file

@ -245,7 +245,11 @@
(eqv? (char-upcase #\a) #\A))
(pass-if "char-downcase"
(eqv? (char-downcase #\A) #\a)))
(eqv? (char-downcase #\A) #\a))
(pass-if "char-titlecase"
(and (eqv? (char-titlecase #\a) #\A)
(eqv? (char-titlecase #\763) #\762))))
(with-test-prefix "charnames"

View file

@ -202,6 +202,14 @@
(and (eq? #\Z (char-locale-upcase #\z))
(eq? #\Z (char-locale-upcase #\z (make-locale LC_ALL "C")))))
(pass-if "char-locale-titlecase"
(and (eq? #\T (char-locale-titlecase #\t))
(eq? #\T (char-locale-titlecase #\t (make-locale LC_ALL "C")))))
(pass-if "char-locale-titlecase Dž"
(and (eq? #\762 (char-locale-titlecase #\763))
(eq? #\762 (char-locale-titlecase #\763 (make-locale LC_ALL "C")))))
(pass-if "char-locale-upcase Turkish"
(under-turkish-utf8-locale-or-unresolved
(lambda ()
@ -229,6 +237,11 @@
(and (string=? "Z" (string-locale-upcase "z"))
(string=? "Z" (string-locale-upcase "z" (make-locale LC_ALL "C")))))
(pass-if "string-locale-titlecase"
(and (string=? "Hello, World" (string-locale-titlecase "hello, world"))
(string=? "Hello, World" (string-locale-titlecase
"hello, world" (make-locale LC_ALL "C")))))
(pass-if "string-locale-upcase Turkish"
(under-turkish-utf8-locale-or-unresolved
(lambda ()

View file

@ -147,6 +147,25 @@
(pass-if "more than one match, start and end index"
(string-any char-upper-case? "abCDE" 1 4))))
;;;
;;; string-titlecase
;;;
(with-test-prefix "string-titlecase"
(pass-if "all-lower"
(string=? "Foo" (string-titlecase "foo")))
(pass-if "all-upper"
(string=? "Foo" (string-titlecase "FOO")))
(pass-if "two-words"
(string=? "Hello, World!" (string-titlecase "hello, world!")))
(pass-if "titlecase-characters"
(string=? (list->string '(#\762))
(string-titlecase (list->string '(#\763))))))
;;;
;;; string-append/shared
;;;