diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 81f44de39..6721b12bc 100755 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -1901,6 +1901,19 @@ Return the uppercase character version of @var{chr}. Return the lowercase character version of @var{chr}. @end deffn +@rnindex char-titlecase +@deffn {Scheme Procedure} char-titlecase chr +@deffnx {C Function} scm_char_titlecase (chr) +Return the titlecase character version of @var{chr} if one exists; +otherwise return the uppercase version. + +For most characters these will be the same, but the Unicode Standard +includes certain digraph compatibility characters, such as @code{U+01F3} +``dz'', for which the uppercase and titlecase characters are different +(@code{U+01F1} ``DZ'' and @code{U+01F2} ``Dz'' in this case, +respectively). +@end deffn + @node Character Sets @subsection Character Sets diff --git a/doc/ref/api-i18n.texi b/doc/ref/api-i18n.texi index ee76544eb..b82a3a276 100644 --- a/doc/ref/api-i18n.texi +++ b/doc/ref/api-i18n.texi @@ -197,6 +197,12 @@ Return the uppercase character that corresponds to @var{chr} according to either @var{locale} or the current locale. @end deffn +@deffn {Scheme Procedure} char-locale-titlecase chr [locale] +@deffnx {C Function} scm_char_locale_titlecase (chr, locale) +Return the titlecase character that corresponds to @var{chr} according +to either @var{locale} or the current locale. +@end deffn + @deffn {Scheme Procedure} string-locale-upcase str [locale] @deffnx {C Function} scm_string_locale_upcase (str, locale) Return a new string that is the uppercase version of @var{str} @@ -209,6 +215,12 @@ Return a new string that is the down-case version of @var{str} according to either @var{locale} or the current locale. @end deffn +@deffn {Scheme Procedure} string-locale-titlecase str [locale] +@deffnx {C Function} scm_string_locale_titlecase (str, locale) +Return a new string that is the titlecase version of @var{str} +according to either @var{locale} or the current locale. +@end deffn + Note that in the current implementation Guile has no notion of multibyte characters and in a multibyte locale characters may not be converted correctly. diff --git a/libguile/chars.c b/libguile/chars.c index 68e6dc192..1c4d10609 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -391,7 +391,6 @@ SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 0, 0, } #undef FUNC_NAME - SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0, (SCM chr), "Return @code{#t} iff @var{chr} is either uppercase or lowercase, else\n" @@ -458,6 +457,16 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0, } #undef FUNC_NAME +SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0, + (SCM chr), + "Return the titlecase character version of @var{chr}.") +#define FUNC_NAME s_scm_char_titlecase +{ + SCM_VALIDATE_CHAR (1, chr); + return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr))); +} +#undef FUNC_NAME + @@ -480,6 +489,12 @@ scm_c_downcase (scm_t_wchar c) return uc_tolower ((int) c); } +scm_t_wchar +scm_c_titlecase (scm_t_wchar c) +{ + return uc_totitle ((int) c); +} + /* There are a few sets of character names: R5RS, Guile diff --git a/libguile/chars.h b/libguile/chars.h index 04eb9f09f..2b00645ad 100644 --- a/libguile/chars.h +++ b/libguile/chars.h @@ -80,8 +80,10 @@ SCM_API SCM scm_char_to_integer (SCM chr); SCM_API SCM scm_integer_to_char (SCM n); SCM_API SCM scm_char_upcase (SCM chr); SCM_API SCM scm_char_downcase (SCM chr); +SCM_API SCM scm_char_titlecase (SCM chr); SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c); +SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c); SCM_INTERNAL const char *scm_i_charname (SCM chr); SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname, size_t charname_len); diff --git a/libguile/i18n.c b/libguile/i18n.c index 3a6cb0687..b689cafc3 100644 --- a/libguile/i18n.c +++ b/libguile/i18n.c @@ -1098,22 +1098,17 @@ u32_locale_tocase (const scm_t_uint32 *c_s1, size_t len, } - -SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0, - (SCM chr, SCM locale), - "Return the lowercase character that corresponds to @var{chr} " - "according to either @var{locale} or the current locale.") -#define FUNC_NAME s_scm_char_locale_downcase +static SCM +chr_to_case (SCM chr, scm_t_locale c_locale, + scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *, + uninorm_t, scm_t_uint32 *, size_t *), + int *err) { int ret; - scm_t_locale c_locale; scm_t_wchar *buf; - scm_t_uint32 *downbuf; - size_t downlen; - SCM str, downchar; - - SCM_VALIDATE_CHAR (1, chr); - SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + scm_t_uint32 *convbuf; + size_t convlen; + SCM str, convchar; str = scm_i_make_wide_string (1, &buf); buf[0] = SCM_CHAR (chr); @@ -1121,26 +1116,49 @@ SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0, if (c_locale != NULL) RUN_IN_LOCALE_SECTION (c_locale, ret = u32_locale_tocase ((scm_t_uint32 *) buf, 1, - &downbuf, - &downlen, u32_tolower)); + &convbuf, + &convlen, func)); else ret = - u32_locale_tocase ((scm_t_uint32 *) buf, 1, &downbuf, - &downlen, u32_tolower); + u32_locale_tocase ((scm_t_uint32 *) buf, 1, &convbuf, + &convlen, func); if (SCM_UNLIKELY (ret != 0)) { - errno = ret; - scm_syserror (FUNC_NAME); + *err = ret; + return NULL; } - if (downlen == 1) - downchar = SCM_MAKE_CHAR ((scm_t_wchar) downbuf[0]); + if (convlen == 1) + convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]); else - downchar = chr; - free (downbuf); + convchar = chr; + free (convbuf); - return downchar; + return convchar; +} + +SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0, + (SCM chr, SCM locale), + "Return the lowercase character that corresponds to @var{chr} " + "according to either @var{locale} or the current locale.") +#define FUNC_NAME s_scm_char_locale_downcase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; + + SCM_VALIDATE_CHAR (1, chr); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + + ret = chr_to_case (chr, c_locale, u32_tolower, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; } #undef FUNC_NAME @@ -1150,59 +1168,60 @@ SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0, "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_upcase { - int ret; scm_t_locale c_locale; - scm_t_wchar *buf; - scm_t_uint32 *upbuf; - size_t uplen; - SCM str, upchar; + SCM ret; + int err = 0; SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - str = scm_i_make_wide_string (1, &buf); - buf[0] = SCM_CHAR (chr); + ret = chr_to_case (chr, c_locale, u32_toupper, &err); - if (c_locale != NULL) - RUN_IN_LOCALE_SECTION (c_locale, ret = - u32_locale_tocase ((scm_t_uint32 *) buf, 1, - &upbuf, - &uplen, u32_toupper)); - else - ret = - u32_locale_tocase ((scm_t_uint32 *) buf, 1, &upbuf, - &uplen, u32_toupper); - - if (SCM_UNLIKELY (ret != 0)) + if (err != 0) { - errno = ret; + errno = err; scm_syserror (FUNC_NAME); } - if (uplen == 1) - upchar = SCM_MAKE_CHAR ((scm_t_wchar) upbuf[0]); - else - upchar = chr; - free (upbuf); - return upchar; + return ret; } #undef FUNC_NAME -SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0, - (SCM str, SCM locale), - "Return a new string that is the uppercase version of " - "@var{str} according to either @var{locale} or the current " - "locale.") -#define FUNC_NAME s_scm_string_locale_upcase +SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0, + (SCM chr, SCM locale), + "Return the titlecase character that corresponds to @var{chr} " + "according to either @var{locale} or the current locale.") +#define FUNC_NAME s_scm_char_locale_titlecase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; + + SCM_VALIDATE_CHAR (1, chr); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + + ret = chr_to_case (chr, c_locale, u32_totitle, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; +} +#undef FUNC_NAME + +static SCM +str_to_case (SCM str, scm_t_locale c_locale, + scm_t_uint32 *(*func) (const scm_t_uint32 *, size_t, const char *, + uninorm_t, scm_t_uint32 *, size_t *), + int *err) { scm_t_wchar *c_str, *c_buf; - scm_t_uint32 *c_upstr; - size_t len, uplen; + scm_t_uint32 *c_convstr; + size_t len, convlen; int ret; - scm_t_locale c_locale; - SCM upstr; + SCM convstr; - SCM_VALIDATE_STRING (1, str); - SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); len = scm_i_string_length (str); if (len == 0) return scm_nullstr; @@ -1211,28 +1230,52 @@ SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0, if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, ret = u32_locale_tocase ((scm_t_uint32 *) c_str, len, - &c_upstr, - &uplen, u32_toupper)); + &c_convstr, + &convlen, func)); else ret = u32_locale_tocase ((scm_t_uint32 *) c_str, len, - &c_upstr, &uplen, u32_toupper); + &c_convstr, &convlen, func); scm_remember_upto_here (str); if (SCM_UNLIKELY (ret != 0)) { - errno = ret; - scm_syserror (FUNC_NAME); + *err = ret; + return NULL; } - upstr = scm_i_make_wide_string (uplen, &c_buf); - memcpy (c_buf, c_upstr, uplen * sizeof (scm_t_wchar)); - free (c_upstr); + convstr = scm_i_make_wide_string (convlen, &c_buf); + memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar)); + free (c_convstr); - scm_i_try_narrow_string (upstr); + scm_i_try_narrow_string (convstr); - return upstr; + return convstr; +} + +SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0, + (SCM str, SCM locale), + "Return a new string that is the uppercase version of " + "@var{str} according to either @var{locale} or the current " + "locale.") +#define FUNC_NAME s_scm_string_locale_upcase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; + + SCM_VALIDATE_STRING (1, str); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); + + ret = str_to_case (str, c_locale, u32_toupper, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; } #undef FUNC_NAME @@ -1243,45 +1286,46 @@ SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0, "locale.") #define FUNC_NAME s_scm_string_locale_downcase { - scm_t_wchar *c_str, *c_buf; - scm_t_uint32 *c_downstr; - size_t len, downlen; - int ret; scm_t_locale c_locale; - SCM downstr; + SCM ret; + int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - len = scm_i_string_length (str); - if (len == 0) - return scm_nullstr; - SCM_STRING_TO_U32_BUF (str, c_str); - if (c_locale) - RUN_IN_LOCALE_SECTION (c_locale, ret = - u32_locale_tocase ((scm_t_uint32 *) c_str, len, - &c_downstr, - &downlen, u32_tolower)); - else - ret = - u32_locale_tocase ((scm_t_uint32 *) c_str, len, - &c_downstr, &downlen, u32_tolower); + ret = str_to_case (str, c_locale, u32_tolower, &err); - scm_remember_upto_here (str); - - if (SCM_UNLIKELY (ret != 0)) + if (err != 0) { - errno = ret; + errno = err; scm_syserror (FUNC_NAME); } + return ret; +} +#undef FUNC_NAME - downstr = scm_i_make_wide_string (downlen, &c_buf); - memcpy (c_buf, c_downstr, downlen * sizeof (scm_t_wchar)); - free (c_downstr); +SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0, + (SCM str, SCM locale), + "Return a new string that is the title-case version of " + "@var{str} according to either @var{locale} or the current " + "locale.") +#define FUNC_NAME s_scm_string_locale_titlecase +{ + scm_t_locale c_locale; + SCM ret; + int err = 0; - scm_i_try_narrow_string (downstr); + SCM_VALIDATE_STRING (1, str); + SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - return downstr; + ret = str_to_case (str, c_locale, u32_totitle, &err); + + if (err != 0) + { + errno = err; + scm_syserror (FUNC_NAME); + } + return ret; } #undef FUNC_NAME diff --git a/libguile/i18n.h b/libguile/i18n.h index 16045eb0c..c2792aca0 100644 --- a/libguile/i18n.h +++ b/libguile/i18n.h @@ -38,8 +38,10 @@ SCM_API SCM scm_char_locale_ci_gt (SCM c1, SCM c2, SCM locale); SCM_API SCM scm_char_locale_ci_eq (SCM c1, SCM c2, SCM locale); SCM_API SCM scm_char_locale_upcase (SCM chr, SCM locale); SCM_API SCM scm_char_locale_downcase (SCM chr, SCM locale); +SCM_API SCM scm_char_locale_titlecase (SCM chr, SCM locale); SCM_API SCM scm_string_locale_upcase (SCM chr, SCM locale); SCM_API SCM scm_string_locale_downcase (SCM chr, SCM locale); +SCM_API SCM scm_string_locale_titlecase (SCM chr, SCM locale); SCM_API SCM scm_locale_string_to_integer (SCM str, SCM base, SCM locale); SCM_API SCM scm_locale_string_to_inexact (SCM str, SCM locale); diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index cf2abfc70..c4e85712c 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -2198,7 +2198,7 @@ string_titlecase_x (SCM str, size_t start, size_t end) { if (!in_word) { - scm_i_string_set_x (str, i, uc_toupper (SCM_CHAR (ch))); + scm_i_string_set_x (str, i, uc_totitle (SCM_CHAR (ch))); in_word = 1; } else diff --git a/module/ice-9/i18n.scm b/module/ice-9/i18n.scm index e63ec7421..52d7cb48b 100644 --- a/module/ice-9/i18n.scm +++ b/module/ice-9/i18n.scm @@ -42,8 +42,8 @@ char-locale-ci? char-locale-ci=? ;; character mapping - char-locale-downcase char-locale-upcase - string-locale-downcase string-locale-upcase + char-locale-downcase char-locale-upcase char-locale-titlecase + string-locale-downcase string-locale-upcase string-locale-titlecase ;; reading numbers locale-string->integer locale-string->inexact diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test index 67e72a6f1..72805d1de 100644 --- a/test-suite/tests/chars.test +++ b/test-suite/tests/chars.test @@ -245,7 +245,11 @@ (eqv? (char-upcase #\a) #\A)) (pass-if "char-downcase" - (eqv? (char-downcase #\A) #\a))) + (eqv? (char-downcase #\A) #\a)) + + (pass-if "char-titlecase" + (and (eqv? (char-titlecase #\a) #\A) + (eqv? (char-titlecase #\763) #\762)))) (with-test-prefix "charnames" diff --git a/test-suite/tests/i18n.test b/test-suite/tests/i18n.test index 89924b612..1cb48e742 100644 --- a/test-suite/tests/i18n.test +++ b/test-suite/tests/i18n.test @@ -202,6 +202,14 @@ (and (eq? #\Z (char-locale-upcase #\z)) (eq? #\Z (char-locale-upcase #\z (make-locale LC_ALL "C"))))) + (pass-if "char-locale-titlecase" + (and (eq? #\T (char-locale-titlecase #\t)) + (eq? #\T (char-locale-titlecase #\t (make-locale LC_ALL "C"))))) + + (pass-if "char-locale-titlecase Dž" + (and (eq? #\762 (char-locale-titlecase #\763)) + (eq? #\762 (char-locale-titlecase #\763 (make-locale LC_ALL "C"))))) + (pass-if "char-locale-upcase Turkish" (under-turkish-utf8-locale-or-unresolved (lambda () @@ -229,6 +237,11 @@ (and (string=? "Z" (string-locale-upcase "z")) (string=? "Z" (string-locale-upcase "z" (make-locale LC_ALL "C"))))) + (pass-if "string-locale-titlecase" + (and (string=? "Hello, World" (string-locale-titlecase "hello, world")) + (string=? "Hello, World" (string-locale-titlecase + "hello, world" (make-locale LC_ALL "C"))))) + (pass-if "string-locale-upcase Turkish" (under-turkish-utf8-locale-or-unresolved (lambda () diff --git a/test-suite/tests/srfi-13.test b/test-suite/tests/srfi-13.test index 0d2ff59a0..6864287c2 100644 --- a/test-suite/tests/srfi-13.test +++ b/test-suite/tests/srfi-13.test @@ -147,6 +147,25 @@ (pass-if "more than one match, start and end index" (string-any char-upper-case? "abCDE" 1 4)))) +;;; +;;; string-titlecase +;;; + +(with-test-prefix "string-titlecase" + + (pass-if "all-lower" + (string=? "Foo" (string-titlecase "foo"))) + + (pass-if "all-upper" + (string=? "Foo" (string-titlecase "FOO"))) + + (pass-if "two-words" + (string=? "Hello, World!" (string-titlecase "hello, world!"))) + + (pass-if "titlecase-characters" + (string=? (list->string '(#\762)) + (string-titlecase (list->string '(#\763)))))) + ;;; ;;; string-append/shared ;;;