/* Copyright 2006-2014,2017-2019,2025 Free Software Foundation, Inc. This file is part of Guile. Guile is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Guile is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Guile. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include /* `strcoll ()' */ #include /* `toupper ()' et al. */ #include #include #include #include "boolean.h" #include "chars.h" #include "dynwind.h" #include "extensions.h" #include "feature.h" #include "finalizers.h" #include "gsubr.h" #include "list.h" #include "modules.h" #include "numbers.h" #include "pairs.h" #include "posix.h" /* for `scm_i_locale_mutex' */ #include "strings-internal.h" #include "symbols.h" #include "syscalls.h" #include "threads-internal.h" #include "values.h" #include "variable.h" #include "version.h" #include "i18n.h" #ifndef SCM_MAX_ALLOCA # define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */ #endif #if !defined HAVE_NEWLOCALE || !defined HAVE_USELOCALE #warning POSIX 2008 locale API (newlocale, uselocale) seems to be missing #endif /* Use Gnulib's header, which also provides `nl_item' & co. */ #include /* The newlib C library has alternative names for locale constants. */ #if HAVE_DECL__NL_NUMERIC_GROUPING #define INT_CURR_SYMBOL _NL_MONETARY_INT_CURR_SYMBOL #define MON_DECIMAL_POINT _NL_MONETARY_MON_DECIMAL_POINT #define MON_THOUSANDS_SEP _NL_MONETARY_MON_THOUSANDS_SEP #define MON_GROUPING _NL_MONETARY_MON_GROUPING #define POSITIVE_SIGN _NL_MONETARY_POSITIVE_SIGN #define NEGATIVE_SIGN _NL_MONETARY_NEGATIVE_SIGN #define GROUPING _NL_NUMERIC_GROUPING #define INT_FRAC_DIGITS _NL_MONETARY_INT_FRAC_DIGITS #define FRAC_DIGITS _NL_MONETARY_FRAC_DIGITS #define P_CS_PRECEDES _NL_MONETARY_P_CS_PRECEDES #define P_SEP_BY_SPACE _NL_MONETARY_P_SEP_BY_SPACE #define N_CS_PRECEDES _NL_MONETARY_N_CS_PRECEDES #define N_SEP_BY_SPACE _NL_MONETARY_N_SEP_BY_SPACE #define P_SIGN_POSN _NL_MONETARY_P_SIGN_POSN #define N_SIGN_POSN _NL_MONETARY_N_SIGN_POSN #define INT_P_CS_PRECEDES _NL_MONETARY_INT_P_CS_PRECEDES #define INT_P_SEP_BY_SPACE _NL_MONETARY_INT_P_SEP_BY_SPACE #define INT_N_CS_PRECEDES _NL_MONETARY_INT_N_CS_PRECEDES #define INT_N_SEP_BY_SPACE _NL_MONETARY_INT_N_SEP_BY_SPACE #define INT_P_SIGN_POSN _NL_MONETARY_INT_P_SIGN_POSN #define INT_N_SIGN_POSN _NL_MONETARY_INT_N_SIGN_POSN #endif /* Helper stringification macro. */ #define SCM_I18N_STRINGIFY(_name) # _name /* Acquiring and releasing the locale lock. */ static inline void lock_locale_mutex (void) { #ifdef HAVE_POSIX scm_i_pthread_mutex_lock (&scm_i_locale_mutex); #else #endif } static inline void unlock_locale_mutex (void) { #ifdef HAVE_POSIX scm_i_pthread_mutex_unlock (&scm_i_locale_mutex); #else #endif } /* Locale objects, string and character collation, and other locale-dependent string operations. Note: We don't wrap glibc's `uselocale ()' call because it sets the locale of the current _thread_ (unlike `setlocale ()') and doing so would require maintaining per-thread locale information on non-GNU systems and always re-installing this locale upon locale-dependent calls. */ /* Return the category mask corresponding to CAT. */ #define SCM_LOCALE_CATEGORY_MASK(_cat) LC_ ## _cat ## _MASK /* Alias for glibc's locale type. */ typedef locale_t scm_t_locale; /* A locale object denoting the global locale. */ SCM_GLOBAL_VARIABLE (scm_global_locale, "%global-locale"); struct scm_locale { scm_t_bits tag; locale_t locale; }; static inline int scm_is_locale (SCM x) { return SCM_HAS_TYP16 (x, scm_tc16_locale); } static inline struct scm_locale * scm_to_locale (SCM x) { if (!scm_is_locale (x)) abort (); return (struct scm_locale *) SCM_UNPACK_POINTER (x); } static inline SCM scm_from_locale (struct scm_locale *x) { return SCM_PACK_POINTER (x); } #define SCM_LOCALE_P(x) (scm_is_locale (x)) #define SCM_VALIDATE_LOCALE(pos, x) \ SCM_MAKE_VALIDATE_MSG (pos, x, LOCALE_P, "locale") /* Validate parameter ARG as a locale object and set C_LOCALE to the corresponding C locale object. */ #define SCM_VALIDATE_LOCALE_COPY(_pos, _arg, _c_locale) \ do \ { \ SCM_VALIDATE_LOCALE ((_pos), (_arg)); \ (_c_locale) = scm_to_locale (_arg)->locale; \ } \ while (0) /* Validate optional parameter ARG as either undefined or bound to a locale object. Set C_LOCALE to the corresponding C locale object or NULL. */ #define SCM_VALIDATE_OPTIONAL_LOCALE_COPY(_pos, _arg, _c_locale) \ do \ { \ if (!SCM_UNBNDP (_arg)) \ SCM_VALIDATE_LOCALE_COPY (_pos, _arg, _c_locale); \ else \ (_c_locale) = NULL; \ } \ while (0) void scm_i_finalize_locale (struct scm_thread *thread, SCM locale) { locale_t c_locale = (locale_t) scm_to_locale (locale)->locale; if (c_locale) freelocale (c_locale); } static void inline scm_locale_error (const char *, int) SCM_NORETURN; /* Throw an exception corresponding to error ERR. */ static void inline scm_locale_error (const char *func_name, int err) { scm_syserror_msg (func_name, "Failed to install locale", SCM_EOL, err); } /* Convenient macro to run STATEMENT in the locale context of C_LOCALE. */ #define RUN_IN_LOCALE_SECTION(_c_locale, _statement) \ do \ { \ locale_t old_loc; \ \ old_loc = uselocale (_c_locale); \ _statement ; \ uselocale (old_loc); \ } \ while (0) /* `make-locale' can take either category lists or single categories (the `LC_*' integer constants). */ #define SCM_LIST_OR_INTEGER_P(arg) \ (scm_is_integer (arg) || scm_is_true (scm_list_p (arg))) /* Return the category mask corresponding to CATEGORY (an `LC_' integer constant). */ static inline int category_to_category_mask (SCM category, const char *func_name, int pos) { int c_category; int c_category_mask; c_category = scm_to_int (category); #define SCM_DEFINE_LOCALE_CATEGORY(_name) \ case LC_ ## _name: \ c_category_mask = SCM_LOCALE_CATEGORY_MASK (_name); \ break; switch (c_category) { #include "locale-categories.h" case LC_ALL: c_category_mask = LC_ALL_MASK; break; default: scm_wrong_type_arg_msg (func_name, pos, category, "locale category"); } #undef SCM_DEFINE_LOCALE_CATEGORY return c_category_mask; } /* Convert CATEGORIES, a list of locale categories or a single category (an integer), into a category mask. */ static int category_list_to_category_mask (SCM categories, const char *func_name, int pos) { int c_category_mask = 0; if (scm_is_integer (categories)) c_category_mask = category_to_category_mask (categories, func_name, pos); else for (; !scm_is_null (categories); categories = SCM_CDR (categories)) { SCM category = SCM_CAR (categories); c_category_mask |= category_to_category_mask (category, func_name, pos); } return c_category_mask; } SCM_DEFINE (scm_make_locale, "make-locale", 2, 1, 0, (SCM category_list, SCM locale_name, SCM base_locale), "Return a reference to a data structure representing a set of " "locale datasets. @var{category_list} should be either a list " "of locale categories or a single category as used with " "@code{setlocale} (@pxref{Locales, @code{setlocale}}) and " "@var{locale_name} should be the name of the locale considered " "(e.g., @code{\"sl_SI\"}). Optionally, if @code{base_locale} is " "passed, it should be a locale object denoting settings for " "categories not listed in @var{category_list}.") #define FUNC_NAME s_scm_make_locale { int err = 0; int c_category_mask; char *c_locale_name; locale_t c_base_locale, c_locale; SCM_MAKE_VALIDATE (1, category_list, LIST_OR_INTEGER_P); SCM_VALIDATE_STRING (2, locale_name); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, base_locale, c_base_locale); c_category_mask = category_list_to_category_mask (category_list, FUNC_NAME, 1); c_locale_name = scm_to_locale_string (locale_name); if (scm_is_eq (base_locale, SCM_VARIABLE_REF (scm_global_locale))) c_base_locale = LC_GLOBAL_LOCALE; if (c_base_locale != (locale_t) 0) { /* C_BASE_LOCALE is to be consumed by `newlocale ()' so it needs to be duplicated before. */ c_base_locale = duplocale (c_base_locale); if (c_base_locale == (locale_t) 0) { err = errno; goto fail; } } c_locale = newlocale (c_category_mask, c_locale_name, c_base_locale); free (c_locale_name); c_locale_name = NULL; if (c_locale == (locale_t) 0) { if (c_base_locale != (locale_t) 0) freelocale (c_base_locale); scm_locale_error (FUNC_NAME, errno); } struct scm_locale *locale = scm_allocate_tagged (SCM_I_CURRENT_THREAD, sizeof (*locale)); locale->tag = scm_tc16_locale; locale->locale = c_locale; scm_i_add_locale_finalizer (SCM_I_CURRENT_THREAD, scm_from_locale (locale)); return scm_from_locale (locale); fail: free (c_locale_name); scm_locale_error (FUNC_NAME, err); return SCM_BOOL_F; } #undef FUNC_NAME SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0, (SCM obj), "Return true if @var{obj} is a locale object.") #define FUNC_NAME s_scm_locale_p { return scm_from_bool (scm_is_locale (obj)); } #undef FUNC_NAME /* Locale-dependent string comparison. A similar API can be found in MzScheme starting from version 200: http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */ #define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \ do \ { \ if (scm_i_is_narrow_string (str)) \ { \ size_t i, len, bytes; \ const char *buf = scm_i_string_chars (str); \ \ len = scm_i_string_length (str); \ bytes = (len + 1) * sizeof (scm_t_wchar); \ c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \ c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \ \ for (i = 0; i < len; i ++) \ c_str[i] = (unsigned char ) buf[i]; \ c_str[len] = 0; \ } \ else \ { \ c_str_malloc_p = 0; \ c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \ } \ } while (0) #define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \ do \ { \ if (c_str_malloc_p) \ free (c_str); \ } while (0) /* Compare strings according to LOCALE. Returns a negative value if S1 compares smaller than S2, a positive value if S1 compares larger than S2, or 0 if they compare equal. */ static inline int compare_strings (SCM s1, SCM s2, SCM locale, const char *func_name) #define FUNC_NAME func_name { int result; locale_t c_locale; uint8_t *c_s1, *c_s2; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); c_s1 = (uint8_t *) scm_to_utf8_string (s1); c_s2 = (uint8_t *) scm_to_utf8_string (s2); if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, result = u8_strcoll (c_s1, c_s2)); else result = u8_strcoll (c_s1, c_s2); free (c_s1); free (c_s2); scm_remember_upto_here (locale); return result; } #undef FUNC_NAME /* Return the current language of the locale. */ static const char * locale_language () { /* Note: If the locale has been set with 'uselocale', uc_locale_language from libunistring versions 0.9.1 and older will return the incorrect (non-thread-specific) locale. This is fixed in versions 0.9.2 and newer. */ return uc_locale_language (); } static inline int u32_locale_casecoll (const char *func_name, const uint32_t *c_s1, size_t s1_len, const uint32_t *c_s2, size_t s2_len, int *result) { /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must note make any non-local exit. */ int ret; const char *loc = locale_language (); ret = u32_casecoll (c_s1, s1_len, c_s2, s2_len, loc, UNINORM_NFC, result); return ret == 0 ? ret : errno; } static inline int compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name) #define FUNC_NAME func_name { int result, ret = 0; locale_t c_locale; scm_t_wchar *c_s1, *c_s2; int c_s1_malloc_p, c_s2_malloc_p; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); size_t s1_len = scm_i_string_length (s1); size_t s2_len = scm_i_string_length (s2); SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p); SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p); if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, ret = u32_locale_casecoll (func_name, (const uint32_t *) c_s1, s1_len, (const uint32_t *) c_s2, s2_len, &result)); else ret = u32_locale_casecoll (func_name, (const uint32_t *) c_s1, s1_len, (const uint32_t *) c_s2, s2_len, &result); SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p); SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p); if (SCM_UNLIKELY (ret != 0)) { errno = ret; scm_syserror (FUNC_NAME); } scm_remember_upto_here_2 (s1, s2); scm_remember_upto_here (locale); return result; } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_lt, "string-locale?", 2, 1, 0, (SCM s1, SCM s2, SCM locale), "Compare strings @var{s1} and @var{s2} in a locale-dependent way." "If @var{locale} is provided, it should be locale object (as " "returned by @code{make-locale}) and will be used to perform the " "comparison; otherwise, the current system locale is used.") #define FUNC_NAME s_scm_string_locale_gt { int result; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); result = compare_strings (s1, s2, locale, FUNC_NAME); return scm_from_bool (result > 0); } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_ci_lt, "string-locale-ci?", 2, 1, 0, (SCM s1, SCM s2, SCM locale), "Compare strings @var{s1} and @var{s2} in a case-insensitive, " "and locale-dependent way. If @var{locale} is provided, it " "should be locale object (as returned by @code{make-locale}) " "and will be used to perform the comparison; otherwise, the " "current system locale is used.") #define FUNC_NAME s_scm_string_locale_ci_gt { int result; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME); return scm_from_bool (result > 0); } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_ci_eq, "string-locale-ci=?", 2, 1, 0, (SCM s1, SCM s2, SCM locale), "Compare strings @var{s1} and @var{s2} in a case-insensitive, " "and locale-dependent way. If @var{locale} is provided, it " "should be locale object (as returned by @code{make-locale}) " "and will be used to perform the comparison; otherwise, the " "current system locale is used.") #define FUNC_NAME s_scm_string_locale_ci_eq { int result; SCM_VALIDATE_STRING (1, s1); SCM_VALIDATE_STRING (2, s2); result = compare_u32_strings_ci (s1, s2, locale, FUNC_NAME); return scm_from_bool (result == 0); } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_lt, "char-locale?", 2, 1, 0, (SCM c1, SCM c2, SCM locale), "Return true if character @var{c1} is greater than @var{c2} " "according to @var{locale} or to the current locale.") #define FUNC_NAME s_scm_char_locale_gt { int result; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); result = compare_strings (scm_string (scm_list_1 (c1)), scm_string (scm_list_1 (c2)), locale, FUNC_NAME); return scm_from_bool (result > 0); } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_ci_lt, "char-locale-ci?", 2, 1, 0, (SCM c1, SCM c2, SCM locale), "Return true if character @var{c1} is greater than @var{c2}, " "in a case insensitive way according to @var{locale} or to " "the current locale.") #define FUNC_NAME s_scm_char_locale_ci_gt { int result; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)), scm_string (scm_list_1 (c2)), locale, FUNC_NAME); return scm_from_bool (result > 0); } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_ci_eq, "char-locale-ci=?", 2, 1, 0, (SCM c1, SCM c2, SCM locale), "Return true if character @var{c1} is equal to @var{c2}, " "in a case insensitive way according to @var{locale} or to " "the current locale.") #define FUNC_NAME s_scm_char_locale_ci_eq { int result; SCM_VALIDATE_CHAR (1, c1); SCM_VALIDATE_CHAR (2, c2); result = compare_u32_strings_ci (scm_string (scm_list_1 (c1)), scm_string (scm_list_1 (c2)), locale, FUNC_NAME); return scm_from_bool (result == 0); } #undef FUNC_NAME /* Locale-dependent alphabetic character mapping. */ static inline int u32_locale_tocase (const uint32_t *c_s1, size_t len, uint32_t **p_c_s2, size_t * p_len2, uint32_t *(*func) (const uint32_t *, size_t, const char *, uninorm_t, uint32_t *, size_t *)) { /* Note: Since this is called from `RUN_IN_LOCALE_SECTION', it must not make any non-local exit. */ uint32_t *ret; const char *loc = locale_language (); /* The first NULL here indicates that no NFC or NFKC normalization is done. The second NULL means the return buffer is malloc'ed here. */ ret = func (c_s1, len, loc, NULL, NULL, p_len2); if (ret == NULL) { *p_c_s2 = (uint32_t *) NULL; *p_len2 = 0; return errno; } *p_c_s2 = ret; return 0; } static SCM chr_to_case (SCM chr, locale_t c_locale, uint32_t *(*func) (const uint32_t *, size_t, const char *, uninorm_t, uint32_t *, size_t *), const char *func_name, int *err) #define FUNC_NAME func_name { int ret; uint32_t c; uint32_t *convbuf; size_t convlen; SCM convchar; c = SCM_CHAR (chr); if (c_locale != NULL) RUN_IN_LOCALE_SECTION (c_locale, ret = u32_locale_tocase (&c, 1, &convbuf, &convlen, func)); else ret = u32_locale_tocase (&c, 1, &convbuf, &convlen, func); if (SCM_UNLIKELY (ret != 0)) { *err = ret; return SCM_BOOL_F; } if (convlen == 1) convchar = SCM_MAKE_CHAR ((scm_t_wchar) convbuf[0]); else convchar = chr; free (convbuf); return convchar; } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_downcase, "char-locale-downcase", 1, 1, 0, (SCM chr, SCM locale), "Return the lowercase character that corresponds to @var{chr} " "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_downcase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = chr_to_case (chr, c_locale, u32_tolower, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_upcase, "char-locale-upcase", 1, 1, 0, (SCM chr, SCM locale), "Return the uppercase character that corresponds to @var{chr} " "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_upcase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = chr_to_case (chr, c_locale, u32_toupper, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME SCM_DEFINE (scm_char_locale_titlecase, "char-locale-titlecase", 1, 1, 0, (SCM chr, SCM locale), "Return the titlecase character that corresponds to @var{chr} " "according to either @var{locale} or the current locale.") #define FUNC_NAME s_scm_char_locale_titlecase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = chr_to_case (chr, c_locale, u32_totitle, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME static SCM str_to_case (SCM str, locale_t c_locale, uint32_t *(*func) (const uint32_t *, size_t, const char *, uninorm_t, uint32_t *, size_t *), const char *func_name, int *err) #define FUNC_NAME func_name { scm_t_wchar *c_str, *c_buf; uint32_t *c_convstr; size_t len, convlen; int ret, c_str_malloc_p; SCM convstr; len = scm_i_string_length (str); if (len == 0) return scm_nullstr; SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p); if (c_locale) RUN_IN_LOCALE_SECTION (c_locale, ret = u32_locale_tocase ((uint32_t *) c_str, len, &c_convstr, &convlen, func)); else ret = u32_locale_tocase ((uint32_t *) c_str, len, &c_convstr, &convlen, func); SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p); scm_remember_upto_here (str); if (SCM_UNLIKELY (ret != 0)) { *err = ret; return SCM_BOOL_F; } convstr = scm_i_make_wide_string (convlen, &c_buf, 0); memcpy (c_buf, c_convstr, convlen * sizeof (scm_t_wchar)); free (c_convstr); scm_i_try_narrow_string (convstr); return convstr; } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_upcase, "string-locale-upcase", 1, 1, 0, (SCM str, SCM locale), "Return a new string that is the uppercase version of " "@var{str} according to either @var{locale} or the current " "locale.") #define FUNC_NAME s_scm_string_locale_upcase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = str_to_case (str, c_locale, u32_toupper, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_downcase, "string-locale-downcase", 1, 1, 0, (SCM str, SCM locale), "Return a new string that is the down-case version of " "@var{str} according to either @var{locale} or the current " "locale.") #define FUNC_NAME s_scm_string_locale_downcase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = str_to_case (str, c_locale, u32_tolower, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0, (SCM str, SCM locale), "Return a new string that is the title-case version of " "@var{str} according to either @var{locale} or the current " "locale.") #define FUNC_NAME s_scm_string_locale_titlecase { locale_t c_locale; SCM ret; int err = 0; SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); ret = str_to_case (str, c_locale, u32_totitle, FUNC_NAME, &err); if (err != 0) { errno = err; scm_syserror (FUNC_NAME); } return ret; } #undef FUNC_NAME /* Note: We don't provide mutative versions of `string-locale-(up|down)case' because, in some languages, a single downcase character maps to a couple of uppercase characters. Read the SRFI-13 document for a detailed discussion about this. */ /* Locale-dependent number parsing. */ SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer", 1, 2, 0, (SCM str, SCM base, SCM locale), "Convert string @var{str} into an integer according to either " "@var{locale} (a locale object as returned by @code{make-locale}) " "or the current process locale. Return two values: an integer " "(on success) or @code{#f}, and the number of characters read " "from @var{str} (@code{0} on failure).") #define FUNC_NAME s_scm_locale_string_to_integer { SCM result; long c_result; int c_base; const char *c_str; char *c_endptr; locale_t c_locale; SCM_VALIDATE_STRING (1, str); c_str = scm_i_string_chars (str); if (!scm_is_eq (base, SCM_UNDEFINED)) SCM_VALIDATE_INT_COPY (2, base, c_base); else c_base = 10; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); if (c_locale != NULL) { #ifdef HAVE_STRTOL_L c_result = strtol_l (c_str, &c_endptr, c_base, c_locale); #else RUN_IN_LOCALE_SECTION (c_locale, c_result = strtol (c_str, &c_endptr, c_base)); #endif } else c_result = strtol (c_str, &c_endptr, c_base); scm_remember_upto_here (str); if (c_endptr == c_str) result = SCM_BOOL_F; else result = scm_from_long (c_result); return scm_values_2 (result, scm_from_long (c_endptr - c_str)); } #undef FUNC_NAME SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact", 1, 1, 0, (SCM str, SCM locale), "Convert string @var{str} into an inexact number according to " "either @var{locale} (a locale object as returned by " "@code{make-locale}) or the current process locale. Return " "two values: an inexact number (on success) or @code{#f}, and " "the number of characters read from @var{str} (@code{0} on " "failure).") #define FUNC_NAME s_scm_locale_string_to_inexact { SCM result; double c_result; const char *c_str; char *c_endptr; locale_t c_locale; SCM_VALIDATE_STRING (1, str); c_str = scm_i_string_chars (str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); if (c_locale != NULL) { #ifdef HAVE_STRTOD_L c_result = strtod_l (c_str, &c_endptr, c_locale); #else RUN_IN_LOCALE_SECTION (c_locale, c_result = strtod (c_str, &c_endptr)); #endif } else c_result = strtod (c_str, &c_endptr); scm_remember_upto_here (str); if (c_endptr == c_str) result = SCM_BOOL_F; else result = scm_from_double (c_result); return scm_values_2 (result, scm_from_long (c_endptr - c_str)); } #undef FUNC_NAME /* Language information, aka. `nl_langinfo ()'. */ /* FIXME: Issues related to `nl-langinfo'. 1. The `CODESET' value is not normalized. This is a secondary issue, but still a practical issue. See http://www.cl.cam.ac.uk/~mgk25/ucs/norm_charmap.c for codeset normalization. 2. `nl_langinfo ()' is not available on Windows. 3. `nl_langinfo ()' may return strings encoded in a locale different from the current one. For example: (nl-langinfo DAY_1 (make-locale LC_ALL "eo_EO.UTF-8")) returns a result that is a UTF-8 string, regardless of the setting of the current locale. If nl_langinfo supports CODESET, we can convert the string properly using scm_from_stringn. If CODESET is not supported, we won't be able to make much sense of the returned string. Note: We don't use Gnulib's `nl_langinfo' module because it's currently not as complete as the compatibility hacks in `i18n.scm'. */ static char * copy_string_or_null (const char *s) { if (s == NULL) return NULL; else return strdup (s); } SCM_DEFINE (scm_nl_langinfo, "nl-langinfo", 1, 1, 0, (SCM item, SCM locale), "Return a string denoting locale information for @var{item} " "in the current locale or that specified by @var{locale}. " "The semantics and arguments are the same as those of the " "X/Open @code{nl_langinfo} function (@pxref{The Elegant and " "Fast Way, @code{nl_langinfo},, libc, The GNU C Library " "Reference Manual}).") #define FUNC_NAME s_scm_nl_langinfo { SCM result; nl_item c_item; char *c_result; locale_t c_locale; char *codeset; SCM_VALIDATE_INT_COPY (2, item, c_item); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); /* Sadly, `nl_langinfo ()' returns a pointer to a static string. According to SuS v2, that static string may be modified by subsequent calls to `nl_langinfo ()' as well as by calls to `setlocale ()'. Thus, we must acquire the locale mutex before doing invoking `nl_langinfo ()'. See http://opengroup.org/onlinepubs/007908799/xsh/nl_langinfo.html for details. */ lock_locale_mutex (); if (c_locale != NULL) { #ifdef HAVE_NL_LANGINFO_L c_result = copy_string_or_null (nl_langinfo_l (c_item, c_locale)); codeset = copy_string_or_null (nl_langinfo_l (CODESET, c_locale)); #else locale_t old_loc = uselocale (c_locale); c_result = copy_string_or_null (nl_langinfo (c_item)); codeset = copy_string_or_null (nl_langinfo (CODESET)); uselocale (old_loc); #endif } else { c_result = copy_string_or_null (nl_langinfo (c_item)); codeset = copy_string_or_null (nl_langinfo (CODESET)); } unlock_locale_mutex (); if (c_result == NULL) result = SCM_BOOL_F; else { switch (c_item) { #if (defined GROUPING) && (defined MON_GROUPING) case GROUPING: case MON_GROUPING: { char *p; /* In this cases, the result is to be interpreted as a list of numbers. If the last item is `CHAR_MAX' or a negative number, it has the special meaning "no more grouping" (negative numbers aren't specified in POSIX but can be used by glibc; see ). */ result = SCM_EOL; for (p = c_result; (*p > 0) && (*p != CHAR_MAX); p++) result = scm_cons (SCM_I_MAKINUM ((int) *p), result); { SCM last_pair = result; result = scm_reverse_x (result, SCM_EOL); if (*p == 0) { /* Cyclic grouping information. */ if (!scm_is_null (last_pair)) SCM_SETCDR (last_pair, result); } } free (c_result); break; } #endif #if defined FRAC_DIGITS || defined INT_FRAC_DIGITS #ifdef FRAC_DIGITS case FRAC_DIGITS: #endif #ifdef INT_FRAC_DIGITS case INT_FRAC_DIGITS: #endif /* This is to be interpreted as a single integer. */ if (*c_result == CHAR_MAX) /* Unspecified. */ result = SCM_BOOL_F; else result = SCM_I_MAKINUM (*c_result); free (c_result); break; #endif #if defined P_CS_PRECEDES || defined N_CS_PRECEDES || \ defined INT_P_CS_PRECEDES || defined INT_N_CS_PRECEDES || \ defined P_SEP_BY_SPACE || defined N_SEP_BY_SPACE || \ defined INT_P_SEP_BY_SPACE || defined INT_N_SEP_BY_SPACE #ifdef P_CS_PRECEDES case P_CS_PRECEDES: case N_CS_PRECEDES: #endif #ifdef INT_N_CS_PRECEDES case INT_P_CS_PRECEDES: case INT_N_CS_PRECEDES: #endif #ifdef P_SEP_BY_SPACE case P_SEP_BY_SPACE: case N_SEP_BY_SPACE: #endif #ifdef INT_P_SEP_BY_SPACE case INT_P_SEP_BY_SPACE: case INT_N_SEP_BY_SPACE: #endif /* This is to be interpreted as a boolean. */ result = scm_from_bool (*c_result); free (c_result); break; #endif #if defined P_SIGN_POSN || defined N_SIGN_POSN || \ defined INT_P_SIGN_POSN || defined INT_N_SIGN_POSN #ifdef P_SIGN_POSN case P_SIGN_POSN: case N_SIGN_POSN: #endif #ifdef INT_P_SIGN_POSN case INT_P_SIGN_POSN: case INT_N_SIGN_POSN: #endif /* See `(libc) Sign of Money Amount' for the interpretation of the return value here. */ switch (*c_result) { case 0: result = scm_from_latin1_symbol ("parenthesize"); break; case 1: result = scm_from_latin1_symbol ("sign-before"); break; case 2: result = scm_from_latin1_symbol ("sign-after"); break; case 3: result = scm_from_latin1_symbol ("sign-before-currency-symbol"); break; case 4: result = scm_from_latin1_symbol ("sign-after-currency-symbol"); break; default: result = scm_from_latin1_symbol ("unspecified"); } free (c_result); break; #endif default: result = scm_from_stringn (c_result, strlen (c_result), codeset, SCM_FAILED_CONVERSION_QUESTION_MARK); free (c_result); } } if (codeset != NULL) free (codeset); return result; } #undef FUNC_NAME /* Define the `nl_item' constants. */ static inline void define_langinfo_items (void) { #define DEFINE_NLITEM_CONSTANT(_item) \ scm_c_define (# _item, scm_from_int (_item)) DEFINE_NLITEM_CONSTANT (CODESET); /* Abbreviated days of the week. */ DEFINE_NLITEM_CONSTANT (ABDAY_1); DEFINE_NLITEM_CONSTANT (ABDAY_2); DEFINE_NLITEM_CONSTANT (ABDAY_3); DEFINE_NLITEM_CONSTANT (ABDAY_4); DEFINE_NLITEM_CONSTANT (ABDAY_5); DEFINE_NLITEM_CONSTANT (ABDAY_6); DEFINE_NLITEM_CONSTANT (ABDAY_7); /* Long-named days of the week. */ DEFINE_NLITEM_CONSTANT (DAY_1); /* Sunday */ DEFINE_NLITEM_CONSTANT (DAY_2); /* Monday */ DEFINE_NLITEM_CONSTANT (DAY_3); /* Tuesday */ DEFINE_NLITEM_CONSTANT (DAY_4); /* Wednesday */ DEFINE_NLITEM_CONSTANT (DAY_5); /* Thursday */ DEFINE_NLITEM_CONSTANT (DAY_6); /* Friday */ DEFINE_NLITEM_CONSTANT (DAY_7); /* Saturday */ /* Abbreviated month names. */ DEFINE_NLITEM_CONSTANT (ABMON_1); /* Jan */ DEFINE_NLITEM_CONSTANT (ABMON_2); DEFINE_NLITEM_CONSTANT (ABMON_3); DEFINE_NLITEM_CONSTANT (ABMON_4); DEFINE_NLITEM_CONSTANT (ABMON_5); DEFINE_NLITEM_CONSTANT (ABMON_6); DEFINE_NLITEM_CONSTANT (ABMON_7); DEFINE_NLITEM_CONSTANT (ABMON_8); DEFINE_NLITEM_CONSTANT (ABMON_9); DEFINE_NLITEM_CONSTANT (ABMON_10); DEFINE_NLITEM_CONSTANT (ABMON_11); DEFINE_NLITEM_CONSTANT (ABMON_12); /* Long month names. */ DEFINE_NLITEM_CONSTANT (MON_1); /* January */ DEFINE_NLITEM_CONSTANT (MON_2); DEFINE_NLITEM_CONSTANT (MON_3); DEFINE_NLITEM_CONSTANT (MON_4); DEFINE_NLITEM_CONSTANT (MON_5); DEFINE_NLITEM_CONSTANT (MON_6); DEFINE_NLITEM_CONSTANT (MON_7); DEFINE_NLITEM_CONSTANT (MON_8); DEFINE_NLITEM_CONSTANT (MON_9); DEFINE_NLITEM_CONSTANT (MON_10); DEFINE_NLITEM_CONSTANT (MON_11); DEFINE_NLITEM_CONSTANT (MON_12); DEFINE_NLITEM_CONSTANT (AM_STR); /* Ante meridiem string. */ DEFINE_NLITEM_CONSTANT (PM_STR); /* Post meridiem string. */ DEFINE_NLITEM_CONSTANT (D_T_FMT); /* Date and time format for strftime. */ DEFINE_NLITEM_CONSTANT (D_FMT); /* Date format for strftime. */ DEFINE_NLITEM_CONSTANT (T_FMT); /* Time format for strftime. */ DEFINE_NLITEM_CONSTANT (T_FMT_AMPM);/* 12-hour time format for strftime. */ #ifdef ERA DEFINE_NLITEM_CONSTANT (ERA); /* Alternate era. */ #endif #ifdef ERA_D_FMT DEFINE_NLITEM_CONSTANT (ERA_D_FMT); /* Date in alternate era format. */ #endif #ifdef ERA_D_T_FMT DEFINE_NLITEM_CONSTANT (ERA_D_T_FMT); /* Date and time in alternate era format. */ #endif #ifdef ERA_T_FMT DEFINE_NLITEM_CONSTANT (ERA_T_FMT); /* Time in alternate era format. */ #endif #ifdef ALT_DIGITS DEFINE_NLITEM_CONSTANT (ALT_DIGITS); /* Alternate symbols for digits. */ #endif DEFINE_NLITEM_CONSTANT (RADIXCHAR); DEFINE_NLITEM_CONSTANT (THOUSEP); #ifdef YESEXPR DEFINE_NLITEM_CONSTANT (YESEXPR); #endif #ifdef NOEXPR DEFINE_NLITEM_CONSTANT (NOEXPR); #endif #ifdef CRNCYSTR /* currency symbol */ DEFINE_NLITEM_CONSTANT (CRNCYSTR); #endif /* GNU extensions. */ #ifdef ERA_YEAR DEFINE_NLITEM_CONSTANT (ERA_YEAR); /* Year in alternate era format. */ #endif /* LC_MONETARY category: formatting of monetary quantities. These items each correspond to a member of `struct lconv', defined in . */ #ifdef INT_CURR_SYMBOL DEFINE_NLITEM_CONSTANT (INT_CURR_SYMBOL); #endif #ifdef MON_DECIMAL_POINT DEFINE_NLITEM_CONSTANT (MON_DECIMAL_POINT); #endif #ifdef MON_THOUSANDS_SEP DEFINE_NLITEM_CONSTANT (MON_THOUSANDS_SEP); #endif #ifdef MON_GROUPING DEFINE_NLITEM_CONSTANT (MON_GROUPING); #endif #ifdef POSITIVE_SIGN DEFINE_NLITEM_CONSTANT (POSITIVE_SIGN); #endif #ifdef NEGATIVE_SIGN DEFINE_NLITEM_CONSTANT (NEGATIVE_SIGN); #endif #ifdef GROUPING DEFINE_NLITEM_CONSTANT (GROUPING); #endif #ifdef INT_FRAC_DIGITS DEFINE_NLITEM_CONSTANT (INT_FRAC_DIGITS); #endif #ifdef FRAC_DIGITS DEFINE_NLITEM_CONSTANT (FRAC_DIGITS); #endif #ifdef P_CS_PRECEDES DEFINE_NLITEM_CONSTANT (P_CS_PRECEDES); #endif #ifdef P_SEP_BY_SPACE DEFINE_NLITEM_CONSTANT (P_SEP_BY_SPACE); #endif #ifdef N_CS_PRECEDES DEFINE_NLITEM_CONSTANT (N_CS_PRECEDES); #endif #ifdef N_SEP_BY_SPACE DEFINE_NLITEM_CONSTANT (N_SEP_BY_SPACE); #endif #ifdef P_SIGN_POSN DEFINE_NLITEM_CONSTANT (P_SIGN_POSN); #endif #ifdef N_SIGN_POSN DEFINE_NLITEM_CONSTANT (N_SIGN_POSN); #endif #ifdef INT_P_CS_PRECEDES DEFINE_NLITEM_CONSTANT (INT_P_CS_PRECEDES); #endif #ifdef INT_P_SEP_BY_SPACE DEFINE_NLITEM_CONSTANT (INT_P_SEP_BY_SPACE); #endif #ifdef INT_N_CS_PRECEDES DEFINE_NLITEM_CONSTANT (INT_N_CS_PRECEDES); #endif #ifdef INT_N_SEP_BY_SPACE DEFINE_NLITEM_CONSTANT (INT_N_SEP_BY_SPACE); #endif #ifdef INT_P_SIGN_POSN DEFINE_NLITEM_CONSTANT (INT_P_SIGN_POSN); #endif #ifdef INT_N_SIGN_POSN DEFINE_NLITEM_CONSTANT (INT_N_SIGN_POSN); #endif #undef DEFINE_NLITEM_CONSTANT } void scm_init_i18n () { scm_add_feature ("nl-langinfo"); define_langinfo_items (); #include "i18n.x" /* Initialize the global locale object with a special `locale' object. */ /* XXX: We don't define it as `LC_GLOBAL_LOCALE' because of bugs as of glibc <= 2.11 not (yet) worked around by Gnulib. See http://sourceware.org/bugzilla/show_bug.cgi?id=11009 for details. */ struct scm_locale *locale = scm_allocate_tagged (SCM_I_CURRENT_THREAD, sizeof (*locale)); locale->tag = scm_tc16_locale; locale->locale = NULL; SCM_VARIABLE_SET (scm_global_locale, scm_from_locale (locale)); } void scm_bootstrap_i18n () { scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION, "scm_init_i18n", (scm_t_extension_init_func) scm_init_i18n, NULL); }