1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-22 11:34:09 +02:00

Support for Unicode general categories

* libguile/chars.c, libguile/chars.h (scm_char_general_category): New function.
* test-suite/tests/chars.test: Unit tests for `char-general-category'.
* doc/ref/api-data.texi (Characters): Documentation for
  `char-general-category'.
This commit is contained in:
Julian Graham 2009-12-24 00:25:19 -05:00
parent d7a4096d25
commit 0ca3a342d1
4 changed files with 102 additions and 1 deletions

View file

@ -1875,6 +1875,81 @@ Return @code{#t} iff @var{chr} is either uppercase or lowercase, else
@code{#f}.
@end deffn
@deffn {Scheme Procedure} char-general-category chr
@deffnx {C Function} scm_char_general_category (chr)
Return a symbol giving the two-letter name of the Unicode general
category assigned to @var{chr}, or @code{#f} if no named category is
assigned.  The following table lists the category names along with
their meanings.
@multitable @columnfractions .1 .4 .1 .4
@item Lu
@tab Uppercase letter
@tab Pf
@tab Final quote punctuation
@item Ll
@tab Lowercase letter
@tab Po
@tab Other punctuation
@item Lt
@tab Titlecase letter
@tab Sm
@tab Math symbol
@item Lm
@tab Modifier letter
@tab Sc
@tab Currency symbol
@item Lo
@tab Other letter
@tab Sk
@tab Modifier symbol
@item Mn
@tab Non-spacing mark
@tab So
@tab Other symbol
@item Mc
@tab Combining spacing mark
@tab Zs
@tab Space separator
@item Me
@tab Enclosing mark
@tab Zl
@tab Line separator
@item Nd
@tab Decimal digit number
@tab Zp
@tab Paragraph separator
@item Nl
@tab Letter number
@tab Cc
@tab Control
@item No
@tab Other number
@tab Cf
@tab Format
@item Pc
@tab Connector punctuation
@tab Cs
@tab Surrogate
@item Pd
@tab Dash punctuation
@tab Co
@tab Private use
@item Ps
@tab Open punctuation
@tab Cn
@tab Unassigned
@item Pe
@tab Close punctuation
@tab
@tab
@item Pi
@tab Initial quote punctuation
@tab
@tab
@end multitable
@end deffn
@rnindex char->integer
@deffn {Scheme Procedure} char->integer chr
@deffnx {C Function} scm_char_to_integer (chr)

View file

@ -25,6 +25,7 @@
#include <ctype.h> #include <ctype.h>
#include <limits.h> #include <limits.h>
#include <unicase.h> #include <unicase.h>
#include <unictype.h>
#include "libguile/_scm.h" #include "libguile/_scm.h"
#include "libguile/validate.h" #include "libguile/validate.h"
@ -467,6 +468,25 @@ SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
} }
#undef FUNC_NAME #undef FUNC_NAME
/* Scheme procedure `char-general-category': map the character CHR to a
   symbol naming its Unicode general category.  Signals a wrong-type
   error through SCM_VALIDATE_CHAR when CHR is not a character, and
   yields #f when the category lookup produces no name.  */
SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
            (SCM chr),
            "Return a symbol representing the Unicode general category of "
            "@var{chr} or @code{#f} if a named category cannot be found.")
#define FUNC_NAME s_scm_char_general_category
{
  uc_general_category_t category;
  const char *name;

  SCM_VALIDATE_CHAR (1, chr);

  /* Two-step lookup: code point -> category object -> category name.  */
  category = uc_general_category (SCM_CHAR (chr));
  name = uc_general_category_name (category);

  /* Without a name there is nothing to intern; report that as #f.  */
  if (name == NULL)
    return SCM_BOOL_F;

  return scm_from_locale_symbol (name);
}
#undef FUNC_NAME

View file

@ -81,6 +81,7 @@ SCM_API SCM scm_integer_to_char (SCM n);
SCM_API SCM scm_char_upcase (SCM chr); SCM_API SCM scm_char_upcase (SCM chr);
SCM_API SCM scm_char_downcase (SCM chr); SCM_API SCM scm_char_downcase (SCM chr);
SCM_API SCM scm_char_titlecase (SCM chr); SCM_API SCM scm_char_titlecase (SCM chr);
SCM_API SCM scm_char_general_category (SCM chr);
SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c);

View file

@ -210,7 +210,12 @@
(not (char-is-both? #\newline)) (not (char-is-both? #\newline))
(char-is-both? #\a) (char-is-both? #\a)
(char-is-both? #\Z) (char-is-both? #\Z)
(not (char-is-both? #\1))))) (not (char-is-both? #\1))))
;; Spot-check `char-general-category' on one lowercase, one uppercase
;; and one titlecase letter.  #\762 is an octal character literal for
;; code point U+01F2, whose expected category here is Lt (titlecase).
(pass-if "char-general-category"
(and (eq? (char-general-category #\a) 'Ll)
(eq? (char-general-category #\A) 'Lu)
(eq? (char-general-category #\762) 'Lt))))
(with-test-prefix "integer" (with-test-prefix "integer"