mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-22 03:30:22 +02:00
Support for Unicode general categories
* libguile/chars.c, libguile/chars.h (scm_char_general_category): New function. * test-suite/tests/chars.test: Unit tests for `char-general-category'. * doc/ref/api-data.texi (Characters): Documentation for `char-general-category'.
This commit is contained in:
parent
d7a4096d25
commit
0ca3a342d1
4 changed files with 102 additions and 1 deletions
|
@ -1875,6 +1875,81 @@ Return @code{#t} iff @var{chr} is either uppercase or lowercase, else
|
||||||
@code{#f}.
|
@code{#f}.
|
||||||
@end deffn
|
@end deffn
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} char-general-category chr
|
||||||
|
@deffnx {C Function} scm_char_general_category (chr)
|
||||||
|
Return a symbol giving the two-letter name of the Unicode general
|
||||||
|
category assigned to @var{chr} or @code{#f} if no named category is
|
||||||
|
assigned. The following table provides a list of category names along
|
||||||
|
with their meanings.
|
||||||
|
|
||||||
|
@multitable @columnfractions .1 .4 .1 .4
|
||||||
|
@item Lu
|
||||||
|
@tab Uppercase letter
|
||||||
|
@tab Pf
|
||||||
|
@tab Final quote punctuation
|
||||||
|
@item Ll
|
||||||
|
@tab Lowercase letter
|
||||||
|
@tab Po
|
||||||
|
@tab Other punctuation
|
||||||
|
@item Lt
|
||||||
|
@tab Titlecase letter
|
||||||
|
@tab Sm
|
||||||
|
@tab Math symbol
|
||||||
|
@item Lm
|
||||||
|
@tab Modifier letter
|
||||||
|
@tab Sc
|
||||||
|
@tab Currency symbol
|
||||||
|
@item Lo
|
||||||
|
@tab Other letter
|
||||||
|
@tab Sk
|
||||||
|
@tab Modifier symbol
|
||||||
|
@item Mn
|
||||||
|
@tab Non-spacing mark
|
||||||
|
@tab So
|
||||||
|
@tab Other symbol
|
||||||
|
@item Mc
|
||||||
|
@tab Combining spacing mark
|
||||||
|
@tab Zs
|
||||||
|
@tab Space separator
|
||||||
|
@item Me
|
||||||
|
@tab Enclosing mark
|
||||||
|
@tab Zl
|
||||||
|
@tab Line separator
|
||||||
|
@item Nd
|
||||||
|
@tab Decimal digit number
|
||||||
|
@tab Zp
|
||||||
|
@tab Paragraph separator
|
||||||
|
@item Nl
|
||||||
|
@tab Letter number
|
||||||
|
@tab Cc
|
||||||
|
@tab Control
|
||||||
|
@item No
|
||||||
|
@tab Other number
|
||||||
|
@tab Cf
|
||||||
|
@tab Format
|
||||||
|
@item Pc
|
||||||
|
@tab Connector punctuation
|
||||||
|
@tab Cs
|
||||||
|
@tab Surrogate
|
||||||
|
@item Pd
|
||||||
|
@tab Dash punctuation
|
||||||
|
@tab Co
|
||||||
|
@tab Private use
|
||||||
|
@item Ps
|
||||||
|
@tab Open punctuation
|
||||||
|
@tab Cn
|
||||||
|
@tab Unassigned
|
||||||
|
@item Pe
|
||||||
|
@tab Close punctuation
|
||||||
|
@tab
|
||||||
|
@tab
|
||||||
|
@item Pi
|
||||||
|
@tab Initial quote punctuation
|
||||||
|
@tab
|
||||||
|
@tab
|
||||||
|
@end multitable
|
||||||
|
@end deffn
|
||||||
|
|
||||||
@rnindex char->integer
|
@rnindex char->integer
|
||||||
@deffn {Scheme Procedure} char->integer chr
|
@deffn {Scheme Procedure} char->integer chr
|
||||||
@deffnx {C Function} scm_char_to_integer (chr)
|
@deffnx {C Function} scm_char_to_integer (chr)
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <unicase.h>
|
#include <unicase.h>
|
||||||
|
#include <unictype.h>
|
||||||
|
|
||||||
#include "libguile/_scm.h"
|
#include "libguile/_scm.h"
|
||||||
#include "libguile/validate.h"
|
#include "libguile/validate.h"
|
||||||
|
@ -467,6 +468,25 @@ SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0,
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
|
SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0,
            (SCM chr),
            "Return a symbol representing the Unicode general category of "
            "@var{chr} or @code{#f} if a named category cannot be found.")
#define FUNC_NAME s_scm_char_general_category
{
  /* Short category name for CHR (e.g. "Lu"), or NULL when the lookup
     yields no name.  */
  const char *name;

  /* Reject non-character arguments before touching the code point.  */
  SCM_VALIDATE_CHAR (1, chr);

  /* Map the code point to its general category, then to that
     category's name, in a single step.  */
  name = uc_general_category_name (uc_general_category (SCM_CHAR (chr)));

  /* No name means no symbol to hand back: answer #f instead.  */
  return (name == NULL) ? SCM_BOOL_F : scm_from_locale_symbol (name);
}
#undef FUNC_NAME
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -81,6 +81,7 @@ SCM_API SCM scm_integer_to_char (SCM n);
|
||||||
SCM_API SCM scm_char_upcase (SCM chr);
|
SCM_API SCM scm_char_upcase (SCM chr);
|
||||||
SCM_API SCM scm_char_downcase (SCM chr);
|
SCM_API SCM scm_char_downcase (SCM chr);
|
||||||
SCM_API SCM scm_char_titlecase (SCM chr);
|
SCM_API SCM scm_char_titlecase (SCM chr);
|
||||||
|
SCM_API SCM scm_char_general_category (SCM chr);
|
||||||
SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
|
SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
|
||||||
SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
|
SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
|
||||||
SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c);
|
SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c);
|
||||||
|
|
|
@ -210,7 +210,12 @@
|
||||||
(not (char-is-both? #\newline))
|
(not (char-is-both? #\newline))
|
||||||
(char-is-both? #\a)
|
(char-is-both? #\a)
|
||||||
(char-is-both? #\Z)
|
(char-is-both? #\Z)
|
||||||
(not (char-is-both? #\1)))))
|
(not (char-is-both? #\1))))
|
||||||
|
|
||||||
|
(pass-if "char-general-category"
|
||||||
|
(and (eq? (char-general-category #\a) 'Ll)
|
||||||
|
(eq? (char-general-category #\A) 'Lu)
|
||||||
|
(eq? (char-general-category #\762) 'Lt))))
|
||||||
|
|
||||||
(with-test-prefix "integer"
|
(with-test-prefix "integer"
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue