mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-02 21:10:27 +02:00
Unicode-capable srfi-14 charsets
* libguile/Makefile.am: distribute new files srfi-14.i.c and unidata_to_charset.pl * chars.c (scm_c_upcase, scm_c_downcase): use unicode-enable toupper and tolower * libguile/srfi-14.h (scm_t_char_range, scm_t_char_set): new structures to describe char-sets (scm_t_char_set_cursor): new structure to describe char-set-cursors (SCM_BITS_PER_LONG): removed (SCM_CHARSET_GET): calls function New declarations for scm_i_charset_get, scm_i_charset_set, scm_i_charset_unset, and scm_debug_char_set. * test-suite/tests/srfi-14.test: new tests * libguile/srfi-14.c (SCM_CHARSET_DATA): new macro (SCM_CHARSET_SET, SCM_CHARSET_UNSET): call function (BYTES_PER_CHARSET, LONGS_PER_CHARSET): removed (scm_i_charset_get, scm_i_charset_set, scm_i_charset_unset) (charsets_equal, charsets_leq, charsets_union) (charsets_intersection, charsets_complement, charsets_xor): new functions that are low-level charset operators (charset_print, charset_free): modified for new charset struct (charset_cursor_print, charset_cursor_free): new function (make_char_set, scm_char_set_p, scm_char_set_eq, scm_car_set_leq) (scm_char_set_hash, scm_char_set_cursor, scm_char_set_ref) (scm_char_set_cursor_next, scm_end_of_char_set_p, scm_char_set_fold) (scm_char_set_unfold, scm_char_set_unfold_x, scm_char_set_for_each) (scm_char_set_map, scm_char_set_copy, scm_char_set, scm_list_to_char_set) (scm_list_to_char_set_x, scm_string_to_char_set, scm_string_to_char_set_x) (scm_char_set_filter, scm_char_set_filter_x, scm_ucs_range_to_char_set) (scm_ucs_range_to_char_set_x, scm_to_char_set, scm_char_set_size) (scm_char_set_count, scm_char_set_to_list, scm_char_set_to_string) (scm_char_set_contains_p, scm_char_set_every, scm_char_set_any) (scm_char_set_adjoin, scm_char_set_delete, scm_char_set_adjoin_x) (scm_char_set_delete_x, scm_char_set_complement, scm_char_set_union) (scm_char_set_intersection, scm_char_set_difference, scm_char_set_xor) (scm_char_set_diff_plus_intersection, scm_char_set_complement_x) (scm_char_set_union_x, scm_char_set_intersection_x, scm_char_set_difference_x) (scm_char_set_xor_x, scm_char_set_diff_plus_intersection_x): modified to use new charset and charset-cursor data structures (CSET_BLANK_PRED, CSET_SYMBOL_PRED, CSET_PUNCT_PRED, CSET_LOWER_PRED) (CSET_UPPER_PRED, CSET_LETTER_PRED, CSET_DIGIT_PRED, CSET_WHITESPACE_PRED) (CSET_CONTROL_PRED, CSET_HEX_DIGIT_PRED, CSET_ASCII_PRED, CSET_LETTER_PRED) (CSET_LETTER_AND_DIGIT_PRED, CSET_PRINTING_PRED, CSET_TRUE_PRED) (CSET_FALSE_PRED): removed (scm_srfi_14_compute_char_sets): removed - too slow to iterate over all of unicode at startup (scm_debug_char_set) [SCM_CHARSET_DEBUG]: new function
This commit is contained in:
parent
3bcf189ba0
commit
f49dbcadf3
5 changed files with 1234 additions and 630 deletions
|
@ -446,7 +446,7 @@ install-exec-hook:
|
||||||
## working.
|
## working.
|
||||||
noinst_HEADERS = conv-integer.i.c conv-uinteger.i.c \
|
noinst_HEADERS = conv-integer.i.c conv-uinteger.i.c \
|
||||||
eval.i.c ieee-754.h \
|
eval.i.c ieee-754.h \
|
||||||
srfi-4.i.c \
|
srfi-4.i.c srfi-14.i.c \
|
||||||
quicksort.i.c \
|
quicksort.i.c \
|
||||||
win32-uname.h win32-dirent.h win32-socket.h \
|
win32-uname.h win32-dirent.h win32-socket.h \
|
||||||
private-gc.h private-options.h
|
private-gc.h private-options.h
|
||||||
|
@ -597,7 +597,7 @@ EXTRA_DIST = ChangeLog-scm ChangeLog-threads \
|
||||||
cpp_errno.c cpp_err_symbols.in cpp_err_symbols.c \
|
cpp_errno.c cpp_err_symbols.in cpp_err_symbols.c \
|
||||||
cpp_sig_symbols.c cpp_sig_symbols.in cpp_cnvt.awk \
|
cpp_sig_symbols.c cpp_sig_symbols.in cpp_cnvt.awk \
|
||||||
c-tokenize.lex version.h.in \
|
c-tokenize.lex version.h.in \
|
||||||
scmconfig.h.top libgettext.h libguile.map
|
scmconfig.h.top libgettext.h unidata_to_charset.pl libguile.map
|
||||||
# $(DOT_DOC_FILES) $(EXTRA_DOT_DOC_FILES) \
|
# $(DOT_DOC_FILES) $(EXTRA_DOT_DOC_FILES) \
|
||||||
# guile-procedures.txt guile.texi
|
# guile-procedures.txt guile.texi
|
||||||
|
|
||||||
|
|
|
@ -296,20 +296,14 @@ TODO: change name to scm_i_.. ? --hwn
|
||||||
scm_t_wchar
|
scm_t_wchar
|
||||||
scm_c_upcase (scm_t_wchar c)
|
scm_c_upcase (scm_t_wchar c)
|
||||||
{
|
{
|
||||||
if (c > 255)
|
return uc_toupper ((int) c);
|
||||||
return c;
|
|
||||||
|
|
||||||
return toupper ((int) c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
scm_t_wchar
|
scm_t_wchar
|
||||||
scm_c_downcase (scm_t_wchar c)
|
scm_c_downcase (scm_t_wchar c)
|
||||||
{
|
{
|
||||||
if (c > 255)
|
return uc_tolower ((int) c);
|
||||||
return c;
|
|
||||||
|
|
||||||
return tolower ((int) c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
1527
libguile/srfi-14.c
1527
libguile/srfi-14.c
File diff suppressed because it is too large
Load diff
|
@ -24,22 +24,34 @@
|
||||||
|
|
||||||
#include "libguile/__scm.h"
|
#include "libguile/__scm.h"
|
||||||
|
|
||||||
#define SCM_CHARSET_SIZE 256
|
typedef struct
|
||||||
|
{
|
||||||
|
scm_t_wchar lo;
|
||||||
|
scm_t_wchar hi;
|
||||||
|
} scm_t_char_range;
|
||||||
|
|
||||||
/* We expect 8-bit bytes here. Should be no problem in the year
|
typedef struct
|
||||||
2001. */
|
{
|
||||||
#ifndef SCM_BITS_PER_LONG
|
size_t len;
|
||||||
# define SCM_BITS_PER_LONG (sizeof (long) * 8)
|
scm_t_char_range *ranges;
|
||||||
#endif
|
} scm_t_char_set;
|
||||||
|
|
||||||
#define SCM_CHARSET_GET(cs, idx) (((long *) SCM_SMOB_DATA (cs))\
|
typedef struct
|
||||||
[((unsigned char) (idx)) / SCM_BITS_PER_LONG] &\
|
{
|
||||||
(1L << (((unsigned char) (idx)) % SCM_BITS_PER_LONG)))
|
size_t range;
|
||||||
|
scm_t_wchar n;
|
||||||
|
} scm_t_char_set_cursor;
|
||||||
|
|
||||||
|
#define SCM_CHARSET_GET(cs,idx) \
|
||||||
|
scm_i_charset_get((scm_t_char_set *)SCM_SMOB_DATA(cs),idx)
|
||||||
|
|
||||||
#define SCM_CHARSETP(x) (!SCM_IMP (x) && (SCM_TYP16 (x) == scm_tc16_charset))
|
#define SCM_CHARSETP(x) (!SCM_IMP (x) && (SCM_TYP16 (x) == scm_tc16_charset))
|
||||||
|
|
||||||
/* Smob type code for character sets. */
|
/* Smob type code for character sets. */
|
||||||
SCM_API int scm_tc16_charset;
|
SCM_API int scm_tc16_charset;
|
||||||
|
SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n);
|
||||||
|
SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n);
|
||||||
|
SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n);
|
||||||
|
|
||||||
SCM_API SCM scm_char_set_p (SCM obj);
|
SCM_API SCM scm_char_set_p (SCM obj);
|
||||||
SCM_API SCM scm_char_set_eq (SCM char_sets);
|
SCM_API SCM scm_char_set_eq (SCM char_sets);
|
||||||
|
@ -88,6 +100,9 @@ SCM_API SCM scm_char_set_intersection_x (SCM cs1, SCM rest);
|
||||||
SCM_API SCM scm_char_set_difference_x (SCM cs1, SCM rest);
|
SCM_API SCM scm_char_set_difference_x (SCM cs1, SCM rest);
|
||||||
SCM_API SCM scm_char_set_xor_x (SCM cs1, SCM rest);
|
SCM_API SCM scm_char_set_xor_x (SCM cs1, SCM rest);
|
||||||
SCM_API SCM scm_char_set_diff_plus_intersection_x (SCM cs1, SCM cs2, SCM rest);
|
SCM_API SCM scm_char_set_diff_plus_intersection_x (SCM cs1, SCM cs2, SCM rest);
|
||||||
|
#if SCM_CHARSET_DEBUG
|
||||||
|
SCM_API SCM scm_debug_char_set (SCM cs);
|
||||||
|
#endif
|
||||||
|
|
||||||
SCM_API SCM scm_char_set_lower_case;
|
SCM_API SCM scm_char_set_lower_case;
|
||||||
SCM_API SCM scm_char_set_upper_case;
|
SCM_API SCM scm_char_set_upper_case;
|
||||||
|
@ -107,7 +122,6 @@ SCM_API SCM scm_char_set_ascii;
|
||||||
SCM_API SCM scm_char_set_empty;
|
SCM_API SCM scm_char_set_empty;
|
||||||
SCM_API SCM scm_char_set_full;
|
SCM_API SCM scm_char_set_full;
|
||||||
|
|
||||||
SCM_INTERNAL void scm_srfi_14_compute_char_sets (void);
|
|
||||||
SCM_INTERNAL void scm_init_srfi_14 (void);
|
SCM_INTERNAL void scm_init_srfi_14 (void);
|
||||||
|
|
||||||
#endif /* SCM_SRFI_14_H */
|
#endif /* SCM_SRFI_14_H */
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
;;;; srfi-14.test --- Test suite for Guile's SRFI-14 functions.
|
;;;; srfi-14.test -*- mode:scheme; coding: iso-8859-1 -*-
|
||||||
|
;;;; --- Test suite for Guile's SRFI-14 functions.
|
||||||
;;;; Martin Grabmueller, 2001-07-16
|
;;;; Martin Grabmueller, 2001-07-16
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
|
;;;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
|
||||||
|
@ -29,6 +30,30 @@
|
||||||
(define exception:non-char-return
|
(define exception:non-char-return
|
||||||
(cons 'misc-error "returned non-char"))
|
(cons 'misc-error "returned non-char"))
|
||||||
|
|
||||||
|
|
||||||
|
(with-test-prefix "char set contents"
|
||||||
|
|
||||||
|
(pass-if "empty set"
|
||||||
|
(list= eqv?
|
||||||
|
(char-set->list (char-set))
|
||||||
|
'()))
|
||||||
|
|
||||||
|
(pass-if "single char"
|
||||||
|
(list= eqv?
|
||||||
|
(char-set->list (char-set #\a))
|
||||||
|
(list #\a)))
|
||||||
|
|
||||||
|
(pass-if "contiguous chars"
|
||||||
|
(list= eqv?
|
||||||
|
(char-set->list (char-set #\a #\b #\c))
|
||||||
|
(list #\a #\b #\c)))
|
||||||
|
|
||||||
|
(pass-if "discontiguous chars"
|
||||||
|
(list= eqv?
|
||||||
|
(char-set->list (char-set #\a #\c #\e))
|
||||||
|
(list #\a #\c #\e))))
|
||||||
|
|
||||||
|
|
||||||
(with-test-prefix "char-set?"
|
(with-test-prefix "char-set?"
|
||||||
|
|
||||||
(pass-if "success on empty set"
|
(pass-if "success on empty set"
|
||||||
|
@ -113,7 +138,7 @@
|
||||||
(with-test-prefix "char-set cursor"
|
(with-test-prefix "char-set cursor"
|
||||||
|
|
||||||
(pass-if-exception "invalid character cursor"
|
(pass-if-exception "invalid character cursor"
|
||||||
exception:invalid-char-set-cursor
|
exception:wrong-type-arg
|
||||||
(let* ((cs (char-set #\B #\r #\a #\z))
|
(let* ((cs (char-set #\B #\r #\a #\z))
|
||||||
(cc (char-set-cursor cs)))
|
(cc (char-set-cursor cs)))
|
||||||
(char-set-ref cs 1000)))
|
(char-set-ref cs 1000)))
|
||||||
|
@ -148,30 +173,33 @@
|
||||||
(= (char-set-size (char-set-fold (lambda (c cs) (char-set-adjoin cs c))
|
(= (char-set-size (char-set-fold (lambda (c cs) (char-set-adjoin cs c))
|
||||||
(char-set) (char-set #\a #\b))) 2)))
|
(char-set) (char-set #\a #\b))) 2)))
|
||||||
|
|
||||||
|
(define char-set:256
|
||||||
|
(string->char-set (apply string (map integer->char (iota 256)))))
|
||||||
|
|
||||||
(with-test-prefix "char-set-unfold"
|
(with-test-prefix "char-set-unfold"
|
||||||
|
|
||||||
(pass-if "create char set"
|
(pass-if "create char set"
|
||||||
(char-set= char-set:full
|
(char-set= char-set:256
|
||||||
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
||||||
(lambda (s) (+ s 1)) 0)))
|
(lambda (s) (+ s 1)) 0)))
|
||||||
(pass-if "create char set (base set)"
|
(pass-if "create char set (base set)"
|
||||||
(char-set= char-set:full
|
(char-set= char-set:256
|
||||||
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
||||||
(lambda (s) (+ s 1)) 0 char-set:empty))))
|
(lambda (s) (+ s 1)) 0 char-set:empty))))
|
||||||
|
|
||||||
(with-test-prefix "char-set-unfold!"
|
(with-test-prefix "char-set-unfold!"
|
||||||
|
|
||||||
(pass-if "create char set"
|
(pass-if "create char set"
|
||||||
(char-set= char-set:full
|
(char-set= char-set:256
|
||||||
(char-set-unfold! (lambda (s) (= s 256)) integer->char
|
(char-set-unfold! (lambda (s) (= s 256)) integer->char
|
||||||
(lambda (s) (+ s 1)) 0
|
(lambda (s) (+ s 1)) 0
|
||||||
(char-set-copy char-set:empty))))
|
(char-set-copy char-set:empty))))
|
||||||
|
|
||||||
(pass-if "create char set"
|
(pass-if "create char set"
|
||||||
(char-set= char-set:full
|
(char-set= char-set:256
|
||||||
(char-set-unfold! (lambda (s) (= s 32)) integer->char
|
(char-set-unfold! (lambda (s) (= s 32)) integer->char
|
||||||
(lambda (s) (+ s 1)) 0
|
(lambda (s) (+ s 1)) 0
|
||||||
(char-set-copy char-set:full)))))
|
(char-set-copy char-set:256)))))
|
||||||
|
|
||||||
|
|
||||||
(with-test-prefix "char-set-for-each"
|
(with-test-prefix "char-set-for-each"
|
||||||
|
@ -186,9 +214,15 @@
|
||||||
|
|
||||||
(with-test-prefix "char-set-map"
|
(with-test-prefix "char-set-map"
|
||||||
|
|
||||||
(pass-if "upper case char set"
|
(pass-if "upper case char set 1"
|
||||||
(char-set= (char-set-map char-upcase char-set:lower-case)
|
(char-set= (char-set-map char-upcase
|
||||||
char-set:upper-case)))
|
(string->char-set "abcdefghijklmnopqrstuvwxyz"))
|
||||||
|
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
|
||||||
|
|
||||||
|
(pass-if "upper case char set 2"
|
||||||
|
(char-set= (char-set-map char-upcase
|
||||||
|
(string->char-set "àáâãäåæçèéêëìíîïñòóôõöøùúûüýþ"))
|
||||||
|
(string->char-set "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞ"))))
|
||||||
|
|
||||||
(with-test-prefix "string->char-set"
|
(with-test-prefix "string->char-set"
|
||||||
|
|
||||||
|
@ -197,42 +231,107 @@
|
||||||
(char-set= (list->char-set chars)
|
(char-set= (list->char-set chars)
|
||||||
(string->char-set (apply string chars))))))
|
(string->char-set (apply string chars))))))
|
||||||
|
|
||||||
|
(with-test-prefix "char-set->string"
|
||||||
|
|
||||||
|
(pass-if "some char set"
|
||||||
|
(let ((cs (char-set #\g #\u #\i #\l #\e)))
|
||||||
|
(string=? (char-set->string cs)
|
||||||
|
"egilu"))))
|
||||||
|
|
||||||
;; Make sure we get an ASCII charset and character classification.
|
;; Make sure we get an ASCII charset and character classification.
|
||||||
(if (defined? 'setlocale) (setlocale LC_CTYPE "C"))
|
(if (defined? 'setlocale) (setlocale LC_CTYPE "C"))
|
||||||
|
|
||||||
(with-test-prefix "standard char sets (ASCII)"
|
(with-test-prefix "standard char sets (ASCII)"
|
||||||
|
|
||||||
|
(pass-if "char-set:lower-case"
|
||||||
|
(char-set<= (string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||||
|
char-set:lower-case))
|
||||||
|
|
||||||
|
(pass-if "char-set:upper-case"
|
||||||
|
(char-set<= (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
char-set:upper-case))
|
||||||
|
|
||||||
|
(pass-if "char-set:title-case"
|
||||||
|
(char-set<= (string->char-set "")
|
||||||
|
char-set:title-case))
|
||||||
|
|
||||||
(pass-if "char-set:letter"
|
(pass-if "char-set:letter"
|
||||||
(char-set= (string->char-set
|
(char-set<= (char-set-union
|
||||||
(string-append "abcdefghijklmnopqrstuvwxyz"
|
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
|
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
|
||||||
char-set:letter))
|
char-set:letter))
|
||||||
|
|
||||||
(pass-if "char-set:punctuation"
|
(pass-if "char-set:digit"
|
||||||
(char-set= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
(char-set<= (string->char-set "0123456789")
|
||||||
char-set:punctuation))
|
char-set:digit))
|
||||||
|
|
||||||
(pass-if "char-set:symbol"
|
(pass-if "char-set:hex-digit"
|
||||||
(char-set= (string->char-set "$+<=>^`|~")
|
(char-set<= (string->char-set "0123456789abcdefABCDEF")
|
||||||
char-set:symbol))
|
char-set:hex-digit))
|
||||||
|
|
||||||
(pass-if "char-set:letter+digit"
|
(pass-if "char-set:letter+digit"
|
||||||
(char-set= char-set:letter+digit
|
(char-set<= (char-set-union
|
||||||
(char-set-union char-set:letter char-set:digit)))
|
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||||
|
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
(string->char-set "0123456789"))
|
||||||
|
char-set:letter+digit))
|
||||||
|
|
||||||
|
(pass-if "char-set:punctuation"
|
||||||
|
(char-set<= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||||
|
char-set:punctuation))
|
||||||
|
|
||||||
|
(pass-if "char-set:symbol"
|
||||||
|
(char-set<= (string->char-set "$+<=>^`|~")
|
||||||
|
char-set:symbol))
|
||||||
|
|
||||||
(pass-if "char-set:graphic"
|
(pass-if "char-set:graphic"
|
||||||
(char-set= char-set:graphic
|
(char-set<= (char-set-union
|
||||||
(char-set-union char-set:letter char-set:digit
|
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||||
char-set:punctuation char-set:symbol)))
|
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
(string->char-set "0123456789")
|
||||||
|
(string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||||
|
(string->char-set "$+<=>^`|~"))
|
||||||
|
char-set:graphic))
|
||||||
|
|
||||||
|
(pass-if "char-set:whitespace"
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(string
|
||||||
|
(integer->char #x09)
|
||||||
|
(integer->char #x0a)
|
||||||
|
(integer->char #x0b)
|
||||||
|
(integer->char #x0c)
|
||||||
|
(integer->char #x0d)
|
||||||
|
(integer->char #x20)))
|
||||||
|
char-set:whitespace))
|
||||||
|
|
||||||
(pass-if "char-set:printing"
|
(pass-if "char-set:printing"
|
||||||
(char-set= char-set:printing
|
(char-set<= (char-set-union
|
||||||
(char-set-union char-set:whitespace char-set:graphic))))
|
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||||
|
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
(string->char-set "0123456789")
|
||||||
|
(string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||||
|
(string->char-set "$+<=>^`|~")
|
||||||
|
(string->char-set (string
|
||||||
|
(integer->char #x09)
|
||||||
|
(integer->char #x0a)
|
||||||
|
(integer->char #x0b)
|
||||||
|
(integer->char #x0c)
|
||||||
|
(integer->char #x0d)
|
||||||
|
(integer->char #x20))))
|
||||||
|
char-set:printing))
|
||||||
|
|
||||||
|
(pass-if "char-set:iso-control"
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(apply string
|
||||||
|
(map integer->char (append
|
||||||
|
;; U+0000 to U+001F
|
||||||
|
(iota #x20)
|
||||||
|
(list #x7f)))))
|
||||||
|
char-set:iso-control)))
|
||||||
|
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
;;; 8-bit charsets.
|
;;; Non-ASCII codepoints
|
||||||
;;;
|
;;;
|
||||||
;;; Here, we only test ISO-8859-1 (Latin-1), notably because behavior of
|
;;; Here, we only test ISO-8859-1 (Latin-1), notably because behavior of
|
||||||
;;; SRFI-14 for implementations supporting this charset is well-defined.
|
;;; SRFI-14 for implementations supporting this charset is well-defined.
|
||||||
|
@ -260,57 +359,123 @@
|
||||||
|
|
||||||
(with-test-prefix "Latin-1 (8-bit charset)"
|
(with-test-prefix "Latin-1 (8-bit charset)"
|
||||||
|
|
||||||
;; Note: the membership tests below are not exhaustive.
|
(pass-if "char-set:lower-case"
|
||||||
|
|
||||||
(pass-if "char-set:letter (membership)"
|
|
||||||
(if (not %latin1)
|
(if (not %latin1)
|
||||||
(throw 'unresolved)
|
(throw 'unresolved)
|
||||||
(let ((letters (char-set->list char-set:letter)))
|
(char-set<= (string->char-set
|
||||||
(every? (lambda (8-bit-char)
|
(string-append "abcdefghijklmnopqrstuvwxyz"
|
||||||
(memq 8-bit-char letters))
|
"µßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ")
|
||||||
(append '(#\a #\b #\c) ;; ASCII
|
char-set:lower-case))))
|
||||||
(string->list "çéèâùÉÀÈÊ") ;; French
|
|
||||||
(string->list "øñÑíßåæðþ"))))))
|
|
||||||
|
|
||||||
(pass-if "char-set:letter (size)"
|
(pass-if "char-set:upper-case"
|
||||||
|
(if (not %latin1)
|
||||||
|
(throw 'unresolved)
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(string-append "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ")
|
||||||
|
char-set:lower-case))))
|
||||||
|
|
||||||
|
(pass-if "char-set:title-case"
|
||||||
(if (not %latin1)
|
(if (not %latin1)
|
||||||
(throw 'unresolved)
|
(throw 'unresolved)
|
||||||
(= (char-set-size char-set:letter) 117)))
|
(char-set<= (string->char-set "")
|
||||||
|
char-set:title-case)))
|
||||||
|
|
||||||
(pass-if "char-set:lower-case (size)"
|
(pass-if "char-set:letter"
|
||||||
(if (not %latin1)
|
(if (not %latin1)
|
||||||
(throw 'unresolved)
|
(throw 'unresolved)
|
||||||
(= (char-set-size char-set:lower-case) (+ 26 33))))
|
(char-set<= (string->char-set
|
||||||
|
(string-append
|
||||||
|
;; Lowercase
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"µßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
|
||||||
|
;; Uppercase
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ"
|
||||||
|
;; Uncased
|
||||||
|
"ªº"))
|
||||||
|
char-set:letter)))
|
||||||
|
|
||||||
(pass-if "char-set:upper-case (size)"
|
(pass-if "char-set:digit"
|
||||||
(if (not %latin1)
|
(if (not %latin1)
|
||||||
(throw 'unresolved)
|
(throw 'unresolved)
|
||||||
(= (char-set-size char-set:upper-case) (+ 26 30))))
|
(char-set<= (string->char-set "0123456789")
|
||||||
|
char-set:digit)))
|
||||||
|
|
||||||
(pass-if "char-set:punctuation (membership)"
|
(pass-if "char-set:hex-digit"
|
||||||
(if (not %latin1)
|
(if (not %latin1)
|
||||||
(throw 'unresolved)
|
(throw 'unresolved)
|
||||||
(let ((punctuation (char-set->list char-set:punctuation)))
|
(char-set<= (string->char-set "0123456789abcdefABCDEF")
|
||||||
(every? (lambda (8-bit-char)
|
char-set:hex-digit)))
|
||||||
(memq 8-bit-char punctuation))
|
|
||||||
(append '(#\! #\. #\?) ;; ASCII
|
|
||||||
(string->list "¡¿") ;; Castellano
|
|
||||||
(string->list "«»")))))) ;; French
|
|
||||||
|
|
||||||
(pass-if "char-set:letter+digit"
|
(pass-if "char-set:letter+digit"
|
||||||
(char-set= char-set:letter+digit
|
(if (not %latin1)
|
||||||
(char-set-union char-set:letter char-set:digit)))
|
(throw 'unresolved)
|
||||||
|
(char-set<= (char-set-union
|
||||||
|
char-set:letter
|
||||||
|
char-set:digit)
|
||||||
|
char-set:letter+digit)))
|
||||||
|
|
||||||
|
(pass-if "char-set:punctuation"
|
||||||
|
(if (not %latin1)
|
||||||
|
(throw 'unresolved)
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(string-append "!\"#%&'()*,-./:;?@[\\]_{}"
|
||||||
|
"¡«·»¿"))
|
||||||
|
char-set:punctuation)))
|
||||||
|
|
||||||
|
(pass-if "char-set:symbol"
|
||||||
|
(if (not %latin1)
|
||||||
|
(throw 'unresolved)
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(string-append "$+<=>^`|~"
|
||||||
|
"¢£¤¥¦§¨©¬®¯°±´¶¸×÷"))
|
||||||
|
char-set:symbol)))
|
||||||
|
|
||||||
|
;; Note that SRFI-14 itself is inconsistent here. Characters that
|
||||||
|
;; are non-digit numbers (such as category No) are clearly 'graphic'
|
||||||
|
;; but don't occur in the letter, digit, punct, or symbol charsets.
|
||||||
(pass-if "char-set:graphic"
|
(pass-if "char-set:graphic"
|
||||||
(char-set= char-set:graphic
|
(if (not %latin1)
|
||||||
(char-set-union char-set:letter char-set:digit
|
(throw 'unresolved)
|
||||||
char-set:punctuation char-set:symbol)))
|
(char-set<= (char-set-union
|
||||||
|
char-set:letter
|
||||||
|
char-set:digit
|
||||||
|
char-set:punctuation
|
||||||
|
char-set:symbol)
|
||||||
|
char-set:graphic)))
|
||||||
|
|
||||||
|
(pass-if "char-set:whitespace"
|
||||||
|
(if (not %latin1)
|
||||||
|
(throw 'unresolved)
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(string
|
||||||
|
(integer->char #x09)
|
||||||
|
(integer->char #x0a)
|
||||||
|
(integer->char #x0b)
|
||||||
|
(integer->char #x0c)
|
||||||
|
(integer->char #x0d)
|
||||||
|
(integer->char #x20)
|
||||||
|
(integer->char #xa0)))
|
||||||
|
char-set:whitespace)))
|
||||||
|
|
||||||
(pass-if "char-set:printing"
|
(pass-if "char-set:printing"
|
||||||
(char-set= char-set:printing
|
(if (not %latin1)
|
||||||
(char-set-union char-set:whitespace char-set:graphic))))
|
(throw 'unresolved)
|
||||||
|
(char-set<= (char-set-union char-set:graphic char-set:whitespace)
|
||||||
|
char-set:printing)))
|
||||||
|
|
||||||
|
(pass-if "char-set:iso-control"
|
||||||
|
(if (not %latin1)
|
||||||
|
(throw 'unresolved)
|
||||||
|
(char-set<= (string->char-set
|
||||||
|
(apply string
|
||||||
|
(map integer->char (append
|
||||||
|
;; U+0000 to U+001F
|
||||||
|
(iota #x20)
|
||||||
|
(list #x7f)
|
||||||
|
;; U+007F to U+009F
|
||||||
|
(map (lambda (x) (+ #x80 x))
|
||||||
|
(iota #x20))))))
|
||||||
|
char-set:iso-control))))
|
||||||
|
|
||||||
;; Local Variables:
|
|
||||||
;; mode: scheme
|
|
||||||
;; coding: latin-1
|
|
||||||
;; End:
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue