mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-09 07:00:23 +02:00
Unicode-capable srfi-14 charsets
* libguile/Makefile.am: distribute new files srfi-14.i.c and unidata_to_charset.pl * chars.c (scm_c_upcase, scm_c_downcase): use unicode-enable toupper and tolower * libguile/srfi-14.h (scm_t_char_range, scm_t_char_set): new structures to describe char-sets (scm_t_char_set_cursor): new structure to describe char-set-cursors (SCM_BITS_PER_LONG): removed (SCM_CHARSET_GET): calls function New declarations for scm_i_charset_get, scm_i_charset_set, scm_i_charset_unset, and scm_debug_char_set. * test-suite/tests/srfi-14.test: new tests * libguile/srfi-14.c (SCM_CHARSET_DATA): new macro (SCM_CHARSET_SET, SCM_CHARSET_UNSET): call function (BYTES_PER_CHARSET, LONGS_PER_CHARSET): removed (scm_i_charset_get, scm_i_charset_set, scm_i_charset_unset) (charsets_equal, charsets_leq, charsets_union) (charsets_intersection, charsets_complement, charsets_xor): new functions that are low-level charset operators (charset_print, charset_free): modified for new charset struct (charset_cursor_print, charset_cursor_free): new function (make_char_set, scm_char_set_p, scm_char_set_eq, scm_car_set_leq) (scm_char_set_hash, scm_char_set_cursor, scm_char_set_ref) (scm_char_set_cursor_next, scm_end_of_char_set_p, scm_char_set_fold) (scm_char_set_unfold, scm_char_set_unfold_x, scm_char_set_for_each) (scm_char_set_map, scm_char_set_copy, scm_char_set, scm_list_to_char_set) (scm_list_to_char_set_x, scm_string_to_char_set, scm_string_to_char_set_x) (scm_char_set_filter, scm_char_set_filter_x, scm_ucs_range_to_char_set) (scm_ucs_range_to_char_set_x, scm_to_char_set, scm_char_set_size) (scm_char_set_count, scm_char_set_to_list, scm_char_set_to_string) (scm_char_set_contains_p, scm_char_set_every, scm_char_set_any) (scm_char_set_adjoin, scm_char_set_delete, scm_char_set_adjoin_x) (scm_char_set_delete_x, scm_char_set_complement, scm_char_set_union) (scm_char_set_intersection, scm_char_set_difference, scm_char_set_xor) (scm_char_set_diff_plus_intersection, scm_char_set_complement_x) (scm_char_set_union_x, scm_char_set_intersection_x, scm_char_set_difference_x) (scm_char_set_xor_x, scm_char_set_diff_plus_intersection_x): modified to use new charset and charset-cursor data structures (CSET_BLANK_PRED, CSET_SYMBOL_PRED, CSET_PUNCT_PRED, CSET_LOWER_PRED) (CSET_UPPER_PRED, CSET_LETTER_PRED, CSET_DIGIT_PRED, CSET_WHITESPACE_PRED) (CSET_CONTROL_PRED, CSET_HEX_DIGIT_PRED, CSET_ASCII_PRED, CSET_LETTER_PRED) (CSET_LETTER_AND_DIGIT_PRED, CSET_PRINTING_PRED, CSET_TRUE_PRED) (CSET_FALSE_PRED): removed (scm_srfi_14_compute_char_sets): removed - too slow to iterate over all of unicode at startup (scm_debug_char_set) [SCM_CHARSET_DEBUG]: new function
This commit is contained in:
parent
3bcf189ba0
commit
f49dbcadf3
5 changed files with 1234 additions and 630 deletions
|
@ -446,7 +446,7 @@ install-exec-hook:
|
|||
## working.
|
||||
noinst_HEADERS = conv-integer.i.c conv-uinteger.i.c \
|
||||
eval.i.c ieee-754.h \
|
||||
srfi-4.i.c \
|
||||
srfi-4.i.c srfi-14.i.c \
|
||||
quicksort.i.c \
|
||||
win32-uname.h win32-dirent.h win32-socket.h \
|
||||
private-gc.h private-options.h
|
||||
|
@ -597,7 +597,7 @@ EXTRA_DIST = ChangeLog-scm ChangeLog-threads \
|
|||
cpp_errno.c cpp_err_symbols.in cpp_err_symbols.c \
|
||||
cpp_sig_symbols.c cpp_sig_symbols.in cpp_cnvt.awk \
|
||||
c-tokenize.lex version.h.in \
|
||||
scmconfig.h.top libgettext.h libguile.map
|
||||
scmconfig.h.top libgettext.h unidata_to_charset.pl libguile.map
|
||||
# $(DOT_DOC_FILES) $(EXTRA_DOT_DOC_FILES) \
|
||||
# guile-procedures.txt guile.texi
|
||||
|
||||
|
|
|
@ -296,20 +296,14 @@ TODO: change name to scm_i_.. ? --hwn
|
|||
scm_t_wchar
|
||||
scm_c_upcase (scm_t_wchar c)
|
||||
{
|
||||
if (c > 255)
|
||||
return c;
|
||||
|
||||
return toupper ((int) c);
|
||||
return uc_toupper ((int) c);
|
||||
}
|
||||
|
||||
|
||||
scm_t_wchar
|
||||
scm_c_downcase (scm_t_wchar c)
|
||||
{
|
||||
if (c > 255)
|
||||
return c;
|
||||
|
||||
return tolower ((int) c);
|
||||
return uc_tolower ((int) c);
|
||||
}
|
||||
|
||||
|
||||
|
|
1351
libguile/srfi-14.c
1351
libguile/srfi-14.c
File diff suppressed because it is too large
Load diff
|
@ -24,22 +24,34 @@
|
|||
|
||||
#include "libguile/__scm.h"
|
||||
|
||||
#define SCM_CHARSET_SIZE 256
|
||||
typedef struct
|
||||
{
|
||||
scm_t_wchar lo;
|
||||
scm_t_wchar hi;
|
||||
} scm_t_char_range;
|
||||
|
||||
/* We expect 8-bit bytes here. Should be no problem in the year
|
||||
2001. */
|
||||
#ifndef SCM_BITS_PER_LONG
|
||||
# define SCM_BITS_PER_LONG (sizeof (long) * 8)
|
||||
#endif
|
||||
typedef struct
|
||||
{
|
||||
size_t len;
|
||||
scm_t_char_range *ranges;
|
||||
} scm_t_char_set;
|
||||
|
||||
#define SCM_CHARSET_GET(cs, idx) (((long *) SCM_SMOB_DATA (cs))\
|
||||
[((unsigned char) (idx)) / SCM_BITS_PER_LONG] &\
|
||||
(1L << (((unsigned char) (idx)) % SCM_BITS_PER_LONG)))
|
||||
typedef struct
|
||||
{
|
||||
size_t range;
|
||||
scm_t_wchar n;
|
||||
} scm_t_char_set_cursor;
|
||||
|
||||
#define SCM_CHARSET_GET(cs,idx) \
|
||||
scm_i_charset_get((scm_t_char_set *)SCM_SMOB_DATA(cs),idx)
|
||||
|
||||
#define SCM_CHARSETP(x) (!SCM_IMP (x) && (SCM_TYP16 (x) == scm_tc16_charset))
|
||||
|
||||
/* Smob type code for character sets. */
|
||||
SCM_API int scm_tc16_charset;
|
||||
SCM_INTERNAL int scm_i_charset_get (scm_t_char_set *cs, scm_t_wchar n);
|
||||
SCM_INTERNAL void scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n);
|
||||
SCM_INTERNAL void scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n);
|
||||
|
||||
SCM_API SCM scm_char_set_p (SCM obj);
|
||||
SCM_API SCM scm_char_set_eq (SCM char_sets);
|
||||
|
@ -88,6 +100,9 @@ SCM_API SCM scm_char_set_intersection_x (SCM cs1, SCM rest);
|
|||
SCM_API SCM scm_char_set_difference_x (SCM cs1, SCM rest);
|
||||
SCM_API SCM scm_char_set_xor_x (SCM cs1, SCM rest);
|
||||
SCM_API SCM scm_char_set_diff_plus_intersection_x (SCM cs1, SCM cs2, SCM rest);
|
||||
#if SCM_CHARSET_DEBUG
|
||||
SCM_API SCM scm_debug_char_set (SCM cs);
|
||||
#endif
|
||||
|
||||
SCM_API SCM scm_char_set_lower_case;
|
||||
SCM_API SCM scm_char_set_upper_case;
|
||||
|
@ -107,7 +122,6 @@ SCM_API SCM scm_char_set_ascii;
|
|||
SCM_API SCM scm_char_set_empty;
|
||||
SCM_API SCM scm_char_set_full;
|
||||
|
||||
SCM_INTERNAL void scm_srfi_14_compute_char_sets (void);
|
||||
SCM_INTERNAL void scm_init_srfi_14 (void);
|
||||
|
||||
#endif /* SCM_SRFI_14_H */
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
;;;; srfi-14.test --- Test suite for Guile's SRFI-14 functions.
|
||||
;;;; srfi-14.test -*- mode:scheme; coding: iso-8859-1 -*-
|
||||
;;;; --- Test suite for Guile's SRFI-14 functions.
|
||||
;;;; Martin Grabmueller, 2001-07-16
|
||||
;;;;
|
||||
;;;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
|
||||
|
@ -29,6 +30,30 @@
|
|||
(define exception:non-char-return
|
||||
(cons 'misc-error "returned non-char"))
|
||||
|
||||
|
||||
(with-test-prefix "char set contents"
|
||||
|
||||
(pass-if "empty set"
|
||||
(list= eqv?
|
||||
(char-set->list (char-set))
|
||||
'()))
|
||||
|
||||
(pass-if "single char"
|
||||
(list= eqv?
|
||||
(char-set->list (char-set #\a))
|
||||
(list #\a)))
|
||||
|
||||
(pass-if "contiguous chars"
|
||||
(list= eqv?
|
||||
(char-set->list (char-set #\a #\b #\c))
|
||||
(list #\a #\b #\c)))
|
||||
|
||||
(pass-if "discontiguous chars"
|
||||
(list= eqv?
|
||||
(char-set->list (char-set #\a #\c #\e))
|
||||
(list #\a #\c #\e))))
|
||||
|
||||
|
||||
(with-test-prefix "char-set?"
|
||||
|
||||
(pass-if "success on empty set"
|
||||
|
@ -113,7 +138,7 @@
|
|||
(with-test-prefix "char-set cursor"
|
||||
|
||||
(pass-if-exception "invalid character cursor"
|
||||
exception:invalid-char-set-cursor
|
||||
exception:wrong-type-arg
|
||||
(let* ((cs (char-set #\B #\r #\a #\z))
|
||||
(cc (char-set-cursor cs)))
|
||||
(char-set-ref cs 1000)))
|
||||
|
@ -148,30 +173,33 @@
|
|||
(= (char-set-size (char-set-fold (lambda (c cs) (char-set-adjoin cs c))
|
||||
(char-set) (char-set #\a #\b))) 2)))
|
||||
|
||||
(define char-set:256
|
||||
(string->char-set (apply string (map integer->char (iota 256)))))
|
||||
|
||||
(with-test-prefix "char-set-unfold"
|
||||
|
||||
(pass-if "create char set"
|
||||
(char-set= char-set:full
|
||||
(char-set= char-set:256
|
||||
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
||||
(lambda (s) (+ s 1)) 0)))
|
||||
(pass-if "create char set (base set)"
|
||||
(char-set= char-set:full
|
||||
(char-set= char-set:256
|
||||
(char-set-unfold (lambda (s) (= s 256)) integer->char
|
||||
(lambda (s) (+ s 1)) 0 char-set:empty))))
|
||||
|
||||
(with-test-prefix "char-set-unfold!"
|
||||
|
||||
(pass-if "create char set"
|
||||
(char-set= char-set:full
|
||||
(char-set= char-set:256
|
||||
(char-set-unfold! (lambda (s) (= s 256)) integer->char
|
||||
(lambda (s) (+ s 1)) 0
|
||||
(char-set-copy char-set:empty))))
|
||||
|
||||
(pass-if "create char set"
|
||||
(char-set= char-set:full
|
||||
(char-set= char-set:256
|
||||
(char-set-unfold! (lambda (s) (= s 32)) integer->char
|
||||
(lambda (s) (+ s 1)) 0
|
||||
(char-set-copy char-set:full)))))
|
||||
(char-set-copy char-set:256)))))
|
||||
|
||||
|
||||
(with-test-prefix "char-set-for-each"
|
||||
|
@ -186,9 +214,15 @@
|
|||
|
||||
(with-test-prefix "char-set-map"
|
||||
|
||||
(pass-if "upper case char set"
|
||||
(char-set= (char-set-map char-upcase char-set:lower-case)
|
||||
char-set:upper-case)))
|
||||
(pass-if "upper case char set 1"
|
||||
(char-set= (char-set-map char-upcase
|
||||
(string->char-set "abcdefghijklmnopqrstuvwxyz"))
|
||||
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")))
|
||||
|
||||
(pass-if "upper case char set 2"
|
||||
(char-set= (char-set-map char-upcase
|
||||
(string->char-set "àáâãäåæçèéêëìíîïñòóôõöøùúûüýþ"))
|
||||
(string->char-set "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝÞ"))))
|
||||
|
||||
(with-test-prefix "string->char-set"
|
||||
|
||||
|
@ -197,42 +231,107 @@
|
|||
(char-set= (list->char-set chars)
|
||||
(string->char-set (apply string chars))))))
|
||||
|
||||
(with-test-prefix "char-set->string"
|
||||
|
||||
(pass-if "some char set"
|
||||
(let ((cs (char-set #\g #\u #\i #\l #\e)))
|
||||
(string=? (char-set->string cs)
|
||||
"egilu"))))
|
||||
|
||||
;; Make sure we get an ASCII charset and character classification.
|
||||
(if (defined? 'setlocale) (setlocale LC_CTYPE "C"))
|
||||
|
||||
(with-test-prefix "standard char sets (ASCII)"
|
||||
|
||||
(pass-if "char-set:lower-case"
|
||||
(char-set<= (string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||
char-set:lower-case))
|
||||
|
||||
(pass-if "char-set:upper-case"
|
||||
(char-set<= (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
char-set:upper-case))
|
||||
|
||||
(pass-if "char-set:title-case"
|
||||
(char-set<= (string->char-set "")
|
||||
char-set:title-case))
|
||||
|
||||
(pass-if "char-set:letter"
|
||||
(char-set= (string->char-set
|
||||
(string-append "abcdefghijklmnopqrstuvwxyz"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
|
||||
(char-set<= (char-set-union
|
||||
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
|
||||
char-set:letter))
|
||||
|
||||
(pass-if "char-set:digit"
|
||||
(char-set<= (string->char-set "0123456789")
|
||||
char-set:digit))
|
||||
|
||||
(pass-if "char-set:hex-digit"
|
||||
(char-set<= (string->char-set "0123456789abcdefABCDEF")
|
||||
char-set:hex-digit))
|
||||
|
||||
(pass-if "char-set:letter+digit"
|
||||
(char-set<= (char-set-union
|
||||
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
(string->char-set "0123456789"))
|
||||
char-set:letter+digit))
|
||||
|
||||
(pass-if "char-set:punctuation"
|
||||
(char-set= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||
(char-set<= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||
char-set:punctuation))
|
||||
|
||||
(pass-if "char-set:symbol"
|
||||
(char-set= (string->char-set "$+<=>^`|~")
|
||||
(char-set<= (string->char-set "$+<=>^`|~")
|
||||
char-set:symbol))
|
||||
|
||||
(pass-if "char-set:letter+digit"
|
||||
(char-set= char-set:letter+digit
|
||||
(char-set-union char-set:letter char-set:digit)))
|
||||
|
||||
(pass-if "char-set:graphic"
|
||||
(char-set= char-set:graphic
|
||||
(char-set-union char-set:letter char-set:digit
|
||||
char-set:punctuation char-set:symbol)))
|
||||
(char-set<= (char-set-union
|
||||
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
(string->char-set "0123456789")
|
||||
(string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||
(string->char-set "$+<=>^`|~"))
|
||||
char-set:graphic))
|
||||
|
||||
(pass-if "char-set:whitespace"
|
||||
(char-set<= (string->char-set
|
||||
(string
|
||||
(integer->char #x09)
|
||||
(integer->char #x0a)
|
||||
(integer->char #x0b)
|
||||
(integer->char #x0c)
|
||||
(integer->char #x0d)
|
||||
(integer->char #x20)))
|
||||
char-set:whitespace))
|
||||
|
||||
(pass-if "char-set:printing"
|
||||
(char-set= char-set:printing
|
||||
(char-set-union char-set:whitespace char-set:graphic))))
|
||||
(char-set<= (char-set-union
|
||||
(string->char-set "abcdefghijklmnopqrstuvwxyz")
|
||||
(string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
(string->char-set "0123456789")
|
||||
(string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
|
||||
(string->char-set "$+<=>^`|~")
|
||||
(string->char-set (string
|
||||
(integer->char #x09)
|
||||
(integer->char #x0a)
|
||||
(integer->char #x0b)
|
||||
(integer->char #x0c)
|
||||
(integer->char #x0d)
|
||||
(integer->char #x20))))
|
||||
char-set:printing))
|
||||
|
||||
(pass-if "char-set:iso-control"
|
||||
(char-set<= (string->char-set
|
||||
(apply string
|
||||
(map integer->char (append
|
||||
;; U+0000 to U+001F
|
||||
(iota #x20)
|
||||
(list #x7f)))))
|
||||
char-set:iso-control)))
|
||||
|
||||
|
||||
;;;
|
||||
;;; 8-bit charsets.
|
||||
;;; Non-ASCII codepoints
|
||||
;;;
|
||||
;;; Here, we only test ISO-8859-1 (Latin-1), notably because behavior of
|
||||
;;; SRFI-14 for implementations supporting this charset is well-defined.
|
||||
|
@ -260,57 +359,123 @@
|
|||
|
||||
(with-test-prefix "Latin-1 (8-bit charset)"
|
||||
|
||||
;; Note: the membership tests below are not exhaustive.
|
||||
|
||||
(pass-if "char-set:letter (membership)"
|
||||
(pass-if "char-set:lower-case"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(let ((letters (char-set->list char-set:letter)))
|
||||
(every? (lambda (8-bit-char)
|
||||
(memq 8-bit-char letters))
|
||||
(append '(#\a #\b #\c) ;; ASCII
|
||||
(string->list "çéèâùÉÀÈÊ") ;; French
|
||||
(string->list "øñÑíßåæðþ"))))))
|
||||
(char-set<= (string->char-set
|
||||
(string-append "abcdefghijklmnopqrstuvwxyz"
|
||||
"µßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ")
|
||||
char-set:lower-case))))
|
||||
|
||||
(pass-if "char-set:letter (size)"
|
||||
(pass-if "char-set:upper-case"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(= (char-set-size char-set:letter) 117)))
|
||||
(char-set<= (string->char-set
|
||||
(string-append "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ")
|
||||
char-set:lower-case))))
|
||||
|
||||
(pass-if "char-set:lower-case (size)"
|
||||
(pass-if "char-set:title-case"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(= (char-set-size char-set:lower-case) (+ 26 33))))
|
||||
(char-set<= (string->char-set "")
|
||||
char-set:title-case)))
|
||||
|
||||
(pass-if "char-set:upper-case (size)"
|
||||
(pass-if "char-set:letter"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(= (char-set-size char-set:upper-case) (+ 26 30))))
|
||||
(char-set<= (string->char-set
|
||||
(string-append
|
||||
;; Lowercase
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"µßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
|
||||
;; Uppercase
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ"
|
||||
;; Uncased
|
||||
"ªº"))
|
||||
char-set:letter)))
|
||||
|
||||
(pass-if "char-set:punctuation (membership)"
|
||||
(pass-if "char-set:digit"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(let ((punctuation (char-set->list char-set:punctuation)))
|
||||
(every? (lambda (8-bit-char)
|
||||
(memq 8-bit-char punctuation))
|
||||
(append '(#\! #\. #\?) ;; ASCII
|
||||
(string->list "¡¿") ;; Castellano
|
||||
(string->list "«»")))))) ;; French
|
||||
(char-set<= (string->char-set "0123456789")
|
||||
char-set:digit)))
|
||||
|
||||
(pass-if "char-set:hex-digit"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (string->char-set "0123456789abcdefABCDEF")
|
||||
char-set:hex-digit)))
|
||||
|
||||
(pass-if "char-set:letter+digit"
|
||||
(char-set= char-set:letter+digit
|
||||
(char-set-union char-set:letter char-set:digit)))
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (char-set-union
|
||||
char-set:letter
|
||||
char-set:digit)
|
||||
char-set:letter+digit)))
|
||||
|
||||
(pass-if "char-set:punctuation"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (string->char-set
|
||||
(string-append "!\"#%&'()*,-./:;?@[\\]_{}"
|
||||
"¡«·»¿"))
|
||||
char-set:punctuation)))
|
||||
|
||||
(pass-if "char-set:symbol"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (string->char-set
|
||||
(string-append "$+<=>^`|~"
|
||||
"¢£¤¥¦§¨©¬®¯°±´¶¸×÷"))
|
||||
char-set:symbol)))
|
||||
|
||||
;; Note that SRFI-14 itself is inconsistent here. Characters that
|
||||
;; are non-digit numbers (such as category No) are clearly 'graphic'
|
||||
;; but don't occur in the letter, digit, punct, or symbol charsets.
|
||||
(pass-if "char-set:graphic"
|
||||
(char-set= char-set:graphic
|
||||
(char-set-union char-set:letter char-set:digit
|
||||
char-set:punctuation char-set:symbol)))
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (char-set-union
|
||||
char-set:letter
|
||||
char-set:digit
|
||||
char-set:punctuation
|
||||
char-set:symbol)
|
||||
char-set:graphic)))
|
||||
|
||||
(pass-if "char-set:whitespace"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (string->char-set
|
||||
(string
|
||||
(integer->char #x09)
|
||||
(integer->char #x0a)
|
||||
(integer->char #x0b)
|
||||
(integer->char #x0c)
|
||||
(integer->char #x0d)
|
||||
(integer->char #x20)
|
||||
(integer->char #xa0)))
|
||||
char-set:whitespace)))
|
||||
|
||||
(pass-if "char-set:printing"
|
||||
(char-set= char-set:printing
|
||||
(char-set-union char-set:whitespace char-set:graphic))))
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (char-set-union char-set:graphic char-set:whitespace)
|
||||
char-set:printing)))
|
||||
|
||||
(pass-if "char-set:iso-control"
|
||||
(if (not %latin1)
|
||||
(throw 'unresolved)
|
||||
(char-set<= (string->char-set
|
||||
(apply string
|
||||
(map integer->char (append
|
||||
;; U+0000 to U+001F
|
||||
(iota #x20)
|
||||
(list #x7f)
|
||||
;; U+007F to U+009F
|
||||
(map (lambda (x) (+ #x80 x))
|
||||
(iota #x20))))))
|
||||
char-set:iso-control))))
|
||||
|
||||
;; Local Variables:
|
||||
;; mode: scheme
|
||||
;; coding: latin-1
|
||||
;; End:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue