guile/test-suite/tests/srfi-14.test

;;;; srfi-14.test          -*- mode:scheme; coding: iso-8859-1 -*-
;;;; --- Test suite for Guile's SRFI-14 functions.
;;;; Martin Grabmueller, 2001-07-16
;;;;
;;;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

(define-module (test-suite test-srfi-14)
  :use-module (srfi srfi-14)
  :use-module (srfi srfi-1) ;; `every'
  :use-module (test-suite lib))


(define exception:invalid-char-set-cursor
  (cons 'misc-error "^invalid character set cursor"))

(define exception:non-char-return
  (cons 'misc-error "returned non-char"))


(with-test-prefix "char set contents"

  (pass-if "empty set"
    (list= eqv?
           (char-set->list (char-set))
           '()))

  (pass-if "single char"
    (list= eqv?
           (char-set->list (char-set #\a))
           (list #\a)))

  (pass-if "contiguous chars"
    (list= eqv?
           (char-set->list (char-set #\a #\b #\c))
           (list #\a #\b #\c)))

  (pass-if "discontiguous chars"
    (list= eqv?
           (char-set->list (char-set #\a #\c #\e))
           (list #\a #\c #\e))))


(with-test-prefix "char-set?"

  (pass-if "success on empty set"
    (char-set? (char-set)))

  (pass-if "success on non-empty set"
    (char-set? char-set:printing))

  (pass-if "failure on empty set"
    (not (char-set? #t))))


(with-test-prefix "char-set="
  (pass-if "success, no arg"
    (char-set=))

  (pass-if "success, one arg"
    (char-set= char-set:lower-case))

  (pass-if "success, two args"
    (char-set= char-set:upper-case char-set:upper-case))

  (pass-if "failure, first empty"
    (not (char-set= (char-set) (char-set #\a))))

  (pass-if "failure, second empty"
    (not (char-set= (char-set #\a) (char-set))))

  (pass-if "success, more args"
    (char-set= char-set:blank char-set:blank char-set:blank)))

(with-test-prefix "char-set<="
  (pass-if "success, no arg"
    (char-set<=))

  (pass-if "success, one arg"
    (char-set<= char-set:lower-case))

  (pass-if "success, two args"
    (char-set<= char-set:upper-case char-set:upper-case))

  (pass-if "success, first empty"
    (char-set<= (char-set) (char-set #\a)))

  (pass-if "failure, second empty"
    (not (char-set<= (char-set #\a) (char-set))))

  (pass-if "success, more args, equal"
    (char-set<= char-set:blank char-set:blank char-set:blank))

  (pass-if "success, more args, not equal"
    (char-set<= char-set:blank
		(char-set-adjoin char-set:blank #\F)
		(char-set-adjoin char-set:blank #\F #\o))))

(with-test-prefix "char-set-hash"
   (pass-if "empty set, bound"
      (let ((h (char-set-hash char-set:empty 31)))
	(and h (number? h) (exact? h) (>= h 0) (< h 31))))

   (pass-if "empty set, no bound"
      (let ((h (char-set-hash char-set:empty)))
	(and h (number? h) (exact? h) (>= h 0))))

   (pass-if "full set, bound"
      (let ((h (char-set-hash char-set:full 31)))
	(and h (number? h) (exact? h) (>= h 0) (< h 31))))

   (pass-if "full set, no bound"
      (let ((h (char-set-hash char-set:full)))
	(and h (number? h) (exact? h) (>= h 0))))

   (pass-if "other set, bound"
      (let ((h (char-set-hash (char-set #\f #\o #\b #\a #\r) 31)))
	(and h (number? h) (exact? h) (>= h 0) (< h 31))))

   (pass-if "other set, no bound"
      (let ((h (char-set-hash (char-set #\f #\o #\b #\a #\r))))
	(and h (number? h) (exact? h) (>= h 0)))))


(with-test-prefix "char-set cursor"

  (pass-if-exception "invalid character cursor"
     exception:wrong-type-arg
     (let* ((cs (char-set #\B #\r #\a #\z))
	    (cc (char-set-cursor cs)))
       (char-set-ref cs 1000)))

  (pass-if "success"
     (let* ((cs (char-set #\B #\r #\a #\z))
	    (cc (char-set-cursor cs)))
       (char? (char-set-ref cs cc))))

  (pass-if "end of set fails"
     (let* ((cs (char-set #\a))
	    (cc (char-set-cursor cs)))
       (not (end-of-char-set? cc))))

  (pass-if "end of set succeeds, empty set"
     (let* ((cs (char-set))
	    (cc (char-set-cursor cs)))
       (end-of-char-set? cc)))

  (pass-if "end of set succeeds, non-empty set"
     (let* ((cs (char-set #\a))
	    (cc (char-set-cursor cs))
	    (cc (char-set-cursor-next cs cc)))
       (end-of-char-set? cc))))

(with-test-prefix "char-set-fold"

  (pass-if "count members"
     (= (char-set-fold (lambda (c n) (+ n 1)) 0 (char-set #\a #\b)) 2))

  (pass-if "copy set"
     (= (char-set-size (char-set-fold (lambda (c cs) (char-set-adjoin cs c))
				      (char-set) (char-set #\a #\b))) 2)))

(define char-set:256
  (string->char-set (apply string (map integer->char (iota 256)))))

(with-test-prefix "char-set-unfold"

  (pass-if "create char set"
     (char-set= char-set:256
		(char-set-unfold (lambda (s) (= s 256)) integer->char
				 (lambda (s) (+ s 1)) 0)))
  (pass-if "create char set (base set)"
     (char-set= char-set:256
		(char-set-unfold (lambda (s) (= s 256)) integer->char
				 (lambda (s) (+ s 1)) 0 char-set:empty))))

(with-test-prefix "char-set-unfold!"

  (pass-if "create char set"
     (char-set= char-set:256
		(char-set-unfold! (lambda (s) (= s 256)) integer->char
				 (lambda (s) (+ s 1)) 0
				 (char-set-copy char-set:empty))))

  (pass-if "create char set"
     (char-set= char-set:256
		(char-set-unfold! (lambda (s) (= s 32)) integer->char
				 (lambda (s) (+ s 1)) 0
				 (char-set-copy char-set:256)))))


(with-test-prefix "char-set-for-each"

  (pass-if "copy char set"
     (= (char-set-size (let ((cs (char-set)))
			 (char-set-for-each
			  (lambda (c) (char-set-adjoin! cs c))
			  (char-set #\a #\b))
			 cs))
	2)))

(with-test-prefix "char-set-map"

  (pass-if "upper case char set 1"
     (char-set= (char-set-map char-upcase
                              (string->char-set "abcdefghijklmnopqrstuvwxyz"))
                (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")))

  (pass-if "upper case char set 2"
     (char-set= (char-set-map char-upcase
                              (string->char-set "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"))
                (string->char-set "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"))))

(with-test-prefix "string->char-set"

  (pass-if "some char set"
     (let ((chars '(#\g #\u #\i #\l #\e)))
       (char-set= (list->char-set chars)
		  (string->char-set (apply string chars))))))

(with-test-prefix "char-set->string"

  (pass-if "some char set"
     (let ((cs (char-set #\g #\u #\i #\l #\e)))
       (string=? (char-set->string cs)
                 "egilu"))))

(with-test-prefix "standard char sets (ASCII)"

  (pass-if "char-set:lower-case"
     (char-set<= (string->char-set "abcdefghijklmnopqrstuvwxyz")
                 char-set:lower-case))

  (pass-if "char-set:upper-case"
     (char-set<= (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
                 char-set:upper-case))

  (pass-if "char-set:title-case"
     (char-set<= (string->char-set "")
                 char-set:title-case))

  (pass-if "char-set:letter"
     (char-set<= (char-set-union
                  (string->char-set "abcdefghijklmnopqrstuvwxyz")
                  (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
                 char-set:letter))

  (pass-if "char-set:digit"
     (char-set<= (string->char-set "0123456789")
                 char-set:digit))

  (pass-if "char-set:hex-digit"
     (char-set<= (string->char-set "0123456789abcdefABCDEF")
                 char-set:hex-digit))

  (pass-if "char-set:letter+digit"
     (char-set<= (char-set-union
                  (string->char-set "abcdefghijklmnopqrstuvwxyz")
                  (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
                  (string->char-set "0123456789"))
                 char-set:letter+digit))

  (pass-if "char-set:punctuation"
     (char-set<= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
                 char-set:punctuation))

  (pass-if "char-set:symbol"
     (char-set<= (string->char-set "$+<=>^`|~")
                 char-set:symbol))

  (pass-if "char-set:graphic"
     (char-set<= (char-set-union
                  (string->char-set "abcdefghijklmnopqrstuvwxyz")
                  (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
                  (string->char-set "0123456789")
                  (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
                  (string->char-set "$+<=>^`|~"))
                 char-set:graphic))

  (pass-if "char-set:whitespace"
     (char-set<= (string->char-set
                  (string
                   (integer->char #x09)
                   (integer->char #x0a)
                   (integer->char #x0b)
                   (integer->char #x0c)
                   (integer->char #x0d)
                   (integer->char #x20)))
                 char-set:whitespace))

  (pass-if "char-set:printing"
     (char-set<= (char-set-union
                  (string->char-set "abcdefghijklmnopqrstuvwxyz")
                  (string->char-set "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
                  (string->char-set "0123456789")
                  (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
                  (string->char-set "$+<=>^`|~")
                  (string->char-set (string
                                     (integer->char #x09)
                                     (integer->char #x0a)
                                     (integer->char #x0b)
                                     (integer->char #x0c)
                                     (integer->char #x0d)
                                     (integer->char #x20))))
                 char-set:printing))

  (pass-if "char-set:iso-control"
     (char-set<= (string->char-set
                  (apply string
                         (map integer->char (append
                                             ;; U+0000 to U+001F
                                             (iota #x20)
                                             (list #x7f)))))
                 char-set:iso-control)))


;;;
;;; Non-ASCII codepoints
;;;
;;; Here, we only test ISO-8859-1 (Latin-1), notably because behavior of
;;; SRFI-14 for implementations supporting this charset is well-defined.
;;;

(define (every? pred lst)
  (not (not (every pred lst))))

(define oldlocale #f)
(if (defined? 'setlocale)
    (set! oldlocale (setlocale LC_ALL "")))

(with-test-prefix "Latin-1 (8-bit charset)"

  (pass-if "char-set:lower-case"
    (char-set<= (string->char-set
                 (string-append "abcdefghijklmnopqrstuvwxyz"
                                "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>")
                 char-set:lower-case)))

  (pass-if "char-set:upper-case"
    (char-set<= (string->char-set
                 (string-append "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>")
                 char-set:lower-case)))

  (pass-if "char-set:title-case"
    (char-set<= (string->char-set "")
                char-set:title-case))

  (pass-if "char-set:letter"
    (char-set<= (string->char-set
                 (string-append
                  ;; Lowercase
                  "abcdefghijklmnopqrstuvwxyz"
                  "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
                  ;; Uppercase
                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                  "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
                  ;; Uncased
                  "<22><>"))
                char-set:letter))

  (pass-if "char-set:digit"
    (char-set<= (string->char-set "0123456789")
                char-set:digit))

  (pass-if "char-set:hex-digit"
    (char-set<= (string->char-set "0123456789abcdefABCDEF")
                char-set:hex-digit))

  (pass-if "char-set:letter+digit"
    (char-set<= (char-set-union
                 char-set:letter
                 char-set:digit)
                char-set:letter+digit))

  (pass-if "char-set:punctuation"
    (char-set<= (string->char-set
                 (string-append "!\"#%&'()*,-./:;?@[\\]_{}"
                                "<22><><EFBFBD><EFBFBD><EFBFBD>"))
                char-set:punctuation))

  (pass-if "char-set:symbol"
    (char-set<= (string->char-set
                 (string-append "$+<=>^`|~"
                                "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"))
                char-set:symbol))

  ;; Note that SRFI-14 itself is inconsistent here.  Characters that
  ;; are non-digit numbers (such as category No) are clearly 'graphic'
  ;; but don't occur in the letter, digit, punct, or symbol charsets.
  (pass-if "char-set:graphic"
    (char-set<= (char-set-union
                 char-set:letter
                 char-set:digit
                 char-set:punctuation
                 char-set:symbol)
                char-set:graphic))

  (pass-if "char-set:whitespace"
    (char-set<= (string->char-set
                 (string
                  (integer->char #x09)
                  (integer->char #x0a)
                  (integer->char #x0b)
                  (integer->char #x0c)
                  (integer->char #x0d)
                  (integer->char #x20)
                  (integer->char #xa0)))
                char-set:whitespace))

  (pass-if "char-set:printing"
    (char-set<= (char-set-union char-set:graphic char-set:whitespace)
                char-set:printing))

  (pass-if "char-set:iso-control"
    (char-set<= (string->char-set
                 (apply string
                        (map integer->char (append
                                            ;; U+0000 to U+001F
                                            (iota #x20)
                                            (list #x7f)
                                            ;; U+007F to U+009F
                                            (map (lambda (x) (+ #x80 x))
                                                 (iota #x20))))))
                char-set:iso-control)))

(if (defined? 'setlocale)
    (setlocale LC_ALL oldlocale))