mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-26 13:10:22 +02:00
detect and consume byte-order marks for textual ports
* libguile/ports.h: * libguile/ports.c (scm_consume_byte_order_mark): New procedure. * libguile/fports.c (scm_open_file): Call consume-byte-order-mark if we are opening a file in "r" mode. * libguile/read.c (scm_i_scan_for_encoding): Don't do anything about byte-order marks. * libguile/load.c (scm_primitive_load): Add a note about the duplicate encoding scan. * test-suite/tests/filesys.test: Add tests for UTF-8, UTF-16BE, and UTF-16LE BOM handling.
This commit is contained in:
parent
a8fa310b04
commit
b2cb557d75
6 changed files with 169 additions and 30 deletions
|
@ -1,6 +1,6 @@
|
|||
;;;; filesys.test --- test file system functions -*- scheme -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2004, 2006 Free Software Foundation, Inc.
|
||||
;;;; Copyright (C) 2004, 2006, 2013 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -17,6 +17,8 @@
|
|||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-suite test-filesys)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 binary-ports)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (test-suite guile-test))
|
||||
|
||||
|
@ -127,3 +129,58 @@
|
|||
|
||||
(delete-file (test-file))
|
||||
(delete-file (test-symlink))
|
||||
|
||||
(let ((s "\ufeffHello, world!"))
|
||||
(define* (test-encoding encoding #:optional (ambient "ISO-8859-1"))
|
||||
(with-fluids ((%default-port-encoding ambient))
|
||||
(let* ((bytes (catch 'misc-error
|
||||
(lambda ()
|
||||
(call-with-values open-bytevector-output-port
|
||||
(lambda (port get-bytevector)
|
||||
(set-port-encoding! port encoding)
|
||||
(display s port)
|
||||
(get-bytevector))))
|
||||
(lambda args
|
||||
(throw 'unresolved))))
|
||||
(name (string-copy "myfile-XXXXXX"))
|
||||
(port (mkstemp! name)))
|
||||
(put-bytevector port bytes)
|
||||
(close-port port)
|
||||
(let ((contents (call-with-input-file name read-string)))
|
||||
(delete-file name)
|
||||
contents))))
|
||||
|
||||
(pass-if "UTF-8"
|
||||
(equal? (test-encoding "UTF-8")
|
||||
"Hello, world!"))
|
||||
|
||||
(pass-if "UTF-16BE"
|
||||
(equal? (test-encoding "UTF-16BE")
|
||||
"Hello, world!"))
|
||||
|
||||
(pass-if "UTF-16LE"
|
||||
(equal? (test-encoding "UTF-16LE")
|
||||
"Hello, world!"))
|
||||
|
||||
(pass-if "UTF-8 (ambient)"
|
||||
(equal? (test-encoding "UTF-8" "UTF-8")
|
||||
"Hello, world!"))
|
||||
|
||||
(pass-if "UTF-8 (UTF-16 ambient)"
|
||||
(equal? (test-encoding "UTF-8" "UTF-16")
|
||||
"Hello, world!"))
|
||||
|
||||
;; Unicode 6.2 section 16.8:
|
||||
;;
|
||||
;; For compatibility with versions of the Unicode Standard prior to
|
||||
;; Version 3.2, the code point U+FEFF has the word-joining semantics
|
||||
;; of zero width no-break space when it is not used as a BOM. [...]
|
||||
;;
|
||||
;; Where the byte order is explicitly specified, such as in UTF-16BE
|
||||
;; or UTF-16LE, then all U+FEFF characters -- even at the very
|
||||
;; beginning of the text -- are to be interpreted as zero width
|
||||
;; no-break spaces.
|
||||
;;
|
||||
(pass-if "UTF-16LE (ambient)"
|
||||
(equal? (test-encoding "UTF-16LE" "UTF-16LE")
|
||||
"\ufeffHello, world!")))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue