1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-27 21:40:34 +02:00

String ports use UTF-8; ignore %default-port-encoding.

* libguile/strports.c (scm_mkstrport): Use UTF-8; ignore
  %default-port-encoding.  Rename 'str_len' and 'c_pos' to
  'num_bytes' and 'c_byte_pos'.  Interpret 'pos' argument
  as a character index instead of a byte index.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-6 to the
  list of core features.

* module/srfi/srfi-6.scm (open-input-string, open-output-string): Simply
  re-export these, since the core versions are now compliant.

* doc/ref/api-io.texi (String Ports): Remove text that describes
  non-compliant behavior of string ports with regard to encoding.

* doc/ref/srfi-modules.texi (SRFI-0): Add srfi-6 to the list of
  core features.
  (SRFI-6): Remove text that mentions non-compliant behavior of
  core string ports.

* module/ice-9/format.scm (format):
* module/ice-9/pretty-print.scm (truncated-print):
* module/rnrs/io/ports.scm (open-string-input-port,
  open-string-output-port):
* test-suite/test-suite/lib.scm (format-test-name):
* test-suite/tests/chars.test ("combining accent is pretty-printed",
  "combining X is pretty-printed"):
* test-suite/tests/ecmascript.test (eread, eread/1):
* test-suite/tests/rdelim.test:
* test-suite/tests/reader.test (read-string):
* test-suite/tests/regexp.test:
* test-suite/tests/srfi-105.test (read-string): Don't set
  %default-port-encoding before creating string ports.

* benchmark-suite/benchmarks/ports.bm (%latin1-port): Use
  'set-port-encoding!' to set the string port encoding.
  (%utf8/ascii-port, %utf8/wide-port, "rdelim"): Don't set
  %default-port-encoding before creating string ports.

* test-suite/tests/r6rs-ports.test ("lookahead-u8 non-ASCII"): Don't set
  %default-port-encoding before creating string ports.
  ("put-bytevector with UTF-16 string port", "put-bytevector with
  wrong-encoding string port"): Use 'set-port-encoding!' to set the
  string port encoding.

* test-suite/tests/print.test (tprint): Use 'set-port-encoding!' to set
  the string port encoding.
  ("truncated-print"): Use 'pass-if-equal'.

* test-suite/tests/ports.test ("encoding failure leads to exception",
  "%default-port-encoding is honored", "peek-char [latin-1]", "peek-char
  [utf-8]", "peek-char [utf-16]"): Remove tests.
  ("%default-port-encoding is ignored", "peek-char"): Add tests.
  ("suitable encoding [latin-1]", "suitable encoding [latin-3]",
  "wrong encoding, error", "wrong encoding, substitute",
  "wrong encoding, escape"): Use 'set-port-encoding!' to set the
  string port encoding.
  ("%default-port-encoding, wrong encoding"): Rewrite to use
  a file port instead of a string port.
This commit is contained in:
Mark H Weaver 2013-08-07 00:46:34 -04:00
parent d8d7c7bf57
commit 6dce942c46
19 changed files with 468 additions and 573 deletions

View file

@ -1,6 +1,6 @@
;;; ports.bm --- Port I/O. -*- mode: scheme; coding: utf-8; -*- ;;; ports.bm --- Port I/O. -*- mode: scheme; coding: utf-8; -*-
;;; ;;;
;;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. ;;; Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
;;; ;;;
;;; This program is free software; you can redistribute it and/or ;;; This program is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License ;;; modify it under the terms of the GNU Lesser General Public License
@ -34,16 +34,15 @@
(string-concatenate (make-list (* iteration-factor 10000) s))) (string-concatenate (make-list (* iteration-factor 10000) s)))
(define %latin1-port (define %latin1-port
(with-fluids ((%default-port-encoding #f)) (let ((p (open-input-string (large-string "hello, world"))))
(open-input-string (large-string "hello, world")))) (set-port-encoding! p "ISO-8859-1")
p))
(define %utf8/ascii-port (define %utf8/ascii-port
(with-fluids ((%default-port-encoding "UTF-8")) (open-input-string (large-string "hello, world")))
(open-input-string (large-string "hello, world"))))
(define %utf8/wide-port (define %utf8/wide-port
(with-fluids ((%default-port-encoding "UTF-8")) (open-input-string (large-string "안녕하세요")))
(open-input-string (large-string "안녕하세요"))))
(with-benchmark-prefix "peek-char" (with-benchmark-prefix "peek-char"
@ -87,6 +86,5 @@
(let ((str (string-concatenate (make-list 1000 "one line\n")))) (let ((str (string-concatenate (make-list 1000 "one line\n"))))
(benchmark "read-line" 1000 (benchmark "read-line" 1000
(let ((port (with-fluids ((%default-port-encoding "UTF-8")) (let ((port (open-input-string str)))
(open-input-string str))))
(sequence (read-line port) 1000))))) (sequence (read-line port) 1000)))))

View file

@ -1066,28 +1066,6 @@ away from its default.
Calls the one-argument procedure @var{proc} with a newly created output Calls the one-argument procedure @var{proc} with a newly created output
port. When the function returns, the string composed of the characters port. When the function returns, the string composed of the characters
written into the port is returned. @var{proc} should not close the port. written into the port is returned. @var{proc} should not close the port.
Note that which characters can be written to a string port depend on the port's
encoding. The default encoding of string ports is specified by the
@code{%default-port-encoding} fluid (@pxref{Ports,
@code{%default-port-encoding}}). For instance, it is an error to write Greek
letter alpha to an ISO-8859-1-encoded string port since this character cannot be
represented with ISO-8859-1:
@example
(define alpha (integer->char #x03b1)) ; GREEK SMALL LETTER ALPHA
(with-fluids ((%default-port-encoding "ISO-8859-1"))
(call-with-output-string
(lambda (p)
(display alpha p))))
@result{}
Throw to key `encoding-error'
@end example
Changing the string port's encoding to a Unicode-capable encoding such as UTF-8
solves the problem.
@end deffn @end deffn
@deffn {Scheme Procedure} call-with-input-string string proc @deffn {Scheme Procedure} call-with-input-string string proc
@ -1101,8 +1079,6 @@ read. The value yielded by the @var{proc} is returned.
Calls the zero-argument procedure @var{thunk} with the current output Calls the zero-argument procedure @var{thunk} with the current output
port set temporarily to a new string port. It returns a string port set temporarily to a new string port. It returns a string
composed of the characters written to the current output. composed of the characters written to the current output.
See @code{call-with-output-string} above for character encoding considerations.
@end deffn @end deffn
@deffn {Scheme Procedure} with-input-from-string string thunk @deffn {Scheme Procedure} with-input-from-string string thunk

View file

@ -146,6 +146,7 @@ guile-2 ;; starting from Guile 2.x
r5rs r5rs
srfi-0 srfi-0
srfi-4 srfi-4
srfi-6
srfi-13 srfi-13
srfi-14 srfi-14
srfi-23 srfi-23
@ -1851,19 +1852,11 @@ uniform numeric vector, it is returned unchanged.
@cindex SRFI-6 @cindex SRFI-6
SRFI-6 defines the procedures @code{open-input-string}, SRFI-6 defines the procedures @code{open-input-string},
@code{open-output-string} and @code{get-output-string}. @code{open-output-string} and @code{get-output-string}. These
procedures are included in the Guile core, so using this module does not
Note that although versions of these procedures are included in the make any difference at the moment. But it is possible that support for
Guile core, the core versions are not fully conformant with SRFI-6: SRFI-6 will be factored out of the core library in the future, so using
attempts to read or write characters that are not supported by the this module does not hurt, after all.
current @code{%default-port-encoding} will fail.
We therefore recommend that you import this module, which supports all
characters:
@example
(use-modules (srfi srfi-6))
@end example
@node SRFI-8 @node SRFI-8
@subsection SRFI-8 - receive @subsection SRFI-8 - receive

View file

@ -251,57 +251,60 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
{ {
SCM z, buf; SCM z, buf;
scm_t_port *pt; scm_t_port *pt;
const char *encoding; size_t read_buf_size, num_bytes, c_byte_pos;
size_t read_buf_size, str_len, c_pos;
char *c_buf; char *c_buf;
if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG))) if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL); scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
encoding = scm_i_default_port_encoding ();
if (scm_is_false (str)) if (scm_is_false (str))
{ {
/* Allocate a new buffer to write to. */ /* Allocate a new buffer to write to. */
str_len = INITIAL_BUFFER_SIZE; num_bytes = INITIAL_BUFFER_SIZE;
buf = scm_c_make_bytevector (str_len); buf = scm_c_make_bytevector (num_bytes);
c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
/* Reset `read_buf_size'. It will contain the actual number of /* Reset `read_buf_size'. It will contain the actual number of
bytes written to the port. */ bytes written to the port. */
read_buf_size = 0; read_buf_size = 0;
c_pos = 0; c_byte_pos = 0;
} }
else else
{ {
/* STR is a string. */
char *copy; char *copy;
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller); SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
/* Create a copy of STR in ENCODING. */ /* STR is a string. */
copy = scm_to_stringn (str, &str_len, encoding, /* Create a copy of STR in UTF-8. */
SCM_FAILED_CONVERSION_ERROR); copy = scm_to_utf8_stringn (str, &num_bytes);
buf = scm_c_make_bytevector (str_len); buf = scm_c_make_bytevector (num_bytes);
c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf);
memcpy (c_buf, copy, str_len); memcpy (c_buf, copy, num_bytes);
free (copy); free (copy);
c_pos = scm_to_unsigned_integer (pos, 0, str_len); read_buf_size = num_bytes;
read_buf_size = str_len;
if (scm_is_eq (pos, SCM_INUM0))
c_byte_pos = 0;
else
/* Inefficient but simple way to convert the character position
POS into a byte position C_BYTE_POS. */
free (scm_to_utf8_stringn (scm_substring (str, SCM_INUM0, pos),
&c_byte_pos));
} }
z = scm_c_make_port_with_encoding (scm_tc16_strport, modes, z = scm_c_make_port_with_encoding (scm_tc16_strport, modes,
encoding, "UTF-8",
scm_i_default_port_conversion_handler (), scm_i_default_port_conversion_handler (),
(scm_t_bits)buf); (scm_t_bits)buf);
pt = SCM_PTAB_ENTRY (z); pt = SCM_PTAB_ENTRY (z);
pt->write_buf = pt->read_buf = (unsigned char *) c_buf; pt->write_buf = pt->read_buf = (unsigned char *) c_buf;
pt->read_pos = pt->write_pos = pt->read_buf + c_pos; pt->read_pos = pt->write_pos = pt->read_buf + c_byte_pos;
pt->read_buf_size = read_buf_size; pt->read_buf_size = read_buf_size;
pt->write_buf_size = str_len; pt->write_buf_size = num_bytes;
pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size; pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size;
pt->rw_random = 1; pt->rw_random = 1;

View file

@ -4196,9 +4196,7 @@ when none is available, reading FILE-NAME with READER."
r5rs r5rs
srfi-0 ;; cond-expand itself srfi-0 ;; cond-expand itself
srfi-4 ;; homogeneous numeric vectors srfi-4 ;; homogeneous numeric vectors
;; We omit srfi-6 because the 'open-input-string' etc in Guile srfi-6 ;; string ports
;; core are not conformant with SRFI-6; they expose details
;; of the binary I/O model and may fail to support some characters.
srfi-13 ;; string library srfi-13 ;; string library
srfi-14 ;; character sets srfi-14 ;; character sets
srfi-23 ;; `error` procedure srfi-23 ;; `error` procedure

View file

@ -1,5 +1,5 @@
;;;; "format.scm" Common LISP text output formatter for SLIB ;;;; "format.scm" Common LISP text output formatter for SLIB
;;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. ;;; Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
;;; ;;;
;;; This library is free software; you can redistribute it and/or ;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public ;;; modify it under the terms of the GNU Lesser General Public
@ -42,10 +42,7 @@
(let* ((port (let* ((port
(cond (cond
((not destination) ((not destination) (open-output-string))
;; Use a Unicode-capable output string port.
(with-fluids ((%default-port-encoding "UTF-8"))
(open-output-string)))
((boolean? destination) (current-output-port)) ; boolean but not false ((boolean? destination) (current-output-port)) ; boolean but not false
((output-port? destination) destination) ((output-port? destination) destination)
((number? destination) ((number? destination)

View file

@ -1,7 +1,7 @@
;;;; -*- coding: utf-8; mode: scheme -*- ;;;; -*- coding: utf-8; mode: scheme -*-
;;;; ;;;;
;;;; Copyright (C) 2001, 2004, 2006, 2009, 2010, ;;;; Copyright (C) 2001, 2004, 2006, 2009, 2010,
;;;; 2012 Free Software Foundation, Inc. ;;;; 2012, 2013 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -311,9 +311,6 @@ e.g., if @var{x} is a vector, each member of @var{x}. One can attempt to
\"ration\" the available width, trying to allocate it equally to each \"ration\" the available width, trying to allocate it equally to each
sub-expression, via the @var{breadth-first?} keyword argument." sub-expression, via the @var{breadth-first?} keyword argument."
;; Make sure string ports are created with the right encoding.
(with-fluids ((%default-port-encoding (port-encoding port)))
(define ellipsis (define ellipsis
;; Choose between `HORIZONTAL ELLIPSIS' (U+2026) and three dots, depending ;; Choose between `HORIZONTAL ELLIPSIS' (U+2026) and three dots, depending
;; on the encoding of PORT. ;; on the encoding of PORT.
@ -321,9 +318,10 @@ sub-expression, via the @var{breadth-first?} keyword argument."
(catch 'encoding-error (catch 'encoding-error
(lambda () (lambda ()
(with-fluids ((%default-port-conversion-strategy 'error)) (with-fluids ((%default-port-conversion-strategy 'error))
(with-output-to-string (call-with-output-string
(lambda () (lambda (p)
(display e))))) (set-port-encoding! p (port-encoding port))
(display e p)))))
(lambda (key . args) (lambda (key . args)
"...")))) "..."))))
@ -342,15 +340,13 @@ sub-expression, via the @var{breadth-first?} keyword argument."
((<= width (+ 1 ellipsis-width)) ((<= width (+ 1 ellipsis-width))
(display ellipsis)) (display ellipsis))
(else (else
(let ((str (let ((str (with-output-to-string
(with-fluids ((%default-port-encoding (port-encoding port)))
(with-output-to-string
(lambda () (lambda ()
(print (ref x i) (print (ref x i)
(if breadth-first? (if breadth-first?
(max 1 (max 1
(1- (floor (/ width (- len i))))) (1- (floor (/ width (- len i)))))
(- width (+ 1 ellipsis-width))))))))) (- width (+ 1 ellipsis-width))))))))
(display str) (display str)
(lp (next x) (- width 1 (string-length str)) (1+ i))))))) (lp (next x) (- width 1 (string-length str)) (1+ i)))))))
@ -449,4 +445,4 @@ sub-expression, via the @var{breadth-first?} keyword argument."
(with-output-to-port port (with-output-to-port port
(lambda () (lambda ()
(print x width)))))) (print x width)))))

View file

@ -303,8 +303,7 @@ read from/written to in @var{port}."
(define (open-string-input-port str) (define (open-string-input-port str)
"Open an input port that will read from @var{str}." "Open an input port that will read from @var{str}."
(with-fluids ((%default-port-encoding "UTF-8")) (open-input-string str))
(open-input-string str)))
(define (r6rs-open filename mode buffer-mode transcoder) (define (r6rs-open filename mode buffer-mode transcoder)
(let ((port (with-i/o-filename-conditions filename (let ((port (with-i/o-filename-conditions filename
@ -349,8 +348,7 @@ read from/written to in @var{port}."
(define (open-string-output-port) (define (open-string-output-port)
"Return two values: an output port that will collect characters written to it "Return two values: an output port that will collect characters written to it
as a string, and a thunk to retrieve the characters associated with that port." as a string, and a thunk to retrieve the characters associated with that port."
(let ((port (with-fluids ((%default-port-encoding "UTF-8")) (let ((port (open-output-string)))
(open-output-string))))
(values port (values port
(lambda () (get-output-string port))))) (lambda () (get-output-string port)))))

View file

@ -1,6 +1,7 @@
;;; srfi-6.scm --- Basic String Ports ;;; srfi-6.scm --- Basic String Ports
;; Copyright (C) 2001, 2002, 2003, 2006, 2012 Free Software Foundation, Inc. ;; Copyright (C) 2001, 2002, 2003, 2006, 2012,
;; 2013 Free Software Foundation, Inc.
;; ;;
;; This library is free software; you can redistribute it and/or ;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public ;; modify it under the terms of the GNU Lesser General Public
@ -23,21 +24,6 @@
;;; Code: ;;; Code:
(define-module (srfi srfi-6) (define-module (srfi srfi-6)
#:replace (open-input-string open-output-string) #:re-export (open-input-string open-output-string get-output-string))
#:re-export (get-output-string))
;; SRFI-6 says nothing about encodings, and assumes that any character
;; or string can be written to a string port. Thus, make all SRFI-6
;; string ports Unicode capable. See <http://bugs.gnu.org/11197>.
(define (open-input-string s)
(with-fluids ((%default-port-encoding "UTF-8"))
((@ (guile) open-input-string) s)))
(define (open-output-string)
(with-fluids ((%default-port-encoding "UTF-8"))
((@ (guile) open-output-string))))
(cond-expand-provide (current-module) '(srfi-6))
;;; srfi-6.scm ends here ;;; srfi-6.scm ends here

View file

@ -428,9 +428,6 @@
;;;; Turn a test name into a nice human-readable string. ;;;; Turn a test name into a nice human-readable string.
(define (format-test-name name) (define (format-test-name name)
;; Choose a Unicode-capable encoding so that the string port can contain any
;; valid Unicode character.
(with-fluids ((%default-port-encoding "UTF-8"))
(call-with-output-string (call-with-output-string
(lambda (port) (lambda (port)
(let loop ((name name) (let loop ((name name)
@ -439,7 +436,7 @@
(begin (begin
(display separator port) (display separator port)
(display (car name) port) (display (car name) port)
(loop (cdr name) ": ")))))))) (loop (cdr name) ": ")))))))
;;;; For a given test-name, deliver the full name including all prefixes. ;;;; For a given test-name, deliver the full name including all prefixes.
(define (full-name name) (define (full-name name)

View file

@ -1,7 +1,7 @@
;;;; chars.test --- Characters. -*- coding: utf-8; mode: scheme; -*- ;;;; chars.test --- Characters. -*- coding: utf-8; mode: scheme; -*-
;;;; Greg J. Badros <gjb@cs.washington.edu> ;;;; Greg J. Badros <gjb@cs.washington.edu>
;;;; ;;;;
;;;; Copyright (C) 2000, 2006, 2009, 2010 Free Software Foundation, Inc. ;;;; Copyright (C) 2000, 2006, 2009, 2010, 2013 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -316,13 +316,11 @@
(pass-if "combining accent is pretty-printed" (pass-if "combining accent is pretty-printed"
(let ((accent (integer->char #x030f))) ; COMBINING DOUBLE GRAVE ACCENT (let ((accent (integer->char #x030f))) ; COMBINING DOUBLE GRAVE ACCENT
(string=? (string=?
(with-fluids ((%default-port-encoding "UTF-8")) (with-output-to-string (lambda () (write accent)))
(with-output-to-string (lambda () (write accent))))
"#\\◌̏"))) "#\\◌̏")))
(pass-if "combining X is pretty-printed" (pass-if "combining X is pretty-printed"
(let ((x (integer->char #x0353))) ; COMBINING X BELOW (let ((x (integer->char #x0353))) ; COMBINING X BELOW
(string=? (string=?
(with-fluids ((%default-port-encoding "UTF-8")) (with-output-to-string (lambda () (write x)))
(with-output-to-string (lambda () (write x))))
"#\\◌͓"))))) "#\\◌͓")))))

View file

@ -23,11 +23,9 @@
(define (eread str) (define (eread str)
(with-fluids ((%default-port-encoding "utf-8")) (call-with-input-string str read-ecmascript))
(call-with-input-string str read-ecmascript)))
(define (eread/1 str) (define (eread/1 str)
(with-fluids ((%default-port-encoding "utf-8")) (call-with-input-string str read-ecmascript/1))
(call-with-input-string str read-ecmascript/1)))
(define-syntax parse (define-syntax parse
(syntax-rules () (syntax-rules ()

View file

@ -698,32 +698,15 @@
(pass-if "output check" (pass-if "output check"
(string=? text result))) (string=? text result)))
(pass-if "encoding failure leads to exception" (pass-if "%default-port-encoding is ignored"
;; Prior to 2.0.6, this would trigger a deadlock in `scm_mkstrport'. (let ((str "ĉu bone?"))
;; See the discussion at <http://bugs.gnu.org/11197>, for details. ;; Latin-1 cannot represent ‘ĉ’.
(catch 'encoding-error
(lambda ()
(with-fluids ((%default-port-encoding "ISO-8859-1")) (with-fluids ((%default-port-encoding "ISO-8859-1"))
(let ((p (open-input-string "λ"))) ; raise an exception (string=? (call-with-output-string
#f)))
(lambda (key . rest)
#t)
(lambda (key . rest)
;; At this point, the port-table mutex used to be still held,
;; hence the deadlock. This situation would occur when trying
;; to print a backtrace, for instance.
(input-port? (open-input-string "foo")))))
(pass-if "%default-port-encoding is honored"
(let ((encodings '("UTF-8" "UTF-16" "ISO-8859-1" "ISO-8859-3")))
(equal? (map (lambda (e)
(with-fluids ((%default-port-encoding e))
(call-with-output-string
(lambda (p) (lambda (p)
(and (string=? e (port-encoding p)) (set-port-conversion-strategy! p 'substitute)
(display (port-encoding p) p)))))) (display str p)))
encodings) "ĉu bone?"))))
encodings)))
(pass-if "%default-port-conversion-strategy is honored" (pass-if "%default-port-conversion-strategy is honored"
(let ((strategies '(error substitute escape))) (let ((strategies '(error substitute escape)))
@ -740,31 +723,33 @@
(map symbol->string strategies)))) (map symbol->string strategies))))
(pass-if "suitable encoding [latin-1]" (pass-if "suitable encoding [latin-1]"
(let ((str "hello, world")) (let ((str "hello, world")
(with-fluids ((%default-port-encoding "ISO-8859-1")) (encoding "ISO-8859-1"))
(equal? str (equal? str
(with-output-to-string (call-with-output-string
(lambda () (lambda (p)
(display str))))))) (set-port-encoding! p encoding)
(display str p))))))
(pass-if "suitable encoding [latin-3]" (pass-if "suitable encoding [latin-3]"
(let ((str "ĉu bone?")) (let ((str "ĉu bone?")
(with-fluids ((%default-port-encoding "ISO-8859-3")) (encoding "ISO-8859-3"))
(equal? str (equal? str
(with-output-to-string (call-with-output-string
(lambda () (lambda (p)
(display str))))))) (set-port-encoding! p encoding)
(display str p))))))
(pass-if "wrong encoding, error" (pass-if "wrong encoding, error"
(let ((str "ĉu bone?")) (let ((str "ĉu bone?"))
(catch 'encoding-error (catch 'encoding-error
(lambda () (lambda ()
(with-fluids ((%default-port-conversion-strategy 'error))
(call-with-output-string
(lambda (p)
;; Latin-1 cannot represent ‘ĉ’. ;; Latin-1 cannot represent ‘ĉ’.
(with-fluids ((%default-port-encoding "ISO-8859-1") (set-port-encoding! p "ISO-8859-1")
(%default-port-conversion-strategy 'error)) (display str p))))
(with-output-to-string
(lambda ()
(display str))))
#f) ; so the test really fails here #f) ; so the test really fails here
(lambda (key subr message errno port chr) (lambda (key subr message errno port chr)
(and (eqv? chr #\ĉ) (and (eqv? chr #\ĉ)
@ -772,45 +757,24 @@
(pass-if "wrong encoding, substitute" (pass-if "wrong encoding, substitute"
(let ((str "ĉu bone?")) (let ((str "ĉu bone?"))
(with-fluids ((%default-port-encoding "ISO-8859-1")) (string=? (call-with-output-string
(string=? (with-output-to-string (lambda (p)
(lambda () (set-port-encoding! p "ISO-8859-1")
(set-port-conversion-strategy! (current-output-port) (set-port-conversion-strategy! p 'substitute)
'substitute) (display str p)))
(display str))) "?u bone?")))
"?u bone?"))))
(pass-if "wrong encoding, escape" (pass-if "wrong encoding, escape"
(let ((str "ĉu bone?")) (let ((str "ĉu bone?"))
(with-fluids ((%default-port-encoding "ISO-8859-1")) (string=? (call-with-output-string
(string=? (with-output-to-string (lambda (p)
(lambda () (set-port-encoding! p "ISO-8859-1")
(set-port-conversion-strategy! (current-output-port) (set-port-conversion-strategy! p 'escape)
'escape) (display str p)))
(display str))) "\\u0109u bone?")))
"\\u0109u bone?"))))
(pass-if "peek-char [latin-1]" (pass-if "peek-char"
(let ((p (with-fluids ((%default-port-encoding #f)) (let ((p (open-input-string "안녕하세요")))
(open-input-string "hello, world"))))
(and (char=? (peek-char p) #\h)
(char=? (peek-char p) #\h)
(char=? (peek-char p) #\h)
(= (port-line p) 0)
(= (port-column p) 0))))
(pass-if "peek-char [utf-8]"
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
(open-input-string "안녕하세요"))))
(and (char=? (peek-char p) #\안)
(char=? (peek-char p) #\안)
(char=? (peek-char p) #\안)
(= (port-line p) 0)
(= (port-column p) 0))))
(pass-if "peek-char [utf-16]"
(let ((p (with-fluids ((%default-port-encoding "UTF-16BE"))
(open-input-string "안녕하세요"))))
(and (char=? (peek-char p) #\안) (and (char=? (peek-char p) #\안)
(char=? (peek-char p) #\안) (char=? (peek-char p) #\안)
(char=? (peek-char p) #\안) (char=? (peek-char p) #\안)
@ -1207,10 +1171,15 @@
(set-port-encoding! p "does-not-exist") (set-port-encoding! p "does-not-exist")
(read p))) (read p)))
(let ((filename (test-file)))
(with-output-to-file filename (lambda () (write 'test)))
(pass-if-exception "%default-port-encoding, wrong encoding" (pass-if-exception "%default-port-encoding, wrong encoding"
exception:miscellaneous-error exception:miscellaneous-error
(read (with-fluids ((%default-port-encoding "does-not-exist")) (read (with-fluids ((%default-port-encoding "does-not-exist"))
(open-input-string ""))))) (open-input-file filename))))
(delete-file filename)))
;;; ;;;
;;; port-for-each ;;; port-for-each

View file

@ -1,6 +1,6 @@
;;;; -*- coding: utf-8; mode: scheme; -*- ;;;; -*- coding: utf-8; mode: scheme; -*-
;;;; ;;;;
;;;; Copyright (C) 2010 Free Software Foundation, Inc. ;;;; Copyright (C) 2010, 2013 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -59,31 +59,31 @@
(define exp '(a b #(c d e) f . g)) (define exp '(a b #(c d e) f . g))
(define (tprint x width encoding) (define (tprint x width encoding)
(with-fluids ((%default-port-encoding encoding)) (call-with-output-string
(with-output-to-string (lambda (p)
(lambda () (set-port-encoding! p encoding)
(truncated-print x #:width width))))) (truncated-print x p #:width width))))
(pass-if (equal? (tprint exp 10 "ISO-8859-1") (pass-if-equal "(a b . #)"
"(a b . #)")) (tprint exp 10 "ISO-8859-1"))
(pass-if (equal? (tprint exp 15 "ISO-8859-1") (pass-if-equal "(a b # f . g)"
"(a b # f . g)")) (tprint exp 15 "ISO-8859-1"))
(pass-if (equal? (tprint exp 18 "ISO-8859-1") (pass-if-equal "(a b #(c ...) . #)"
"(a b #(c ...) . #)")) (tprint exp 18 "ISO-8859-1"))
(pass-if (equal? (tprint exp 20 "ISO-8859-1") (pass-if-equal "(a b #(c d e) f . g)"
"(a b #(c d e) f . g)")) (tprint exp 20 "ISO-8859-1"))
(pass-if (equal? (tprint "The quick brown fox" 20 "ISO-8859-1") (pass-if-equal "\"The quick brown...\""
"\"The quick brown...\"")) (tprint "The quick brown fox" 20 "ISO-8859-1"))
(pass-if (equal? (tprint "The quick brown fox" 20 "UTF-8") (pass-if-equal "\"The quick brown f…\""
"\"The quick brown f…\"")) (tprint "The quick brown fox" 20 "UTF-8"))
(pass-if (equal? (tprint (current-module) 20 "ISO-8859-1") (pass-if-equal "#<directory (tes...>"
"#<directory (tes...>")) (tprint (current-module) 20 "ISO-8859-1"))
(pass-if (equal? (tprint (current-module) 20 "UTF-8") (pass-if-equal "#<directory (test-…>"
"#<directory (test-…>"))) (tprint (current-module) 20 "UTF-8")))

View file

@ -1,6 +1,6 @@
;;;; r6rs-ports.test --- R6RS I/O port tests. -*- coding: utf-8; -*- ;;;; r6rs-ports.test --- R6RS I/O port tests. -*- coding: utf-8; -*-
;;;; ;;;;
;;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
;;;; Ludovic Courtès ;;;; Ludovic Courtès
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
@ -98,8 +98,7 @@
(eof-object? (get-u8 port))))) (eof-object? (get-u8 port)))))
(pass-if "lookahead-u8 non-ASCII" (pass-if "lookahead-u8 non-ASCII"
(let ((port (with-fluids ((%default-port-encoding "UTF-8")) (let ((port (open-input-string "λ")))
(open-input-string "λ"))))
(and (= 206 (lookahead-u8 port)) (and (= 206 (lookahead-u8 port))
(= 206 (lookahead-u8 port)) (= 206 (lookahead-u8 port))
(= 206 (get-u8 port)) (= 206 (get-u8 port))
@ -272,20 +271,20 @@
(let* ((str "hello, world") (let* ((str "hello, world")
(bv (string->utf16 str))) (bv (string->utf16 str)))
(equal? str (equal? str
(with-fluids ((%default-port-encoding "UTF-16BE"))
(call-with-output-string (call-with-output-string
(lambda (port) (lambda (port)
(put-bytevector port bv))))))) (set-port-encoding! port "UTF-16BE")
(put-bytevector port bv))))))
(pass-if "put-bytevector with wrong-encoding string port" (pass-if "put-bytevector with wrong-encoding string port"
(let* ((str "hello, world") (let* ((str "hello, world")
(bv (string->utf16 str))) (bv (string->utf16 str)))
(catch 'decoding-error (catch 'decoding-error
(lambda () (lambda ()
(with-fluids ((%default-port-encoding "UTF-32") (with-fluids ((%default-port-conversion-strategy 'error))
(%default-port-conversion-strategy 'error))
(call-with-output-string (call-with-output-string
(lambda (port) (lambda (port)
(set-port-encoding! port "UTF-32")
(put-bytevector port bv))) (put-bytevector port bv)))
#f)) ; fail if we reach this point #f)) ; fail if we reach this point
(lambda (key subr message errno port) (lambda (key subr message errno port)

View file

@ -22,8 +22,6 @@
#:use-module ((rnrs io ports) #:select (open-bytevector-input-port)) #:use-module ((rnrs io ports) #:select (open-bytevector-input-port))
#:use-module (test-suite lib)) #:use-module (test-suite lib))
(with-fluids ((%default-port-encoding "UTF-8"))
(with-test-prefix "read-line" (with-test-prefix "read-line"
(pass-if "one line" (pass-if "one line"
@ -242,7 +240,7 @@
(string=? (substring buf 0 len) s) (string=? (substring buf 0 len) s)
(= (read-string! buf p) 0) (= (read-string! buf p) 0)
(string=? (substring buf 0 len) s) (string=? (substring buf 0 len) s)
(string=? (substring buf len) ".")))))) (string=? (substring buf len) ".")))))
;;; Local Variables: ;;; Local Variables:
;;; eval: (put 'with-test-prefix 'scheme-indent-function 1) ;;; eval: (put 'with-test-prefix 'scheme-indent-function 1)

View file

@ -1,6 +1,7 @@
;;;; reader.test --- Reader test. -*- coding: iso-8859-1; mode: scheme -*- ;;;; reader.test --- Reader test. -*- coding: iso-8859-1; mode: scheme -*-
;;;; ;;;;
;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. ;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009, 2010, 2011,
;;;; 2013 Free Software Foundation, Inc.
;;;; Jim Blandy <jimb@red-bean.com> ;;;; Jim Blandy <jimb@red-bean.com>
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
@ -47,8 +48,7 @@
(define (read-string s) (define (read-string s)
(with-fluids ((%default-port-encoding #f)) (with-input-from-string s (lambda () (read))))
(with-input-from-string s (lambda () (read)))))
(define (with-read-options opts thunk) (define (with-read-options opts thunk)
(let ((saved-options (read-options))) (let ((saved-options (read-options)))

View file

@ -155,14 +155,6 @@
(define char-code-limit 256) (define char-code-limit 256)
;; Since `regexp-quote' uses string ports, and since it is used below
;; with non-ASCII characters, these ports must be Unicode-capable.
(define-syntax with-unicode
(syntax-rules ()
((_ exp)
(with-fluids ((%default-port-encoding "UTF-8"))
exp))))
(with-test-prefix "regexp-quote" (with-test-prefix "regexp-quote"
(pass-if-exception "no args" exception:wrong-num-args (pass-if-exception "no args" exception:wrong-num-args
@ -191,7 +183,7 @@
(s (string c))) (s (string c)))
(pass-if (list "char" i (format #f "~s ~s" c s)) (pass-if (list "char" i (format #f "~s ~s" c s))
(with-ascii-or-latin1-locale i (with-ascii-or-latin1-locale i
(let* ((q (with-unicode (regexp-quote s))) (let* ((q (regexp-quote s))
(m (regexp-exec (make-regexp q flag) s))) (m (regexp-exec (make-regexp q flag) s)))
(and (= 0 (match:start m)) (and (= 0 (match:start m))
(= 1 (match:end m)))))))) (= 1 (match:end m))))))))
@ -204,7 +196,7 @@
((>= i 256)) ((>= i 256))
(let* ((c (integer->char i)) (let* ((c (integer->char i))
(s (string #\a c)) (s (string #\a c))
(q (with-unicode (regexp-quote s)))) (q (regexp-quote s)))
(pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q)) (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
(with-ascii-or-latin1-locale i (with-ascii-or-latin1-locale i
(let* ((m (regexp-exec (make-regexp q flag) s))) (let* ((m (regexp-exec (make-regexp q flag) s)))
@ -213,9 +205,9 @@
(pass-if "string of all chars" (pass-if "string of all chars"
(with-latin1-locale (with-latin1-locale
(let ((m (regexp-exec (make-regexp (with-unicode (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
(regexp-quote allchars)) flag)
flag) allchars))) allchars)))
(and (= 0 (match:start m)) (and (= 0 (match:start m))
(= (string-length allchars) (match:end m))))))))) (= (string-length allchars) (match:end m)))))))))
lst))) lst)))

View file

@ -1,6 +1,6 @@
;;;; srfi-105.test --- Test suite for Guile's SRFI-105 reader. -*- scheme -*- ;;;; srfi-105.test --- Test suite for Guile's SRFI-105 reader. -*- scheme -*-
;;;; ;;;;
;;;; Copyright (C) 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -21,8 +21,7 @@
#:use-module (srfi srfi-1)) #:use-module (srfi srfi-1))
(define (read-string s) (define (read-string s)
(with-fluids ((%default-port-encoding #f)) (with-input-from-string s read))
(with-input-from-string s read)))
(define (with-read-options opts thunk) (define (with-read-options opts thunk)
(let ((saved-options (read-options))) (let ((saved-options (read-options)))