1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 19:50:24 +02:00

Have `scm_getc' honor the port's conversion strategy.

* libguile/ports.c (get_codepoint): Reset `pt->input_cd' upon failure.
  If `pt->ilseq_handler' is `SCM_ICONVEH_QUESTION_MARK', then return a
  question mark.
  [failure]: Use `scm_encoding_error' when raising an error.

* test-suite/lib.scm (exception:encoding-error): Adjust regexp.

* test-suite/tests/ports.test ("string ports")["read-char, wrong
  encoding, error", "read-char, wrong encoding, escape", "read-char,
  wrong encoding, substitute"]: New tests.
This commit is contained in:
Ludovic Courtès 2011-01-26 00:16:10 +01:00
parent 2e59af2100
commit cc540d0bbd
3 changed files with 52 additions and 18 deletions

View file

@ -1114,6 +1114,7 @@ utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
failure. */ failure. */
static scm_t_wchar static scm_t_wchar
get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
#define FUNC_NAME "scm_getc"
{ {
int err, byte_read; int err, byte_read;
size_t bytes_consumed, output_size; size_t bytes_consumed, output_size;
@ -1164,10 +1165,22 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
} }
if (err != 0) if (err != 0)
goto failure; {
/* Reset the `iconv' state. */
iconv (pt->input_cd, NULL, NULL, NULL, NULL);
if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
codepoint = '?';
else
/* Fail when the strategy is SCM_ICONVEH_ERROR or
SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense
for input encoding errors.) */
goto failure;
}
else
/* Convert the UTF8_BUF sequence to a Unicode code point. */ /* Convert the UTF8_BUF sequence to a Unicode code point. */
codepoint = utf8_to_codepoint (utf8_buf, output_size); codepoint = utf8_to_codepoint (utf8_buf, output_size);
update_port_lf (codepoint, port); update_port_lf (codepoint, port);
*len = bytes_consumed; *len = bytes_consumed;
@ -1176,23 +1189,18 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
failure: failure:
{ {
char *err_buf; SCM bv;
SCM err_str = scm_i_make_string (bytes_consumed, &err_buf);
memcpy (err_buf, buf, bytes_consumed);
if (err == EILSEQ) bv = scm_c_make_bytevector (bytes_consumed);
scm_misc_error (NULL, "input encoding error for ~s: ~s", memcpy (SCM_BYTEVECTOR_CONTENTS (bv), buf, bytes_consumed);
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)), scm_encoding_error (FUNC_NAME, err, "input decoding error",
err_str)); pt->encoding, "UTF-8", bv);
else
scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
err_str));
} }
/* Never gets here. */ /* Never gets here. */
return 0; return 0;
} }
#undef FUNC_NAME
/* Read a codepoint from PORT and return it. */ /* Read a codepoint from PORT and return it. */
scm_t_wchar scm_t_wchar

View file

@ -1,5 +1,6 @@
;;;; test-suite/lib.scm --- generic support for testing ;;;; test-suite/lib.scm --- generic support for testing
;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. ;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010,
;;;; 2011 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This program is free software; you can redistribute it and/or ;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -279,7 +280,7 @@
(define exception:system-error (define exception:system-error
(cons 'system-error ".*")) (cons 'system-error ".*"))
(define exception:encoding-error (define exception:encoding-error
(cons 'encoding-error "(cannot convert to output locale|input locale conversion error)")) (cons 'encoding-error "(cannot convert to output locale|input (locale conversion|decoding) error)"))
(define exception:miscellaneous-error (define exception:miscellaneous-error
(cons 'misc-error "^.*")) (cons 'misc-error "^.*"))
(define exception:read-error (define exception:read-error

View file

@ -23,7 +23,8 @@
#:use-module (test-suite guile-test) #:use-module (test-suite guile-test)
#:use-module (ice-9 popen) #:use-module (ice-9 popen)
#:use-module (ice-9 rdelim) #:use-module (ice-9 rdelim)
#:use-module (rnrs bytevectors)) #:use-module (rnrs bytevectors)
#:use-module ((rnrs io ports) #:select (open-bytevector-input-port)))
(define (display-line . args) (define (display-line . args)
(for-each display args) (for-each display args)
@ -461,7 +462,31 @@
(char=? (peek-char p) #\안) (char=? (peek-char p) #\안)
(char=? (peek-char p) #\안) (char=? (peek-char p) #\안)
(= (port-line p) 0) (= (port-line p) 0)
(= (port-column p) 0))))) (= (port-column p) 0))))
(pass-if-exception "read-char, wrong encoding, error"
exception:encoding-error
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
(open-bytevector-input-port #vu8(255 1 2 3)))))
(set-port-conversion-strategy! p 'error)
(read-char p)
#t))
(pass-if-exception "read-char, wrong encoding, escape"
exception:encoding-error
;; `escape' should behave like `error'.
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
(open-bytevector-input-port #vu8(255 1 2 3)))))
(set-port-conversion-strategy! p 'escape)
(read-char p)
#t))
(pass-if "read-char, wrong encoding, substitute"
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
(open-bytevector-input-port #vu8(255 206 187 206 188)))))
(set-port-conversion-strategy! p 'substitute)
(equal? (list (read-char p) (read-char p) (read-char p))
'(#\? #\λ #\μ)))))
(with-test-prefix "call-with-output-string" (with-test-prefix "call-with-output-string"