diff --git a/libguile/ports.c b/libguile/ports.c index 36f4b8859..1cfcba021 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1114,6 +1114,7 @@ utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size) failure. */ static scm_t_wchar get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +#define FUNC_NAME "scm_getc" { int err, byte_read; size_t bytes_consumed, output_size; @@ -1164,10 +1165,22 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) } if (err != 0) - goto failure; + { + /* Reset the `iconv' state. */ + iconv (pt->input_cd, NULL, NULL, NULL, NULL); + + if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK) + codepoint = '?'; + else + /* Fail when the strategy is SCM_ICONVEH_ERROR or + SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense + for input encoding errors.) */ + goto failure; + } + else + /* Convert the UTF8_BUF sequence to a Unicode code point. */ + codepoint = utf8_to_codepoint (utf8_buf, output_size); - /* Convert the UTF8_BUF sequence to a Unicode code point. */ - codepoint = utf8_to_codepoint (utf8_buf, output_size); update_port_lf (codepoint, port); *len = bytes_consumed; @@ -1176,23 +1189,18 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) failure: { - char *err_buf; - SCM err_str = scm_i_make_string (bytes_consumed, &err_buf); - memcpy (err_buf, buf, bytes_consumed); + SCM bv; - if (err == EILSEQ) - scm_misc_error (NULL, "input encoding error for ~s: ~s", - scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)), - err_str)); - else - scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n", - scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)), - err_str)); + bv = scm_c_make_bytevector (bytes_consumed); + memcpy (SCM_BYTEVECTOR_CONTENTS (bv), buf, bytes_consumed); + scm_encoding_error (FUNC_NAME, err, "input decoding error", + pt->encoding, "UTF-8", bv); } /* Never gets here. */ return 0; } +#undef FUNC_NAME /* Read a codepoint from PORT and return it. */ scm_t_wchar diff --git a/test-suite/lib.scm b/test-suite/lib.scm index 8ebcb0176..f3cbfd786 100644 --- a/test-suite/lib.scm +++ b/test-suite/lib.scm @@ -1,5 +1,6 @@ ;;;; test-suite/lib.scm --- generic support for testing -;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. +;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010, +;;;; 2011 Free Software Foundation, Inc. ;;;; ;;;; This program is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -279,7 +280,7 @@ (define exception:system-error (cons 'system-error ".*")) (define exception:encoding-error - (cons 'encoding-error "(cannot convert to output locale|input locale conversion error)")) + (cons 'encoding-error "(cannot convert to output locale|input (locale conversion|decoding) error)")) (define exception:miscellaneous-error (cons 'misc-error "^.*")) (define exception:read-error diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index 789c58120..8d3f6727c 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -23,7 +23,8 @@ #:use-module (test-suite guile-test) #:use-module (ice-9 popen) #:use-module (ice-9 rdelim) - #:use-module (rnrs bytevectors)) + #:use-module (rnrs bytevectors) + #:use-module ((rnrs io ports) #:select (open-bytevector-input-port))) (define (display-line . args) (for-each display args) @@ -461,7 +462,31 @@ (char=? (peek-char p) #\안) (char=? (peek-char p) #\안) (= (port-line p) 0) - (= (port-column p) 0))))) + (= (port-column p) 0)))) + + (pass-if-exception "read-char, wrong encoding, error" + exception:encoding-error + (let ((p (with-fluids ((%default-port-encoding "UTF-8")) + (open-bytevector-input-port #vu8(255 1 2 3))))) + (set-port-conversion-strategy! p 'error) + (read-char p) + #t)) + + (pass-if-exception "read-char, wrong encoding, escape" + exception:encoding-error + ;; `escape' should behave like `error'. + (let ((p (with-fluids ((%default-port-encoding "UTF-8")) + (open-bytevector-input-port #vu8(255 1 2 3))))) + (set-port-conversion-strategy! p 'escape) + (read-char p) + #t)) + + (pass-if "read-char, wrong encoding, substitute" + (let ((p (with-fluids ((%default-port-encoding "UTF-8")) + (open-bytevector-input-port #vu8(255 206 187 206 188))))) + (set-port-conversion-strategy! p 'substitute) + (equal? (list (read-char p) (read-char p) (read-char p)) + '(#\? #\λ #\μ))))) (with-test-prefix "call-with-output-string"