mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Have `scm_getc' honor the port's conversion strategy.
* libguile/ports.c (get_codepoint): Reset `pt->input_cd' upon failure. If `pt->ilseq_handler' is `SCM_ICONVEH_QUESTION_MARK', then return a question mark. [failure]: Use `scm_encoding_error' when raising an error. * test-suite/lib.scm (exception:encoding-error): Adjust regexp. * test-suite/tests/ports.test ("string ports")["read-char, wrong encoding, error", "read-char, wrong encoding, escape", "read-char, wrong encoding, substitute"]: New tests.
This commit is contained in:
parent
2e59af2100
commit
cc540d0bbd
3 changed files with 52 additions and 18 deletions
|
@ -1114,6 +1114,7 @@ utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
|
|||
failure. */
|
||||
static scm_t_wchar
|
||||
get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
||||
#define FUNC_NAME "scm_getc"
|
||||
{
|
||||
int err, byte_read;
|
||||
size_t bytes_consumed, output_size;
|
||||
|
@ -1164,10 +1165,22 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|||
}
|
||||
|
||||
if (err != 0)
|
||||
goto failure;
|
||||
{
|
||||
/* Reset the `iconv' state. */
|
||||
iconv (pt->input_cd, NULL, NULL, NULL, NULL);
|
||||
|
||||
if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
|
||||
codepoint = '?';
|
||||
else
|
||||
/* Fail when the strategy is SCM_ICONVEH_ERROR or
|
||||
SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense
|
||||
for input encoding errors.) */
|
||||
goto failure;
|
||||
}
|
||||
else
|
||||
/* Convert the UTF8_BUF sequence to a Unicode code point. */
|
||||
codepoint = utf8_to_codepoint (utf8_buf, output_size);
|
||||
|
||||
/* Convert the UTF8_BUF sequence to a Unicode code point. */
|
||||
codepoint = utf8_to_codepoint (utf8_buf, output_size);
|
||||
update_port_lf (codepoint, port);
|
||||
|
||||
*len = bytes_consumed;
|
||||
|
@ -1176,23 +1189,18 @@ get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|||
|
||||
failure:
|
||||
{
|
||||
char *err_buf;
|
||||
SCM err_str = scm_i_make_string (bytes_consumed, &err_buf);
|
||||
memcpy (err_buf, buf, bytes_consumed);
|
||||
SCM bv;
|
||||
|
||||
if (err == EILSEQ)
|
||||
scm_misc_error (NULL, "input encoding error for ~s: ~s",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
else
|
||||
scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
bv = scm_c_make_bytevector (bytes_consumed);
|
||||
memcpy (SCM_BYTEVECTOR_CONTENTS (bv), buf, bytes_consumed);
|
||||
scm_encoding_error (FUNC_NAME, err, "input decoding error",
|
||||
pt->encoding, "UTF-8", bv);
|
||||
}
|
||||
|
||||
/* Never gets here. */
|
||||
return 0;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
/* Read a codepoint from PORT and return it. */
|
||||
scm_t_wchar
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
;;;; test-suite/lib.scm --- generic support for testing
|
||||
;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
|
||||
;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010,
|
||||
;;;; 2011 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -279,7 +280,7 @@
|
|||
(define exception:system-error
|
||||
(cons 'system-error ".*"))
|
||||
(define exception:encoding-error
|
||||
(cons 'encoding-error "(cannot convert to output locale|input locale conversion error)"))
|
||||
(cons 'encoding-error "(cannot convert to output locale|input (locale conversion|decoding) error)"))
|
||||
(define exception:miscellaneous-error
|
||||
(cons 'misc-error "^.*"))
|
||||
(define exception:read-error
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
#:use-module (test-suite guile-test)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (rnrs bytevectors))
|
||||
#:use-module (rnrs bytevectors)
|
||||
#:use-module ((rnrs io ports) #:select (open-bytevector-input-port)))
|
||||
|
||||
(define (display-line . args)
|
||||
(for-each display args)
|
||||
|
@ -461,7 +462,31 @@
|
|||
(char=? (peek-char p) #\안)
|
||||
(char=? (peek-char p) #\안)
|
||||
(= (port-line p) 0)
|
||||
(= (port-column p) 0)))))
|
||||
(= (port-column p) 0))))
|
||||
|
||||
(pass-if-exception "read-char, wrong encoding, error"
|
||||
exception:encoding-error
|
||||
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-bytevector-input-port #vu8(255 1 2 3)))))
|
||||
(set-port-conversion-strategy! p 'error)
|
||||
(read-char p)
|
||||
#t))
|
||||
|
||||
(pass-if-exception "read-char, wrong encoding, escape"
|
||||
exception:encoding-error
|
||||
;; `escape' should behave like `error'.
|
||||
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-bytevector-input-port #vu8(255 1 2 3)))))
|
||||
(set-port-conversion-strategy! p 'escape)
|
||||
(read-char p)
|
||||
#t))
|
||||
|
||||
(pass-if "read-char, wrong encoding, substitute"
|
||||
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-bytevector-input-port #vu8(255 206 187 206 188)))))
|
||||
(set-port-conversion-strategy! p 'substitute)
|
||||
(equal? (list (read-char p) (read-char p) (read-char p))
|
||||
'(#\? #\λ #\μ)))))
|
||||
|
||||
(with-test-prefix "call-with-output-string"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue