From 1953d2903801806a2648e29e284c694459ae9cf5 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 10 May 2016 11:34:17 +0200 Subject: [PATCH] Decoding errors do not advance read pointer * libguile/ports.c (scm_getc): If the port conversion strategy is 'error, signal an error before advancing the read pointer. This is a change from previous behavior; before, we advanced the read pointer under an understanding that that was what R6RS required. But, that seems to be not the case. * test-suite/tests/ports.test ("string ports"): Update decoding-error tests to assume that read-char with an error doesn't advance the read pointer. * test-suite/tests/rdelim.test ("read-line"): Likewise. --- libguile/ports.c | 26 +++++++------------------- test-suite/tests/ports.test | 19 +++++++++++++++---- test-suite/tests/rdelim.test | 5 ++--- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/libguile/ports.c b/libguile/ports.c index 49e10792f..6b9c4f5db 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1811,34 +1811,22 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len) return err; } -static SCM_C_INLINE int -get_codepoint (SCM port, scm_t_wchar *codepoint) -{ - int err; - size_t len = 0; - scm_t_port *pt = SCM_PTAB_ENTRY (port); - - err = peek_codepoint (port, codepoint, &len); - scm_port_buffer_did_take (pt->read_buf, len); - if (*codepoint == EOF) - scm_i_clear_pending_eof (port); - update_port_lf (*codepoint, port); - return err; -} - /* Read a codepoint from PORT and return it. */ scm_t_wchar scm_getc (SCM port) #define FUNC_NAME "scm_getc" { int err; - scm_t_wchar codepoint; + size_t len = 0; + scm_t_wchar codepoint = EOF; - err = get_codepoint (port, &codepoint); + err = peek_codepoint (port, &codepoint, &len); if (SCM_UNLIKELY (err != 0)) - /* At this point PORT should point past the invalid encoding, as per - R6RS-lib Section 8.2.4. */ scm_decoding_error (FUNC_NAME, err, "input decoding error", port); + scm_port_buffer_did_take (SCM_PTAB_ENTRY (port)->read_buf, len); + if (codepoint == EOF) + scm_i_clear_pending_eof (port); + update_port_lf (codepoint, port); return codepoint; } diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index 33050fd7f..3bb001e4d 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -822,21 +822,32 @@ ;; Mini DSL to test decoding error handling. (letrec-syntax ((decoding-error? (syntax-rules () - ((_ port exp) + ((_ port proc) (catch 'decoding-error (lambda () - (pk 'exp exp) + (pk 'proc (proc port)) #f) (lambda (key subr message errno p) + (define (skip-over-error) + (let ((strategy (port-conversion-strategy p))) + (set-port-conversion-strategy! p 'substitute) + ;; If `proc' is `read-char', this will + ;; skip over the bad bytes. + (let ((c (proc p))) + (unless (eqv? c #\?) + (error "unexpected char" c)) + (set-port-conversion-strategy! p strategy) + #t))) (and (eq? p port) - (not (= 0 errno)))))))) + (not (= 0 errno)) + (skip-over-error))))))) (make-check (syntax-rules (-> error eof) ((_ port (proc -> error)) (if (eq? 'substitute (port-conversion-strategy port)) (eqv? (proc port) #\?) - (decoding-error? port (proc port)))) + (decoding-error? port proc))) ((_ port (proc -> eof)) (eof-object? (proc port))) ((_ port (proc -> char)) diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test index 617e65167..de384c508 100644 --- a/test-suite/tests/rdelim.test +++ b/test-suite/tests/rdelim.test @@ -19,7 +19,7 @@ (define-module (test-suite test-rdelim) #:use-module (ice-9 rdelim) - #:use-module ((rnrs io ports) #:select (open-bytevector-input-port)) + #:use-module ((rnrs io ports) #:select (open-bytevector-input-port get-u8)) #:use-module (test-suite lib)) (with-test-prefix "read-line" @@ -79,8 +79,7 @@ #f) (lambda (key subr message err port) (and (eq? port p) - - ;; PORT should now point past the error. + (eqv? (get-u8 p) 255) (string=? (read-line p) "BCD") (eof-object? (read-line p)))))))