From 4e27e3c054442189f05355f631176d94b4f5019f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 21 Jun 2016 11:27:21 +0200 Subject: [PATCH] Add R6RS bytevector->string, string->bytevector * module/rnrs/io/ports.scm (string->bytevector): (bytevector->string): New procedures. * module/rnrs.scm: Export new procedures. * test-suite/tests/r6rs-ports.test: Add string->bytevector and bytevector->string tests. --- module/rnrs.scm | 1 + module/rnrs/io/ports.scm | 31 +++++++ test-suite/tests/r6rs-ports.test | 134 +++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) diff --git a/module/rnrs.scm b/module/rnrs.scm index 436821642..d2b4cb3f6 100644 --- a/module/rnrs.scm +++ b/module/rnrs.scm @@ -170,6 +170,7 @@ make-transcoder transcoder-codec transcoder-eol-style transcoder-error-handling-mode native-transcoder latin-1-codec utf-8-codec utf-16-codec + string->bytevector bytevector->string eof-object? port? input-port? output-port? eof-object port-eof? port-transcoder diff --git a/module/rnrs/io/ports.scm b/module/rnrs/io/ports.scm index 8ff674894..5ddc3d58d 100644 --- a/module/rnrs/io/ports.scm +++ b/module/rnrs/io/ports.scm @@ -36,6 +36,9 @@ transcoder-error-handling-mode native-transcoder latin-1-codec utf-8-codec utf-16-codec + ;; transcoding bytevectors + bytevector->string string->bytevector + ;; input & output ports port? input-port? output-port? port-eof? @@ -110,6 +113,7 @@ (only (ice-9 ports internal) port-write-buffer port-buffer-bytevector port-line-buffered?) (only (rnrs bytevectors) bytevector-length) + (prefix (ice-9 iconv) iconv:) (rnrs enums) (rnrs records syntactic) (rnrs exceptions) @@ -171,6 +175,33 @@ (define (utf-16-codec) "UTF-16") + +;;; +;;; Transcoding bytevectors +;;; + +(define (string->bytevector str transcoder) + "Encode @var{str} using @var{transcoder}, returning a bytevector." + (iconv:string->bytevector + str + (transcoder-codec transcoder) + (case (transcoder-error-handling-mode transcoder) + ((raise) 'error) + ((replace) 'substitute) + (else (error "unsupported error handling mode" + (transcoder-error-handling-mode transcoder)))))) + +(define (bytevector->string bv transcoder) + "Decode @var{bv} using @var{transcoder}, returning a string." + (iconv:bytevector->string + bv + (transcoder-codec transcoder) + (case (transcoder-error-handling-mode transcoder) + ((raise) 'error) + ((replace) 'substitute) + (else (error "unsupported error handling mode" + (transcoder-error-handling-mode transcoder)))))) + ;;; ;;; Internal helpers diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test index 8c4ef57e1..b3f11bb20 100644 --- a/test-suite/tests/r6rs-ports.test +++ b/test-suite/tests/r6rs-ports.test @@ -1065,6 +1065,140 @@ not `set-port-position!'" (with-test-prefix "open-file-input/output-port [input]" (test-input-file-opener open-file-input/output-port (test-file)))) +(define exception:encoding-error + '(encoding-error . "")) + +(define exception:decoding-error + '(decoding-error . "")) + + +(with-test-prefix "ascii string" + (let ((s "Hello, World!")) + ;; For ASCII, all of these encodings should be the same. + + (pass-if "to ascii bytevector" + (equal? (string->bytevector s (make-transcoder "ASCII")) + #vu8(72 101 108 108 111 44 32 87 111 114 108 100 33))) + + (pass-if "to ascii bytevector (length check)" + (equal? (string-length s) + (bytevector-length + (string->bytevector s (make-transcoder "ascii"))))) + + (pass-if "from ascii bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "ascii")) + (make-transcoder "ascii")))) + + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s (make-transcoder "ASCII")) + (string->bytevector s (make-transcoder "utf-8")))) + + (pass-if "to UTF-8 bytevector (testing encoding case sensitivity)" + (equal? (string->bytevector s (make-transcoder "ascii")) + (string->bytevector s (make-transcoder "UTF-8")))) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "utf-8")) + (make-transcoder "utf-8")))) + + (pass-if "to latin1 bytevector" + (equal? (string->bytevector s (make-transcoder "ASCII")) + (string->bytevector s (make-transcoder "latin1")))) + + (pass-if "from latin1 bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "utf-8")) + (make-transcoder "utf-8")))))) + +(with-test-prefix "narrow non-ascii string" + (let ((s "été")) + (pass-if "to latin1 bytevector" + (equal? (string->bytevector s (make-transcoder "latin1")) + #vu8(233 116 233))) + + (pass-if "to latin1 bytevector (length check)" + (equal? (string-length s) + (bytevector-length + (string->bytevector s (make-transcoder "latin1"))))) + + (pass-if "from latin1 bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "latin1")) + (make-transcoder "latin1")))) + + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s (make-transcoder "utf-8")) + #vu8(195 169 116 195 169))) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "utf-8")) + (make-transcoder "utf-8")))) + + (pass-if-exception "encode latin1 as ascii" exception:encoding-error + (string->bytevector s (make-transcoder "ascii" + (native-eol-style) + (error-handling-mode raise)))) + + (pass-if-exception "misparse latin1 as utf8" exception:decoding-error + (bytevector->string + (string->bytevector s (make-transcoder "latin1")) + (make-transcoder "utf-8" + (native-eol-style) + (error-handling-mode raise)))) + + (pass-if "misparse latin1 as utf8 with substitutions" + (equal? (bytevector->string + (string->bytevector s (make-transcoder "latin1")) + (make-transcoder "utf-8" (native-eol-style) + (error-handling-mode replace))) + "\uFFFDt\uFFFD")) + + (pass-if-exception "misparse latin1 as ascii" exception:decoding-error + (bytevector->string (string->bytevector s (make-transcoder "latin1")) + (make-transcoder "ascii" + (native-eol-style) + (error-handling-mode raise)))))) + + +(with-test-prefix "wide non-ascii string" + (let ((s "ΧΑΟΣ")) + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s (make-transcoder "utf-8")) + #vu8(206 167 206 145 206 159 206 163) )) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string + (string->bytevector s (make-transcoder "utf-8")) + (make-transcoder "utf-8")))) + + (pass-if-exception "encode as ascii" exception:encoding-error + (string->bytevector s (make-transcoder "ascii" + (native-eol-style) + (error-handling-mode raise)))) + + (pass-if-exception "encode as latin1" exception:encoding-error + (string->bytevector s (make-transcoder "latin1" + (native-eol-style) + (error-handling-mode raise)))) + + (pass-if "encode as ascii with substitutions" + (equal? (make-string (string-length s) #\?) + (bytevector->string + (string->bytevector s (make-transcoder + "ascii" + (native-eol-style) + (error-handling-mode replace))) + (make-transcoder "ascii")))))) + ;;; Local Variables: ;;; mode: scheme ;;; eval: (put 'guard 'scheme-indent-function 1)