1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Add R6RS bytevector->string, string->bytevector

* module/rnrs/io/ports.scm (string->bytevector):
  (bytevector->string): New procedures.
* module/rnrs.scm: Export new procedures.
* test-suite/tests/r6rs-ports.test: Add string->bytevector and
  bytevector->string tests.
This commit is contained in:
Andy Wingo 2016-06-21 11:27:21 +02:00
parent 59f062ec78
commit 4e27e3c054
3 changed files with 166 additions and 0 deletions

View file

@ -170,6 +170,7 @@
make-transcoder transcoder-codec transcoder-eol-style make-transcoder transcoder-codec transcoder-eol-style
transcoder-error-handling-mode native-transcoder transcoder-error-handling-mode native-transcoder
latin-1-codec utf-8-codec utf-16-codec latin-1-codec utf-8-codec utf-16-codec
string->bytevector bytevector->string
eof-object? port? input-port? output-port? eof-object port-eof? eof-object? port? input-port? output-port? eof-object port-eof?
port-transcoder port-transcoder

View file

@ -36,6 +36,9 @@
transcoder-error-handling-mode native-transcoder transcoder-error-handling-mode native-transcoder
latin-1-codec utf-8-codec utf-16-codec latin-1-codec utf-8-codec utf-16-codec
;; transcoding bytevectors
bytevector->string string->bytevector
;; input & output ports ;; input & output ports
port? input-port? output-port? port? input-port? output-port?
port-eof? port-eof?
@ -110,6 +113,7 @@
(only (ice-9 ports internal) (only (ice-9 ports internal)
port-write-buffer port-buffer-bytevector port-line-buffered?) port-write-buffer port-buffer-bytevector port-line-buffered?)
(only (rnrs bytevectors) bytevector-length) (only (rnrs bytevectors) bytevector-length)
(prefix (ice-9 iconv) iconv:)
(rnrs enums) (rnrs enums)
(rnrs records syntactic) (rnrs records syntactic)
(rnrs exceptions) (rnrs exceptions)
@ -171,6 +175,33 @@
(define (utf-16-codec) (define (utf-16-codec)
"UTF-16") "UTF-16")
;;;
;;; Transcoding bytevectors
;;;
(define (string->bytevector str transcoder)
"Encode @var{str} using @var{transcoder}, returning a bytevector."
(iconv:string->bytevector
str
(transcoder-codec transcoder)
(case (transcoder-error-handling-mode transcoder)
((raise) 'error)
((replace) 'substitute)
(else (error "unsupported error handling mode"
(transcoder-error-handling-mode transcoder))))))
(define (bytevector->string bv transcoder)
"Decode @var{bv} using @var{transcoder}, returning a string."
(iconv:bytevector->string
bv
(transcoder-codec transcoder)
(case (transcoder-error-handling-mode transcoder)
((raise) 'error)
((replace) 'substitute)
(else (error "unsupported error handling mode"
(transcoder-error-handling-mode transcoder))))))
;;; ;;;
;;; Internal helpers ;;; Internal helpers

View file

@ -1065,6 +1065,140 @@ not `set-port-position!'"
(with-test-prefix "open-file-input/output-port [input]" (with-test-prefix "open-file-input/output-port [input]"
(test-input-file-opener open-file-input/output-port (test-file)))) (test-input-file-opener open-file-input/output-port (test-file))))
(define exception:encoding-error
'(encoding-error . ""))
(define exception:decoding-error
'(decoding-error . ""))
(with-test-prefix "ascii string"
(let ((s "Hello, World!"))
;; For ASCII, all of these encodings should be the same.
(pass-if "to ascii bytevector"
(equal? (string->bytevector s (make-transcoder "ASCII"))
#vu8(72 101 108 108 111 44 32 87 111 114 108 100 33)))
(pass-if "to ascii bytevector (length check)"
(equal? (string-length s)
(bytevector-length
(string->bytevector s (make-transcoder "ascii")))))
(pass-if "from ascii bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "ascii"))
(make-transcoder "ascii"))))
(pass-if "to utf-8 bytevector"
(equal? (string->bytevector s (make-transcoder "ASCII"))
(string->bytevector s (make-transcoder "utf-8"))))
(pass-if "to UTF-8 bytevector (testing encoding case sensitivity)"
(equal? (string->bytevector s (make-transcoder "ascii"))
(string->bytevector s (make-transcoder "UTF-8"))))
(pass-if "from utf-8 bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "utf-8"))
(make-transcoder "utf-8"))))
(pass-if "to latin1 bytevector"
(equal? (string->bytevector s (make-transcoder "ASCII"))
(string->bytevector s (make-transcoder "latin1"))))
(pass-if "from latin1 bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "utf-8"))
(make-transcoder "utf-8"))))))
(with-test-prefix "narrow non-ascii string"
(let ((s "été"))
(pass-if "to latin1 bytevector"
(equal? (string->bytevector s (make-transcoder "latin1"))
#vu8(233 116 233)))
(pass-if "to latin1 bytevector (length check)"
(equal? (string-length s)
(bytevector-length
(string->bytevector s (make-transcoder "latin1")))))
(pass-if "from latin1 bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "latin1"))
(make-transcoder "latin1"))))
(pass-if "to utf-8 bytevector"
(equal? (string->bytevector s (make-transcoder "utf-8"))
#vu8(195 169 116 195 169)))
(pass-if "from utf-8 bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "utf-8"))
(make-transcoder "utf-8"))))
(pass-if-exception "encode latin1 as ascii" exception:encoding-error
(string->bytevector s (make-transcoder "ascii"
(native-eol-style)
(error-handling-mode raise))))
(pass-if-exception "misparse latin1 as utf8" exception:decoding-error
(bytevector->string
(string->bytevector s (make-transcoder "latin1"))
(make-transcoder "utf-8"
(native-eol-style)
(error-handling-mode raise))))
(pass-if "misparse latin1 as utf8 with substitutions"
(equal? (bytevector->string
(string->bytevector s (make-transcoder "latin1"))
(make-transcoder "utf-8" (native-eol-style)
(error-handling-mode replace)))
"\uFFFDt\uFFFD"))
(pass-if-exception "misparse latin1 as ascii" exception:decoding-error
(bytevector->string (string->bytevector s (make-transcoder "latin1"))
(make-transcoder "ascii"
(native-eol-style)
(error-handling-mode raise))))))
(with-test-prefix "wide non-ascii string"
(let ((s "ΧΑΟΣ"))
(pass-if "to utf-8 bytevector"
(equal? (string->bytevector s (make-transcoder "utf-8"))
#vu8(206 167 206 145 206 159 206 163) ))
(pass-if "from utf-8 bytevector"
(equal? s
(bytevector->string
(string->bytevector s (make-transcoder "utf-8"))
(make-transcoder "utf-8"))))
(pass-if-exception "encode as ascii" exception:encoding-error
(string->bytevector s (make-transcoder "ascii"
(native-eol-style)
(error-handling-mode raise))))
(pass-if-exception "encode as latin1" exception:encoding-error
(string->bytevector s (make-transcoder "latin1"
(native-eol-style)
(error-handling-mode raise))))
(pass-if "encode as ascii with substitutions"
(equal? (make-string (string-length s) #\?)
(bytevector->string
(string->bytevector s (make-transcoder
"ascii"
(native-eol-style)
(error-handling-mode replace)))
(make-transcoder "ascii"))))))
;;; Local Variables: ;;; Local Variables:
;;; mode: scheme ;;; mode: scheme
;;; eval: (put 'guard 'scheme-indent-function 1) ;;; eval: (put 'guard 'scheme-indent-function 1)