mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-11 22:31:12 +02:00
This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for wide strings 
(scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
123 lines
4.7 KiB
Scheme
123 lines
4.7 KiB
Scheme
;;;; test assembly to bytecode compilation -*- scheme -*-
|
||
;;;;
|
||
;;;; This library is free software; you can redistribute it and/or
|
||
;;;; modify it under the terms of the GNU Lesser General Public
|
||
;;;; License as published by the Free Software Foundation; either
|
||
;;;; version 3 of the License, or (at your option) any later version.
|
||
;;;;
|
||
;;;; This library is distributed in the hope that it will be useful,
|
||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;;;; Lesser General Public License for more details.
|
||
;;;;
|
||
;;;; You should have received a copy of the GNU Lesser General Public
|
||
;;;; License along with this library; if not, write to the Free Software
|
||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
|
||
(define-module (test-suite tests asm-to-bytecode)
|
||
#:use-module (rnrs bytevector)
|
||
#:use-module (test-suite lib)
|
||
#:use-module (system vm instruction)
|
||
#:use-module (language assembly compile-bytecode))
|
||
|
||
;; Return VAL encoded as a list of bytes in native byte order.
;; SYM selects the integer type: `uint16' (2 bytes) or `uint32' (4 bytes).
;; Any other SYM signals the error "unknown sym".
(define (->u8-list sym val)
  ;; Each entry is (type-name byte-width native-setter).
  (define encoders
    `((uint16 2 ,bytevector-u16-native-set!)
      (uint32 4 ,bytevector-u32-native-set!)))
  (let ((spec (assq-ref encoders sym)))
    (if (not spec)
        (error "unknown sym" sym))
    (let* ((width (car spec))
           (store! (cadr spec))
           (bv (make-bytevector width)))
      ;; Write VAL at offset 0, then expose the raw bytes as a list.
      (store! bv 0 val)
      (bytevector->u8-list bv))))
|
||
|
||
;; Convert the test vector V into a u8vector of expected bytecode.
;; Each element of V is one of:
;;   - a symbol: replaced by the result of `instruction->opcode';
;;   - an integer: emitted as-is;
;;   - a pair (TYPE VALUE): expanded by applying `->u8-list' to it.
;; Anything else signals the error "bad test bytecode".
(define (munge-bytecode v)
  ;; Bytes contributed by a single element of V, as a list.
  (define (encode x)
    (cond
     ((symbol? x) (list (instruction->opcode x)))
     ((integer? x) (list x))
     ((pair? x) (apply ->u8-list x))
     (else (error "bad test bytecode" x))))
  ;; Walk the elements left to right, so the first bad element is the
  ;; one reported; chunks are accumulated in reverse and flipped at the end.
  (let loop ((items (vector->list v))
             (chunks '()))
    (if (null? items)
        (list->u8vector (apply append (reverse chunks)))
        (loop (cdr items)
              (cons (encode (car items)) chunks)))))
|
||
|
||
;; Compile the assembly form X with `write-bytecode' and compare the
;; output against Y, a test vector in the format `munge-bytecode' accepts.
;; Two tests are reported through `run-test':
;;   (length X)               -- the number of bytes written equals the
;;                               expected length;
;;   (compile-equal? X BYTES) -- the bytes written equal the expected bytes.
;; The second test inspects the buffer filled in by the first, so the
;; order of the two `run-test' calls matters.
(define (comp-test x y)
  (let* ((expected (munge-bytecode y))
         (expected-len (u8vector-length expected))
         (actual (make-u8vector expected-len))
         (pos 0))
    ;; Callbacks handed to `write-bytecode': report the current offset,
    ;; and store one byte at it before advancing.
    (define (get-addr) pos)
    (define (write-byte b)
      (u8vector-set! actual pos b)
      (set! pos (+ pos 1)))
    (run-test `(length ,x) #t
              (lambda ()
                (write-bytecode x write-byte get-addr '())
                (= pos expected-len)))
    (run-test `(compile-equal? ,x ,expected) #t
              (lambda ()
                (equal? actual expected)))))
|
||
|
||
|
||
(with-test-prefix "compiler"
  (with-test-prefix "asm-to-bytecode"
    ;; Character codes of the string S, in order, for building the
    ;; expected payload bytes of the loader instructions below.
    (let ((char-codes (lambda (s) (map char->integer (string->list s)))))

      (comp-test (list 'make-int8 3)
                 (vector 'make-int8 3))

      ;; load-integer: the expected bytes are three length bytes followed
      ;; by the characters of the operand string.
      (comp-test (list 'load-integer (string (integer->char 0)))
                 (vector 'load-integer 0 0 1 0))

      (comp-test (list 'load-integer (string (integer->char 255)))
                 (vector 'load-integer 0 0 1 255))

      (comp-test (list 'load-integer (list->string (map integer->char '(1 0))))
                 (vector 'load-integer 0 0 2 1 0))

      (comp-test (list 'load-number "3.14")
                 (list->vector `(load-number 0 0 4 ,@(char-codes "3.14"))))

      ;; The string-like loaders expect one extra byte (1 here) between
      ;; the three length bytes and the characters -- presumably the
      ;; character width in bytes; confirm against compile-bytecode.scm.
      (comp-test (list 'load-string "foo")
                 (list->vector `(load-string 0 0 3 1 ,@(char-codes "foo"))))

      (comp-test (list 'load-symbol "foo")
                 (list->vector `(load-symbol 0 0 3 1 ,@(char-codes "foo"))))

      (comp-test (list 'load-keyword "qux")
                 (list->vector `(load-keyword 0 0 3 1 ,@(char-codes "qux"))))

      (comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return))
                 #(load-program
                   3 2 (uint16 1)       ;; nargs, nrest, nlocs
                   (uint32 3)           ;; len: make-int8 3 + return
                   (uint32 0)           ;; metalen
                   (uint32 0)           ;; padding
                   make-int8 3
                   return))

      ;; The nops pad the meta program to an 8-byte alignment.  Not
      ;; strictly necessary for this test, but representative of the
      ;; common case.
      (comp-test '(load-program 3 2 1 () 8
                                (load-program 3 2 1 () 3
                                              #f
                                              (make-int8 3) (return))
                                (make-int8 3) (return)
                                (nop) (nop) (nop) (nop) (nop))
                 #(load-program
                   3 2 (uint16 1)       ;; nargs, nrest, nlocs
                   (uint32 8)           ;; len: 3 code bytes + 5 nops
                   (uint32 19)          ;; metalen: the 19 bytes after the nops
                   (uint32 0)           ;; padding
                   make-int8 3
                   return
                   nop nop nop nop nop
                   ;; meta program
                   3 2 (uint16 1)       ;; nargs, nrest, nlocs
                   (uint32 3)           ;; len
                   (uint32 0)           ;; metalen
                   (uint32 0)           ;; padding
                   make-int8 3
                   return)))))
|