1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 20:00:19 +02:00
guile/module/language/assembly/compile-bytecode.scm
Michael Gran 9c44cd4559 Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00

159 lines
6.5 KiB
Scheme

;;; Guile VM assembler
;; Copyright (C) 2001, 2009 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Code:
(define-module (language assembly compile-bytecode)
#:use-module (system base pmatch)
#:use-module (language assembly)
#:use-module (system vm instruction)
#:use-module (srfi srfi-4)
#:use-module (rnrs bytevector)
#:use-module ((srfi srfi-1) #:select (fold))
#:use-module ((system vm objcode) #:select (byte-order))
#:export (compile-bytecode write-bytecode))
(define (compile-bytecode assembly env . opts)
(pmatch assembly
((load-program . _)
;; the 1- and -1 are so that we drop the load-program byte
(letrec ((v (make-u8vector (1- (byte-length assembly))))
(i -1)
(write-byte (lambda (b)
(if (>= i 0) (u8vector-set! v i b))
(set! i (1+ i))))
(get-addr (lambda () i)))
(write-bytecode assembly write-byte get-addr '())
(if (= i (u8vector-length v))
(values v env env)
(error "incorrect length in assembly" i (u8vector-length v)))))
(else (error "bad assembly" assembly))))
(define (write-bytecode asm write-byte get-addr labels)
(define (write-char c)
(write-byte (char->integer c)))
(define (write-string s)
(string-for-each write-char s))
(define (write-uint16-be x)
(write-byte (logand (ash x -8) 255))
(write-byte (logand x 255)))
(define (write-uint16-le x)
(write-byte (logand x 255))
(write-byte (logand (ash x -8) 255)))
(define (write-uint32-be x)
(write-byte (logand (ash x -24) 255))
(write-byte (logand (ash x -16) 255))
(write-byte (logand (ash x -8) 255))
(write-byte (logand x 255)))
(define (write-uint32-le x)
(write-byte (logand x 255))
(write-byte (logand (ash x -8) 255))
(write-byte (logand (ash x -16) 255))
(write-byte (logand (ash x -24) 255)))
(define (write-uint32 x) (case byte-order
((1234) (write-uint32-le x))
((4321) (write-uint32-be x))
(else (error "unknown endianness" byte-order))))
(define (write-wide-string s)
(string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
(define (write-loader-len len)
(write-byte (ash len -16))
(write-byte (logand (ash len -8) 255))
(write-byte (logand len 255)))
(define (write-loader str)
(write-loader-len (string-length str))
(write-string str))
(define (write-sized-loader str)
(let ((len (string-length str))
(wid (string-width str)))
(write-loader-len len)
(write-byte wid)
(if (= wid 4)
(write-wide-string str)
(write-string str))))
(define (write-bytevector bv)
(write-loader-len (bytevector-length bv))
;; Ew!
(for-each write-byte (bytevector->u8-list bv)))
(define (write-break label)
(let ((offset (- (assq-ref labels label)
(logand (+ (get-addr) 2) (lognot #x7)))))
(cond ((not (= 0 (modulo offset 8))) (error "unaligned jump" offset))
((>= offset (ash 1 18)) (error "jump too far forward" offset))
((< offset (- (ash 1 18))) (error "jump too far backwards" offset))
(else (write-uint16-be (ash offset -3))))))
(let ((inst (car asm))
(args (cdr asm))
(write-uint16 (case byte-order
((1234) write-uint16-le)
((4321) write-uint16-be)
(else (error "unknown endianness" byte-order)))))
(let ((opcode (instruction->opcode inst))
(len (instruction-length inst)))
(write-byte opcode)
(pmatch asm
((load-program ,nargs ,nrest ,nlocs ,labels ,length ,meta . ,code)
(write-byte nargs)
(write-byte nrest)
(write-uint16 nlocs)
(write-uint32 length)
(write-uint32 (if meta (1- (byte-length meta)) 0))
(write-uint32 0) ; padding
(letrec ((i 0)
(write (lambda (x) (set! i (1+ i)) (write-byte x)))
(get-addr (lambda () i)))
(for-each (lambda (asm)
(write-bytecode asm write get-addr labels))
code))
(if meta
;; don't write the load-program byte for metadata
(letrec ((i -1)
(write (lambda (x)
(set! i (1+ i))
(if (> i 0) (write-byte x))))
(get-addr (lambda () i)))
;; FIXME: We should add padding here so that META's bytecode
;; meets the alignment requirements of `scm_objcode'. See
;; `scm_c_make_objcode_slice ()'.
(write-bytecode meta write get-addr '()))))
((make-char32 ,x) (write-uint32-be x))
((load-unsigned-integer ,str) (write-loader str))
((load-integer ,str) (write-loader str))
((load-number ,str) (write-loader str))
((load-string ,str) (write-sized-loader str))
((load-symbol ,str) (write-sized-loader str))
((load-keyword ,str) (write-sized-loader str))
((load-array ,bv) (write-bytevector bv))
((define ,str) (write-sized-loader str))
((br ,l) (write-break l))
((br-if ,l) (write-break l))
((br-if-not ,l) (write-break l))
((br-if-eq ,l) (write-break l))
((br-if-not-eq ,l) (write-break l))
((br-if-null ,l) (write-break l))
((br-if-not-null ,l) (write-break l))
((mv-call ,n ,l) (write-byte n) (write-break l))
(else
(cond
((< (instruction-length inst) 0)
(error "unhanded variable-length instruction" asm))
((not (= (length args) len))
(error "bad number of args to instruction" asm len))
(else
(for-each write-byte args))))))))