1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-06 12:10:28 +02:00
guile/module/language/assembly.scm
Michael Gran 9c44cd4559 Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests
2009-08-08 02:35:00 -07:00

175 lines
5.6 KiB
Scheme
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; Guile Virtual Machine Assembly
;; Copyright (C) 2001, 2009 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Code:
(define-module (language assembly)
#:use-module (rnrs bytevector)
#:use-module (system base pmatch)
#:use-module (system vm instruction)
#:use-module ((srfi srfi-1) #:select (fold))
#:export (byte-length
addr+ align-program align-code align-block
assembly-pack assembly-unpack
object->assembly assembly->object))
;; Program header fields, in order: nargs, nrest, nlocs, len, metalen,
;; padding.  The sum below is the header size.
(define *program-header-len* (+ 1 1 2 4 4 4))
;; A length field takes 3 bytes.
(define *len-len* 3)
;; One byte records how many bytes each string character occupies.
(define *width-len* 1)

;; Return the size that the assembly form ASSEMBLY contributes to the
;; instruction stream.
;;
;; Anything that is not a pair (a label) counts for 0.  Loader
;; instructions count 1 for the instruction itself plus a length field
;; plus their payload; string-like loaders additionally carry a width
;; byte, and their payload is `string-width' times `string-length'.
;; Any other instruction counts 1 plus its `instruction-length', provided
;; that length is non-negative.  Everything else signals an
;; "unknown instruction" error.
(define (byte-length assembly)
  ;; Instruction + length field + PAYLOAD bytes.
  (define (sized payload)
    (+ 1 *len-len* payload))
  ;; Instruction + length field + width byte + the characters of TEXT.
  (define (textual text)
    (+ 1 *len-len* *width-len*
       (* (string-width text) (string-length text))))
  (pmatch assembly
    (,label (guard (not (pair? label)))
     0)
    ((load-unsigned-integer ,digits) (sized (string-length digits)))
    ((load-integer ,digits)          (sized (string-length digits)))
    ((load-number ,digits)           (sized (string-length digits)))
    ((load-string ,text)             (textual text))
    ((load-symbol ,text)             (textual text))
    ((load-keyword ,text)            (textual text))
    ((load-array ,bytes)             (sized (bytevector-length bytes)))
    ((define ,text)                  (textual text))
    ((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
     ;; When META is present, its own size minus one is added.
     (let ((meta-size (if meta (1- (byte-length meta)) 0)))
       (+ 1 *program-header-len* len meta-size)))
    ((,inst . _) (guard (>= (instruction-length inst) 0))
     (+ 1 (instruction-length inst)))
    (else (error "unknown instruction" assembly))))
;; Alignment boundary that `align-program' passes to `align-code'.
(define *program-alignment* 8)
;; Alignment boundary that `align-block' passes to `code-alignment'.
(define *block-alignment* 8)
;; Return ADDR advanced by the combined `byte-length' of every form in
;; the list CODE.
(define (addr+ addr code)
  (let advance ((remaining code)
                (end addr))
    (if (null? remaining)
        end
        (advance (cdr remaining)
                 (+ (byte-length (car remaining)) end)))))
;; Return a list of `(nop)' forms, as many as are needed so that
;; ADDR + HEADER-LEN, once advanced by that count, is a multiple of
;; ALIGNMENT.  The list is empty when it already is one.
(define (code-alignment addr alignment header-len)
  (let* ((start (+ addr header-len))
         (past-boundary (modulo start alignment))
         ;; The outer modulo turns a full ALIGNMENT of padding into none.
         (padding (modulo (- alignment past-boundary) alignment)))
    (make-list padding '(nop))))
;; Return the `(nop)' padding that brings ADDR up to a multiple of
;; `*block-alignment*'.  No header length is taken into account.
(define (align-block addr)
  (let ((header-len 0))
    (code-alignment addr *block-alignment* header-len)))
;; Return a fresh list: the `(nop)' padding computed by `code-alignment'
;; for ADDR, ALIGNMENT and HEADER-LEN, followed by CODE as the single
;; last element.
(define (align-code code addr alignment header-len)
  (let ((padding (code-alignment addr alignment header-len)))
    (append padding (list code))))
;; Pad PROG with `align-code', using `*program-alignment*' and a header
;; length of 1.
(define (align-program prog addr)
  (let ((header-len 1))
    (align-code prog addr *program-alignment* header-len)))
;;;
;;; Code compress/decompression
;;;
;; Alist from a full instruction form to its abbreviated form.
(define *abbreviations*
  '(
    ((make-int8 0) . (make-int8:0))
    ((make-int8 1) . (make-int8:1))
    ))

;; The same table with every entry's key and value swapped, mapping an
;; abbreviated form back to the full one.
(define *expansions*
  (let ((swap (lambda (entry)
                (cons (cdr entry) (car entry)))))
    (map swap *abbreviations*)))
;; Return the abbreviated form registered for CODE in `*abbreviations*',
;; or CODE itself when the lookup yields #f.
(define (assembly-pack code)
  (let ((short (assoc-ref *abbreviations* code)))
    (if short short code)))
;; Return the full form registered for CODE in `*expansions*', or CODE
;; itself when the lookup yields #f.
(define (assembly-unpack code)
  (let ((full (assoc-ref *expansions* code)))
    (if full full code)))
;;;
;;; Encoder/decoder
;;;
;; Return an assembly instruction that builds the constant X, or #f when
;; X is not one of the cases handled here.
;;
;; Handled: #t, #f, the empty list, exact integers that fit in a signed
;; or unsigned 64-bit field, and characters.
(define (object->assembly x)
  ;; Write X with STORE! into a fresh 8-byte bytevector, big-endian, and
  ;; return the bytes as a list.
  (define (big-endian-octets store!)
    (let ((buf (make-bytevector 8)))
      (store! buf 0 x (endianness big))
      (bytevector->u8-list buf)))
  ;; Choose the narrowest integer instruction that can hold X.
  (define (integer->assembly)
    (cond ((<= -128 x 127)
           (assembly-pack (list 'make-int8 (modulo x 256))))
          ((<= -32768 x 32767)
           ;; Negative values are shifted up by 65536 before splitting
           ;; into a high and a low byte.
           (let ((u16 (if (negative? x) (+ x 65536) x)))
             (list 'make-int16 (quotient u16 256) (modulo u16 256))))
          ((<= 0 x #xffffffffffffffff)
           (cons 'make-uint64 (big-endian-octets bytevector-u64-set!)))
          ((<= (- #x8000000000000000) x -1)
           (cons 'make-int64 (big-endian-octets bytevector-s64-set!)))
          (else #f)))
  (cond ((eq? x #t) '(make-true))
        ((eq? x #f) '(make-false))
        ((null? x) '(make-eol))
        ((and (integer? x) (exact? x))
         (integer->assembly))
        ((char? x)
         (let ((code-point (char->integer x)))
           (if (<= code-point #xff)
               (list 'make-char8 code-point)
               (list 'make-char32 code-point))))
        (else #f)))
;; Return the Scheme object that the constant-building instruction CODE
;; produces, or #f when CODE is not one of the forms recognised here.
;;
;; Besides the byte-operand forms, this accepts the two shapes that
;; `object->assembly' emits and that used to decode to #f:
;;   - the packed forms `(make-int8:0)' and `(make-int8:1)', which
;;     `*abbreviations*' substitutes for `(make-int8 0)' / `(make-int8 1)';
;;   - `(make-char32 N)' with the code point as a single operand.
;;
;; Note that `(make-false)' also yields #f, so a #f result alone does not
;; tell "the constant #f" apart from "not a constant".
(define (assembly->object code)
  ;; Decode the eight operands OCTETS as one big-endian 64-bit integer
  ;; using the accessor REF.
  (define (decode-64 ref octets)
    (ref (u8-list->bytevector octets) 0 (endianness big)))
  (pmatch code
    ((make-true) #t)
    ((make-false) #f) ;; FIXME: Same as the `else' case!
    ((make-eol) '())
    ;; Packed small integers.
    ((make-int8:0) 0)
    ((make-int8:1) 1)
    ((make-int8 ,n)
     ;; N is an unsigned byte; the upper half stands for negatives.
     (if (< n 128) n (- n 256)))
    ((make-int16 ,n1 ,n2)
     (let ((n (+ (* n1 256) n2)))
       (if (< n 32768) n (- n 65536))))
    ((make-uint64 ,n1 ,n2 ,n3 ,n4 ,n5 ,n6 ,n7 ,n8)
     (decode-64 bytevector-u64-ref (list n1 n2 n3 n4 n5 n6 n7 n8)))
    ((make-int64 ,n1 ,n2 ,n3 ,n4 ,n5 ,n6 ,n7 ,n8)
     (decode-64 bytevector-s64-ref (list n1 n2 n3 n4 n5 n6 n7 n8)))
    ((make-char8 ,n)
     (integer->char n))
    ;; Code point given whole, as `object->assembly' writes it.
    ((make-char32 ,n)
     (integer->char n))
    ;; Code point given as four bytes, most significant first.
    ((make-char32 ,n1 ,n2 ,n3 ,n4)
     (integer->char (+ (* n1 #x1000000)
                       (* n2 #x10000)
                       (* n3 #x100)
                       n4)))
    ((load-string ,s) s)
    ((load-symbol ,s) (string->symbol s))
    ((load-keyword ,s) (symbol->keyword (string->symbol s)))
    (else #f)))