mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
rework the vm support for wide strings
* libguile/_scm.h (SCM_OBJCODE_MINOR_VERSION): Bump. * libguile/vm-engine.c (vm_error_bad_wide_string_length): New error case. * libguile/vm-i-loader.c (load-unsigned-integer, load-integer) (load-keyword): Remove these instructions. The former two are obsoleted by make-int64/make-uint64, the latter via make-keyword. (load-string): Only handle narrow strings. (load-symbol): Only handle narrow symbols. The wide case is handled via make-symbol. (load-wide-string): New instruction, for wide strings. * libguile/vm-i-system.c (define): Move here from loaders.c, as now it just takes a sym on the stack. (make-keyword, make-symbol): New instructions. * module/language/assembly.scm: Remove removed instructions. No more width byte in load-string etc. * module/language/assembly/compile-bytecode.scm (write-bytecode): Adapt to change in instruction set. * module/language/glil/compile-assembly.scm (glil->assembly): Compile define by pushing the sym then emitting (define). (dump-object): Dump narrow and wide strings differently. Use make-keyword and make-symbol as appropriate. * module/language/tree-il/compile-glil.scm (flatten): When compiling a ref to a primitive (not a call), first see if the primitive is actually bound in the root module. (That's not the case with e.g. bytevector-u8-ref). * module/system/xref.scm (program-callee-rev-vars): Don't parse out "nexts". * test-suite/tests/asm-to-bytecode.test ("compiler"): Adapt to bytecode format change.
This commit is contained in:
parent
6cf4830798
commit
94ff26b96b
11 changed files with 101 additions and 201 deletions
|
@ -172,7 +172,7 @@
|
|||
|
||||
// major and minor versions must be single characters
|
||||
#define SCM_OBJCODE_MAJOR_VERSION 0
|
||||
#define SCM_OBJCODE_MINOR_VERSION A
|
||||
#define SCM_OBJCODE_MINOR_VERSION B
|
||||
#define SCM_OBJCODE_MAJOR_VERSION_STRING \
|
||||
SCM_CPP_STRINGIFY(SCM_OBJCODE_MAJOR_VERSION)
|
||||
#define SCM_OBJCODE_MINOR_VERSION_STRING \
|
||||
|
|
|
@ -220,6 +220,10 @@ VM_NAME (struct scm_vm *vp, SCM program, SCM *argv, int nargs)
|
|||
finish_args = SCM_EOL;
|
||||
goto vm_error;
|
||||
|
||||
vm_error_bad_wide_string_length:
|
||||
err_msg = scm_from_locale_string ("VM: Bad wide string length: ~S");
|
||||
goto vm_error;
|
||||
|
||||
#if VM_CHECK_IP
|
||||
vm_error_invalid_address:
|
||||
err_msg = scm_from_locale_string ("VM: Invalid program address");
|
||||
|
|
|
@ -20,42 +20,6 @@
|
|||
|
||||
/* This file is included in vm_engine.c */
|
||||
|
||||
VM_DEFINE_LOADER (80, load_unsigned_integer, "load-unsigned-integer")
|
||||
{
|
||||
size_t len;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
if (SCM_LIKELY (len <= 8))
|
||||
{
|
||||
scm_t_uint64 val = 0;
|
||||
while (len-- > 0)
|
||||
val = (val << 8U) + FETCH ();
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_from_uint64 (val));
|
||||
NEXT;
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-unsigned-integer: not implemented yet", SCM_EOL);
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (81, load_integer, "load-integer")
|
||||
{
|
||||
size_t len;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
if (SCM_LIKELY (len <= 4))
|
||||
{
|
||||
int val = 0;
|
||||
while (len-- > 0)
|
||||
val = (val << 8) + FETCH ();
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_from_int (val));
|
||||
NEXT;
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-integer: not implemented yet", SCM_EOL);
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (82, load_number, "load-number")
|
||||
{
|
||||
size_t len;
|
||||
|
@ -72,82 +36,24 @@ VM_DEFINE_LOADER (82, load_number, "load-number")
|
|||
VM_DEFINE_LOADER (83, load_string, "load-string")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
char *buf;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
|
||||
PUSH (str);
|
||||
ip += len * width;
|
||||
PUSH (scm_i_make_string (len, &buf));
|
||||
memcpy (buf, (char *) ip, len);
|
||||
ip += len;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (84, load_symbol, "load-symbol")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
|
||||
PUSH (scm_string_to_symbol (str));
|
||||
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (85, load_keyword, "load-keyword")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
|
||||
PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
|
||||
ip += len * width;
|
||||
/* FIXME: should be scm_from_latin1_symboln */
|
||||
PUSH (scm_from_locale_symboln ((const char*)ip, len));
|
||||
ip += len;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
|
@ -181,37 +87,6 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1)
|
|||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (88, define, "define")
|
||||
{
|
||||
SCM str, sym;
|
||||
size_t len;
|
||||
|
||||
int width;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
|
||||
sym = scm_string_to_symbol (str);
|
||||
ip += len * width;
|
||||
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (89, load_array, "load-array")
|
||||
{
|
||||
SCM type, shape;
|
||||
|
@ -225,6 +100,24 @@ VM_DEFINE_LOADER (89, load_array, "load-array")
|
|||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (90, load_wide_string, "load-wide-string")
|
||||
{
|
||||
size_t len;
|
||||
scm_t_wchar *wbuf;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
if (SCM_UNLIKELY (len % 4))
|
||||
{ finish_args = scm_list_1 (scm_from_size_t (len));
|
||||
goto vm_error_bad_wide_string_length;
|
||||
}
|
||||
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_i_make_wide_string (len / 4, &wbuf));
|
||||
memcpy ((char *) wbuf, (char *) ip, len);
|
||||
ip += len;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
/*
|
||||
(defun renumber-ops ()
|
||||
"start from top of buffer and renumber 'VM_DEFINE_FOO (\n' sequences"
|
||||
|
|
|
@ -1246,6 +1246,34 @@ VM_DEFINE_INSTRUCTION (65, fix_closure, "fix-closure", 2, 0, 1)
|
|||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_INSTRUCTION (66, define, "define", 0, 0, 2)
|
||||
{
|
||||
SCM sym, val;
|
||||
POP (sym);
|
||||
POP (val);
|
||||
SYNC_REGISTER ();
|
||||
VARIABLE_SET (scm_sym2var (sym, scm_current_module_lookup_closure (),
|
||||
SCM_BOOL_T),
|
||||
val);
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_INSTRUCTION (67, make_keyword, "make-keyword", 0, 1, 1)
|
||||
{
|
||||
CHECK_UNDERFLOW ();
|
||||
SYNC_REGISTER ();
|
||||
*sp = scm_symbol_to_keyword (*sp);
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_INSTRUCTION (68, make_symbol, "make-symbol", 0, 1, 1)
|
||||
{
|
||||
CHECK_UNDERFLOW ();
|
||||
SYNC_REGISTER ();
|
||||
*sp = scm_string_to_symbol (*sp);
|
||||
NEXT;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
(defun renumber-ops ()
|
||||
|
|
|
@ -34,30 +34,21 @@
|
|||
;; lengths are encoded in 3 bytes
|
||||
(define *len-len* 3)
|
||||
|
||||
;; the number of bytes per string character is encoded in 1 byte
|
||||
(define *width-len* 1)
|
||||
|
||||
|
||||
(define (byte-length assembly)
|
||||
(pmatch assembly
|
||||
(,label (guard (not (pair? label)))
|
||||
0)
|
||||
((load-unsigned-integer ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-integer ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-number ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-string ,str)
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-wide-string ,str)
|
||||
(+ 1 *len-len* (* 4 (string-length str))))
|
||||
((load-symbol ,str)
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-keyword ,str)
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-array ,bv)
|
||||
(+ 1 *len-len* (bytevector-length bv)))
|
||||
((define ,str)
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
|
||||
(+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
|
||||
((,inst . _) (guard (>= (instruction-length inst) 0))
|
||||
|
@ -171,5 +162,4 @@
|
|||
n4)))
|
||||
((load-string ,s) s)
|
||||
((load-symbol ,s) (string->symbol s))
|
||||
((load-keyword ,s) (symbol->keyword (string->symbol s)))
|
||||
(else #f)))
|
||||
|
|
|
@ -65,11 +65,13 @@
|
|||
(write-byte (logand (ash x -8) 255))
|
||||
(write-byte (logand (ash x -16) 255))
|
||||
(write-byte (logand (ash x -24) 255)))
|
||||
(define (write-uint32 x) (case byte-order
|
||||
((1234) (write-uint32-le x))
|
||||
((4321) (write-uint32-be x))
|
||||
(else (error "unknown endianness" byte-order))))
|
||||
(define (write-uint32 x)
|
||||
(case byte-order
|
||||
((1234) (write-uint32-le x))
|
||||
((4321) (write-uint32-be x))
|
||||
(else (error "unknown endianness" byte-order))))
|
||||
(define (write-wide-string s)
|
||||
(write-loader-len (* 4 (string-length s)))
|
||||
(string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
|
||||
(define (write-loader-len len)
|
||||
(write-byte (ash len -16))
|
||||
|
@ -133,14 +135,11 @@
|
|||
;; `scm_c_make_objcode_slice ()'.
|
||||
(write-bytecode meta write get-addr '()))))
|
||||
((make-char32 ,x) (write-uint32-be x))
|
||||
((load-unsigned-integer ,str) (write-loader str))
|
||||
((load-integer ,str) (write-loader str))
|
||||
((load-number ,str) (write-loader str))
|
||||
((load-string ,str) (write-sized-loader str))
|
||||
((load-symbol ,str) (write-sized-loader str))
|
||||
((load-keyword ,str) (write-sized-loader str))
|
||||
((load-string ,str) (write-loader str))
|
||||
((load-wide-string ,str) (write-wide-string str))
|
||||
((load-symbol ,str) (write-loader str))
|
||||
((load-array ,bv) (write-bytevector bv))
|
||||
((define ,str) (write-sized-loader str))
|
||||
((br ,l) (write-break l))
|
||||
((br-if ,l) (write-break l))
|
||||
((br-if-not ,l) (write-break l))
|
||||
|
|
|
@ -96,16 +96,6 @@
|
|||
(lp (cons exp out))))))))))
|
||||
|
||||
(define (decode-bytecode pop)
|
||||
(define (get1 bytes-per-char)
|
||||
(if (= bytes-per-char 1)
|
||||
(pop)
|
||||
(let* ((a (pop))
|
||||
(b (pop))
|
||||
(c (pop))
|
||||
(d (pop)))
|
||||
(if (= byte-order 1234)
|
||||
(+ (ash d 24) (ash c 16) (ash b 8) a)
|
||||
(+ (ash a 24) (ash b 16) (ash c 8) d)))))
|
||||
(and=> (pop)
|
||||
(lambda (opcode)
|
||||
(let ((inst (opcode->instruction opcode)))
|
||||
|
@ -117,29 +107,24 @@
|
|||
;; the negative length indicates a variable length
|
||||
;; instruction
|
||||
(let* ((make-sequence
|
||||
(if (eq? inst 'load-array)
|
||||
(if (or (memq inst '(load-array load-wide-string)))
|
||||
make-bytevector
|
||||
make-string))
|
||||
(sequence-set!
|
||||
(if (eq? inst 'load-array)
|
||||
(if (or (memq inst '(load-array load-wide-string)))
|
||||
bytevector-u8-set!
|
||||
(lambda (str pos value)
|
||||
(string-set! str pos (integer->char value)))))
|
||||
(len (let* ((a (pop)) (b (pop)) (c (pop)))
|
||||
(+ (ash a 16) (ash b 8) c)))
|
||||
(bytes-per-count
|
||||
(if (or (eq? inst 'load-string)
|
||||
(eq? inst 'load-symbol)
|
||||
(eq? inst 'load-keyword)
|
||||
(eq? inst 'define))
|
||||
(pop)
|
||||
1))
|
||||
(seq (make-sequence len)))
|
||||
(let lp ((i 0))
|
||||
(if (= i len)
|
||||
`(,inst ,seq)
|
||||
`(,inst ,(if (eq? inst 'load-wide-string)
|
||||
(utf32->string seq)
|
||||
seq))
|
||||
(begin
|
||||
(sequence-set! seq i (get1 bytes-per-count))
|
||||
(sequence-set! seq i (pop))
|
||||
(lp (1+ i)))))))
|
||||
(else
|
||||
;; fixed length
|
||||
|
|
|
@ -318,8 +318,8 @@
|
|||
,(modulo i 256))))
|
||||
object-alist)))))
|
||||
((define)
|
||||
(emit-code `((define ,(symbol->string name))
|
||||
(variable-set))))
|
||||
(emit-code `(,@(dump-object name addr)
|
||||
(define))))
|
||||
(else
|
||||
(error "unknown toplevel var kind" op name))))
|
||||
|
||||
|
@ -391,11 +391,20 @@
|
|||
((number? x)
|
||||
`((load-number ,(number->string x))))
|
||||
((string? x)
|
||||
`((load-string ,x)))
|
||||
(case (string-width x)
|
||||
((1) `((load-string ,x)))
|
||||
((4) (align-code `(load-wide-string ,x) addr 4 4))
|
||||
(else (error "bad string width" x))))
|
||||
((symbol? x)
|
||||
`((load-symbol ,(symbol->string x))))
|
||||
(let ((str (symbol->string x)))
|
||||
(case (string-width str)
|
||||
((1) `((load-symbol ,str)))
|
||||
((4) `(,@(dump-object str addr)
|
||||
(make-symbol)))
|
||||
(else (error "bad string width" str)))))
|
||||
((keyword? x)
|
||||
`((load-keyword ,(symbol->string (keyword->symbol x)))))
|
||||
`(,@(dump-object (keyword->symbol x) addr)
|
||||
(make-keyword)))
|
||||
((list? x)
|
||||
(let ((tail (let ((len (length x)))
|
||||
(if (>= len 65536) (too-long "list"))
|
||||
|
|
|
@ -492,11 +492,16 @@
|
|||
((tail push vals)
|
||||
(emit-code src (make-glil-toplevel 'ref name))))
|
||||
(maybe-emit-return))
|
||||
(else
|
||||
(pk 'ew-the-badness x (current-module) (fluid-ref *comp-module*))
|
||||
((module-variable the-root-module name)
|
||||
(case context
|
||||
((tail push vals)
|
||||
(emit-code src (make-glil-module 'ref '(guile) name #f))))
|
||||
(maybe-emit-return))
|
||||
(else
|
||||
(case context
|
||||
((tail push vals)
|
||||
(emit-code src (make-glil-module
|
||||
'ref (module-name (fluid-ref *comp-module*)) name #f))))
|
||||
(maybe-emit-return))))
|
||||
|
||||
((<lexical-ref> src name gensym)
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
(progv (make-vector (vector-length objects) #f))
|
||||
(asm (decompile (program-objcode prog) #:to 'assembly)))
|
||||
(pmatch asm
|
||||
((load-program ,nargs ,nrest ,nlocs ,next ,labels ,len . ,body)
|
||||
((load-program ,nargs ,nrest ,nlocs ,labels ,len . ,body)
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(pmatch x
|
||||
|
|
|
@ -65,31 +65,18 @@
|
|||
(comp-test '(make-int8 3)
|
||||
#(make-int8 3))
|
||||
|
||||
(comp-test `(load-integer ,(string (integer->char 0)))
|
||||
#(load-integer 0 0 1 0))
|
||||
|
||||
(comp-test `(load-integer ,(string (integer->char 255)))
|
||||
#(load-integer 0 0 1 255))
|
||||
|
||||
(comp-test `(load-integer ,(string (integer->char 1) (integer->char 0)))
|
||||
#(load-integer 0 0 2 1 0))
|
||||
|
||||
(comp-test '(load-number "3.14")
|
||||
(vector 'load-number 0 0 4 (char->integer #\3) (char->integer #\.)
|
||||
(char->integer #\1) (char->integer #\4)))
|
||||
|
||||
(comp-test '(load-string "foo")
|
||||
(vector 'load-string 0 0 3 1 (char->integer #\f) (char->integer #\o)
|
||||
(vector 'load-string 0 0 3 (char->integer #\f) (char->integer #\o)
|
||||
(char->integer #\o)))
|
||||
|
||||
(comp-test '(load-symbol "foo")
|
||||
(vector 'load-symbol 0 0 3 1 (char->integer #\f) (char->integer #\o)
|
||||
(vector 'load-symbol 0 0 3 (char->integer #\f) (char->integer #\o)
|
||||
(char->integer #\o)))
|
||||
|
||||
(comp-test '(load-keyword "qux")
|
||||
(vector 'load-keyword 0 0 3 1 (char->integer #\q) (char->integer #\u)
|
||||
(char->integer #\x)))
|
||||
|
||||
(comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return))
|
||||
#(load-program
|
||||
3 2 (uint16 1) ;; nargs, nrest, nlocs
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue