mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-13 07:10:20 +02:00
finish compiler.texi, woo
* libguile/objcodes.c (do-pair): Removed unused debugging hack. * module/language/glil/spec.scm (glil): Simplify a bit. * module/system/repl/repl.scm (default-catch-handler): Don't catch vm-error, as vm-backtrace doesn't exist any more. * doc/ref/compiler.texi: Finish documenting GLIL and object code.
This commit is contained in:
parent
c850030fdd
commit
ff73ae34c3
4 changed files with 154 additions and 93 deletions
|
@ -489,108 +489,212 @@ Interested readers are encouraged to read the implementation in
|
||||||
@node GLIL
|
@node GLIL
|
||||||
@subsection GLIL
|
@subsection GLIL
|
||||||
|
|
||||||
Guile Low Intermediate Language (GHIL) is a structured intermediate
|
Guile Low Intermediate Language (GLIL) is a structured intermediate
|
||||||
language whose expressions closely mirror the functionality of Guile's
|
language whose expressions closely mirror the functionality of Guile's
|
||||||
VM instruction set.
|
VM instruction set.
|
||||||
|
|
||||||
Its expression types are defined in @code{(language glil)}, and as
|
Its expression types are defined in @code{(language glil)}, and as
|
||||||
with GHIL, some of its fields parse as rest arguments.
|
with GHIL, some of its fields parse as rest arguments.
|
||||||
|
|
||||||
@deftp {Scheme Variable} <glil-asm> nargs nrest nlocs nexts meta . body
|
@deftp {Scheme Variable} <glil-program> nargs nrest nlocs nexts meta . body
|
||||||
vars is @code{(@var{nargs} @var{nrest} @var{nlocs} @var{next})}
|
A unit of code that at runtime will correspond to a compiled
|
||||||
|
procedure. @var{nargs}, @var{nrest}, @var{nlocs}, and @var{nexts}
|
||||||
|
collectively define the program's arity; see @ref{Compiled
|
||||||
|
Procedures}, for more information. @var{meta} should be an alist of
|
||||||
|
properties, as in @code{<ghil-lambda>}. @var{body} is a list of GLIL
|
||||||
|
expressions.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-bind> . vars
|
@deftp {Scheme Variable} <glil-bind> . vars
|
||||||
vars is a list of @code{(@var{name} @var{type} @var{index})}
|
An advisory expression that notes a liveness extent for a set of
|
||||||
|
variables. @var{vars} is a list of @code{(@var{name} @var{type}
|
||||||
|
@var{index})}, where @var{type} should be either @code{argument},
|
||||||
|
@code{local}, or @code{external}.
|
||||||
|
|
||||||
|
@code{<glil-bind>} expressions end up being serialized as part of a
|
||||||
|
program's metadata and do not form part of a program's code path.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-mv-bind> vars rest
|
@deftp {Scheme Variable} <glil-mv-bind> vars rest
|
||||||
vars is a list of @code{(@var{name} @var{type} @var{index})}
|
A multiple-value binding of the values on the stack to @var{vars}. Iff
|
||||||
@var{rest} is bool
|
@var{rest} is true, the last element of @var{vars} will be treated as
|
||||||
|
a rest argument.
|
||||||
|
|
||||||
|
In addition to pushing a binding annotation on the stack, like
|
||||||
|
@code{<glil-bind>}, an expression is emitted at compilation time to
|
||||||
|
make sure that there are enough values available to bind. See the
|
||||||
|
notes on @code{truncate-values} in @ref{Procedural Instructions}, for
|
||||||
|
more information.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-unbind>
|
@deftp {Scheme Variable} <glil-unbind>
|
||||||
closes binding
|
Closes the liveness extent of the most recently encountered
|
||||||
|
@code{<glil-bind>} or @code{<glil-mv-bind>} expression. As GLIL
|
||||||
|
expressions are compiled, a parallel stack of live bindings is
|
||||||
|
maintained; this expression pops off the top element from that stack.
|
||||||
|
|
||||||
|
Bindings are written into the program's metadata so that debuggers and
|
||||||
|
other tools can determine the set of live local variables at a given
|
||||||
|
offset within a VM program.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-source> loc
|
@deftp {Scheme Variable} <glil-source> loc
|
||||||
source information for the preceding expression
|
Records source information for the preceding expression. @var{loc}
|
||||||
|
should be a vector, @code{#(@var{line} @var{column} @var{filename})}.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-void>
|
@deftp {Scheme Variable} <glil-void>
|
||||||
push the unspecified value
|
Pushes the unspecified value on the stack.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-const> obj
|
@deftp {Scheme Variable} <glil-const> obj
|
||||||
A constant value -- @var{obj} can be anything serializable -- number,
|
Pushes a constant value onto the stack. @var{obj} must be a number,
|
||||||
string, symbol, keyword, null, bool, char, or pair or vector or list thereof
|
string, symbol, keyword, boolean, character, or a pair or vector or
|
||||||
|
list thereof, or the empty list.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-argument> op index
|
@deftp {Scheme Variable} <glil-argument> op index
|
||||||
access an argument on the stack. op is ref or set.
|
Accesses an argument on the stack. If @var{op} is @code{ref}, the
|
||||||
|
argument is pushed onto the stack; if it is @code{set}, the argument
|
||||||
|
is set from the top value on the stack, which is popped off.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-local> op index
|
@deftp {Scheme Variable} <glil-local> op index
|
||||||
access a local var (on the stack). op is ref or set.
|
Like @code{<glil-argument>}, but for local variables. @xref{Stack
|
||||||
|
Layout}, for more information.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-external> op depth index
|
@deftp {Scheme Variable} <glil-external> op depth index
|
||||||
access a heap-allocated var, depth is the number of environments deep,
|
Accesses a heap-allocated variable, addressed by @var{depth}, the nth
|
||||||
index is the position within the env. op is ref or set.
|
enclosing environment, and @var{index}, the variable's position within
|
||||||
|
the environment. @var{op} is @code{ref} or @code{set}.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-toplevel> op name
|
@deftp {Scheme Variable} <glil-toplevel> op name
|
||||||
access a toplevel var. if compiling at the toplevel, will translate to
|
Accesses a toplevel variable. @var{op} may be @code{ref}, @code{set},
|
||||||
a link-now + variable-ref,set; otherwise toplevel-ref/set with the
|
or @code{define}.
|
||||||
object vector cache. also op == define.
|
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-module> op mod name public?
|
@deftp {Scheme Variable} <glil-module> op mod name public?
|
||||||
access a module var, ref/set, like ...
|
Accesses a variable within a specific module. See
|
||||||
|
@code{ghil-var-at-module!}, for more information.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-label> label
|
@deftp {Scheme Variable} <glil-label> label
|
||||||
make a new label. @var{label} can be any scheme value, and should be
|
Creates a new label. @var{label} can be any Scheme value, and should
|
||||||
unique.
|
be unique.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-branch> inst label
|
@deftp {Scheme Variable} <glil-branch> inst label
|
||||||
branch to a label. @var{label} should be a @code{<ghil-label>}.
|
Branches to a label. @var{label} should be a @code{<glil-label>}.
|
||||||
@code{inst} is a branching instruction: @code{br-if}, @code{br}, etc.
|
@var{inst} is a branching instruction: @code{br-if}, @code{br}, etc.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-call> inst nargs
|
@deftp {Scheme Variable} <glil-call> inst nargs
|
||||||
This expression is perhaps misnamed, as it does not correspond to
|
This expression is probably misnamed, as it does not correspond to
|
||||||
function calls. @code{<glil-call>} invokes the VM instruction named
|
function calls. @code{<glil-call>} invokes the VM instruction named
|
||||||
@var{inst}, noting that it is called with @var{nargs} stack arguments.
|
@var{inst}, noting that it is called with @var{nargs} stack arguments.
|
||||||
|
The arguments should be pushed on the stack already. What happens to
|
||||||
|
the stack afterwards depends on the instruction.
|
||||||
@end deftp
|
@end deftp
|
||||||
@deftp {Scheme Variable} <glil-mv-call> nargs ra
|
@deftp {Scheme Variable} <glil-mv-call> nargs ra
|
||||||
Multiple-values call, ra should be an offset for the mvra, in bytes (?)
|
Performs a multiple-value call. @var{ra} is a @code{<glil-label>}
|
||||||
|
corresponding to the multiple-value return address for the call. See
|
||||||
|
the notes on @code{mv-call} in @ref{Procedural Instructions}, for more
|
||||||
|
information.
|
||||||
@end deftp
|
@end deftp
|
||||||
|
|
||||||
|
Users may enter GLIL at the REPL as well, though there is a bit
|
||||||
|
more bookkeeping to do. Since GLIL needs the set of variables to be
|
||||||
|
declared explicitly in a @code{<glil-program>}, GLIL expressions must
|
||||||
|
be wrapped in a thunk that declares the arity of the expression:
|
||||||
|
|
||||||
passes through the env
|
@example
|
||||||
|
scheme@@(guile-user)> ,language glil
|
||||||
|
Guile Lowlevel Intermediate Language (GLIL) interpreter 0.3 on Guile 1.9.0
|
||||||
|
Copyright (C) 2001-2008 Free Software Foundation, Inc.
|
||||||
|
|
||||||
no let, no lambda, no closures, just labels and branches and constants
|
Enter `,help' for help.
|
||||||
and code. Well, there's a bit more, but that's the flavor of GLIL.
|
glil@@(guile-user)> (program 0 0 0 0 () (const 3) (call return 0))
|
||||||
|
@result{} 3
|
||||||
|
@end example
|
||||||
|
|
||||||
Compiled code will effectively be a thunk, of no arguments, but
|
Just as in all of Guile's compilers, an environment is passed to the
|
||||||
optionally closing over some number of variables (which should be
|
GLIL-to-object code compiler, and one is returned as well, along with
|
||||||
captured via `make-closure', @pxref{Loading Instructions}).
|
the object code.
|
||||||
|
|
||||||
@node Object Code
|
@node Object Code
|
||||||
@subsection Object Code
|
@subsection Object Code
|
||||||
|
|
||||||
describe the env -- module + externals (the actual values!)
|
Object code is the serialization of the raw instruction stream of a
|
||||||
|
program, ready for interpretation by the VM. Procedures related to
|
||||||
|
object code are defined in the @code{(system vm objcode)} module.
|
||||||
|
|
||||||
The env is used when compiling to value -- effectively calling the
|
@deffn {Scheme Procedure} objcode? obj
|
||||||
thunk from objcode->program with a certain current module and with
|
@deffnx {C Function} scm_objcode_p (obj)
|
||||||
those externals. so you can recompile a closure at runtime, a trick
|
Returns @code{#t} iff @var{obj} is object code, @code{#f} otherwise.
|
||||||
that goops uses.
|
@end deffn
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} bytecode->objcode bytecode nlocs nexts
|
||||||
|
@deffnx {C Function} scm_bytecode_to_objcode (bytecode, nlocs, nexts)
|
||||||
|
Makes an object code object from @var{bytecode}, which should be a
|
||||||
|
@code{u8vector}. @var{nlocs} and @var{nexts} denote the number of
|
||||||
|
stack and heap variables to reserve when this objcode is executed.
|
||||||
|
@end deffn
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} load-objcode file
|
||||||
|
@deffnx {C Function} scm_load_objcode (file)
|
||||||
|
Load object code from a file named @var{file}. The file will be mapped
|
||||||
|
into memory via @code{mmap}, so this is a very fast operation.
|
||||||
|
|
||||||
|
On disk, object code has an eight-byte cookie prepended to it, so that
|
||||||
|
we will not execute arbitrary garbage. In addition, two more bytes are
|
||||||
|
reserved for @var{nlocs} and @var{nexts}.
|
||||||
|
@end deffn
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} objcode->u8vector objcode
|
||||||
|
@deffnx {C Function} scm_objcode_to_u8vector (objcode)
|
||||||
|
Copy object code out to a @code{u8vector} for analysis by Scheme. The
|
||||||
|
ten-byte header is included.
|
||||||
|
@end deffn
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} objcode->program objcode [external='()]
|
||||||
|
@deffnx {C Function} scm_objcode_to_program (objcode, external)
|
||||||
|
Load up object code into a Scheme program. The resulting program will
|
||||||
|
be a thunk that captures closure variables from @var{external}.
|
||||||
|
@end deffn
|
||||||
|
|
||||||
|
Object code from a file may be disassembled at the REPL via the
|
||||||
|
meta-command @code{,disassemble-file}, abbreviated as @code{,xx}.
|
||||||
|
Programs may be disassembled via @code{,disassemble}, abbreviated as
|
||||||
|
@code{,x}.
|
||||||
|
|
||||||
|
Compiling object code to the fake language, @code{value}, is performed
|
||||||
|
via loading objcode into a program, then executing that thunk with
|
||||||
|
respect to the compilation environment. Normally the environment
|
||||||
|
propagates through the compiler transparently, but users may specify
|
||||||
|
the compilation environment manually as well:
|
||||||
|
|
||||||
|
@deffn {Scheme Procedure} make-objcode-env module externals
|
||||||
|
Make an object code environment. @var{module} should be a Scheme
|
||||||
|
module, and @var{externals} should be a list of external variables.
|
||||||
|
@code{#f} is also a valid object code environment.
|
||||||
|
@end deffn
|
||||||
|
|
||||||
@node Extending the Compiler
|
@node Extending the Compiler
|
||||||
@subsection Extending the Compiler
|
@subsection Extending the Compiler
|
||||||
|
|
||||||
JIT compilation
|
At this point, we break with the impersonal tone of the rest of the
|
||||||
|
manual, and make an intervention. Admit it: if you've read this far
|
||||||
|
into the compiler internals manual, you are a junkie. Perhaps a course
|
||||||
|
at your university left you unsated, or perhaps you've always harbored
|
||||||
|
a sublimated desire to hack the holy of computer science holies: a
|
||||||
|
compiler. Well, you're in good company, and in a good position. Guile's
|
||||||
|
compiler needs your help.
|
||||||
|
|
||||||
AOT compilation
|
There are many possible avenues for improving Guile's compiler.
|
||||||
|
Probably the most important improvement, speed-wise, will be some form
|
||||||
|
of native compilation, both just-in-time and ahead-of-time. This could
|
||||||
|
be done in many ways. Probably the easiest strategy would be to extend
|
||||||
|
the compiled procedure structure to include a pointer to a native code
|
||||||
|
vector, and compile from bytecode to native code at runtime after a
|
||||||
|
procedure is called a certain number of times.
|
||||||
|
|
||||||
link to what dybvig did
|
The name of the game is a profiling-based harvest of the low-hanging
|
||||||
|
fruit, running programs of interest under a system-level profiler and
|
||||||
|
determining which improvements would give the most bang for the buck.
|
||||||
|
There are many well-known efficiency hacks in the literature: Dybvig's
|
||||||
|
letrec optimization, individual boxing of heap-allocated values (and
|
||||||
|
then storing the boxes on the stack directly), optimized case-lambda
|
||||||
|
expressions, stack underflow and overflow handlers, etc. Highly
|
||||||
|
recommended papers: Dybvig's HOCS, Ghuloum's compiler paper.
|
||||||
|
|
||||||
real name of the game is closure elimination -- fixing letrec
|
The compiler also needs help at the top end, enhancing the Scheme that
|
||||||
|
it knows to also understand R6RS, and adding new high-level compilers:
|
||||||
possibilities: box ``external'' values individually, then allocate on
|
Emacs Lisp, Lua, JavaScript...
|
||||||
stack instead of in a list. HOCS p3. Procedure slots in symbols?
|
|
||||||
Optimized case-lambda to avoid creating lists? Underflow / overflow
|
|
||||||
implementation of continuations? JIT / AOT compilers. R6RS especially
|
|
||||||
wrt modules and macros. Built-in syncase. Letrec optimizations.
|
|
||||||
|
|
||||||
profiling
|
|
||||||
|
|
||||||
startup time
|
|
||||||
|
|
|
@ -138,38 +138,6 @@ objcode_free (SCM obj)
|
||||||
* Scheme interface
|
* Scheme interface
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if 0
|
|
||||||
SCM_DEFINE (scm_do_pair, "do-pair", 2, 0, 0,
|
|
||||||
(SCM car, SCM cdr),
|
|
||||||
"This is a stupid test to see how cells work. (Ludo)")
|
|
||||||
{
|
|
||||||
static SCM room[512];
|
|
||||||
static SCM *where = &room[0];
|
|
||||||
SCM the_pair;
|
|
||||||
size_t incr;
|
|
||||||
|
|
||||||
if ((scm_t_bits)where & 6)
|
|
||||||
{
|
|
||||||
/* Align the cell pointer so that Guile considers it as a
|
|
||||||
non-immediate object (see tags.h). */
|
|
||||||
incr = (scm_t_bits)where & 6;
|
|
||||||
incr = (~incr) & 7;
|
|
||||||
where += incr;
|
|
||||||
}
|
|
||||||
|
|
||||||
printf ("do-pair: pool @ %p, pair @ %p\n", &room[0], where);
|
|
||||||
where[0] = car;
|
|
||||||
where[1] = cdr;
|
|
||||||
|
|
||||||
the_pair = PTR2SCM (where);
|
|
||||||
/* This doesn't work because SCM_SET_GC_MARK will look for some sort of a
|
|
||||||
"mark bitmap" at the end of a supposed cell segment which doesn't
|
|
||||||
exist. */
|
|
||||||
|
|
||||||
return (the_pair);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
SCM_DEFINE (scm_objcode_p, "objcode?", 1, 0, 0,
|
SCM_DEFINE (scm_objcode_p, "objcode?", 1, 0, 0,
|
||||||
(SCM obj),
|
(SCM obj),
|
||||||
"")
|
"")
|
||||||
|
|
|
@ -29,12 +29,6 @@
|
||||||
(define (write-glil exp . port)
|
(define (write-glil exp . port)
|
||||||
(apply write (unparse-glil exp) port))
|
(apply write (unparse-glil exp) port))
|
||||||
|
|
||||||
(define (translate x)
|
|
||||||
;; Don't wrap in a thunk -- if you're down in these weeds you can
|
|
||||||
;; thunk it yourself. We don't know how many locs there will be,
|
|
||||||
;; anyway.
|
|
||||||
(parse-glil x))
|
|
||||||
|
|
||||||
(define (compile x e opts)
|
(define (compile x e opts)
|
||||||
(values (compile-objcode x e) e))
|
(values (compile-objcode x e) e))
|
||||||
|
|
||||||
|
@ -43,6 +37,6 @@
|
||||||
#:version "0.3"
|
#:version "0.3"
|
||||||
#:reader read
|
#:reader read
|
||||||
#:printer write-glil
|
#:printer write-glil
|
||||||
#:parser translate
|
#:parser parse-glil
|
||||||
#:compilers `((,objcode . ,compile))
|
#:compilers `((,objcode . ,compile))
|
||||||
)
|
)
|
||||||
|
|
|
@ -55,11 +55,6 @@
|
||||||
(pmatch args
|
(pmatch args
|
||||||
((quit . _)
|
((quit . _)
|
||||||
(apply throw args))
|
(apply throw args))
|
||||||
((vm-error ,fun ,msg ,args)
|
|
||||||
(vm-backtrace (the-vm))
|
|
||||||
(display "\nVM error: \n")
|
|
||||||
(apply format #t msg args)
|
|
||||||
(newline))
|
|
||||||
((,key ,subr ,msg ,args . ,rest)
|
((,key ,subr ,msg ,args . ,rest)
|
||||||
(let ((cep (current-error-port)))
|
(let ((cep (current-error-port)))
|
||||||
(cond ((not (stack? (fluid-ref the-last-stack))))
|
(cond ((not (stack? (fluid-ref the-last-stack))))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue