1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-13 07:10:20 +02:00

finish compiler.texi, woo

* libguile/objcodes.c (do-pair): Removed unused debugging hack.

* module/language/glil/spec.scm (glil): Simplify a bit.

* module/system/repl/repl.scm (default-catch-handler): Don't catch
  vm-error, as vm-backtrace doesn't exist any more.

* doc/ref/compiler.texi: Finish documenting GLIL and object code.
This commit is contained in:
Andy Wingo 2009-01-11 14:23:48 +01:00
parent c850030fdd
commit ff73ae34c3
4 changed files with 154 additions and 93 deletions

View file

@ -489,108 +489,212 @@ Interested readers are encouraged to read the implementation in
@node GLIL @node GLIL
@subsection GLIL @subsection GLIL
Guile Low Intermediate Language (GHIL) is a structured intermediate Guile Low Intermediate Language (GLIL) is a structured intermediate
language whose expressions closely mirror the functionality of Guile's language whose expressions closely mirror the functionality of Guile's
VM instruction set. VM instruction set.
Its expression types are defined in @code{(language glil)}, and as Its expression types are defined in @code{(language glil)}, and as
with GHIL, some of its fields parse as rest arguments. with GHIL, some of its fields parse as rest arguments.
@deftp {Scheme Variable} <glil-asm> nargs nrest nlocs nexts meta . body @deftp {Scheme Variable} <glil-program> nargs nrest nlocs nexts meta . body
vars is @code{(@var{nargs} @var{nrest} @var{nlocs} @var{next})} A unit of code that at runtime will correspond to a compiled
procedure. @var{nargs}, @var{nrest}, @var{nlocs}, and @var{nexts}
collectively define the program's arity; see @ref{Compiled
Procedures}, for more information. @var{meta} should be an alist of
properties, as in @code{<ghil-lambda>}. @var{body} is a list of GLIL
expressions.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-bind> . vars @deftp {Scheme Variable} <glil-bind> . vars
vars is a list of @code{(@var{name} @var{type} @var{index})} An advisory expression that notes a liveness extent for a set of
variables. @var{vars} is a list of @code{(@var{name} @var{type}
@var{index})}, where @var{type} should be either @code{argument},
@code{local}, or @code{external}.
@code{<glil-bind>} expressions end up being serialized as part of a
program's metadata and do not form part of a program's code path.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-mv-bind> vars rest @deftp {Scheme Variable} <glil-mv-bind> vars rest
vars is a list of @code{(@var{name} @var{type} @var{index})} A multiple-value binding of the values on the stack to @var{vars}. Iff
@var{rest} is bool @var{rest} is true, the last element of @var{vars} will be treated as
a rest argument.
In addition to pushing a binding annotation on the stack, like
@code{<glil-bind>}, an expression is emitted at compilation time to
make sure that there are enough values available to bind. See the
notes on @code{truncate-values} in @ref{Procedural Instructions}, for
more information.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-unbind> @deftp {Scheme Variable} <glil-unbind>
closes binding Closes the liveness extent of the most recently encountered
@code{<glil-bind>} or @code{<glil-mv-bind>} expression. As GLIL
expressions are compiled, a parallel stack of live bindings is
maintained; this expression pops off the top element from that stack.
Bindings are written into the program's metadata so that debuggers and
other tools can determine the set of live local variables at a given
offset within a VM program.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-source> loc @deftp {Scheme Variable} <glil-source> loc
source information for the preceding expression Records source information for the preceding expression. @var{loc}
should be a vector, @code{#(@var{line} @var{column} @var{filename})}.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-void> @deftp {Scheme Variable} <glil-void>
push the unspecified value Pushes the unspecified value on the stack.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-const> obj @deftp {Scheme Variable} <glil-const> obj
A constant value -- @var{obj} can be anything serializable -- number, Pushes a constant value onto the stack. @var{obj} must be a number,
string, symbol, keyword, null, bool, char, or pair or vector or list thereof string, symbol, keyword, boolean, character, or a pair or vector or
list thereof, or the empty list.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-argument> op index @deftp {Scheme Variable} <glil-argument> op index
access an argument on the stack. op is ref or set. Accesses an argument on the stack. If @var{op} is @code{ref}, the
argument is pushed onto the stack; if it is @code{set}, the argument
is set from the top value on the stack, which is popped off.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-local> op index @deftp {Scheme Variable} <glil-local> op index
access a local var (on the stack). op is ref or set. Like @code{<glil-argument>}, but for local variables. @xref{Stack
Layout}, for more information.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-external> op depth index @deftp {Scheme Variable} <glil-external> op depth index
access a heap-allocated var, depth is the number of environments deep, Accesses a heap-allocated variable, addressed by @var{depth}, the nth
index is the position within the env. op is ref or set. enclosing environment, and @var{index}, the variable's position within
the environment. @var{op} is @code{ref} or @code{set}.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-toplevel> op name @deftp {Scheme Variable} <glil-toplevel> op name
access a toplevel var. if compiling at the toplevel, will translate to Accesses a toplevel variable. @var{op} may be @code{ref}, @code{set},
a link-now + variable-ref,set; otherwise toplevel-ref/set with the or @code{define}.
object vector cache. also op == define.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-module> op mod name public? @deftp {Scheme Variable} <glil-module> op mod name public?
access a module var, ref/set, like ... Accesses a variable within a specific module. See
@code{ghil-var-at-module!}, for more information.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-label> label @deftp {Scheme Variable} <glil-label> label
make a new label. @var{label} can be any scheme value, and should be Creates a new label. @var{label} can be any Scheme value, and should
unique. be unique.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-branch> inst label @deftp {Scheme Variable} <glil-branch> inst label
branch to a label. @var{label} should be a @code{<ghil-label>}. Branch to a label. @var{label} should be a @code{<glil-label>}.
@code{inst} is a branching instruction: @code{br-if}, @code{br}, etc. @code{inst} is a branching instruction: @code{br-if}, @code{br}, etc.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-call> inst nargs @deftp {Scheme Variable} <glil-call> inst nargs
This expression is perhaps misnamed, as it does not correspond to This expression is probably misnamed, as it does not correspond to
function calls. @code{<glil-call>} invokes the VM instruction named function calls. @code{<glil-call>} invokes the VM instruction named
@var{inst}, noting that it is called with @var{nargs} stack arguments. @var{inst}, noting that it is called with @var{nargs} stack arguments.
The arguments should be pushed on the stack already. What happens to
the stack afterwards depends on the instruction.
@end deftp @end deftp
@deftp {Scheme Variable} <glil-mv-call> nargs ra @deftp {Scheme Variable} <glil-mv-call> nargs ra
Multiple-values call, ra should be an offset for the mvra, in bytes (?) Performs a multiple-value call. @var{ra} is a @code{<glil-label>}
corresponding to the multiple-value return address for the call. See
the notes on @code{mv-call} in @ref{Procedural Instructions}, for more
information.
@end deftp @end deftp
Users may enter in GLIL at the REPL as well, though there is a bit
more bookkeeping to do. Since GLIL needs the set of variables to be
declared explicitly in a @code{<glil-program>}, GLIL expressions must
be wrapped in a thunk that declares the arity of the expression:
passes through the env @example
scheme@@(guile-user)> ,language glil
Guile Lowlevel Intermediate Language (GLIL) interpreter 0.3 on Guile 1.9.0
Copyright (C) 2001-2008 Free Software Foundation, Inc.
no let, no lambda, no closures, just labels and branches and constants Enter `,help' for help.
and code. Well, there's a bit more, but that's the flavor of GLIL. glil@@(guile-user)> (program 0 0 0 0 () (const 3) (call return 0))
@result{} 3
@end example
Compiled code will effectively be a thunk, of no arguments, but Just as in all of Guile's compilers, an environment is passed to the
optionally closing over some number of variables (which should be GLIL-to-object code compiler, and one is returned as well, along with
captured via `make-closure', @pxref{Loading Instructions}). the object code.
@node Object Code @node Object Code
@subsection Object Code @subsection Object Code
describe the env -- module + externals (the actual values!) Object code is the serialization of the raw instruction stream of a
program, ready for interpretation by the VM. Procedures related to
object code are defined in the @code{(system vm objcode)} module.
The env is used when compiling to value -- effectively calling the @deffn {Scheme Procedure} objcode? obj
thunk from objcode->program with a certain current module and with @deffnx {C Function} scm_objcode_p (obj)
those externals. so you can recompile a closure at runtime, a trick Returns @code{#t} iff @var{obj} is object code, @code{#f} otherwise.
that goops uses. @end deffn
@deffn {Scheme Procedure} bytecode->objcode bytecode nlocs nexts
@deffnx {C Function} scm_bytecode_to_objcode (bytecode, nlocs, nexts)
Makes an object code object from @var{bytecode}, which should be a
@code{u8vector}. @var{nlocs} and @var{nexts} denote the number of
stack and heap variables to reserve when this objcode is executed.
@end deffn
@deffn {Scheme Procedure} load-objcode file
@deffnx {C Function} scm_load_objcode (file)
Load object code from a file named @var{file}. The file will be mapped
into memory via @code{mmap}, so this is a very fast operation.
On disk, object code has an eight-byte cookie prepended to it, so that
we will not execute arbitrary garbage. In addition, two more bytes are
reserved for @var{nlocs} and @var{nexts}.
@end deffn
@deffn {Scheme Procedure} objcode->u8vector objcode
@deffnx {C Function} scm_objcode_to_u8vector (objcode)
Copy object code out to a @code{u8vector} for analysis by Scheme. The
ten-byte header is included.
@end deffn
@deffn {Scheme Procedure} objcode->program objcode [external='()]
@deffnx {C Function} scm_objcode_to_program (objcode, external)
Load up object code into a Scheme program. The resulting program will
be a thunk that captures closure variables from @var{external}.
@end deffn
Object code from a file may be disassembled at the REPL via the
meta-command @code{,disassemble-file}, abbreviated as @code{,xx}.
Programs may be disassembled via @code{,disassemble}, abbreviated as
@code{,x}.
Compiling object code to the fake language, @code{value}, is performed
via loading objcode into a program, then executing that thunk with
respect to the compilation environment. Normally the environment
propagates through the compiler transparently, but users may specify
the compilation environment manually as well:
@deffn {Scheme Procedure} make-objcode-env module externals
Make an object code environment. @var{module} should be a Scheme
module, and @var{externals} should be a list of external variables.
@code{#f} is also a valid object code environment.
@end deffn
@node Extending the Compiler @node Extending the Compiler
@subsection Extending the Compiler @subsection Extending the Compiler
JIT compilation At this point, we break with the impersonal tone of the rest of the
manual, and make an intervention. Admit it: if you've read this far
into the compiler internals manual, you are a junkie. Perhaps a course
at your university left you unsated, or perhaps you've always harbored
a sublimated desire to hack the holy of computer science holies: a
compiler. Well you're in good company, and in a good position. Guile's
compiler needs your help.
AOT compilation There are many possible avenues for improving Guile's compiler.
Probably the most important improvement, speed-wise, will be some form
of native compilation, both just-in-time and ahead-of-time. This could
be done in many ways. Probably the easiest strategy would be to extend
the compiled procedure structure to include a pointer to a native code
vector, and compile from bytecode to native code at runtime after a
procedure is called a certain number of times.
link to what dybvig did The name of the game is a profiling-based harvest of the low-hanging
fruit, running programs of interest under a system-level profiler and
determining which improvements would give the most bang for the buck.
There are many well-known efficiency hacks in the literature: Dybvig's
letrec optimization, individual boxing of heap-allocated values (and
then storing the boxes on the stack directly), optimized case-lambda
expressions, stack underflow and overflow handlers, etc. Highly
recommended papers: Dybvig's HOCS, Ghuloum's compiler paper.
real name of the game is closure elimination -- fixing letrec The compiler also needs help at the top end, enhancing the Scheme that
it knows to also understand R6RS, and adding new high-level compilers:
possibilities: box ``external'' values individually, then allocate on Emacs Lisp, Lua, JavaScript...
stack instead of in a list. HOCS p3. Procedure slots in symbols?
Optimized case-lambda to avoid creating lists? Underflow / overflow
implementation of continuations? JIT / AOT compilers. R6RS especially
wrt modules and macros. Built-in syncase. Letrec optimizations.
profiling
startup time

View file

@ -138,38 +138,6 @@ objcode_free (SCM obj)
* Scheme interface * Scheme interface
*/ */
#if 0
SCM_DEFINE (scm_do_pair, "do-pair", 2, 0, 0,
(SCM car, SCM cdr),
"This is a stupid test to see how cells work. (Ludo)")
{
static SCM room[512];
static SCM *where = &room[0];
SCM the_pair;
size_t incr;
if ((scm_t_bits)where & 6)
{
/* Align the cell pointer so that Guile considers it as a
non-immediate object (see tags.h). */
incr = (scm_t_bits)where & 6;
incr = (~incr) & 7;
where += incr;
}
printf ("do-pair: pool @ %p, pair @ %p\n", &room[0], where);
where[0] = car;
where[1] = cdr;
the_pair = PTR2SCM (where);
/* This doesn't work because SCM_SET_GC_MARK will look for some sort of a
"mark bitmap" at the end of a supposed cell segment which doesn't
exist. */
return (the_pair);
}
#endif
SCM_DEFINE (scm_objcode_p, "objcode?", 1, 0, 0, SCM_DEFINE (scm_objcode_p, "objcode?", 1, 0, 0,
(SCM obj), (SCM obj),
"") "")

View file

@ -29,12 +29,6 @@
(define (write-glil exp . port) (define (write-glil exp . port)
(apply write (unparse-glil exp) port)) (apply write (unparse-glil exp) port))
(define (translate x)
;; Don't wrap in a thunk -- if you're down in these weeds you can
;; thunk it yourself. We don't know how many locs there will be,
;; anyway.
(parse-glil x))
(define (compile x e opts) (define (compile x e opts)
(values (compile-objcode x e) e)) (values (compile-objcode x e) e))
@ -43,6 +37,6 @@
#:version "0.3" #:version "0.3"
#:reader read #:reader read
#:printer write-glil #:printer write-glil
#:parser translate #:parser parse-glil
#:compilers `((,objcode . ,compile)) #:compilers `((,objcode . ,compile))
) )

View file

@ -55,11 +55,6 @@
(pmatch args (pmatch args
((quit . _) ((quit . _)
(apply throw args)) (apply throw args))
((vm-error ,fun ,msg ,args)
(vm-backtrace (the-vm))
(display "\nVM error: \n")
(apply format #t msg args)
(newline))
((,key ,subr ,msg ,args . ,rest) ((,key ,subr ,msg ,args . ,rest)
(let ((cep (current-error-port))) (let ((cep (current-error-port)))
(cond ((not (stack? (fluid-ref the-last-stack)))) (cond ((not (stack? (fluid-ref the-last-stack))))