1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-28 05:50:30 +02:00

Update documentation on jit_frame and jit_tramp

* doc/body.texi: Reorder documentation, making jit_frame
	and jit_tramp the lightning response to the need of
	trampolines, continuations and tail call optimizations.
	A pseudo code example of a factorial function was added.
	Also added a section for description of the available
	predicates.

	* doc/fact.c: New file, implementing a simple example of
	a translation of a trivial, recursive, tail call optimization
	into lightning calls. This is the conversion to functional C
	code of the example in doc/body.texi.

	* doc/Makefile.am: Update for the next test case.
This commit is contained in:
pcpa 2015-01-19 19:09:37 -02:00
parent c8b6c36971
commit 894a02412c
4 changed files with 258 additions and 43 deletions

View file

@ -1,3 +1,19 @@
2015-01-19 Paulo Andrade <pcpa@gnu.org>
* doc/body.texi: Reorder documentation, making jit_frame
and jit_tramp the lightning response to the need of
trampolines, continuations and tail call optimizations.
A pseudo code example of a factorial function was added.
Also added a section for description of the available
predicates.
* doc/fact.c: New file, implementing a simple example of
a translation of a trivial, recursive, tail call optimization
into lightning calls. This is the conversion to functional C
code of the example in doc/body.texi.
* doc/Makefile.am: Update for the next test case.
2015-01-17 Paulo Andrade <pcpa@gnu.org> 2015-01-17 Paulo Andrade <pcpa@gnu.org>
* include/lightning.h, lib/jit_aarch64.c, * include/lightning.h, lib/jit_aarch64.c,

View file

@ -21,7 +21,7 @@ MOSTLYCLEANFILES = lightning.tmp
lightning_TEXINFOS = body.texi version.texi lightning_TEXINFOS = body.texi version.texi
noinst_PROGRAMS = incr printf rpn rfib ifib noinst_PROGRAMS = incr printf rpn rfib ifib fact
$(top_builddir)/lib/liblightning.la: $(top_builddir)/lib/liblightning.la:
cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
@ -40,3 +40,6 @@ rfib_SOURCES = rfib.c
ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
ifib_SOURCES = ifib.c ifib_SOURCES = ifib.c
fact_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
fact_SOURCES = fact.c

View file

@ -460,9 +460,32 @@ function in that register. A function with a return value should use
before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example. before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example.
@code{epilog} is an optional call, that marks the end of a function @code{epilog} is an optional call, that marks the end of a function
body. It is automatically generated by lightning if starting a new body. It is automatically generated by @lightning{} if starting a new
function (what should be done after a @code{ret} call) or finishing function (what should be done after a @code{ret} call) or finishing
generating jit. generating jit.
It is very important to note that the fact that @code{epilog} is
optional may cause a common mistake. Consider this:
@example
fun1:
prolog
...
ret
fun2:
prolog
@end example
Because @code{epilog} is added when finding a new @code{prolog},
this will cause the @code{fun2} label to actually be before the
return from @code{fun1}, because @lightning{} will actually
understand it as:
@example
fun1:
prolog
...
ret
fun2:
epilog
prolog
@end example
You should observe a few rules when using these macros. First of You should observe a few rules when using these macros. First of
all, if calling a varargs function, you should use the @code{ellipsis} all, if calling a varargs function, you should use the @code{ellipsis}
@ -614,7 +637,6 @@ allocai (not specified) @r{reserve space on the stack}
@code{allocai} receives the number of bytes to allocate and returns @code{allocai} receives the number of bytes to allocate and returns
the offset from the frame pointer register @code{FP} to the base of the offset from the frame pointer register @code{FP} to the base of
the area. the area.
@end table
As a small appetizer, here is a small function that adds 1 to the input As a small appetizer, here is a small function that adds 1 to the input
parameter (an @code{int}). I'm using an assembly-like syntax here which parameter (an @code{int}). I'm using an assembly-like syntax here which
@ -647,6 +669,143 @@ in = arg @rem{! Same as above}
ret @rem{! Return to caller} ret @rem{! Return to caller}
@end example @end example
@item Trampolines, continuations and tail call optimization
Frequently it is required to generate jit code that must jump to
code generated later, possibly from another @code{jit_context_t}.
These require compatible stack frames.
@lightning{} provides two primitives from which trampolines,
continuations and tail call optimization can be implemented.
@example
frame (not specified) @r{create stack frame}
tramp (not specified) @r{assume stack frame}
@end example
@code{frame} receives an integer argument@footnote{It is not
automatically computed because it does not know about the
requirement of later generated code.} that defines the size in
bytes for the stack frame of the current, @code{C} callable,
jit function. To calculate this value, a good formula is maximum
number of arguments to any called native function times
eight@footnote{Times eight so that it works for double arguments.
And would not need conditionals for ports that pass arguments in
the stack.}, plus the sum of the arguments to any call to
@code{jit_allocai}. @lightning{} automatically adjusts this value
for any backend specific stack memory it may need, or any
alignment constraint.
@code{frame} also instructs @lightning{} to save all callee
save registers in the prolog and reload in the epilog.
@example
main: @rem{! jit entry point}
prolog @rem{! function prolog}
frame 256 @rem{! save all callee save registers and}
@rem{! reserve at least 256 bytes in stack}
main_loop:
...
jmpi handler @rem{! jumps to external code}
...
ret @rem{! return to the caller}
@end example
@code{tramp} differs from @code{frame} only in that a prolog and epilog
will not be generated. Note that @code{prolog} must still be used.
The code under @code{tramp} must be ready to be entered with a jump
at the prolog position, and instead of a return, it must end with
a non conditional jump. @code{tramp} exists solely for the fact
that it allows optimizing out prolog and epilog code that would
never be executed.
@example
handler: @rem{! handler entry point}
prolog @rem{! function prolog}
tramp 256 @rem{! assumes all callee save registers}
@rem{! are saved and there is at least}
@rem{! 256 bytes in stack}
...
jmpi main_loop @rem{! return to the main loop}
@end example
@lightning{} only supports Tail Call Optimization using the
@code{tramp} construct. Any other way is not guaranteed to
work on all ports.
An example of a simple (recursive) tail call optimization:
@example
factorial: @rem{! Entry point of the factorial function}
prolog
in = arg @rem{! Receive an integer argument}
getarg R0, in @rem{! Move argument to R0}
prepare
pushargi 1 @rem{! This is the accumulator}
pushargr R0 @rem{! This is the argument}
finishi fact @rem{! Call the tail call optimized function}
retval R0 @rem{! Fetch the result}
retr R0 @rem{! Return it}
epilog @rem{! Epilog *before* label before prolog}
fact: @rem{! Entry point of the helper function}
prolog
frame 16 @rem{! Reserve 16 bytes in the stack}
fact_entry: @rem{! This is the tail call entry point}
ac = arg @rem{! The accumulator is the first argument}
in = arg @rem{! The factorial argument}
getarg R0, ac @rem{! Move the accumulator to R0}
getarg R1, in @rem{! Move the argument to R1}
blei fact_out, R1, 1 @rem{! Done if argument is one or less}
mulr R0, R0, R1 @rem{! accumulator *= argument}
putargr R0, ac @rem{! Update the accumulator}
subi R1, R1, 1 @rem{! argument -= 1}
putargr R1, in @rem{! Update the argument}
jmpi fact_entry @rem{! Tail Call Optimize it!}
fact_out:
retr R0 @rem{! Return the accumulator}
@end example
@item Predicates
@example
forward_p (not specified) @r{forward label predicate}
indirect_p (not specified) @r{indirect label predicate}
target_p (not specified) @r{used label predicate}
arg_register_p (not specified) @r{argument kind predicate}
callee_save_p (not specified) @r{callee save predicate}
pointer_p (not specified) @r{pointer predicate}
@end example
@code{forward_p} expects a @code{jit_node_t*} argument, and
returns non zero if it is a forward label reference, that is,
a label returned by @code{forward}, that still needs a
@code{link} call.
@code{indirect_p} expects a @code{jit_node_t*} argument, and returns
non zero if it is an indirect label reference, that is, a label that
was returned by @code{indirect}.
@code{target_p} expects a @code{jit_node_t*} argument, that is any
kind of label, and will return non zero if there is at least one
jump or move referencing it.
@code{arg_register_p} expects a @code{jit_node_t*} argument, that must
have been returned by @code{arg}, @code{arg_f} or @code{arg_d}, and
will return non zero if the argument lives in a register. This call
is useful to know the live range of register arguments, as those
are very fast to read and write, but have volatile values.
@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
@code{JIT_Fn}, and will return non zero if the register is callee
save. This call is useful because on several ports, the @code{JIT_Rn}
and @code{JIT_Fn} registers are actually callee save; no need
to save and load the values when making function calls.
@code{pointer_p} expects a pointer argument, and will return non
zero if the pointer is inside the generated jit code. Must be
called after @code{jit_emit} and before @code{jit_destroy_state}.
@end table
@node GNU lightning examples @node GNU lightning examples
@chapter Generating code at run-time @chapter Generating code at run-time
@ -1291,8 +1450,8 @@ that is usually a function with an @code{_} (underscode) prefix
and with an argument named @code{_jit}, in the pattern: and with an argument named @code{_jit}, in the pattern:
@example @example
static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t); static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t);
#define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v); #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v);
@end example @end example
The reason for this is to use the same syntax as the initial lightning The reason for this is to use the same syntax as the initial lightning
@ -1479,44 +1638,6 @@ Or to only use a data buffer, if required:
@rem{...} @rem{...}
@end example @end example
@section Shared stack frame layout
Certain jit generation patterns, for example the original GNU Smalltalk
jit generator, uses an approach of a fixed trampoline jit code, and
later generation of code that jumps around, assuming a fixed layout
stack frame.
To help on this pattern of code generation, @lightning{} provides
the @code{jit_frame} and the @code{jit_tramp} interfaces, to define
or to assume a stack frame. Both @code{jit_frame} or @code{jit_tramp}
must be the first call after @code{jit_prolog}.
@deftypefun void jit_frame (jit_int32_t @var{frame})
@var{frame} defines the size in bytes of the current function
stack frame. To calculate its value, a good formula is maximum number
of arguments to any called native function times eight, plus the
sum of the arguments to any call to @code{jit_allocai}. @lightning{}
automatically adjusts this value for any backend specific stack memory
it may need, or any alignment constraint.
To ensure trampoline code is correct, @lightning{} will save all
callee save registers in the prolog and reload in the epilog.
@end deftypefun
@deftypefun void jit_tramp (jit_int32_t @var{frame})
@var{frame} must be the same value of the dispatcher defined with the
@code{jit_frame} call.
The only difference of @code{jit_frame} and @code{jit_tramp} is that
@code{jit_tramp} omits generation of a prolog and epilog for the
current function.
Most trampoline based jit generation implements a single dispatch method
and later emit code that knows how to return back to the dispatch routine,
and the later emitted code is called with a non local goto. In such cases,
emitting a native prolog (and epilog) is just a waste of space.
@end deftypefun
It is a fatal error if more than @var{frame} bytes are required
either in the dispatcher defined with @code{jit_frame} or the
"trampolined" code, defined with @code{jit_tramp}.
@node Acknowledgements @node Acknowledgements
@chapter Acknowledgements @chapter Acknowledgements

75
doc/fact.c Normal file
View file

@ -0,0 +1,75 @@
#include <stdio.h>
#include <stdlib.h>
#include <lightning.h>
/* Jit state created in main() with jit_new_state().  The manual describes
 * the jit_mnemonic(...) macros as expanding to _jit_mnemonic(_jit, ...),
 * so a variable with exactly this name must be in scope for the jit_*
 * calls below. */
static jit_state_t *_jit;
/* Type of the generated entry point: takes one long and returns a long. */
typedef long (*pwfw_t)(long); /* Pointer to Long Function of Long */
/*
 * Generate, with lightning, the factorial example from doc/body.texi and
 * run it.
 *
 * Two jit functions are emitted:
 *   - a C callable entry point that receives one integer argument and
 *     calls the helper with an accumulator of 1;
 *   - the helper ("fact"), which multiplies the accumulator by the
 *     argument, decrements the argument and jumps back to its own
 *     entry label instead of calling itself.
 *
 * The value to compute is taken from argv[1] when exactly one command
 * line argument is given; otherwise 5 is used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    pwfw_t       factorial;
    long         arg;
    jit_node_t  *ac;            /* Accumulator */
    jit_node_t  *in;            /* Argument */
    jit_node_t  *call;
    jit_node_t  *fact;
    jit_node_t  *jump;
    jit_node_t  *fact_entry;
    jit_node_t  *fact_out;

    init_jit(argv[0]);
    _jit = jit_new_state();

    /* declare a forward label */
    fact = jit_forward();

    jit_prolog();                       /* Entry point of the factorial function */
    in = jit_arg();                     /* Receive an integer argument */
    jit_getarg(JIT_R0, in);             /* Move argument to R0 */
    jit_prepare();
    jit_pushargi(1);                    /* This is the accumulator */
    jit_pushargr(JIT_R0);               /* This is the argument */
    call = jit_finishi(NULL);           /* Call the tail call optimized function */
    jit_patch_at(call, fact);           /* Patch call to forward defined function */
    /* the above could have been written as:
     *      jit_patch_at(jit_finishi(NULL), fact);
     */
    jit_retval(JIT_R0);                 /* Fetch the result */
    jit_retr(JIT_R0);                   /* Return it */
    jit_epilog();                       /* Epilog *before* label before prolog */

    /* define the forward label */
    jit_link(fact);                     /* Entry point of the helper function */
    jit_prolog();
    jit_frame(16);                      /* Reserve 16 bytes in the stack */
    fact_entry = jit_label();           /* This is the tail call entry point */
    ac = jit_arg();                     /* The accumulator is the first argument */
    in = jit_arg();                     /* The factorial argument */
    jit_getarg(JIT_R0, ac);             /* Move the accumulator to R0 */
    jit_getarg(JIT_R1, in);             /* Move the argument to R1 */
    fact_out = jit_blei(JIT_R1, 1);     /* Done if argument is one or less */
    jit_mulr(JIT_R0, JIT_R0, JIT_R1);   /* accumulator *= argument */
    jit_putargr(JIT_R0, ac);            /* Update the accumulator */
    jit_subi(JIT_R1, JIT_R1, 1);        /* argument -= 1 */
    jit_putargr(JIT_R1, in);            /* Update the argument */
    jump = jit_jmpi();
    jit_patch_at(jump, fact_entry);     /* Tail Call Optimize it! */
    jit_patch(fact_out);
    jit_retr(JIT_R0);                   /* Return the accumulator */

    factorial = jit_emit();
    /* no need to query information about resolved addresses */
    jit_clear_state();

    /* arg is a long, so parse it as one: atoi() yields an int and its
     * behavior is undefined when the value cannot be represented,
     * while strtol() is well defined for any input string. */
    if (argc == 2)
        arg = strtol(argv[1], NULL, 10);
    else
        arg = 5;

    /* call the generated code */
    printf("factorial(%ld) = %ld\n", arg, factorial(arg));
    /* release all memory associated with the _jit identifier */
    jit_destroy_state();
    finish_jit();
    return 0;
}