diff --git a/ChangeLog b/ChangeLog index 5595a9934..22b52dd8a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2015-01-19 Paulo Andrade + + * doc/body.texi: Reorder documentation, making jit_frame + and jit_tramp the lightning response to the need of + trampolines, continuations and tail call optimizations. + A pseudo code example of a factorial function was added. + Also added a section for description of the available + predicates. + + * doc/fact.c: New file, implementing a simple example of + a translation of a trivial, recursive, tail call optimization + into lightning calls. This is the conversion to functional C + code of the example in doc/body.texi. + + * doc/Makefile.am: Update for the next test case. + 2015-01-17 Paulo Andrade * include/lightning.h, lib/jit_aarch64.c, diff --git a/doc/Makefile.am b/doc/Makefile.am index d883d7d0d..712aac364 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -21,7 +21,7 @@ MOSTLYCLEANFILES = lightning.tmp lightning_TEXINFOS = body.texi version.texi -noinst_PROGRAMS = incr printf rpn rfib ifib +noinst_PROGRAMS = incr printf rpn rfib ifib fact $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la @@ -40,3 +40,6 @@ rfib_SOURCES = rfib.c ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) ifib_SOURCES = ifib.c + +fact_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +fact_SOURCES = fact.c diff --git a/doc/body.texi b/doc/body.texi index 5a1c4a374..f37a6288a 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -460,9 +460,32 @@ function in that register. A function with a return value should use before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example. @code{epilog} is an optional call, that marks the end of a function -body. It is automatically generated by lightning if starting a new +body. It is automatically generated by @lightning{} if starting a new function (what should be done after a @code{ret} call) or finishing generating jit. +It is very important to note that the fact that @code{epilog} being +optional may cause a common mistake. Consider this: +@example +fun1: + prolog + ... + ret +fun2: + prolog +@end example +Because @code{epilog} is added when finding a new @code{prolog}, +this will cause the @code{fun2} label to actually be before the +return from @code{fun1}. Because @lightning{} will actually +understand it as: +@example +fun1: + prolog + ... + ret +fun2: + epilog + prolog +@end example You should observe a few rules when using these macros. First of all, if calling a varargs function, you should use the @code{ellipsis} @@ -614,7 +637,6 @@ allocai (not specified) @r{reserve space on the stack} @code{allocai} receives the number of bytes to allocate and returns the offset from the frame pointer register @code{FP} to the base of the area. -@end table As a small appetizer, here is a small function that adds 1 to the input parameter (an @code{int}). I'm using an assembly-like syntax here which @@ -647,6 +669,143 @@ in = arg @rem{! Same as above} ret @rem{! Return to caller} @end example +@item Trampolines, continuations and tail call optimization + +Frequently it is required to generate jit code that must jump to +code generated later, possibly from another @code{jit_context_t}. +These require compatible stack frames. + +@lightning{} provides two primitives from where trampolines, +continuations and tail call optimization can be implemented. + +@example +frame (not specified) @r{create stack frame} +tramp (not specified) @r{assume stack frame} +@end example + +@code{frame} receives an integer argument@footnote{It is not +automatically computed because it does not know about the +requirement of later generated code.} that defines the size in +bytes for the stack frame of the current, @code{C} callable, +jit function. To calculate this value, a good formula is maximum +number of arguments to any called native function times +eight@footnote{Times eight so that it works for double arguments. +And would not need conditionals for ports that pass arguments in +the stack.}, plus the sum of the arguments to any call to +@code{jit_allocai}. @lightning{} automatically adjusts this value +for any backend specific stack memory it may need, or any +alignment constraint. + +@code{frame} also instructs @lightning{} to save all callee +save registers in the prolog and reload in the epilog. + +@example +main: @rem{! jit entry point} + prolog @rem{! function prolog} + frame 256 @rem{! save all callee save registers and} + @rem{! reserve at least 256 byte in stack} +main_loop: + ... + jmpi handler @rem{! jumps to external code} + ... + ret @rem{! return to the caller} +@end example + +@code{tramp} differs from @code{frame} only that a prolog and epilog +will not be generated. Note that @code{prolog} must still be used. +The code under @code{tramp} must be ready to be entered with a jump +at the prolog position, and instead of a return, it must end with +a non conditional jump. @code{tramp} exists solely for the fact +that it allows optimizing out prolog and epilog code that would +never be executed. + +@example +handler: @rem{! handler entry point} + prolog @rem{! function prolog} + tramp 256 @rem{! assumes all callee save registers} + @rem{! are saved and there is at least} + @rem{! 256 byte in stack} + ... + jmpi main_loop @rem{! return to the main loop} +@end example + +@lightning{} only supports Tail Call Optimization using the +@code{tramp} construct. Any other way is not guaranteed to +work on all ports. + +An example of a simple (recursive) tail call optimization: + +@example +factorial: @rem{! Entry point of the factorial function} + prolog +in = arg @rem{! Receive an integer argument} + getarg R0, in @rem{! Move argument to RO} + prepare + pushargi 1 @rem{! This is the accumulator} + pushargr R0 @rem{! This is the argument} + finishi fact @rem{! Call the tail call optimized function} + retval R0 @rem{! Fetch the result} + retr R0 @rem{! Return it} + epilog @rem{! Epilog *before* label before prolog} + +fact: @rem{! Entry point of the helper function} + prolog + frame 16 @rem{! Reserve 16 bytes in the stack} +fact_entry: @rem{! This is the tail call entry point} +ac = arg @rem{! The accumulator is the first argument} +in = arg @rem{! The factorial argument} + getarg R0, ac @rem{! Move the accumulator to R0} + getarg R1, in @rem{! Move the argument to R1} + blei fact_out, R1, 1 @rem{! Done if argument is one or less} + mulr R0, R0, R1 @rem{! accumulator *= argument} + putargr R0, ac @rem{! Update the accumulator} + subi R1, R1, 1 @rem{! argument -= 1} + putargr R1, in @rem{! Update the argument} + jmpi fact_entry @rem{! Tail Call Optimize it!} +fact_out: + retr R0 @rem{! Return the accumulator} +@end example + +@item Predicates +@example +forward_p (not specified) @r{forward label predicate} +indirect_p (not specified) @r{indirect label predicate} +target_p (not specified) @r{used label predicate} +arg_register_p (not specified) @r{argument kind predicate} +callee_save_p (not specified) @r{callee save predicate} +pointer_p (not specified) @r{pointer predicate} +@end example + +@code{forward_p} expects a @code{jit_node_t*} argument, and +returns non zero if it is a forward label reference, that is, +a label returned by @code{forward}, that still needs a +@code{link} call. + +@code{indirect_p} expects a @code{jit_node_t*} argument, and returns +non zero if it is an indirect label reference, that is, a label that +was returned by @code{indirect}. + +@code{target_p} expects a @code{jit_node_t*} argument, that is any +kind of label, and will return non zero if there is at least one +jump or move referencing it. + +@code{arg_register_p} expects a @code{jit_node_t*} argument, that must +have been returned by @code{arg}, @code{arg_f} or @code{arg_d}, and +will return non zero if the argument lives in a register. This call +is useful to know the live range of register arguments, as those +are very fast to read and write, but have volatile values. + +@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or +@code{JIT_Fn}, and will return non zero if the register is callee +save. This call is useful because on several ports, the @code{JIT_Rn} +and @code{JIT_Fn} registers are actually callee save; no need +to save and load the values when making function calls. + +@code{pointer_p} expects a pointer argument, and will return non +zero if the pointer is inside the generated jit code. Must be +called after @code{jit_emit} and before @code{jit_destroy_state}. +@end table + @node GNU lightning examples @chapter Generating code at run-time @@ -1291,8 +1450,8 @@ that is usually a function with an @code{_} (underscode) prefix and with an argument named @code{_jit}, in the pattern: @example - static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t); - #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v); + static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t); + #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v); @end example The reason for this is to use the same syntax as the initial lightning @@ -1479,44 +1638,6 @@ Or to only use a data buffer, if required: @rem{...} @end example -@section Shared stack frame layout -Certain jit generation patterns, for example the original GNU Smalltalk -jit generator, uses an approach of a fixed trampoline jit code, and -later generation of code that jumps around, assuming a fixed layout -stack frame. - -To help on this pattern of code generation, @lightning{} provides -the @code{jit_frame} and the @code{jit_tramp} interfaces, to define -or to assume a stack frame. Both @code{jit_frame} or @code{jit_tramp} -must be the first call after @code{jit_prolog}. - -@deftypefun void jit_frame (jit_int32_t @var{frame}) -@var{frame} defines the size in bytes of the current function -stack frame. To calculate its value, a good formula is maximum number -of arguments to any called native function times eight, plus the -sum of the arguments to any call to @code{jit_allocai}. @lightning{} -automatically adjusts this value for any backend specific stack memory -it may need, or any alignment constraint. -To ensure trampoline code is correct, @lightning{} will save all -callee save registers in the prolog and reload in the epilog. -@end deftypefun - -@deftypefun void jit_tramp (jit_int32_t @var{frame}) -@var{frame} must be the same value of the dispatcher defined with the -@code{jit_frame} call. -The only difference of @code{jit_frame} and @code{jit_tramp} is that -@code{jit_tramp} omits generation of a prolog and epilog for the -current function. -Most trampoline based jit generation implements a single dispatch method -and later emit code that knows how to return back to the dispatch routine, -and the later emitted code is called with a non local goto. In such cases, -emitting a native prolog (and epilog) is just a waste of space. -@end deftypefun - -It is a fatal error if more than @var{frame} bytes are required -either in the dispatcher defined with @code{jit_frame} or the -"trampolined" code, defined with @code{jit_tramp}. - @node Acknowledgements @chapter Acknowledgements diff --git a/doc/fact.c b/doc/fact.c new file mode 100644 index 000000000..375905b81 --- /dev/null +++ b/doc/fact.c @@ -0,0 +1,75 @@ +#include +#include + +static jit_state_t *_jit; + +typedef long (*pwfw_t)(long); /* Pointer to Long Function of Long */ + +int main(int argc, char *argv[]) +{ + pwfw_t factorial; + long arg; + jit_node_t *ac; /* Accumulator */ + jit_node_t *in; /* Argument */ + jit_node_t *call; + jit_node_t *fact; + jit_node_t *jump; + jit_node_t *fact_entry; + jit_node_t *fact_out; + + init_jit(argv[0]); + _jit = jit_new_state(); + + /* declare a forward label */ + fact = jit_forward(); + + jit_prolog(); /* Entry point of the factorial function */ + in = jit_arg(); /* Receive an integer argument */ + jit_getarg(JIT_R0, in); /* Move argument to RO */ + jit_prepare(); + jit_pushargi(1); /* This is the accumulator */ + jit_pushargr(JIT_R0); /* This is the argument */ + call = jit_finishi(NULL); /* Call the tail call optimized function */ + jit_patch_at(call, fact); /* Patch call to forward defined function */ + /* the above could have been written as: + * jit_patch_at(jit_finishi(NULL), fact); + */ + jit_retval(JIT_R0); /* Fetch the result */ + jit_retr(JIT_R0); /* Return it */ + jit_epilog(); /* Epilog *before* label before prolog */ + + /* define the forward label */ + jit_link(fact); /* Entry point of the helper function */ + jit_prolog(); + jit_frame(16); /* Reserve 16 bytes in the stack */ + fact_entry = jit_label(); /* This is the tail call entry point */ + ac = jit_arg(); /* The accumulator is the first argument */ + in = jit_arg(); /* The factorial argument */ + jit_getarg(JIT_R0, ac); /* Move the accumulator to R0 */ + jit_getarg(JIT_R1, in); /* Move the argument to R1 */ + fact_out = jit_blei(JIT_R1, 1); /* Done if argument is one or less */ + jit_mulr(JIT_R0, JIT_R0, JIT_R1); /* accumulator *= argument */ + jit_putargr(JIT_R0, ac); /* Update the accumulator */ + jit_subi(JIT_R1, JIT_R1, 1); /* argument -= 1 */ + jit_putargr(JIT_R1, in); /* Update the argument */ + jump = jit_jmpi(); + jit_patch_at(jump, fact_entry); /* Tail Call Optimize it! */ + jit_patch(fact_out); + jit_retr(JIT_R0); /* Return the accumulator */ + + factorial = jit_emit(); + /* no need to query information about resolved addresses */ + jit_clear_state(); + + if (argc == 2) + arg = atoi(argv[1]); + else + arg = 5; + + /* call the generated code */ + printf("factorial(%ld) = %ld\n", arg, factorial(arg)); + /* release all memory associated with the _jit identifier */ + jit_destroy_state(); + finish_jit(); + return 0; +}