diff --git a/.gitignore b/.gitignore index a89a8e180..ddfc42407 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ missing stamp-h1 test-driver check/.deps +doc/.deps lib/.deps m4/libtool.m4 m4/lt~obsolete.m4 diff --git a/ChangeLog b/ChangeLog index 84d3c4391..1322aacf7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,31 @@ +2013-01-24 Paulo Andrade + + * check/Makefile.am: "make debug" target should pass only + the main test tool program as argument for running gdb + + * configure.ac: Add the --enable-assertions options. + + * doc/Makefile.am, doc/body.texi, doc/lightning.texi: + Major rewrite of the documentation to match the current + implementation. + + * doc/version.texi: Automatic date update. + + * doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c: + Implementation of the documentation examples, that are also + compiled during a normal build. + + * doc/p-lightning.texi, doc/porting.texi, doc/toc.texi, + doc/u-lightning.texi, doc/using.texi: These files were + renamed in the documentation rewrite, as the documentation + was significantly trimmed due to full removal of the porting + chapters. Better porting documentation should be added but + for the moment it was just removed the documentation not + matching the implementation. + 2013-01-18 Paulo Andrade - lib/jit_note.c: Correct bounds check and wrong code keeping + * lib/jit_note.c: Correct bounds check and wrong code keeping a pointer that could be changed after a realloc call. 
2013-01-18 Paulo Andrade diff --git a/check/Makefile.am b/check/Makefile.am index ce78c49fd..33ce2134e 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -172,6 +172,6 @@ CLEANFILES = $(TESTS) #TESTS_ENVIRONMENT=$(srcdir)/run-test; -debug: $(check_PROGRAMS) - $(LIBTOOL) --mode=execute gdb $(check_PROGRAMS) +debug: lightning + $(LIBTOOL) --mode=execute gdb lightning diff --git a/configure.ac b/configure.ac index 9d5e43350..83a7805dd 100644 --- a/configure.ac +++ b/configure.ac @@ -56,6 +56,17 @@ if test "x$DISASSEMBLER" != "xno"; then LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDISASSEMBLER=1" fi +AC_ARG_ENABLE(assertions, + AS_HELP_STRING([--enable-assertions], + [Enable runtime code generation assertions]), + [DEBUG=$enableval], [DEBUG=auto]) +if test "x$DEBUG" = xyes; then + LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1" +else + LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG" + DEBUG=no +fi + cpu= case "$target_cpu" in i?86|x86_64) cpu=x86 ;; diff --git a/doc/Makefile.am b/doc/Makefile.am index 3baca2d69..3f4ff64ab 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,7 +1,40 @@ +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# + +AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE + info_TEXINFOS = lightning.texi -EXTRA_TEXINFOS = u-lightning.texi p-lightning.texi MOSTLYCLEANFILES = lightning.tmp -lightning_TEXINFOS = body.texi toc.texi using.texi porting.texi version.texi -u_lightning_TEXINFOS = body.texi toc.texi using.texi version.texi -p_lightning_TEXINFOS = body.texi toc.texi porting.texi version.texi +lightning_TEXINFOS = body.texi version.texi + +noinst_PROGRAMS = incr printf rpn rfib ifib + +$(top_builddir)/lib/liblightning.la: + cd $(top_builddir)/lib && $(MAKE) $(AM_MAKEFLAGS) liblightning.la + +incr_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +incr_SOURCES = incr.c + +printf_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +printf_SOURCES = printf.c + +rpn_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +rpn_SOURCES = rpn.c + +rfib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +rfib_SOURCES = rfib.c + +ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +ifib_SOURCES = ifib.c diff --git a/doc/body.texi b/doc/body.texi index 7c20d5152..af924e807 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -1,66 +1,32 @@ -@ifinfo -@dircategory @lightning{}, a library for dynamic code generation -@direntry - * @value{TITLE}: (lightning). -@end direntry - -This file documents GNU lightning, Version @value{VERSION}. -It was last updated on @value{UPDATED}. - -Copyright @copyright{} 2000 Free Software Foundation, Inc. -Authored by Paolo Bonzini. - -This document is released under the terms of the GNU Free Documentation -License as published by the Free Software Foundation; either version 1.1, or -(at your option) any later version. - -You should have received a copy of the GNU Free Documentation License along -with GNU lightning; see the file @file{COPYING.DOC}. If not, write to the Free -Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- -There are no Secondary Sections, no Cover Texts and no Invariant Sections -(as defined in the license); this text, along with its equivalent in the -printed manual, constitutes the Title Page. -@end ifinfo - -@setchapternewpage odd - -@titlepage -@title @value{TITLE} -@subtitle Version @value{VERSION} -@subtitle @value{UPDATE-MONTH} - -@author by Paolo Bonzini - -@c The following two commands start the copyright page. -@page -@vskip 0pt plus 1filll -Copyright 1988-92, 1994-95, 1999, 2000 Free Software Foundation, Inc. - -This document is released under the terms of the @sc{gnu} Free Documentation -License as published by the Free Software Foundation; either version 1.1, or -(at your option) any later version. - -You should have received a copy of the @sc{gnu} Free Documentation License -along with @sc{gnu} @i{lightning}; see the file @file{COPYING.DOC}. If not, -write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, -MA 02110-1301, USA. - -There are no Secondary Sections, no Cover Texts and no Invariant Sections -(as defined in the license); this text, along with its equivalent in the -Info documentation, constitutes the Title Page. -@end titlepage - @ifnottex @node Top @top @lightning{} +@iftex +@macro comma +@verbatim{|,|} +@end macro +@end iftex + +@ifnottex +@macro comma +@verb{|,|} +@end macro +@end ifnottex + This document describes @value{TOPIC} the @lightning{} library for dynamic code generation. Unlike other dynamic code generation systems, which are usually either inefficient or non-portable, @lightning{} is both retargetable and very fast. 
-@include toc.texi +@menu +* Overview:: What GNU lightning is +* Installation:: Configuring and installing GNU lightning +* The instruction set:: The RISC instruction set used in GNU lightning +* GNU lightning examples:: GNU lightning's examples +* Reentrancy:: Re-entrant usage of GNU lightning +* Acknowledgements:: Acknowledgements for GNU lightning +@end menu @end ifnottex @node Overview @@ -73,14 +39,6 @@ which are usually either inefficient or non-portable, @lightning{} is both retargetable and very fast. @end iftex -@ifclear USING -This manual assumes that you are pretty comfortable with the usage of -@lightning{} for dynamic code generation, as described in -@usingref{The instruction set, @lightning{}'s instruction set}, and -instead focuses on the retargeting process. What follows is nothing -more then a brief overview of the system. -@end ifclear - Dynamic code generation is the generation of machine code at runtime. It is typically used to strip a layer of interpretation by allowing compilation to occur at runtime. One of the most @@ -105,28 +63,8 @@ are generated, so programs using dynamic code generation must be retargeted for each machine; in addition, coding a run-time code generator is a tedious and error-prone task more than a difficult one. -@ifset USING -This manual describes the @lightning{} dynamic code generation library. @lightning{} provides a portable, fast and easily retargetable dynamic code generation system. -@end ifset -@ifclear USING -@lightning{} provides a portable, fast and easily retargetable dynamic -code generation system. -@end ifclear - -To be fast, @lightning{} emits machine code without first creating -intermediate data structures such as RTL representations traditionally -used by optimizing compilers (@pxref{RTL representation, , , gcc, Using -and porting GNU CC}). @lightning{} translates code directly from a -machine independent interface to that of the underlying architecture.
-This makes code generation more efficient, since no intermediate data -structures have to be constructed and consumed. A collateral benefit -it that @lightning{} consumes little space: other than the memory -needed to store generated instructions and data structures such as -parse trees, the only data structure that client will usually need -is an array of pointers to labels and unresolved jumps, which you -can often allocate directly on the system stack. To be portable, @lightning{} abstracts over current architectures' quirks and unorthogonalities. The interface that it exposes to is that @@ -143,141 +81,1098 @@ real architectures closely enough that, most of the time, the compiler's constant folding pass ends up generating code which assembles machine instructions without further tests. -@section Drawbacks +@node Installation +@chapter Configuring and installing @lightning{} -@lightning{} has been useful in practice; however, it does have -at least four drawbacks: it has limited registers, no peephole -optimizer, no instruction scheduler and no symbolic debugger. Of -these, the last is the most critical even though it does not -affect the quality of generated code: the only way to debug code -generated at run-time is to step through it at the level of -host specific machine code. A decent knowledge of the underlying -instruction set is thus needed to make sense of the debugger's -output. - -The low number of available registers (six) is also an important -limitation. However, let's take the primary application of dynamic -code generation, that is, bytecode translators. The underlying -virtual machines tend to have very few general purpose registers -(usually 0 to 2) and the translators seldom rely on sophisticated -graph-coloring algorithms to allocate registers to temporary -variables. 
Rather, these translators usually obtain performance -increases because: a) they remove indirect jumps, which are usually -poorly predicted, and thus often form a bottleneck, b) they -parameterize the generated code and go through the process of decoding -the bytecodes just once. So, their usage of registers is rather -sparse---in fact, in practice, six registers were found to be -enough for most purposes. - -The lack of a peephole optimizer is most important on machines where a -single instruction can map to multiple native instructions. For -instance, Intel chips' division instruction hard-codes the dividend -to be in EAX and the quotient and remainder to be output, respectively, -in EAX and EDX: on such chips, @lightning{} does lots of pushing and -popping of EAX and EDX to save those registers that are not used. -Unnecessary stack operations could be removed by looking at whether -preserved registers are destroyed soon. Unfortunately, the current -implementation of @lightning{} is so fast because it only knows about -the single instruction that is being generated; performing these -optimizations would require a flow analysis pass that would probably -hinder @lightning{}'s speed. - -The lack of an instruction scheduler is not very important---pretty -good instruction scheduling can actually be obtained by separating -register writes from register reads. 
The only architectures on which -a scheduler would be useful are those on which arithmetic instructions -have two operands; an example is, again, the x86, on which the single -instruction +The first thing to do to use @lightning{} is to configure the +program, picking the set of macros to be used on the host +architecture; this configuration is automatically performed by +the @file{configure} shell script; to run it, merely type: @example - subr_i R0, R1, R2 @rem{!Compute R0 = R1 - R2} -@end example -@noindent -is translated to two instruction, of which the second depends on the -result of the first: -@example - movl %ebx, %eax @rem{! Move R1 into R0} - subl %edx, %eax @rem{! Subtract R2 from R0} + ./configure @end example -@ifset BOTH -@node Using GNU lightning -@chapter Using @lightning{} +@lightning{} supports the @code{--enable-disassembler} option, that +enables linking to GNU binutils and optionally print human readable +disassembly of the jit code. This option can be disabled by the +@code{--disable-disassembler} option. -This chapter describes installing and using @lightning{}. +Another option that @file{configure} accepts is +@code{--enable-assertions}, which enables several consistency checks in +the run-time assemblers. These are not usually needed, so you can +decide to simply forget about it; also remember that these consistency +checks tend to slow down your code generator. -@menu -@usingmenu{} -@end menu +After you've configured @lightning{}, run @file{make} as usual. -@lowersections -@end ifset +@lightning{} has an extensive set of tests to validate it is working +correctly in the build host. To test it run: +@example + make check +@end example -@ifset USING -@include using.texi -@end ifset +The next important step is: +@example + make install +@end example -@ifset BOTH -@raisesections +This ends the process of installing @lightning{}. 
-@node Porting GNU lightning -@chapter Porting @lightning{} +@node The instruction set +@chapter @lightning{}'s instruction set -This chapter describes the process of porting @lightning{}. -It assumes that you are pretty comfortable with the usage of -@lightning{} for dynamic code generation, as described in -@ref{Using GNU lightning}. - -@menu -@portingmenu{} -@end menu - -@lowersections -@end ifset - -@ifset PORTING -@include porting.texi -@end ifset - -@ifset BOTH -@raisesections -@end ifset - -@node Future -@chapter The future of @lightning{} - -Presented below is the set of tasks that I feel need to be performed -to make @lightning{} a more fully functional, viable system. They are -presented in no particular order. I would @emph{very much} welcome any -volunteers who would like to help with the implementation of one or -more of these tasks. Please write to me, Paolo Bonzini, at -@email{bonzini@@gnu.org} if you are interested in adding your efforts -to the @lightning{} project. - -Tasks: +@lightning{}'s instruction set was designed by deriving instructions +that closely match those of most existing RISC architectures, or +that can be easily synthesized if absent. Each instruction is composed +of: @itemize @bullet @item -The most important task to make @lightning{} more widely usable -is to retarget it. Although currently supported architectures -(x86, SPARC, PowerPC) are certainly some of the most widely used, -@lightning{} could be ported to others---namely, the Alpha and -MIPS architectures. +an operation, like @code{sub} or @code{mul} @item -Another interesting task is to allow the instruction stream to grow -dynamically. This is a problem because not all architectures allow -to write position independent code.@footnote{The x86's absolute -jumps, for example, are actually slow indirect jumps, and need a -register.} +most times, a register/immediate flag (@code{r} or @code{i}) @item -Optimize leaf procedures on the SPARC.
This involves using the -output registers (@code{%o@i{X}}) instead of the local registers -(@code{%l@i{X}}) when writing leaf procedures; the problem is, -leaf procedures also receive parameters in the output registers, -so they would be overwritten by write accesses to general-purpose -registers. +an unsigned modifier (@code{u}), a type identifier or two, when applicable. @end itemize +Examples of legal mnemonics are @code{addr} (integer add, with three +register operands) and @code{muli} (integer multiply, with two +register operands and an immediate operand). Each instruction takes +two or three operands; in most cases, one of them can be an immediate +value instead of a register. + +Most @lightning{} integer operations are signed wordsize operations, +with the exception of operations that convert types, or load or store +values to/from memory. When applicable, the types and C types are as +follows: + +@example + _c @r{signed char} + _uc @r{unsigned char} + _s @r{short} + _us @r{unsigned short} + _i @r{int} + _ui @r{unsigned int} + _l @r{long} + _f @r{float} + _d @r{double} +@end example + +Most integer operations do not need a type modifier, and when loading or +storing values to memory there is an alias to the proper operation +using wordsize operands, that is, if omitted, the type is @r{int} on +32-bit architectures and @r{long} on 64-bit architectures. Note +that lightning also expects @code{sizeof(void*)} to match the wordsize. + +When an unsigned operation result differs from the equivalent signed +operation, there is the @code{_u} modifier. + +There are at least seven integer registers, of which six are +general-purpose, while the last is used to contain the frame pointer +(@code{FP}). The frame pointer can be used to allocate and access local +variables on the stack, using the @code{allocai} instruction.
+ +Of the general-purpose registers, at least three are guaranteed to be +preserved across function calls (@code{V0}, @code{V1} and +@code{V2}) and at least three are not (@code{R0}, @code{R1} and +@code{R2}). Six registers are not very much, but this +restriction was forced by the need to target CISC architectures +which, like the x86, are poor of registers; anyway, backends can +specify the actual number of available registers with the calls +@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM} +(for callee-save registers). + +There are at least six floating-point registers, named @code{F0} to +@code{F5}. These are usually caller-save and are separate from the integer +registers on the supported architectures; on Intel architectures, +in 32 bit mode if SSE2 is not available or use of X87 is forced, +the register stack is mapped to a flat register file. As for the +integer registers, the macro @code{JIT_F_NUM} yields the number of +floating-point registers. + +The complete instruction set follows; as you can see, most non-memory +operations only take integers (either signed or unsigned) as operands; +this was done in order to reduce the instruction set, and because most +architectures only provide word and long word operations on registers. +There are instructions that allow operands to be extended to fit a larger +data type, both in a signed and in an unsigned way. + +@table @b +@item Binary ALU operations +These accept three operands; the last one can be an immediate. +@code{addx} operations must directly follow @code{addc}, and +@code{subx} must follow @code{subc}; otherwise, results are undefined. +Most, if not all, architectures do not support @r{float} or @r{double} +immediate operands; lightning emulates those operations by moving the +immediate to a temporary register and emitting the call with only +register operands.
+@example +addr _f _d O1 = O2 + O3 +addi _f _d O1 = O2 + O3 +addxr O1 = O2 + (O3 + carry) +addxi O1 = O2 + (O3 + carry) +addcr O1 = O2 + O3, set carry +addci O1 = O2 + O3, set carry +subr _f _d O1 = O2 - O3 +subi _f _d O1 = O2 - O3 +subxr O1 = O2 - (O3 + carry) +subxi O1 = O2 - (O3 + carry) +subcr O1 = O2 - O3, set carry +subci O1 = O2 - O3, set carry +mulr _f _d O1 = O2 * O3 +muli _f _d O1 = O2 * O3 +divr _u _f _d O1 = O2 / O3 +divi _u _f _d O1 = O2 / O3 +remr _u O1 = O2 % O3 +remi _u O1 = O2 % O3 +andr O1 = O2 & O3 +andi O1 = O2 & O3 +orr O1 = O2 | O3 +ori O1 = O2 | O3 +xorr O1 = O2 ^ O3 +xori O1 = O2 ^ O3 +lshr O1 = O2 << O3 +lshi O1 = O2 << O3 +rshr _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +rshi _u O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.} +@end example + +@item Unary ALU operations +These accept two operands, both of which must be registers. +@example +negr _f _d O1 = -O2 +comr O1 = ~O2 +@end example + +These unary ALU operations are only defined for float operands. +@example +absr _f _d O1 = fabs(O2) +sqrtr O1 = sqrt(O2) +@end example + +Besides requiring the @code{r} modifier, there are no unary operations +with an immediate operand. + +@item Compare instructions +These accept three operands; again, the last can be an immediate. +The last two operands are compared, and the first operand, that must be +an integer register, is set to either 0 or 1, according to whether the +given condition was met or not. + +The conditions given below are for the standard behavior of C, +where the ``unordered'' comparison result is mapped to false.
+ +@example +ltr _u _f _d O1 = (O2 < O3) +lti _u _f _d O1 = (O2 < O3) +ler _u _f _d O1 = (O2 <= O3) +lei _u _f _d O1 = (O2 <= O3) +gtr _u _f _d O1 = (O2 > O3) +gti _u _f _d O1 = (O2 > O3) +ger _u _f _d O1 = (O2 >= O3) +gei _u _f _d O1 = (O2 >= O3) +eqr _f _d O1 = (O2 == O3) +eqi _f _d O1 = (O2 == O3) +ner _f _d O1 = (O2 != O3) +nei _f _d O1 = (O2 != O3) +unltr _f _d O1 = !(O2 >= O3) +unler _f _d O1 = !(O2 > O3) +ungtr _f _d O1 = !(O2 <= O3) +unger _f _d O1 = !(O2 < O3) +uneqr _f _d O1 = !(O2 < O3) && !(O2 > O3) +ltgtr _f _d O1 = !(O2 >= O3) || !(O2 <= O3) +ordr _f _d O1 = (O2 == O2) && (O3 == O3) +unordr _f _d O1 = (O2 != O2) || (O3 != O3) +@end example + +@item Transfer operations +These accept two operands; for @code{ext} both of them must be +registers, while @code{mov} accepts an immediate value as the second +operand. + +Unlike @code{movr} and @code{movi}, the other instructions are used +to truncate a wordsize operand to a smaller integer data type or to +convert float data types. You can also use @code{extr} to convert an +integer to a floating point value: the usual options are @code{extr_f} +and @code{extr_d}. + +@example +movr _f _d O1 = O2 +movi _f _d O1 = O2 +extr _c _uc _s _us _i _ui _f _d O1 = O2 +truncr _f _d O1 = trunc(O2) +@end example + +In 64-bit architectures it may be required to use @code{truncr_f_i}, +@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match +the equivalent C code. Only the @code{_i} modifier is available in +32-bit architectures. + +@example +truncr_f_i = O1 = O2 +truncr_f_l = O1 = O2 +truncr_d_i = O1 = O2 +truncr_d_l = O1 = O2 +@end example + +The float conversion operations are @emph{destination first, +source second}, but the order of the types is reversed. This happens +for historical reasons. 
+ +@example +extr_f_d = O1 = O2 +extr_d_f = O1 = O2 +@end example + +@item Network extensions +These accept two operands, both of which must be registers; these +two instructions actually perform the same task, yet they are +assigned to two mnemonics for the sake of convenience and +completeness. As usual, the first operand is the destination and +the second is the source. +@example +htonr @r{Host-to-network (big endian) order} +ntohr @r{Network-to-host order } +@end example + +@item Load operations +@code{ld} accepts two operands while @code{ldx} accepts three; +in both cases, the last can be either a register or an immediate +value. Values are extended (with or without sign, according to +the data type specification) to fit a whole register. +The @code{_ui} and @code{_l} types are only available in 64-bit +architectures. For convenience, there is a version without a +type modifier for integer or pointer operands that uses the +appropriate wordsize call. +@example +ldr _c _uc _s _us _i _ui _l _f _d O1 = *O2 +ldi _c _uc _s _us _i _ui _l _f _d O1 = *O2 +ldxr _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3) +ldxi _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3) +@end example + +@item Store operations +@code{st} accepts two operands while @code{stx} accepts three; in +both cases, the first can be either a register or an immediate +value. Values are sign-extended to fit a whole register. +@example +str _c _uc _s _us _i _ui _l _f _d *O1 = O2 +sti _c _uc _s _us _i _ui _l _f _d *O1 = O2 +stxr _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 +stxi _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 +@end example +As for the load operations, the @code{_ui} and @code{_l} types are +only available in 64-bit architectures, and for convenience, there +is a version without a type modifier for integer or pointer operands +that uses the appropriate wordsize call. 
+ +@item Argument management +These are: +@example +prepare (not specified) +pushargr _c _uc _s _us _i _ui _l _f _d +pushargi _c _uc _s _us _i _ui _l _f _d +arg _c _uc _s _us _i _ui _l _f _d +getarg _c _uc _s _us _i _ui _l _f _d +ret (not specified) +retr _c _uc _s _us _i _ui _l _f _d +reti _c _uc _s _us _i _ui _l _f _d +retval _c _uc _s _us _i _ui _l _f _d +epilog (not specified) +@end example +As with other operations that use a type modifier, the @code{_ui} and +@code{_l} types are only available in 64-bit architectures, but there +are operations without a type modifier that alias to the appropriate +integer operation with wordsize operands. + +@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller, +while @code{arg}, @code{getarg} and @code{ret} are used by the callee. +A code snippet that wants to call another procedure and has to pass +arguments must, in order: use the @code{prepare} instruction and use +the @code{pushargr} or @code{pushargi} to push the arguments @strong{in +left to right order}; and use @code{finish} or @code{call} (explained below) +to perform the actual call. + +@code{arg} and @code{getarg} are used by the callee. +@code{arg} is different from other instructions in that it does not +actually generate any code: instead, it is a function which returns +a value to be passed to @code{getarg}.@footnote{``Return a +value'' means that @lightning{} code that compiles these +instructions returns a value when expanded.} You should call +@code{arg} as soon as possible, before any function call or, more +easily, right after the @code{prolog} instructions +(which is treated later). + +@code{getarg} accepts a register argument and a value returned by +@code{arg}, and will move that argument to the register, extending +it (with or without sign, according to the data type specification) +to fit a whole register.
These instructions are more intimately +related to the usage of the @lightning{} instruction set in code +that generates other code, so they will be treated more +specifically in @ref{GNU lightning examples, , Generating code at +run-time}. + +Finally, the @code{retval} instruction fetches the return value of a +called function in a register. The @code{retval} instruction takes a +register argument and copies the return value of the previously called +function in that register. A function with a return value should use +@code{retr} or @code{reti} to put the return value in the return register +before returning. @xref{Fibonacci, the Fibonacci numbers}, for an example. + +@code{epilog} is an optional call, that marks the end of a function +body. It is automatically generated by lightning if starting a new +function (what should be done after a @code{ret} call) or finishing +generating jit. + +You should observe a few rules when using these macros. First of +all, if calling a varargs function, you should use the @code{ellipsis} +call to mark the position of the ellipsis in the C prototype. + +You should not nest calls to @code{prepare} inside a +@code{prepare/finish} block. Doing this will result in undefined +behavior. Note that for functions with zero arguments you can use +just @code{call}. + +@item Branch instructions +Like @code{arg}, these also return a value which, in this case, +is to be used to compile forward branches as explained in +@ref{Fibonacci, , Fibonacci numbers}. They accept two operands to be +compared; of these, the last can be either a register or an immediate. 
+They are: +@example +bltr _u _f _d @r{if }(O2 < O3)@r{ goto }O1 +blti _u _f _d @r{if }(O2 < O3)@r{ goto }O1 +bler _u _f _d @r{if }(O2 <= O3)@r{ goto }O1 +blei _u _f _d @r{if }(O2 <= O3)@r{ goto }O1 +bgtr _u _f _d @r{if }(O2 > O3)@r{ goto }O1 +bgti _u _f _d @r{if }(O2 > O3)@r{ goto }O1 +bger _u _f _d @r{if }(O2 >= O3)@r{ goto }O1 +bgei _u _f _d @r{if }(O2 >= O3)@r{ goto }O1 +beqr _f _d @r{if }(O2 == O3)@r{ goto }O1 +beqi _f _d @r{if }(O2 == O3)@r{ goto }O1 +bner _f _d @r{if }(O2 != O3)@r{ goto }O1 +bnei _f _d @r{if }(O2 != O3)@r{ goto }O1 + +bunltr _f _d @r{if }!(O2 >= O3)@r{ goto }O1 +bunler _f _d @r{if }!(O2 > O3)@r{ goto }O1 +bungtr _f _d @r{if }!(O2 <= O3)@r{ goto }O1 +bunger _f _d @r{if }!(O2 < O3)@r{ goto }O1 +buneqr _f _d @r{if }!(O2 < O3) && !(O2 > O3)@r{ goto }O1 +bltgtr _f _d @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1 +bordr _f _d @r{if } (O2 == O2) && (O3 == O3)@r{ goto }O1 +bunordr _f _d @r{if } (O2 != O2) || (O3 != O3)@r{ goto }O1 + +bmsr @r{if }O2 & O3@r{ goto }O1 +bmsi @r{if }O2 & O3@r{ goto }O1 +bmcr @r{if }!(O2 & O3)@r{ goto }O1 +bmci @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.} +boaddr _u O2 += O3@r{, goto }O1@r{ if overflow} +boaddi _u O2 += O3@r{, goto }O1@r{ if overflow} +bxaddr _u O2 += O3@r{, goto }O1@r{ if no overflow} +bxaddi _u O2 += O3@r{, goto }O1@r{ if no overflow} +bosubr _u O2 -= O3@r{, goto }O1@r{ if overflow} +bosubi _u O2 -= O3@r{, goto }O1@r{ if overflow} +bxsubr _u O2 -= O3@r{, goto }O1@r{ if no overflow} +bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow} +@end example + +@item Jump and return operations +These accept one argument except @code{ret} which has none; the +difference between @code{finishi} and @code{calli} is that the +latter does not clean the stack from pushed parameters (if any) +and the former must @strong{always} follow a @code{prepare} +instruction.
+@example +callr (not specified) @r{function call to a register} +calli (not specified) @r{function call to O1} +finishr (not specified) @r{function call to a register} +finishi (not specified) @r{function call to O1} +jmpi/jmpr (not specified) @r{unconditional jump to O1} +ret (not specified) @r{return from subroutine} +retr _c _uc _s _us _i _ui _l _f _d +reti _c _uc _s _us _i _ui _l _f _d +retval _c _uc _s _us _i _ui _l _f _d @r{move return value} + @r{to register} +@end example + +Like branch instructions, @code{jmpi} also returns a value which is to +be used to compile forward branches. @xref{Fibonacci, , Fibonacci +numbers}. + +@item Function prolog + +These macros are used to set up a function prolog. The @code{allocai} +call accepts a single integer argument and returns an offset value +for stack storage access. + +@example +prolog (not specified) @r{function prolog} +allocai (not specified) @r{reserve space on the stack} +@end example + +@code{allocai} receives the number of bytes to allocate and returns +the offset from the frame pointer register @code{FP} to the base of +the area. +@end table + +As a small appetizer, here is a small function that adds 1 to the input +parameter (an @code{int}). I'm using an assembly-like syntax here which +is a bit different from the one used when writing real subroutines with +@lightning{}; the real syntax will be introduced in @ref{GNU lightning +examples, , Generating code at run-time}. + +@example +incr: + prolog +in = arg @rem{! We have an integer argument} + getarg R0, in @rem{! Move it to R0} + addi R0, R0, 1 @rem{! Add 1} + retr R0 @rem{! And return the result} +@end example + +And here is another function which uses the @code{printf} function from +the standard C library to write a number in hexadecimal notation: + +@example +printhex: + prolog +in = arg @rem{! Same as above} + getarg R0, in + prepare @rem{! Begin call sequence for printf} + pushargi "%x" @rem{! Push format string} + ellipsis @rem{!
Varargs start here} + pushargr R0 @rem{! Push second argument} + finishi printf @rem{! Call printf} + ret @rem{! Return to caller} +@end example + +@node GNU lightning examples +@chapter Generating code at run-time + +To use @lightning{}, you should include the @file{lightning.h} file that +is put in your include directory by the @samp{make install} command. + +Each of the instructions above translates to a macro or function call. +All you have to do is prepend @code{jit_} (lowercase) to opcode names +and @code{JIT_} (uppercase) to register names. Of course, parameters +are to be put between parentheses. + +This small tutorial presents four examples: + +@iftex +@itemize @bullet +@item +The @code{incr} function found in @ref{The instruction set, , +@lightning{}'s instruction set}: + +@item +A simple function call to @code{printf} + +@item +An RPN calculator. + +@item +Fibonacci numbers +@end itemize +@end iftex +@ifnottex +@menu +* incr:: A function which increments a number by one +* printf:: A simple function call to printf +* RPN calculator:: A more complex example, an RPN calculator +* Fibonacci:: Calculating Fibonacci numbers +@end menu +@end ifnottex + +@node incr +@section A function which increments a number by one + +Let's see how to create and use the sample @code{incr} function created +in @ref{The instruction set, , @lightning{}'s instruction set}: + +@example +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + jit_node_t *in; + pifi incr; + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog(); @rem{/* @t{ prolog } */} + in = jit_arg(); @rem{/* @t{ in = arg } */} + jit_getarg(JIT_R0, in); @rem{/* @t{ getarg R0 } */} + jit_addi(JIT_R0, JIT_R0, 1); @rem{/* @t{ addi R0@comma{} R0@comma{} 1 } */} + jit_retr(JIT_R0); @rem{/* @t{ retr R0 } */} + + incr = jit_emit(); + + @rem{/* call the generated code@comma{} passing 5 as an argument */} +
printf("%d + 1 = %d\n", 5, incr(5)); + + finish_jit(); + return 0; +@} +@end example + +Let's examine the code line by line (well, almost@dots{}): + +@table @t +@item #include +You already know about this. It defines all of @lightning{}'s macros. + +@item static jit_state_t *_jit; +You might wonder what @code{jit_state_t} is. It is a structure +that stores jit code generation information. The name @code{_jit} is +special because, since multiple jit generators can run at the same +time, you must either @r{#define _jit my_jit_state} or name it +@code{_jit}. + +@item typedef int (*pifi)(int); +Just a handy typedef for a pointer to a function that takes an +@code{int} and returns another. + +@item jit_node_t *in; +Declares a variable to hold an identifier for a function argument. It +is an opaque pointer, that will hold the return of a call to @code{arg} +and be used as argument to @code{getarg}. + +@item pifi incr; +Declares a function pointer variable to a function that receives an +@code{int} and returns an @code{int}. + +@item init_jit(argv[0]); +You must call this function before creating a @code{jit_state_t} +object. This function does global state initialization, and may need +to detect CPU or Operating System features. It receives a string +argument that is later used to read symbols from a shared object using +GNU binutils if disassembly was enabled at configure time. If no +disassembly will be performed a NULL pointer can be used as argument. + +@item _jit = jit_new_state(); +This call initializes a @lightning{} jit state. + +@item jit_prolog(); +Ok, so we start generating code for our beloved function@dots{} + +@item in = jit_arg(); +@itemx jit_getarg(JIT_R0, in); +We retrieve the first (and only) argument, an integer, and store it +into the general-purpose register @code{R0}. + +@item jit_addi(JIT_R0, JIT_R0, 1); +We add one to the content of the register.
+ +@item jit_retr(JIT_R0); +This instruction generates a standard function epilog that returns +the contents of the @code{R0} register. + +@item incr = jit_emit(); +This instruction is very important. It actually translates the +@lightning{} macros used before to machine code, flushes the generated +code area out of the processor's instruction cache and returns a +pointer to the start of the code. + +@item printf("%d + 1 = %d", 5, incr(5)); +Calling our function is this simple---it is not distinguishable from +a normal C function call, the only difference being that @code{incr} +is a variable. + +@item finish_jit(); +This call cleans up any global state held by @lightning{}, and it is +advisable to call it once jit code will no longer be generated. +@end table + +@lightning{} abstracts two phases of dynamic code generation: selecting +instructions that map the standard representation, and emitting binary +code for these instructions. The client program has the responsibility +of describing the code to be generated using the standard @lightning{} +instruction set. + +Let's examine the code generated for @code{incr} on the x86_64 +architecture (on the right is the code that an assembly-language +programmer would write): + +@table @b +@item x86_64 +@example + sub $0x30,%rsp + mov %rbp,(%rsp) + mov %rsp,%rbp + sub $0x18,%rsp + mov %rdi,%rax mov %rdi, %rax + add $0x1,%rax inc %rax + mov %rbp,%rsp + mov (%rsp),%rbp + add $0x30,%rsp + retq retq +@end example +In this case, the main overhead is due to the function's prolog and +epilog, and stack alignment after reserving stack space for word +to/from float conversions or moving data from/to x87 to/from SSE. +Note that besides allocating space to save callee saved registers, +no registers are saved/restored because @lightning{} notices those +registers are not modified.
There is currently no logic to detect +if it needs to allocate stack space for type conversions nor +proper leaf function detection, but these are subject to change +(FIXME). +@end table + +@node printf +@section A simple function call to @code{printf} + +Again, here is the code for the example: + +@example +#include +#include + +static jit_state_t *_jit; + +typedef void (*pvfi)(int); @rem{/* Pointer to Void Function of Int */} + +int main(int argc, char *argv[]) +@{ + pvfi myFunction; @rem{/* ptr to generated code */} + jit_node_t *start, *end; @rem{/* a couple of labels */} + jit_node_t *in; @rem{/* to get the argument */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + start = jit_note(__FILE__, __LINE__); + jit_prolog(); + in = jit_arg(); + jit_getarg(JIT_R1, in); + jit_prepare(); + jit_pushargi((jit_word_t)"generated %d bytes\n"); + jit_ellipsis(); + jit_pushargr(JIT_R1); + jit_finishi(printf); + jit_ret(); + jit_epilog(); + end = jit_note(__FILE__, __LINE__); + + myFunction = jit_emit(); + + @rem{/* call the generated code@comma{} passing its size as argument */} + myFunction((char*)jit_address(end) - (char*)jit_address(start)); + + jit_disassemble(); + + finish_jit(); + return 0; +@} +@end example + +The function shows how many bytes were generated. Most of the code +is not very interesting, as it resembles very closely the program +presented in @ref{incr, , A function which increments a number by one}. + +For this reason, we're going to concentrate on just a few statements. + +@table @t +@item start = jit_note(__FILE__, __LINE__); +@itemx @r{@dots{}} +@itemx end = jit_note(__FILE__, __LINE__); +These two instructions call the @code{jit_note} macro, which creates +a note in the jit code; arguments to @code{jit_note} usually are a +filename string and line number integer, but using NULL for the +string argument is perfectly valid if you only need to create a simple +marker in the code.
+ +@item jit_ellipsis(); +@code{ellipsis} usually is only required if calling varargs functions +with double arguments, but it is a good practice to properly describe +the @r{@dots{}} in the call sequence. + +@item jit_pushargi((jit_word_t)"generated %d bytes\n"); +Note the use of the @code{(jit_word_t)} cast, which is used only +to avoid a compiler warning, due to using a pointer where a +wordsize integer type was expected. + +@item jit_prepare(); +@itemx @r{@dots{}} +@itemx jit_finishi(printf); +Once the arguments to @code{printf} have been pushed, which means +moving them to stack or register arguments, the @code{printf} +function is called and the stack cleaned. Note how @lightning{} +abstracts the differences between different architectures and +ABI's -- the client program does not know how parameter passing +works on the host architecture. + +@item jit_epilog(); +Usually it is not required to call @code{epilog}, but because it +is implicitly called when noticing the end of a function, if the +@code{end} variable was set with a @code{note} call after the +@code{ret}, it would not consider the function epilog. + +@item myFunction((char*)jit_address(end) - (char*)jit_address(start)); +This calls the generated jit function passing as argument the offset +difference between the @code{start} and @code{end} notes. The @code{address} +call must be done after the @code{emit} call or either a fatal error +will happen (if @lightning{} is built with assertions enabled) or an +undefined value will be returned. + +@item jit_disassemble(); +@code{disassemble} will dump the generated code to standard output, +unless @lightning{} was built with the disassembler disabled, in which +case no output will be shown. +@end table + +@node RPN calculator +@section A more complex example, an RPN calculator + +We create a small stack-based RPN calculator which applies a series +of operators to a given parameter and to other numeric operands.
+Unlike previous examples, the code generator is fully parameterized +and is able to compile different formulas to different functions. +Here is the code for the expression compiler; a sample usage will +follow. + +Since @lightning{} does not provide push/pop instructions, this +example uses a stack-allocated area to store the data. Such an +area can be allocated using the macro @code{allocai}, which +receives the number of bytes to allocate and returns the offset +from the frame pointer register @code{FP} to the base of the +area. + +Usually, you will use the @code{ldxi} and @code{stxi} instructions +to access stack-allocated variables. However, it is possible to +use operations such as @code{add} to compute the address of the +variables, and pass the address around. + +@example +#include +#include + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +static jit_state_t *_jit; + +void stack_push(int reg, int *sp) +@{ + jit_stxi_i (*sp, JIT_FP, reg); + *sp += sizeof (int); +@} + +void stack_pop(int reg, int *sp) +@{ + *sp -= sizeof (int); + jit_ldxi_i (reg, JIT_FP, *sp); +@} + +jit_node_t *compile_rpn(char *expr) +@{ + jit_node_t *in, *fn; + int stack_base, stack_ptr; + + fn = jit_note(NULL, 0); + jit_prolog(); + in = jit_arg(); + stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); + + jit_getarg_i(JIT_R2, in); + + while (*expr) @{ + char buf[32]; + int n; + if (sscanf(expr, "%[0-9]%n", buf, &n)) @{ + expr += n - 1; + stack_push(JIT_R0, &stack_ptr); + jit_movi(JIT_R0, atoi(buf)); + @} else if (*expr == 'x') @{ + stack_push(JIT_R0, &stack_ptr); + jit_movr(JIT_R0, JIT_R2); + @} else if (*expr == '+') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_addr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '-') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_subr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '*') @{ + stack_pop(JIT_R1, &stack_ptr); + jit_mulr(JIT_R0, JIT_R1, JIT_R0); + @} else if (*expr == '/') @{ + stack_pop(JIT_R1, &stack_ptr); +
jit_divr(JIT_R0, JIT_R1, JIT_R0); + @} else @{ + fprintf(stderr, "cannot compile: %s\n", expr); + abort(); + @} + ++expr; + @} + jit_retr(JIT_R0); + jit_epilog(); + return fn; +@} +@end example + +The principle on which the calculator is based is easy: the stack top +is held in R0, while the remaining items of the stack are held in the +memory area that we allocate with @code{allocai}. Compiling a numeric +operand or the argument @code{x} pushes the old stack top onto the +stack and moves the operand into R0; compiling an operator pops the +second operand off the stack into R1, and compiles the operation so +that the result goes into R0, thus becoming the new stack top. + +This example allocates a fixed area for 32 @code{int}s. This is not +a problem when the function is a leaf like in this case; in a full-blown +compiler you will want to analyze the input and determine the number +of needed stack slots---a very simple example of register allocation. +The area is then managed like a stack using @code{stack_push} and +@code{stack_pop}. + +Source code for the client (which lies in the same source file) follows: + +@example +int main(int argc, char *argv[]) +@{ + jit_node_t *nc, *nf; + pifi c2f, f2c; + int i; + + init_jit(argv[0]); + _jit = jit_new_state(); + + nc = compile_rpn("32x9*5/+"); + nf = compile_rpn("x32-5*9/"); + (void)jit_emit(); + c2f = (pifi)jit_address(nc); + f2c = (pifi)jit_address(nf); + + printf("\nC:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", i); + printf("\nF:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i)); + printf("\n"); + + printf("\nF:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", i); + printf("\nC:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i)); + printf("\n"); + + finish_jit(); + return 0; +@} +@end example + +The client displays a conversion table between Celsius and Fahrenheit +degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius).
The +formulas are @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9}, +respectively. + +Providing the formula as an argument to @code{compile_rpn} effectively +parameterizes code generation, making it possible to use the same code +to compile different functions; this is what makes dynamic code +generation so powerful. + +@node Fibonacci +@section Fibonacci numbers + +The code in this section calculates a variant of the Fibonacci sequence. +While the traditional Fibonacci sequence is modeled by the recurrence +relation: +@display + f(0) = f(1) = 1 + f(n) = f(n-1) + f(n-2) +@end display + +@noindent +the functions in this section calculate the following sequence, which +is more interesting as a benchmark@footnote{That's because, as is +easily seen, the sequence represents the number of activations of the +@code{nfibs} procedure that are needed to compute its value through +recursion.}: +@display + fib(0) = fib(1) = 1 + fib(n) = fib(n-1) + fib(n-2) + 1 +@end display + +The purpose of this example is to introduce branches. There are two +kinds of branches: backward branches and forward branches. We'll +present the calculation in a recursive and iterative form; the +former only uses forward branches, while the latter uses both.
+ +@example +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + pifi fib; + jit_node_t *label; + jit_node_t *call; + jit_node_t *in; @rem{/* offset of the argument */} + jit_node_t *ref; @rem{/* to patch the forward reference */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + label = jit_label(); + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_V0, in); @rem{/* V0 = n */} + ref = jit_blti (JIT_V0, 2); + jit_subi (JIT_V1, JIT_V0, 1); @rem{/* V1 = n-1 */} + jit_subi (JIT_V2, JIT_V0, 2); @rem{/* V2 = n-2 */} + jit_prepare(); + jit_pushargr(JIT_V1); + call = jit_finishi(NULL); + jit_patch_at(call, label); + jit_retval(JIT_V1); @rem{/* V1 = fib(n-1) */} + jit_prepare(); + jit_pushargr(JIT_V2); + call = jit_finishi(NULL); + jit_patch_at(call, label); + jit_retval(JIT_V2); @rem{/* V2 = fib(n-2) */} + jit_addi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_R0, JIT_V1, JIT_V2); @rem{/* R0 = V1 + V2 + 1 */} + jit_retr(JIT_R0); + + jit_patch(ref); @rem{/* patch jump */} + jit_movi(JIT_R0, 1); @rem{/* R0 = 1 */} + jit_retr(JIT_R0); + + @rem{/* call the generated code@comma{} passing 32 as an argument */} + fib = jit_emit(); + printf("fib(%d) = %d\n", 32, fib(32)); + finish_jit(); + return 0; +@} +@end example + +As said above, this is the first example of dynamically compiling +branches. Branch instructions have two operands containing the +values to be compared, and return a @code{jit_node_t *} object +to be patched. + +Because the final address of a label is only known after calling @code{emit}, +it is required to call @code{patch} or @code{patch_at}, which +tells @lightning{} that the target to patch is actually a pointer to +a @code{jit_node_t *} object, otherwise, it would assume that it is +a pointer to a C function. Note that conditional branches do not +receive a label argument, so they must be patched.
+ +You need to call @code{patch_at} on the return value of @code{calli}, +@code{finishi} and @code{jmpi} if it is actually +referencing a label in the jit code. All other branch instructions +do not receive a label argument. Note that @code{movi} is a special +case, and patching it is usually done to get the final address of +a label, usually to later call @code{jmpr}. + +Now, here is the iterative version: + +@example +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} + +int main(int argc, char *argv[]) +@{ + pifi fib; + jit_node_t *in; @rem{/* offset of the argument */} + jit_node_t *ref; @rem{/* to patch the forward reference */} + jit_node_t *jump; @rem{/* jump to start of loop */} + jit_node_t *loop; @rem{/* start of the loop */} + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_R2, in); @rem{/* R2 = n */} + jit_movi (JIT_R1, 1); + ref = jit_blti (JIT_R2, 2); + jit_subi (JIT_R2, JIT_R2, 1); + jit_movi (JIT_R0, 1); + + loop= jit_label(); + jit_subi (JIT_R2, JIT_R2, 1); @rem{/* decr. counter */} + jit_addr (JIT_V0, JIT_R0, JIT_R1); @rem{/* V0 = R0 + R1 */} + jit_movr (JIT_R0, JIT_R1); @rem{/* R0 = R1 */} + jit_addi (JIT_R1, JIT_V0, 1); @rem{/* R1 = V0 + 1 */} + jump= jit_bnei (JIT_R2, 0); @rem{/* if (R2) goto loop; */} + jit_patch_at(jump, loop); + + jit_patch(ref); @rem{/* patch forward jump */} + jit_movr (JIT_R0, JIT_R1); @rem{/* R0 = R1 */} + jit_retr (JIT_R0); + + @rem{/* call the generated code@comma{} passing 36 as an argument */} + fib = jit_emit(); + printf("fib(%d) = %d\n", 36, fib(36)); + finish_jit(); + return 0; +@} +@end example + +This code calculates the recurrence relation using iteration (a +@code{for} loop in high-level languages). There are no function +calls anymore: instead, there is a backward jump (the @code{bnei} at +the end of the loop).
+ +Note that the program must remember the address for backward jumps; +for forward jumps it is only required to remember the jump code, +and call @code{patch} for the implicit label. + +@node Reentrancy +@chapter Re-entrant usage of @lightning{} + +@lightning{} uses the special @code{_jit} identifier. To be able +to use multiple jit generation states at the same +time, it is required to use code similar to: + +@example + struct jit_state lightning; + #define _jit lightning +@end example + +This will cause the symbol defined to @code{_jit} to be passed as +the first argument to the underlying @lightning{} implementation, +that is usually a function with an @code{_} (underscore) prefix +and with an argument named @code{_jit}, in the pattern: + +@example + static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t); + #define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v); +@end example + +The reason for this is to use the same syntax as the initial lightning +implementation and to avoid needing the user to keep adding an extra +argument to every call, as multiple jit states generating code in +parallel should be very uncommon. + +@node Registers +@chapter Accessing the whole register file + +As mentioned earlier in this chapter, all @lightning{} back-ends are +guaranteed to have at least six general-purpose integer registers and +six floating-point registers, but many back-ends will have more. + +To access the entire register files, you can use the +@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros. They +accept a parameter that identifies the register number, which +must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM} +and @code{JIT_F_NUM} respectively; the number need not be +constant. Of course, expressions like @code{JIT_R0} and +@code{JIT_R(0)} denote the same register, and likewise for +integer callee-saved, or floating-point, registers. @node Acknowledgements @chapter Acknowledgements @@ -298,9 +1193,3 @@ yet very interesting.
I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{} was first developed as a tool to be used in GNU Smalltalk's dynamic translator from bytecodes to native code. - -@iftex -@contents -@end iftex - -@bye diff --git a/doc/ifib.c b/doc/ifib.c new file mode 100644 index 000000000..7e098cba4 --- /dev/null +++ b/doc/ifib.c @@ -0,0 +1,44 @@ +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); /* Pointer to Int Function of Int */ + +int main(int argc, char *argv[]) +{ + pifi fib; + jit_node_t *in; /* offset of the argument */ + jit_node_t *ref; /* to patch the forward reference */ + jit_node_t *jump; /* jump to start of loop */ + jit_node_t *loop; /* start of the loop */ + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_R2, in); /* R2 = n */ + jit_movi (JIT_R1, 1); + ref = jit_blti (JIT_R2, 2); + jit_subi (JIT_R2, JIT_R2, 1); + jit_movi (JIT_R0, 1); + + loop= jit_label(); + jit_subi (JIT_R2, JIT_R2, 1); /* decr. 
counter */ + jit_addr (JIT_V0, JIT_R0, JIT_R1); /* V0 = R0 + R1 */ + jit_movr (JIT_R0, JIT_R1); /* R0 = R1 */ + jit_addi (JIT_R1, JIT_V0, 1); /* R1 = V0 + 1 */ + jump= jit_bnei (JIT_R2, 0); /* if (R2) goto loop; */ + jit_patch_at(jump, loop); + + jit_patch(ref); /* patch forward jump */ + jit_movr (JIT_R0, JIT_R1); /* R0 = R1 */ + jit_retr (JIT_R0); + + /* call the generated code, passing 36 as an argument */ + fib = jit_emit(); + printf("fib(%d) = %d\n", 36, fib(36)); + finish_jit(); + return 0; +} diff --git a/doc/incr.c b/doc/incr.c new file mode 100644 index 000000000..5d5e438d0 --- /dev/null +++ b/doc/incr.c @@ -0,0 +1,29 @@ +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); /* Pointer to Int Function of Int */ + +int main(int argc, char *argv[]) +{ + jit_node_t *in; + pifi incr; + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog(); /* @t{ prolog } */ + in = jit_arg(); /* @t{ in = arg } */ + jit_getarg(JIT_R0, in); /* @t{ getarg R0 } */ + jit_addi(JIT_R0, JIT_R0, 1); /* @t{ addi R0\, R0\, 1 } */ + jit_retr(JIT_R0); /* @t{ retr R0 } */ + + incr = jit_emit(); + + /* call the generated code\, passing 5 as an argument */ + printf("%d + 1 = %d\n", 5, incr(5)); + + finish_jit(); + return 0; +} diff --git a/doc/lightning.texi b/doc/lightning.texi index a336a3db0..c7d8f98f1 100644 --- a/doc/lightning.texi +++ b/doc/lightning.texi @@ -3,11 +3,8 @@ @setfilename lightning.info -@set TITLE Using and porting @sc{gnu} @i{lightning} -@set TOPIC installing, using and porting -@set BOTH -@set USING -@set PORTING +@set TITLE Using @sc{gnu} @i{lightning} +@set TOPIC installing and using @settitle @value{TITLE} @@ -63,29 +60,9 @@ @c References to the other half of the manual @c --------------------------------------------------------------------- -@ifset USING @macro usingref{node, name} @ref{\node\, , \name\} @end macro -@end ifset - -@ifclear USING -@macro usingref{node, name} -@ref{\node\, , \name\, u-lightning, Using @sc{gnu} 
@i{lightning}} -@end macro -@end ifclear - -@ifset PORTING -@macro portingref{node, name} -@ref{\node\, , \name\} -@end macro -@end ifset - -@ifclear PORTING -@macro portingref{node, name} -@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}} -@end macro -@end ifclear @c --------------------------------------------------------------------- @c End of macro section @@ -98,3 +75,4 @@ @c *********************************************************************** +@bye diff --git a/doc/p-lightning.texi b/doc/p-lightning.texi deleted file mode 100644 index 98a9b63fb..000000000 --- a/doc/p-lightning.texi +++ /dev/null @@ -1,100 +0,0 @@ -\input texinfo.tex @c -*- texinfo -*- -@c %**start of header (This is for running Texinfo on a region.) - -@setfilename lightning.info - -@set TITLE Using @sc{gnu} @i{lightning} -@set TOPIC installing and using -@clear BOTH -@clear USING -@set PORTING - -@settitle @value{TITLE} - -@c --------------------------------------------------------------------- -@c Common macros -@c --------------------------------------------------------------------- - -@macro bulletize{a} -@item -\a\ -@end macro - -@macro rem{a} -@r{@i{\a\}} -@end macro - -@macro gnu{} -@sc{gnu} -@end macro - -@macro lightning{} -@gnu{} @i{lightning} -@end macro - -@c --------------------------------------------------------------------- -@c Macros for Texinfo 3.1/4.0 compatibility -@c --------------------------------------------------------------------- - -@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1 -@c compatibility -@macro hlink{url, link} -\link\ (\url\) -@end macro - -@c ifhtml can only be true in Texinfo 4.0, which has uref -@ifhtml -@unmacro hlink - -@macro hlink{url, link} -@uref{\url\, \link\} -@end macro - -@macro email{mail} -@uref{mailto:\mail\, , \mail\} -@end macro - -@macro url{url} -@uref{\url\} -@end macro -@end ifhtml - -@c --------------------------------------------------------------------- -@c References to the other 
half of the manual -@c --------------------------------------------------------------------- - -@ifset USING -@macro usingref{node, name} -@ref{\node\, , \name\} -@end macro -@end ifset - -@ifclear USING -@macro usingref{node, name} -@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}} -@end macro -@end ifclear - -@ifset PORTING -@macro portingref{node, name} -@ref{\node\, , \name\} -@end macro -@end ifset - -@ifclear PORTING -@macro portingref{node, name} -@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}} -@end macro -@end ifclear - -@c --------------------------------------------------------------------- -@c End of macro section -@c --------------------------------------------------------------------- - -@include version.texi -@include body.texi - -@c %**end of header (This is for running Texinfo on a region.) - -@c *********************************************************************** - diff --git a/doc/porting.texi b/doc/porting.texi deleted file mode 100644 index 66cd15118..000000000 --- a/doc/porting.texi +++ /dev/null @@ -1,1600 +0,0 @@ -@node Structure of a port -@chapter An overview of the porting process - -A particular port of @lightning{} is composed of four files. These -have a common suffix which identifies the port (for example, -@code{i386} or @code{ppc}), and a prefix that identifies their -function; they are: - -@itemize @bullet -@item -@file{asm-@var{suffix}.h}, which contains the description of the -target machine's instruction format. The creation of this file -is discussed in @ref{Run-time assemblers, , Creating the run-time -assembler}. - -@item -@file{core-@var{suffix}.h}, which contains the mappings from -@lightning{}'s instruction set to the target machine's assembly -language format. The creation of this file is discussed in -@ref{Standard macros, , Creating the platform-independent layer}. - -@item -@file{funcs-@var{suffix}.h}, for now, only contains the definition -of @code{jit_flush_code}. 
The creation of this file is briefly -discussed in @ref{Standard functions, , More complex tasks in -the platform-independent layer}. - -@item -@file{fp-@var{suffix}.h}, which contains the description of the -target machine's instruction format and the internal macros for doing -floating point computation. The creation of this file is discussed -in @ref{Floating-point macros, , Implementing macros for floating -point}. -@end itemize - -Before doing anything, you have to add the ability to recognize the -new port during the configuration process. This is explained in -@ref{Adjusting configure, , Automatically recognizing the new platform}. - -@node Adjusting configure -@chapter Automatically recognizing the new platform - -Before starting your port, you have to add the ability to recognize the -new port during the configure process. You only have to run -@file{config.guess}, which you'll find in the main distribution -directory, and note down the first part of the output (up to the first -dash). - -Then, in the two files @file{configure.in} and @file{lightning.m4}, -lookup the line -@example - case "$host_cpu" in -@end example - -@noindent -and, right after it, add the line: -@example - @var{cpu-name}) cpu=@var{file-suffix} ;; -@end example - -@noindent -where @var{cpu-name} is the cpu as output by @file{config.guess}, and -@var{file-suffix} is the suffix that you are going to use for your files -(@pxref{Structure of a port, , An overview of the porting process}). - -Now create empty files for your new port: -@example - touch lightning/asm-xxx.h - touch lightning/fp-xxx.h - touch lightning/core-xxx.h - touch lightning/funcs-xxx.h -@end example - -@noindent -and run @file{configure}, which should create the symlinks that are -needed by @code{lightning.h}. This is important because it will allow -you to use @lightning{} (albeit in a limited way) for testing even -before the port is completed. 
- -@node Run-time assemblers -@chapter Creating the run-time assembler - -The run-time assembler is a set of macros whose purpose is to assemble -instructions for the target machine's assembly language, translating -mnemonics to machine language together with their operands. While a -run-time assembler is not, strictly speaking, part of @lightning{} -(it is a private layer to be used while implementing the standard -macros that are ultimately used by clients), designing a run-time -assembler first allows you to think in terms of assembly language -rather than binary code (ouch!@dots{}), making it considerably easier -to write the standard macros. - -Creating a run-time assembler is a tedious process rather than a -difficult one, because most of the time will be spent collecting and -copying information from the architecture's manual. - -Macros defined by a run-time assembler are conventionally named after -the mnemonic and the type of its operands. Examples took from the -SPARC's run-time assembler are @code{ADDrrr}, a macro that assembles -an @code{ADD} instruction with three register operands, and -@code{SUBCCrir}, which assembles a @code{SUBCC} instruction whose second -operand is an immediate and the remaining two are registers. - -The first step in creating the assembler is to pick a convention for -operand specifiers (@code{r} and @code{i} in the example above) and for -register names. On the SPARC, this convention is as follows - -@table @code -@item @b{r} -A register name. For every @code{r} in the macro name, a numeric -parameter @code{RR} is passed to the macro, and the operand is assembled -as @code{%r@var{RR}}. - -@item @b{i} -An immediate, usually a 13-bit signed integer (with exception for -instructions such as @code{SETHI} and branches). The macros check -the size of the passed parameter if @lightning{} is configured with -@code{--enable-assertions}. 
- -@item @b{x} -A combination of two @code{r} parameters, which are summed to determine -the effective address in a memory load/store operation. - -@item @b{m} -A combination of an @code{r} and @code{i} parameter, which are summed to -determine the effective address in a memory load/store operation. -@end table - -Additional macros can be defined that provide easier access to register -names. For example, on the SPARC, @code{_Ro(3)} and @code{_Rg(5)} map -respectively to @code{%o3} and @code{%g5}; on the x86, instead, symbolic -representations of the register names are provided (for example, -@code{_EAX} and @code{_EBX}). - -CISC architectures sometimes have registers of different sizes--this is -the case on the x86 where @code{%ax} is a 16-bit register while -@code{%esp} is a 32-bit one. In this case, it can be useful to embed -information on the size in the definition of register names. The x86 -machine language, for example, represents all three of @code{%bh}, -@code{%di} and @code{%edi} as 7; but the x86 run-time assemblers defines -them with different numbers, putting the register's size in the upper -nybble (for example, @samp{17h} for @code{%bh} and @samp{27h} for -@code{%di}) so that consistency checks can be made on the operands' -sizes when @code{--enable-assertions} is used. - -The next important part defines the native architecture's instruction -formats. These can be as few as ten on RISC architectures, and as many -as fifty on CISC architectures. In the latter case it can be useful -to define more macros for sub-formats (such as macros for different -addressing modes) or even for sub-fields in an instruction. Let's see -an example of these macros. - -@example -#define _2i( OP, RD, OP2, IMM) - _I((_u2 (OP )<<30) | (_u5(RD)<<25) | (_u3(OP2)<<22) | - _u22(IMM) ) -@end example - -The name of the macro, @code{_2i}, indicates a two-operand instruction -comprising an immediate operand. 
The instruction format is: - -@example - .------.---------.------.-------------------------------------------. - | OP | RD | OP2 | IMM | - |------+---------+------+-------------------------------------------| - |2 bits| 5 bits |3 bits| 22 bits | - |31-30 | 29-25 | 22-24| 0-21 | - '------'---------'------'-------------------------------------------' -@end example - -@lightning{} provides macros named @code{_sXX(OP)} and @code{_uXX(OP)}, -where XX is a number between 1 and 31, which test@footnote{Only when -@code{--enable-assertions} is used.} whether @code{OP} can be -represented as (respectively) a signed or unsigned integer of the -given size. What the macro above does, then, is to shift and @sc{or} -together the different fields, ensuring that each of them fits the field. - -Here is another definition, this time for the PowerPC architecture. - -@example -#define _X(OP,RD,RA,RB,XO,RC) - _I((_u6 (OP)<<26) | (_u5(RD)<<21) | (_u5(RA)<<16) | - ( _u5(RB)<<11) | (_u10(XO)<<1) | _u1(RC) ) -@end example - -Here is the bit layout corresponding to this instruction format: - -@example - .--------.--------.--------.--------.---------------------.-------. - | OP | RD | RA | RB | X0 | RC | - |--------+--------+--------+--------+-----------------------------| - | 6 bits | 5 bits | 5 bits | 5 bits | 10 bits | 1 bit | - | 31-26 | 25-21 | 16-20 | 11-15 | 1-10 | 0 | - '--------'---------'-------'--------'-----------------------------' -@end example - -How do these macros actually generate code? The secret lies in the -@code{_I} macro, which is one of four predefined macros which actually -store machine language instructions in memory. They are @code{_B}, -@code{_W}, @code{_I} and @code{_L}, respectively for 8-bit, 16-bit, -32-bit, and @code{long} (either 32-bit or 64-bit, depending on the -architecture) values. 
- -Next comes another set of macros (usually the biggest) which represents -the actual mnemonics---macros such as @code{ADDrrr} and @code{SUBCCrir}, -which were cited earlier in this chapter, belong to this set. Most of -the times, all these macros will do is to use the ``instruction format'' -macros, specifying the values of the fields in the different instruction -formats. Let's see a few of these definitions, again taken from the -SPARC assembler: - -@example -#define BAi(DISP) _2 (0, 0, 8, 2, DISP) -#define BA_Ai(DISP) _2 (0, 1, 8, 2, DISP) - -#define SETHIir(IMM, RD) _2i (0, RD, 4, IMM) - -#define ADDrrr(RS1, RS2, RD) _3 (2, RD, 0, RS1, 0, 0, RS2) -#define ADDrir(RS1, IMM, RD) _3i (2, RD, 0, RS1, 1, IMM) -#define ADDCCrrr(RS1, RS2, RD) _3 (2, RD, 16, RS1, 0, 0, RS2) -#define ADDCCrir(RS1, IMM, RD) _3i (2, RD, 16, RS1, 1, IMM) -#define ANDrrr(RS1, RS2, RD) _3 (2, RD, 1, RS1, 0, 0, RS2) -#define ANDrir(RS1, IMM, RD) _3i (2, RD, 1, RS1, 1, IMM) -#define ANDCCrrr(RS1, RS2, RD) _3 (2, RD, 17, RS1, 0, 0, RS2) -#define ANDCCrir(RS1, IMM, RD) _3i (2, RD, 17, RS1, 1, IMM) -@end example - -A few things have to be noted. For example: -@itemize @bullet -@item -The SPARC assembly language sometimes uses a comma inside a mnemonic -(for example, @code{ba,a}). This symbol is not allowed inside a -@sc{cpp} macro name, so it is replaced with an underscore; the same -is done with the dots found in the PowerPC assembly language (for -example, @code{andi.} is defined as @code{ANDI_rri}). - -@item -It can be useful to group together instructions with the same -instruction format, as doing this tends to make the source code -more readable (numbers are put in the same columns). - -@item -Using an editor without automatic wrap at end of line can be useful, -since run-time assemblers tend to have very long lines. -@end itemize - -A final touch is to define the synthetic instructions, which are -usually found on RISC machines. 
For example, on the SPARC, the -@code{LD} instruction has two synonyms (@code{LDUW} and @code{LDSW}) -which are defined thus: - -@example -#define LDUWxr(RS1, RS2, RD) LDxr(RS1, RS2, RD) -#define LDUWmr(RS1, IMM, RD) LDmr(RS1, IMM, RD) -#define LDSWxr(RS1, RS2, RD) LDxr(RS1, RS2, RD) -#define LDSWmr(RS1, IMM, RD) LDmr(RS1, IMM, RD) -@end example - -Other common case are instructions which take advantage of registers -whose value is hard-wired to zero, and short-cut instructions which -hard-code some or all of the operands: - -@example -@rem{/* Destination is %g0\, which the processor never overwrites. */} -#define CMPrr(R1, R2) SUBCCrrr(R1, R2, 0) @rem{/* subcc %r1\, %r2\, %g0 */} - -@rem{/* One of the source registers is hard-coded to be %g0. */} -#define NEGrr(R,S) SUBrrr(0, R, S) @rem{/* sub %g0\, %rR\, %rS */} - -@rem{/* All of the operands are hard-coded. */} -#define RET() JMPLmr(31,8 ,0) @rem{/* jmpl [%r31+8]\, %g0 */} - -@rem{/* One of the operands acts as both source and destination */} -#define BSETrr(R,S) ORrrr(R, S, S) @rem{/* or %rR\, %rS\, %rS */} -@end example - -Specific to RISC computers, finally, is the instruction to load an -arbitrarily sized immediate into a register. This instruction is -usually implemented as one or two basic instructions: - -@enumerate -@item -If the number is small enough, an instruction is sufficient -(@code{LI} or @code{ORI} on the PowerPC, @code{MOV} on the SPARC). - -@item -If the lowest bits are all zeroed, an instruction is sufficient -(@code{LIS} on the PowerPC, @code{SETHI} on the SPARC). - -@item -Otherwise, the high bits are set first (with @code{LIS} or -@code{SETHI}), and the result is then @sc{or}ed with the low -bits -@end enumerate - -Here is the definition of such an instruction for the PowerPC: - -@example -#define MOVEIri(R,I) (_siP(16,I) ? LIri(R,I) : \ @rem{/* case 1 */} - (_uiP(16,I) ? 
ORIrri(R,0,I) : \ @rem{/* case 1 */} - _MOVEIri(R, _HI(I), _LO(I)) )) @rem{/* case 2/3 */} - -#define _MOVEIri(H,L,R) (LISri(R,H), (L ? ORIrri(R,R,L) : 0)) -@end example - -@noindent -and for the SPARC: - -@example -#define SETir(I,R) (_siP(13,I) ? MOVir(I,R) : \ - _SETir(_HI(I), _LO(I), R)) - -#define _SETir(H,L,R) (SETHIir(H,R), (L ? ORrir(R,L,R) : 0)) -@end example - -In both cases, @code{_HI} and @code{_LO} are macros for internal use -that extract different parts of the immediate operand. - -You should take a look at the run-time assemblers distributed with -@lightning{} before trying to craft your own. In particular, make -sure you understand the RISC run-time assemblers (the SPARC's is -the simplest) before trying to decypher the x86 run-time assembler, -which is significantly more complex. - - -@node Standard macros -@chapter Creating the platform-independent layer - -The platform-independent layer is the one that is ultimately used -by @lightning{} clients. Creating this layer is a matter of creating -a hundred or so macros that comprise part of the interface used by -the clients, as described in -@usingref{The instruction set, @lightning{}'s instruction set}. - -Fortunately, a number of these definitions are common to the different -platforms and are defined just once in one of the header files that -make up @lightning{}, that is, @file{core-common.h}. - -Most of the macros are relatively straight-forward to implement (with -a few caveats for architectures whose assembly language only offers -two-operand arithmetic instructions). This section will cover the -tricky points, before presenting the complete listing of the macros -that make up the platform-independent interface provided by -@lightning{}. 
- -@menu -@standardmacrosmenu{} -@end menu - -@node Forward references -@section Implementing forward references - -Implementation of forward references takes place in: - -@itemize @bullet -@item -The branch macros - -@item -The @code{jit_patch_at} macros -@end itemize - -Roughly speaking, the branch macros, as seen in @usingref{GNU lightning -macros, Generating code at run-time}, return a value that later calls -to @code{jit_patch} or @code{jit_patch_at} use to complete the assembly -of the forward reference. This value is usually the contents of the -program counter after the branch instruction is compiled (which is -accessible in the @code{_jit.pc} variable). Let's see an example from -the x86 back-end: - -@example -#define jit_bmsr_i(label, s1, s2) \ - (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.pc) -@end example - -The @code{bms} (@dfn{branch if mask set}) instruction is assembled as -the combination of a @code{TEST} instruction (bit-wise @sc{and} between -the two operands) and a @code{JNZ} instruction (jump if non-zero). The -macro then returns the final value of the program counter. - -@code{jit_patch_at} is one of the few macros that need to possess a -knowledge of the machine's instruction formats. Its purpose is to -patch a branch instruction (identified by the value returned at the -moment the branch was compiled) to jump to the current position (that -is, to the address identified by @code{_jit.pc}). - -On the x86, the displacement between the jump and the landing point is -expressed as a 32-bit signed integer lying in the last four bytes of the -jump instruction. The definition of @code{_jit_patch_at} is: - -@example -#define jit_patch(jump_pc, pv) (*_PSL((jump_pc) - 4) = \ - (pv) - (jump_pc)) -@end example - -The @code{_PSL} macro is nothing more than a cast to @code{long *}, -and is used here to shorten the definition and avoid cluttering it with -excessive parentheses. 
These type-cast macros are: - -@itemize @bullet -@item -@code{_PUC(X)} to cast to a @code{unsigned char *}. - -@item -@code{_PUS(X)} to cast to a @code{unsigned short *}. - -@item -@code{_PUI(X)} to cast to a @code{unsigned int *}. - -@item -@code{_PSL(X)} to cast to a @code{long *}. - -@item -@code{_PUL(X)} to cast to a @code{unsigned long *}. -@end itemize - -On other platforms, notably RISC ones, the displacement is embedded into -the instruction itself. In this case, @code{jit_patch_at} must first zero -out the field, and then @sc{or} in the correct displacement. The SPARC, -for example, encodes the displacement in the bottom 22 bits; in addition -the right-most two bits are suppressed, which are always zero because -instruction have to be word-aligned. - -@example -#define jit_patch_at(delay_pc, pv) jit_patch_ (((delay_pc) - 1), (pv)) - -@rem{/* branch instructions return the address of the @emph{delay} - * instruction---this is just a helper macro that makes the code more - * readable. - */} -#define jit_patch_(jump_pc, pv) (*jump_pc = \ - (*jump_pc & ~_MASK(22)) | \ - ((_UL(pv) - _UL(jump_pc)) >> 2) & _MASK(22)) -@end example - -This introduces more predefined shortcut macros: -@itemize @bullet -@item -@code{_UC(X)} to cast to a @code{unsigned char}. - -@item -@code{_US(X)} to cast to a @code{unsigned short}. - -@item -@code{_UI(X)} to cast to a @code{unsigned int}. - -@item -@code{_SL(X)} to cast to a @code{long}. - -@item -@code{_UL(X)} to cast to a @code{unsigned long}. - -@item -@code{_MASK(N)} gives a binary number made of N ones. -@end itemize - -Dual to branches and @code{jit_patch_at} are @code{jit_movi_p} -and @code{jit_patch_movi}, since they can also be used to implement -forward references. @code{jit_movi_p} should be carefully implemented -to use an encoding that is as long as possible, and it should return -an address which is then passed to @code{jit_patch_movi}. The -implementation of @code{jit_patch_movi} is similar to -@code{jit_patch_at}. 
- -@node Common features -@section Common features supported by @file{core-common.h} - -The @file{core-common.h} file contains hundreds of macro definitions -which will spare you defining a lot of things in the files the are -specific to your port. Here is a list of the features that -@file{core-common.h} provides. - -@table @b -@item Support for common synthetic instructions -These are instructions that can be represented as a simple operation, -for example a bit-wise @sc{and} or a subtraction. @file{core-common.h} -recognizes when the port-specific header file defines these macros and -avoids compiler warnings about redefined macros, but there should be -no need to define them. They are: -@example -#define jit_extr_c_ui(d, rs) -#define jit_extr_s_ui(d, rs) -#define jit_extr_c_ul(d, rs) -#define jit_extr_s_ul(d, rs) -#define jit_extr_i_ul(d, rs) -#define jit_negr_i(d, rs) -#define jit_negr_l(d, rs) -@end example - -@item Support for the @sc{abi} -All of @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} are not -mandatory. If not defined, they will be defined respectively as an -empty macro, as a synonym for @code{jit_prolog}, and as a synonym for -@code{jit_calli}. Whether to define them or not in the port-specific -header file, it depends on the underlying architecture's @sc{abi}---in -general, however, you'll need to define at least @code{jit_prolog}. - -@item Support for uncommon instructions -These are instructions that many widespread architectures lack. -@file{core-common.h} is able to provide default definitions, but they -are usually inefficient if the hardware provides a way to do these -operations with a single instruction. 
They are extension with sign -and ``reverse subtraction'' (that is, REG2@math{=}IMM@math{-}REG1): -@example -#define jit_extr_c_i(d, rs) -#define jit_extr_s_i(d, rs) -#define jit_extr_c_l(d, rs) -#define jit_extr_s_l(d, rs) -#define jit_extr_i_l(d, rs) -#define jit_rsbi_i(d, rs, is) -#define jit_rsbi_l(d, rs, is) -#define jit_rsbi_p(d, rs, is) -@end example - -@item Conversion between network and host byte ordering -These macros are no-ops on big endian systems. Don't define them on -such systems; on the other hand, they are mandatory on little endian -systems. They are: -@example -#define jit_ntoh_ui(d, rs) -#define jit_ntoh_us(d, rs) -@end example - -@item Support for a ``zero'' register -Many RISC architectures provide a read-only register whose value is -hard-coded to be zero; this register is then used implicitly when -referring to a memory location using a single register. For example, -on the SPARC, an operand like @code{[%l6]} is actually assembled as -@code{[%l6+%g0]}. If this is the case, you should define -@code{JIT_RZERO} to be the number of this register; @file{core-common.h} -will use it to implement all variations of the @code{ld} and @code{st} -instructions. For example: -@example -#define jit_ldi_c(d, is) jit_ldxi_c(d, JIT_RZERO, is) -#define jit_ldr_i(d, rs) jit_ldxr_c(d, JIT_RZERO, rs) -@end example - -If available, JIT_RZERO is also used to provide more efficient -definitions of the @code{neg} instruction (see ``Support for common -synthetic instructions'', above). - -@item Synonyms -@file{core-common.h} provides a lot of trivial definitions which make -the instruction set as orthogonal as possible. For example, adding two -unsigned integers is exactly the same as adding two signed integers -(assuming a two's complement representation of negative numbers); yet, -@lightning{} provides both @code{jit_addr_i} and @code{jit_addr_ui} -macros. 
Similarly, pointers and unsigned long integers behave in the -same way, but @lightning{} has separate instruction for the two data -types---those that operate on pointers usually include a typecast -that makes programs clearer. - -@item Shortcuts -These define ``synthetic'' instructions whose definition is not as -trivial as in the case of synonyms, but is anyway standard. This -is the case for bitwise @sc{not} (which is implemented by XORing a -string of ones), ``reverse subtraction'' between registers (which is -converted to a normal subtraction with the two source operands -inverted), and subtraction of an immediate from a register (which is -converted to an addition). Unlike @code{neg} and @code{ext} (see -``Support for common synthetic instructions'', above), which are -simply non-mandatory, you must not define these functions. - -@item Support for @code{long}s -On most systems, @code{long}s and @code{unsigned long}s are the same -as, respectively, @code{int}s and @code{unsigned int}s. In this case, -@file{core-common.h} defines operations on these types to be synonyms. - -@item @code{jit_state} -Last but not least, @file{core-common.h} defines the @code{jit_state} -type. Part of this @code{struct} is machine-dependent and includes -all kinds of state needed by the back-end; this part is always -accessible in a re-entrant way as @code{_jitl}. @code{_jitl} will be -of type @code{struct jit_local_state}; this struct must be defined -even if no state is required. - -@end table - -@node Delay slots -@section Supporting scheduling of delay slots - -Delay slot scheduling is obtained by clients through the -@code{jit_delay} macro. However this macro is not to be defined -in the platform-independent layer, because @lightning{} provides -a common definition in @file{core-common.h}. 
- -Instead, the platform-independent layer must define another macro, -called @code{jit_fill_delay_after}, which has to exchange the -instruction to be scheduled in the delay slot with the branch -instruction. The only parameter accepted by the macro is a call -to a branch macro, which must be expanded @strong{exactly once} by -@code{jit_fill_delay_after}. The client must be able to pass the -return value of @code{jit_fill_delay_after} to @code{jit_patch_at}. - -There are two possible approaches that can be used in -@code{jit_fill_delay_after}. They are summarized in the following -pictures: - -@itemize @bullet -@item -The branch instructions assemble a @sc{nop} instruction which is -then removed by @code{jit_fill_delay_after}. - -@example - before | after - ---------------------------------+----------------------------- - ... | - | - | - NOP | <--- _jit.pc - <--- _jit.pc | -@end example - -@item -The branch instruction assembles the branch so that the delay -slot is annulled, @code{jit_fill_delay_after} toggles the bit: - -@example - before | after - ---------------------------------+----------------------------- - ... | - | - | - <--- _jit.pc | <--- _jit.pc -@end example -@end itemize - -Don't forget that you can take advantage of delay slots in the -implementation of boolean instructions such as @code{le} or @code{gt}. - -@node Immediate values -@section Supporting arbitrarily sized immediate values - -This is a problem that is endemic to RISC machines. The basic idea -is to reserve one or two register to represent large immediate values. -Let's see an example from the SPARC: - -@example - addi_i R0, V2, 45 | addi_i R0, V2, 10000 - ---------------------------+--------------------------- - add %l5, 45, %l0 | set 10000, %l6 - | add %l5, %l6, %l0 -@end example - -In this case, @code{%l6} is reserved to be used for large immediates. -An elegant solution is to use an internal macro which automatically -decides which version is to be compiled. 
- -Beware of register conflicts on machines with delay slots. This is -the case for the SPARC, where @code{%l7} is used instead for large -immediates in compare-and-branch instructions. So the sequence - -@example - jit_delay( - jit_addi_i(JIT_R0, JIT_V2, 10000), - jit_blei_i(label, JIT_R1, 20000) - ); -@end example - -@noindent -is assembled this way: - -@example - set 10000, %l6 @rem{! prepare immediate for add} - set 20000, %l7 @rem{! prepare immediate for cmp} - cmp %l1, %l7 - ble label - add %l5, %l6, %l0 @rem{! delay slot} - @end example - -Note that using @code{%l6} in the branch instruction would have given -an incorrect result---@code{R0} would have been filled with the value of -@code{V2+@i{20000}} rather than @code{V2+@i{10000}}. - -@node Implementing the ABI -@section Implementing the ABI - -Implementing the underlying architecture's @sc{abi} is done in the -macros that handle function prologs and epilogs and argument passing. - -Let's look at the prologs and epilogs first. These are usually pretty -simple and, what's more important, with constant content---that is, -they always generate exactly the same instruction sequence. Here is -an example: - -@example - SPARC x86 - save %sp, -96, %sp push %ebp - push %ebx - push %esi - push %edi - movl %esp, %ebp - ... ... - ret popl %edi - restore popl %esi - popl %ebx - popl %ebp - ret -@end example - -The registers that are saved (@code{%ebx}, @code{%esi}, @code{%edi}) are -mapped to the @code{V0} through @code{V2} registers in the @lightning{} -instruction set. - -Argument passing is more tricky. There are basically three -cases@footnote{For speed and ease of implementation, @lightning{} does not -currently support passing some of the parameters on the stack and some -in registers.}: -@table @b -@item Register windows -Output registers are different from input registers---the prolog takes -care of moving the caller's output registers to the callee's input -registers. This is the case with the SPARC. 
- -@item Passing parameters via registers -In this case, output registers are the same as input registers. The -program must take care of saving input parameters somewhere (on the -stack, or in non-argument registers). This is the case with the -PowerPC. - -@item All the parameters are passed on the stack -This case is by far the simplest and is the most common in CISC -architectures, like the x86 and Motorola 68000. -@end table - -In all cases, the port-specific header file will define two variable -for private use---one to be used by the caller during the -@code{prepare}/@code{pusharg}/@code{finish} sequence, one to be used -by the callee, specifically in the @code{jit_prolog} and @code{jit_arg} -macros. - -Let's look again, this time with more detail, at each of the cases. - -@table @b -@item Register windows -@code{jit_finish} is the same as @code{jit_calli}, and is defined -in @file{core-common.h} (@pxref{Common features, , Common features -supported by @file{core-common.h}}). - -@example -#define jit_prepare_i(numargs) (_jitl.pusharg = _Ro(numargs)) -#define jit_pusharg_i(rs) (--_jitl.pusharg, \ - MOVrr((rs), _jitl.pusharg)) -@end example - -Remember that arguments pushing takes place in reverse order, thus -giving a pre-decrement (rather than post-increment) in -@code{jit_pusharg_i}. - -Here is what happens on the callee's side: - -@example -#define jit_arg_c() (_jitl.getarg++) -#define jit_getarg_c(rd, ofs) jit_extr_c_i ((rd), (ofs)) -#define jit_prolog(numargs) (SAVErir(JIT_SP, -96, JIT_SP), \ - _jitl.getarg = _Ri(0)) -@end example - -The @code{jit_arg} macros return nothing more than a register index, -which is then used by the @code{jit_getarg} macros. @code{jit_prolog} -resets the counter used by @code{jit_arg} to zero; the @code{numargs} -parameter is not used. It is sufficient for @code{jit_leaf} to be a -synonym for @code{jit_prolog}. 
- -@item Passing parameter via registers -The code is almost the same as that for the register windows case, but -with an additional complexity---@code{jit_arg} will transfer the -argument from the input register to a non-argument register so that -function calls will not clobber it. The prolog and epilog code can then -become unbearably long, up to 20 instructions on the PPC; a common -solution in this case is that of @dfn{trampolines}. - -The prolog does nothing more than put the function's actual address in a -caller-preserved register and then call the trampoline: -@example - mflr r0 @rem{! grab return address} - movei r10, trampo_2args @rem{! jump to trampoline} - mtlr r10 - blrl -here: mflr r31 @rem{! r31 = address of epilog} - @rem{...actual code...} - mtlr r31 @rem{! return to the trampoline} - blr -@end example - -In this case, @code{jit_prolog} does use its argument containing the -number of parameters to pick the appropriate trampoline. Here, -@code{trampo_2args} is the address of a trampoline designed for -2-argument functions. - -The trampoline executes the prolog code, jumps to the contents of -@code{r10}, and upon return from the subroutine it executes the -epilog code. - -@item All the parameters are passed on the stack -@code{jit_pusharg} uses a hardware push operation, which is commonly -available on CISC machines (where this approach is most likely -followed). Since the stack has to be cleaned up after the call, -@code{jit_prepare_i} remembers how many parameters have been put there, -and @code{jit_finish} adjusts the stack pointer after the call. - -@example -#define jit_prepare_i(numargs) (_jitl.args += (numargs)) -#define jit_pusharg_i(rs) PUSHLr(rs) -#define jit_finish(sub) (jit_calli((sub)), \ - ADDLir(4 * _jitl.args, JIT_SP), \ - _jitl.numargs = 0) -@end example - -Note the usage of @code{+=} in @code{jit_prepare_i}. This is done -so that one can defer the popping of the arguments that were saved -on the stack (@dfn{stack pollution}). 
To do so, it is sufficient to -use @code{jit_calli} instead of @code{jit_finish} in all but the -last call. - -On the caller's side, @code{arg} returns an offset relative to the -frame pointer, and @code{getarg} loads the argument from the stack: - -@example -#define jit_getarg_c(rd, ofs) jit_ldxi_c((rd), _EBP, (ofs)); -#define jit_arg_c() ((_jitl.frame += sizeof(int) \ - - sizeof(int)) -@end example - -The @code{_jitl.frame} variable is initialized by @code{jit_prolog} -with the displacement between the value of the frame pointer -(@code{%ebp}) and the address of the first parameter. -@end table - -These schemes are the most used, so @file{core-common.h} provides a way -to employ them automatically. If you do not define the -@code{jit_getarg_c} macro and its companions, @file{core-common.h} will -presume that you intend to pass parameters through either the registers -or the stack. - -If you define @code{JIT_AP}, stack-based parameter passing will be -employed and the @code{jit_getarg} macros will be defined like this: - -@example -#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_AP, (ofs)); -@end example - -In other words, the @code{jit_arg} macros (which are still to be defined -by the platform-specific back-end) shall return an offset into the stack -frame. 
On the other hand, if you don't define @code{JIT_AP}, -register-based parameter passing will be employed and the @code{jit_arg} -macros shall return a register number; in this case, @code{jit_getarg} -will be implemented in terms of @code{jit_extr} and @code{jit_movr} -operations: - -@example -#define jit_getarg_c(reg, ofs) jit_extr_c_i ((reg), (ofs)) -#define jit_getarg_i(reg, ofs) jit_movr_i ((reg), (ofs)) -@end example - - -@node Macro list -@section Macros composing the platform-independent layer - -@table @b -@item Register names (all mandatory but the last three) -@example -#define JIT_R -#define JIT_R_NUM -#define JIT_V -#define JIT_V_NUM -#define JIT_FPR -#define JIT_FPR_NUM -#define JIT_FP -#define JIT_SP -#define JIT_AP -#define JIT_RZERO -@end example - -@item Helper macros (non-mandatory): -@example -#define jit_fill_delay_after(branch) -@end example - -@item Mandatory: -@example -#define jit_allocai() -#define jit_arg_c() -#define jit_arg_i() -#define jit_arg_l() -#define jit_arg_p() -#define jit_arg_s() -#define jit_arg_uc() -#define jit_arg_ui() -#define jit_arg_ul() -#define jit_arg_us() -#define jit_abs_d(rd,rs) -#define jit_addi_i(d, rs, is) -#define jit_addr_d(rd,s1,s2) -#define jit_addr_i(d, s1, s2) -#define jit_addxi_i(d, rs, is) -#define jit_addxr_i(d, s1, s2) -#define jit_andi_i(d, rs, is) -#define jit_andr_i(d, s1, s2) -#define jit_beqi_i(label, rs, is) -#define jit_beqr_d(label, s1, s2) -#define jit_beqr_i(label, s1, s2) -#define jit_bgei_i(label, rs, is) -#define jit_bgei_ui(label, rs, is) -#define jit_bger_d(label, s1, s2) -#define jit_bger_i(label, s1, s2) -#define jit_bger_ui(label, s1, s2) -#define jit_bgti_i(label, rs, is) -#define jit_bgti_ui(label, rs, is) -#define jit_bgtr_d(label, s1, s2) -#define jit_bgtr_i(label, s1, s2) -#define jit_bgtr_ui(label, s1, s2) -#define jit_blei_i(label, rs, is) -#define jit_blei_ui(label, rs, is) -#define jit_bler_d(label, s1, s2) -#define jit_bler_i(label, s1, s2) -#define jit_bler_ui(label, s1, 
s2) -#define jit_bltgtr_d(label, s1, s2) -#define jit_blti_i(label, rs, is) -#define jit_blti_ui(label, rs, is) -#define jit_bltr_d(label, s1, s2) -#define jit_bltr_i(label, s1, s2) -#define jit_bltr_ui(label, s1, s2) -#define jit_bmci_i(label, rs, is) -#define jit_bmcr_i(label, s1, s2) -#define jit_bmsi_i(label, rs, is) -#define jit_bmsr_i(label, s1, s2) -#define jit_bnei_i(label, rs, is) -#define jit_bner_d(label, s1, s2) -#define jit_bner_i(label, s1, s2) -#define jit_boaddi_i(label, rs, is) -#define jit_boaddi_ui(label, rs, is) -#define jit_boaddr_i(label, s1, s2) -#define jit_boaddr_ui(label, s1, s2) -#define jit_bordr_d(label, s1, s2) -#define jit_bosubi_i(label, rs, is) -#define jit_bosubi_ui(label, rs, is) -#define jit_bosubr_i(label, s1, s2) -#define jit_bosubr_ui(label, s1, s2) -#define jit_buneqr_d(label, s1, s2) -#define jit_bunger_d(label, s1, s2) -#define jit_bungtr_d(label, s1, s2) -#define jit_bunler_d(label, s1, s2) -#define jit_bunltr_d(label, s1, s2) -#define jit_bunordr_d(label, s1, s2) -#define jit_calli(label) -#define jit_callr(label) -#define jit_ceilr_d_i(rd, rs) -#define jit_divi_i(d, rs, is) -#define jit_divi_ui(d, rs, is) -#define jit_divr_d(rd,s1,s2) -#define jit_divr_i(d, s1, s2) -#define jit_divr_ui(d, s1, s2) -#define jit_eqi_i(d, rs, is) -#define jit_eqr_d(d, s1, s2) -#define jit_eqr_i(d, s1, s2) -#define jit_extr_i_d(rd, rs) -#define jit_floorr_d_i(rd, rs) -#define jit_gei_i(d, rs, is) -#define jit_gei_ui(d, s1, s2) -#define jit_ger_d(d, s1, s2) -#define jit_ger_i(d, s1, s2) -#define jit_ger_ui(d, s1, s2) -#define jit_gti_i(d, rs, is) -#define jit_gti_ui(d, s1, s2) -#define jit_gtr_d(d, s1, s2) -#define jit_gtr_i(d, s1, s2) -#define jit_gtr_ui(d, s1, s2) -#define jit_hmuli_i(d, rs, is) -#define jit_hmuli_ui(d, rs, is) -#define jit_hmulr_i(d, s1, s2) -#define jit_hmulr_ui(d, s1, s2) -#define jit_jmpi(label) -#define jit_jmpr(reg) -#define jit_ldxi_f(rd, rs, is) -#define jit_ldxr_f(rd, s1, s2) -#define jit_ldxi_c(d, rs, is) -#define 
jit_ldxi_d(rd, rs, is) -#define jit_ldxi_i(d, rs, is) -#define jit_ldxi_s(d, rs, is) -#define jit_ldxi_uc(d, rs, is) -#define jit_ldxi_us(d, rs, is) -#define jit_ldxr_c(d, s1, s2) -#define jit_ldxr_d(rd, s1, s2) -#define jit_ldxr_i(d, s1, s2) -#define jit_ldxr_s(d, s1, s2) -#define jit_ldxr_uc(d, s1, s2) -#define jit_ldxr_us(d, s1, s2) -#define jit_lei_i(d, rs, is) -#define jit_lei_ui(d, s1, s2) -#define jit_ler_d(d, s1, s2) -#define jit_ler_i(d, s1, s2) -#define jit_ler_ui(d, s1, s2) -#define jit_lshi_i(d, rs, is) -#define jit_lshr_i(d, r1, r2) -#define jit_ltgtr_d(d, s1, s2) -#define jit_lti_i(d, rs, is) -#define jit_lti_ui(d, s1, s2) -#define jit_ltr_d(d, s1, s2) -#define jit_ltr_i(d, s1, s2) -#define jit_ltr_ui(d, s1, s2) -#define jit_modi_i(d, rs, is) -#define jit_modi_ui(d, rs, is) -#define jit_modr_i(d, s1, s2) -#define jit_modr_ui(d, s1, s2) -#define jit_movi_d(rd,immd) -#define jit_movi_f(rd,immf) -#define jit_movi_i(d, is) -#define jit_movi_p(d, is) -#define jit_movr_d(rd,rs) -#define jit_movr_i(d, rs) -#define jit_muli_i(d, rs, is) -#define jit_muli_ui(d, rs, is) -#define jit_mulr_d(rd,s1,s2) -#define jit_mulr_i(d, s1, s2) -#define jit_mulr_ui(d, s1, s2) -#define jit_negr_d(rd,rs) -#define jit_nei_i(d, rs, is) -#define jit_ner_d(d, s1, s2) -#define jit_ner_i(d, s1, s2) -#define jit_nop() -#define jit_ordr_d(d, s1, s2) -#define jit_ori_i(d, rs, is) -#define jit_orr_i(d, s1, s2) -#define jit_patch_at(jump_pc, value) -#define jit_patch_movi(jump_pc, value) -#define jit_prepare_d(numargs) -#define jit_prepare_f(numargs) -#define jit_prepare_i(numargs) -#define jit_pusharg_i(rs) -#define jit_ret() -#define jit_retval_i(rd) -#define jit_roundr_d_i(rd, rs) -#define jit_rshi_i(d, rs, is) -#define jit_rshi_ui(d, rs, is) -#define jit_rshr_i(d, r1, r2) -#define jit_rshr_ui(d, r1, r2) -#define jit_sqrt_d(rd,rs) -#define jit_stxi_c(id, rd, rs) -#define jit_stxi_d(id, rd, rs) -#define jit_stxi_f(id, rd, rs) -#define jit_stxi_i(id, rd, rs) -#define jit_stxi_s(id, rd, 
rs) -#define jit_stxr_c(d1, d2, rs) -#define jit_stxr_d(d1, d2, rs) -#define jit_stxr_f(d1, d2, rs) -#define jit_stxr_i(d1, d2, rs) -#define jit_stxr_s(d1, d2, rs) -#define jit_subr_d(rd,s1,s2) -#define jit_subr_i(d, s1, s2) -#define jit_subxi_i(d, rs, is) -#define jit_subxr_i(d, s1, s2) -#define jit_truncr_d_i(rd, rs) -#define jit_uneqr_d(d, s1, s2) -#define jit_unger_d(d, s1, s2) -#define jit_ungtr_d(d, s1, s2) -#define jit_unler_d(d, s1, s2) -#define jit_unltr_d(d, s1, s2) -#define jit_unordr_d(d, s1, s2) -#define jit_xori_i(d, rs, is) -#define jit_xorr_i(d, s1, s2) -@end example - -@item Non mandatory---there should be no need to define them: -@example -#define jit_extr_c_ui(d, rs) -#define jit_extr_s_ui(d, rs) -#define jit_extr_c_ul(d, rs) -#define jit_extr_s_ul(d, rs) -#define jit_extr_i_ul(d, rs) -#define jit_negr_i(d, rs) -#define jit_negr_l(d, rs) -@end example - -@item Non mandatory---whether to define them depends on the @sc{abi}: -@example -#define jit_prolog(n) -#define jit_finish(sub) -#define jit_finishr(reg) -#define jit_leaf(n) -#define jit_getarg_c(reg, ofs) -#define jit_getarg_i(reg, ofs) -#define jit_getarg_l(reg, ofs) -#define jit_getarg_p(reg, ofs) -#define jit_getarg_s(reg, ofs) -#define jit_getarg_uc(reg, ofs) -#define jit_getarg_ui(reg, ofs) -#define jit_getarg_ul(reg, ofs) -#define jit_getarg_us(reg, ofs) -#define jit_getarg_f(reg, ofs) -#define jit_getarg_d(reg, ofs) -@end example - -@item Non mandatory---define them if instructions that do this exist: -@example -#define jit_extr_c_i(d, rs) -#define jit_extr_s_i(d, rs) -#define jit_extr_c_l(d, rs) -#define jit_extr_s_l(d, rs) -#define jit_extr_i_l(d, rs) -#define jit_rsbi_i(d, rs, is) -#define jit_rsbi_l(d, rs, is) -@end example - -@item Non mandatory if condition code are always set by add/sub, needed on other systems: -@example -#define jit_addci_i(d, rs, is) -#define jit_addci_l(d, rs, is) -#define jit_subci_i(d, rs, is) -#define jit_subci_l(d, rs, is) -@end example - -@item Mandatory 
on little endian systems---don't define them on other systems: -@example -#define jit_ntoh_ui(d, rs) -#define jit_ntoh_us(d, rs) -@end example - -@item Mandatory if JIT_RZERO not defined---don't define them if it is defined: -@example -#define jit_ldi_c(d, is) -#define jit_ldi_i(d, is) -#define jit_ldi_s(d, is) -#define jit_ldr_c(d, rs) -#define jit_ldr_i(d, rs) -#define jit_ldr_s(d, rs) -#define jit_ldi_uc(d, is) -#define jit_ldi_ui(d, is) -#define jit_ldi_us(d, is) -#define jit_ldr_uc(d, rs) -#define jit_ldr_ui(d, rs) -#define jit_ldr_us(d, rs) -#define jit_sti_c(id, rs) -#define jit_sti_i(id, rs) -#define jit_sti_s(id, rs) -#define jit_str_c(rd, rs) -#define jit_str_i(rd, rs) -#define jit_str_s(rd, rs) -#define jit_ldi_f(rd, is) -#define jit_sti_f(id, rs) -#define jit_ldi_d(rd, is) -#define jit_sti_d(id, rs) -#define jit_ldr_f(rd, rs) -#define jit_str_f(rd, rs) -#define jit_ldr_d(rd, rs) -#define jit_str_d(rd, rs) -@end example - -@item Synonyms---don't define them: -@example -#define jit_addi_p(d, rs, is) -#define jit_addi_ui(d, rs, is) -#define jit_addi_ul(d, rs, is) -#define jit_addr_p(d, s1, s2) -#define jit_addr_ui(d, s1, s2) -#define jit_addr_ul(d, s1, s2) -#define jit_andi_ui(d, rs, is) -#define jit_andi_ul(d, rs, is) -#define jit_andr_ui(d, s1, s2) -#define jit_andr_ul(d, s1, s2) -#define jit_beqi_p(label, rs, is) -#define jit_beqi_ui(label, rs, is) -#define jit_beqi_ul(label, rs, is) -#define jit_beqr_p(label, s1, s2) -#define jit_beqr_ui(label, s1, s2) -#define jit_beqr_ul(label, s1, s2) -#define jit_bmci_ui(label, rs, is) -#define jit_bmci_ul(label, rs, is) -#define jit_bmcr_ui(label, s1, s2) -#define jit_bmcr_ul(label, s1, s2) -#define jit_bmsi_ui(label, rs, is) -#define jit_bmsi_ul(label, rs, is) -#define jit_bmsr_ui(label, s1, s2) -#define jit_bmsr_ul(label, s1, s2) -#define jit_bgei_p(label, rs, is) -#define jit_bger_p(label, s1, s2) -#define jit_bgti_p(label, rs, is) -#define jit_bgtr_p(label, s1, s2) -#define jit_blei_p(label, rs, is) -#define 
jit_bler_p(label, s1, s2) -#define jit_blti_p(label, rs, is) -#define jit_bltr_p(label, s1, s2) -#define jit_bnei_p(label, rs, is) -#define jit_bnei_ui(label, rs, is) -#define jit_bnei_ul(label, rs, is) -#define jit_bner_p(label, s1, s2) -#define jit_bner_ui(label, s1, s2) -#define jit_bner_ul(label, s1, s2) -#define jit_eqi_p(d, rs, is) -#define jit_eqi_ui(d, rs, is) -#define jit_eqi_ul(d, rs, is) -#define jit_eqr_p(d, s1, s2) -#define jit_eqr_ui(d, s1, s2) -#define jit_eqr_ul(d, s1, s2) -#define jit_extr_c_s(d, rs) -#define jit_extr_c_us(d, rs) -#define jit_extr_uc_s(d, rs) -#define jit_extr_uc_us(d, rs) -#define jit_extr_uc_i(d, rs) -#define jit_extr_uc_ui(d, rs) -#define jit_extr_us_i(d, rs) -#define jit_extr_us_ui(d, rs) -#define jit_extr_uc_l(d, rs) -#define jit_extr_uc_ul(d, rs) -#define jit_extr_us_l(d, rs) -#define jit_extr_us_ul(d, rs) -#define jit_extr_ui_l(d, rs) -#define jit_extr_ui_ul(d, rs) -#define jit_gei_p(d, rs, is) -#define jit_ger_p(d, s1, s2) -#define jit_gti_p(d, rs, is) -#define jit_gtr_p(d, s1, s2) -#define jit_ldr_p(d, rs) -#define jit_ldr_ul(d, rs) -#define jit_ldi_p(d, is) -#define jit_ldi_ul(d, is) -#define jit_ldxi_p(d, rs, is) -#define jit_ldxi_ul(d, rs, is) -#define jit_ldxr_p(d, s1, s2) -#define jit_ldxr_ul(d, s1, s2) -#define jit_lei_p(d, rs, is) -#define jit_ler_p(d, s1, s2) -#define jit_lshi_ui(d, rs, is) -#define jit_lshi_ul(d, rs, is) -#define jit_lshr_ui(d, s1, s2) -#define jit_lshr_ul(d, s1, s2) -#define jit_lti_p(d, rs, is) -#define jit_ltr_p(d, s1, s2) -#define jit_movi_p(d, is) -#define jit_movi_ui(d, rs) -#define jit_movi_ul(d, rs) -#define jit_movr_p(d, rs) -#define jit_movr_ui(d, rs) -#define jit_movr_ul(d, rs) -#define jit_nei_p(d, rs, is) -#define jit_nei_ui(d, rs, is) -#define jit_nei_ul(d, rs, is) -#define jit_ner_p(d, s1, s2) -#define jit_ner_ui(d, s1, s2) -#define jit_ner_ul(d, s1, s2) -#define jit_hton_ui(d, rs) -#define jit_hton_us(d, rs) -#define jit_ori_ui(d, rs, is) -#define jit_ori_ul(d, rs, is) -#define 
jit_orr_ui(d, s1, s2) -#define jit_orr_ul(d, s1, s2) -#define jit_pusharg_c(rs) -#define jit_pusharg_p(rs) -#define jit_pusharg_s(rs) -#define jit_pusharg_uc(rs) -#define jit_pusharg_ui(rs) -#define jit_pusharg_ul(rs) -#define jit_pusharg_us(rs) -#define jit_retval_c(rd) -#define jit_retval_p(rd) -#define jit_retval_s(rd) -#define jit_retval_uc(rd) -#define jit_retval_ui(rd) -#define jit_retval_ul(rd) -#define jit_retval_us(rd) -#define jit_rsbi_p(d, rs, is) -#define jit_rsbi_ui(d, rs, is) -#define jit_rsbi_ul(d, rs, is) -#define jit_rsbr_p(d, rs, is) -#define jit_rsbr_ui(d, s1, s2) -#define jit_rsbr_ul(d, s1, s2) -#define jit_sti_p(d, is) -#define jit_sti_uc(d, is) -#define jit_sti_ui(d, is) -#define jit_sti_ul(d, is) -#define jit_sti_us(d, is) -#define jit_str_p(d, rs) -#define jit_str_uc(d, rs) -#define jit_str_ui(d, rs) -#define jit_str_ul(d, rs) -#define jit_str_us(d, rs) -#define jit_stxi_p(d, rs, is) -#define jit_stxi_uc(d, rs, is) -#define jit_stxi_ui(d, rs, is) -#define jit_stxi_ul(d, rs, is) -#define jit_stxi_us(d, rs, is) -#define jit_stxr_p(d, s1, s2) -#define jit_stxr_uc(d, s1, s2) -#define jit_stxr_ui(d, s1, s2) -#define jit_stxr_ul(d, s1, s2) -#define jit_stxr_us(d, s1, s2) -#define jit_subi_p(d, rs, is) -#define jit_subi_ui(d, rs, is) -#define jit_subi_ul(d, rs, is) -#define jit_subr_p(d, s1, s2) -#define jit_subr_ui(d, s1, s2) -#define jit_subr_ul(d, s1, s2) -#define jit_subxi_p(d, rs, is) -#define jit_subxi_ui(d, rs, is) -#define jit_subxi_ul(d, rs, is) -#define jit_subxr_p(d, s1, s2) -#define jit_subxr_ui(d, s1, s2) -#define jit_subxr_ul(d, s1, s2) -#define jit_xori_ui(d, rs, is) -#define jit_xori_ul(d, rs, is) -#define jit_xorr_ui(d, s1, s2) -#define jit_xorr_ul(d, s1, s2) -@end example - -@item Shortcuts---don't define them: -@example -#define JIT_R0 -#define JIT_R1 -#define JIT_R2 -#define JIT_V0 -#define JIT_V1 -#define JIT_V2 -#define JIT_FPR0 -#define JIT_FPR1 -#define JIT_FPR2 -#define JIT_FPR3 -#define JIT_FPR4 -#define JIT_FPR5 -#define 
jit_patch(jump_pc) -#define jit_notr_c(d, rs) -#define jit_notr_i(d, rs) -#define jit_notr_l(d, rs) -#define jit_notr_s(d, rs) -#define jit_notr_uc(d, rs) -#define jit_notr_ui(d, rs) -#define jit_notr_ul(d, rs) -#define jit_notr_us(d, rs) -#define jit_rsbr_d(d, s1, s2) -#define jit_rsbr_i(d, s1, s2) -#define jit_rsbr_l(d, s1, s2) -#define jit_subi_i(d, rs, is) -#define jit_subi_l(d, rs, is) -@end example - -@item Mandatory unless target arithmetic is always done in the same precision: -@example -#define jit_abs_f(rd,rs) -#define jit_addr_f(rd,s1,s2) -#define jit_beqr_f(label, s1, s2) -#define jit_bger_f(label, s1, s2) -#define jit_bgtr_f(label, s1, s2) -#define jit_bler_f(label, s1, s2) -#define jit_bltgtr_f(label, s1, s2) -#define jit_bltr_f(label, s1, s2) -#define jit_bner_f(label, s1, s2) -#define jit_bordr_f(label, s1, s2) -#define jit_buneqr_f(label, s1, s2) -#define jit_bunger_f(label, s1, s2) -#define jit_bungtr_f(label, s1, s2) -#define jit_bunler_f(label, s1, s2) -#define jit_bunltr_f(label, s1, s2) -#define jit_bunordr_f(label, s1, s2) -#define jit_ceilr_f_i(rd, rs) -#define jit_divr_f(rd,s1,s2) -#define jit_eqr_f(d, s1, s2) -#define jit_extr_d_f(rs, rd) -#define jit_extr_f_d(rs, rd) -#define jit_extr_i_f(rd, rs) -#define jit_floorr_f_i(rd, rs) -#define jit_ger_f(d, s1, s2) -#define jit_gtr_f(d, s1, s2) -#define jit_ler_f(d, s1, s2) -#define jit_ltgtr_f(d, s1, s2) -#define jit_ltr_f(d, s1, s2) -#define jit_movr_f(rd,rs) -#define jit_mulr_f(rd,s1,s2) -#define jit_negr_f(rd,rs) -#define jit_ner_f(d, s1, s2) -#define jit_ordr_f(d, s1, s2) -#define jit_roundr_f_i(rd, rs) -#define jit_rsbr_f(d, s1, s2) -#define jit_sqrt_f(rd,rs) -#define jit_subr_f(rd,s1,s2) -#define jit_truncr_f_i(rd, rs) -#define jit_uneqr_f(d, s1, s2) -#define jit_unger_f(d, s1, s2) -#define jit_ungtr_f(d, s1, s2) -#define jit_unler_f(d, s1, s2) -#define jit_unltr_f(d, s1, s2) -#define jit_unordr_f(d, s1, s2) -@end example - -@item Mandatory if sizeof(long) != sizeof(int)---don't define 
them on other systems: -@example -#define jit_addi_l(d, rs, is) -#define jit_addr_l(d, s1, s2) -#define jit_andi_l(d, rs, is) -#define jit_andr_l(d, s1, s2) -#define jit_beqi_l(label, rs, is) -#define jit_beqr_l(label, s1, s2) -#define jit_bgei_l(label, rs, is) -#define jit_bgei_ul(label, rs, is) -#define jit_bger_l(label, s1, s2) -#define jit_bger_ul(label, s1, s2) -#define jit_bgti_l(label, rs, is) -#define jit_bgti_ul(label, rs, is) -#define jit_bgtr_l(label, s1, s2) -#define jit_bgtr_ul(label, s1, s2) -#define jit_blei_l(label, rs, is) -#define jit_blei_ul(label, rs, is) -#define jit_bler_l(label, s1, s2) -#define jit_bler_ul(label, s1, s2) -#define jit_blti_l(label, rs, is) -#define jit_blti_ul(label, rs, is) -#define jit_bltr_l(label, s1, s2) -#define jit_bltr_ul(label, s1, s2) -#define jit_bosubi_l(label, rs, is) -#define jit_bosubi_ul(label, rs, is) -#define jit_bosubr_l(label, s1, s2) -#define jit_bosubr_ul(label, s1, s2) -#define jit_boaddi_l(label, rs, is) -#define jit_boaddi_ul(label, rs, is) -#define jit_boaddr_l(label, s1, s2) -#define jit_boaddr_ul(label, s1, s2) -#define jit_bmci_l(label, rs, is) -#define jit_bmcr_l(label, s1, s2) -#define jit_bmsi_l(label, rs, is) -#define jit_bmsr_l(label, s1, s2) -#define jit_bnei_l(label, rs, is) -#define jit_bner_l(label, s1, s2) -#define jit_divi_l(d, rs, is) -#define jit_divi_ul(d, rs, is) -#define jit_divr_l(d, s1, s2) -#define jit_divr_ul(d, s1, s2) -#define jit_eqi_l(d, rs, is) -#define jit_eqr_l(d, s1, s2) -#define jit_extr_c_l(d, rs) -#define jit_extr_c_ul(d, rs) -#define jit_extr_s_l(d, rs) -#define jit_extr_s_ul(d, rs) -#define jit_extr_i_l(d, rs) -#define jit_extr_i_ul(d, rs) -#define jit_gei_l(d, rs, is) -#define jit_gei_ul(d, rs, is) -#define jit_ger_l(d, s1, s2) -#define jit_ger_ul(d, s1, s2) -#define jit_gti_l(d, rs, is) -#define jit_gti_ul(d, rs, is) -#define jit_gtr_l(d, s1, s2) -#define jit_gtr_ul(d, s1, s2) -#define jit_hmuli_l(d, rs, is) -#define jit_hmuli_ul(d, rs, is) -#define 
jit_hmulr_l(d, s1, s2) -#define jit_hmulr_ul(d, s1, s2) -#define jit_ldi_l(d, is) -#define jit_ldi_ui(d, is) -#define jit_ldr_l(d, rs) -#define jit_ldr_ui(d, rs) -#define jit_ldxi_l(d, rs, is) -#define jit_ldxi_ui(d, rs, is) -#define jit_ldxr_l(d, s1, s2) -#define jit_ldxr_ui(d, s1, s2) -#define jit_lei_l(d, rs, is) -#define jit_lei_ul(d, rs, is) -#define jit_ler_l(d, s1, s2) -#define jit_ler_ul(d, s1, s2) -#define jit_lshi_l(d, rs, is) -#define jit_lshr_l(d, s1, s2) -#define jit_lti_l(d, rs, is) -#define jit_lti_ul(d, rs, is) -#define jit_ltr_l(d, s1, s2) -#define jit_ltr_ul(d, s1, s2) -#define jit_modi_l(d, rs, is) -#define jit_modi_ul(d, rs, is) -#define jit_modr_l(d, s1, s2) -#define jit_modr_ul(d, s1, s2) -#define jit_movi_l(d, rs) -#define jit_movr_l(d, rs) -#define jit_muli_l(d, rs, is) -#define jit_muli_ul(d, rs, is) -#define jit_mulr_l(d, s1, s2) -#define jit_mulr_ul(d, s1, s2) -#define jit_nei_l(d, rs, is) -#define jit_ner_l(d, s1, s2) -#define jit_ori_l(d, rs, is) -#define jit_orr_l(d, s1, s2) -#define jit_pusharg_l(rs) -#define jit_retval_l(rd) -#define jit_rshi_l(d, rs, is) -#define jit_rshi_ul(d, rs, is) -#define jit_rshr_l(d, s1, s2) -#define jit_rshr_ul(d, s1, s2) -#define jit_sti_l(d, is) -#define jit_str_l(d, rs) -#define jit_stxi_l(d, rs, is) -#define jit_stxr_l(d, s1, s2) -#define jit_subr_l(d, s1, s2) -#define jit_xori_l(d, rs, is) -#define jit_xorr_l(d, s1, s2) -@end example -@end table - -@node Standard functions -@chapter More complex tasks in the platform-independent layer - -There is actually a single function that you @strong{must} define -in the @file{funcs-@var{suffix}.h} file, that is, @code{jit_flush_code}. - -As explained in @usingref{GNU lightning macros, Generating code at -run-time}, its purpose is to flush part of the processor's -instruction cache (usually the part of memory that contains the -generated code), avoiding the processor executing bogus data -that it happens to find in the cache. 
The @code{jit_flush_code} -function takes the first and the last address to flush. - -On many processors (for example, the x86 and the all the processors -in the 68k family up to the 68030), it is not even necessary to flush -the cache. In this case, the contents of the file will simply be - -@example -#ifndef __lightning_funcs_h -#define __lightning_funcs_h - -#define jit_flush_code(dest, end) - -#endif @rem{/* __lightning_core_h */} -@end example - -On other processors, flushing the cache is necessary for -proper behavior of the program; in this case, the file will contain -a proper definition of the function. However, we must make yet -another distinction. - -On some processors, flushing the cache is obtained through a call -to the operating system or to the C run-time library. In this case, -the definition of @code{jit_flush_code} will be very simple: two -examples are the Alpha and the 68040. For the Alpha the code will -be: -@example -#define jit_flush_code(dest, end) \ - __asm__ __volatile__("call_pal 0x86"); -@end example - -@noindent -and, for the Motorola -@example -#define jit_flush_code(start, end) \ - __clear_cache((start), (end)) -@end example - -As you can see, the Alpha does not even need to pass the start and -end address to the function. It is good practice to protect usage of -the @acronym{GNU CC}-specific @code{__asm__} directive by relying -on the preprocessor. For example: - -@example -#if !defined(__GNUC__) && !defined(__GNUG__) -#error Go get GNU C, I do not know how to flush the cache -#error with this compiler. -#else -#define jit_flush_code(dest, end) \ - __asm__ __volatile__("call_pal 0x86"); -#endif -@end example - -@lightning{}'s configuration process tries to compile a dummy file that -includes @code{lightning.h}, and gives a warning if there are problem -with the compiler that is installed on the system. - -In more complex cases, you'll need to write a full-fledged function. 
-Don't forget to make it @code{static}, otherwise you'll have problems -linking programs that include @code{lightning.h} multiple times. An -example, taken from the @file{funcs-ppc.h} file, is: - -@example -#ifndef __lightning_funcs_h -#define __lightning_funcs_h - -#if !defined(__GNUC__) && !defined(__GNUG__) -#error Go get GNU C, I do not know how to flush the cache -#error with this compiler. -#else -static void -jit_flush_code(start, end) - void *start; - void *end; -@{ - register char *dest = start; - - for (; dest <= end; dest += SIZEOF_CHAR_P) - __asm__ __volatile__ - ("dcbst 0,%0; sync; icbi 0,%0; isync"::"r"(dest)); -@} -#endif - -#endif /* __lightning_funcs_h */ -@end example - -The @file{funcs-@var{suffix}.h} file is also the right place to put -helper functions that do complex tasks for the -@file{core-@var{suffix}.h} file. For example, the PowerPC assembler -defines @code{jit_prolog} as a function and puts it in that file (for more -information, @pxref{Implementing the ABI}). Take special care when -defining such a function, as explained in @usingref{Reentrancy, -Reentrant usage of @lightning{}}. 
- - -@node Floating-point macros -@chapter Implementing macros for floating point - diff --git a/doc/printf.c b/doc/printf.c new file mode 100644 index 000000000..52bd2aa1f --- /dev/null +++ b/doc/printf.c @@ -0,0 +1,38 @@ +#include +#include + +static jit_state_t *_jit; + +typedef void (*pvfi)(int); /* Pointer to Void Function of Int */ + +int main(int argc, char *argv[]) +{ + pvfi myFunction; /* ptr to generated code */ + jit_node_t *start, *end; /* a couple of labels */ + jit_node_t *in; /* to get the argument */ + + init_jit(argv[0]); + _jit = jit_new_state(); + + start = jit_note(__FILE__, __LINE__); + jit_prolog(); + in = jit_arg(); + jit_getarg(JIT_R1, in); + jit_pushargi((jit_word_t)"generated %d bytes\n"); + jit_ellipsis(); + jit_pushargr(JIT_R1); + jit_finishi(printf); + jit_ret(); + jit_epilog(); + end = jit_note(__FILE__, __LINE__); + + myFunction = jit_emit(); + + /* call the generated code, passing its size as argument */ + myFunction((char*)jit_address(end) - (char*)jit_address(start)); + + jit_disassemble(); + + finish_jit(); + return 0; +} diff --git a/doc/rfib.c b/doc/rfib.c new file mode 100644 index 000000000..1ce02d5a7 --- /dev/null +++ b/doc/rfib.c @@ -0,0 +1,49 @@ +#include +#include + +static jit_state_t *_jit; + +typedef int (*pifi)(int); /* Pointer to Int Function of Int */ + +int main(int argc, char *argv[]) +{ + pifi fib; + jit_node_t *label; + jit_node_t *call; + jit_node_t *in; /* offset of the argument */ + jit_node_t *ref; /* to patch the forward reference */ + + init_jit(argv[0]); + _jit = jit_new_state(); + + label = jit_label(); + jit_prolog (); + in = jit_arg (); + jit_getarg (JIT_V0, in); /* V0 = n */ + ref = jit_blti (JIT_V0, 2); + jit_subi (JIT_V1, JIT_V0, 1); /* V1 = n-1 */ + jit_subi (JIT_V2, JIT_V0, 2); /* V2 = n-2 */ + jit_prepare(); + jit_pushargr(JIT_V1); + call = jit_finishi(NULL); + jit_patch_at(call, label); + jit_retval(JIT_V1); /* V1 = fib(n-1) */ + jit_prepare(); + jit_pushargr(JIT_V2); + call = jit_finishi(NULL); 
+ jit_patch_at(call, label); + jit_retval(JIT_V2); /* V2 = fib(n-2) */ + jit_addi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_R0, JIT_V1, JIT_V2); /* R0 = V1 + V2 + 1 */ + jit_retr(JIT_R0); + + jit_patch(ref); /* patch jump */ + jit_movi(JIT_R0, 1); /* R0 = 1 */ + jit_retr(JIT_R0); + + /* call the generated code, passing 32 as an argument */ + fib = jit_emit(); + printf("fib(%d) = %d\n", 32, fib(32)); + finish_jit(); + return 0; +} diff --git a/doc/rpn.c b/doc/rpn.c new file mode 100644 index 000000000..f02cef35f --- /dev/null +++ b/doc/rpn.c @@ -0,0 +1,94 @@ +#include +#include + +typedef int (*pifi)(int); /* Pointer to Int Function of Int */ + +static jit_state_t *_jit; + +void stack_push(int reg, int *sp) +{ + jit_stxi_i (*sp, JIT_FP, reg); + *sp += sizeof (int); +} + +void stack_pop(int reg, int *sp) +{ + *sp -= sizeof (int); + jit_ldxi_i (reg, JIT_FP, *sp); +} + +jit_node_t *compile_rpn(char *expr) +{ + jit_node_t *in, *fn; + int stack_base, stack_ptr; + + fn = jit_note(NULL, 0); + jit_prolog(); + in = jit_arg(); + stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); + + jit_getarg_i(JIT_R2, in); + + while (*expr) { + char buf[32]; + int n; + if (sscanf(expr, "%[0-9]%n", buf, &n)) { + expr += n - 1; + stack_push(JIT_R0, &stack_ptr); + jit_movi(JIT_R0, atoi(buf)); + } else if (*expr == 'x') { + stack_push(JIT_R0, &stack_ptr); + jit_movr(JIT_R0, JIT_R2); + } else if (*expr == '+') { + stack_pop(JIT_R1, &stack_ptr); + jit_addr(JIT_R0, JIT_R1, JIT_R0); + } else if (*expr == '-') { + stack_pop(JIT_R1, &stack_ptr); + jit_subr(JIT_R0, JIT_R1, JIT_R0); + } else if (*expr == '*') { + stack_pop(JIT_R1, &stack_ptr); + jit_mulr(JIT_R0, JIT_R1, JIT_R0); + } else if (*expr == '/') { + stack_pop(JIT_R1, &stack_ptr); + jit_divr(JIT_R0, JIT_R1, JIT_R0); + } else { + fprintf(stderr, "cannot compile: %s\n", expr); + abort(); + } + ++expr; + } + jit_retr(JIT_R0); + jit_epilog(); + return fn; +} + +int main(int argc, char *argv[]) +{ + jit_node_t *nc, *nf; + pifi c2f, f2c; + int i; + 
+ init_jit(argv[0]); + _jit = jit_new_state(); + + nc = compile_rpn("32x9*5/+"); + nf = compile_rpn("x32-5*9/"); + (void)jit_emit(); + c2f = (pifi)jit_address(nc); + f2c = (pifi)jit_address(nf); + + printf("\nC:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", i); + printf("\nF:"); + for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i)); + printf("\n"); + + printf("\nF:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", i); + printf("\nC:"); + for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i)); + printf("\n"); + + finish_jit(); + return 0; +} diff --git a/doc/toc.texi b/doc/toc.texi deleted file mode 100644 index 193d4f26f..000000000 --- a/doc/toc.texi +++ /dev/null @@ -1,76 +0,0 @@ -@c These macros are used because these items could go both in the -@c short listing (for partial books) and in the detailed listing -@c (for full books - i.e. using & porting) - -@macro usingmenu{} -@ifset USING -* Installation:: Configuring and installing GNU lightning -* The instruction set:: The RISC instruction set used i GNU lightning -* GNU lightning macros:: GNU lightning's macros -* Reentrancy:: Re-entrant usage of GNU lightning -* Bundling GNU lightning:: Using GNU lightning in your programs -@end ifset -@end macro - -@macro portingmenu{} -@ifset PORTING -* Structure of a port:: An overview of the porting process -* Adjusting configure:: Automatically recognizing the new platform -* Run-time assemblers:: An internal layer to simplify porting -* Standard macros:: The platform-independent layer used by clients. -* Standard functions:: Doing more complex tasks. -* Floating-point macros:: Implementing macros for floating point. -@end ifset -@end macro - -@macro standardmacrosmenu{} -@c This comment is needed because of makeinfo's vagaries... 
-* Forward references:: Implementing forward references -* Common features:: Common features supported by @file{core-common.h} -* Delay slots:: Supporting scheduling of delay slots -* Immediate values:: Supporting arbitrarily sized immediate values -* Implementing the ABI:: Function prologs and epilogs, and argument passing -* Macro list:: Macros composing the platform-independent layer -@end macro - -@menu -@ifclear BOTH -* Overview:: What GNU lightning is -@usingmenu{} -@portingmenu{} -* Future:: Tasks for GNU lightning's subsequent releases -* Acknowledgements:: Acknowledgements for GNU lightning - -@ifset PORTING -@detailmenu ---- The detailed node listing --- - -Standard macros: -@standardmacrosmenu{} -@end detailmenu -@end ifset -@end ifclear - -@ifset BOTH -* Overview:: What GNU lightning is. -* Using GNU lightning:: Using GNU lightning in your programs -* Porting GNU lightning:: Retargeting GNU lightning to a new system -* Future:: Tasks for GNU lightning's subsequent releases -* Acknowledgements:: Acknowledgements for GNU lightning - -@detailmenu ---- The detailed node listing --- - -Using @lightning{}: -@usingmenu{} - -Porting @lightning{}: -@portingmenu{} - -Standard macros: -@standardmacrosmenu{} -@end detailmenu - -@end ifset - -@end menu diff --git a/doc/u-lightning.texi b/doc/u-lightning.texi deleted file mode 100644 index 0c2481b3b..000000000 --- a/doc/u-lightning.texi +++ /dev/null @@ -1,100 +0,0 @@ -\input texinfo.tex @c -*- texinfo -*- -@c %**start of header (This is for running Texinfo on a region.) 
- -@setfilename lightning.info - -@set TITLE Porting @sc{gnu} @i{lightning} -@set TOPIC Porting -@clear BOTH -@set USING -@clear PORTING - -@settitle @value{TITLE} - -@c --------------------------------------------------------------------- -@c Common macros -@c --------------------------------------------------------------------- - -@macro bulletize{a} -@item -\a\ -@end macro - -@macro rem{a} -@r{@i{\a\}} -@end macro - -@macro gnu{} -@sc{gnu} -@end macro - -@macro lightning{} -@gnu{} @i{lightning} -@end macro - -@c --------------------------------------------------------------------- -@c Macros for Texinfo 3.1/4.0 compatibility -@c --------------------------------------------------------------------- - -@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1 -@c compatibility -@macro hlink{url, link} -\link\ (\url\) -@end macro - -@c ifhtml can only be true in Texinfo 4.0, which has uref -@ifhtml -@unmacro hlink - -@macro hlink{url, link} -@uref{\url\, \link\} -@end macro - -@macro email{mail} -@uref{mailto:\mail\, , \mail\} -@end macro - -@macro url{url} -@uref{\url\} -@end macro -@end ifhtml - -@c --------------------------------------------------------------------- -@c References to the other half of the manual -@c --------------------------------------------------------------------- - -@ifset USING -@macro usingref{node, name} -@ref{\node\, , \name\} -@end macro -@end ifset - -@ifclear USING -@macro usingref{node, name} -@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}} -@end macro -@end ifclear - -@ifset PORTING -@macro portingref{node, name} -@ref{\node\, , \name\} -@end macro -@end ifset - -@ifclear PORTING -@macro portingref{node, name} -@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}} -@end macro -@end ifclear - -@c --------------------------------------------------------------------- -@c End of macro section -@c --------------------------------------------------------------------- - -@include 
version.texi -@include body.texi - -@c %**end of header (This is for running Texinfo on a region.) - -@c *********************************************************************** - diff --git a/doc/using.texi b/doc/using.texi deleted file mode 100644 index 332383eea..000000000 --- a/doc/using.texi +++ /dev/null @@ -1,1273 +0,0 @@ -@node Installation -@chapter Configuring and installing @lightning{} - -The first thing to do to use @lightning{} is to configure the -program, picking the set of macros to be used on the host -architecture; this configuration is automatically performed by -the @file{configure} shell script; to run it, merely type: -@example - ./configure -@end example - -@lightning{} supports cross-compiling in that you can choose a -different set of macros from the one needed on the computer that -you are compiling @lightning{} on. For example, -@example - ./configure --host=sparc-sun-linux -@end example - -@noindent will select the SPARC set of runtime assemblers. You can use -configure's ability to make reasonable assumptions about the vendor -and operating system and simply type -@example - ./configure --host=i386 - ./configure --host=ppc - ./configure --host=sparc -@end example - -Another option that @file{configure} accepts is -@code{--enable-assertions}, which enables several consistency checks in -the run-time assemblers. These are not usually needed, so you can -decide to simply forget about it; also remember that these consistency -checks tend to slow down your code generator. - -After you've configured @lightning{}, you don't have to compile it -because it is nothing more than a set of include files. If you want to -compile the examples, run @file{make} as usual. The next important -step is: -@example - make install -@end example - -This ends the process of installing @lightning{}. 
- -@node The instruction set -@chapter @lightning{}'s instruction set - -@lightning{}'s instruction set was designed by deriving instructions -that closely match those of most existing RISC architectures, or -that can be easily syntesized if absent. Each instruction is composed -of: -@itemize @bullet -@item -an operation, like @code{sub} or @code{mul} - -@item -sometimes, an register/immediate flag (@code{r} or @code{i}) - -@item -a type identifier or, occasionally, two -@end itemize - -The second and third field are separated by an underscore; thus, -examples of legal mnemonics are @code{addr_i} (integer add, with three -register operands) and @code{muli_l} (long integer multiply, with two -register operands and an immediate operand). Each instruction takes -two or three operands; in most cases, one of them can be an immediate -value instead of a register. - -@lightning{} supports a full range of integer types: operands can be 1, -2 or 4 bytes long (64-bit architectures might support 8 bytes long -operands), either signed or unsigned. The types are listed in the -following table together with the C types they represent: - -@example - c @r{signed char} - uc @r{unsigned char} - s @r{short} - us @r{unsigned short} - i @r{int} - ui @r{unsigned int} - l @r{long} - ul @r{unsigned long} - f @r{float} - d @r{double} - p @r{void *} -@end example - -Some of these types may not be distinct: for example, (e.g., @code{l} -is equivalent to @code{i} on 32-bit machines, and @code{p} is -substantially equivalent to @code{ul}). - -There are at least seven integer registers, of which six are -general-purpose, while the last is used to contain the frame pointer -(@code{FP}). The frame pointer can be used to allocate and access local -variables on the stack, using the @code{allocai} instruction. 
- -Of the general-purpose registers, at least three are guaranteed to be -preserved across function calls (@code{V0}, @code{V1} and -@code{V2}) and at least three are not (@code{R0}, @code{R1} and -@code{R2}). Six registers are not very much, but this -restriction was forced by the need to target CISC architectures -which, like the x86, are poor of registers; anyway, backends can -specify the actual number of available registers with the macros -@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM} -(for callee-save registers). - -In addition, there is a special @code{RET} register which contains -the return value of the current function (@emph{not} the return value -of callees---use the @code{retval} instruction for this). You should -always remember, however, that writing this register could overwrite -either a general-purpose register or an incoming parameter, depending -on the architecture. - -There are at least six floating-point registers, named @code{FPR0} to -@code{FPR5}. These are caller-save and are separate from the integer -registers on all the supported architectures; on Intel architectures, -the register stack is mapped to a flat register file. As for the -integer registers, the macro @code{JIT_FPR_NUM} yields the number of -floating-point registers, and the special @code{FPRET} register contains -the return value of the current function. - -The complete instruction set follows; as you can see, most non-memory -operations only take integers, long integers (either signed or -unsigned) and pointers as operands; this was done in order to reduce -the instruction set, and because most architectures only provide word -and long word operations on registers. There are instructions that -allow operands to be extended to fit a larger data type, both in a -signed and in an unsigned way. 
- -@table @b -@item Binary ALU operations -These accept three operands; the last one can be an immediate -value for integer operands, or a register for all operand types. -@code{addx} operations must directly follow @code{addc}, and -@code{subx} must follow @code{subc}; otherwise, results are undefined. -@example -addr i ui l ul p f d O1 = O2 + O3 -addi i ui l ul p O1 = O2 + O3 -addxr i ui l ul O1 = O2 + (O3 + carry) -addxi i ui l ul O1 = O2 + (O3 + carry) -addcr i ui l ul O1 = O2 + O3, set carry -addci i ui l ul O1 = O2 + O3, set carry -subr i ui l ul p f d O1 = O2 - O3 -subi i ui l ul p O1 = O2 - O3 -subxr i ui l ul O1 = O2 - (O3 + carry) -subxi i ui l ul O1 = O2 - (O3 + carry) -subcr i ui l ul O1 = O2 - O3, set carry -subci i ui l ul O1 = O2 - O3, set carry -rsbr i ui l ul p f d O1 = O3 - O2 -rsbi i ui l ul p O1 = O3 - O2 -mulr i ui l ul f d O1 = O2 * O3 -muli i ui l ul O1 = O2 * O3 -hmulr i ui l ul O1 = @r{high bits of} O2 * O3 -hmuli i ui l ul O1 = @r{high bits of} O2 * O3 -divr i ui l ul f d O1 = O2 / O3 -divi i ui l ul O1 = O2 / O3 -modr i ui l ul O1 = O2 % O3 -modi i ui l ul O1 = O2 % O3 -andr i ui l ul O1 = O2 & O3 -andi i ui l ul O1 = O2 & O3 -orr i ui l ul O1 = O2 | O3 -ori i ui l ul O1 = O2 | O3 -xorr i ui l ul O1 = O2 ^ O3 -xori i ui l ul O1 = O2 ^ O3 -lshr i ui l ul O1 = O2 << O3 -lshi i ui l ul O1 = O2 << O3 -rshr i ui l ul O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.} -rshi i ui l ul O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.} -@end example - -@item Unary ALU operations -These accept two operands, both of which must be registers. -@example -negr i l f d O1 = -O2 -notr i ui l ul O1 = ~O2 -@end example - -@item Compare instructions -These accept three operands; again, the last can be an immediate -value for integer data types. The last two operands are compared, -and the first operand is set to either 0 or 1, according to -whether the given condition was met or not. 
- -The conditions given below are for the standard behavior of C, -where the ``unordered'' comparison result is mapped to false. - -@example -ltr i ui l ul p f d O1 = (O2 < O3) -lti i ui l ul p O1 = (O2 < O3) -ler i ui l ul p f d O1 = (O2 <= O3) -lei i ui l ul p O1 = (O2 <= O3) -gtr i ui l ul p f d O1 = (O2 > O3) -gti i ui l ul p O1 = (O2 > O3) -ger i ui l ul p f d O1 = (O2 >= O3) -gei i ui l ul p O1 = (O2 >= O3) -eqr i ui l ul p f d O1 = (O2 == O3) -eqi i ui l ul p O1 = (O2 == O3) -ner i ui l ul p f d O1 = (O2 != O3) -nei i ui l ul p O1 = (O2 != O3) -unltr f d O1 = !(O2 >= O3) -unler f d O1 = !(O2 > O3) -ungtr f d O1 = !(O2 <= O3) -unger f d O1 = !(O2 < O3) -uneqr f d O1 = !(O2 < O3) && !(O2 > O3) -ltgtr f d O1 = !(O2 >= O3) || !(O2 <= O3) -ordr f d O1 = (O2 == O2) && (O3 == O3) -unordr f d O1 = (O2 != O2) || (O3 != O3) -@end example - -@item Transfer operations -These accept two operands; for @code{ext} both of them must be -registers, while @code{mov} accepts an immediate value as the second -operand. - -Unlike @code{movr} and @code{movi}, the other instructions are applied -between operands of different data types, and they need @strong{two} -data type specifications. You can use @code{extr} to convert between -integer data types, in which case the first must be smaller in size -than the second; for example @code{extr_c_ui} is correct while -@code{extr_ul_us} is not. You can also use @code{extr} to convert -an integer to a floating point value: the only available possibilities -are @code{extr_i_f} and @code{extr_i_d}. The other instructions -convert a floating point value to an integer, so the possible -suffixes are @code{_f_i} and @code{_d_i}. 
- -@example -movr i ui l ul p f d O1 = O2 -movi i ui l ul p f d O1 = O2 -extr c uc s us i ui l ul f d O1 = O2 -roundr i f d O1 = round(O2) -truncr i f d O1 = trunc(O2) -floorr i f d O1 = floor(O2) -ceilr i f d O1 = ceil(O2) -@end example - -Note that the order of the arguments is @emph{destination first, -source second} as for all other @lightning{} instructions, but -the order of the types is always reversed with respect to that -of the arguments: @emph{shorter}---source---@emph{first, -longer}---destination---@emph{second}. This happens for historical -reasons. - -@item Network extensions -These accept two operands, both of which must be registers; these -two instructions actually perform the same task, yet they are -assigned to two mnemonics for the sake of convenience and -completeness. As usual, the first operand is the destination and -the second is the source. -@example -hton us ui @r{Host-to-network (big endian) order} -ntoh us ui @r{Network-to-host order } -@end example - -@item Load operations -@code{ld} accepts two operands while @code{ldx} accepts three; -in both cases, the last can be either a register or an immediate -value. Values are extended (with or without sign, according to -the data type specification) to fit a whole register. -@example -ldr c uc s us i ui l ul p f d O1 = *O2 -ldi c uc s us i ui l ul p f d O1 = *O2 -ldxr c uc s us i ui l ul p f d O1 = *(O2+O3) -ldxi c uc s us i ui l ul p f d O1 = *(O2+O3) -@end example - -@item Store operations -@code{st} accepts two operands while @code{stx} accepts three; in -both cases, the first can be either a register or an immediate -value. Values are sign-extended to fit a whole register. 
-@example -str c uc s us i ui l ul p f d *O1 = O2 -sti c uc s us i ui l ul p f d *O1 = O2 -stxr c uc s us i ui l ul p f d *(O1+O2) = O3 -stxi c uc s us i ui l ul p f d *(O1+O2) = O3 -@end example - -@item Argument management -These are: -@example -prepare i f d -pusharg c uc s us i ui l ul p f d -getarg c uc s us i ui l ul p f d -arg c uc s us i ui l ul p f d -retval c uc s us i ui l ul p -@end example - -Of these, the first two are used by the caller, while the last two -are used by the callee. A code snippet that wants to call another -procedure and has to pass registers must, in order: use the -@code{prepare} instruction, giving the number of arguments to -be passed to the procedure (once for each data type); use -@code{pusharg} to push the arguments @strong{in reverse order}; -and use @code{calli} or @code{finish} (explained below) to -perform the actual call. - -@code{arg} and @code{getarg} are used by the callee. -@code{arg} is different from other instruction in that it does not -actually generate any code: instead, it is a function which returns -a value to be passed to @code{getarg}.@footnote{``Return a -value'' means that @lightning{} macros that compile these -instructions return a value when expanded.} You should call -@code{arg} as soon as possible, before any function call or, more -easily, right after the @code{prolog} or @code{leaf} instructions -(which are treated later). - -@code{getarg} accepts a register argument and a value returned by -@code{arg}, and will move that argument to the register, extending -it (with or without sign, according to the data type specification) -to fit a whole register. These instructions are more intimately -related to the usage of the @lightning{} instruction set in code -that generates other code, so they will be treated more -specifically in @ref{GNU lightning macros, , Generating code at -run-time}. - -Finally, the @code{retval} instruction fetches the return value of a -called function in a register. 
The @code{retval} instruction takes a -register argument and copies the return value of the previously called -function in that register. A function should put its own return value -in the @code{RET} register before returning. @xref{Fibonacci, the -Fibonacci numbers}, for an example. - -You should observe a few rules when using these macros. First of -all, it is not allowed to call functions with more than six arguments; -this was done to simplify and speed up the implementation on -architectures that use registers for parameter passing. - -You should not nest calls to @code{prepare}, nor call zero-argument -functions (which do not need a call to @code{prepare}) inside a -@code{prepare/calli} or @code{prepare/finish} block. Doing this -might corrupt already pushed arguments. - -You @strong{cannot} pass parameters between subroutines using -the six general-purpose registers. This might work only when -targeting particular architectures. - -On the other hand, it is possible to assume that callee-saved registers -(@code{R0} through @code{R2}) are not clobbered by another dynamically -generated function which does not use them as operands in its code and -which does not return a value. - -@item Branch instructions -Like @code{arg}, these also return a value which, in this case, -is to be used to compile forward branches as explained in -@ref{Fibonacci, , Fibonacci numbers}. They accept a pointer to the -destination of the branch and two operands to be compared; of these, -the last can be either a register or an immediate. 
They are: -@example -bltr i ui l ul p f d @r{if }(O2 < O3)@r{ goto }O1 -blti i ui l ul p @r{if }(O2 < O3)@r{ goto }O1 -bler i ui l ul p f d @r{if }(O2 <= O3)@r{ goto }O1 -blei i ui l ul p @r{if }(O2 <= O3)@r{ goto }O1 -bgtr i ui l ul p f d @r{if }(O2 > O3)@r{ goto }O1 -bgti i ui l ul p @r{if }(O2 > O3)@r{ goto }O1 -bger i ui l ul p f d @r{if }(O2 >= O3)@r{ goto }O1 -bgei i ui l ul p @r{if }(O2 >= O3)@r{ goto }O1 -beqr i ui l ul p f d @r{if }(O2 == O3)@r{ goto }O1 -beqi i ui l ul p @r{if }(O2 == O3)@r{ goto }O1 -bner i ui l ul p f d @r{if }(O2 != O3)@r{ goto }O1 -bnei i ui l ul p @r{if }(O2 != O3)@r{ goto }O1 - -bunltr f d @r{if }!(O2 >= O3)@r{ goto }O1 -bunler f d @r{if }!(O2 > O3)@r{ goto }O1 -bungtr f d @r{if }!(O2 <= O3)@r{ goto }O1 -bunger f d @r{if }!(O2 < O3)@r{ goto }O1 -buneqr f d @r{if }!(O2 < O3) && !(O2 > O3)@r{ goto }O1 -bltgtr f d @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1 -bordr f d @r{if } (O2 == O2) && (O3 == O3)@r{ goto }O1 -bunordr f d @r{if } (O2 != O2) || (O3 != O3)@r{ goto }O1 - -bmsr i ui l ul @r{if }O2 & O3@r{ goto }O1 -bmsi i ui l ul @r{if }O2 & O3@r{ goto }O1 -bmcr i ui l ul @r{if }!(O2 & O3)@r{ goto }O1 -bmci i ui l ul @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.} -boaddr i ui l ul O2 += O3@r{, goto }O1@r{ on overflow} -boaddi i ui l ul O2 += O3@r{, goto }O1@r{ on overflow} -bosubr i ui l ul O2 -= O3@r{, goto }O1@r{ on overflow} -bosubi i ui l ul O2 -= O3@r{, goto }O1@r{ on overflow} -@end example - -@item Jump and return operations -These accept one argument except @code{ret} which has none; the -difference between @code{finish} and @code{calli} is that the -latter does not clean the stack from pushed parameters (if any) -and the former must @strong{always} follow a @code{prepare} -instruction.
-@example -calli (not specified) @r{function call to O1} -callr (not specified) @r{function call to a register} -finish (not specified) @r{function call to O1} -finishr (not specified) @r{function call to a register} -jmpi/jmpr (not specified) @r{unconditional jump to O1} -ret (not specified) @r{return from subroutine} -retval c uc s us i ui l ul p f d @r{move return value} - @r{to register} -@end example - -Like branch instructions, @code{jmpi} also returns a value which is to -be used to compile forward branches. @xref{Fibonacci, , Fibonacci -numbers}. - -@item Function prolog - -These macros are used to set up the function prolog, in particular to -declare the number of arguments accepted by a function, and to reserve -space on the stack to be used for variables. They accept a single -numeric argument. - -@example -prolog (not specified) @r{function prolog for O1 args} -leaf (not specified) @r{the same for leaf functions} -allocai (not specified) @r{reserve space on the stack} -@end example - -Results are undefined when using function calls in a leaf function. - -@code{allocai} receives the number of bytes to allocate and returns -the offset from the frame pointer register @code{FP} to the base of -the area. The area is aligned to an @code{int}; future versions of -@lightning{} may provide more fine-grained control on the alignment of -stack-allocated variables. -@end table - -As a small appetizer, here is a small function that adds 1 to the input -parameter (an @code{int}). I'm using an assembly-like syntax here which -is a bit different from the one used when writing real subroutines with -@lightning{}; the real syntax will be introduced in @ref{GNU lightning -macros, , Generating code at run-time}. - -@example -incr: - leaf 1 -in = arg_i @rem{! We have an integer argument} - getarg_i R0, in @rem{! Move it to R0} - addi_i RET, R0, 1 @rem{! Add 1\, put result in return value} - ret @rem{!
And return the result} -@end example - -And here is another function which uses the @code{printf} function from -the standard C library to write a number in hexadecimal notation: - -@example -printhex: - prolog 1 -in = arg_i @rem{! Same as above} - getarg_i R0, in - prepare 2 @rem{! Begin call sequence for printf} - pusharg_i R0 @rem{! Push second argument} - pusharg_p "%x" @rem{! Push format string} - finish printf @rem{! Call printf} - ret @rem{! Return to caller} -@end example - -@node GNU lightning macros -@chapter Generating code at run-time - -To use @lightning{}, you should include the @file{lightning.h} file that -is put in your include directory by the @samp{make install} command. -That include file defines about four hundred public macros (plus -others that are private to @lightning{}), one for each opcode listed -above. - -Each of the instructions above translates to a macro. All you have to -do is prepend @code{jit_} (lowercase) to opcode names and @code{JIT_} -(uppercase) to register names. Of course, parameters are to be put -between parentheses, just like with every other @sc{cpp} macro. - -This small tutorial presents four examples: - -@iftex -@itemize @bullet -@item -The @code{incr} function found in @ref{The instruction set, , -@lightning{}'s instruction set}: - -@item -A simple function call to @code{printf} - -@item -An RPN calculator.
- -@item -Fibonacci numbers -@end itemize -@end iftex -@ifnottex -@menu -* incr:: A function which increments a number by one -* printf:: A simple function call to printf -* RPN calculator:: A more complex example, an RPN calculator -* Fibonacci:: Calculating Fibonacci numbers -@end menu -@end ifnottex - -@node incr -@section A function which increments a number by one - -Let's see how to create and use the sample @code{incr} function created -in @ref{The instruction set, , @lightning{}'s instruction set}: - -@example -#include -#include "lightning.h" - -static jit_insn codeBuffer[1024]; - -typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} - -int main() -@{ - pifi incr = (pifi) (jit_set_ip(codeBuffer).iptr); - int in; - - jit_leaf(1); @rem{/* @t{ leaf 1 } */} - in = jit_arg_i(); @rem{/* @t{in = arg_i } */} - jit_getarg_i(JIT_R0, in); @rem{/* @t{ getarg_i R0 } */} - jit_addi_i(JIT_RET, JIT_R0, 1); @rem{/* @t{ addi_i RET\, R0\, 1} */} - jit_ret(); @rem{/* @t{ ret } */} - - jit_flush_code(codeBuffer, jit_get_ip().ptr); - - @rem{/* call the generated code\, passing 5 as an argument */} - printf("%d + 1 = %d\n", 5, incr(5)); - return 0; -@} -@end example - -Let's examine the code line by line (well, almost@dots{}): - -@table @t -@item #include "lightning.h" -You already know about this. It defines all of @lightning{}'s macros. - -@item static jit_insn codeBuffer[1024]; -You might wonder about what is @code{jit_insn}. It is just a type that -is defined by @lightning{}. Its exact definition depends on the -architecture; in general, defining an array of 1024 @code{jit_insn}s -allows one to write 100 to 400 @lightning{} instructions (depending on -the architecture and exact instructions). - -@item typedef int (*pifi)(int); -Just a handy typedef for a pointer to a function that takes an -@code{int} and returns another. 
- -@item pifi incr = (pifi) (jit_set_ip(codeBuffer).iptr); -This is the first @lightning{} macro we encounter that does not map to -an instruction. It is @code{jit_set_ip}, which takes a pointer to an -area of memory where compiled code will be put and returns the same -value, cast to a @code{union} type whose members are pointers to -functions returning different C types. This union is called -@code{jit_code} and is defined as follows: - -@example - typedef union jit_code @{ - char *ptr; - void (*vptr)(); - char (*cptr)(); - unsigned char (*ucptr)(); - short (*sptr)(); - unsigned short (*usptr)(); - int (*iptr)(); - unsigned int (*uiptr)(); - long (*lptr)(); - unsigned long (*ulptr)(); - void * (*pptr)(); - float (*fptr)(); - double (*dptr)(); - @} jit_code; -@end example - -Any of the members could have been used, since the result is soon cast -to type @code{pifi} but, for the sake of clarity, the program uses -@code{iptr}, a pointer to a function with no prototype and returning an -@code{int}. - -Analogous to @code{jit_set_ip} is @code{jit_get_ip}, which does not -modify the instruction pointer---it is nothing more than a cast of the -current @sc{ip} to @code{jit_code}. - -@item int in; -A footnote in @ref{The instruction set, , @lightning{}'s instruction -set}, under the description of @code{arg}, says that macros implementing -@code{arg} return a value---we'll be using this variable to store the -result of @code{arg}. - -@item jit_leaf(1); -Ok, so we start generating code for our beloved function@dots{} it will -accept one argument and won't call any other function. - -@item in = jit_arg_i(); -@itemx jit_getarg_i(JIT_R0, in); -We retrieve the first (and only) argument, an integer, and store it -into the general-purpose register @code{R0}. - -@item jit_addi_i(JIT_RET, JIT_R0, 1); -We add one to the content of the register and store the result in the -return value.
- -@item jit_ret(); -This instruction generates a standard function epilog that returns -the contents of the @code{RET} register. - -@item jit_flush_code(codeBuffer, jit_get_ip().ptr); -This instruction is very important. It flushes the generated code -area out of the processor's instruction cache, so that the processor -does not execute bogus data that it happens to find there. The -@code{jit_flush_code} function accepts the first and the last address -to flush; we use @code{jit_get_ip} to find out the latter. - -@item printf("%d + 1 = %d", 5, incr(5)); -Calling our function is this simple---it is not distinguishable from -a normal C function call, the only difference being that @code{incr} -is a variable. -@end table - -@lightning{} abstracts two phases of dynamic code generation: selecting -instructions that map the standard representation, and emitting binary -code for these instructions. The client program has the responsibility -of describing the code to be generated using the standard @lightning{} -instruction set. - -Let's examine the code generated for @code{incr} on the SPARC and x86 -architectures (on the right is the code that an assembly-language -programmer would write): - -@table @b -@item SPARC -@example - save %sp, -96, %sp - mov %i0, %l0 retl - add %l0, 1, %i0 add %o0, 1, %o0 - ret - restore -@end example -In this case, @lightning{} introduces overhead to create a register -window (not knowing that the procedure is a leaf procedure) and to -move the argument to the general purpose register @code{R0} (which -maps to @code{%l0} on the SPARC). The former overhead could be -avoided by teaching @lightning{} about leaf procedures (@pxref{Future}); -the latter could instead be avoided by rewriting the getarg instruction -as @code{jit_getarg_i(JIT_RET, in)}, which was not done in this -example.
- -@item x86 -@example - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 8(%ebp), %eax movl 4(%esp), %eax - addl $1, %eax incl %eax - popl %edi - popl %esi - popl %ebx - popl %ebp - ret ret -@end example -In this case, the main overhead is due to the function's prolog and -epilog, which is nine instructions long on the x86; a hand-written -routine would not save unused callee-preserved registers on the stack. -It is to be said, however, that this is not a problem in more -complicated uses, because more complex procedures would probably use -the @code{V0} through @code{V2} registers (@code{%ebx}, @code{%esi}, -@code{%edi}); in this case, a hand-written routine would have included -the prolog too. Also, a ten byte prolog would probably be a small -overhead in a more complex function. -@end table - -In such a simple case, the macros that make up the back-end compile -reasonably efficient code, with the notable exception of prolog/epilog -code. - -@node printf -@section A simple function call to @code{printf} - -Again, here is the code for the example: - -@example -#include <stdio.h> -#include "lightning.h" - -static jit_insn codeBuffer[1024]; - -typedef void (*pvfi)(int); @rem{/* Pointer to Void Function of Int */} - -int main() -@{ - pvfi myFunction; @rem{/* ptr to generated code */} - char *start, *end; @rem{/* a couple of labels */} - int in; @rem{/* to get the argument */} - - myFunction = (pvfi) (jit_set_ip(codeBuffer).vptr); - start = jit_get_ip().ptr; - jit_prolog(1); - in = jit_arg_i(); - jit_movi_p(JIT_R0, "generated %d bytes\n"); - jit_getarg_i(JIT_R1, in); - jit_prepare(2); - jit_pusharg_i(JIT_R1); @rem{/* push in reverse order */} - jit_pusharg_p(JIT_R0); - jit_finish(printf); - jit_ret(); - end = jit_get_ip().ptr; - - @rem{/* call the generated code\, passing its size as argument */} - jit_flush_code(start, end); - myFunction(end - start); -@} -@end example - -The function shows how many bytes were generated.
Most of the code -is not very interesting, as it resembles very closely the program -presented in @ref{incr, , A function which increments a number by one}. - -For this reason, we're going to concentrate on just a few statements. - -@table @t -@item start = jit_get_ip().ptr; -@itemx @r{@dots{}} -@itemx end = jit_get_ip().ptr; -These two instructions call the @code{jit_get_ip} macro which was -mentioned in @ref{incr, , A function which increments a number by one} -too. In this case we use the only field of @code{jit_code} that is -not a function pointer: @code{ptr}, which is a simple @code{char *}. - -@item jit_movi_p(JIT_R0, "generated %d bytes\n"); -Note the use of the @samp{p} type specifier, which automatically -casts the second parameter to an @code{unsigned long} to make the -code more clear and less cluttered by typecasts. - -@item jit_prepare(2); -@itemx jit_pusharg_i(JIT_R1); -@itemx jit_pusharg_p(JIT_R0); -@itemx jit_finish(printf); -Once the arguments to @code{printf} have been put in general-purpose -registers, we can start a prepare/pusharg/finish sequence that -moves the argument to either the stack or registers, then calls -@code{printf}, then cleans up the stack. Note how @lightning{} -abstracts the differences between different architectures and -ABIs -- the client program does not know how parameter passing -works on the host architecture. -@end table - -@node RPN calculator -@section A more complex example, an RPN calculator - -We create a small stack-based RPN calculator which applies a series -of operators to a given parameter and to other numeric operands. -Unlike previous examples, the code generator is fully parameterized -and is able to compile different formulas to different functions. -Here is the code for the expression compiler; a sample usage will -follow. - -Since @lightning{} does not provide push/pop instructions, this -example uses a stack-allocated area to store the data.
Such an -area can be allocated using the macro @code{jit_allocai}, which -receives the number of bytes to allocate and returns the offset -from the frame pointer register @code{JIT_FP} to the base of the -area. The area is aligned to an @code{int}; future versions -of @lightning{} may provide more fine-grained control on the -alignment of stack-allocated variables. - -Usually, you will use the @code{ldxi} and @code{stxi} instruction -to access stack-allocated variables. However, it is possible to -use operations such as @code{add} to compute the address of the -variables, and pass the address around. - -@example -#include -#include "lightning.h" - -typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} - -void stack_push(int reg, int *sp) -@{ - jit_stxi_i (*sp, JIT_FP, reg); - *sp += sizeof (int); -@} - -void stack_pop(int reg, int *sp) -@{ - *sp -= sizeof (int); - jit_ldxi_i (reg, JIT_FP, *sp); -@} - -pifi compile_rpn(char *expr) -@{ - pifi fn; - int stack_base, stack_ptr; - int in; - - fn = (pifi) (jit_get_ip().iptr); - jit_leaf(1); - in = jit_arg_i(); - stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); - - jit_getarg_i(JIT_R2, in); - - while (*expr) @{ - char buf[32]; - int n; - if (sscanf(expr, "%[0-9]%n", buf, &n)) @{ - expr += n - 1; - stack_push(JIT_R0, &stack_ptr); - jit_movi_i(JIT_R0, atoi(buf)); - @} else if (*expr == 'x') @{ - stack_push(JIT_R0, &stack_ptr); - jit_movi_i(JIT_R0, JIT_R2); - @} else if (*expr == '+') @{ - stack_pop(JIT_R1, &stack_ptr); - jit_addr_i(JIT_R0, JIT_R1, JIT_R0); - @} else if (*expr == '-') @{ - stack_pop(JIT_R1, &stack_ptr); - jit_subr_i(JIT_R0, JIT_R1, JIT_R0); - @} else if (*expr == '*') @{ - stack_pop(JIT_R1, &stack_ptr); - jit_mulr_i(JIT_R0, JIT_R1, JIT_R0); - @} else if (*expr == '/') @{ - stack_pop(JIT_R1, &stack_ptr); - jit_divr_i(JIT_R0, JIT_R1, JIT_R0); - @} else @{ - fprintf(stderr, "cannot compile: %s\n", expr); - abort(); - @} - ++expr; - @} - jit_movr_i(JIT_RET, JIT_R0); - jit_ret(); - return 
fn; -@} -@end example - -The principle on which the calculator is based is easy: the stack top -is held in R0, while the remaining items of the stack are held in the -memory area that we allocate with @code{allocai}. Compiling a numeric -operand or the argument @code{x} pushes the old stack top onto the -stack and moves the operand into R0; compiling an operator pops the -second operand off the stack into R1, and compiles the operation so -that the result goes into R0, thus becoming the new stack top. - -This example allocates a fixed area for 32 @code{int}s. This is not -a problem when the function is a leaf like in this case; in a full-blown -compiler you will want to analyze the input and determine the number -of needed stack slots---a very simple example of register allocation. -The area is then managed like a stack using @code{stack_push} and -@code{stack_pop}. - -Try to locate a call to @code{jit_set_ip} in the source code. You -will not find one; this means that the client has to manually set -the instruction pointer. This technique has one advantage and one -drawback. The advantage is that the client can simply set the -instruction pointer once and then generate code for multiple functions, -one after another, without caring about passing a different instruction -pointer each time; see @ref{Reentrancy, , Re-entrant usage of -@lightning{}} for the disadvantage. 
- -Source code for the client (which lies in the same source file) follows: - -@example -static jit_insn codeBuffer[1024]; - -int main() -@{ - pifi c2f, f2c; - int i; - - jit_set_ip(codeBuffer); - c2f = compile_rpn("32x9*5/+"); - f2c = compile_rpn("x32-5*9/"); - jit_flush_code(codeBuffer, jit_get_ip().ptr); - - printf("\nC:"); - for (i = 0; i <= 100; i += 10) printf("%3d ", i); - printf("\nF:"); - for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i)); - printf("\n"); - - printf("\nF:"); - for (i = 32; i <= 212; i += 10) printf("%3d ", i); - printf("\nC:"); - for (i = 32; i <= 212; i += 10) printf("%3d ", f2c(i)); - printf("\n"); - return 0; -@} -@end example - -The client displays a conversion table between Celsius and Fahrenheit -degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The -formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9}, -respectively. - -Providing the formula as an argument to @code{compile_rpn} effectively -parameterizes code generation, making it possible to use the same code -to compile different functions; this is what makes dynamic code -generation so powerful. - -The @file{rpn.c} file in the @lightning{} distribution includes a more -complete (and more complex) implementation of @code{compile_rpn}, -which does constant folding and is able to assemble instructions with -an immediate parameter. Still, it is based on the same principle and -also uses @code{allocai} to allocate space for the stack. - -@node Fibonacci -@section Fibonacci numbers - -The code in this section calculates a variant of the Fibonacci sequence. 
-While the traditional Fibonacci sequence is modeled by the recurrence -relation: -@display - f(0) = f(1) = 1 - f(n) = f(n-1) + f(n-2) -@end display - -@noindent -the functions in this section calculate the following sequence, which -is more interesting as a benchmark@footnote{That's because, as is -easily seen, the sequence represents the number of activations of the -@code{nfibs} procedure that are needed to compute its value through -recursion.}: -@display - nfibs(0) = nfibs(1) = 1 - nfibs(n) = nfibs(n-1) + nfibs(n-2) + 1 -@end display - -The purpose of this example is to introduce branches. There are two -kinds of branches: backward branches and forward branches. We'll -present the calculation in a recursive and iterative form; the -former only uses forward branches, while the latter uses both. - -@example -#include <stdio.h> -#include "lightning.h" - -static jit_insn codeBuffer[1024]; - -typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} - -int main() -@{ - pifi nfibs = (pifi) (jit_set_ip(codeBuffer).iptr); - int in; @rem{/* offset of the argument */} - jit_insn *ref; @rem{/* to patch the forward reference */} - - jit_prolog (1); - in = jit_arg_ui (); - jit_getarg_ui(JIT_V0, in); @rem{/* V0 = n */} - ref = jit_blti_ui (jit_forward(), JIT_V0, 2); - jit_subi_ui (JIT_V1, JIT_V0, 1); @rem{/* V1 = n-1 */} - jit_subi_ui (JIT_V2, JIT_V0, 2); @rem{/* V2 = n-2 */} - jit_prepare(1); - jit_pusharg_ui(JIT_V1); - jit_finish(nfibs); - jit_retval(JIT_V1); @rem{/* V1 = nfibs(n-1) */} - jit_prepare(1); - jit_pusharg_ui(JIT_V2); - jit_finish(nfibs); - jit_retval(JIT_V2); @rem{/* V2 = nfibs(n-2) */} - jit_addi_ui(JIT_V1, JIT_V1, 1); - jit_addr_ui(JIT_RET, JIT_V1, JIT_V2); @rem{/* RET = V1 + V2 + 1 */} - jit_ret(); - - jit_patch(ref); @rem{/* patch jump */} - jit_movi_i(JIT_RET, 1); @rem{/* RET = 1 */} - jit_ret(); - - @rem{/* call the generated code\, passing 32 as an argument */} - jit_flush_code(codeBuffer, jit_get_ip().ptr); - printf("nfibs(%d) = %d", 32, nfibs(32));
- return 0; -@} -@end example - -As said above, this is the first example of dynamically compiling -branches. Branch instructions have three operands: two contain the -values to be compared, while the first is a @dfn{label}; @lightning{} -labels are represented as @code{jit_insn *} values. Unlike other -instructions (apart from @code{arg}, which is actually a directive -rather than an instruction), branch instructions also return a value -which, as we see in the example above, can be used to compile -forward references. - -Compiling a forward reference is a two-step operation. First, a -branch is compiled with a dummy label, since the actual destination -of the jump is not yet known; the dummy label is returned by the -@code{jit_forward()} macro. The value returned by the branch -instruction is saved to be used later. - -Then, when the destination of the jump is reached, another macro -is used, @code{jit_patch()}. This macro must be called once for -@strong{every} point in which the code had a forward branch to the -instruction following @code{jit_patch} (in this case a @code{movi_i} -instruction). - -Now, here is the iterative version: - -@example -#include <stdio.h> -#include "lightning.h" - -static jit_insn codeBuffer[1024]; - -typedef int (*pifi)(int); @rem{/* Pointer to Int Function of Int */} - -int main() -@{ - pifi nfibs = (pifi) (jit_set_ip(codeBuffer).iptr); - int in; @rem{/* offset of the argument */} - jit_insn *ref; @rem{/* to patch the forward reference */} - jit_insn *loop; @rem{/* start of the loop */} - - jit_leaf (1); - in = jit_arg_ui (); - jit_getarg_ui(JIT_R2, in); @rem{/* R2 = n */} - jit_movi_ui (JIT_R1, 1); - ref = jit_blti_ui (jit_forward(), JIT_R2, 2); - jit_subi_ui (JIT_R2, JIT_R2, 1); - jit_movi_ui (JIT_R0, 1); - - loop= jit_get_label(); - jit_subi_ui (JIT_R2, JIT_R2, 1); @rem{/* decr.
counter */} - jit_addr_ui (JIT_V0, JIT_R0, JIT_R1); @rem{/* V0 = R0 + R1 */} - jit_movr_ui (JIT_R0, JIT_R1); @rem{/* R0 = R1 */} - jit_addi_ui (JIT_R1, JIT_V0, 1); @rem{/* R1 = V0 + 1 */} - jit_bnei_ui (loop, JIT_R2, 0); @rem{/* if (R2) goto loop; */} - - jit_patch(ref); @rem{/* patch forward jump */} - jit_movr_ui (JIT_RET, JIT_R1); @rem{/* RET = R1 */} - jit_ret (); - - @rem{/* call the generated code\, passing 36 as an argument */} - jit_flush_code(codeBuffer, jit_get_ip().ptr); - printf("nfibs(%d) = %d", 36, nfibs(36)); - return 0; -@} -@end example - -This code calculates the recurrence relation using iteration (a -@code{for} loop in high-level languages). There is still a forward -reference (indicated by the @code{jit_forward}/@code{jit_patch} pair); -there are no function calls anymore: instead, there is a backward -jump (the @code{bnei} at the end of the loop). - -In this case, the destination address should be known, because the -jump lands on an instruction that has already been compiled. -However the program must make a provision and remember the address -where the jump will land. This is achieved with @code{jit_get_label}, -yet another macro that is very similar to @code{jit_get_ip} but, -instead of a @code{jit_code} union, it returns a @code{jit_insn *} -that the branch macros accept. - -Now, let's make one more change: let's rewrite the loop like this: - -@example - @r{@dots{}} - - jit_delay( - jit_movi_ui (JIT_R1, 1), - ref = jit_blti_ui (jit_forward(), JIT_R2, 2)); - jit_subi_ui (JIT_R2, JIT_R2, 1); - - loop= jit_get_label(); - jit_subi_ui (JIT_R2, JIT_R2, 1); @rem{/* decr.
counter */} - jit_addr_ui (JIT_V0, JIT_R0, JIT_R1); @rem{/* V0 = R0 + R1 */} - jit_movr_ui (JIT_R0, JIT_R1); @rem{/* R0 = R1 */} - jit_delay( - jit_addi_ui (JIT_R1, JIT_V0, 1), @rem{/* R1 = V0 + 1 */} - jit_bnei_ui (loop, JIT_R2, 0)); @rem{/* if (R2) goto loop; */} - - @r{@dots{}} -@end example - -The @code{jit_delay} macro is used to schedule delay slots in jumps and -branches. This is optional, but might lead to performance improvements -in tight inner loops (of course not in a loop that is executed 35 -times, but this is just an example). - -@code{jit_delay} takes two @lightning{} instructions, a @dfn{delay -instruction} and a @dfn{branch instruction}. Note that the two -instructions must be written in execution order (first the delay -instruction, then the branch instruction), @strong{not} with the branch -first. If the current machine has a delay slot, the delay instruction -(or part of it) is placed in the delay slot after the branch -instruction; otherwise, it emits the delay instruction before the branch -instruction. The delay instruction must not depend on being executed -before or after the branch. - -Instead of @code{jit_patch}, you can use @code{jit_patch_at}, which -takes two arguments: the first is the same as for @code{jit_patch}, and -the second is the value to be patched in. In other words, these two -invocations have the same effect: - -@example - jit_patch (jump_pc); - jit_patch_at (jump_pc, jit_get_ip ()); -@end example - -Dual to branches and @code{jit_patch_at} are @code{jit_movi_p} -and @code{jit_patch_movi}, which can also be used to implement -forward references. @code{jit_movi_p} is carefully implemented -to use an encoding that is as long as possible, so that it can -always be patched; in addition, like branches, it will return -an address which is then passed to @code{jit_patch_movi}. The -usage of @code{jit_patch_movi} is similar to @code{jit_patch_at}.
- -@node Reentrancy -@chapter Re-entrant usage of @lightning{} - -By default, @lightning{} is able to compile different functions at the -same time as long as it happens in different object files, and on the -other hand constrains code generation tasks to reside in a single -object file. - -The reason for this is not apparent, but is easily explained: -the @file{lightning.h} header file defines its state as a -@code{static} variable, so calls to @code{jit_set_ip} and -@code{jit_get_ip} residing in different files access different -instruction pointers. This was not done without reason: it makes -the usage of @lightning{} much simpler, as it limits the initialization -tasks to the bare minimum and removes the need to link the program -with a separate library. - -On the other hand, multi-threaded or otherwise concurrent programs -require reentrancy in the code generator, so this approach cannot be -the only one. In fact, it is possible to define your own copy of -@lightning{}'s instruction state by defining a variable of type -@code{jit_state} and @code{#define}-ing @code{_jit} to it: - -@example - struct jit_state lightning; - #define _jit lightning -@end example - -You are free to define the @code{jit_state} variable as you like: -@code{extern}, @code{static} to a function, @code{auto}, or global. - -This feature takes advantage of an aspect of macros (@dfn{cascaded -macros}), which is documented thus in @acronym{CPP}'s reference manual: - -@quotation -A cascade of macros is when one macro's body contains a reference to -another macro. This is very common practice. For example, -@example -#define BUFSIZE 1020 -#define TABLESIZE BUFSIZE -@end example -This is not at all the same as defining @code{TABLESIZE} to be -@samp{1020}. 
The @code{#define} for @code{TABLESIZE} uses exactly the -body you specify---in this case, @code{BUFSIZE}---and does not check to -see whether it too is the name of a macro; it's only when you use -@code{TABLESIZE} that the result of its expansion is checked for more -macro names. - -This makes a difference if you change the definition of @code{BUFSIZE} -at some point in the source file. @code{TABLESIZE}, defined as shown, -will always expand using the definition of @code{BUFSIZE} that is -currently in effect: -#define BUFSIZE 1020 -#define TABLESIZE BUFSIZE -#undef BUFSIZE -#define BUFSIZE 37 - -Now @code{TABLESIZE} expands (in two stages) to `37'. (The @code{#undef} -is to prevent any warning about the nontrivial redefinition of -@code{BUFSIZE}.) -@end quotation - -@noindent -In the same way, @code{jit_get_label} will adopt whatever definition of -@code{_jit} is in effect: -@example -#define jit_get_label() (_jit.pc) -@end example - -Special care must be taken when functions residing in separate files -must access the same state. This could be the case, for example, if a -special library contained function for strength reduction of -multiplications to adds & shifts, or maybe of divisions to -multiplications and shifts. The function would be compiled using a -single definition of @code{_jit} and that definition would be used -whenever the function would be called. - -Since @lightning{} uses a feature of the preprocessor to obtain -re-entrancy, it makes sense to rely on the preprocessor in this case -too. 
- -The idea is to pass the current @code{struct jit_state} to the -function: - -@example -static void -_opt_muli_i(jit, dest, source, n) - register struct jit_state *jit; - register int dest, source, n; -@{ -#define _jit jit -@dots{} -#undef _jit -@} -@end example - -@noindent -doing this unbeknownst to the client, using a macro in the header file: - -@example -extern void _opt_muli_i(struct jit_state *, int, int, int); - -#define opt_muli_i(rd, rs, n) _opt_muli_i(&_jit, (rd), (rs), (n)) -@end example - - -@section Registers -@chapter Accessing the whole register file - -As mentioned earlier in this chapter, all @lightning{} back-ends are -guaranteed to have at least six general-purpose integer registers and -six floating-point registers, but many back-ends will have more. - -To access the entire register files, you can use the -@code{JIT_R}, @code{JIT_V} and @code{JIT_FPR} macros. They -accept a parameter that identifies the register number, which -must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM} -and @code{JIT_FPR_NUM} respectively; the number need not be -constant. Of course, expressions like @code{JIT_R0} and -@code{JIT_R(0)} denote the same register, and likewise for -integer callee-saved, or floating-point, registers. - -@node Bundling GNU lightning -@chapter Using @lightning{} in your programs - -It is very easy to include @lightning{}'s source code (without the -documentation and examples) into your program's distribution -so that people don't need to have it installed in order to use it. - -Here is a step by step explanation of what to do: - -@enumerate -@item Run @command{lightningize} from your package's main -distribution directory. -@example - lightningize -@end example - -@noindent -This will copy the source code for the @lightning{} back ends -into the @file{lightning} directory of your package. - -@item If you're using Automake, you might be pleased to know that -@file{Makefile.am} files will be already there. 
- -If you're not using Automake and @code{aclocal}, instead, -you should delete the @file{Makefile.am} files (they are of no use -to you) and copy the contents of the @file{lightning.m4} file, found in -@command{aclocal}'s macro repository (usually @file{/usr/share/aclocal}, -to your @file{configure.in} or @file{acinclude.m4} or @file{aclocal.m4} file. - -@item Include a call to the @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} -macro in your @file{configure.in} file. -@end enumerate - -@code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} will first look for a -pre-installed copy of @lightning{} and, if it can be found, it will -use it; otherwise, it will test if there is a back-end for the host -system. If @lightning{} is already installed, or if the system is -supported by lightning, it will define the @code{HAVE_LIGHTNING} -symbol. - -In addition, an Automake conditional named @code{HAVE_INSTALLED_LIGHTNING} -will be set if @lightning{} is already installed, which can be used to -set up include paths appropriately. - -Finally, @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} accepts two -optional parameters: respectively, an action to be taken if @lightning{} -is available, and an action to be taken if it is not. diff --git a/doc/version.texi b/doc/version.texi index c9347b90b..b7b6751ff 100644 --- a/doc/version.texi +++ b/doc/version.texi @@ -1,4 +1,4 @@ -@set UPDATED 3 June 2009 -@set UPDATED-MONTH June 2009 -@set EDITION 1.2c -@set VERSION 1.2c +@set UPDATED 24 January 2013 +@set UPDATED-MONTH January 2013 +@set EDITION 2.0 +@set VERSION 2.0