diff --git a/.gitignore b/.gitignore
index a89a8e180..ddfc42407 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ missing
 stamp-h1
 test-driver
 check/.deps
+doc/.deps
 lib/.deps
 m4/libtool.m4
 m4/lt~obsolete.m4
diff --git a/ChangeLog b/ChangeLog
index 84d3c4391..1322aacf7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,31 @@
+2013-01-24 Paulo Andrade <pcpa@gnu.org>
+
+	* check/Makefile.am: "make debug" target should pass only
+	the main test tool program as argument for running gdb
+
+	* configure.ac: Add the --enable-assertions option.
+
+	* doc/Makefile.am, doc/body.texi, doc/lightning.texi:
+	Major rewrite of the documentation to match the current
+	implementation.
+
+	* doc/version.texi: Automatic date update.
+
+	* doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c:
+	Implementation of the documentation examples, that are also
+	compiled during a normal build.
+
+	* doc/p-lightning.texi, doc/porting.texi, doc/toc.texi,
+	doc/u-lightning.texi, doc/using.texi: These files were
+	renamed in the documentation rewrite, as the documentation
+	was significantly trimmed due to full removal of the porting
+	chapters. Better porting documentation should be added, but
+	for the moment the documentation that did not match the
+	implementation was simply removed.
+
 2013-01-18 Paulo Andrade <pcpa@gnu.org>
 
-	lib/jit_note.c: Correct bounds check and wrong code keeping
+	* lib/jit_note.c: Correct bounds check and wrong code keeping
 	a pointer that could be changed after a realloc call.
 
 2013-01-18 Paulo Andrade <pcpa@gnu.org>
diff --git a/check/Makefile.am b/check/Makefile.am
index ce78c49fd..33ce2134e 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -172,6 +172,6 @@ CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
 
-debug:		$(check_PROGRAMS)
-	$(LIBTOOL) --mode=execute gdb $(check_PROGRAMS)
+debug:		lightning	# convenience target: run the main test tool (lightning) under gdb
+	$(LIBTOOL) --mode=execute gdb lightning
 
diff --git a/configure.ac b/configure.ac
index 9d5e43350..83a7805dd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -56,6 +56,17 @@ if test "x$DISASSEMBLER" != "xno"; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDISASSEMBLER=1"
 fi
 
+AC_ARG_ENABLE([assertions],
+	      [AS_HELP_STRING([--enable-assertions],
+			     [Enable runtime code generation assertions])],
+	      [DEBUG=$enableval], [DEBUG=auto])
+if test "x$DEBUG" = xyes; then	# only an explicit "yes" turns assertions on
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
+else				# auto, no, or any other value: define NDEBUG
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
+    DEBUG=no
+fi
+
 cpu=
 case "$target_cpu" in
     i?86|x86_64)	cpu=x86		;;
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 3baca2d69..3f4ff64ab 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,7 +1,40 @@
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+
 info_TEXINFOS = lightning.texi 
-EXTRA_TEXINFOS = u-lightning.texi p-lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
 
-lightning_TEXINFOS = body.texi toc.texi using.texi porting.texi version.texi
-u_lightning_TEXINFOS = body.texi toc.texi using.texi version.texi
-p_lightning_TEXINFOS = body.texi toc.texi porting.texi version.texi
+lightning_TEXINFOS = body.texi version.texi
+
+noinst_PROGRAMS = incr printf rpn rfib ifib
+# Build the library on demand; "&&" keeps $(MAKE) from running here if cd fails.
+$(top_builddir)/lib/liblightning.la:
+	cd $(top_builddir)/lib && $(MAKE) $(AM_MAKEFLAGS) liblightning.la
+
+# Every example program links against the in-tree library; automake applies
+# the global LDADD to each program that has no <prog>_LDADD of its own.
+LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+
+# One single-file program per example.
+incr_SOURCES = incr.c
+
+printf_SOURCES = printf.c
+
+rpn_SOURCES = rpn.c
+
+rfib_SOURCES = rfib.c
+
+ifib_SOURCES = ifib.c
diff --git a/doc/body.texi b/doc/body.texi
index 7c20d5152..af924e807 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -1,66 +1,32 @@
-@ifinfo
-@dircategory @lightning{}, a library for dynamic code generation
-@direntry
-     * @value{TITLE}: (lightning).
-@end direntry
-
-This file documents GNU lightning, Version @value{VERSION}.
-It was last updated on @value{UPDATED}.
-
-Copyright @copyright{} 2000 Free Software Foundation, Inc.
-Authored by Paolo Bonzini.
-
-This document is released under the terms of the GNU Free Documentation
-License as published by the Free Software Foundation; either version 1.1, or
-(at your option) any later version.
-
-You should have received a copy of the GNU Free Documentation License along
-with GNU lightning; see the file @file{COPYING.DOC}.  If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  
-
-There are no Secondary Sections, no Cover Texts and no Invariant Sections
-(as defined in the license); this text, along with its equivalent in the
-printed manual, constitutes the Title Page.
-@end ifinfo
-
-@setchapternewpage odd
-
-@titlepage
-@title @value{TITLE}
-@subtitle Version @value{VERSION}
-@subtitle @value{UPDATE-MONTH}
-
-@author by Paolo Bonzini
-
-@c  The following two commands start the copyright page.
-@page
-@vskip 0pt plus 1filll
-Copyright 1988-92, 1994-95, 1999, 2000 Free Software Foundation, Inc.
-
-This document is released under the terms of the @sc{gnu} Free Documentation
-License as published by the Free Software Foundation; either version 1.1, or
-(at your option) any later version.
-
-You should have received a copy of the @sc{gnu} Free Documentation License
-along with @sc{gnu} @i{lightning}; see the file @file{COPYING.DOC}.  If not,
-write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
-MA 02110-1301, USA.  
-
-There are no Secondary Sections, no Cover Texts and no Invariant Sections
-(as defined in the license); this text, along with its equivalent in the
-Info documentation, constitutes the Title Page.
-@end titlepage
-
 @ifnottex
 @node Top
 @top @lightning{}
 
+@iftex
+@macro comma
+@verbatim{|,|}
+@end macro
+@end iftex
+
+@ifnottex
+@macro comma
+@verb{|,|}
+@end macro
+@end ifnottex
+
 This document describes @value{TOPIC} the @lightning{} library for
 dynamic code generation.  Unlike other dynamic code generation systems,
 which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
 
-@include toc.texi
+@menu
+* Overview::                What GNU lightning is
+* Installation::            Configuring and installing GNU lightning
+* The instruction set::     The RISC instruction set used in GNU lightning
+* GNU lightning examples::  GNU lightning's examples
+* Reentrancy::              Re-entrant usage of GNU lightning
+* Acknowledgements::        Acknowledgements for GNU lightning
+@end menu
 @end ifnottex
 
 @node Overview
@@ -73,14 +39,6 @@ which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
 @end iftex
 
-@ifclear USING
-This manual assumes that you are pretty comfortable with the usage of
-@lightning{} for dynamic code generation, as described in
-@usingref{The instruction set, @lightning{}'s instruction set}, and
-instead focuses on the retargeting process.  What follows is nothing
-more then a brief overview of the system.
-@end ifclear
-
 Dynamic code generation is the generation of machine code 
 at runtime. It is typically used to strip a layer of interpretation 
 by allowing compilation to occur at runtime.  One of the most
@@ -105,28 +63,8 @@ are generated, so programs using dynamic code generation must be
 retargeted for each machine; in addition, coding a run-time code
 generator is a tedious and error-prone task more than a difficult one.
 
-@ifset USING
-This manual describes the @lightning{} dynamic code generation library.
 @lightning{} provides a portable, fast and easily retargetable dynamic
 code generation system. 
-@end ifset
-@ifclear USING
-@lightning{} provides a portable, fast and easily retargetable dynamic
-code generation system. 
-@end ifclear
-
-To be fast, @lightning{} emits machine code without first creating
-intermediate data structures such as RTL representations traditionally
-used by optimizing compilers (@pxref{RTL representation, , , gcc, Using
-and porting GNU CC}).  @lightning{} translates code directly from a
-machine independent interface to that of the underlying architecture.
-This makes code generation more efficient, since no intermediate data
-structures have to be constructed and consumed.  A collateral benefit
-it that @lightning{} consumes little space: other than the memory
-needed to store generated instructions and data structures such as
-parse trees, the only data structure that client will usually need
-is an array of pointers to labels and unresolved jumps, which you
-can often allocate directly on the system stack.
 
 To be portable, @lightning{} abstracts over current architectures'
 quirks and unorthogonalities.  The interface that it exposes to is that
@@ -143,141 +81,1098 @@ real architectures closely enough that, most of the time, the
 compiler's constant folding pass ends up generating code which
 assembles machine instructions without further tests.
 
-@section Drawbacks
+@node Installation
+@chapter Configuring and installing @lightning{}
 
-@lightning{} has been useful in practice; however, it does have
-at least four drawbacks: it has limited registers, no peephole
-optimizer, no instruction scheduler and no symbolic debugger. Of
-these, the last is the most critical even though it does not
-affect the quality of generated code: the only way to debug code
-generated at run-time  is to step through it at the level of
-host specific machine code.  A decent knowledge of the underlying
-instruction set is thus needed to make sense of the debugger's
-output.
-
-The low number of available registers (six) is also an important
-limitation.  However, let's take the primary application of dynamic
-code generation, that is, bytecode translators.  The underlying
-virtual machines tend to have very few general purpose registers
-(usually 0 to 2) and the translators seldom rely on sophisticated
-graph-coloring algorithms to allocate registers to temporary
-variables.  Rather, these translators usually obtain performance
-increases because: a) they remove indirect jumps, which are usually
-poorly predicted, and thus often form a bottleneck, b) they
-parameterize the generated code and go through the process of decoding
-the bytecodes just once.  So, their usage of registers is rather
-sparse---in fact, in practice, six registers were found to be
-enough for most purposes.
-
-The lack of a peephole optimizer is most important on machines where a 
-single instruction can map to multiple native instructions.  For
-instance, Intel chips' division instruction hard-codes the dividend
-to be in EAX and the quotient and remainder to be output, respectively,
-in EAX and EDX: on such chips, @lightning{} does lots of pushing and
-popping of EAX and EDX to save those registers that are not used.  
-Unnecessary stack operations could be removed by looking at whether
-preserved registers are destroyed soon.  Unfortunately, the current 
-implementation of @lightning{} is so fast because it only knows about
-the single instruction that is being generated; performing these
-optimizations would require a flow analysis pass that would probably
-hinder @lightning{}'s speed.
-
-The lack of an instruction scheduler is not very important---pretty
-good instruction scheduling can actually be obtained by separating
-register writes from register reads.  The only architectures on which
-a scheduler would be useful are those on which arithmetic instructions
-have two operands; an example is, again, the x86, on which the single
-instruction
+The first thing to do to use @lightning{} is to configure the
+program, picking the set of macros to be used on the host
+architecture; this configuration is automatically performed by
+the @file{configure} shell script; to run it, merely type:
 @example
-    subr_i  R0, R1, R2       @rem{!Compute R0 = R1 - R2}
-@end example
-@noindent
-is translated to two instruction, of which the second depends on the
-result of the first:
-@example
-    movl    %ebx, %eax       @rem{! Move R1 into R0}
-    subl    %edx, %eax       @rem{! Subtract R2 from R0}
+     ./configure
 @end example
 
-@ifset BOTH
-@node Using GNU lightning
-@chapter Using @lightning{}
+@lightning{} supports the @code{--enable-disassembler} option, which
+enables linking to GNU binutils and optionally printing a human-readable
+disassembly of the jit code. This feature can be disabled with the
+@code{--disable-disassembler} option.
 
-This chapter describes installing and using @lightning{}.
+Another option that @file{configure} accepts is
+@code{--enable-assertions}, which enables several consistency checks in
+the run-time assemblers.  These are not usually needed, so you can
+decide to simply forget about it; also remember that these consistency
+checks tend to slow down your code generator.
 
-@menu
-@usingmenu{}
-@end menu
+After you've configured @lightning{}, run @file{make} as usual.
 
-@lowersections
-@end ifset
+@lightning{} has an extensive set of tests to validate that it is working
+correctly on the build host. To run them, type:
+@example
+    make check
+@end example
 
-@ifset USING
-@include using.texi
-@end ifset
+The next important step is:
+@example
+    make install
+@end example
 
-@ifset BOTH
-@raisesections
+This ends the process of installing @lightning{}.
 
-@node Porting GNU lightning
-@chapter Porting @lightning{}
+@node The instruction set
+@chapter @lightning{}'s instruction set
 
-This chapter describes the process of porting @lightning{}.
-It assumes that you are pretty comfortable with the usage of
-@lightning{} for dynamic code generation, as described in
-@ref{Using GNU lightning}.
-
-@menu
-@portingmenu{}
-@end menu
-
-@lowersections
-@end ifset
-
-@ifset PORTING
-@include porting.texi
-@end ifset
-
-@ifset BOTH
-@raisesections
-@end ifset
-
-@node Future
-@chapter The future of @lightning{}
-
-Presented below is the set of tasks that I feel need to be performed
-to make @lightning{} a more fully functional, viable system.  They are
-presented in no particular order.  I would @emph{very much} welcome any
-volunteers who would like to help with the implementation of one or
-more of these tasks.  Please write to me, Paolo Bonzini, at
-@email{bonzini@@gnu.org} if you are interested in adding your efforts
-to the @lightning{} project.
-
-Tasks:
+@lightning{}'s instruction set was designed by deriving instructions
+that closely match those of most existing RISC architectures, or
+that can be easily synthesized if absent.  Each instruction is composed
+of:
 @itemize @bullet
 @item
-The most important task to make @lightning{} more widely usable
-is to retarget it.  Although currently supported architectures
-(x86, SPARC, PowerPC) are certainly some of the most widely used,
-@lightning{} could be ported to others---namely, the Alpha and
-MIPS architectures.
+an operation, like @code{sub} or @code{mul}
 
 @item
-Another interesting task is to allow the instruction stream to grow
-dynamically.  This is a problem because not all architectures allow
-to write position independent code.@footnote{The x86's absolute
-jumps, for example, are actually slow indirect jumps, and need a
-register.}
+most times, a register/immediate flag (@code{r} or @code{i})
 
 @item
-Optimize leaf procedures on the SPARC.  This involves using the
-output registers (@code{%o@i{X}}) instead of the local registers
-(@code{%l@i{X}}) when writing leaf procedures;  the problem is,
-leaf procedures also receive parameters in the output registers,
-so they would be overwritten by write accesses to general-purpose
-registers.
+an unsigned modifier (@code{u}), a type identifier or two, when applicable.
 @end itemize
 
+Examples of legal mnemonics are @code{addr} (integer add, with three
+register operands) and @code{muli} (integer multiply, with two
+register operands and an immediate operand).  Each instruction takes
+two or three operands; in most cases, one of them can be an immediate
+value instead of a register.
+
+Most @lightning{} integer operations are signed wordsize operations,
+with the exception of operations that convert types, or load or store
+values to/from memory. When applicable, the type modifiers and the
+corresponding C types are as follows:
+
+@example
+     _c         @r{signed char}
+     _uc        @r{unsigned char}
+     _s         @r{short}
+     _us        @r{unsigned short}
+     _i         @r{int}
+     _ui        @r{unsigned int}
+     _l         @r{long}
+     _f         @r{float}
+     _d         @r{double}
+@end example
+
+Most integer operations do not need a type modifier, and when loading or
+storing values to memory there is an alias to the proper operation
+using wordsize operands, that is, if omitted, the type is @r{int} on
+32-bit architectures and @r{long} on 64-bit architectures.  Note
+that lightning also expects @code{sizeof(void*)} to match the wordsize.
+
+When the result of an unsigned operation differs from that of the
+equivalent signed operation, there is a @code{_u} modifier.
+
+There are at least seven integer registers, of which six are
+general-purpose, while the last is used to contain the frame pointer
+(@code{FP}).  The frame pointer can be used to allocate and access local
+variables on the stack, using the @code{allocai} instruction.
+
+Of the general-purpose registers, at least three are guaranteed to be
+preserved across function calls (@code{V0}, @code{V1} and
+@code{V2}) and at least three are not (@code{R0}, @code{R1} and
+@code{R2}).  Six registers are not very much, but this
+restriction was forced by the need to target CISC architectures
+which, like the x86, are short of registers; anyway, backends can
+specify the actual number of available registers with the calls
+@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
+(for callee-save registers).
+
+There are at least six floating-point registers, named @code{F0} to
+@code{F5}.  These are usually caller-save and are separate from the integer
+registers on the supported architectures; on Intel architectures,
+in 32 bit mode if SSE2 is not available or use of X87 is forced,
+the register stack is mapped to a flat register file.  As for the
+integer registers, the macro @code{JIT_F_NUM} yields the number of
+floating-point registers.
+
+The complete instruction set follows; as you can see, most non-memory
+operations only take integers (either signed or unsigned) as operands;
+this was done in order to reduce the instruction set, and because most
+architectures only provide word and long word operations on registers.
+There are instructions that allow operands to be extended to fit a larger
+data type, both in a signed and in an unsigned way.
+
+@table @b
+@item Binary ALU operations
+These accept three operands; the last one can be an immediate.
+@code{addx} operations must directly follow @code{addc}, and
+@code{subx} must follow @code{subc}; otherwise, results are undefined.
+Most, if not all, architectures do not support @r{float} or @r{double}
+immediate operands; lightning emulates those operations by moving the
+immediate to a temporary register and emitting the call with only
+register operands.
+@example
+addr         _f  _d  O1 = O2 + O3
+addi         _f  _d  O1 = O2 + O3
+addxr                O1 = O2 + (O3 + carry)
+addxi                O1 = O2 + (O3 + carry)
+addcr                O1 = O2 + O3, set carry
+addci                O1 = O2 + O3, set carry
+subr         _f  _d  O1 = O2 - O3
+subi         _f  _d  O1 = O2 - O3
+subxr                O1 = O2 - (O3 + carry)
+subxi                O1 = O2 - (O3 + carry)
+subcr                O1 = O2 - O3, set carry
+subci                O1 = O2 - O3, set carry
+mulr         _f  _d  O1 = O2 * O3
+muli         _f  _d  O1 = O2 * O3
+divr     _u  _f  _d  O1 = O2 / O3
+divi     _u  _f  _d  O1 = O2 / O3
+remr     _u          O1 = O2 % O3
+remi     _u          O1 = O2 % O3
+andr                 O1 = O2 & O3
+andi                 O1 = O2 & O3
+orr                  O1 = O2 | O3
+ori                  O1 = O2 | O3
+xorr                 O1 = O2 ^ O3
+xori                 O1 = O2 ^ O3
+lshr                 O1 = O2 << O3
+lshi                 O1 = O2 << O3
+rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+@end example
+
+@item Unary ALU operations
+These accept two operands, both of which must be registers.
+@example
+negr         _f  _d  O1 = -O2
+comr                 O1 = ~O2
+@end example
+
+These unary ALU operations are only defined for float operands.
+@example
+absr         _f  _d  O1 = fabs(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
+@end example
+
+Besides requiring the @code{r} modifier, there are no unary operations
+with an immediate operand.
+
+@item Compare instructions
+These accept three operands; again, the last can be an immediate.
+The last two operands are compared, and the first operand, that must be
+an integer register, is set to either 0 or 1, according to whether the
+given condition was met or not.
+
+The conditions given below are for the standard behavior of C,
+where the ``unordered'' comparison result is mapped to false.
+
+@example
+ltr       _u  _f  _d  O1 =  (O2 <  O3)
+lti       _u  _f  _d  O1 =  (O2 <  O3)
+ler       _u  _f  _d  O1 =  (O2 <= O3)
+lei       _u  _f  _d  O1 =  (O2 <= O3)
+gtr       _u  _f  _d  O1 =  (O2 >  O3)
+gti       _u  _f  _d  O1 =  (O2 >  O3)
+ger       _u  _f  _d  O1 =  (O2 >= O3)
+gei       _u  _f  _d  O1 =  (O2 >= O3)
+eqr           _f  _d  O1 =  (O2 == O3)
+eqi           _f  _d  O1 =  (O2 == O3)
+ner           _f  _d  O1 =  (O2 != O3)
+nei           _f  _d  O1 =  (O2 != O3)
+unltr         _f  _d  O1 = !(O2 >= O3)
+unler         _f  _d  O1 = !(O2 >  O3)
+ungtr         _f  _d  O1 = !(O2 <= O3)
+unger         _f  _d  O1 = !(O2 <  O3)
+uneqr         _f  _d  O1 = !(O2 <  O3) && !(O2 >  O3)
+ltgtr         _f  _d  O1 = !(O2 >= O3) || !(O2 <= O3)
+ordr          _f  _d  O1 =  (O2 == O2) &&  (O3 == O3)
+unordr        _f  _d  O1 =  (O2 != O2) ||  (O3 != O3)
+@end example
+
+@item Transfer operations
+These accept two operands; for @code{ext} both of them must be
+registers, while @code{mov} accepts an immediate value as the second
+operand.
+
+Unlike @code{movr} and @code{movi}, the other instructions are used
+to truncate a wordsize operand to a smaller integer data type or to
+convert float data types. You can also use @code{extr} to convert an
+integer to a floating point value: the usual options are @code{extr_f}
+and @code{extr_d}.
+
+@example
+movr                                 _f  _d  O1 = O2
+movi                                 _f  _d  O1 = O2
+extr      _c  _uc  _s  _us  _i  _ui  _f  _d  O1 = O2
+truncr                               _f  _d  O1 = trunc(O2)
+@end example
+
+In 64-bit architectures it may be required to use @code{truncr_f_i},
+@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match
+the equivalent C code.  Only the @code{_i} modifier is available in
+32-bit architectures.
+
+@example
+truncr_f_i    = <int> O1 = <float> O2
+truncr_f_l    = <long>O1 = <float> O2
+truncr_d_i    = <int> O1 = <double>O2
+truncr_d_l    = <long>O1 = <double>O2
+@end example
+
+The float conversion operations are @emph{destination first,
+source second}, but the order of the types is reversed.  This happens
+for historical reasons.
+
+@example
+extr_f_d    = <double>O1 = <float> O2
+extr_d_f    = <float> O1 = <double>O2
+@end example
+
+@item Network extensions
+These accept two operands, both of which must be registers; these
+two instructions actually perform the same task, yet they are
+assigned to two mnemonics for the sake of convenience and
+completeness.  As usual, the first operand is the destination and
+the second is the source.
+@example
+htonr    @r{Host-to-network (big endian) order}
+ntohr    @r{Network-to-host order }
+@end example
+
+@item Load operations
+@code{ld} accepts two operands while @code{ldx} accepts three;
+in both cases, the last can be either a register or an immediate
+value. Values are extended (with or without sign, according to
+the data type specification) to fit a whole register.
+The @code{_ui} and @code{_l} types are only available in 64-bit
+architectures.  For convenience, there is a version without a
+type modifier for integer or pointer operands that uses the
+appropriate wordsize call.
+@example
+ldr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+@end example
+
+@item Store operations
+@code{st} accepts two operands while @code{stx} accepts three; in
+both cases, the first can be either a register or an immediate
+value. Values are sign-extended to fit a whole register.
+@example
+str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+@end example
+As for the load operations, the @code{_ui} and @code{_l} types are
+only available in 64-bit architectures, and for convenience, there
+is a version without a type modifier for integer or pointer operands
+that uses the appropriate wordsize call.
+
+@item Argument management
+These are:
+@example
+prepare     (not specified)
+pushargr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+pushargi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+arg         _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+ret         (not specified)
+retr        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+reti        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+epilog      (not specified)
+@end example
+As with other operations that use a type modifier, the @code{_ui} and
+@code{_l} types are only available in 64-bit architectures, but there
+are operations without a type modifier that alias to the appropriate
+integer operation with wordsize operands.
+
+@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller,
+while @code{arg}, @code{getarg} and @code{ret} are used by the callee.
+A code snippet that wants to call another procedure and has to pass
+arguments must, in order: use the @code{prepare} instruction and use
+the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
+left to right order}; and use @code{finish} or @code{call} (explained below)
+to perform the actual call.
+
+@code{arg} and @code{getarg} are used by the callee.
+@code{arg} is different from other instructions in that it does not
+actually generate any code: instead, it is a function which returns
+a value to be passed to @code{getarg}.@footnote{``Return a
+value'' means that the @lightning{} macros that compile these
+instructions return a value when expanded.} You should call
+@code{arg} as soon as possible, before any function call or, more
+easily, right after the @code{prolog} instruction
+(which is treated later).
+
+@code{getarg} accepts a register argument and a value returned by
+@code{arg}, and will move that argument to the register, extending
+it (with or without sign, according to the data type specification)
+to fit a whole register.  These instructions are more intimately
+related to the usage of the @lightning{} instruction set in code
+that generates other code, so they will be treated more
+specifically in @ref{GNU lightning examples, , Generating code at
+run-time}.
+
+Finally, the @code{retval} instruction fetches the return value of a
+called function in a register.  The @code{retval} instruction takes a
+register argument and copies the return value of the previously called
+function in that register.  A function with a return value should use
+@code{retr} or @code{reti} to put the return value in the return register
+before returning.  @xref{Fibonacci, the Fibonacci numbers}, for an example.
+
+@code{epilog} is an optional call that marks the end of a function
+body. It is automatically generated by @lightning{} when starting a new
+function (which should be done after a @code{ret} call) or when
+finishing jit generation.
+
+You should observe a few rules when using these macros.  First of
+all, if calling a varargs function, you should use the @code{ellipsis}
+call to mark the position of the ellipsis in the C prototype.
+
+You should not nest calls to @code{prepare} inside a
+@code{prepare/finish} block.  Doing this will result in undefined
+behavior. Note that for functions with zero arguments you can use
+just @code{call}.
+
+@item Branch instructions
+Like @code{arg}, these also return a value which, in this case,
+is to be used to compile forward branches as explained in
+@ref{Fibonacci, , Fibonacci numbers}.  They accept two operands to be
+compared; of these, the last can be either a register or an immediate.
+They are:
+@example
+bltr      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+blti      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+bler      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+blei      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+bgtr      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bgti      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bger      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+bgei      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+beqr          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+beqi          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+bner          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+bnei          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+
+bunltr        _f  _d  @r{if }!(O2 >= O3)@r{ goto }O1
+bunler        _f  _d  @r{if }!(O2 >  O3)@r{ goto }O1
+bungtr        _f  _d  @r{if }!(O2 <= O3)@r{ goto }O1
+bunger        _f  _d  @r{if }!(O2 <  O3)@r{ goto }O1
+buneqr        _f  _d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
+bltgtr        _f  _d  @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
+bordr         _f  _d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
+bunordr       _f  _d  @r{if } (O2 != O2) ||  (O3 != O3)@r{ goto }O1
+
+bmsr                  @r{if }O2 &  O3@r{ goto }O1
+bmsi                  @r{if }O2 &  O3@r{ goto }O1
+bmcr                  @r{if }!(O2 & O3)@r{ goto }O1
+bmci                  @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
+boaddr    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+boaddi    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+bxaddr    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bxaddi    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bosubr    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bosubi    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+@end example
+
+@item Jump and return operations
+These accept one argument except @code{ret} which has none; the
+difference between @code{finishi} and @code{calli} is that the
+latter does not clean the stack from pushed parameters (if any)
+and the former must @strong{always} follow a @code{prepare}
+instruction.
+@example
+callr     (not specified)                @r{function call to a register}
+calli     (not specified)                @r{function call to O1}
+finishr   (not specified)                @r{function call to a register}
+finishi   (not specified)                @r{function call to O1}
+jmpi/jmpr (not specified)                @r{unconditional jump to O1}
+ret       (not specified)                @r{return from subroutine}
+retr      _c _uc _s _us _i _ui _l _f _d
+reti      _c _uc _s _us _i _ui _l _f _d
+retval    _c _uc _s _us _i _ui _l _f _d  @r{move return value}
+                                         @r{to register}
+@end example
+
+Like branch instructions, @code{jmpi} also returns a value which is to
+be used to compile forward branches. @xref{Fibonacci, , Fibonacci
+numbers}.
+
+@item Function prolog
+
+These macros are used to set up a function prolog.  The @code{allocai}
+call accepts a single integer argument and returns an offset value
+for stack storage access.
+
+@example
+prolog    (not specified)                @r{function prolog}
+allocai   (not specified)                @r{reserve space on the stack}
+@end example
+
+@code{allocai} receives the number of bytes to allocate and returns
+the offset from the frame pointer register @code{FP} to the base of
+the area.
+@end table
+
+As a small appetizer, here is a small function that adds 1 to the input
+parameter (an @code{int}).  I'm using an assembly-like syntax here which
+is a bit different from the one used when writing real subroutines with
+@lightning{}; the real syntax will be introduced in @ref{GNU lightning
+examples, , Generating code at run-time}.
+
+@example
+incr:
+     prolog
+in = arg                     @rem{! We have an integer argument}
+     getarg    R0, in        @rem{! Move it to R0}
+     addi      R0, R0, 1     @rem{! Add 1}
+     retr      R0            @rem{! And return the result}
+@end example
+
+And here is another function which uses the @code{printf} function from
+the standard C library to write a number in hexadecimal notation:
+
+@example
+printhex:
+     prolog
+in = arg                     @rem{! Same as above}
+     getarg    R0, in
+     prepare                 @rem{! Begin call sequence for printf}
+     pushargi  "%x"          @rem{! Push format string}
+     ellipsis                @rem{! Varargs start here}
+     pushargr  R0            @rem{! Push second argument}
+     finishi   printf        @rem{! Call printf}
+     ret                     @rem{! Return to caller}
+@end example
+
+@node GNU lightning examples
+@chapter Generating code at run-time
+
+To use @lightning{}, you should include the @file{lightning.h} file that
+is put in your include directory by the @samp{make install} command.
+
+Each of the instructions above translates to a macro or function call.
+All you have to do is prepend @code{jit_} (lowercase) to opcode names
+and @code{JIT_} (uppercase) to register names.  Of course, parameters
+are to be put between parentheses.
+
+This small tutorial presents four examples:
+
+@iftex
+@itemize @bullet
+@item
+The @code{incr} function found in @ref{The instruction set, ,
+@lightning{}'s instruction set}:
+
+@item
+A simple function call to @code{printf}
+
+@item
+An RPN calculator.
+
+@item
+Fibonacci numbers
+@end itemize
+@end iftex
+@ifnottex
+@menu
+* incr::             A function which increments a number by one
+* printf::           A simple function call to printf
+* RPN calculator::   A more complex example, an RPN calculator
+* Fibonacci::        Calculating Fibonacci numbers
+@end menu
+@end ifnottex
+
+@node incr
+@section A function which increments a number by one
+
+Let's see how to create and use the sample @code{incr} function created
+in @ref{The instruction set, , @lightning{}'s instruction set}:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  jit_node_t  *in;
+  pifi         incr;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    @rem{/* @t{     prolog             } */}
+  in = jit_arg();                  @rem{/* @t{     in = arg           } */}
+  jit_getarg(JIT_R0, in);          @rem{/* @t{     getarg R0@comma{} in      } */}
+  jit_addi(JIT_R0, JIT_R0, 1);     @rem{/* @t{     addi   R0@comma{} R0@comma{} 1   } */}
+  jit_retr(JIT_R0);                @rem{/* @t{     retr   R0          } */}
+
+  incr = jit_emit();
+
+  @rem{/* call the generated code@comma{} passing 5 as an argument */}
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+Let's examine the code line by line (well, almost@dots{}):
+
+@table @t
+@item #include <lightning.h>
+You already know about this.  It defines all of @lightning{}'s macros.
+
+@item static jit_state_t *_jit;
+You might wonder what @code{jit_state_t} is.  It is a structure
+that stores jit code generation information.  The name @code{_jit} is
+special: because multiple jit generators can run at the same
+time, you must either @code{#define _jit my_jit_state} or name it
+@code{_jit}.
+
+@item typedef int (*pifi)(int);
+Just a handy typedef for a pointer to a function that takes an
+@code{int} and returns another.
+
+@item jit_node_t  *in;
+Declares a variable to hold an identifier for a function argument. It
+is an opaque pointer, that will hold the return of a call to @code{arg}
+and be used as argument to @code{getarg}.
+
+@item pifi         incr;
+Declares a function pointer variable to a function that receives an
+@code{int} and returns an @code{int}.
+
+@item init_jit(argv[0]);
+You must call this function before creating a @code{jit_state_t}
+object. This function does global state initialization, and may need
+to detect CPU or Operating System features.  It receives a string
+argument that is later used to read symbols from a shared object using
+GNU binutils if disassembly was enabled at configure time. If no
+disassembly will be performed a NULL pointer can be used as argument.
+
+@item _jit = jit_new_state();
+This call initializes a @lightning{} jit state.
+
+@item jit_prolog();
+Ok, so we start generating code for our beloved function@dots{}
+
+@item in = jit_arg();
+@itemx jit_getarg(JIT_R0, in);
+We retrieve the first (and only) argument, an integer, and store it
+into the general-purpose register @code{R0}.
+
+@item jit_addi(JIT_R0, JIT_R0, 1);
+We add one to the content of the register.
+
+@item jit_retr(JIT_R0);
+This instruction generates a standard function epilog that returns
+the contents of the @code{R0} register.
+
+@item incr = jit_emit();
+This instruction is very important.  It actually translates the
+@lightning{} macros used before to machine code, flushes the generated
+code area out of the processor's instruction cache and returns a
+pointer to the start of the code.
+
+@item printf("%d + 1 = %d\n", 5, incr(5));
+Calling our function is this simple---it is not distinguishable from
+a normal C function call, the only difference being that @code{incr}
+is a variable.
+
+@item finish_jit();
+This call cleans up any global state held by @lightning{}; it is
+advisable to call it once jit code will no longer be generated.
+@end table
+
+@lightning{} abstracts two phases of dynamic code generation: selecting
+instructions that map the standard representation, and emitting binary
+code for these instructions.  The client program has the responsibility
+of describing the code to be generated using the standard @lightning{}
+instruction set.
+
+Let's examine the code generated for @code{incr} on the x86_64
+architecture (on the right is the code that an assembly-language
+programmer would write):
+
+@table @b
+@item x86_64
+@example
+    sub   $0x30,%rsp
+    mov   %rbp,(%rsp)
+    mov   %rsp,%rbp
+    sub   $0x18,%rsp
+    mov   %rdi,%rax            mov %rdi, %rax
+    add   $0x1,%rax            inc %rax
+    mov   %rbp,%rsp
+    mov   (%rsp),%rbp
+    add   $0x30,%rsp
+    retq                       retq
+@end example
+In this case, the main overhead is due to the function's prolog and
+epilog, and stack alignment after reserving stack space for word
+to/from float conversions or moving data from/to x87 to/from SSE.
+Note that besides allocating space to save callee saved registers,
+no registers are saved/restored because @lightning{} notices those
+registers are not modified. There is currently no logic to detect
+if it needs to allocate stack space for type conversions, nor
+proper leaf function detection, but these are subject to change
+(FIXME).
+@end table
+
+@node printf
+@section A simple function call to @code{printf}
+
+Again, here is the code for the example:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);      @rem{/* Pointer to Void Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pvfi          myFunction;             @rem{/* ptr to generated code */}
+  jit_node_t    *start, *end;           @rem{/* a couple of labels */}
+  jit_node_t    *in;                    @rem{/* to get the argument */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  start = jit_note(__FILE__, __LINE__);
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);
+  jit_pushargi((jit_word_t)"generated %d bytes\n");
+  jit_ellipsis();
+  jit_pushargr(JIT_R1);
+  jit_finishi(printf);
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);
+
+  myFunction = jit_emit();
+
+  @rem{/* call the generated code@comma{} passing its size as argument */}
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+
+  jit_disassemble();
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The function shows how many bytes were generated.  Most of the code
+is not very interesting, as it resembles very closely the program
+presented in @ref{incr, , A function which increments a number by one}.
+
+For this reason, we're going to concentrate on just a few statements.
+
+@table @t
+@item start = jit_note(__FILE__, __LINE__);
+@itemx @r{@dots{}}
+@itemx end = jit_note(__FILE__, __LINE__);
+These two instructions call the @code{jit_note} macro, which creates
+a note in the jit code; arguments to @code{jit_note} usually are a
+filename string and line number integer, but using NULL for the
+string argument is perfectly valid if you only need to create a simple
+marker in the code.
+
+@item jit_ellipsis();
+@code{ellipsis} usually is only required if calling varargs functions
+with double arguments, but it is a good practice to properly describe
+the @r{@dots{}} in the call sequence.
+
+@item jit_pushargi((jit_word_t)"generated %d bytes\n");
+Note the use of the @code{(jit_word_t)} cast, that is used only
+to avoid a compiler warning, due to using a pointer where a
+wordsize integer type was expected.
+
+@item jit_prepare();
+@itemx @r{@dots{}}
+@itemx jit_finishi(printf);
+Once the arguments to @code{printf} have been pushed, which means
+moving them to stack or register arguments, the @code{printf}
+function is called and the stack cleaned.  Note how @lightning{}
+abstracts the differences between different architectures and
+ABIs---the client program does not know how parameter passing
+works on the host architecture.
+
+@item jit_epilog();
+Usually it is not required to call @code{epilog}, but because it
+is implicitly called when noticing the end of a function, if the
+@code{end} variable was set with a @code{note} call after the
+@code{ret}, it would not consider the function epilog.
+
+@item myFunction((char*)jit_address(end) - (char*)jit_address(start));
+This calls the generated jit function passing as argument the offset
+difference between the @code{start} and @code{end} notes. The @code{address}
+call must be done after the @code{emit} call or either a fatal error
+will happen (if @lightning{} is built with assertions enabled) or an
+undefined value will be returned.
+
+@item jit_disassemble();
+@code{disassemble} will dump the generated code to standard output,
+unless @lightning{} was built with the disassembler disabled, in which
+case no output will be shown.
+@end table
+
+@node RPN calculator
+@section A more complex example, an RPN calculator
+
+We create a small stack-based RPN calculator which applies a series
+of operators to a given parameter and to other numeric operands.
+Unlike previous examples, the code generator is fully parameterized
+and is able to compile different formulas to different functions.
+Here is the code for the expression compiler; a sample usage will
+follow.
+
+Since @lightning{} does not provide push/pop instructions, this
+example uses a stack-allocated area to store the data.  Such an
+area can be allocated using the macro @code{allocai}, which
+receives the number of bytes to allocate and returns the offset
+from the frame pointer register @code{FP} to the base of the
+area.
+
+Usually, you will use the @code{ldxi} and @code{stxi} instructions
+to access stack-allocated variables.  However, it is possible to
+use operations such as @code{add} to compute the address of the
+variables, and pass the address around.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+@{
+  jit_stxi_i (*sp, JIT_FP, reg);
+  *sp += sizeof (int);
+@}
+
+void stack_pop(int reg, int *sp)
+@{
+  *sp -= sizeof (int);
+  jit_ldxi_i (reg, JIT_FP, *sp);
+@}
+
+jit_node_t *compile_rpn(char *expr)
+@{
+  jit_node_t *in, *fn;
+  int stack_base, stack_ptr;
+
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
+
+  jit_getarg_i(JIT_R2, in);
+
+  while (*expr) @{
+    char buf[32];
+    int n;
+    if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
+      expr += n - 1;
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movi(JIT_R0, atoi(buf));
+    @} else if (*expr == 'x') @{
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    @} else if (*expr == '+') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '-') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '*') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '/') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);
+    @} else @{
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    @}
+    ++expr;
+  @}
+  jit_retr(JIT_R0);
+  jit_epilog();
+  return fn;
+@}
+@end example
+
+The principle on which the calculator is based is easy: the stack top
+is held in R0, while the remaining items of the stack are held in the
+memory area that we allocate with @code{allocai}.  Compiling a numeric
+operand or the argument @code{x} pushes the old stack top onto the
+stack and moves the operand into R0; compiling an operator pops the
+second operand off the stack into R1, and compiles the operation so
+that the result goes into R0, thus becoming the new stack top.
+
+This example allocates a fixed area for 32 @code{int}s.  This is not
+a problem when the function is a leaf like in this case; in a full-blown
+compiler you will want to analyze the input and determine the number
+of needed stack slots---a very simple example of register allocation.
+The area is then managed like a stack using @code{stack_push} and
+@code{stack_pop}.
+
+Source code for the client (which lies in the same source file) follows:
+
+@example
+int main(int argc, char *argv[])
+@{
+  jit_node_t *nc, *nf;
+  pifi c2f, f2c;
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");
+  nf = compile_rpn("x32-5*9/");
+  (void)jit_emit();
+  c2f = (pifi)jit_address(nc);
+  f2c = (pifi)jit_address(nf);
+
+  printf("\nC:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The client displays a conversion table between Celsius and Fahrenheit
+degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
+formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
+respectively.
+
+Providing the formula as an argument to @code{compile_rpn} effectively
+parameterizes code generation, making it possible to use the same code
+to compile different functions; this is what makes dynamic code
+generation so powerful.
+
+@node Fibonacci
+@section Fibonacci numbers
+
+The code in this section calculates a variant of the Fibonacci sequence.
+While the traditional Fibonacci sequence is modeled by the recurrence
+relation:
+@display
+     f(0) = f(1) = 1
+     f(n) = f(n-1) + f(n-2)
+@end display
+
+@noindent
+the functions in this section calculate the following sequence, which
+is more interesting as a benchmark@footnote{That's because, as is
+easily seen, the sequence represents the number of activations of the
+@code{nfibs} procedure that are needed to compute its value through
+recursion.}:
+@display
+     fib(0) = fib(1) = 1
+     fib(n) = fib(n-1) + fib(n-2) + 1
+@end display
+
+The purpose of this example is to introduce branches.  There are two
+kinds of branches: backward branches and forward branches.  We'll
+present the calculation in a recursive and iterative form; the
+former only uses forward branches, while the latter uses both.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *label;
+  jit_node_t *call;
+  jit_node_t *in;                 @rem{/* offset of the argument */}
+  jit_node_t *ref;                @rem{/* to patch the forward reference */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_V0, in);              @rem{/* V0 = n */}
+  ref = jit_blti     (JIT_V0, 2);
+        jit_subi     (JIT_V1, JIT_V0, 1);       @rem{/* V1 = n-1 */}
+        jit_subi     (JIT_V2, JIT_V0, 2);       @rem{/* V2 = n-2 */}
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_V1);                     @rem{/* V1 = fib(n-1) */}
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_V2);                     @rem{/* V2 = fib(n-2) */}
+        jit_addi(JIT_V1,  JIT_V1,  1);
+        jit_addr(JIT_R0, JIT_V1, JIT_V2);       @rem{/* R0 = V1 + V2 + 1 */}
+        jit_retr(JIT_R0);
+
+  jit_patch(ref);                               @rem{/* patch jump */}
+        jit_movi(JIT_R0, 1);                    @rem{/* R0 = 1 */}
+        jit_retr(JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 32 as an argument */}
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 32, fib(32));
+  finish_jit();
+  return 0;
+@}
+@end example
+
+As said above, this is the first example of dynamically compiling
+branches.  Branch instructions have two operands containing the
+values to be compared, and return a @code{jit_node_t *} object
+to be patched.
+
+Because the final addresses of labels are only known after calling
+@code{emit}, it is required to call @code{patch} or @code{patch_at}, which
+tells @lightning{} that the target to patch is actually a pointer to
+a @code{jit_node_t *} object; otherwise, it would assume that it is
+a pointer to a C function. Note that conditional branches do not
+receive a label argument, so they must be patched.
+
+You need to call @code{patch_at} on the return value of @code{calli},
+@code{finishi} and @code{jmpi} if it is actually
+referencing a label in the jit code. All other branch instructions
+do not receive a label argument. Note that @code{movi} is a special
+case, and patching it is usually done to get the final address of
+a label, usually to later call @code{jmpr}.
+
+Now, here is the iterative version:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *in;               @rem{/* offset of the argument */}
+  jit_node_t *ref;              @rem{/* to patch the forward reference */}
+  jit_node_t *jump;             @rem{/* jump to start of loop */}
+  jit_node_t *loop;             @rem{/* start of the loop */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R2, in);              @rem{/* R2 = n */}
+        jit_movi     (JIT_R1, 1);
+  ref = jit_blti     (JIT_R2, 2);
+        jit_subi     (JIT_R2, JIT_R2, 1);
+        jit_movi     (JIT_R0, 1);
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
+        jit_addr     (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
+        jit_movr     (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
+        jit_addi     (JIT_R1, JIT_V0, 1);       @rem{/* R1 = V0 + 1 */}
+  jump= jit_bnei     (JIT_R2, 0);               @rem{/* if (R2) goto loop; */}
+  jit_patch_at(jump, loop);
+
+  jit_patch(ref);                               @rem{/* patch forward jump */}
+        jit_movr     (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
+        jit_retr     (JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 36 as an argument */}
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  finish_jit();
+  return 0;
+@}
+@end example
+
+This code calculates the recurrence relation using iteration (a
+@code{for} loop in high-level languages).  There are no function
+calls anymore: instead, there is a backward jump (the @code{bnei} at
+the end of the loop).
+
+Note that the program must remember the address for backward jumps;
+for forward jumps it is only required to remember the jump code,
+and call @code{patch} for the implicit label.
+
+@node Reentrancy
+@chapter Re-entrant usage of @lightning{}
+
+@lightning{} uses the special @code{_jit} identifier. To be able
+to use multiple jit generation states at the same
+time, it is required to use code similar to:
+
+@example
+    struct jit_state lightning;
+    #define _jit lightning
+@end example
+
+This will cause the symbol defined to @code{_jit} to be passed as
+the first argument to the underlying @lightning{} implementation,
+that is usually a function with an @code{_} (underscore) prefix
+and with an argument named @code{_jit}, in the pattern:
+
+@example
+	static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t);
+	#define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v);
+@end example
+
+The reason for this is to use the same syntax as the initial lightning
+implementation and to avoid needing the user to keep adding an extra
+argument to every call, as multiple jit states generating code in
+parallel should be very uncommon.
+
+@node Registers
+@chapter Accessing the whole register file
+
+As mentioned earlier in this chapter, all @lightning{} back-ends are
+guaranteed to have at least six general-purpose integer registers and
+six floating-point registers, but many back-ends will have more.
+
+To access the entire register files, you can use the
+@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros.  They
+accept a parameter that identifies the register number, which
+must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
+and @code{JIT_F_NUM} respectively; the number need not be
+constant.  Of course, expressions like @code{JIT_R0} and
+@code{JIT_R(0)} denote the same register, and likewise for
+integer callee-saved, or floating-point, registers.
 
 @node Acknowledgements
 @chapter Acknowledgements
@@ -298,9 +1193,3 @@ yet very interesting.
 I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{}
 was first developed as a tool to be used in GNU Smalltalk's dynamic
 translator from bytecodes to native code.
-
-@iftex
-@contents
-@end iftex
-
-@bye
diff --git a/doc/ifib.c b/doc/ifib.c
new file mode 100644
index 000000000..7e098cba4
--- /dev/null
+++ b/doc/ifib.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  pifi       fib;
+  jit_node_t *in;               /* node identifying the argument */
+  jit_node_t *ref;              /* to patch the forward reference */
+  jit_node_t *jump;             /* jump to start of loop */
+  jit_node_t *loop;             /* start of the loop */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R2, in);              /* R2 = n */
+        jit_movi     (JIT_R1, 1);               /* R1 = 1 */
+  ref = jit_blti     (JIT_R2, 2);               /* if (R2 < 2) skip the loop */
+        jit_subi     (JIT_R2, JIT_R2, 1);       /* R2 = n - 1 */
+        jit_movi     (JIT_R0, 1);               /* R0 = 1 */
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       /* decr. counter */
+        jit_addr     (JIT_V0, JIT_R0, JIT_R1);  /* V0 = R0 + R1 */
+        jit_movr     (JIT_R0, JIT_R1);          /* R0 = R1 */
+        jit_addi     (JIT_R1, JIT_V0, 1);       /* R1 = V0 + 1 */
+  jump= jit_bnei     (JIT_R2, 0);               /* if (R2) goto loop; */
+  jit_patch_at(jump, loop);                     /* bind the backward jump to loop */
+
+  jit_patch(ref);                               /* patch forward jump */
+        jit_movr     (JIT_R0, JIT_R1);          /* R0 = R1 */
+        jit_retr     (JIT_R0);                  /* return R0 */
+
+  /* emit the code, then call it passing 36 as an argument */
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  finish_jit();
+  return 0;
+}
diff --git a/doc/incr.c b/doc/incr.c
new file mode 100644
index 000000000..5d5e438d0
--- /dev/null
+++ b/doc/incr.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  jit_node_t  *in;
+  pifi         incr;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    /*      prolog              */
+  in = jit_arg();                  /*      in = arg            */
+  jit_getarg(JIT_R0, in);          /*      getarg R0, in       */
+  jit_addi(JIT_R0, JIT_R0, 1);     /*      addi   R0, R0, 1    */
+  jit_retr(JIT_R0);                /*      retr   R0           */
+
+  incr = jit_emit();
+
+  /* call the generated code, passing 5 as an argument */
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  finish_jit();
+  return 0;
+}
diff --git a/doc/lightning.texi b/doc/lightning.texi
index a336a3db0..c7d8f98f1 100644
--- a/doc/lightning.texi
+++ b/doc/lightning.texi
@@ -3,11 +3,8 @@
 
 @setfilename lightning.info
 
-@set TITLE       Using and porting @sc{gnu} @i{lightning}
-@set TOPIC       installing, using and porting
-@set BOTH
-@set USING
-@set PORTING
+@set TITLE       Using @sc{gnu} @i{lightning}
+@set TOPIC       installing and using
 
 @settitle @value{TITLE}
 
@@ -63,29 +60,9 @@
 @c References to the other half of the manual
 @c ---------------------------------------------------------------------
 
-@ifset USING
 @macro usingref{node, name}
 @ref{\node\, , \name\}
 @end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
 
 @c ---------------------------------------------------------------------
 @c End of macro section
@@ -98,3 +75,4 @@
 
 @c ***********************************************************************
 
+@bye
diff --git a/doc/p-lightning.texi b/doc/p-lightning.texi
deleted file mode 100644
index 98a9b63fb..000000000
--- a/doc/p-lightning.texi
+++ /dev/null
@@ -1,100 +0,0 @@
-\input texinfo.tex  @c -*- texinfo -*-
-@c %**start of header (This is for running Texinfo on a region.)
-
-@setfilename lightning.info
-
-@set TITLE       Using @sc{gnu} @i{lightning}
-@set TOPIC       installing and using
-@clear BOTH
-@clear USING
-@set PORTING
-
-@settitle @value{TITLE}
-
-@c ---------------------------------------------------------------------
-@c Common macros
-@c ---------------------------------------------------------------------
-
-@macro bulletize{a}
-@item
-\a\
-@end macro
-
-@macro rem{a}
-@r{@i{\a\}}
-@end macro
-
-@macro gnu{}
-@sc{gnu}
-@end macro
-
-@macro lightning{}
-@gnu{} @i{lightning}
-@end macro
-
-@c ---------------------------------------------------------------------
-@c Macros for Texinfo 3.1/4.0 compatibility
-@c ---------------------------------------------------------------------
-
-@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
-@c compatibility
-@macro hlink{url, link}
-\link\ (\url\)
-@end macro
-
-@c ifhtml can only be true in Texinfo 4.0, which has uref
-@ifhtml
-@unmacro hlink
-
-@macro hlink{url, link}
-@uref{\url\, \link\}
-@end macro
-
-@macro email{mail}
-@uref{mailto:\mail\, , \mail\}
-@end macro
-
-@macro url{url}
-@uref{\url\}
-@end macro
-@end ifhtml
-
-@c ---------------------------------------------------------------------
-@c References to the other half of the manual
-@c ---------------------------------------------------------------------
-
-@ifset USING
-@macro usingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@c ---------------------------------------------------------------------
-@c End of macro section
-@c ---------------------------------------------------------------------
-
-@include version.texi
-@include body.texi
-
-@c %**end of header (This is for running Texinfo on a region.)
-
-@c ***********************************************************************
-
diff --git a/doc/porting.texi b/doc/porting.texi
deleted file mode 100644
index 66cd15118..000000000
--- a/doc/porting.texi
+++ /dev/null
@@ -1,1600 +0,0 @@
-@node Structure of a port
-@chapter An overview of the porting process
-
-A particular port of @lightning{} is composed of four files. These
-have a common suffix which identifies the port (for example,
-@code{i386} or @code{ppc}), and a prefix that identifies their
-function; they are:
-
-@itemize @bullet
-@item
-@file{asm-@var{suffix}.h}, which contains the description of the
-target machine's instruction format.  The creation of this file
-is discussed in @ref{Run-time assemblers, , Creating the run-time
-assembler}.
-
-@item
-@file{core-@var{suffix}.h}, which contains the mappings from
-@lightning{}'s instruction set to the target machine's assembly
-language format.  The creation of this file is discussed in
-@ref{Standard macros, , Creating the platform-independent layer}.
-
-@item
-@file{funcs-@var{suffix}.h}, for now, only contains the definition
-of @code{jit_flush_code}. The creation of this file is briefly
-discussed in @ref{Standard functions, , More complex tasks in
-the platform-independent layer}.
-
-@item
-@file{fp-@var{suffix}.h}, which contains the description of the
-target machine's instruction format and the internal macros for doing
-floating point computation. The creation of this file is discussed
-in @ref{Floating-point macros, , Implementing macros for floating
-point}.
-@end itemize
-
-Before doing anything, you have to add the ability to recognize the
-new port during the configuration process.  This is explained in
-@ref{Adjusting configure, , Automatically recognizing the new platform}.
-
-@node Adjusting configure
-@chapter Automatically recognizing the new platform
-
-Before starting your port, you have to add the ability to recognize the
-new port during the configure process.  You only have to run
-@file{config.guess}, which you'll find in the main distribution
-directory, and note down the first part of the output (up to the first
-dash).
-
-Then, in the two files @file{configure.in} and @file{lightning.m4},
-look up the line
-@example
-    case "$host_cpu" in
-@end example
-
-@noindent
-and, right after it, add the line:
-@example
-    @var{cpu-name})  cpu=@var{file-suffix}           ;;
-@end example
-
-@noindent
-where @var{cpu-name} is the cpu as output by @file{config.guess}, and
-@var{file-suffix} is the suffix that you are going to use for your files
-(@pxref{Structure of a port, , An overview of the porting process}).
-
-Now create empty files for your new port:
-@example
-    touch lightning/asm-xxx.h
-    touch lightning/fp-xxx.h
-    touch lightning/core-xxx.h
-    touch lightning/funcs-xxx.h
-@end example
-
-@noindent
-and run @file{configure}, which should create the symlinks that are
-needed by @code{lightning.h}.  This is important because it will allow
-you to use @lightning{} (albeit in a limited way) for testing even
-before the port is completed.
-
-@node Run-time assemblers
-@chapter Creating the run-time assembler
-
-The run-time assembler is a set of macros whose purpose is to assemble
-instructions for the target machine's assembly language, translating
-mnemonics to machine language together with their operands.  While a
-run-time assembler is not, strictly speaking, part of @lightning{}
-(it is a private layer to be used while implementing the standard
-macros that are ultimately used by clients), designing a run-time
-assembler first allows you to think in terms of assembly language
-rather than binary code (ouch!@dots{}), making it considerably easier
-to write the standard macros.
-
-Creating a run-time assembler is a tedious process rather than a
-difficult one, because most of the time will be spent collecting and
-copying information from the architecture's manual.
-
-Macros defined by a run-time assembler are conventionally named after
-the mnemonic and the type of its operands.  Examples taken from the
-SPARC's run-time assembler are @code{ADDrrr}, a macro that assembles
-an @code{ADD} instruction with three register operands, and
-@code{SUBCCrir}, which assembles a @code{SUBCC} instruction whose second
-operand is an immediate and the remaining two are registers.
-
-The first step in creating the assembler is to pick a convention for
-operand specifiers (@code{r} and @code{i} in the example above) and for
-register names.  On the SPARC, this convention is as follows
-
-@table @code
-@item @b{r}
-A register name.  For every @code{r} in the macro name, a numeric
-parameter @code{RR} is passed to the macro, and the operand is assembled
-as @code{%r@var{RR}}.
-
-@item @b{i}
-An immediate, usually a 13-bit signed integer (with exception for
-instructions such as @code{SETHI} and branches).  The macros check
-the size of the passed parameter if @lightning{} is configured with
-@code{--enable-assertions}.
-
-@item @b{x}
-A combination of two @code{r} parameters, which are summed to determine
-the effective address in a memory load/store operation.
-
-@item @b{m}
-A combination of an @code{r} and @code{i} parameter, which are summed to
-determine the effective address in a memory load/store operation.
-@end table
-
-Additional macros can be defined that provide easier access to register
-names.  For example, on the SPARC, @code{_Ro(3)} and @code{_Rg(5)} map
-respectively to @code{%o3} and @code{%g5}; on the x86, instead, symbolic
-representations of the register names are provided (for example,
-@code{_EAX} and @code{_EBX}).
-
-CISC architectures sometimes have registers of different sizes---this is
-the case on the x86 where @code{%ax} is a 16-bit register while
-@code{%esp} is a 32-bit one.  In this case, it can be useful to embed
-information on the size in the definition of register names.  The x86
-machine language, for example, represents all three of @code{%bh},
-@code{%di} and @code{%edi} as 7; but the x86 run-time assembler defines
-them with different numbers, putting the register's size in the upper
-nybble (for example, @samp{17h} for @code{%bh} and @samp{27h} for
-@code{%di}) so that consistency checks can be made on the operands'
-sizes when @code{--enable-assertions} is used.
-
-The next important part defines the native architecture's instruction
-formats.  These can be as few as ten on RISC architectures, and as many
-as fifty on CISC architectures.  In the latter case it can be useful
-to define more macros for sub-formats (such as macros for different
-addressing modes) or even for sub-fields in an instruction.  Let's see
-an example of these macros.
-
-@example
-#define _2i( OP, RD, OP2, IMM)
-        _I((_u2 (OP )<<30)  |  (_u5(RD)<<25)  |  (_u3(OP2)<<22)  |
-            _u22(IMM)                                            )
-@end example
-
-The name of the macro, @code{_2i}, indicates a two-operand instruction
-comprising an immediate operand.  The instruction format is:
-
-@example
- .------.---------.------.-------------------------------------------.
- |  OP  |   RD    | OP2  |               IMM                         |
- |------+---------+------+-------------------------------------------|
- |2 bits|  5 bits |3 bits|             22 bits                       |
- |31-30 |  29-25  | 22-24|              0-21                         |
- '------'---------'------'-------------------------------------------'
-@end example
-
-@lightning{} provides macros named @code{_sXX(OP)} and @code{_uXX(OP)},
-where XX is a number between 1 and 31, which test@footnote{Only when
-@code{--enable-assertions} is used.} whether @code{OP} can be
-represented as (respectively) a signed or unsigned integer of the
-given size.  What the macro above does, then, is to shift and @sc{or}
-together the different fields, ensuring that each of them fits the field.
-
-Here is another definition, this time for the PowerPC architecture.
-
-@example
-#define _X(OP,RD,RA,RB,XO,RC)
-        _I((_u6 (OP)<<26)  |  (_u5(RD)<<21)  |  (_u5(RA)<<16)  |
-           ( _u5(RB)<<11)  |  (_u10(XO)<<1)  |   _u1(RC)       )
-@end example
-
-Here is the bit layout corresponding to this instruction format:
-
-@example
- .--------.--------.--------.--------.---------------------.-------.
- |    OP  |   RD   |   RA   |   RB   |           XO        |   RC  |
- |--------+--------+--------+--------+-----------------------------|
- | 6 bits | 5 bits | 5 bits | 5 bits |         10 bits     | 1 bit |
- | 31-26  | 25-21  | 16-20  | 11-15  |         1-10        |   0   |
- '--------'---------'-------'--------'-----------------------------'
-@end example
-
-How do these macros actually generate code? The secret lies in the
-@code{_I} macro, which is one of four predefined macros which actually
-store machine language instructions in memory.  They are @code{_B},
-@code{_W}, @code{_I} and @code{_L}, respectively for 8-bit, 16-bit,
-32-bit, and @code{long} (either 32-bit or 64-bit, depending on the
-architecture) values.
-
-Next comes another set of macros (usually the biggest) which represents
-the actual mnemonics---macros such as @code{ADDrrr} and @code{SUBCCrir},
-which were cited earlier in this chapter, belong to this set.  Most of
-the time, all these macros will do is use the ``instruction format''
-macros, specifying the values of the fields in the different instruction
-formats.  Let's see a few of these definitions, again taken from the
-SPARC assembler:
-
-@example
-#define BAi(DISP)                       _2   (0, 0,  8, 2, DISP)
-#define BA_Ai(DISP)                     _2   (0, 1,  8, 2, DISP)
-
-#define SETHIir(IMM, RD)                _2i  (0, RD, 4, IMM)
-
-#define ADDrrr(RS1, RS2, RD)            _3   (2, RD,  0, RS1, 0, 0, RS2)
-#define ADDrir(RS1, IMM, RD)            _3i  (2, RD,  0, RS1, 1,    IMM)
-#define ADDCCrrr(RS1, RS2, RD)          _3   (2, RD, 16, RS1, 0, 0, RS2)
-#define ADDCCrir(RS1, IMM, RD)          _3i  (2, RD, 16, RS1, 1,    IMM)
-#define ANDrrr(RS1, RS2, RD)            _3   (2, RD,  1, RS1, 0, 0, RS2)
-#define ANDrir(RS1, IMM, RD)            _3i  (2, RD,  1, RS1, 1,    IMM)
-#define ANDCCrrr(RS1, RS2, RD)          _3   (2, RD, 17, RS1, 0, 0, RS2)
-#define ANDCCrir(RS1, IMM, RD)          _3i  (2, RD, 17, RS1, 1,    IMM)
-@end example
-
-A few things have to be noted.  For example:
-@itemize @bullet
-@item
-The SPARC assembly language sometimes uses a comma inside a mnemonic
-(for example, @code{ba,a}).  This symbol is not allowed inside a
-@sc{cpp} macro name, so it is replaced with an underscore; the same
-is done with the dots found in the PowerPC assembly language (for
-example, @code{andi.} is defined as @code{ANDI_rri}).
-
-@item
-It can be useful to group together instructions with the same
-instruction format, as doing this tends to make the source code
-more readable (numbers are put in the same columns).
-
-@item
-Using an editor without automatic wrap at end of line can be useful,
-since run-time assemblers tend to have very long lines.
-@end itemize
-
-A final touch is to define the synthetic instructions, which are
-usually found on RISC machines.  For example, on the SPARC, the
-@code{LD} instruction has two synonyms (@code{LDUW} and @code{LDSW})
-which are defined thus:
-
-@example
-#define LDUWxr(RS1, RS2, RD)            LDxr(RS1, RS2, RD)
-#define LDUWmr(RS1, IMM, RD)            LDmr(RS1, IMM, RD)
-#define LDSWxr(RS1, RS2, RD)            LDxr(RS1, RS2, RD)
-#define LDSWmr(RS1, IMM, RD)            LDmr(RS1, IMM, RD)
-@end example
-
-Other common cases are instructions which take advantage of registers
-whose value is hard-wired to zero, and short-cut instructions which
-hard-code some or all of the operands:
-
-@example
-@rem{/* Destination is %g0\, which the processor never overwrites. */}
-#define CMPrr(R1, R2)   SUBCCrrr(R1, R2, 0) @rem{/* subcc %r1\, %r2\, %g0 */}
-
-@rem{/* One of the source registers is hard-coded to be %g0. */}
-#define NEGrr(R,S)      SUBrrr(0, R, S)     @rem{/* sub %g0\, %rR\, %rS */}
-
-@rem{/* All of the operands are hard-coded. */}
-#define RET()           JMPLmr(31,8 ,0)     @rem{/* jmpl [%r31+8]\, %g0  */}
-
-@rem{/* One of the operands acts as both source and destination */}
-#define BSETrr(R,S)     ORrrr(R, S, S)      @rem{/* or %rR\, %rS\, %rS */}
-@end example
-
-Specific to RISC computers, finally, is the instruction to load an
-arbitrarily sized immediate into a register.  This instruction is
-usually implemented as one or two basic instructions:
-
-@enumerate
-@item
-If the number is small enough, an instruction is sufficient
-(@code{LI} or @code{ORI} on the PowerPC, @code{MOV} on the SPARC).
-
-@item
-If the lowest bits are all zeroed, an instruction is sufficient
-(@code{LIS} on the PowerPC, @code{SETHI} on the SPARC).
-
-@item
-Otherwise, the high bits are set first (with @code{LIS} or
-@code{SETHI}), and the result is then @sc{or}ed with the low
-bits.
-@end enumerate
-
-Here is the definition of such an instruction for the PowerPC:
-
-@example
-#define MOVEIri(R,I)      (_siP(16,I) ? LIri(R,I) :     \ @rem{/* case 1    */}
-                          (_uiP(16,I) ? ORIrri(R,0,I) : \ @rem{/* case 1    */}
-                          _MOVEIri(R, _HI(I), _LO(I)) ))  @rem{/* case 2/3  */}
-
-#define _MOVEIri(H,L,R)  (LISri(R,H), (L ? ORIrri(R,R,L) : 0))
-@end example
-
-@noindent
-and for the SPARC:
-
-@example
-#define SETir(I,R)      (_siP(13,I) ? MOVir(I,R) : \
-			 _SETir(_HI(I), _LO(I), R))
-
-#define _SETir(H,L,R)   (SETHIir(H,R), (L ? ORrir(R,L,R) : 0))
-@end example
-
-In both cases, @code{_HI} and @code{_LO} are macros for internal use
-that extract different parts of the immediate operand.
-
-You should take a look at the run-time assemblers distributed with
-@lightning{} before trying to craft your own.  In particular, make
-sure you understand the RISC run-time assemblers (the SPARC's is
-the simplest) before trying to decipher the x86 run-time assembler,
-which is significantly more complex.
-
-
-@node Standard macros
-@chapter Creating the platform-independent layer
-
-The platform-independent layer is the one that is ultimately used
-by @lightning{} clients.  Creating this layer is a matter of creating
-a hundred or so macros that comprise part of the interface used by
-the clients, as described in
-@usingref{The instruction set, @lightning{}'s instruction set}.
-
-Fortunately, a number of these definitions are common to the different
-platforms and are defined just once in one of the header files that
-make up @lightning{}, that is, @file{core-common.h}.
-
-Most of the macros are relatively straightforward to implement (with
-a few caveats for architectures whose assembly language only offers
-two-operand arithmetic instructions).  This section will cover the
-tricky points, before presenting the complete listing of the macros
-that make up the platform-independent interface provided by
-@lightning{}.
-
-@menu
-@standardmacrosmenu{}
-@end menu
-
-@node Forward references
-@section Implementing forward references
-
-Implementation of forward references takes place in:
-
-@itemize @bullet
-@item
-The branch macros
-
-@item
-The @code{jit_patch_at} macros
-@end itemize
-
-Roughly speaking, the branch macros, as seen in @usingref{GNU lightning
-macros, Generating code at run-time}, return a value that later calls
-to @code{jit_patch} or @code{jit_patch_at} use to complete the assembly
-of the forward reference.  This value is usually the contents of the
-program counter after the branch instruction is compiled (which is
-accessible in the @code{_jit.pc} variable).  Let's see an example from
-the x86 back-end:
-
-@example
-#define jit_bmsr_i(label, s1, s2)                            \
-   (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.pc)
-@end example
-
-The @code{bms} (@dfn{branch if mask set}) instruction is assembled as
-the combination of a @code{TEST} instruction (bit-wise @sc{and} between
-the two operands) and a @code{JNZ} instruction (jump if non-zero).  The
-macro then returns the final value of the program counter.
-
-@code{jit_patch_at} is one of the few macros that need to possess a
-knowledge of the machine's instruction formats.  Its purpose is to
-patch a branch instruction (identified by the value returned at the
-moment the branch was compiled) to jump to the current position (that
-is, to the address identified by @code{_jit.pc}).
-
-On the x86, the displacement between the jump and the landing point is
-expressed as a 32-bit signed integer lying in the last four bytes of the
-jump instruction.  The definition of @code{jit_patch_at} is:
-
-@example
-#define jit_patch_at(jump_pc, pv) (*_PSL((jump_pc) - 4) = \
-				   (pv) - (jump_pc))
-@end example
-
-The @code{_PSL} macro is nothing more than a cast to @code{long *},
-and is used here to shorten the definition and avoid cluttering it with
-excessive parentheses.  These type-cast macros are:
-
-@itemize @bullet
-@item
-@code{_PUC(X)} to cast to a @code{unsigned char *}.
-
-@item
-@code{_PUS(X)} to cast to a @code{unsigned short *}.
-
-@item
-@code{_PUI(X)} to cast to a @code{unsigned int *}.
-
-@item
-@code{_PSL(X)} to cast to a @code{long *}.
-
-@item
-@code{_PUL(X)} to cast to a @code{unsigned long *}.
-@end itemize
-
-On other platforms, notably RISC ones, the displacement is embedded into
-the instruction itself.  In this case, @code{jit_patch_at} must first zero
-out the field, and then @sc{or} in the correct displacement.  The SPARC,
-for example, encodes the displacement in the bottom 22 bits; in addition
-the right-most two bits are suppressed, which are always zero because
-instructions have to be word-aligned.
-
-@example
-#define jit_patch_at(delay_pc, pv)   jit_patch_ (((delay_pc) - 1), (pv))
-
-@rem{/* branch instructions return the address of the @emph{delay}
- * instruction---this is just a helper macro that makes the code more
- * readable.
- */}
-#define jit_patch_(jump_pc, pv)   (*jump_pc =		    \
-	 (*jump_pc & ~_MASK(22)) |			    \
-         ((_UL(pv) - _UL(jump_pc)) >> 2) & _MASK(22))
-@end example
-
-This introduces more predefined shortcut macros:
-@itemize @bullet
-@item
-@code{_UC(X)} to cast to a @code{unsigned char}.
-
-@item
-@code{_US(X)} to cast to a @code{unsigned short}.
-
-@item
-@code{_UI(X)} to cast to a @code{unsigned int}.
-
-@item
-@code{_SL(X)} to cast to a @code{long}.
-
-@item
-@code{_UL(X)} to cast to a @code{unsigned long}.
-
-@item
-@code{_MASK(N)} gives a binary number made of N ones.
-@end itemize
-
-Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
-and @code{jit_patch_movi}, since they can also be used to implement
-forward references.  @code{jit_movi_p} should be carefully implemented
-to use an encoding that is as long as possible, and it should return
-an address which is then passed to @code{jit_patch_movi}.  The
-implementation of @code{jit_patch_movi} is similar to
-@code{jit_patch_at}.
-
-@node Common features
-@section Common features supported by @file{core-common.h}
-
-The @file{core-common.h} file contains hundreds of macro definitions
-which will spare you defining a lot of things in the files that are
-specific to your port.  Here is a list of the features that
-@file{core-common.h} provides.
-
-@table @b
-@item Support for common synthetic instructions
-These are instructions that can be represented as a simple operation,
-for example a bit-wise @sc{and} or a subtraction.  @file{core-common.h}
-recognizes when the port-specific header file defines these macros and
-avoids compiler warnings about redefined macros, but there should be
-no need to define them.  They are:
-@example
-#define jit_extr_c_ui(d, rs)
-#define jit_extr_s_ui(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_negr_i(d, rs)
-#define jit_negr_l(d, rs)
-@end example
-
-@item Support for the @sc{abi}
-None of @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} is
-mandatory.  If not defined, they will be defined respectively as an
-empty macro, as a synonym for @code{jit_prolog}, and as a synonym for
-@code{jit_calli}.  Whether to define them in the port-specific
-header file depends on the underlying architecture's @sc{abi}---in
-general, however, you'll need to define at least @code{jit_prolog}.
-
-@item Support for uncommon instructions
-These are instructions that many widespread architectures lack.
-@file{core-common.h} is able to provide default definitions, but they
-are usually inefficient if the hardware provides a way to do these
-operations with a single instruction.  They are extension with sign
-and ``reverse subtraction'' (that is, REG2@math{=}IMM@math{-}REG1):
-@example
-#define jit_extr_c_i(d, rs)
-#define jit_extr_s_i(d, rs)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_rsbi_i(d, rs, is)
-#define jit_rsbi_l(d, rs, is)
-#define jit_rsbi_p(d, rs, is)
-@end example
-
-@item Conversion between network and host byte ordering
-These macros are no-ops on big endian systems.  Don't define them on
-such systems; on the other hand, they are mandatory on little endian
-systems.  They are:
-@example
-#define jit_ntoh_ui(d, rs)
-#define jit_ntoh_us(d, rs)
-@end example
-
-@item Support for a ``zero'' register
-Many RISC architectures provide a read-only register whose value is
-hard-coded to be zero; this register is then used implicitly when
-referring to a memory location using a single register.  For example,
-on the SPARC, an operand like @code{[%l6]} is actually assembled as
-@code{[%l6+%g0]}.  If this is the case, you should define
-@code{JIT_RZERO} to be the number of this register; @file{core-common.h}
-will use it to implement all variations of the @code{ld} and @code{st}
-instructions.  For example:
-@example
-#define jit_ldi_c(d, is)         jit_ldxi_c(d, JIT_RZERO, is)
-#define jit_ldr_i(d, rs)         jit_ldxr_c(d, JIT_RZERO, rs)
-@end example
-
-If available, @code{JIT_RZERO} is also used to provide more efficient
-definitions of the @code{neg} instruction (see ``Support for common
-synthetic instructions'', above).
-
-@item Synonyms
-@file{core-common.h} provides a lot of trivial definitions which make
-the instruction set as orthogonal as possible.  For example, adding two
-unsigned integers is exactly the same as adding two signed integers
-(assuming a two's complement representation of negative numbers); yet,
-@lightning{} provides both @code{jit_addr_i} and @code{jit_addr_ui}
-macros.  Similarly, pointers and unsigned long integers behave in the
-same way, but @lightning{} has separate instructions for the two data
-types---those that operate on pointers usually include a typecast
-that makes programs clearer.
-
-@item Shortcuts
-These define ``synthetic'' instructions whose definition is not as
-trivial as in the case of synonyms, but is anyway standard.  This
-is the case for bitwise @sc{not} (which is implemented by XORing a
-string of ones), ``reverse subtraction'' between registers (which is
-converted to a normal subtraction with the two source operands
-inverted), and subtraction of an immediate from a register (which is
-converted to an addition).  Unlike @code{neg} and @code{ext} (see
-``Support for common synthetic instructions'', above), which are
-simply non-mandatory, you must not define these functions.
-
-@item Support for @code{long}s
-On most systems, @code{long}s and @code{unsigned long}s are the same
-as, respectively, @code{int}s and @code{unsigned int}s.  In this case,
-@file{core-common.h} defines operations on these types to be synonyms.
-
-@item @code{jit_state}
-Last but not least, @file{core-common.h} defines the @code{jit_state}
-type.  Part of this @code{struct} is machine-dependent and includes
-all kinds of state needed by the back-end; this part is always
-accessible in a re-entrant way as @code{_jitl}.  @code{_jitl} will be
-of type @code{struct jit_local_state}; this struct must be defined
-even if no state is required.
-
-@end table
-
-@node Delay slots
-@section Supporting scheduling of delay slots
-
-Delay slot scheduling is obtained by clients through the
-@code{jit_delay} macro.  However this macro is not to be defined
-in the platform-independent layer, because @lightning{} provides
-a common definition in @file{core-common.h}.
-
-Instead, the platform-independent layer must define another macro,
-called @code{jit_fill_delay_after}, which has to exchange the
-instruction to be scheduled in the delay slot with the branch
-instruction.  The only parameter accepted by the macro is a call
-to a branch macro, which must be expanded @strong{exactly once} by
-@code{jit_fill_delay_after}.  The client must be able to pass the
-return value of @code{jit_fill_delay_after} to @code{jit_patch_at}.
-
-There are two possible approaches that can be used in
-@code{jit_fill_delay_after}.  They are summarized in the following
-pictures:
-
-@itemize @bullet
-@item
-The branch instructions assemble a @sc{nop} instruction which is
-then removed by @code{jit_fill_delay_after}.
-
-@example
-     before                         |   after
-   ---------------------------------+-----------------------------
-     ...                            |
-     <would-be delay instruction>   |    <branch instruction>
-     <branch instruction>           |    <delay instruction>
-     NOP                            |           <--- _jit.pc
-              <--- _jit.pc          |
-@end example
-
-@item
-The branch instruction assembles the branch so that the delay
-slot is annulled, @code{jit_fill_delay_after} toggles the bit:
-
-@example
-     before                         |   after
-   ---------------------------------+-----------------------------
-     ...                            |
-     <would-be delay instruction>   |    <branch instruction>
-     <branch with annulled delay>   |    <delay instruction>
-              <--- _jit.pc          |           <--- _jit.pc
-@end example
-@end itemize
-
-Don't forget that you can take advantage of delay slots in the
-implementation of boolean instructions such as @code{le} or @code{gt}.
-
-@node Immediate values
-@section Supporting arbitrarily sized immediate values
-
-This is a problem that is endemic to RISC machines.  The basic idea
-is to reserve one or two registers to represent large immediate values.
-Let's see an example from the SPARC:
-
-@example
-   addi_i R0, V2, 45         |  addi_i R0, V2, 10000
-  ---------------------------+---------------------------
-   add    %l5, 45, %l0       |  set    10000, %l6
-                             |  add    %l5, %l6, %l0
-@end example
-
-In this case, @code{%l6} is reserved to be used for large immediates.
-An elegant solution is to use an internal macro which automatically
-decides which version is to be compiled.
-
-Beware of register conflicts on machines with delay slots.  This is
-the case for the SPARC, where @code{%l7} is used instead for large
-immediates in compare-and-branch instructions.  So the sequence
-
-@example
-   jit_delay(
-      jit_addi_i(JIT_R0, JIT_V2, 10000),
-      jit_blei_i(label, JIT_R1, 20000)
-   );
-@end example
-
-@noindent
-is assembled this way:
-
-@example
-   set 10000, %l6       @rem{! prepare immediate for add}
-   set 20000, %l7       @rem{! prepare immediate for cmp}
-   cmp %l1, %l7
-   ble label
-   add %l5, %l6, %l0    @rem{! delay slot}
-	@end example
-
-Note that using @code{%l6} in the branch instruction would have given
-an incorrect result---@code{R0} would have been filled with the value of
-@code{V2+@i{20000}} rather than @code{V2+@i{10000}}.
-
-@node Implementing the ABI
-@section Implementing the ABI
-
-Implementing the underlying architecture's @sc{abi} is done in the
-macros that handle function prologs and epilogs and argument passing.
-
-Let's look at the prologs and epilogs first.  These are usually pretty
-simple and, what's more important, with constant content---that is,
-they always generate exactly the same instruction sequence.  Here is
-an example:
-
-@example
-          SPARC                        x86
-          save %sp, -96, %sp           push %ebp
-                                       push %ebx
-                                       push %esi
-                                       push %edi
-                                       movl %esp, %ebp
-          ...                          ...
-          ret                          popl %edi
-          restore                      popl %esi
-                                       popl %ebx
-                                       popl %ebp
-                                       ret
-@end example
-
-The registers that are saved (@code{%ebx}, @code{%esi}, @code{%edi}) are
-mapped to the @code{V0} through @code{V2} registers in the @lightning{}
-instruction set.
-
-Argument passing is more tricky.  There are basically three
-cases@footnote{For speed and ease of implementation, @lightning{} does not
-currently support passing some of the parameters on the stack and some
-in registers.}:
-@table @b
-@item Register windows
-Output registers are different from input registers---the prolog takes
-care of moving the caller's output registers to the callee's input
-registers.  This is the case with the SPARC.
-
-@item Passing parameters via registers
-In this case, output registers are the same as input registers.  The
-program must take care of saving input parameters somewhere (on the
-stack, or in non-argument registers).  This is the case with the
-PowerPC.
-
-@item All the parameters are passed on the stack
-This case is by far the simplest and is the most common in CISC
-architectures, like the x86 and Motorola 68000.
-@end table
-
-In all cases, the port-specific header file will define two variables
-for private use---one to be used by the caller during the
-@code{prepare}/@code{pusharg}/@code{finish} sequence, one to be used
-by the callee, specifically in the @code{jit_prolog} and @code{jit_arg}
-macros.
-
-Let's look again, this time with more detail, at each of the cases.
-
-@table @b
-@item Register windows
-@code{jit_finish} is the same as @code{jit_calli}, and is defined
-in @file{core-common.h} (@pxref{Common features, , Common features
-supported by @file{core-common.h}}).
-
-@example
-#define jit_prepare_i(numargs)  (_jitl.pusharg = _Ro(numargs))
-#define jit_pusharg_i(rs)       (--_jitl.pusharg,         \
-                                 MOVrr((rs), _jitl.pusharg))
-@end example
-
-Remember that argument pushing takes place in reverse order, thus
-giving a pre-decrement (rather than post-increment) in
-@code{jit_pusharg_i}.
-
-Here is what happens on the callee's side:
-
-@example
-#define jit_arg_c()           (_jitl.getarg++)
-#define jit_getarg_c(rd, ofs) jit_extr_c_i  ((rd), (ofs))
-#define jit_prolog(numargs)   (SAVErir(JIT_SP, -96, JIT_SP), \
-                               _jitl.getarg = _Ri(0))
-@end example
-
-The @code{jit_arg} macros return nothing more than a register index,
-which is then used by the @code{jit_getarg} macros.  @code{jit_prolog}
-resets the counter used by @code{jit_arg} to zero; the @code{numargs}
-parameter is not used. It is sufficient for @code{jit_leaf} to be a
-synonym for @code{jit_prolog}.
-
-@item Passing parameters via registers
-The code is almost the same as that for the register windows case, but
-with an additional complexity---@code{jit_arg} will transfer the
-argument from the input register to a non-argument register so that
-function calls will not clobber it.  The prolog and epilog code can then
-become unbearably long, up to 20 instructions on the PPC; a common
-solution in this case is that of @dfn{trampolines}.
-
-The prolog does nothing more than put the function's actual address in a
-caller-preserved register and then call the trampoline:
-@example
-       mflr    r0                 @rem{! grab return address}
-       movei   r10, trampo_2args  @rem{! jump to trampoline}
-       mtlr    r10
-       blrl
-here:  mflr    r31                @rem{! r31 = address of epilog}
-       @rem{...actual code...}
-       mtlr    r31                @rem{! return to the trampoline}
-       blr
-@end example
-
-In this case, @code{jit_prolog} does use its argument containing the
-number of parameters to pick the appropriate trampoline. Here,
-@code{trampo_2args} is the address of a trampoline designed for
-2-argument functions.
-
-The trampoline executes the prolog code, jumps to the contents of
-@code{r10}, and upon return from the subroutine it executes the
-epilog code.
-
-@item All the parameters are passed on the stack
-@code{jit_pusharg} uses a hardware push operation, which is commonly
-available on CISC machines (where this approach is most likely
-followed).  Since the stack has to be cleaned up after the call,
-@code{jit_prepare_i} remembers how many parameters have been put there,
-and @code{jit_finish} adjusts the stack pointer after the call.
-
-@example
-#define jit_prepare_i(numargs) (_jitl.args += (numargs))
-#define jit_pusharg_i(rs)      PUSHLr(rs)
-#define jit_finish(sub)        (jit_calli((sub)),              \
-                               ADDLir(4 * _jitl.args, JIT_SP), \
-                               _jitl.numargs = 0)
-@end example
-
-Note the usage of @code{+=} in @code{jit_prepare_i}.  This is done
-so that one can defer the popping of the arguments that were saved
-on the stack (@dfn{stack pollution}).  To do so, it is sufficient to
-use @code{jit_calli} instead of @code{jit_finish} in all but the
-last call.
-
-On the caller's side, @code{arg} returns an offset relative to the
-frame pointer, and @code{getarg} loads the argument from the stack:
-
-@example
-#define jit_getarg_c(rd, ofs) jit_ldxi_c((rd), _EBP, (ofs));
-#define jit_arg_c()           ((_jitl.frame += sizeof(int) \
-                                            -  sizeof(int))
-@end example
-
-The @code{_jitl.frame} variable is initialized by @code{jit_prolog}
-with the displacement between the value of the frame pointer
-(@code{%ebp}) and the address of the first parameter.
-@end table
-
-These schemes are the most used, so @file{core-common.h} provides a way
-to employ them automatically.  If you do not define the
-@code{jit_getarg_c} macro and its companions, @file{core-common.h} will
-presume that you intend to pass parameters through either the registers
-or the stack.
-
-If you define @code{JIT_AP}, stack-based parameter passing will be
-employed and the @code{jit_getarg} macros will be defined like this:
-
-@example
-#define jit_getarg_c(reg, ofs)  jit_ldxi_c((reg), JIT_AP, (ofs));
-@end example
-
-In other words, the @code{jit_arg} macros (which are still to be defined
-by the platform-specific back-end) shall return an offset into the stack
-frame.  On the other hand, if you don't define @code{JIT_AP},
-register-based parameter passing will be employed and the @code{jit_arg}
-macros shall return a register number; in this case, @code{jit_getarg}
-will be implemented in terms of @code{jit_extr} and @code{jit_movr}
-operations:
-
-@example
-#define jit_getarg_c(reg, ofs)		jit_extr_c_i  ((reg), (ofs))
-#define jit_getarg_i(reg, ofs)		jit_movr_i    ((reg), (ofs))
-@end example
-
-
-@node Macro list
-@section Macros composing the platform-independent layer
-
-@table @b
-@item Register names (all mandatory but the last three)
-@example
-#define JIT_R
-#define JIT_R_NUM
-#define JIT_V
-#define JIT_V_NUM
-#define JIT_FPR
-#define JIT_FPR_NUM
-#define JIT_FP
-#define JIT_SP
-#define JIT_AP
-#define JIT_RZERO
-@end example
-
-@item Helper macros (non-mandatory):
-@example
-#define jit_fill_delay_after(branch)
-@end example
-
-@item Mandatory:
-@example
-#define jit_allocai()
-#define jit_arg_c()
-#define jit_arg_i()
-#define jit_arg_l()
-#define jit_arg_p()
-#define jit_arg_s()
-#define jit_arg_uc()
-#define jit_arg_ui()
-#define jit_arg_ul()
-#define jit_arg_us()
-#define jit_abs_d(rd,rs)
-#define jit_addi_i(d, rs, is)
-#define jit_addr_d(rd,s1,s2)
-#define jit_addr_i(d, s1, s2)
-#define jit_addxi_i(d, rs, is)
-#define jit_addxr_i(d, s1, s2)
-#define jit_andi_i(d, rs, is)
-#define jit_andr_i(d, s1, s2)
-#define jit_beqi_i(label, rs, is)
-#define jit_beqr_d(label, s1, s2)
-#define jit_beqr_i(label, s1, s2)
-#define jit_bgei_i(label, rs, is)
-#define jit_bgei_ui(label, rs, is)
-#define jit_bger_d(label, s1, s2)
-#define jit_bger_i(label, s1, s2)
-#define jit_bger_ui(label, s1, s2)
-#define jit_bgti_i(label, rs, is)
-#define jit_bgti_ui(label, rs, is)
-#define jit_bgtr_d(label, s1, s2)
-#define jit_bgtr_i(label, s1, s2)
-#define jit_bgtr_ui(label, s1, s2)
-#define jit_blei_i(label, rs, is)
-#define jit_blei_ui(label, rs, is)
-#define jit_bler_d(label, s1, s2)
-#define jit_bler_i(label, s1, s2)
-#define jit_bler_ui(label, s1, s2)
-#define jit_bltgtr_d(label, s1, s2)
-#define jit_blti_i(label, rs, is)
-#define jit_blti_ui(label, rs, is)
-#define jit_bltr_d(label, s1, s2)
-#define jit_bltr_i(label, s1, s2)
-#define jit_bltr_ui(label, s1, s2)
-#define jit_bmci_i(label, rs, is)
-#define jit_bmcr_i(label, s1, s2)
-#define jit_bmsi_i(label, rs, is)
-#define jit_bmsr_i(label, s1, s2)
-#define jit_bnei_i(label, rs, is)
-#define jit_bner_d(label, s1, s2)
-#define jit_bner_i(label, s1, s2)
-#define jit_boaddi_i(label, rs, is)
-#define jit_boaddi_ui(label, rs, is)
-#define jit_boaddr_i(label, s1, s2)
-#define jit_boaddr_ui(label, s1, s2)
-#define jit_bordr_d(label, s1, s2)
-#define jit_bosubi_i(label, rs, is)
-#define jit_bosubi_ui(label, rs, is)
-#define jit_bosubr_i(label, s1, s2)
-#define jit_bosubr_ui(label, s1, s2)
-#define jit_buneqr_d(label, s1, s2)
-#define jit_bunger_d(label, s1, s2)
-#define jit_bungtr_d(label, s1, s2)
-#define jit_bunler_d(label, s1, s2)
-#define jit_bunltr_d(label, s1, s2)
-#define jit_bunordr_d(label, s1, s2)
-#define jit_calli(label)
-#define jit_callr(label)
-#define jit_ceilr_d_i(rd, rs)
-#define jit_divi_i(d, rs, is)
-#define jit_divi_ui(d, rs, is)
-#define jit_divr_d(rd,s1,s2)
-#define jit_divr_i(d, s1, s2)
-#define jit_divr_ui(d, s1, s2)
-#define jit_eqi_i(d, rs, is)
-#define jit_eqr_d(d, s1, s2)
-#define jit_eqr_i(d, s1, s2)
-#define jit_extr_i_d(rd, rs)
-#define jit_floorr_d_i(rd, rs)
-#define jit_gei_i(d, rs, is)
-#define jit_gei_ui(d, s1, s2)
-#define jit_ger_d(d, s1, s2)
-#define jit_ger_i(d, s1, s2)
-#define jit_ger_ui(d, s1, s2)
-#define jit_gti_i(d, rs, is)
-#define jit_gti_ui(d, s1, s2)
-#define jit_gtr_d(d, s1, s2)
-#define jit_gtr_i(d, s1, s2)
-#define jit_gtr_ui(d, s1, s2)
-#define jit_hmuli_i(d, rs, is)
-#define jit_hmuli_ui(d, rs, is)
-#define jit_hmulr_i(d, s1, s2)
-#define jit_hmulr_ui(d, s1, s2)
-#define jit_jmpi(label)
-#define jit_jmpr(reg)
-#define jit_ldxi_f(rd, rs, is)
-#define jit_ldxr_f(rd, s1, s2)
-#define jit_ldxi_c(d, rs, is)
-#define jit_ldxi_d(rd, rs, is)
-#define jit_ldxi_i(d, rs, is)
-#define jit_ldxi_s(d, rs, is)
-#define jit_ldxi_uc(d, rs, is)
-#define jit_ldxi_us(d, rs, is)
-#define jit_ldxr_c(d, s1, s2)
-#define jit_ldxr_d(rd, s1, s2)
-#define jit_ldxr_i(d, s1, s2)
-#define jit_ldxr_s(d, s1, s2)
-#define jit_ldxr_uc(d, s1, s2)
-#define jit_ldxr_us(d, s1, s2)
-#define jit_lei_i(d, rs, is)
-#define jit_lei_ui(d, s1, s2)
-#define jit_ler_d(d, s1, s2)
-#define jit_ler_i(d, s1, s2)
-#define jit_ler_ui(d, s1, s2)
-#define jit_lshi_i(d, rs, is)
-#define jit_lshr_i(d, r1, r2)
-#define jit_ltgtr_d(d, s1, s2)
-#define jit_lti_i(d, rs, is)
-#define jit_lti_ui(d, s1, s2)
-#define jit_ltr_d(d, s1, s2)
-#define jit_ltr_i(d, s1, s2)
-#define jit_ltr_ui(d, s1, s2)
-#define jit_modi_i(d, rs, is)
-#define jit_modi_ui(d, rs, is)
-#define jit_modr_i(d, s1, s2)
-#define jit_modr_ui(d, s1, s2)
-#define jit_movi_d(rd,immd)
-#define jit_movi_f(rd,immf)
-#define jit_movi_i(d, is)
-#define jit_movi_p(d, is)
-#define jit_movr_d(rd,rs)
-#define jit_movr_i(d, rs)
-#define jit_muli_i(d, rs, is)
-#define jit_muli_ui(d, rs, is)
-#define jit_mulr_d(rd,s1,s2)
-#define jit_mulr_i(d, s1, s2)
-#define jit_mulr_ui(d, s1, s2)
-#define jit_negr_d(rd,rs)
-#define jit_nei_i(d, rs, is)
-#define jit_ner_d(d, s1, s2)
-#define jit_ner_i(d, s1, s2)
-#define jit_nop()
-#define jit_ordr_d(d, s1, s2)
-#define jit_ori_i(d, rs, is)
-#define jit_orr_i(d, s1, s2)
-#define jit_patch_at(jump_pc, value)
-#define jit_patch_movi(jump_pc, value)
-#define jit_prepare_d(numargs)
-#define jit_prepare_f(numargs)
-#define jit_prepare_i(numargs)
-#define jit_pusharg_i(rs)
-#define jit_ret()
-#define jit_retval_i(rd)
-#define jit_roundr_d_i(rd, rs)
-#define jit_rshi_i(d, rs, is)
-#define jit_rshi_ui(d, rs, is)
-#define jit_rshr_i(d, r1, r2)
-#define jit_rshr_ui(d, r1, r2)
-#define jit_sqrt_d(rd,rs)
-#define jit_stxi_c(id, rd, rs)
-#define jit_stxi_d(id, rd, rs)
-#define jit_stxi_f(id, rd, rs)
-#define jit_stxi_i(id, rd, rs)
-#define jit_stxi_s(id, rd, rs)
-#define jit_stxr_c(d1, d2, rs)
-#define jit_stxr_d(d1, d2, rs)
-#define jit_stxr_f(d1, d2, rs)
-#define jit_stxr_i(d1, d2, rs)
-#define jit_stxr_s(d1, d2, rs)
-#define jit_subr_d(rd,s1,s2)
-#define jit_subr_i(d, s1, s2)
-#define jit_subxi_i(d, rs, is)
-#define jit_subxr_i(d, s1, s2)
-#define jit_truncr_d_i(rd, rs)
-#define jit_uneqr_d(d, s1, s2)
-#define jit_unger_d(d, s1, s2)
-#define jit_ungtr_d(d, s1, s2)
-#define jit_unler_d(d, s1, s2)
-#define jit_unltr_d(d, s1, s2)
-#define jit_unordr_d(d, s1, s2)
-#define jit_xori_i(d, rs, is)
-#define jit_xorr_i(d, s1, s2)
-@end example
-
-@item Non mandatory---there should be no need to define them:
-@example
-#define jit_extr_c_ui(d, rs)
-#define jit_extr_s_ui(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_negr_i(d, rs)
-#define jit_negr_l(d, rs)
-@end example
-
-@item Non mandatory---whether to define them depends on the @sc{abi}:
-@example
-#define jit_prolog(n)
-#define jit_finish(sub)
-#define jit_finishr(reg)
-#define jit_leaf(n)
-#define jit_getarg_c(reg, ofs)
-#define jit_getarg_i(reg, ofs)
-#define jit_getarg_l(reg, ofs)
-#define jit_getarg_p(reg, ofs)
-#define jit_getarg_s(reg, ofs)
-#define jit_getarg_uc(reg, ofs)
-#define jit_getarg_ui(reg, ofs)
-#define jit_getarg_ul(reg, ofs)
-#define jit_getarg_us(reg, ofs)
-#define jit_getarg_f(reg, ofs)
-#define jit_getarg_d(reg, ofs)
-@end example
-
-@item Non mandatory---define them if instructions that do this exist:
-@example
-#define jit_extr_c_i(d, rs)
-#define jit_extr_s_i(d, rs)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_rsbi_i(d, rs, is)
-#define jit_rsbi_l(d, rs, is)
-@end example
-
-@item Non mandatory if condition code are always set by add/sub, needed on other systems:
-@example
-#define jit_addci_i(d, rs, is)
-#define jit_addci_l(d, rs, is)
-#define jit_subci_i(d, rs, is)
-#define jit_subci_l(d, rs, is)
-@end example
-
-@item Mandatory on little endian systems---don't define them on other systems:
-@example
-#define jit_ntoh_ui(d, rs)
-#define jit_ntoh_us(d, rs)
-@end example
-
-@item Mandatory if JIT_RZERO not defined---don't define them if it is defined:
-@example
-#define jit_ldi_c(d, is)
-#define jit_ldi_i(d, is)
-#define jit_ldi_s(d, is)
-#define jit_ldr_c(d, rs)
-#define jit_ldr_i(d, rs)
-#define jit_ldr_s(d, rs)
-#define jit_ldi_uc(d, is)
-#define jit_ldi_ui(d, is)
-#define jit_ldi_us(d, is)
-#define jit_ldr_uc(d, rs)
-#define jit_ldr_ui(d, rs)
-#define jit_ldr_us(d, rs)
-#define jit_sti_c(id, rs)
-#define jit_sti_i(id, rs)
-#define jit_sti_s(id, rs)
-#define jit_str_c(rd, rs)
-#define jit_str_i(rd, rs)
-#define jit_str_s(rd, rs)
-#define jit_ldi_f(rd, is)
-#define jit_sti_f(id, rs)
-#define jit_ldi_d(rd, is)
-#define jit_sti_d(id, rs)
-#define jit_ldr_f(rd, rs)
-#define jit_str_f(rd, rs)
-#define jit_ldr_d(rd, rs)
-#define jit_str_d(rd, rs)
-@end example
-
-@item Synonyms---don't define them:
-@example
-#define jit_addi_p(d, rs, is)
-#define jit_addi_ui(d, rs, is)
-#define jit_addi_ul(d, rs, is)
-#define jit_addr_p(d, s1, s2)
-#define jit_addr_ui(d, s1, s2)
-#define jit_addr_ul(d, s1, s2)
-#define jit_andi_ui(d, rs, is)
-#define jit_andi_ul(d, rs, is)
-#define jit_andr_ui(d, s1, s2)
-#define jit_andr_ul(d, s1, s2)
-#define jit_beqi_p(label, rs, is)
-#define jit_beqi_ui(label, rs, is)
-#define jit_beqi_ul(label, rs, is)
-#define jit_beqr_p(label, s1, s2)
-#define jit_beqr_ui(label, s1, s2)
-#define jit_beqr_ul(label, s1, s2)
-#define jit_bmci_ui(label, rs, is)
-#define jit_bmci_ul(label, rs, is)
-#define jit_bmcr_ui(label, s1, s2)
-#define jit_bmcr_ul(label, s1, s2)
-#define jit_bmsi_ui(label, rs, is)
-#define jit_bmsi_ul(label, rs, is)
-#define jit_bmsr_ui(label, s1, s2)
-#define jit_bmsr_ul(label, s1, s2)
-#define jit_bgei_p(label, rs, is)
-#define jit_bger_p(label, s1, s2)
-#define jit_bgti_p(label, rs, is)
-#define jit_bgtr_p(label, s1, s2)
-#define jit_blei_p(label, rs, is)
-#define jit_bler_p(label, s1, s2)
-#define jit_blti_p(label, rs, is)
-#define jit_bltr_p(label, s1, s2)
-#define jit_bnei_p(label, rs, is)
-#define jit_bnei_ui(label, rs, is)
-#define jit_bnei_ul(label, rs, is)
-#define jit_bner_p(label, s1, s2)
-#define jit_bner_ui(label, s1, s2)
-#define jit_bner_ul(label, s1, s2)
-#define jit_eqi_p(d, rs, is)
-#define jit_eqi_ui(d, rs, is)
-#define jit_eqi_ul(d, rs, is)
-#define jit_eqr_p(d, s1, s2)
-#define jit_eqr_ui(d, s1, s2)
-#define jit_eqr_ul(d, s1, s2)
-#define jit_extr_c_s(d, rs)
-#define jit_extr_c_us(d, rs)
-#define jit_extr_uc_s(d, rs)
-#define jit_extr_uc_us(d, rs)
-#define jit_extr_uc_i(d, rs)
-#define jit_extr_uc_ui(d, rs)
-#define jit_extr_us_i(d, rs)
-#define jit_extr_us_ui(d, rs)
-#define jit_extr_uc_l(d, rs)
-#define jit_extr_uc_ul(d, rs)
-#define jit_extr_us_l(d, rs)
-#define jit_extr_us_ul(d, rs)
-#define jit_extr_ui_l(d, rs)
-#define jit_extr_ui_ul(d, rs)
-#define jit_gei_p(d, rs, is)
-#define jit_ger_p(d, s1, s2)
-#define jit_gti_p(d, rs, is)
-#define jit_gtr_p(d, s1, s2)
-#define jit_ldr_p(d, rs)
-#define jit_ldr_ul(d, rs)
-#define jit_ldi_p(d, is)
-#define jit_ldi_ul(d, is)
-#define jit_ldxi_p(d, rs, is)
-#define jit_ldxi_ul(d, rs, is)
-#define jit_ldxr_p(d, s1, s2)
-#define jit_ldxr_ul(d, s1, s2)
-#define jit_lei_p(d, rs, is)
-#define jit_ler_p(d, s1, s2)
-#define jit_lshi_ui(d, rs, is)
-#define jit_lshi_ul(d, rs, is)
-#define jit_lshr_ui(d, s1, s2)
-#define jit_lshr_ul(d, s1, s2)
-#define jit_lti_p(d, rs, is)
-#define jit_ltr_p(d, s1, s2)
-#define jit_movi_p(d, is)
-#define jit_movi_ui(d, rs)
-#define jit_movi_ul(d, rs)
-#define jit_movr_p(d, rs)
-#define jit_movr_ui(d, rs)
-#define jit_movr_ul(d, rs)
-#define jit_nei_p(d, rs, is)
-#define jit_nei_ui(d, rs, is)
-#define jit_nei_ul(d, rs, is)
-#define jit_ner_p(d, s1, s2)
-#define jit_ner_ui(d, s1, s2)
-#define jit_ner_ul(d, s1, s2)
-#define jit_hton_ui(d, rs)
-#define jit_hton_us(d, rs)
-#define jit_ori_ui(d, rs, is)
-#define jit_ori_ul(d, rs, is)
-#define jit_orr_ui(d, s1, s2)
-#define jit_orr_ul(d, s1, s2)
-#define jit_pusharg_c(rs)
-#define jit_pusharg_p(rs)
-#define jit_pusharg_s(rs)
-#define jit_pusharg_uc(rs)
-#define jit_pusharg_ui(rs)
-#define jit_pusharg_ul(rs)
-#define jit_pusharg_us(rs)
-#define jit_retval_c(rd)
-#define jit_retval_p(rd)
-#define jit_retval_s(rd)
-#define jit_retval_uc(rd)
-#define jit_retval_ui(rd)
-#define jit_retval_ul(rd)
-#define jit_retval_us(rd)
-#define jit_rsbi_p(d, rs, is)
-#define jit_rsbi_ui(d, rs, is)
-#define jit_rsbi_ul(d, rs, is)
-#define jit_rsbr_p(d, rs, is)
-#define jit_rsbr_ui(d, s1, s2)
-#define jit_rsbr_ul(d, s1, s2)
-#define jit_sti_p(d, is)
-#define jit_sti_uc(d, is)
-#define jit_sti_ui(d, is)
-#define jit_sti_ul(d, is)
-#define jit_sti_us(d, is)
-#define jit_str_p(d, rs)
-#define jit_str_uc(d, rs)
-#define jit_str_ui(d, rs)
-#define jit_str_ul(d, rs)
-#define jit_str_us(d, rs)
-#define jit_stxi_p(d, rs, is)
-#define jit_stxi_uc(d, rs, is)
-#define jit_stxi_ui(d, rs, is)
-#define jit_stxi_ul(d, rs, is)
-#define jit_stxi_us(d, rs, is)
-#define jit_stxr_p(d, s1, s2)
-#define jit_stxr_uc(d, s1, s2)
-#define jit_stxr_ui(d, s1, s2)
-#define jit_stxr_ul(d, s1, s2)
-#define jit_stxr_us(d, s1, s2)
-#define jit_subi_p(d, rs, is)
-#define jit_subi_ui(d, rs, is)
-#define jit_subi_ul(d, rs, is)
-#define jit_subr_p(d, s1, s2)
-#define jit_subr_ui(d, s1, s2)
-#define jit_subr_ul(d, s1, s2)
-#define jit_subxi_p(d, rs, is)
-#define jit_subxi_ui(d, rs, is)
-#define jit_subxi_ul(d, rs, is)
-#define jit_subxr_p(d, s1, s2)
-#define jit_subxr_ui(d, s1, s2)
-#define jit_subxr_ul(d, s1, s2)
-#define jit_xori_ui(d, rs, is)
-#define jit_xori_ul(d, rs, is)
-#define jit_xorr_ui(d, s1, s2)
-#define jit_xorr_ul(d, s1, s2)
-@end example
-
-@item Shortcuts---don't define them:
-@example
-#define JIT_R0
-#define JIT_R1
-#define JIT_R2
-#define JIT_V0
-#define JIT_V1
-#define JIT_V2
-#define JIT_FPR0
-#define JIT_FPR1
-#define JIT_FPR2
-#define JIT_FPR3
-#define JIT_FPR4
-#define JIT_FPR5
-#define jit_patch(jump_pc)
-#define jit_notr_c(d, rs)
-#define jit_notr_i(d, rs)
-#define jit_notr_l(d, rs)
-#define jit_notr_s(d, rs)
-#define jit_notr_uc(d, rs)
-#define jit_notr_ui(d, rs)
-#define jit_notr_ul(d, rs)
-#define jit_notr_us(d, rs)
-#define jit_rsbr_d(d, s1, s2)
-#define jit_rsbr_i(d, s1, s2)
-#define jit_rsbr_l(d, s1, s2)
-#define jit_subi_i(d, rs, is)
-#define jit_subi_l(d, rs, is)
-@end example
-
-@item Mandatory unless target arithmetic is always done in the same precision:
-@example
-#define jit_abs_f(rd,rs)
-#define jit_addr_f(rd,s1,s2)
-#define jit_beqr_f(label, s1, s2)
-#define jit_bger_f(label, s1, s2)
-#define jit_bgtr_f(label, s1, s2)
-#define jit_bler_f(label, s1, s2)
-#define jit_bltgtr_f(label, s1, s2)
-#define jit_bltr_f(label, s1, s2)
-#define jit_bner_f(label, s1, s2)
-#define jit_bordr_f(label, s1, s2)
-#define jit_buneqr_f(label, s1, s2)
-#define jit_bunger_f(label, s1, s2)
-#define jit_bungtr_f(label, s1, s2)
-#define jit_bunler_f(label, s1, s2)
-#define jit_bunltr_f(label, s1, s2)
-#define jit_bunordr_f(label, s1, s2)
-#define jit_ceilr_f_i(rd, rs)
-#define jit_divr_f(rd,s1,s2)
-#define jit_eqr_f(d, s1, s2)
-#define jit_extr_d_f(rs, rd)
-#define jit_extr_f_d(rs, rd)
-#define jit_extr_i_f(rd, rs)
-#define jit_floorr_f_i(rd, rs)
-#define jit_ger_f(d, s1, s2)
-#define jit_gtr_f(d, s1, s2)
-#define jit_ler_f(d, s1, s2)
-#define jit_ltgtr_f(d, s1, s2)
-#define jit_ltr_f(d, s1, s2)
-#define jit_movr_f(rd,rs)
-#define jit_mulr_f(rd,s1,s2)
-#define jit_negr_f(rd,rs)
-#define jit_ner_f(d, s1, s2)
-#define jit_ordr_f(d, s1, s2)
-#define jit_roundr_f_i(rd, rs)
-#define jit_rsbr_f(d, s1, s2)
-#define jit_sqrt_f(rd,rs)
-#define jit_subr_f(rd,s1,s2)
-#define jit_truncr_f_i(rd, rs)
-#define jit_uneqr_f(d, s1, s2)
-#define jit_unger_f(d, s1, s2)
-#define jit_ungtr_f(d, s1, s2)
-#define jit_unler_f(d, s1, s2)
-#define jit_unltr_f(d, s1, s2)
-#define jit_unordr_f(d, s1, s2)
-@end example
-
-@item Mandatory if sizeof(long) != sizeof(int)---don't define them on other systems:
-@example
-#define jit_addi_l(d, rs, is)
-#define jit_addr_l(d, s1, s2)
-#define jit_andi_l(d, rs, is)
-#define jit_andr_l(d, s1, s2)
-#define jit_beqi_l(label, rs, is)
-#define jit_beqr_l(label, s1, s2)
-#define jit_bgei_l(label, rs, is)
-#define jit_bgei_ul(label, rs, is)
-#define jit_bger_l(label, s1, s2)
-#define jit_bger_ul(label, s1, s2)
-#define jit_bgti_l(label, rs, is)
-#define jit_bgti_ul(label, rs, is)
-#define jit_bgtr_l(label, s1, s2)
-#define jit_bgtr_ul(label, s1, s2)
-#define jit_blei_l(label, rs, is)
-#define jit_blei_ul(label, rs, is)
-#define jit_bler_l(label, s1, s2)
-#define jit_bler_ul(label, s1, s2)
-#define jit_blti_l(label, rs, is)
-#define jit_blti_ul(label, rs, is)
-#define jit_bltr_l(label, s1, s2)
-#define jit_bltr_ul(label, s1, s2)
-#define jit_bosubi_l(label, rs, is)
-#define jit_bosubi_ul(label, rs, is)
-#define jit_bosubr_l(label, s1, s2)
-#define jit_bosubr_ul(label, s1, s2)
-#define jit_boaddi_l(label, rs, is)
-#define jit_boaddi_ul(label, rs, is)
-#define jit_boaddr_l(label, s1, s2)
-#define jit_boaddr_ul(label, s1, s2)
-#define jit_bmci_l(label, rs, is)
-#define jit_bmcr_l(label, s1, s2)
-#define jit_bmsi_l(label, rs, is)
-#define jit_bmsr_l(label, s1, s2)
-#define jit_bnei_l(label, rs, is)
-#define jit_bner_l(label, s1, s2)
-#define jit_divi_l(d, rs, is)
-#define jit_divi_ul(d, rs, is)
-#define jit_divr_l(d, s1, s2)
-#define jit_divr_ul(d, s1, s2)
-#define jit_eqi_l(d, rs, is)
-#define jit_eqr_l(d, s1, s2)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_gei_l(d, rs, is)
-#define jit_gei_ul(d, rs, is)
-#define jit_ger_l(d, s1, s2)
-#define jit_ger_ul(d, s1, s2)
-#define jit_gti_l(d, rs, is)
-#define jit_gti_ul(d, rs, is)
-#define jit_gtr_l(d, s1, s2)
-#define jit_gtr_ul(d, s1, s2)
-#define jit_hmuli_l(d, rs, is)
-#define jit_hmuli_ul(d, rs, is)
-#define jit_hmulr_l(d, s1, s2)
-#define jit_hmulr_ul(d, s1, s2)
-#define jit_ldi_l(d, is)
-#define jit_ldi_ui(d, is)
-#define jit_ldr_l(d, rs)
-#define jit_ldr_ui(d, rs)
-#define jit_ldxi_l(d, rs, is)
-#define jit_ldxi_ui(d, rs, is)
-#define jit_ldxr_l(d, s1, s2)
-#define jit_ldxr_ui(d, s1, s2)
-#define jit_lei_l(d, rs, is)
-#define jit_lei_ul(d, rs, is)
-#define jit_ler_l(d, s1, s2)
-#define jit_ler_ul(d, s1, s2)
-#define jit_lshi_l(d, rs, is)
-#define jit_lshr_l(d, s1, s2)
-#define jit_lti_l(d, rs, is)
-#define jit_lti_ul(d, rs, is)
-#define jit_ltr_l(d, s1, s2)
-#define jit_ltr_ul(d, s1, s2)
-#define jit_modi_l(d, rs, is)
-#define jit_modi_ul(d, rs, is)
-#define jit_modr_l(d, s1, s2)
-#define jit_modr_ul(d, s1, s2)
-#define jit_movi_l(d, rs)
-#define jit_movr_l(d, rs)
-#define jit_muli_l(d, rs, is)
-#define jit_muli_ul(d, rs, is)
-#define jit_mulr_l(d, s1, s2)
-#define jit_mulr_ul(d, s1, s2)
-#define jit_nei_l(d, rs, is)
-#define jit_ner_l(d, s1, s2)
-#define jit_ori_l(d, rs, is)
-#define jit_orr_l(d, s1, s2)
-#define jit_pusharg_l(rs)
-#define jit_retval_l(rd)
-#define jit_rshi_l(d, rs, is)
-#define jit_rshi_ul(d, rs, is)
-#define jit_rshr_l(d, s1, s2)
-#define jit_rshr_ul(d, s1, s2)
-#define jit_sti_l(d, is)
-#define jit_str_l(d, rs)
-#define jit_stxi_l(d, rs, is)
-#define jit_stxr_l(d, s1, s2)
-#define jit_subr_l(d, s1, s2)
-#define jit_xori_l(d, rs, is)
-#define jit_xorr_l(d, s1, s2)
-@end example
-@end table
-
-@node Standard functions
-@chapter More complex tasks in the platform-independent layer
-
-There is actually a single function that you @strong{must} define
-in the @file{funcs-@var{suffix}.h} file, that is, @code{jit_flush_code}.
-
-As explained in @usingref{GNU lightning macros, Generating code at
-run-time}, its purpose is to flush part of the processor's
-instruction cache (usually the part of memory that contains the
-generated code), avoiding the processor executing bogus data
-that it happens to find in the cache.  The @code{jit_flush_code}
-function takes the first and the last address to flush.
-
-On many processors (for example, the x86 and the all the processors
-in the 68k family up to the 68030), it is not even necessary to flush
-the cache.  In this case, the contents of the file will simply be
-
-@example
-#ifndef __lightning_funcs_h
-#define __lightning_funcs_h
-
-#define jit_flush_code(dest, end)
-
-#endif @rem{/* __lightning_core_h */}
-@end example
-
-On other processors, flushing the cache is necessary for
-proper behavior of the program; in this case, the file will contain
-a proper definition of the function.  However, we must make yet
-another distinction.
-
-On some processors, flushing the cache is obtained through a call
-to the operating system or to the C run-time library.  In this case,
-the definition of @code{jit_flush_code} will be very simple: two
-examples are the Alpha and the 68040. For the Alpha the code will
-be:
-@example
-#define jit_flush_code(dest, end) \
-        __asm__ __volatile__("call_pal 0x86");
-@end example
-
-@noindent
-and, for the Motorola
-@example
-#define jit_flush_code(start, end) \
-        __clear_cache((start), (end))
-@end example
-
-As you can see, the Alpha does not even need to pass the start and
-end address to the function.  It is good practice to protect usage of
-the @acronym{GNU CC}-specific @code{__asm__} directive by relying
-on the preprocessor.  For example:
-
-@example
-#if !defined(__GNUC__) && !defined(__GNUG__)
-#error Go get GNU C, I do not know how to flush the cache
-#error with this compiler.
-#else
-#define jit_flush_code(dest, end) \
-        __asm__ __volatile__("call_pal 0x86");
-#endif
-@end example
-
-@lightning{}'s configuration process tries to compile a dummy file that
-includes @code{lightning.h}, and gives a warning if there are problem
-with the compiler that is installed on the system.
-
-In more complex cases, you'll need to write a full-fledged function.
-Don't forget to make it @code{static}, otherwise you'll have problems
-linking programs that include @code{lightning.h} multiple times. An
-example, taken from the @file{funcs-ppc.h} file, is:
-
-@example
-#ifndef __lightning_funcs_h
-#define __lightning_funcs_h
-
-#if !defined(__GNUC__) && !defined(__GNUG__)
-#error Go get GNU C, I do not know how to flush the cache
-#error with this compiler.
-#else
-static void
-jit_flush_code(start, end)
-     void       *start;
-     void       *end;
-@{
-  register char *dest = start;
-
-  for (; dest <= end; dest += SIZEOF_CHAR_P)
-    __asm__ __volatile__ 
-      ("dcbst 0,%0; sync; icbi 0,%0; isync"::"r"(dest));
-@}
-#endif
-
-#endif /* __lightning_funcs_h */
-@end example
-
-The @file{funcs-@var{suffix}.h} file is also the right place to put
-helper functions that do complex tasks for the
-@file{core-@var{suffix}.h} file.  For example, the PowerPC assembler
-defines @code{jit_prolog} as a function and puts it in that file (for more
-information, @pxref{Implementing the ABI}).  Take special care when
-defining such a function, as explained in @usingref{Reentrancy,
-Reentrant usage of @lightning{}}.
-
-
-@node Floating-point macros
-@chapter Implementing macros for floating point
-
diff --git a/doc/printf.c b/doc/printf.c
new file mode 100644
index 000000000..52bd2aa1f
--- /dev/null
+++ b/doc/printf.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);    /* Pointer to Void Function of Int */
+
+int main(int argc, char *argv[])
+{
+  pvfi          myFunction;             /* ptr to generated code */
+  jit_node_t    *start, *end;           /* notes placed before and after the code */
+  jit_node_t    *in;                    /* to get the argument */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  /* Generate: void f(int n) { printf("generated %d bytes\n", n); } */
+  start = jit_note(__FILE__, __LINE__);
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);               /* R1 = n */
+  jit_prepare();                        /* open the call sequence before pushing arguments */
+  jit_pushargi((jit_word_t)"generated %d bytes\n");
+  jit_ellipsis();                       /* arguments from here on are the variadic ones */
+  jit_pushargr(JIT_R1);
+  jit_finishi(printf);
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);
+
+  myFunction = jit_emit();
+
+  /* call the generated code, passing its size as argument */
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+  jit_disassemble();                    /* NOTE(review): presumably prints the emitted code -- confirm */
+  finish_jit();
+  return 0;
+}
diff --git a/doc/rfib.c b/doc/rfib.c
new file mode 100644
index 000000000..1ce02d5a7
--- /dev/null
+++ b/doc/rfib.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  pifi       fib;                 /* entry point returned by jit_emit() */
+  jit_node_t *label;              /* start of the function; target of the recursive calls */
+  jit_node_t *call;               /* call node that gets patched to 'label' */
+  jit_node_t *in;                 /* offset of the argument */
+  jit_node_t *ref;                /* to patch the forward reference */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+  /* Generate: int fib(int n) { return n < 2 ? 1 : fib(n-1) + fib(n-2) + 1; } */
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_V0, in);              /* V0 = n */
+  ref = jit_blti     (JIT_V0, 2);               /* n < 2: jump to the base case patched below */
+        jit_subi     (JIT_V1, JIT_V0, 1);       /* V1 = n-1 */
+        jit_subi     (JIT_V2, JIT_V0, 2);       /* V2 = n-2 */
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);               /* no address yet ... */
+        jit_patch_at(call, label);              /* ... the call is pointed back at 'label' */
+        jit_retval(JIT_V1);                     /* V1 = fib(n-1) */
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);               /* second recursive call, patched the same way */
+        jit_patch_at(call, label);
+        jit_retval(JIT_V2);                     /* V2 = fib(n-2) */
+        jit_addi(JIT_V1,  JIT_V1,  1);          /* V1 = fib(n-1) + 1 */
+        jit_addr(JIT_R0, JIT_V1, JIT_V2);       /* R0 = V1 + V2 + 1 */
+        jit_retr(JIT_R0);
+
+  jit_patch(ref);                               /* patch jump */
+        jit_movi(JIT_R0, 1);                    /* R0 = 1 */
+        jit_retr(JIT_R0);
+
+  /* call the generated code, passing 32 as an argument */
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 32, fib(32));
+  finish_jit();
+  return 0;
+}
diff --git a/doc/rpn.c b/doc/rpn.c
new file mode 100644
index 000000000..f02cef35f
--- /dev/null
+++ b/doc/rpn.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+{
+  *sp += sizeof (int);                                  /* reserve one int slot in the frame */
+  jit_stxi_i (*sp - (int) sizeof (int), JIT_FP, reg);   /* store reg into the slot just reserved */
+}
+
+void stack_pop(int reg, int *sp)
+{
+  sp[0] -= sizeof (int);                /* step back to the most recently pushed slot */
+  jit_ldxi_i (reg, JIT_FP, sp[0]);      /* load that slot into reg */
+}
+
+jit_node_t *compile_rpn(char *expr)
+{
+  jit_node_t *in, *fn;
+  int stack_base, stack_ptr;
+  /* Emit int f(int x) that evaluates the RPN text in expr; returns the note at its start. */
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));     /* room for 32 spilled ints */
+
+  jit_getarg_i(JIT_R2, in);                     /* R2 = x for the whole function */
+  /* R0 is the top of the evaluation stack; deeper entries are spilled via stack_push. */
+  while (*expr) {
+    char buf[32];
+    int n;
+    if (sscanf(expr, "%31[0-9]%n", buf, &n)) {  /* width = sizeof buf - 1: cannot overrun buf */
+      expr += n - 1;                            /* the ++expr below skips the last digit */
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movi(JIT_R0, atoi(buf));
+    } else if (*expr == 'x') {
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    } else if (*expr == '+') {
+      stack_pop(JIT_R1, &stack_ptr);            /* R1 = left operand, R0 = right operand */
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '-') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '*') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '/') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);
+    } else {
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    }
+    ++expr;
+  }
+  jit_retr(JIT_R0);                             /* result is whatever is left on top */
+  jit_epilog();
+  return fn;
+}
+
+int main(int argc, char *argv[])
+{
+  jit_node_t *nc, *nf;                  /* notes returned by compile_rpn for each function */
+  pifi c2f, f2c;                        /* callable addresses of the two compiled functions */
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");         /* 32 + x*9/5 */
+  nf = compile_rpn("x32-5*9/");         /* (x-32)*5/9 */
+  (void)jit_emit();                     /* a single emit covers both functions */
+  c2f = (pifi)jit_address(nc);          /* addresses are looked up after emitting */
+  f2c = (pifi)jit_address(nf);
+
+  printf("\nC:");                       /* first table: inputs 0..100 step 10, then c2f of each */
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");                       /* second table: inputs 32..212 step 18, then f2c of each */
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  finish_jit();
+  return 0;
+}
diff --git a/doc/toc.texi b/doc/toc.texi
deleted file mode 100644
index 193d4f26f..000000000
--- a/doc/toc.texi
+++ /dev/null
@@ -1,76 +0,0 @@
-@c These macros are used because these items could go both in the
-@c short listing (for partial books) and in the detailed listing
-@c (for full books - i.e. using & porting)
-
-@macro usingmenu{}
-@ifset USING
-* Installation::            Configuring and installing GNU lightning
-* The instruction set::     The RISC instruction set used i GNU lightning
-* GNU lightning macros::    GNU lightning's macros
-* Reentrancy::              Re-entrant usage of GNU lightning
-* Bundling GNU lightning::  Using GNU lightning in your programs
-@end ifset
-@end macro
-
-@macro portingmenu{}
-@ifset PORTING
-* Structure of a port::   An overview of the porting process
-* Adjusting configure::   Automatically recognizing the new platform
-* Run-time assemblers::   An internal layer to simplify porting
-* Standard macros::       The platform-independent layer used by clients.
-* Standard functions::    Doing more complex tasks.
-* Floating-point macros:: Implementing macros for floating point.
-@end ifset
-@end macro
-
-@macro standardmacrosmenu{}
-@c This comment is needed because of makeinfo's vagaries...
-* Forward references::    Implementing forward references
-* Common features::       Common features supported by @file{core-common.h}
-* Delay slots::           Supporting scheduling of delay slots
-* Immediate values::      Supporting arbitrarily sized immediate values
-* Implementing the ABI::  Function prologs and epilogs, and argument passing
-* Macro list::            Macros composing the platform-independent layer
-@end macro
-
-@menu
-@ifclear BOTH
-* Overview::              What GNU lightning is
-@usingmenu{}
-@portingmenu{}
-* Future::                Tasks for GNU lightning's subsequent releases
-* Acknowledgements::      Acknowledgements for GNU lightning
-
-@ifset PORTING
-@detailmenu
---- The detailed node listing ---
-
-Standard macros:
-@standardmacrosmenu{}
-@end detailmenu
-@end ifset
-@end ifclear
-
-@ifset BOTH
-* Overview::              What GNU lightning is.
-* Using GNU lightning::   Using GNU lightning in your programs
-* Porting GNU lightning:: Retargeting GNU lightning to a new system
-* Future::                Tasks for GNU lightning's subsequent releases
-* Acknowledgements::      Acknowledgements for GNU lightning
-
-@detailmenu
---- The detailed node listing ---
-
-Using @lightning{}:
-@usingmenu{}
-
-Porting @lightning{}:
-@portingmenu{}
-
-Standard macros:
-@standardmacrosmenu{}
-@end detailmenu
-
-@end ifset
-
-@end menu
diff --git a/doc/u-lightning.texi b/doc/u-lightning.texi
deleted file mode 100644
index 0c2481b3b..000000000
--- a/doc/u-lightning.texi
+++ /dev/null
@@ -1,100 +0,0 @@
-\input texinfo.tex  @c -*- texinfo -*-
-@c %**start of header (This is for running Texinfo on a region.)
-
-@setfilename lightning.info
-
-@set TITLE       Porting @sc{gnu} @i{lightning}
-@set TOPIC       Porting
-@clear BOTH
-@set USING
-@clear PORTING
-
-@settitle @value{TITLE}
-
-@c ---------------------------------------------------------------------
-@c Common macros
-@c ---------------------------------------------------------------------
-
-@macro bulletize{a}
-@item
-\a\
-@end macro
-
-@macro rem{a}
-@r{@i{\a\}}
-@end macro
-
-@macro gnu{}
-@sc{gnu}
-@end macro
-
-@macro lightning{}
-@gnu{} @i{lightning}
-@end macro
-
-@c ---------------------------------------------------------------------
-@c Macros for Texinfo 3.1/4.0 compatibility
-@c ---------------------------------------------------------------------
-
-@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
-@c compatibility
-@macro hlink{url, link}
-\link\ (\url\)
-@end macro
-
-@c ifhtml can only be true in Texinfo 4.0, which has uref
-@ifhtml
-@unmacro hlink
-
-@macro hlink{url, link}
-@uref{\url\, \link\}
-@end macro
-
-@macro email{mail}
-@uref{mailto:\mail\, , \mail\}
-@end macro
-
-@macro url{url}
-@uref{\url\}
-@end macro
-@end ifhtml
-
-@c ---------------------------------------------------------------------
-@c References to the other half of the manual
-@c ---------------------------------------------------------------------
-
-@ifset USING
-@macro usingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@c ---------------------------------------------------------------------
-@c End of macro section
-@c ---------------------------------------------------------------------
-
-@include version.texi
-@include body.texi
-
-@c %**end of header (This is for running Texinfo on a region.)
-
-@c ***********************************************************************
-
diff --git a/doc/using.texi b/doc/using.texi
deleted file mode 100644
index 332383eea..000000000
--- a/doc/using.texi
+++ /dev/null
@@ -1,1273 +0,0 @@
-@node Installation
-@chapter Configuring and installing @lightning{}
-
-The first thing to do to use @lightning{} is to configure the
-program, picking the set of macros to be used on the host
-architecture; this configuration is automatically performed by
-the @file{configure} shell script; to run it, merely type:
-@example
-     ./configure
-@end example
-
-@lightning{} supports cross-compiling in that you can choose a
-different set of macros from the one needed on the computer that
-you are compiling @lightning{} on.  For example,
-@example
-     ./configure --host=sparc-sun-linux
-@end example
-
-@noindent will select the SPARC set of runtime assemblers.  You can use
-configure's ability to make reasonable assumptions about the vendor
-and operating system and simply type
-@example
-     ./configure --host=i386
-     ./configure --host=ppc
-     ./configure --host=sparc
-@end example
-
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers.  These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
-checks tend to slow down your code generator.
-
-After you've configured @lightning{}, you don't have to compile it
-because it is nothing more than a set of include files.  If you want to
-compile the examples, run @file{make} as usual.  The next important
-step is:
-@example
-    make install
-@end example
-
-This ends the process of installing @lightning{}.
-
-@node The instruction set
-@chapter @lightning{}'s instruction set
-
-@lightning{}'s instruction set was designed by deriving instructions
-that closely match those of most existing RISC architectures, or
-that can be easily syntesized if absent.  Each instruction is composed
-of:
-@itemize @bullet
-@item
-an operation, like @code{sub} or @code{mul}
-
-@item
-sometimes, an register/immediate flag (@code{r} or @code{i})
-
-@item
-a type identifier or, occasionally, two
-@end itemize
-
-The second and third field are separated by an underscore; thus,
-examples of legal mnemonics are @code{addr_i} (integer add, with three
-register operands) and @code{muli_l} (long integer multiply, with two
-register operands and an immediate operand).  Each instruction takes
-two or three operands; in most cases, one of them can be an immediate
-value instead of a register.
-
-@lightning{} supports a full range of integer types: operands can be 1,
-2 or 4 bytes long (64-bit architectures might support 8 bytes long
-operands), either signed or unsigned.  The types are listed in the
-following table together with the C types they represent:
-
-@example
-     c          @r{signed char}
-     uc         @r{unsigned char}
-     s          @r{short}
-     us         @r{unsigned short}
-     i          @r{int}
-     ui         @r{unsigned int}
-     l          @r{long}
-     ul         @r{unsigned long}
-     f          @r{float}
-     d          @r{double}
-     p          @r{void *}
-@end example
-
-Some of these types may not be distinct: for example, (e.g., @code{l}
-is equivalent to @code{i} on 32-bit machines, and @code{p} is
-substantially equivalent to @code{ul}).
-
-There are at least seven integer registers, of which six are
-general-purpose, while the last is used to contain the frame pointer
-(@code{FP}).  The frame pointer can be used to allocate and access local
-variables on the stack, using the @code{allocai} instruction.
-
-Of the general-purpose registers, at least three are guaranteed to be
-preserved across function calls (@code{V0}, @code{V1} and
-@code{V2}) and at least three are not (@code{R0}, @code{R1} and
-@code{R2}).  Six registers are not very much, but this
-restriction was forced by the need to target CISC architectures
-which, like the x86, are poor of registers; anyway, backends can
-specify the actual number of available registers with the macros
-@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
-(for callee-save registers).
-
-In addition, there is a special @code{RET} register which contains
-the return value of the current function (@emph{not} the return value
-of callees---use the @code{retval} instruction for this).  You should
-always remember, however, that writing this register could overwrite
-either a general-purpose register or an incoming parameter, depending
-on the architecture.
-
-There are at least six floating-point registers, named @code{FPR0} to
-@code{FPR5}.  These are caller-save and are separate from the integer
-registers on all the supported architectures; on Intel architectures,
-the register stack is mapped to a flat register file.  As for the
-integer registers, the macro @code{JIT_FPR_NUM} yields the number of
-floating-point registers, and the special @code{FPRET} register contains
-the return value of the current function.
-
-The complete instruction set follows; as you can see, most non-memory
-operations only take integers, long integers (either signed or
-unsigned) and pointers as operands; this was done in order to reduce
-the instruction set, and because most architectures only provide word
-and long word operations on registers.  There are instructions that
-allow operands to be extended to fit a larger data type, both in a
-signed and in an unsigned way.
-
-@table @b
-@item Binary ALU operations
-These accept three operands; the last one can be an immediate
-value for integer operands, or a register for all operand types.
-@code{addx} operations must directly follow @code{addc}, and
-@code{subx} must follow @code{subc}; otherwise, results are undefined.
-@example
-addr     i  ui  l  ul  p  f  d  O1 = O2 + O3
-addi     i  ui  l  ul  p        O1 = O2 + O3
-addxr    i  ui  l  ul           O1 = O2 + (O3 + carry)
-addxi    i  ui  l  ul           O1 = O2 + (O3 + carry)
-addcr    i  ui  l  ul           O1 = O2 + O3, set carry
-addci    i  ui  l  ul           O1 = O2 + O3, set carry
-subr     i  ui  l  ul  p  f  d  O1 = O2 - O3
-subi     i  ui  l  ul  p        O1 = O2 - O3
-subxr    i  ui  l  ul           O1 = O2 - (O3 + carry)
-subxi    i  ui  l  ul           O1 = O2 - (O3 + carry)
-subcr    i  ui  l  ul           O1 = O2 - O3, set carry
-subci    i  ui  l  ul           O1 = O2 - O3, set carry
-rsbr     i  ui  l  ul  p  f  d  O1 = O3 - O2
-rsbi     i  ui  l  ul  p        O1 = O3 - O2
-mulr     i  ui  l  ul     f  d  O1 = O2 * O3
-muli     i  ui  l  ul           O1 = O2 * O3
-hmulr    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
-hmuli    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
-divr     i  ui  l  ul     f  d  O1 = O2 / O3
-divi     i  ui  l  ul           O1 = O2 / O3
-modr     i  ui  l  ul           O1 = O2 % O3
-modi     i  ui  l  ul           O1 = O2 % O3
-andr     i  ui  l  ul           O1 = O2 & O3
-andi     i  ui  l  ul           O1 = O2 & O3
-orr      i  ui  l  ul           O1 = O2 | O3
-ori      i  ui  l  ul           O1 = O2 | O3
-xorr     i  ui  l  ul           O1 = O2 ^ O3
-xori     i  ui  l  ul           O1 = O2 ^ O3
-lshr     i  ui  l  ul           O1 = O2 << O3
-lshi     i  ui  l  ul           O1 = O2 << O3
-rshr     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.}
-rshi     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.}
-@end example
-
-@item Unary ALU operations
-These accept two operands, both of which must be registers.
-@example
-negr     i     l         f  d  O1 = -O2
-notr     i  ui l  ul           O1 = ~O2
-@end example
-
-@item Compare instructions
-These accept three operands; again, the last can be an immediate
-value for integer data types.  The last two operands are compared,
-and the first operand is set to either 0 or 1, according to
-whether the given condition was met or not.
-
-The conditions given below are for the standard behavior of C,
-where the ``unordered'' comparison result is mapped to false.
-
-@example
-ltr      i  ui  l  ul  p  f  d  O1 = (O2 <  O3)
-lti      i  ui  l  ul  p        O1 = (O2 <  O3)
-ler      i  ui  l  ul  p  f  d  O1 = (O2 <= O3)
-lei      i  ui  l  ul  p        O1 = (O2 <= O3)
-gtr      i  ui  l  ul  p  f  d  O1 = (O2 >  O3)
-gti      i  ui  l  ul  p        O1 = (O2 >  O3)
-ger      i  ui  l  ul  p  f  d  O1 = (O2 >= O3)
-gei      i  ui  l  ul  p        O1 = (O2 >= O3)
-eqr      i  ui  l  ul  p  f  d  O1 = (O2 == O3)
-eqi      i  ui  l  ul  p        O1 = (O2 == O3)
-ner      i  ui  l  ul  p  f  d  O1 = (O2 != O3)
-nei      i  ui  l  ul  p        O1 = (O2 != O3)
-unltr                     f  d  O1 = !(O2 >= O3)
-unler                     f  d  O1 = !(O2 >  O3)
-ungtr                     f  d  O1 = !(O2 <= O3)
-unger                     f  d  O1 = !(O2 <  O3)
-uneqr                     f  d  O1 = !(O2 <  O3) && !(O2 >  O3)
-ltgtr                     f  d  O1 = !(O2 >= O3) || !(O2 <= O3)
-ordr                      f  d  O1 =  (O2 == O2) &&  (O3 == O3)
-unordr                    f  d  O1 =  (O2 != O2) ||  (O3 != O3)
-@end example
-
-@item Transfer operations
-These accept two operands; for @code{ext} both of them must be
-registers, while @code{mov} accepts an immediate value as the second
-operand.
-
-Unlike @code{movr} and @code{movi}, the other instructions are applied
-between operands of different data types, and they need @strong{two}
-data type specifications.  You can use @code{extr} to convert between
-integer data types, in which case the first must be smaller in size
-than the second; for example @code{extr_c_ui} is correct while
-@code{extr_ul_us} is not.  You can also use @code{extr} to convert
-an integer to a floating point value: the only available possibilities
-are @code{extr_i_f} and @code{extr_i_d}.  The other instructions
-convert a floating point value to an integer, so the possible
-suffixes are @code{_f_i} and @code{_d_i}.
-
-@example
-movr                      i  ui  l  ul  p  f  d  O1 = O2
-movi                      i  ui  l  ul  p  f  d  O1 = O2
-extr        c  uc  s  us  i  ui  l  ul     f  d  O1 = O2
-roundr                    i                f  d  O1 = round(O2)
-truncr                    i                f  d  O1 = trunc(O2)
-floorr                    i                f  d  O1 = floor(O2)
-ceilr                     i                f  d  O1 = ceil(O2)
-@end example
-
-Note that the order of the arguments is @emph{destination first,
-source second} as for all other @lightning{} instructions, but
-the order of the types is always reversed with respect to that
-of the arguments: @emph{shorter}---source---@emph{first,
-longer}---destination---@emph{second}.  This happens for historical
-reasons.
-
-@item Network extensions
-These accept two operands, both of which must be registers; these
-two instructions actually perform the same task, yet they are
-assigned to two mnemonics for the sake of convenience and
-completeness.  As usual, the first operand is the destination and
-the second is the source.
-@example
-hton       us ui          @r{Host-to-network (big endian) order}
-ntoh       us ui          @r{Network-to-host order }
-@end example
-
-@item Load operations
-@code{ld} accepts two operands while @code{ldx} accepts three;
-in both cases, the last can be either a register or an immediate
-value. Values are extended (with or without sign, according to
-the data type specification) to fit a whole register.
-@example
-ldr     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
-ldi     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
-ldxr    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
-ldxi    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
-@end example
-
-@item Store operations
-@code{st} accepts two operands while @code{stx} accepts three; in
-both cases, the first can be either a register or an immediate
-value. Values are sign-extended to fit a whole register.
-@example
-str     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
-sti     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
-stxr    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
-stxi    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
-@end example
-
-@item Argument management
-These are:
-@example
-prepare                   i                f  d
-pusharg     c  uc  s  us  i  ui  l  ul  p  f  d
-getarg      c  uc  s  us  i  ui  l  ul  p  f  d
-arg         c  uc  s  us  i  ui  l  ul  p  f  d
-retval      c  uc  s  us  i  ui  l  ul  p
-@end example
-
-Of these, the first two are used by the caller, while the last two
-are used by the callee.  A code snippet that wants to call another
-procedure and has to pass registers must, in order: use the
-@code{prepare} instruction, giving the number of arguments to
-be passed to the procedure (once for each data type); use
-@code{pusharg} to push the arguments @strong{in reverse order};
-and use @code{calli} or @code{finish} (explained below) to
-perform the actual call.
-
-@code{arg} and @code{getarg} are used by the callee.
-@code{arg} is different from other instruction in that it does not
-actually generate any code: instead, it is a function which returns
-a value to be passed to @code{getarg}.@footnote{``Return a
-value'' means that @lightning{} macros that compile these
-instructions return a value when expanded.} You should call
-@code{arg} as soon as possible, before any function call or, more
-easily, right after the @code{prolog} or @code{leaf} instructions
-(which are treated later).
-
-@code{getarg} accepts a register argument and a value returned by
-@code{arg}, and will move that argument to the register, extending
-it (with or without sign, according to the data type specification)
-to fit a whole register.  These instructions are more intimately
-related to the usage of the @lightning{} instruction set in code
-that generates other code, so they will be treated more
-specifically in @ref{GNU lightning macros, , Generating code at
-run-time}.
-
-Finally, the @code{retval} instruction fetches the return value of a
-called function in a register.  The @code{retval} instruction takes a
-register argument and copies the return value of the previously called
-function in that register.  A function should put its own return value
-in the @code{RET} register before returning.  @xref{Fibonacci, the
-Fibonacci numbers}, for an example.
-
-You should observe a few rules when using these macros.  First of
-all, it is not allowed to call functions with more than six arguments;
-this was done to simplify and speed up the implementation on
-architectures that use registers for parameter passing.
-
-You should not nest calls to @code{prepare}, nor call zero-argument
-functions (which do not need a call to @code{prepare}) inside a
-@code{prepare/calli} or @code{prepare/finish} block.  Doing this
-might corrupt already pushed arguments.
-
-You @strong{cannot} pass parameters between subroutines using
-the six general-purpose registers.  This might work only when
-targeting particular architectures.
-
-On the other hand, it is possible to assume that callee-saved registers
-(@code{R0} through @code{R2}) are not clobbered by another dynamically
-generated function which does not use them as operands in its code and
-which does not return a value.
-
-@item Branch instructions
-Like @code{arg}, these also return a value which, in this case,
-is to be used to compile forward branches as explained in
-@ref{Fibonacci, , Fibonacci numbers}.  They accept a pointer to the
-destination of the branch and two operands to be compared; of these,
-the last can be either a register or an immediate.  They are:
-@example
-bltr      i  ui  l  ul  p  f  d  @r{if }(O2 <  O3)@r{ goto }O1
-blti      i  ui  l  ul  p        @r{if }(O2 <  O3)@r{ goto }O1
-bler      i  ui  l  ul  p  f  d  @r{if }(O2 <= O3)@r{ goto }O1
-blei      i  ui  l  ul  p        @r{if }(O2 <= O3)@r{ goto }O1
-bgtr      i  ui  l  ul  p  f  d  @r{if }(O2 >  O3)@r{ goto }O1
-bgti      i  ui  l  ul  p        @r{if }(O2 >  O3)@r{ goto }O1
-bger      i  ui  l  ul  p  f  d  @r{if }(O2 >= O3)@r{ goto }O1
-bgei      i  ui  l  ul  p        @r{if }(O2 >= O3)@r{ goto }O1
-beqr      i  ui  l  ul  p  f  d  @r{if }(O2 == O3)@r{ goto }O1
-beqi      i  ui  l  ul  p        @r{if }(O2 == O3)@r{ goto }O1
-bner      i  ui  l  ul  p  f  d  @r{if }(O2 != O3)@r{ goto }O1
-bnei      i  ui  l  ul  p        @r{if }(O2 != O3)@r{ goto }O1
-
-bunltr                     f  d  @r{if }!(O2 >= O3)@r{ goto }O1
-bunler                     f  d  @r{if }!(O2 >  O3)@r{ goto }O1
-bungtr                     f  d  @r{if }!(O2 <= O3)@r{ goto }O1
-bunger                     f  d  @r{if }!(O2 <  O3)@r{ goto }O1
-buneqr                     f  d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
-bltgtr                     f  d  @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
-bordr                      f  d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
-bunordr                    f  d  @r{if }!(O2 != O2) ||  (O3 != O3)@r{ goto }O1
-
-bmsr      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
-bmsi      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
-bmcr      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1
-bmci      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
-boaddr    i ui l  ul             O2 += O3@r{, goto }O1@r{ on overflow}
-boaddi    i ui l  ul             O2 += O3@r{, goto }O1@r{ on overflow}
-bosubr    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on overflow}
-bosubi    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on overflow}
-@end example
-
-@item Jump and return operations
-These accept one argument except @code{ret} which has none; the
-difference between @code{finish} and @code{calli} is that the
-latter does not clean the stack from pushed parameters (if any)
-and the former must @strong{always} follow a @code{prepare}
-instruction.
-@example
-calli     (not specified)                  @r{function call to O1}
-callr     (not specified)                  @r{function call to a register}
-finish    (not specified)                  @r{function call to O1}
-finishr   (not specified)                  @r{function call to a register}
-jmpi/jmpr (not specified)                  @r{unconditional jump to O1}
-ret       (not specified)                  @r{return from subroutine}
-retval    c  uc s  us i  ui l  ul p  f  d  @r{move return value}
-                                           @r{to register}
-@end example
-
-Like branch instruction, @code{jmpi} also returns a value which is to
-be used to compile forward branches. @xref{Fibonacci, , Fibonacci
-numbers}.
-
-@item Function prolog
-
-These macros are used to set up the function prolog, in particular to
-declare the number of arguments accepted by a function, and to reserve
-space on the stack to be used for variables.  They accept a single
-numeric argument.
-
-@example
-prolog    (not specified)                  @r{function prolog for O1 args}
-leaf      (not specified)                  @r{the same for leaf functions}
-allocai   (not specified)                  @r{reserve space on the stack}
-@end example
-
-Results are undefined when using function calls in a leaf function.
-
-@code{allocai} receives the number of bytes to allocate and returns
-the offset from the frame pointer register @code{FP} to the base of
-the area.  The area is aligned to an @code{int}; future versions of
-@lightning{} may provide more fine-grained control on the alignment of
-stack-allocated variables.
-@end table
-
-As a small appetizer, here is a small function that adds 1 to the input
-parameter (an @code{int}).  I'm using an assembly-like syntax here which
-is a bit different from the one used when writing real subroutines with
-@lightning{}; the real syntax will be introduced in @xref{GNU lightning
-macros, , Generating code at run-time}.
-
-@example
-incr:
-     leaf      1
-in = arg_i                   @rem{! We have an integer argument}
-     getarg_i  R0, in        @rem{! Move it to R0}
-     addi_i    RET, R0, 1    @rem{! Add 1\, put result in return value}
-     ret                     @rem{! And return the result}
-@end example
-
-And here is another function which uses the @code{printf} function from
-the standard C library to write a number in hexadecimal notation:
-
-@example
-printhex:
-     prolog    1
-in = arg_i                    @rem{! Same as above}
-     getarg_i  R0, in
-     prepare   2              @rem{! Begin call sequence for printf}
-     pusharg_i R0             @rem{! Push second argument}
-     pusharg_p "%x"           @rem{! Push format string}
-     finish    printf         @rem{! Call printf}
-     ret                      @rem{! Return to caller}
-@end example
-
-@node GNU lightning macros
-@chapter Generating code at run-time
-
-To use @lightning{}, you should include the @file{lightning.h} file that
-is put in your include directory by the @samp{make install} command.
-That include files defines about four hundred public macros (plus
-others that are private to @lightning{}), one for each opcode listed
-above.
-
-Each of the instructions above translates to a macro.  All you have to
-do is prepend @code{jit_} (lowercase) to opcode names and @code{JIT_}
-(uppercase) to register names.  Of course, parameters are to be put
-between parentheses, just like with every other @sc{cpp} macro.
-
-This small tutorial presents three examples:
-
-@iftex
-@itemize @bullet
-@item
-The @code{incr} function found in @ref{The instruction set, ,
-@lightning{}'s instruction set}:
-
-@item
-A simple function call to @code{printf}
-
-@item
-An RPN calculator.
-
-@item
-Fibonacci numbers
-@end itemize
-@end iftex
-@ifnottex
-@menu
-* incr::             A function which increments a number by one
-* printf::           A simple function call to printf
-* RPN calculator::   A more complex example, an RPN calculator
-* Fibonacci::        Calculating Fibonacci numbers
-@end menu
-@end ifnottex
-
-@node incr
-@section A function which increments a number by one
-
-Let's see how to create and use the sample @code{incr} function created
-in @ref{The instruction set, , @lightning{}'s instruction set}:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);    @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi  incr = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int   in;
-
-  jit_leaf(1);                     @rem{/* @t{     leaf  1            } */}
-  in = jit_arg_i();                @rem{/* @t{in = arg_i              } */}
-  jit_getarg_i(JIT_R0, in);        @rem{/* @t{     getarg_i R0        } */}
-  jit_addi_i(JIT_RET, JIT_R0, 1);  @rem{/* @t{     addi_i   RET\, R0\, 1} */}
-  jit_ret();                       @rem{/* @t{     ret                } */}
-
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-
-  @rem{/* call the generated code\, passing 5 as an argument */}
-  printf("%d + 1 = %d\n", 5, incr(5));
-  return 0;
-@}
-@end example
-
-Let's examine the code line by line (well, almost@dots{}):
-
-@table @t
-@item #include "lightning.h"
-You already know about this.  It defines all of @lightning{}'s macros.
-
-@item static jit_insn codeBuffer[1024];
-You might wonder about what is @code{jit_insn}.  It is just a type that
-is defined by @lightning{}.  Its exact definition depends on the
-architecture; in general, defining an array of 1024 @code{jit_insn}s
-allows one to write 100 to 400 @lightning{} instructions (depending on
-the architecture and exact instructions).
-
-@item typedef int (*pifi)(int);
-Just a handy typedef for a pointer to a function that takes an
-@code{int} and returns another.
-
-@item pifi incr = (pifi) (jit_set_ip(codeBuffer).iptr);
-This is the first @lightning{} macro we encounter that does not map to
-an instruction.  It is @code{jit_set_ip}, which takes a pointer to an
-area of memory where compiled code will be put and returns the same
-value, cast to a @code{union} type whose members are pointers to
-functions returning different C types.  This union is called
-@code{jit_code} and is defined as follows:
-
-@example
-    typedef union jit_code @{
-      char               *ptr;
-      void               (*vptr)();
-      char               (*cptr)();
-      unsigned char      (*ucptr)();
-      short              (*sptr)();
-      unsigned short     (*usptr)();
-      int                (*iptr)();
-      unsigned int       (*uiptr)();
-      long               (*lptr)();
-      unsigned long      (*ulptr)();
-      void *             (*pptr)();
-      float              (*fptr)();
-      double             (*dptr)();
-    @} jit_code;
-@end example
-
-Any of the members could have been used, since the result is soon casted
-to type @code{pifi} but, for the sake of clarity, the program uses
-@code{iptr}, a pointer to a function with no prototype and returning an
-@code{int}.
-
-Analogous to @code{jit_set_ip} is @code{jit_get_ip}, which does not
-modify the instruction pointer---it is nothing more than a cast of the
-current @sc{ip} to @code{jit_code}.
-
-@item int       in;
-A footnote in @ref{The instruction set, , @lightning{}'s instruction
-set}, under the description of @code{arg}, says that macros implementing
-@code{arg} return a value---we'll be using this variable to store the
-result of @code{arg}.
-
-@item jit_leaf(1);
-Ok, so we start generating code for our beloved function@dots{} it will
-accept one argument and won't call any other function.
-
-@item in = jit_arg_i();
-@itemx jit_getarg_i(JIT_R0, in);
-We retrieve the first (and only) argument, an integer, and store it
-into the general-purpose register @code{R0}.
-
-@item jit_addi_i(JIT_RET, JIT_R0, 1);
-We add one to the content of the register and store the result in the
-return value.
-
-@item jit_ret();
-This instruction generates a standard function epilog that returns
-the contents of the @code{RET} register.
-
-@item jit_flush_code(codeBuffer, jit_get_ip().ptr);
-This instruction is very important.  It flushes the generated code
-area out of the processor's instruction cache, avoiding the processor
-executes bogus data that it happens to find there.  The
-@code{jit_flush_code} function accepts the first and the last address
-to flush; we use @code{jit_get_ip} to find out the latter.
-
-@item printf("%d + 1 = %d", 5, incr(5));
-Calling our function is this simple---it is not distinguishable from
-a normal C function call, the only difference being that @code{incr}
-is a variable.
-@end table
-
-@lightning{} abstracts two phases of dynamic code generation: selecting
-instructions that map the standard representation, and emitting binary
-code for these instructions.  The client program has the responsibility
-of describing the code to be generated using the standard @lightning{}
-instruction set.
-
-Let's examine the code generated for @code{incr} on the SPARC and x86
-architectures (on the right is the code that an assembly-language
-programmer would write):
-
-@table @b
-@item SPARC
-@example
-    save %sp, -96, %sp
-    mov  %i0, %l0                   retl
-    add  %l0, 1,  %i0               add %o0, 1, %o0
-    ret
-    restore
-@end example
-In this case, @lightning{} introduces overhead to create a register
-window (not knowing that the procedure is a leaf procedure) and to
-move the argument to the general purpose register @code{R0} (which
-maps to @code{%l0} on the SPARC).  The former overhead could be
-avoided by teaching @lightning{} about leaf procedures (@pxref{Future});
-the latter could instead be avoided by rewriting the getarg instruction
-as @code{jit_getarg_i(JIT_RET, in)}, which was not done in this
-example.
-
-@item x86
-@example
-    pushl %ebp
-    movl  %esp, %ebp
-    pushl %ebx
-    pushl %esi
-    pushl %edi
-    movl  8(%ebp), %eax        movl 4(%esp), %eax
-    addl  $1, %eax             incl %eax
-    popl  %edi
-    popl  %esi
-    popl  %ebx
-    popl  %ebp
-    ret                        ret
-@end example
-In this case, the main overhead is due to the function's prolog and
-epilog, which is nine instructions long on the x86; a hand-written
-routine would not save unused callee-preserved registers on the stack.
-It is to be said, however, that this is not a problem in more
-complicated uses, because more complex procedure would probably use
-the @code{V0} through @code{V2} registers (@code{%ebx}, @code{%esi},
-@code{%edi}); in this case, a hand-written routine would have included
-the prolog too.  Also, a ten byte prolog would probably be a small
-overhead in a more complex function.
-@end table
-
-In such a simple case, the macros that make up the back-end compile
-reasonably efficient code, with the notable exception of prolog/epilog
-code.
-
-@node printf
-@section A simple function call to @code{printf}
-
-Again, here is the code for the example:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef void (*pvfi)(int);      @rem{/* Pointer to Void Function of Int */}
-
-int main()
-@{
-  pvfi          myFunction;             @rem{/* ptr to generated code */}
-  char          *start, *end;           @rem{/* a couple of labels */}
-  int           in;                     @rem{/* to get the argument */}
-
-  myFunction = (pvfi) (jit_set_ip(codeBuffer).vptr);
-  start = jit_get_ip().ptr;
-  jit_prolog(1);
-  in = jit_arg_i();
-  jit_movi_p(JIT_R0, "generated %d bytes\n");
-  jit_getarg_i(JIT_R1, in);
-  jit_prepare(2);
-    jit_pusharg_i(JIT_R1);              @rem{/* push in reverse order */}
-    jit_pusharg_p(JIT_R0);
-  jit_finish(printf);
-  jit_ret();
-  end = jit_get_ip().ptr;
-
-  @rem{/* call the generated code\, passing its size as argument */}
-  jit_flush_code(start, end);
-  myFunction(end - start);
-@}
-@end example
-
-The function shows how many bytes were generated.  Most of the code
-is not very interesting, as it resembles very closely the program
-presented in @ref{incr, , A function which increments a number by one}.
-
-For this reason, we're going to concentrate on just a few statements.
-
-@table @t
-@item start = jit_get_ip().ptr;
-@itemx @r{@dots{}}
-@itemx end = jit_get_ip().ptr;
-These two instruction call the @code{jit_get_ip} macro which was
-mentioned in @ref{incr, , A function which increments a number by one}
-too.  In this case we use the only field of @code{jit_code} that is
-not a function pointer: @code{ptr}, which is a simple @code{char *}.
-
-@item jit_movi_p(JIT_R0, "generated %d bytes\n");
-Note the use of the @samp{p} type specifier, which automatically
-casts the second parameter to an @code{unsigned long} to make the
-code more clear and less cluttered by typecasts.
-
-@item jit_prepare(2);
-@itemx jit_pusharg_i(JIT_R1);
-@itemx jit_pusharg_p(JIT_R0);
-@itemx jit_finish(printf);
-Once the arguments to @code{printf} have been put in general-purpose
-registers, we can start a prepare/pusharg/finish sequence that
-moves the argument to either the stack or registers, then calls
-@code{printf}, then cleans up the stack.  Note how @lightning{}
-abstracts the differences between different architectures and
-ABI's -- the client program does not know how parameter passing
-works on the host architecture.
-@end table
-
-@node RPN calculator
-@section A more complex example, an RPN calculator
-
-We create a small stack-based RPN calculator which applies a series
-of operators to a given parameter and to other numeric operands.
-Unlike previous examples, the code generator is fully parameterized
-and is able to compile different formulas to different functions.
-Here is the code for the expression compiler; a sample usage will
-follow.
-
-Since @lightning{} does not provide push/pop instruction, this
-example uses a stack-allocated area to store the data.  Such an
-area can be allocated using the macro @code{jit_allocai}, which
-receives the number of bytes to allocate and returns the offset
-from the frame pointer register @code{JIT_FP} to the base of the
-area.  The area is aligned to an @code{int}; future versions
-of @lightning{} may provide more fine-grained control on the
-alignment of stack-allocated variables.
-
-Usually, you will use the @code{ldxi} and @code{stxi} instruction
-to access stack-allocated variables.  However, it is possible to
-use operations such as @code{add} to compute the address of the
-variables, and pass the address around.
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-void stack_push(int reg, int *sp)
-@{
-  jit_stxi_i (*sp, JIT_FP, reg);
-  *sp += sizeof (int);
-@}
-
-void stack_pop(int reg, int *sp)
-@{
-  *sp -= sizeof (int);
-  jit_ldxi_i (reg, JIT_FP, *sp);
-@}
-
-pifi compile_rpn(char *expr)
-@{
-  pifi fn;
-  int stack_base, stack_ptr;
-  int in;
-
-  fn = (pifi) (jit_get_ip().iptr);
-  jit_leaf(1);
-  in = jit_arg_i();
-  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
-
-  jit_getarg_i(JIT_R2, in);
-
-  while (*expr) @{
-    char buf[32];
-    int n;
-    if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
-      expr += n - 1;
-      stack_push(JIT_R0, &stack_ptr);
-      jit_movi_i(JIT_R0, atoi(buf));
-    @} else if (*expr == 'x') @{
-      stack_push(JIT_R0, &stack_ptr);
-      jit_movi_i(JIT_R0, JIT_R2);
-    @} else if (*expr == '+') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_addr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '-') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_subr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '*') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_mulr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '/') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_divr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else @{
-      fprintf(stderr, "cannot compile: %s\n", expr);
-      abort();
-    @}
-    ++expr;
-  @}
-  jit_movr_i(JIT_RET, JIT_R0);
-  jit_ret();
-  return fn;
-@}
-@end example
-
-The principle on which the calculator is based is easy: the stack top
-is held in R0, while the remaining items of the stack are held in the
-memory area that we allocate with @code{allocai}.  Compiling a numeric
-operand or the argument @code{x} pushes the old stack top onto the
-stack and moves the operand into R0; compiling an operator pops the
-second operand off the stack into R1, and compiles the operation so
-that the result goes into R0, thus becoming the new stack top.
-
-This example allocates a fixed area for 32 @code{int}s.  This is not
-a problem when the function is a leaf like in this case; in a full-blown
-compiler you will want to analyze the input and determine the number
-of needed stack slots---a very simple example of register allocation.
-The area is then managed like a stack using @code{stack_push} and
-@code{stack_pop}.
-
-Try to locate a call to @code{jit_set_ip} in the source code.  You
-will not find one; this means that the client has to manually set
-the instruction pointer.  This technique has one advantage and one
-drawback.  The advantage is that the client can simply set the
-instruction pointer once and then generate code for multiple functions,
-one after another, without caring about passing a different instruction
-pointer each time; see @ref{Reentrancy, , Re-entrant usage of
-@lightning{}} for the disadvantage.
-
-Source code for the client (which lies in the same source file) follows:
-
-@example
-static jit_insn codeBuffer[1024];
-
-int main()
-@{
-  pifi c2f, f2c;
-  int i;
-
-  jit_set_ip(codeBuffer);
-  c2f = compile_rpn("32x9*5/+");
-  f2c = compile_rpn("x32-5*9/");
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-
-  printf("\nC:");
-  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
-  printf("\nF:");
-  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
-  printf("\n");
-
-  printf("\nF:");
-  for (i = 32; i <= 212; i += 10) printf("%3d ", i);
-  printf("\nC:");
-  for (i = 32; i <= 212; i += 10) printf("%3d ", f2c(i));
-  printf("\n");
-  return 0;
-@}
-@end example
-
-The client displays a conversion table between Celsius and Fahrenheit
-degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
-formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
-respectively.
-
-Providing the formula as an argument to @code{compile_rpn} effectively
-parameterizes code generation, making it possible to use the same code
-to compile different functions; this is what makes dynamic code
-generation so powerful.
-
-The @file{rpn.c} file in the @lightning{} distribution includes a more
-complete (and more complex) implementation of @code{compile_rpn},
-which does constant folding and is able to assemble instructions with
-an immediate parameter.  Still, it is based on the same principle and
-also uses @code{allocai} to allocate space for the stack.
-
-@node Fibonacci
-@section Fibonacci numbers
-
-The code in this section calculates a variant of the Fibonacci sequence.
-While the traditional Fibonacci sequence is modeled by the recurrence
-relation:
-@display
-     f(0) = f(1) = 1
-     f(n) = f(n-1) + f(n-2)
-@end display
-
-@noindent
-the functions in this section calculates the following sequence, which
-is more interesting as a benchmark@footnote{That's because, as is
-easily seen, the sequence represents the number of activations of the
-@code{nfibs} procedure that are needed to compute its value through
-recursion.}:
-@display
-     nfibs(0) = nfibs(1) = 1
-     nfibs(n) = nfibs(n-1) + nfibs(n-2) + 1
-@end display
-
-The purpose of this example is to introduce branches.  There are two
-kind of branches: backward branches and forward branches.  We'll
-present the calculation in a recursive and iterative form; the
-former only uses forward branches, while the latter uses both.
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi      nfibs = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int       in;                 @rem{/* offset of the argument */}
-  jit_insn  *ref;               @rem{/* to patch the forward reference */}
-
-        jit_prolog   (1);
-  in =  jit_arg_ui   ();
-        jit_getarg_ui(JIT_V0, in);              @rem{/* V0 = n */}
-  ref = jit_blti_ui  (jit_forward(), JIT_V0, 2);
-        jit_subi_ui  (JIT_V1, JIT_V0, 1);       @rem{/* V1 = n-1 */}
-        jit_subi_ui  (JIT_V2, JIT_V0, 2);       @rem{/* V2 = n-2 */}
-        jit_prepare(1);
-          jit_pusharg_ui(JIT_V1);
-        jit_finish(nfibs);
-        jit_retval(JIT_V1);                     @rem{/* V1 = nfibs(n-1) */}
-        jit_prepare(1);
-          jit_pusharg_ui(JIT_V2);
-        jit_finish(nfibs);
-        jit_retval(JIT_V2);                     @rem{/* V2 = nfibs(n-2) */}
-        jit_addi_ui(JIT_V1,  JIT_V1,  1);
-        jit_addr_ui(JIT_RET, JIT_V1, JIT_V2);   @rem{/* RET = V1 + V2 + 1 */}
-        jit_ret();
-
-  jit_patch(ref);                               @rem{/* patch jump */}
-        jit_movi_i(JIT_RET, 1);                 @rem{/* RET = 1 */}
-        jit_ret();
-
-  @rem{/* call the generated code\, passing 32 as an argument */}
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-  printf("nfibs(%d) = %d", 32, nfibs(32));
-  return 0;
-@}
-@end example
-
-As said above, this is the first example of dynamically compiling
-branches.  Branch instructions have three operands: two contains the
-values to be compared, while the first is a @dfn{label}; @lightning{}
-label's are represented as @code{jit_insn *} values.  Unlike other
-instructions (apart from @code{arg}, which is actually a directive
-rather than an instruction), branch instructions also return a value
-which, as we see in the example above, can be used to compile
-forward references.
-
-Compiling a forward reference is a two-step operation.  First, a
-branch is compiled with a dummy label, since the actual destination
-of the jump is not yet known; the dummy label is returned by the
-@code{jit_forward()} macro.  The value returned by the branch
-instruction is saved to be used later.
-
-Then, when the destination of the jump is reached, another macro
-is used, @code{jit_patch()}. This macro must be called once for
-@strong{every} point in which the code had a forward branch to the
-instruction following @code{jit_patch} (in this case a @code{movi_i}
-instruction).
-
-Now, here is the iterative version:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi     nfibs = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int      in;                  @rem{/* offset of the argument */}
-  jit_insn *ref;                @rem{/* to patch the forward reference */}
-  jit_insn *loop;               @rem{/* start of the loop */}
-
-        jit_leaf     (1);
-  in =  jit_arg_ui   ();
-        jit_getarg_ui(JIT_R2, in);              @rem{/* R2 = n */}
-        jit_movi_ui  (JIT_R1, 1);
-  ref = jit_blti_ui  (jit_forward(), JIT_R2, 2);
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);
-        jit_movi_ui  (JIT_R0, 1);
-
-  loop= jit_get_label();
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
-        jit_addr_ui  (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
-        jit_movr_ui  (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
-        jit_addi_ui  (JIT_R1, JIT_V0, 1);       @rem{/* R1 = V0 + 1 */}
-        jit_bnei_ui  (loop, JIT_R2, 0);         @rem{/* if (R2) goto loop; */}
-
-  jit_patch(ref);                               @rem{/* patch forward jump */}
-        jit_movr_ui  (JIT_RET, JIT_R1);         @rem{/* RET = R1 */}
-        jit_ret      ();
-
-  @rem{/* call the generated code\, passing 36 as an argument */}
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-  printf("nfibs(%d) = %d", 36, nfibs(36));
-  return 0;
-@}
-@end example
-
-This code calculates the recurrence relation using iteration (a
-@code{for} loop in high-level languages).  There is still a forward
-reference (indicated by the @code{jit_forward}/@code{jit_patch} pair);
-there are no function calls anymore: instead, there is a backward
-jump (the @code{bnei} at the end of the loop).
-
-In this case, the destination address should be known, because the
-jumps lands on an instruction that has already been compiled.
-However the program must make a provision and remember the address
-where the jump will land.  This is achieved with @code{jit_get_label},
-yet another macro that is much similar to @code{jit_get_ip} but,
-instead of a @code{jit_code} union, it answers an @code{jit_insn *}
-that the branch macros accept.
-
-Now, let's make one more change: let's rewrite the loop like this:
-
-@example
-  @r{@dots{}}
-
-  jit_delay(
-        jit_movi_ui  (JIT_R1, 1),
-  ref = jit_blti_ui  (jit_forward(), JIT_R2, 2));
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);
-
-  loop= jit_get_label();
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
-        jit_addr_ui  (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
-        jit_movr_ui  (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
-  jit_delay(
-        jit_addi_ui  (JIT_R1, JIT_V0, 1),       @rem{/* R1 = V0 + 1 */}
-        jit_bnei_ui  (loop, JIT_R2, 0));        @rem{/* if (R2) goto loop; */}
-
-  @r{@dots{}}
-@end example
-
-The @code{jit_delay} macro is used to schedule delay slots in jumps and
-branches.  This is optional, but might lead to performance improvements
-in tight inner loops (of course not in a loop that is executed 35
-times, but this is just an example).
-
-@code{jit_delay} takes two @lightning{} instructions, a @dfn{delay
-instruction} and a @dfn{branch instruction}.  Note that the two
-instructions must be written in execution order (first the delay
-instruction, then the branch instruction), @strong{not} with the branch
-first.  If the current machine has a delay slot, the delay instruction
-(or part of it) is placed in the delay slot after the branch
-instruction; otherwise, it emits the delay instruction before the branch
-instruction.  The delay instruction must not depend on being executed
-before or after the branch.
-
-Instead of @code{jit_patch}, you can use @code{jit_patch_at}, which
-takes two arguments: the first is the same as for @code{jit_patch}, and
-the second is the valued to be patched in.  In other words, these two
-invocations have the same effect:
-
-@example
-  jit_patch (jump_pc);
-  jit_patch_at (jump_pc, jit_get_ip ());
-@end example
-
-Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
-and @code{jit_patch_movi}, which can also be used to implement
-forward references.  @code{jit_movi_p} is carefully implemented
-to use an encoding that is as long as possible, so that it can
-always be patched; in addition, like branches, it will return
-an address which is then passed to @code{jit_patch_movi}.  The
-usage of @code{jit_patch_movi} is similar to @code{jit_patch_at}.
-
-@node Reentrancy
-@chapter Re-entrant usage of @lightning{}
-
-By default, @lightning{} is able to compile different functions at the
-same time as long as it happens in different object files, and on the
-other hand constrains code generation tasks to reside in a single
-object file.
-
-The reason for this is not apparent, but is easily explained:
-the @file{lightning.h} header file defines its state as a
-@code{static} variable, so calls to @code{jit_set_ip} and
-@code{jit_get_ip} residing in different files access different
-instruction pointers.  This was not done without reason: it makes
-the usage of @lightning{} much simpler, as it limits the initialization
-tasks to the bare minimum and removes the need to link the program
-with a separate library.
-
-On the other hand, multi-threaded or otherwise concurrent programs
-require reentrancy in the code generator, so this approach cannot be
-the only one.  In fact, it is possible to define your own copy of
-@lightning{}'s instruction state by defining a variable of type
-@code{jit_state} and @code{#define}-ing @code{_jit} to it:
-
-@example
-    struct jit_state lightning;
-    #define _jit lightning
-@end example
-
-You are free to define the @code{jit_state} variable as you like:
-@code{extern}, @code{static} to a function, @code{auto}, or global.
-
-This feature takes advantage of an aspect of macros (@dfn{cascaded
-macros}), which is documented thus in @acronym{CPP}'s reference manual:
-
-@quotation
-A cascade of macros is when one macro's body contains a reference to
-another macro.  This is very common practice.  For example,
-@example
-#define BUFSIZE 1020
-#define TABLESIZE BUFSIZE
-@end example
-This is not at all the same as defining @code{TABLESIZE} to be
-@samp{1020}.  The @code{#define} for @code{TABLESIZE} uses exactly the
-body you specify---in this case, @code{BUFSIZE}---and does not check to
-see whether it too is the name of a macro; it's only when you use
-@code{TABLESIZE} that the result of its expansion is checked for more
-macro names. 
-
-This makes a difference if you change the definition of @code{BUFSIZE}
-at some point in the source file. @code{TABLESIZE}, defined as shown,
-will always expand using the definition of @code{BUFSIZE} that is
-currently in effect: 
-#define BUFSIZE 1020
-#define TABLESIZE BUFSIZE
-#undef BUFSIZE
-#define BUFSIZE 37
-
-Now @code{TABLESIZE} expands (in two stages) to `37'. (The @code{#undef}
-is to prevent any warning about the nontrivial redefinition of
-@code{BUFSIZE}.)
-@end quotation
-
-@noindent
-In the same way, @code{jit_get_label} will adopt whatever definition of
-@code{_jit} is in effect:
-@example
-#define	jit_get_label()			(_jit.pc)
-@end example
-
-Special care must be taken when functions residing in separate files
-must access the same state.  This could be the case, for example, if a
-special library contained function for strength reduction of
-multiplications to adds & shifts, or maybe of divisions to
-multiplications and shifts.  The function would be compiled using a
-single definition of @code{_jit} and that definition would be used
-whenever the function would be called.
-
-Since @lightning{} uses a feature of the preprocessor to obtain
-re-entrancy, it makes sense to rely on the preprocessor in this case
-too.
-
-The idea is to pass the current @code{struct jit_state} to the
-function:
-
-@example
-static void
-_opt_muli_i(jit, dest, source, n)
-     register struct jit_state *jit;
-     register int		dest, source, n;
-@{
-#define _jit          jit
-@dots{}
-#undef _jit
-@}
-@end example
-
-@noindent
-doing this unbeknownst to the client, using a macro in the header file:
-
-@example
-extern void _opt_muli_i(struct jit_state *, int, int, int);
-
-#define opt_muli_i(rd, rs, n)	_opt_muli_i(&_jit, (rd), (rs), (n))
-@end example
-
-
-@section Registers
-@chapter Accessing the whole register file
-
-As mentioned earlier in this chapter, all @lightning{} back-ends are
-guaranteed to have at least six general-purpose integer registers and
-six floating-point registers, but many back-ends will have more.
-
-To access the entire register files, you can use the
-@code{JIT_R}, @code{JIT_V} and @code{JIT_FPR} macros.  They
-accept a parameter that identifies the register number, which
-must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
-and @code{JIT_FPR_NUM} respectively; the number need not be
-constant.  Of course, expressions like @code{JIT_R0} and
-@code{JIT_R(0)} denote the same register, and likewise for
-integer callee-saved, or floating-point, registers.
-
-@node Bundling GNU lightning
-@chapter Using @lightning{} in your programs
-
-It is very easy to include @lightning{}'s source code (without the
-documentation and examples) into your program's distribution 
-so that people don't need to have it installed in order to use it.
-
-Here is a step by step explanation of what to do:
-
-@enumerate
-@item Run @command{lightningize} from your package's main
-distribution directory.
-@example
-     lightningize
-@end example
-
-@noindent
-This will copy the source code for the @lightning{} back ends
-into the @file{lightning} directory of your package.
-
-@item If you're using Automake, you might be pleased to know that
-@file{Makefile.am} files will be already there.
-
-If you're not using Automake and @code{aclocal}, instead,
-you should delete the @file{Makefile.am} files (they are of no use
-to you) and copy the contents of the @file{lightning.m4} file, found in
-@command{aclocal}'s macro repository (usually @file{/usr/share/aclocal},
-to your @file{configure.in} or @file{acinclude.m4} or @file{aclocal.m4} file.
-
-@item Include a call to the @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND}
-macro in your @file{configure.in} file.
-@end enumerate
-
-@code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} will first look for a
-pre-installed copy of @lightning{} and, if it can be found, it will
-use it; otherwise, it will test if there is a back-end for the host
-system.  If @lightning{} is already installed, or if the system is
-supported by lightning, it will define the @code{HAVE_LIGHTNING}
-symbol.
-
-In addition, an Automake conditional named @code{HAVE_INSTALLED_LIGHTNING}
-will be set if @lightning{} is already installed, which can be used to
-set up include paths appropriately.
-
-Finally, @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} accepts two
-optional parameters: respectively, an action to be taken if @lightning{}
-is available, and an action to be taken if it is not.
diff --git a/doc/version.texi b/doc/version.texi
index c9347b90b..b7b6751ff 100644
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,4 +1,4 @@
-@set UPDATED 3 June 2009
-@set UPDATED-MONTH June 2009
-@set EDITION 1.2c
-@set VERSION 1.2c
+@set UPDATED 24 January 2013
+@set UPDATED-MONTH January 2013
+@set EDITION 2.0
+@set VERSION 2.0