From 16d18f11d389ec96cf5934cea1badfd8f4b6127b Mon Sep 17 00:00:00 2001
From: pcpa <paulo.cesar.pereira.de.andrade@gmail.com>
Date: Thu, 24 Jan 2013 19:41:35 -0200
Subject: [PATCH] Update texinfo documentation to match current implementation.

	* check/Makefile.am: "make debug" target should pass only
	the main test tool program as argument for running gdb

	* configure.ac: Add the --enable-assertions option.

	* doc/Makefile.am, doc/body.texi, doc/lightning.texi:
	Major rewrite of the documentation to match the current
	implementation.

	* doc/version.texi: Automatic date update.

	* doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c:
	Implementation of the documentation examples, that are also
	compiled during a normal build.

	* doc/p-lightning.texi, doc/porting.texi, doc/toc.texi,
	doc/u-lightning.texi, doc/using.texi: These files were
	renamed in the documentation rewrite, as the documentation
	was significantly trimmed due to full removal of the porting
	chapters. Better porting documentation should be added, but
	for the moment the documentation not matching the
	implementation was simply removed.
---
 .gitignore           |    1 +
 ChangeLog            |   27 +-
 check/Makefile.am    |    4 +-
 configure.ac         |   11 +
 doc/Makefile.am      |   41 +-
 doc/body.texi        | 1297 ++++++++++++++++++++++++++++------
 doc/ifib.c           |   44 ++
 doc/incr.c           |   29 +
 doc/lightning.texi   |   28 +-
 doc/p-lightning.texi |  100 ---
 doc/porting.texi     | 1600 ------------------------------------------
 doc/printf.c         |   38 +
 doc/rfib.c           |   49 ++
 doc/rpn.c            |   94 +++
 doc/toc.texi         |   76 --
 doc/u-lightning.texi |  100 ---
 doc/using.texi       | 1273 ---------------------------------
 doc/version.texi     |    8 +-
 18 files changed, 1431 insertions(+), 3389 deletions(-)
 create mode 100644 doc/ifib.c
 create mode 100644 doc/incr.c
 delete mode 100644 doc/p-lightning.texi
 delete mode 100644 doc/porting.texi
 create mode 100644 doc/printf.c
 create mode 100644 doc/rfib.c
 create mode 100644 doc/rpn.c
 delete mode 100644 doc/toc.texi
 delete mode 100644 doc/u-lightning.texi
 delete mode 100644 doc/using.texi

diff --git a/.gitignore b/.gitignore
index a89a8e180..ddfc42407 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ missing
 stamp-h1
 test-driver
 check/.deps
+doc/.deps
 lib/.deps
 m4/libtool.m4
 m4/lt~obsolete.m4
diff --git a/ChangeLog b/ChangeLog
index 84d3c4391..1322aacf7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,31 @@
+2013-01-24 Paulo Andrade <pcpa@gnu.org>
+
+	* check/Makefile.am: "make debug" target should pass only
+	the main test tool program as argument for running gdb
+
+	* configure.ac: Add the --enable-assertions option.
+
+	* doc/Makefile.am, doc/body.texi, doc/lightning.texi:
+	Major rewrite of the documentation to match the current
+	implementation.
+
+	* doc/version.texi: Automatic date update.
+
+	* doc/ifib.c, doc/incr.c, doc/printf.c, doc/rfib.c, doc/rpn.c:
+	Implementation of the documentation examples, that are also
+	compiled during a normal build.
+
+	* doc/p-lightning.texi, doc/porting.texi, doc/toc.texi,
+	doc/u-lightning.texi, doc/using.texi: These files were
+	renamed in the documentation rewrite, as the documentation
+	was significantly trimmed due to full removal of the porting
+	chapters. Better porting documentation should be added, but
+	for the moment the documentation not matching the
+	implementation was simply removed.
+
 2013-01-18 Paulo Andrade <pcpa@gnu.org>
 
-	lib/jit_note.c: Correct bounds check and wrong code keeping
+	* lib/jit_note.c: Correct bounds check and wrong code keeping
 	a pointer that could be changed after a realloc call.
 
 2013-01-18 Paulo Andrade <pcpa@gnu.org>
diff --git a/check/Makefile.am b/check/Makefile.am
index ce78c49fd..33ce2134e 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -172,6 +172,6 @@ CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
 
-debug:		$(check_PROGRAMS)
-	$(LIBTOOL) --mode=execute gdb $(check_PROGRAMS)
+debug:		lightning
+	$(LIBTOOL) --mode=execute gdb lightning
 
diff --git a/configure.ac b/configure.ac
index 9d5e43350..83a7805dd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -56,6 +56,17 @@ if test "x$DISASSEMBLER" != "xno"; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDISASSEMBLER=1"
 fi
 
+AC_ARG_ENABLE(assertions,
+	      AS_HELP_STRING([--enable-assertions],
+			     [Enable runtime code generation assertions]),
+	      [DEBUG=$enableval], [DEBUG=auto])
+if test "x$DEBUG" = xyes; then
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
+else
+    LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
+    DEBUG=no
+fi
+
 cpu=
 case "$target_cpu" in
     i?86|x86_64)	cpu=x86		;;
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 3baca2d69..3f4ff64ab 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,7 +1,40 @@
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+
+AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE
+
 info_TEXINFOS = lightning.texi 
-EXTRA_TEXINFOS = u-lightning.texi p-lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
 
-lightning_TEXINFOS = body.texi toc.texi using.texi porting.texi version.texi
-u_lightning_TEXINFOS = body.texi toc.texi using.texi version.texi
-p_lightning_TEXINFOS = body.texi toc.texi porting.texi version.texi
+lightning_TEXINFOS = body.texi version.texi
+
+noinst_PROGRAMS = incr printf rpn rfib ifib
+
+$(top_builddir)/lib/liblightning.la:
+	cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
+
+incr_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+incr_SOURCES = incr.c
+
+printf_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+printf_SOURCES = printf.c
+
+rpn_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+rpn_SOURCES = rpn.c
+
+rfib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+rfib_SOURCES = rfib.c
+
+ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl
+ifib_SOURCES = ifib.c
diff --git a/doc/body.texi b/doc/body.texi
index 7c20d5152..af924e807 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -1,66 +1,32 @@
-@ifinfo
-@dircategory @lightning{}, a library for dynamic code generation
-@direntry
-     * @value{TITLE}: (lightning).
-@end direntry
-
-This file documents GNU lightning, Version @value{VERSION}.
-It was last updated on @value{UPDATED}.
-
-Copyright @copyright{} 2000 Free Software Foundation, Inc.
-Authored by Paolo Bonzini.
-
-This document is released under the terms of the GNU Free Documentation
-License as published by the Free Software Foundation; either version 1.1, or
-(at your option) any later version.
-
-You should have received a copy of the GNU Free Documentation License along
-with GNU lightning; see the file @file{COPYING.DOC}.  If not, write to the Free
-Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  
-
-There are no Secondary Sections, no Cover Texts and no Invariant Sections
-(as defined in the license); this text, along with its equivalent in the
-printed manual, constitutes the Title Page.
-@end ifinfo
-
-@setchapternewpage odd
-
-@titlepage
-@title @value{TITLE}
-@subtitle Version @value{VERSION}
-@subtitle @value{UPDATE-MONTH}
-
-@author by Paolo Bonzini
-
-@c  The following two commands start the copyright page.
-@page
-@vskip 0pt plus 1filll
-Copyright 1988-92, 1994-95, 1999, 2000 Free Software Foundation, Inc.
-
-This document is released under the terms of the @sc{gnu} Free Documentation
-License as published by the Free Software Foundation; either version 1.1, or
-(at your option) any later version.
-
-You should have received a copy of the @sc{gnu} Free Documentation License
-along with @sc{gnu} @i{lightning}; see the file @file{COPYING.DOC}.  If not,
-write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
-MA 02110-1301, USA.  
-
-There are no Secondary Sections, no Cover Texts and no Invariant Sections
-(as defined in the license); this text, along with its equivalent in the
-Info documentation, constitutes the Title Page.
-@end titlepage
-
 @ifnottex
 @node Top
 @top @lightning{}
 
+@iftex
+@macro comma
+@verbatim{|,|}
+@end macro
+@end iftex
+
+@ifnottex
+@macro comma
+@verb{|,|}
+@end macro
+@end ifnottex
+
 This document describes @value{TOPIC} the @lightning{} library for
 dynamic code generation.  Unlike other dynamic code generation systems,
 which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
 
-@include toc.texi
+@menu
+* Overview::                What GNU lightning is
+* Installation::            Configuring and installing GNU lightning
+* The instruction set::     The RISC instruction set used in GNU lightning
+* GNU lightning examples::  GNU lightning's examples
+* Reentrancy::              Re-entrant usage of GNU lightning
+* Acknowledgements::        Acknowledgements for GNU lightning
+@end menu
 @end ifnottex
 
 @node Overview
@@ -73,14 +39,6 @@ which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
 @end iftex
 
-@ifclear USING
-This manual assumes that you are pretty comfortable with the usage of
-@lightning{} for dynamic code generation, as described in
-@usingref{The instruction set, @lightning{}'s instruction set}, and
-instead focuses on the retargeting process.  What follows is nothing
-more then a brief overview of the system.
-@end ifclear
-
 Dynamic code generation is the generation of machine code 
 at runtime. It is typically used to strip a layer of interpretation 
 by allowing compilation to occur at runtime.  One of the most
@@ -105,28 +63,8 @@ are generated, so programs using dynamic code generation must be
 retargeted for each machine; in addition, coding a run-time code
 generator is a tedious and error-prone task more than a difficult one.
 
-@ifset USING
-This manual describes the @lightning{} dynamic code generation library.
 @lightning{} provides a portable, fast and easily retargetable dynamic
 code generation system. 
-@end ifset
-@ifclear USING
-@lightning{} provides a portable, fast and easily retargetable dynamic
-code generation system. 
-@end ifclear
-
-To be fast, @lightning{} emits machine code without first creating
-intermediate data structures such as RTL representations traditionally
-used by optimizing compilers (@pxref{RTL representation, , , gcc, Using
-and porting GNU CC}).  @lightning{} translates code directly from a
-machine independent interface to that of the underlying architecture.
-This makes code generation more efficient, since no intermediate data
-structures have to be constructed and consumed.  A collateral benefit
-it that @lightning{} consumes little space: other than the memory
-needed to store generated instructions and data structures such as
-parse trees, the only data structure that client will usually need
-is an array of pointers to labels and unresolved jumps, which you
-can often allocate directly on the system stack.
 
 To be portable, @lightning{} abstracts over current architectures'
 quirks and unorthogonalities.  The interface that it exposes to is that
@@ -143,141 +81,1098 @@ real architectures closely enough that, most of the time, the
 compiler's constant folding pass ends up generating code which
 assembles machine instructions without further tests.
 
-@section Drawbacks
+@node Installation
+@chapter Configuring and installing @lightning{}
 
-@lightning{} has been useful in practice; however, it does have
-at least four drawbacks: it has limited registers, no peephole
-optimizer, no instruction scheduler and no symbolic debugger. Of
-these, the last is the most critical even though it does not
-affect the quality of generated code: the only way to debug code
-generated at run-time  is to step through it at the level of
-host specific machine code.  A decent knowledge of the underlying
-instruction set is thus needed to make sense of the debugger's
-output.
-
-The low number of available registers (six) is also an important
-limitation.  However, let's take the primary application of dynamic
-code generation, that is, bytecode translators.  The underlying
-virtual machines tend to have very few general purpose registers
-(usually 0 to 2) and the translators seldom rely on sophisticated
-graph-coloring algorithms to allocate registers to temporary
-variables.  Rather, these translators usually obtain performance
-increases because: a) they remove indirect jumps, which are usually
-poorly predicted, and thus often form a bottleneck, b) they
-parameterize the generated code and go through the process of decoding
-the bytecodes just once.  So, their usage of registers is rather
-sparse---in fact, in practice, six registers were found to be
-enough for most purposes.
-
-The lack of a peephole optimizer is most important on machines where a 
-single instruction can map to multiple native instructions.  For
-instance, Intel chips' division instruction hard-codes the dividend
-to be in EAX and the quotient and remainder to be output, respectively,
-in EAX and EDX: on such chips, @lightning{} does lots of pushing and
-popping of EAX and EDX to save those registers that are not used.  
-Unnecessary stack operations could be removed by looking at whether
-preserved registers are destroyed soon.  Unfortunately, the current 
-implementation of @lightning{} is so fast because it only knows about
-the single instruction that is being generated; performing these
-optimizations would require a flow analysis pass that would probably
-hinder @lightning{}'s speed.
-
-The lack of an instruction scheduler is not very important---pretty
-good instruction scheduling can actually be obtained by separating
-register writes from register reads.  The only architectures on which
-a scheduler would be useful are those on which arithmetic instructions
-have two operands; an example is, again, the x86, on which the single
-instruction
+The first thing to do to use @lightning{} is to configure the
+program, picking the set of macros to be used on the host
+architecture; this configuration is automatically performed by
+the @file{configure} shell script; to run it, merely type:
 @example
-    subr_i  R0, R1, R2       @rem{!Compute R0 = R1 - R2}
-@end example
-@noindent
-is translated to two instruction, of which the second depends on the
-result of the first:
-@example
-    movl    %ebx, %eax       @rem{! Move R1 into R0}
-    subl    %edx, %eax       @rem{! Subtract R2 from R0}
+     ./configure
 @end example
 
-@ifset BOTH
-@node Using GNU lightning
-@chapter Using @lightning{}
+@lightning{} supports the @code{--enable-disassembler} option, which
+enables linking to GNU binutils and optionally printing a human-readable
+disassembly of the jit code. This option can be disabled by the
+@code{--disable-disassembler} option.
 
-This chapter describes installing and using @lightning{}.
+Another option that @file{configure} accepts is
+@code{--enable-assertions}, which enables several consistency checks in
+the run-time assemblers.  These are not usually needed, so you can
+decide to simply forget about it; also remember that these consistency
+checks tend to slow down your code generator.
 
-@menu
-@usingmenu{}
-@end menu
+After you've configured @lightning{}, run @file{make} as usual.
 
-@lowersections
-@end ifset
+@lightning{} has an extensive set of tests to validate that it is working
+correctly on the build host. To test it, run:
+@example
+    make check
+@end example
 
-@ifset USING
-@include using.texi
-@end ifset
+The next important step is:
+@example
+    make install
+@end example
 
-@ifset BOTH
-@raisesections
+This ends the process of installing @lightning{}.
 
-@node Porting GNU lightning
-@chapter Porting @lightning{}
+@node The instruction set
+@chapter @lightning{}'s instruction set
 
-This chapter describes the process of porting @lightning{}.
-It assumes that you are pretty comfortable with the usage of
-@lightning{} for dynamic code generation, as described in
-@ref{Using GNU lightning}.
-
-@menu
-@portingmenu{}
-@end menu
-
-@lowersections
-@end ifset
-
-@ifset PORTING
-@include porting.texi
-@end ifset
-
-@ifset BOTH
-@raisesections
-@end ifset
-
-@node Future
-@chapter The future of @lightning{}
-
-Presented below is the set of tasks that I feel need to be performed
-to make @lightning{} a more fully functional, viable system.  They are
-presented in no particular order.  I would @emph{very much} welcome any
-volunteers who would like to help with the implementation of one or
-more of these tasks.  Please write to me, Paolo Bonzini, at
-@email{bonzini@@gnu.org} if you are interested in adding your efforts
-to the @lightning{} project.
-
-Tasks:
+@lightning{}'s instruction set was designed by deriving instructions
+that closely match those of most existing RISC architectures, or
+that can be easily synthesized if absent.  Each instruction is composed
+of:
 @itemize @bullet
 @item
-The most important task to make @lightning{} more widely usable
-is to retarget it.  Although currently supported architectures
-(x86, SPARC, PowerPC) are certainly some of the most widely used,
-@lightning{} could be ported to others---namely, the Alpha and
-MIPS architectures.
+an operation, like @code{sub} or @code{mul}
 
 @item
-Another interesting task is to allow the instruction stream to grow
-dynamically.  This is a problem because not all architectures allow
-to write position independent code.@footnote{The x86's absolute
-jumps, for example, are actually slow indirect jumps, and need a
-register.}
+most times, a register/immediate flag (@code{r} or @code{i})
 
 @item
-Optimize leaf procedures on the SPARC.  This involves using the
-output registers (@code{%o@i{X}}) instead of the local registers
-(@code{%l@i{X}}) when writing leaf procedures;  the problem is,
-leaf procedures also receive parameters in the output registers,
-so they would be overwritten by write accesses to general-purpose
-registers.
+an unsigned modifier (@code{u}), a type identifier or two, when applicable.
 @end itemize
 
+Examples of legal mnemonics are @code{addr} (integer add, with three
+register operands) and @code{muli} (integer multiply, with two
+register operands and an immediate operand).  Each instruction takes
+two or three operands; in most cases, one of them can be an immediate
+value instead of a register.
+
+Most @lightning{} integer operations are signed wordsize operations,
+with the exception of operations that convert types, or load or store
+values to/from memory. When applicable, the types and C types are as
+follows:
+
+@example
+     _c         @r{signed char}
+     _uc        @r{unsigned char}
+     _s         @r{short}
+     _us        @r{unsigned short}
+     _i         @r{int}
+     _ui        @r{unsigned int}
+     _l         @r{long}
+     _f         @r{float}
+     _d         @r{double}
+@end example
+
+Most integer operations do not need a type modifier, and when loading or
+storing values to memory there is an alias to the proper operation
+using wordsize operands, that is, if omitted, the type is @r{int} on
+32-bit architectures and @r{long} on 64-bit architectures.  Note
+that lightning also expects @code{sizeof(void*)} to match the wordsize.
+
+When an unsigned operation result differs from the equivalent signed
+operation, there is the @code{_u} modifier.
+
+There are at least seven integer registers, of which six are
+general-purpose, while the last is used to contain the frame pointer
+(@code{FP}).  The frame pointer can be used to allocate and access local
+variables on the stack, using the @code{allocai} instruction.
+
+Of the general-purpose registers, at least three are guaranteed to be
+preserved across function calls (@code{V0}, @code{V1} and
+@code{V2}) and at least three are not (@code{R0}, @code{R1} and
+@code{R2}).  Six registers are not very much, but this
+restriction was forced by the need to target CISC architectures
+which, like the x86, are poor in registers; anyway, backends can
+specify the actual number of available registers with the calls
+@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
+(for callee-save registers).
+
+There are at least six floating-point registers, named @code{F0} to
+@code{F5}.  These are usually caller-save and are separate from the integer
+registers on the supported architectures; on Intel architectures,
+in 32 bit mode if SSE2 is not available or use of X87 is forced,
+the register stack is mapped to a flat register file.  As for the
+integer registers, the macro @code{JIT_F_NUM} yields the number of
+floating-point registers.
+
+The complete instruction set follows; as you can see, most non-memory
+operations only take integers (either signed or unsigned) as operands;
+this was done in order to reduce the instruction set, and because most
+architectures only provide word and long word operations on registers.
+There are instructions that allow operands to be extended to fit a larger
+data type, both in a signed and in an unsigned way.
+
+@table @b
+@item Binary ALU operations
+These accept three operands; the last one can be an immediate.
+@code{addx} operations must directly follow @code{addc}, and
+@code{subx} must follow @code{subc}; otherwise, results are undefined.
+Most, if not all, architectures do not support @r{float} or @r{double}
+immediate operands; lightning emulates those operations by moving the
+immediate to a temporary register and emitting the call with only
+register operands.
+@example
+addr         _f  _d  O1 = O2 + O3
+addi         _f  _d  O1 = O2 + O3
+addxr                O1 = O2 + (O3 + carry)
+addxi                O1 = O2 + (O3 + carry)
+addcr                O1 = O2 + O3, set carry
+addci                O1 = O2 + O3, set carry
+subr         _f  _d  O1 = O2 - O3
+subi         _f  _d  O1 = O2 - O3
+subxr                O1 = O2 - (O3 + carry)
+subxi                O1 = O2 - (O3 + carry)
+subcr                O1 = O2 - O3, set carry
+subci                O1 = O2 - O3, set carry
+mulr         _f  _d  O1 = O2 * O3
+muli         _f  _d  O1 = O2 * O3
+divr     _u  _f  _d  O1 = O2 / O3
+divi     _u  _f  _d  O1 = O2 / O3
+remr     _u          O1 = O2 % O3
+remi     _u          O1 = O2 % O3
+andr                 O1 = O2 & O3
+andi                 O1 = O2 & O3
+orr                  O1 = O2 | O3
+ori                  O1 = O2 | O3
+xorr                 O1 = O2 ^ O3
+xori                 O1 = O2 ^ O3
+lshr                 O1 = O2 << O3
+lshi                 O1 = O2 << O3
+rshr     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+rshi     _u          O1 = O2 >> O3@footnote{The sign bit is propagated unless using the @code{_u} modifier.}
+@end example
+
+@item Unary ALU operations
+These accept two operands, both of which must be registers.
+@example
+negr         _f  _d  O1 = -O2
+comr                 O1 = ~O2
+@end example
+
+These unary ALU operations are only defined for float operands.
+@example
+absr         _f  _d  O1 = fabs(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
+@end example
+
+Besides requiring the @code{r} modifier, there are no unary operations
+with an immediate operand.
+
+@item Compare instructions
+These accept three operands; again, the last can be an immediate.
+The last two operands are compared, and the first operand, that must be
+an integer register, is set to either 0 or 1, according to whether the
+given condition was met or not.
+
+The conditions given below are for the standard behavior of C,
+where the ``unordered'' comparison result is mapped to false.
+
+@example
+ltr       _u  _f  _d  O1 =  (O2 <  O3)
+lti       _u  _f  _d  O1 =  (O2 <  O3)
+ler       _u  _f  _d  O1 =  (O2 <= O3)
+lei       _u  _f  _d  O1 =  (O2 <= O3)
+gtr       _u  _f  _d  O1 =  (O2 >  O3)
+gti       _u  _f  _d  O1 =  (O2 >  O3)
+ger       _u  _f  _d  O1 =  (O2 >= O3)
+gei       _u  _f  _d  O1 =  (O2 >= O3)
+eqr           _f  _d  O1 =  (O2 == O3)
+eqi           _f  _d  O1 =  (O2 == O3)
+ner           _f  _d  O1 =  (O2 != O3)
+nei           _f  _d  O1 =  (O2 != O3)
+unltr         _f  _d  O1 = !(O2 >= O3)
+unler         _f  _d  O1 = !(O2 >  O3)
+ungtr         _f  _d  O1 = !(O2 <= O3)
+unger         _f  _d  O1 = !(O2 <  O3)
+uneqr         _f  _d  O1 = !(O2 <  O3) && !(O2 >  O3)
+ltgtr         _f  _d  O1 = !(O2 >= O3) || !(O2 <= O3)
+ordr          _f  _d  O1 =  (O2 == O2) &&  (O3 == O3)
+unordr        _f  _d  O1 =  (O2 != O2) ||  (O3 != O3)
+@end example
+
+@item Transfer operations
+These accept two operands; for @code{ext} both of them must be
+registers, while @code{mov} accepts an immediate value as the second
+operand.
+
+Unlike @code{movr} and @code{movi}, the other instructions are used
+to truncate a wordsize operand to a smaller integer data type or to
+convert float data types. You can also use @code{extr} to convert an
+integer to a floating point value: the usual options are @code{extr_f}
+and @code{extr_d}.
+
+@example
+movr                                 _f  _d  O1 = O2
+movi                                 _f  _d  O1 = O2
+extr      _c  _uc  _s  _us  _i  _ui  _f  _d  O1 = O2
+truncr                               _f  _d  O1 = trunc(O2)
+@end example
+
+In 64-bit architectures it may be required to use @code{truncr_f_i},
+@code{truncr_f_l}, @code{truncr_d_i} and @code{truncr_d_l} to match
+the equivalent C code.  Only the @code{_i} modifier is available in
+32-bit architectures.
+
+@example
+truncr_f_i    = <int> O1 = <float> O2
+truncr_f_l    = <long>O1 = <float> O2
+truncr_d_i    = <int> O1 = <double>O2
+truncr_d_l    = <long>O1 = <double>O2
+@end example
+
+The float conversion operations are @emph{destination first,
+source second}, but the order of the types is reversed.  This happens
+for historical reasons.
+
+@example
+extr_f_d    = <double>O1 = <float> O2
+extr_d_f    = <float> O1 = <double>O2
+@end example
+
+@item Network extensions
+These accept two operands, both of which must be registers; these
+two instructions actually perform the same task, yet they are
+assigned to two mnemonics for the sake of convenience and
+completeness.  As usual, the first operand is the destination and
+the second is the source.
+@example
+htonr    @r{Host-to-network (big endian) order}
+ntohr    @r{Network-to-host order }
+@end example
+
+@item Load operations
+@code{ld} accepts two operands while @code{ldx} accepts three;
+in both cases, the last can be either a register or an immediate
+value. Values are extended (with or without sign, according to
+the data type specification) to fit a whole register.
+The @code{_ui} and @code{_l} types are only available in 64-bit
+architectures.  For convenience, there is a version without a
+type modifier for integer or pointer operands that uses the
+appropriate wordsize call.
+@example
+ldr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
+ldxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+@end example
+
+@item Store operations
+@code{st} accepts two operands while @code{stx} accepts three; in
+both cases, the first can be either a register or an immediate
+value. Values are sign-extended to fit a whole register.
+@example
+str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
+stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+@end example
+As for the load operations, the @code{_ui} and @code{_l} types are
+only available in 64-bit architectures, and for convenience, there
+is a version without a type modifier for integer or pointer operands
+that uses the appropriate wordsize call.
+
+@item Argument management
+These are:
+@example
+prepare     (not specified)
+pushargr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+pushargi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+arg         _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+ret         (not specified)
+retr        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+reti        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+epilog      (not specified)
+@end example
+As with other operations that use a type modifier, the @code{_ui} and
+@code{_l} types are only available in 64-bit architectures, but there
+are operations without a type modifier that alias to the appropriate
+integer operation with wordsize operands.
+
+@code{prepare}, @code{pusharg}, and @code{retval} are used by the caller,
+while @code{arg}, @code{getarg} and @code{ret} are used by the callee.
+A code snippet that wants to call another procedure and has to pass
+arguments must, in order: use the @code{prepare} instruction and use
+the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
+left to right order}; and use @code{finish} or @code{call} (explained below)
+to perform the actual call.
+
+@code{arg} and @code{getarg} are used by the callee.
+@code{arg} is different from other instructions in that it does not
+actually generate any code: instead, it is a function which returns
+a value to be passed to @code{getarg}.@footnote{``Return a
+value'' means that @lightning{} code that compiles these
+instructions returns a value when expanded.} You should call
+@code{arg} as soon as possible, before any function call or, more
+easily, right after the @code{prolog} instruction
+(which is treated later).
+
+@code{getarg} accepts a register argument and a value returned by
+@code{arg}, and will move that argument to the register, extending
+it (with or without sign, according to the data type specification)
+to fit a whole register.  These instructions are more intimately
+related to the usage of the @lightning{} instruction set in code
+that generates other code, so they will be treated more
+specifically in @ref{GNU lightning examples, , Generating code at
+run-time}.
+
+Finally, the @code{retval} instruction fetches the return value of a
+called function in a register.  The @code{retval} instruction takes a
+register argument and copies the return value of the previously called
+function in that register.  A function with a return value should use
+@code{retr} or @code{reti} to put the return value in the return register
+before returning.  @xref{Fibonacci, the Fibonacci numbers}, for an example.
+
+@code{epilog} is an optional call that marks the end of a function
+body. It is automatically generated by lightning when starting a new
+function (which should be done after a @code{ret} call) or when
+finishing jit generation.
+
+You should observe a few rules when using these macros.  First of
+all, if calling a varargs function, you should use the @code{ellipsis}
+call to mark the position of the ellipsis in the C prototype.
+
+You should not nest calls to @code{prepare} inside a
+@code{prepare/finish} block.  Doing this will result in undefined
+behavior. Note that for functions with zero arguments you can use
+just @code{call}.
+
+@item Branch instructions
+Like @code{arg}, these also return a value which, in this case,
+is to be used to compile forward branches as explained in
+@ref{Fibonacci, , Fibonacci numbers}.  They accept two operands to be
+compared; of these, the last can be either a register or an immediate.
+They are:
+@example
+bltr      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+blti      _u  _f  _d  @r{if }(O2 <  O3)@r{ goto }O1
+bler      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+blei      _u  _f  _d  @r{if }(O2 <= O3)@r{ goto }O1
+bgtr      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bgti      _u  _f  _d  @r{if }(O2 >  O3)@r{ goto }O1
+bger      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+bgei      _u  _f  _d  @r{if }(O2 >= O3)@r{ goto }O1
+beqr          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+beqi          _f  _d  @r{if }(O2 == O3)@r{ goto }O1
+bner          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+bnei          _f  _d  @r{if }(O2 != O3)@r{ goto }O1
+
+bunltr        _f  _d  @r{if }!(O2 >= O3)@r{ goto }O1
+bunler        _f  _d  @r{if }!(O2 >  O3)@r{ goto }O1
+bungtr        _f  _d  @r{if }!(O2 <= O3)@r{ goto }O1
+bunger        _f  _d  @r{if }!(O2 <  O3)@r{ goto }O1
+buneqr        _f  _d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
+bltgtr        _f  _d  @r{if } (O2 <  O3) ||  (O2 >  O3)@r{ goto }O1
+bordr         _f  _d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
+bunordr       _f  _d  @r{if } (O2 != O2) ||  (O3 != O3)@r{ goto }O1
+
+bmsr                  @r{if }O2 &  O3@r{ goto }O1
+bmsi                  @r{if }O2 &  O3@r{ goto }O1
+bmcr                  @r{if }!(O2 & O3)@r{ goto }O1
+bmci                  @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
+boaddr    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+boaddi    _u          O2 += O3@r{, goto }O1@r{ if overflow}
+bxaddr    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bxaddi    _u          O2 += O3@r{, goto }O1@r{ if no overflow}
+bosubr    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bosubi    _u          O2 -= O3@r{, goto }O1@r{ if overflow}
+bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
+@end example
+
+@item Jump and return operations
+These accept one argument except @code{ret} which has none; the
+difference between @code{finishi} and @code{calli} is that the
+latter does not clean the stack from pushed parameters (if any)
+and the former must @strong{always} follow a @code{prepare}
+instruction.
+@example
+callr     (not specified)                @r{function call to a register}
+calli     (not specified)                @r{function call to O1}
+finishr   (not specified)                @r{function call to a register}
+finishi   (not specified)                @r{function call to O1}
+jmpi/jmpr (not specified)                @r{unconditional jump to O1}
+ret       (not specified)                @r{return from subroutine}
+retr      _c _uc _s _us _i _ui _l _f _d
+reti      _c _uc _s _us _i _ui _l _f _d
+retval    _c _uc _s _us _i _ui _l _f _d  @r{move return value}
+                                         @r{to register}
+@end example
+
+Like branch instructions, @code{jmpi} also returns a value which is to
+be used to compile forward branches. @xref{Fibonacci, , Fibonacci
+numbers}.
+
+@item Function prolog
+
+These macros are used to set up a function prolog.  The @code{allocai}
+call accepts a single integer argument and returns an offset value
+for stack storage access.
+
+@example
+prolog    (not specified)                @r{function prolog}
+allocai   (not specified)                @r{reserve space on the stack}
+@end example
+
+@code{allocai} receives the number of bytes to allocate and returns
+the offset from the frame pointer register @code{FP} to the base of
+the area.
+@end table
+
+As a small appetizer, here is a small function that adds 1 to the input
+parameter (an @code{int}).  I'm using an assembly-like syntax here which
+is a bit different from the one used when writing real subroutines with
+@lightning{}; the real syntax will be introduced in @ref{GNU lightning
+examples, , Generating code at run-time}.
+
+@example
+incr:
+     prolog
+in = arg                     @rem{! We have an integer argument}
+     getarg    R0, in        @rem{! Move it to R0}
+     addi      R0, R0, 1     @rem{! Add 1}
+     retr      R0            @rem{! And return the result}
+@end example
+
+And here is another function which uses the @code{printf} function from
+the standard C library to write a number in hexadecimal notation:
+
+@example
+printhex:
+     prolog
+in = arg                     @rem{! Same as above}
+     getarg    R0, in
+     prepare                 @rem{! Begin call sequence for printf}
+     pushargi  "%x"          @rem{! Push format string}
+     ellipsis                @rem{! Varargs start here}
+     pushargr  R0            @rem{! Push second argument}
+     finishi   printf        @rem{! Call printf}
+     ret                     @rem{! Return to caller}
+@end example
+
+@node GNU lightning examples
+@chapter Generating code at run-time
+
+To use @lightning{}, you should include the @file{lightning.h} file that
+is put in your include directory by the @samp{make install} command.
+
+Each of the instructions above translates to a macro or function call.
+All you have to do is prepend @code{jit_} (lowercase) to opcode names
+and @code{JIT_} (uppercase) to register names.  Of course, parameters
+are to be put between parentheses.
+
+This small tutorial presents four examples:
+
+@iftex
+@itemize @bullet
+@item
+The @code{incr} function found in @ref{The instruction set, ,
+@lightning{}'s instruction set}:
+
+@item
+A simple function call to @code{printf}
+
+@item
+An RPN calculator.
+
+@item
+Fibonacci numbers
+@end itemize
+@end iftex
+@ifnottex
+@menu
+* incr::             A function which increments a number by one
+* printf::           A simple function call to printf
+* RPN calculator::   A more complex example, an RPN calculator
+* Fibonacci::        Calculating Fibonacci numbers
+@end menu
+@end ifnottex
+
+@node incr
+@section A function which increments a number by one
+
+Let's see how to create and use the sample @code{incr} function created
+in @ref{The instruction set, , @lightning{}'s instruction set}:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  jit_node_t  *in;
+  pifi         incr;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    @rem{/* @t{     prolog             } */}
+  in = jit_arg();                  @rem{/* @t{     in = arg           } */}
+  jit_getarg(JIT_R0, in);          @rem{/* @t{     getarg R0@comma{} in      } */}
+  jit_addi(JIT_R0, JIT_R0, 1);     @rem{/* @t{     addi   R0@comma{} R0@comma{} 1   } */}
+  jit_retr(JIT_R0);                @rem{/* @t{     retr   R0          } */}
+
+  incr = jit_emit();
+
+  @rem{/* call the generated code@comma{} passing 5 as an argument */}
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+Let's examine the code line by line (well, almost@dots{}):
+
+@table @t
+@item #include <lightning.h>
+You already know about this.  It defines all of @lightning{}'s macros.
+
+@item static jit_state_t *_jit;
+You might wonder what @code{jit_state_t} is.  It is a structure
+that stores jit code generation information.  The name @code{_jit} is
+special: because multiple jit generators can run at the same time,
+you must either @code{#define _jit my_jit_state} or name the variable
+@code{_jit}.
+
+@item typedef int (*pifi)(int);
+Just a handy typedef for a pointer to a function that takes an
+@code{int} and returns another.
+
+@item jit_node_t  *in;
+Declares a variable to hold an identifier for a function argument. It
+is an opaque pointer, that will hold the return of a call to @code{arg}
+and be used as argument to @code{getarg}.
+
+@item pifi         incr;
+Declares a function pointer variable to a function that receives an
+@code{int} and returns an @code{int}.
+
+@item init_jit(argv[0]);
+You must call this function before creating a @code{jit_state_t}
+object. This function does global state initialization, and may need
+to detect CPU or Operating System features.  It receives a string
+argument that is later used to read symbols from a shared object using
+GNU binutils if disassembly was enabled at configure time. If no
+disassembly will be performed a NULL pointer can be used as argument.
+
+@item _jit = jit_new_state();
+This call initializes a @lightning{} jit state.
+
+@item jit_prolog();
+Ok, so we start generating code for our beloved function@dots{}
+
+@item in = jit_arg();
+@itemx jit_getarg(JIT_R0, in);
+We retrieve the first (and only) argument, an integer, and store it
+into the general-purpose register @code{R0}.
+
+@item jit_addi(JIT_R0, JIT_R0, 1);
+We add one to the content of the register.
+
+@item jit_retr(JIT_R0);
+This instruction generates a standard function epilog that returns
+the contents of the @code{R0} register.
+
+@item incr = jit_emit();
+This instruction is very important.  It actually translates the
+@lightning{} macros used before to machine code, flushes the generated
+code area out of the processor's instruction cache and returns a
+pointer to the start of the code.
+
+@item printf("%d + 1 = %d\n", 5, incr(5));
+Calling our function is this simple---it is not distinguishable from
+a normal C function call, the only difference being that @code{incr}
+is a variable.
+
+@item finish_jit();
+This call cleans up any global state held by @lightning{}, and it is
+advisable to call it once jit code will no longer be generated.
+@end table
+
+@lightning{} abstracts two phases of dynamic code generation: selecting
+instructions that map the standard representation, and emitting binary
+code for these instructions.  The client program has the responsibility
+of describing the code to be generated using the standard @lightning{}
+instruction set.
+
+Let's examine the code generated for @code{incr} on the x86_64
+architecture (on the right is the code that an assembly-language
+programmer would write):
+
+@table @b
+@item x86_64
+@example
+    sub   $0x30,%rsp
+    mov   %rbp,(%rsp)
+    mov   %rsp,%rbp
+    sub   $0x18,%rsp
+    mov   %rdi,%rax            mov %rdi, %rax
+    add   $0x1,%rax            inc %rax
+    mov   %rbp,%rsp
+    mov   (%rsp),%rbp
+    add   $0x30,%rsp
+    retq                       retq
+@end example
+In this case, the main overhead is due to the function's prolog and
+epilog, and stack alignment after reserving stack space for word
+to/from float conversions or moving data from/to x87 to/from SSE.
+Note that besides allocating space to save callee saved registers,
+no registers are saved/restored because @lightning{} notices those
+registers are not modified. There is currently no logic to detect
+if it needs to allocate stack space for type conversions, nor
+proper leaf function detection, but these are subject to change
+(FIXME).
+@end table
+
+@node printf
+@section A simple function call to @code{printf}
+
+Again, here is the code for the example:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);      @rem{/* Pointer to Void Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pvfi          myFunction;             @rem{/* ptr to generated code */}
+  jit_node_t    *start, *end;           @rem{/* a couple of labels */}
+  jit_node_t    *in;                    @rem{/* to get the argument */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+  start = jit_note(__FILE__, __LINE__);
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);
+  jit_prepare();
+  jit_pushargi((jit_word_t)"generated %d bytes\n");
+  jit_ellipsis();
+  jit_pushargr(JIT_R1);
+  jit_finishi(printf);
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);
+
+  myFunction = jit_emit();
+
+  @rem{/* call the generated code@comma{} passing its size as argument */}
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+
+  jit_disassemble();
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The function shows how many bytes were generated.  Most of the code
+is not very interesting, as it resembles very closely the program
+presented in @ref{incr, , A function which increments a number by one}.
+
+For this reason, we're going to concentrate on just a few statements.
+
+@table @t
+@item start = jit_note(__FILE__, __LINE__);
+@itemx @r{@dots{}}
+@itemx end = jit_note(__FILE__, __LINE__);
+These two instructions call the @code{jit_note} macro, which creates
+a note in the jit code; arguments to @code{jit_note} usually are a
+filename string and line number integer, but using NULL for the
+string argument is perfectly valid if you only need to create a simple
+marker in the code.
+
+@item jit_ellipsis();
+@code{ellipsis} usually is only required if calling varargs functions
+with double arguments, but it is a good practice to properly describe
+the @r{@dots{}} in the call sequence.
+
+@item jit_pushargi((jit_word_t)"generated %d bytes\n");
+Note the use of the @code{(jit_word_t)} cast, that is used only
+to avoid a compiler warning, due to using a pointer where a
+wordsize integer type was expected.
+
+@item jit_prepare();
+@itemx @r{@dots{}}
+@itemx jit_finishi(printf);
+Once the arguments to @code{printf} have been pushed, which means
+moving them to stack or register arguments, the @code{printf}
+function is called and the stack cleaned.  Note how @lightning{}
+abstracts the differences between different architectures and
+ABIs -- the client program does not know how parameter passing
+works on the host architecture.
+
+@item jit_epilog();
+Usually it is not required to call @code{epilog}, but because it
+is implicitly called when noticing the end of a function, if the
+@code{end} variable was set with a @code{note} call after the
+@code{ret}, it would not consider the function epilog.
+
+@item myFunction((char*)jit_address(end) - (char*)jit_address(start));
+This calls the generated jit function passing as argument the offset
+difference between the @code{start} and @code{end} notes. The @code{address}
+call must be done after the @code{emit} call or either a fatal error
+will happen (if @lightning{} is built with assertions enabled) or an
+undefined value will be returned.
+
+@item jit_disassemble();
+@code{disassemble} will dump the generated code to standard output,
+unless @lightning{} was built with the disassembler disabled, in which
+case no output will be shown.
+@end table
+
+@node RPN calculator
+@section A more complex example, an RPN calculator
+
+We create a small stack-based RPN calculator which applies a series
+of operators to a given parameter and to other numeric operands.
+Unlike previous examples, the code generator is fully parameterized
+and is able to compile different formulas to different functions.
+Here is the code for the expression compiler; a sample usage will
+follow.
+
+Since @lightning{} does not provide push/pop instructions, this
+example uses a stack-allocated area to store the data.  Such an
+area can be allocated using the macro @code{allocai}, which
+receives the number of bytes to allocate and returns the offset
+from the frame pointer register @code{FP} to the base of the
+area.
+
+Usually, you will use the @code{ldxi} and @code{stxi} instructions
+to access stack-allocated variables.  However, it is possible to
+use operations such as @code{add} to compute the address of the
+variables, and pass the address around.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+@{
+  jit_stxi_i (*sp, JIT_FP, reg);
+  *sp += sizeof (int);
+@}
+
+void stack_pop(int reg, int *sp)
+@{
+  *sp -= sizeof (int);
+  jit_ldxi_i (reg, JIT_FP, *sp);
+@}
+
+jit_node_t *compile_rpn(char *expr)
+@{
+  jit_node_t *in, *fn;
+  int stack_base, stack_ptr;
+
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
+
+  jit_getarg_i(JIT_R2, in);
+
+  while (*expr) @{
+    char buf[32];
+    int n;
+    if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
+      expr += n - 1;
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movi(JIT_R0, atoi(buf));
+    @} else if (*expr == 'x') @{
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    @} else if (*expr == '+') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '-') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '*') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    @} else if (*expr == '/') @{
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);
+    @} else @{
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    @}
+    ++expr;
+  @}
+  jit_retr(JIT_R0);
+  jit_epilog();
+  return fn;
+@}
+@end example
+
+The principle on which the calculator is based is easy: the stack top
+is held in R0, while the remaining items of the stack are held in the
+memory area that we allocate with @code{allocai}.  Compiling a numeric
+operand or the argument @code{x} pushes the old stack top onto the
+stack and moves the operand into R0; compiling an operator pops the
+second operand off the stack into R1, and compiles the operation so
+that the result goes into R0, thus becoming the new stack top.
+
+This example allocates a fixed area for 32 @code{int}s.  This is not
+a problem when the function is a leaf like in this case; in a full-blown
+compiler you will want to analyze the input and determine the number
+of needed stack slots---a very simple example of register allocation.
+The area is then managed like a stack using @code{stack_push} and
+@code{stack_pop}.
+
+Source code for the client (which lies in the same source file) follows:
+
+@example
+int main(int argc, char *argv[])
+@{
+  jit_node_t *nc, *nf;
+  pifi c2f, f2c;
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");
+  nf = compile_rpn("x32-5*9/");
+  (void)jit_emit();
+  c2f = (pifi)jit_address(nc);
+  f2c = (pifi)jit_address(nf);
+
+  printf("\nC:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  finish_jit();
+  return 0;
+@}
+@end example
+
+The client displays a conversion table between Celsius and Fahrenheit
+degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
+formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
+respectively.
+
+Providing the formula as an argument to @code{compile_rpn} effectively
+parameterizes code generation, making it possible to use the same code
+to compile different functions; this is what makes dynamic code
+generation so powerful.
+
+@node Fibonacci
+@section Fibonacci numbers
+
+The code in this section calculates a variant of the Fibonacci sequence.
+While the traditional Fibonacci sequence is modeled by the recurrence
+relation:
+@display
+     f(0) = f(1) = 1
+     f(n) = f(n-1) + f(n-2)
+@end display
+
+@noindent
+the functions in this section calculate the following sequence, which
+is more interesting as a benchmark@footnote{That's because, as is
+easily seen, the sequence represents the number of activations of the
+@code{nfibs} procedure that are needed to compute its value through
+recursion.}:
+@display
+     fib(0) = fib(1) = 1
+     fib(n) = fib(n-1) + fib(n-2) + 1
+@end display
+
+The purpose of this example is to introduce branches.  There are two
+kinds of branches: backward branches and forward branches.  We'll
+present the calculation in a recursive and iterative form; the
+former only uses forward branches, while the latter uses both.
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *label;
+  jit_node_t *call;
+  jit_node_t *in;                 @rem{/* offset of the argument */}
+  jit_node_t *ref;                @rem{/* to patch the forward reference */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_V0, in);              @rem{/* V0 = n */}
+  ref = jit_blti     (JIT_V0, 2);
+        jit_subi     (JIT_V1, JIT_V0, 1);       @rem{/* V1 = n-1 */}
+        jit_subi     (JIT_V2, JIT_V0, 2);       @rem{/* V2 = n-2 */}
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_V1);                     @rem{/* V1 = fib(n-1) */}
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);
+        jit_patch_at(call, label);
+        jit_retval(JIT_V2);                     @rem{/* V2 = fib(n-2) */}
+        jit_addi(JIT_V1,  JIT_V1,  1);
+        jit_addr(JIT_R0, JIT_V1, JIT_V2);       @rem{/* R0 = V1 + V2 + 1 */}
+        jit_retr(JIT_R0);
+
+  jit_patch(ref);                               @rem{/* patch jump */}
+        jit_movi(JIT_R0, 1);                    @rem{/* R0 = 1 */}
+        jit_retr(JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 32 as an argument */}
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 32, fib(32));
+  finish_jit();
+  return 0;
+@}
+@end example
+
+As said above, this is the first example of dynamically compiling
+branches.  Branch instructions have two operands containing the
+values to be compared, and return a @code{jit_node_t *} object
+to be patched.
+
+Because the final addresses of labels are only known after calling
+@code{emit}, it is required to call @code{patch} or @code{patch_at},
+which tells @lightning{} that the target to patch is actually a pointer
+to a @code{jit_node_t *} object; otherwise, it would assume that it is
+a pointer to a C function. Note that conditional branches do not
+receive a label argument, so they must be patched.
+
+You need to call @code{patch_at} on the return value of @code{calli},
+@code{finishi} and @code{jmpi} if the instruction is actually
+referencing a label in the jit code. All other branch instructions
+do not receive a label argument. Note that @code{movi} is a special
+case, and patching it is usually done to get the final address of
+a label, usually to later call @code{jmpr}.
+
+Now, here is the iterative version:
+
+@example
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
+
+int main(int argc, char *argv[])
+@{
+  pifi       fib;
+  jit_node_t *in;               @rem{/* offset of the argument */}
+  jit_node_t *ref;              @rem{/* to patch the forward reference */}
+  jit_node_t *jump;             @rem{/* jump to start of loop */}
+  jit_node_t *loop;             @rem{/* start of the loop */}
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R2, in);              @rem{/* R2 = n */}
+        jit_movi     (JIT_R1, 1);
+  ref = jit_blti     (JIT_R2, 2);
+        jit_subi     (JIT_R2, JIT_R2, 1);
+        jit_movi     (JIT_R0, 1);
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
+        jit_addr     (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
+        jit_movr     (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
+        jit_addi     (JIT_R1, JIT_V0, 1);       @rem{/* R1 = V0 + 1 */}
+  jump= jit_bnei     (JIT_R2, 0);               @rem{/* if (R2) goto loop; */}
+  jit_patch_at(jump, loop);
+
+  jit_patch(ref);                               @rem{/* patch forward jump */}
+        jit_movr     (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
+        jit_retr     (JIT_R0);
+
+  @rem{/* call the generated code@comma{} passing 36 as an argument */}
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  finish_jit();
+  return 0;
+@}
+@end example
+
+This code calculates the recurrence relation using iteration (a
+@code{for} loop in high-level languages).  There are no function
+calls anymore: instead, there is a backward jump (the @code{bnei} at
+the end of the loop).
+
+Note that the program must remember the address for backward jumps;
+for forward jumps it is only required to remember the jump code,
+and call @code{patch} for the implicit label.
+
+@node Reentrancy
+@chapter Re-entrant usage of @lightning{}
+
+@lightning{} uses the special @code{_jit} identifier. To be able
+to use multiple jit generation states at the same
+time, it is required to use code similar to:
+
+@example
+    struct jit_state lightning;
+    #define _jit lightning
+@end example
+
+This will cause the symbol defined to @code{_jit} to be passed as
+the first argument to the underlying @lightning{} implementation,
+that is usually a function with an @code{_} (underscore) prefix
+and with an argument named @code{_jit}, in the pattern:
+
+@example
+	static void _jit_mnemonic(jit_state_t *, jit_gpr_t, jit_gpr_t);
+	#define jit_mnemonic(u, v) _jit_mnemonic(_jit, u, v)
+@end example
+
+The reason for this is to use the same syntax as the initial lightning
+implementation and to avoid needing the user to keep adding an extra
+argument to every call, as multiple jit states generating code in
+parallel should be very uncommon.
+
+@section Registers
+@chapter Accessing the whole register file
+
+As mentioned earlier in this chapter, all @lightning{} back-ends are
+guaranteed to have at least six general-purpose integer registers and
+six floating-point registers, but many back-ends will have more.
+
+To access the entire register files, you can use the
+@code{JIT_R}, @code{JIT_V} and @code{JIT_F} macros.  They
+accept a parameter that identifies the register number, which
+must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
+and @code{JIT_F_NUM} respectively; the number need not be
+constant.  Of course, expressions like @code{JIT_R0} and
+@code{JIT_R(0)} denote the same register, and likewise for
+integer callee-saved, or floating-point, registers.
 
 @node Acknowledgements
 @chapter Acknowledgements
@@ -298,9 +1193,3 @@ yet very interesting.
 I also thank Steve Byrne for writing GNU Smalltalk, since @lightning{}
 was first developed as a tool to be used in GNU Smalltalk's dynamic
 translator from bytecodes to native code.
-
-@iftex
-@contents
-@end iftex
-
-@bye
diff --git a/doc/ifib.c b/doc/ifib.c
new file mode 100644
index 000000000..7e098cba4
--- /dev/null
+++ b/doc/ifib.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  pifi       fib;
+  jit_node_t *in;               /* argument node returned by jit_arg */
+  jit_node_t *ref;              /* forward branch, patched after the loop */
+  jit_node_t *jump;             /* backward branch to the start of the loop */
+  jit_node_t *loop;             /* label marking the start of the loop */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_R2, in);              /* R2 = n */
+        jit_movi     (JIT_R1, 1);               /* R1 = 1 */
+  ref = jit_blti     (JIT_R2, 2);               /* if (n < 2) skip the loop */
+        jit_subi     (JIT_R2, JIT_R2, 1);       /* R2 = n - 1 */
+        jit_movi     (JIT_R0, 1);               /* R0 = 1 */
+
+  loop= jit_label();
+        jit_subi     (JIT_R2, JIT_R2, 1);       /* decr. counter */
+        jit_addr     (JIT_V0, JIT_R0, JIT_R1);  /* V0 = R0 + R1 */
+        jit_movr     (JIT_R0, JIT_R1);          /* R0 = R1 */
+        jit_addi     (JIT_R1, JIT_V0, 1);       /* R1 = V0 + 1 */
+  jump= jit_bnei     (JIT_R2, 0);               /* if (R2) goto loop; */
+  jit_patch_at(jump, loop);                     /* bind the branch to the loop label */
+
+  jit_patch(ref);                               /* forward branch lands here */
+        jit_movr     (JIT_R0, JIT_R1);          /* R0 = R1 */
+        jit_retr     (JIT_R0);                  /* return R0 */
+
+  /* emit the code, then call it passing 36 as an argument */
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 36, fib(36));
+  finish_jit();
+  return 0;
+}
diff --git a/doc/incr.c b/doc/incr.c
new file mode 100644
index 000000000..5d5e438d0
--- /dev/null
+++ b/doc/incr.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);    /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  jit_node_t  *in;                 /* argument node returned by jit_arg */
+  pifi         incr;               /* pointer to the generated function */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  jit_prolog();                    /*      prolog              */
+  in = jit_arg();                  /*      in = arg            */
+  jit_getarg(JIT_R0, in);          /*      getarg R0, in       */
+  jit_addi(JIT_R0, JIT_R0, 1);     /*      addi   R0, R0, 1    */
+  jit_retr(JIT_R0);                /*      retr   R0           */
+
+  incr = jit_emit();               /* translate to machine code */
+
+  /* call the generated code, passing 5 as an argument */
+  printf("%d + 1 = %d\n", 5, incr(5));
+
+  finish_jit();
+  return 0;
+}
diff --git a/doc/lightning.texi b/doc/lightning.texi
index a336a3db0..c7d8f98f1 100644
--- a/doc/lightning.texi
+++ b/doc/lightning.texi
@@ -3,11 +3,8 @@
 
 @setfilename lightning.info
 
-@set TITLE       Using and porting @sc{gnu} @i{lightning}
-@set TOPIC       installing, using and porting
-@set BOTH
-@set USING
-@set PORTING
+@set TITLE       Using @sc{gnu} @i{lightning}
+@set TOPIC       installing and using
 
 @settitle @value{TITLE}
 
@@ -63,29 +60,9 @@
 @c References to the other half of the manual
 @c ---------------------------------------------------------------------
 
-@ifset USING
 @macro usingref{node, name}
 @ref{\node\, , \name\}
 @end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
 
 @c ---------------------------------------------------------------------
 @c End of macro section
@@ -98,3 +75,4 @@
 
 @c ***********************************************************************
 
+@bye
diff --git a/doc/p-lightning.texi b/doc/p-lightning.texi
deleted file mode 100644
index 98a9b63fb..000000000
--- a/doc/p-lightning.texi
+++ /dev/null
@@ -1,100 +0,0 @@
-\input texinfo.tex  @c -*- texinfo -*-
-@c %**start of header (This is for running Texinfo on a region.)
-
-@setfilename lightning.info
-
-@set TITLE       Using @sc{gnu} @i{lightning}
-@set TOPIC       installing and using
-@clear BOTH
-@clear USING
-@set PORTING
-
-@settitle @value{TITLE}
-
-@c ---------------------------------------------------------------------
-@c Common macros
-@c ---------------------------------------------------------------------
-
-@macro bulletize{a}
-@item
-\a\
-@end macro
-
-@macro rem{a}
-@r{@i{\a\}}
-@end macro
-
-@macro gnu{}
-@sc{gnu}
-@end macro
-
-@macro lightning{}
-@gnu{} @i{lightning}
-@end macro
-
-@c ---------------------------------------------------------------------
-@c Macros for Texinfo 3.1/4.0 compatibility
-@c ---------------------------------------------------------------------
-
-@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
-@c compatibility
-@macro hlink{url, link}
-\link\ (\url\)
-@end macro
-
-@c ifhtml can only be true in Texinfo 4.0, which has uref
-@ifhtml
-@unmacro hlink
-
-@macro hlink{url, link}
-@uref{\url\, \link\}
-@end macro
-
-@macro email{mail}
-@uref{mailto:\mail\, , \mail\}
-@end macro
-
-@macro url{url}
-@uref{\url\}
-@end macro
-@end ifhtml
-
-@c ---------------------------------------------------------------------
-@c References to the other half of the manual
-@c ---------------------------------------------------------------------
-
-@ifset USING
-@macro usingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@c ---------------------------------------------------------------------
-@c End of macro section
-@c ---------------------------------------------------------------------
-
-@include version.texi
-@include body.texi
-
-@c %**end of header (This is for running Texinfo on a region.)
-
-@c ***********************************************************************
-
diff --git a/doc/porting.texi b/doc/porting.texi
deleted file mode 100644
index 66cd15118..000000000
--- a/doc/porting.texi
+++ /dev/null
@@ -1,1600 +0,0 @@
-@node Structure of a port
-@chapter An overview of the porting process
-
-A particular port of @lightning{} is composed of four files. These
-have a common suffix which identifies the port (for example,
-@code{i386} or @code{ppc}), and a prefix that identifies their
-function; they are:
-
-@itemize @bullet
-@item
-@file{asm-@var{suffix}.h}, which contains the description of the
-target machine's instruction format.  The creation of this file
-is discussed in @ref{Run-time assemblers, , Creating the run-time
-assembler}.
-
-@item
-@file{core-@var{suffix}.h}, which contains the mappings from
-@lightning{}'s instruction set to the target machine's assembly
-language format.  The creation of this file is discussed in
-@ref{Standard macros, , Creating the platform-independent layer}.
-
-@item
-@file{funcs-@var{suffix}.h}, for now, only contains the definition
-of @code{jit_flush_code}. The creation of this file is briefly
-discussed in @ref{Standard functions, , More complex tasks in
-the platform-independent layer}.
-
-@item
-@file{fp-@var{suffix}.h}, which contains the description of the
-target machine's instruction format and the internal macros for doing
-floating point computation. The creation of this file is discussed
-in @ref{Floating-point macros, , Implementing macros for floating
-point}.
-@end itemize
-
-Before doing anything, you have to add the ability to recognize the
-new port during the configuration process.  This is explained in
-@ref{Adjusting configure, , Automatically recognizing the new platform}.
-
-@node Adjusting configure
-@chapter Automatically recognizing the new platform
-
-Before starting your port, you have to add the ability to recognize the
-new port during the configure process.  You only have to run
-@file{config.guess}, which you'll find in the main distribution
-directory, and note down the first part of the output (up to the first
-dash).
-
-Then, in the two files @file{configure.in} and @file{lightning.m4},
-lookup the line
-@example
-    case "$host_cpu" in
-@end example
-
-@noindent
-and, right after it, add the line:
-@example
-    @var{cpu-name})  cpu=@var{file-suffix}           ;;
-@end example
-
-@noindent
-where @var{cpu-name} is the cpu as output by @file{config.guess}, and
-@var{file-suffix} is the suffix that you are going to use for your files
-(@pxref{Structure of a port, , An overview of the porting process}).
-
-Now create empty files for your new port:
-@example
-    touch lightning/asm-xxx.h
-    touch lightning/fp-xxx.h
-    touch lightning/core-xxx.h
-    touch lightning/funcs-xxx.h
-@end example
-
-@noindent
-and run @file{configure}, which should create the symlinks that are
-needed by @code{lightning.h}.  This is important because it will allow
-you to use @lightning{} (albeit in a limited way) for testing even
-before the port is completed.
-
-@node Run-time assemblers
-@chapter Creating the run-time assembler
-
-The run-time assembler is a set of macros whose purpose is to assemble
-instructions for the target machine's assembly language, translating
-mnemonics to machine language together with their operands.  While a
-run-time assembler is not, strictly speaking, part of @lightning{}
-(it is a private layer to be used while implementing the standard
-macros that are ultimately used by clients), designing a run-time
-assembler first allows you to think in terms of assembly language
-rather than binary code (ouch!@dots{}), making it considerably easier
-to write the standard macros.
-
-Creating a run-time assembler is a tedious process rather than a
-difficult one, because most of the time will be spent collecting and
-copying information from the architecture's manual.
-
-Macros defined by a run-time assembler are conventionally named after
-the mnemonic and the type of its operands.  Examples taken from the
-SPARC's run-time assembler are @code{ADDrrr}, a macro that assembles
-an @code{ADD} instruction with three register operands, and
-@code{SUBCCrir}, which assembles a @code{SUBCC} instruction whose second
-operand is an immediate and the remaining two are registers.
-
-The first step in creating the assembler is to pick a convention for
-operand specifiers (@code{r} and @code{i} in the example above) and for
-register names.  On the SPARC, this convention is as follows:
-
-@table @code
-@item @b{r}
-A register name.  For every @code{r} in the macro name, a numeric
-parameter @code{RR} is passed to the macro, and the operand is assembled
-as @code{%r@var{RR}}.
-
-@item @b{i}
-An immediate, usually a 13-bit signed integer (with exception for
-instructions such as @code{SETHI} and branches).  The macros check
-the size of the passed parameter if @lightning{} is configured with
-@code{--enable-assertions}.
-
-@item @b{x}
-A combination of two @code{r} parameters, which are summed to determine
-the effective address in a memory load/store operation.
-
-@item @b{m}
-A combination of an @code{r} and @code{i} parameter, which are summed to
-determine the effective address in a memory load/store operation.
-@end table
-
-Additional macros can be defined that provide easier access to register
-names.  For example, on the SPARC, @code{_Ro(3)} and @code{_Rg(5)} map
-respectively to @code{%o3} and @code{%g5}; on the x86, instead, symbolic
-representations of the register names are provided (for example,
-@code{_EAX} and @code{_EBX}).
-
-CISC architectures sometimes have registers of different sizes--this is
-the case on the x86 where @code{%ax} is a 16-bit register while
-@code{%esp} is a 32-bit one.  In this case, it can be useful to embed
-information on the size in the definition of register names.  The x86
-machine language, for example, represents all three of @code{%bh},
-@code{%di} and @code{%edi} as 7; but the x86 run-time assembler defines
-them with different numbers, putting the register's size in the upper
-nybble (for example, @samp{17h} for @code{%bh} and @samp{27h} for
-@code{%di}) so that consistency checks can be made on the operands'
-sizes when @code{--enable-assertions} is used.
-
-The next important part defines the native architecture's instruction
-formats.  These can be as few as ten on RISC architectures, and as many
-as fifty on CISC architectures.  In the latter case it can be useful
-to define more macros for sub-formats (such as macros for different
-addressing modes) or even for sub-fields in an instruction.  Let's see
-an example of these macros.
-
-@example
-#define _2i( OP, RD, OP2, IMM)
-        _I((_u2 (OP )<<30)  |  (_u5(RD)<<25)  |  (_u3(OP2)<<22)  |
-            _u22(IMM)                                            )
-@end example
-
-The name of the macro, @code{_2i}, indicates a two-operand instruction
-comprising an immediate operand.  The instruction format is:
-
-@example
- .------.---------.------.-------------------------------------------.
- |  OP  |   RD    | OP2  |               IMM                         |
- |------+---------+------+-------------------------------------------|
- |2 bits|  5 bits |3 bits|             22 bits                       |
- |31-30 |  29-25  | 24-22|              21-0                         |
- '------'---------'------'-------------------------------------------'
-@end example
-
-@lightning{} provides macros named @code{_sXX(OP)} and @code{_uXX(OP)},
-where XX is a number between 1 and 31, which test@footnote{Only when
-@code{--enable-assertions} is used.} whether @code{OP} can be
-represented as (respectively) a signed or unsigned integer of the
-given size.  What the macro above does, then, is to shift and @sc{or}
-together the different fields, ensuring that each of them fits the field.
-
-Here is another definition, this time for the PowerPC architecture.
-
-@example
-#define _X(OP,RD,RA,RB,XO,RC)
-        _I((_u6 (OP)<<26)  |  (_u5(RD)<<21)  |  (_u5(RA)<<16)  |
-           ( _u5(RB)<<11)  |  (_u10(XO)<<1)  |   _u1(RC)       )
-@end example
-
-Here is the bit layout corresponding to this instruction format:
-
-@example
- .--------.--------.--------.--------.---------------------.-------.
- |    OP  |   RD   |   RA   |   RB   |           XO        |   RC  |
- |--------+--------+--------+--------+---------------------+-------|
- | 6 bits | 5 bits | 5 bits | 5 bits |         10 bits     | 1 bit |
- | 31-26  | 25-21  | 20-16  | 15-11  |         10-1        |   0   |
- '--------'--------'--------'--------'---------------------'-------'
-@end example
-
-How do these macros actually generate code? The secret lies in the
-@code{_I} macro, which is one of four predefined macros which actually
-store machine language instructions in memory.  They are @code{_B},
-@code{_W}, @code{_I} and @code{_L}, respectively for 8-bit, 16-bit,
-32-bit, and @code{long} (either 32-bit or 64-bit, depending on the
-architecture) values.
-
-Next comes another set of macros (usually the biggest) which represents
-the actual mnemonics---macros such as @code{ADDrrr} and @code{SUBCCrir},
-which were cited earlier in this chapter, belong to this set.  Most of
-the time, all these macros will do is to use the ``instruction format''
-macros, specifying the values of the fields in the different instruction
-formats.  Let's see a few of these definitions, again taken from the
-SPARC assembler:
-
-@example
-#define BAi(DISP)                       _2   (0, 0,  8, 2, DISP)
-#define BA_Ai(DISP)                     _2   (0, 1,  8, 2, DISP)
-
-#define SETHIir(IMM, RD)                _2i  (0, RD, 4, IMM)
-
-#define ADDrrr(RS1, RS2, RD)            _3   (2, RD,  0, RS1, 0, 0, RS2)
-#define ADDrir(RS1, IMM, RD)            _3i  (2, RD,  0, RS1, 1,    IMM)
-#define ADDCCrrr(RS1, RS2, RD)          _3   (2, RD, 16, RS1, 0, 0, RS2)
-#define ADDCCrir(RS1, IMM, RD)          _3i  (2, RD, 16, RS1, 1,    IMM)
-#define ANDrrr(RS1, RS2, RD)            _3   (2, RD,  1, RS1, 0, 0, RS2)
-#define ANDrir(RS1, IMM, RD)            _3i  (2, RD,  1, RS1, 1,    IMM)
-#define ANDCCrrr(RS1, RS2, RD)          _3   (2, RD, 17, RS1, 0, 0, RS2)
-#define ANDCCrir(RS1, IMM, RD)          _3i  (2, RD, 17, RS1, 1,    IMM)
-@end example
-
-A few things have to be noted.  For example:
-@itemize @bullet
-@item
-The SPARC assembly language sometimes uses a comma inside a mnemonic
-(for example, @code{ba,a}).  This symbol is not allowed inside a
-@sc{cpp} macro name, so it is replaced with an underscore; the same
-is done with the dots found in the PowerPC assembly language (for
-example, @code{andi.} is defined as @code{ANDI_rri}).
-
-@item
-It can be useful to group together instructions with the same
-instruction format, as doing this tends to make the source code
-more readable (numbers are put in the same columns).
-
-@item
-Using an editor without automatic wrap at end of line can be useful,
-since run-time assemblers tend to have very long lines.
-@end itemize
-
-A final touch is to define the synthetic instructions, which are
-usually found on RISC machines.  For example, on the SPARC, the
-@code{LD} instruction has two synonyms (@code{LDUW} and @code{LDSW})
-which are defined thus:
-
-@example
-#define LDUWxr(RS1, RS2, RD)            LDxr(RS1, RS2, RD)
-#define LDUWmr(RS1, IMM, RD)            LDmr(RS1, IMM, RD)
-#define LDSWxr(RS1, RS2, RD)            LDxr(RS1, RS2, RD)
-#define LDSWmr(RS1, IMM, RD)            LDmr(RS1, IMM, RD)
-@end example
-
-Other common cases are instructions which take advantage of registers
-whose value is hard-wired to zero, and short-cut instructions which
-hard-code some or all of the operands:
-
-@example
-@rem{/* Destination is %g0\, which the processor never overwrites. */}
-#define CMPrr(R1, R2)   SUBCCrrr(R1, R2, 0) @rem{/* subcc %r1\, %r2\, %g0 */}
-
-@rem{/* One of the source registers is hard-coded to be %g0. */}
-#define NEGrr(R,S)      SUBrrr(0, R, S)     @rem{/* sub %g0\, %rR\, %rS */}
-
-@rem{/* All of the operands are hard-coded. */}
-#define RET()           JMPLmr(31,8 ,0)     @rem{/* jmpl [%r31+8]\, %g0  */}
-
-@rem{/* One of the operands acts as both source and destination */}
-#define BSETrr(R,S)     ORrrr(R, S, S)      @rem{/* or %rR\, %rS\, %rS */}
-@end example
-
-Specific to RISC computers, finally, is the instruction to load an
-arbitrarily sized immediate into a register.  This instruction is
-usually implemented as one or two basic instructions:
-
-@enumerate
-@item
-If the number is small enough, an instruction is sufficient
-(@code{LI} or @code{ORI} on the PowerPC, @code{MOV} on the SPARC).
-
-@item
-If the lowest bits are all zeroed, an instruction is sufficient
-(@code{LIS} on the PowerPC, @code{SETHI} on the SPARC).
-
-@item
-Otherwise, the high bits are set first (with @code{LIS} or
-@code{SETHI}), and the result is then @sc{or}ed with the low
-bits.
-@end enumerate
-
-Here is the definition of such an instruction for the PowerPC:
-
-@example
-#define MOVEIri(R,I)      (_siP(16,I) ? LIri(R,I) :     \ @rem{/* case 1    */}
-                          (_uiP(16,I) ? ORIrri(R,0,I) : \ @rem{/* case 1    */}
-                          _MOVEIri(R, _HI(I), _LO(I)) ))  @rem{/* case 2/3  */}
-
-#define _MOVEIri(R,H,L)  (LISri(R,H), (L ? ORIrri(R,R,L) : 0))
-@end example
-
-@noindent
-and for the SPARC:
-
-@example
-#define SETir(I,R)      (_siP(13,I) ? MOVir(I,R) : \
-			 _SETir(_HI(I), _LO(I), R))
-
-#define _SETir(H,L,R)   (SETHIir(H,R), (L ? ORrir(R,L,R) : 0))
-@end example
-
-In both cases, @code{_HI} and @code{_LO} are macros for internal use
-that extract different parts of the immediate operand.
-
-You should take a look at the run-time assemblers distributed with
-@lightning{} before trying to craft your own.  In particular, make
-sure you understand the RISC run-time assemblers (the SPARC's is
-the simplest) before trying to decipher the x86 run-time assembler,
-which is significantly more complex.
-
-
-@node Standard macros
-@chapter Creating the platform-independent layer
-
-The platform-independent layer is the one that is ultimately used
-by @lightning{} clients.  Creating this layer is a matter of creating
-a hundred or so macros that comprise part of the interface used by
-the clients, as described in
-@usingref{The instruction set, @lightning{}'s instruction set}.
-
-Fortunately, a number of these definitions are common to the different
-platforms and are defined just once in one of the header files that
-make up @lightning{}, that is, @file{core-common.h}.
-
-Most of the macros are relatively straight-forward to implement (with
-a few caveats for architectures whose assembly language only offers
-two-operand arithmetic instructions).  This section will cover the
-tricky points, before presenting the complete listing of the macros
-that make up the platform-independent interface provided by
-@lightning{}.
-
-@menu
-@standardmacrosmenu{}
-@end menu
-
-@node Forward references
-@section Implementing forward references
-
-Implementation of forward references takes place in:
-
-@itemize @bullet
-@item
-The branch macros
-
-@item
-The @code{jit_patch_at} macros
-@end itemize
-
-Roughly speaking, the branch macros, as seen in @usingref{GNU lightning
-macros, Generating code at run-time}, return a value that later calls
-to @code{jit_patch} or @code{jit_patch_at} use to complete the assembly
-of the forward reference.  This value is usually the contents of the
-program counter after the branch instruction is compiled (which is
-accessible in the @code{_jit.pc} variable).  Let's see an example from
-the x86 back-end:
-
-@example
-#define jit_bmsr_i(label, s1, s2)                            \
-   (TESTLrr((s1), (s2)), JNZm(label,0,0,0), _jit.pc)
-@end example
-
-The @code{bms} (@dfn{branch if mask set}) instruction is assembled as
-the combination of a @code{TEST} instruction (bit-wise @sc{and} between
-the two operands) and a @code{JNZ} instruction (jump if non-zero).  The
-macro then returns the final value of the program counter.
-
-@code{jit_patch_at} is one of the few macros that need to possess a
-knowledge of the machine's instruction formats.  Its purpose is to
-patch a branch instruction (identified by the value returned at the
-moment the branch was compiled) to jump to the current position (that
-is, to the address identified by @code{_jit.pc}).
-
-On the x86, the displacement between the jump and the landing point is
-expressed as a 32-bit signed integer lying in the last four bytes of the
-jump instruction.  The definition of @code{jit_patch_at} is:
-
-@example
-#define jit_patch_at(jump_pc, pv) (*_PSL((jump_pc) - 4) = \
-				   (pv) - (jump_pc))
-@end example
-
-The @code{_PSL} macro is nothing more than a cast to @code{long *},
-and is used here to shorten the definition and avoid cluttering it with
-excessive parentheses.  These type-cast macros are:
-
-@itemize @bullet
-@item
-@code{_PUC(X)} to cast to a @code{unsigned char *}.
-
-@item
-@code{_PUS(X)} to cast to a @code{unsigned short *}.
-
-@item
-@code{_PUI(X)} to cast to a @code{unsigned int *}.
-
-@item
-@code{_PSL(X)} to cast to a @code{long *}.
-
-@item
-@code{_PUL(X)} to cast to a @code{unsigned long *}.
-@end itemize
-
-On other platforms, notably RISC ones, the displacement is embedded into
-the instruction itself.  In this case, @code{jit_patch_at} must first zero
-out the field, and then @sc{or} in the correct displacement.  The SPARC,
-for example, encodes the displacement in the bottom 22 bits; in addition
-the right-most two bits are suppressed, which are always zero because
-instructions have to be word-aligned.
-
-@example
-#define jit_patch_at(delay_pc, pv)   jit_patch_ (((delay_pc) - 1), (pv))
-
-@rem{/* branch instructions return the address of the @emph{delay}
- * instruction---this is just a helper macro that makes the code more
- * readable.
- */}
-#define jit_patch_(jump_pc, pv)   (*jump_pc =		    \
-	 (*jump_pc & ~_MASK(22)) |			    \
-         ((_UL(pv) - _UL(jump_pc)) >> 2) & _MASK(22))
-@end example
-
-This introduces more predefined shortcut macros:
-@itemize @bullet
-@item
-@code{_UC(X)} to cast to a @code{unsigned char}.
-
-@item
-@code{_US(X)} to cast to a @code{unsigned short}.
-
-@item
-@code{_UI(X)} to cast to a @code{unsigned int}.
-
-@item
-@code{_SL(X)} to cast to a @code{long}.
-
-@item
-@code{_UL(X)} to cast to a @code{unsigned long}.
-
-@item
-@code{_MASK(N)} gives a binary number made of N ones.
-@end itemize
-
-Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
-and @code{jit_patch_movi}, since they can also be used to implement
-forward references.  @code{jit_movi_p} should be carefully implemented
-to use an encoding that is as long as possible, and it should return
-an address which is then passed to @code{jit_patch_movi}.  The
-implementation of @code{jit_patch_movi} is similar to
-@code{jit_patch_at}.
-
-@node Common features
-@section Common features supported by @file{core-common.h}
-
-The @file{core-common.h} file contains hundreds of macro definitions
-which will spare you defining a lot of things in the files that are
-specific to your port.  Here is a list of the features that 
-@file{core-common.h} provides.
-
-@table @b
-@item Support for common synthetic instructions
-These are instructions that can be represented as a simple operation,
-for example a bit-wise @sc{and} or a subtraction.  @file{core-common.h}
-recognizes when the port-specific header file defines these macros and
-avoids compiler warnings about redefined macros, but there should be
-no need to define them.  They are:
-@example
-#define jit_extr_c_ui(d, rs)
-#define jit_extr_s_ui(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_negr_i(d, rs)
-#define jit_negr_l(d, rs)
-@end example
-
-@item Support for the @sc{abi}
-None of @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} is
-mandatory.  If not defined, they will be defined respectively as an
-empty macro, as a synonym for @code{jit_prolog}, and as a synonym for
-@code{jit_calli}.  Whether to define them in the port-specific
-header file depends on the underlying architecture's @sc{abi}---in
-general, however, you'll need to define at least @code{jit_prolog}.
-
-@item Support for uncommon instructions
-These are instructions that many widespread architectures lack.
-@file{core-common.h} is able to provide default definitions, but they
-are usually inefficient if the hardware provides a way to do these
-operations with a single instruction.  They are sign extension
-and ``reverse subtraction'' (that is, REG2@math{=}IMM@math{-}REG1):
-@example
-#define jit_extr_c_i(d, rs)
-#define jit_extr_s_i(d, rs)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_rsbi_i(d, rs, is)
-#define jit_rsbi_l(d, rs, is)
-#define jit_rsbi_p(d, rs, is)
-@end example
-
-@item Conversion between network and host byte ordering
-These macros are no-ops on big endian systems.  Don't define them on
-such systems; on the other hand, they are mandatory on little endian
-systems.  They are:
-@example
-#define jit_ntoh_ui(d, rs)
-#define jit_ntoh_us(d, rs)
-@end example
-
-@item Support for a ``zero'' register
-Many RISC architectures provide a read-only register whose value is
-hard-coded to be zero; this register is then used implicitly when
-referring to a memory location using a single register.  For example,
-on the SPARC, an operand like @code{[%l6]} is actually assembled as
-@code{[%l6+%g0]}.  If this is the case, you should define
-@code{JIT_RZERO} to be the number of this register; @file{core-common.h}
-will use it to implement all variations of the @code{ld} and @code{st}
-instructions.  For example:
-@example
-#define jit_ldi_c(d, is)         jit_ldxi_c(d, JIT_RZERO, is)
-#define jit_ldr_i(d, rs)         jit_ldxr_i(d, JIT_RZERO, rs)
-@end example
-
-If available, @code{JIT_RZERO} is also used to provide more efficient
-definitions of the @code{neg} instruction (see ``Support for common
-synthetic instructions'', above).
-
-@item Synonyms
-@file{core-common.h} provides a lot of trivial definitions which make
-the instruction set as orthogonal as possible.  For example, adding two
-unsigned integers is exactly the same as adding two signed integers
-(assuming a two's complement representation of negative numbers); yet,
-@lightning{} provides both @code{jit_addr_i} and @code{jit_addr_ui}
-macros.  Similarly, pointers and unsigned long integers behave in the
-same way, but @lightning{} has separate instructions for the two data
-types---those that operate on pointers usually include a typecast
-that makes programs clearer.
-
-@item Shortcuts
-These define ``synthetic'' instructions whose definition is not as
-trivial as in the case of synonyms, but is anyway standard.  This
-is the case for bitwise @sc{not} (which is implemented by XORing a
-string of ones), ``reverse subtraction'' between registers (which is
-converted to a normal subtraction with the two source operands
-inverted), and subtraction of an immediate from a register (which is
-converted to an addition).  Unlike @code{neg} and @code{ext} (see
-``Support for common synthetic instructions'', above), which are
-simply non-mandatory, you must not define these functions.
-
-@item Support for @code{long}s
-On most systems, @code{long}s and @code{unsigned long}s are the same
-as, respectively, @code{int}s and @code{unsigned int}s.  In this case,
-@file{core-common.h} defines operations on these types to be synonyms.
-
-@item @code{jit_state}
-Last but not least, @file{core-common.h} defines the @code{jit_state}
-type.  Part of this @code{struct} is machine-dependent and includes
-all kinds of state needed by the back-end; this part is always
-accessible in a re-entrant way as @code{_jitl}.  @code{_jitl} will be
-of type @code{struct jit_local_state}; this struct must be defined
-even if no state is required.
-
-@end table
-
-@node Delay slots
-@section Supporting scheduling of delay slots
-
-Delay slot scheduling is obtained by clients through the
-@code{jit_delay} macro.  However this macro is not to be defined
-in the platform-independent layer, because @lightning{} provides
-a common definition in @file{core-common.h}.
-
-Instead, the platform-independent layer must define another macro,
-called @code{jit_fill_delay_after}, which has to exchange the
-instruction to be scheduled in the delay slot with the branch
-instruction.  The only parameter accepted by the macro is a call
-to a branch macro, which must be expanded @strong{exactly once} by
-@code{jit_fill_delay_after}.  The client must be able to pass the
-return value of @code{jit_fill_delay_after} to @code{jit_patch_at}.
-
-There are two possible approaches that can be used in
-@code{jit_fill_delay_after}.  They are summarized in the following
-pictures:
-
-@itemize @bullet
-@item
-The branch instructions assemble a @sc{nop} instruction which is
-then removed by @code{jit_fill_delay_after}.
-
-@example
-     before                         |   after
-   ---------------------------------+-----------------------------
-     ...                            |
-     <would-be delay instruction>   |    <branch instruction>
-     <branch instruction>           |    <delay instruction>
-     NOP                            |           <--- _jit.pc
-              <--- _jit.pc          |
-@end example
-
-@item
-The branch instruction assembles the branch so that the delay
-slot is annulled, @code{jit_fill_delay_after} toggles the bit:
-
-@example
-     before                         |   after
-   ---------------------------------+-----------------------------
-     ...                            |
-     <would-be delay instruction>   |    <branch instruction>
-     <branch with annulled delay>   |    <delay instruction>
-              <--- _jit.pc          |           <--- _jit.pc
-@end example
-@end itemize
-
-Don't forget that you can take advantage of delay slots in the
-implementation of boolean instructions such as @code{le} or @code{gt}.
-
-@node Immediate values
-@section Supporting arbitrarily sized immediate values
-
-This is a problem that is endemic to RISC machines.  The basic idea
-is to reserve one or two registers to represent large immediate values.
-Let's see an example from the SPARC:
-
-@example
-   addi_i R0, V2, 45         |  addi_i R0, V2, 10000
-  ---------------------------+---------------------------
-   add    %l5, 45, %l0       |  set    10000, %l6
-                             |  add    %l5, %l6, %l0
-@end example
-
-In this case, @code{%l6} is reserved to be used for large immediates.
-An elegant solution is to use an internal macro which automatically
-decides which version is to be compiled.
-
-Beware of register conflicts on machines with delay slots.  This is
-the case for the SPARC, where @code{%l7} is used instead for large
-immediates in compare-and-branch instructions.  So the sequence
-
-@example
-   jit_delay(
-      jit_addi_i(JIT_R0, JIT_V2, 10000),
-      jit_blei_i(label, JIT_R1, 20000)
-   );
-@end example
-
-@noindent
-is assembled this way:
-
-@example
-   set 10000, %l6       @rem{! prepare immediate for add}
-   set 20000, %l7       @rem{! prepare immediate for cmp}
-   cmp %l1, %l7
-   ble label
-   add %l5, %l6, %l0    @rem{! delay slot}
-@end example
-
-Note that using @code{%l6} in the branch instruction would have given
-an incorrect result---@code{R0} would have been filled with the value of
-@code{V2+@i{20000}} rather than @code{V2+@i{10000}}.
-
-@node Implementing the ABI
-@section Implementing the ABI
-
-Implementing the underlying architecture's @sc{abi} is done in the
-macros that handle function prologs and epilogs and argument passing.
-
-Let's look at the prologs and epilogs first.  These are usually pretty
-simple and, what's more important, with constant content---that is,
-they always generate exactly the same instruction sequence.  Here is
-an example:
-
-@example
-          SPARC                        x86
-          save %sp, -96, %sp           push %ebp
-                                       push %ebx
-                                       push %esi
-                                       push %edi
-                                       movl %esp, %ebp
-          ...                          ...
-          ret                          popl %edi
-          restore                      popl %esi
-                                       popl %ebx
-                                       popl %ebp
-                                       ret
-@end example
-
-The registers that are saved (@code{%ebx}, @code{%esi}, @code{%edi}) are
-mapped to the @code{V0} through @code{V2} registers in the @lightning{}
-instruction set.
-
-Argument passing is more tricky.  There are basically three
-cases@footnote{For speed and ease of implementation, @lightning{} does not
-currently support passing some of the parameters on the stack and some
-in registers.}:
-@table @b
-@item Register windows
-Output registers are different from input registers---the prolog takes
-care of moving the caller's output registers to the callee's input
-registers.  This is the case with the SPARC.
-
-@item Passing parameters via registers
-In this case, output registers are the same as input registers.  The
-program must take care of saving input parameters somewhere (on the
-stack, or in non-argument registers).  This is the case with the
-PowerPC.
-
-@item All the parameters are passed on the stack
-This case is by far the simplest and is the most common in CISC
-architectures, like the x86 and Motorola 68000.
-@end table
-
-In all cases, the port-specific header file will define two variables
-for private use---one to be used by the caller during the
-@code{prepare}/@code{pusharg}/@code{finish} sequence, one to be used
-by the callee, specifically in the @code{jit_prolog} and @code{jit_arg}
-macros.
-
-Let's look again, this time with more detail, at each of the cases.
-
-@table @b
-@item Register windows
-@code{jit_finish} is the same as @code{jit_calli}, and is defined
-in @file{core-common.h} (@pxref{Common features, , Common features
-supported by @file{core-common.h}}).
-
-@example
-#define jit_prepare_i(numargs)  (_jitl.pusharg = _Ro(numargs))
-#define jit_pusharg_i(rs)       (--_jitl.pusharg,         \
-                                 MOVrr((rs), _jitl.pusharg))
-@end example
-
-Remember that arguments pushing takes place in reverse order, thus
-giving a pre-decrement (rather than post-increment) in
-@code{jit_pusharg_i}.
-
-Here is what happens on the callee's side:
-
-@example
-#define jit_arg_c()           (_jitl.getarg++)
-#define jit_getarg_c(rd, ofs) jit_extr_c_i  ((rd), (ofs))
-#define jit_prolog(numargs)   (SAVErir(JIT_SP, -96, JIT_SP), \
-                               _jitl.getarg = _Ri(0))
-@end example
-
-The @code{jit_arg} macros return nothing more than a register index,
-which is then used by the @code{jit_getarg} macros.  @code{jit_prolog}
-resets the counter used by @code{jit_arg} to zero; the @code{numargs}
-parameter is not used. It is sufficient for @code{jit_leaf} to be a
-synonym for @code{jit_prolog}.
-
-@item Passing parameters via registers
-The code is almost the same as that for the register windows case, but
-with an additional complexity---@code{jit_arg} will transfer the
-argument from the input register to a non-argument register so that
-function calls will not clobber it.  The prolog and epilog code can then
-become unbearably long, up to 20 instructions on the PPC; a common
-solution in this case is that of @dfn{trampolines}.
-
-The prolog does nothing more than put the function's actual address in a
-caller-preserved register and then call the trampoline:
-@example
-       mflr    r0                 @rem{! grab return address}
-       movei   r10, trampo_2args  @rem{! jump to trampoline}
-       mtlr    r10
-       blrl
-here:  mflr    r31                @rem{! r31 = address of epilog}
-       @rem{...actual code...}
-       mtlr    r31                @rem{! return to the trampoline}
-       blr
-@end example
-
-In this case, @code{jit_prolog} does use its argument containing the
-number of parameters to pick the appropriate trampoline. Here,
-@code{trampo_2args} is the address of a trampoline designed for
-2-argument functions.
-
-The trampoline executes the prolog code, jumps to the contents of
-@code{r10}, and upon return from the subroutine it executes the
-epilog code.
-
-@item All the parameters are passed on the stack
-@code{jit_pusharg} uses a hardware push operation, which is commonly
-available on CISC machines (where this approach is most likely
-followed).  Since the stack has to be cleaned up after the call,
-@code{jit_prepare_i} remembers how many parameters have been put there,
-and @code{jit_finish} adjusts the stack pointer after the call.
-
-@example
-#define jit_prepare_i(numargs) (_jitl.args += (numargs))
-#define jit_pusharg_i(rs)      PUSHLr(rs)
-#define jit_finish(sub)        (jit_calli((sub)),              \
-                               ADDLir(4 * _jitl.args, JIT_SP), \
-                               _jitl.numargs = 0)
-@end example
-
-Note the usage of @code{+=} in @code{jit_prepare_i}.  This is done
-so that one can defer the popping of the arguments that were saved
-on the stack (@dfn{stack pollution}).  To do so, it is sufficient to
-use @code{jit_calli} instead of @code{jit_finish} in all but the
-last call.
-
-On the caller's side, @code{arg} returns an offset relative to the
-frame pointer, and @code{getarg} loads the argument from the stack:
-
-@example
-#define jit_getarg_c(rd, ofs) jit_ldxi_c((rd), _EBP, (ofs));
-#define jit_arg_c()           ((_jitl.frame += sizeof(int) \
-                                            -  sizeof(int))
-@end example
-
-The @code{_jitl.frame} variable is initialized by @code{jit_prolog}
-with the displacement between the value of the frame pointer
-(@code{%ebp}) and the address of the first parameter.
-@end table
-
-These schemes are the most used, so @file{core-common.h} provides a way
-to employ them automatically.  If you do not define the
-@code{jit_getarg_c} macro and its companions, @file{core-common.h} will
-presume that you intend to pass parameters through either the registers
-or the stack.
-
-If you define @code{JIT_AP}, stack-based parameter passing will be
-employed and the @code{jit_getarg} macros will be defined like this:
-
-@example
-#define jit_getarg_c(reg, ofs)  jit_ldxi_c((reg), JIT_AP, (ofs));
-@end example
-
-In other words, the @code{jit_arg} macros (which are still to be defined
-by the platform-specific back-end) shall return an offset into the stack
-frame.  On the other hand, if you don't define @code{JIT_AP},
-register-based parameter passing will be employed and the @code{jit_arg}
-macros shall return a register number; in this case, @code{jit_getarg}
-will be implemented in terms of @code{jit_extr} and @code{jit_movr}
-operations:
-
-@example
-#define jit_getarg_c(reg, ofs)		jit_extr_c_i  ((reg), (ofs))
-#define jit_getarg_i(reg, ofs)		jit_movr_i    ((reg), (ofs))
-@end example
-
-
-@node Macro list
-@section Macros composing the platform-independent layer
-
-@table @b
-@item Register names (all mandatory but the last three)
-@example
-#define JIT_R
-#define JIT_R_NUM
-#define JIT_V
-#define JIT_V_NUM
-#define JIT_FPR
-#define JIT_FPR_NUM
-#define JIT_FP
-#define JIT_SP
-#define JIT_AP
-#define JIT_RZERO
-@end example
-
-@item Helper macros (non-mandatory):
-@example
-#define jit_fill_delay_after(branch)
-@end example
-
-@item Mandatory:
-@example
-#define jit_allocai()
-#define jit_arg_c()
-#define jit_arg_i()
-#define jit_arg_l()
-#define jit_arg_p()
-#define jit_arg_s()
-#define jit_arg_uc()
-#define jit_arg_ui()
-#define jit_arg_ul()
-#define jit_arg_us()
-#define jit_abs_d(rd,rs)
-#define jit_addi_i(d, rs, is)
-#define jit_addr_d(rd,s1,s2)
-#define jit_addr_i(d, s1, s2)
-#define jit_addxi_i(d, rs, is)
-#define jit_addxr_i(d, s1, s2)
-#define jit_andi_i(d, rs, is)
-#define jit_andr_i(d, s1, s2)
-#define jit_beqi_i(label, rs, is)
-#define jit_beqr_d(label, s1, s2)
-#define jit_beqr_i(label, s1, s2)
-#define jit_bgei_i(label, rs, is)
-#define jit_bgei_ui(label, rs, is)
-#define jit_bger_d(label, s1, s2)
-#define jit_bger_i(label, s1, s2)
-#define jit_bger_ui(label, s1, s2)
-#define jit_bgti_i(label, rs, is)
-#define jit_bgti_ui(label, rs, is)
-#define jit_bgtr_d(label, s1, s2)
-#define jit_bgtr_i(label, s1, s2)
-#define jit_bgtr_ui(label, s1, s2)
-#define jit_blei_i(label, rs, is)
-#define jit_blei_ui(label, rs, is)
-#define jit_bler_d(label, s1, s2)
-#define jit_bler_i(label, s1, s2)
-#define jit_bler_ui(label, s1, s2)
-#define jit_bltgtr_d(label, s1, s2)
-#define jit_blti_i(label, rs, is)
-#define jit_blti_ui(label, rs, is)
-#define jit_bltr_d(label, s1, s2)
-#define jit_bltr_i(label, s1, s2)
-#define jit_bltr_ui(label, s1, s2)
-#define jit_bmci_i(label, rs, is)
-#define jit_bmcr_i(label, s1, s2)
-#define jit_bmsi_i(label, rs, is)
-#define jit_bmsr_i(label, s1, s2)
-#define jit_bnei_i(label, rs, is)
-#define jit_bner_d(label, s1, s2)
-#define jit_bner_i(label, s1, s2)
-#define jit_boaddi_i(label, rs, is)
-#define jit_boaddi_ui(label, rs, is)
-#define jit_boaddr_i(label, s1, s2)
-#define jit_boaddr_ui(label, s1, s2)
-#define jit_bordr_d(label, s1, s2)
-#define jit_bosubi_i(label, rs, is)
-#define jit_bosubi_ui(label, rs, is)
-#define jit_bosubr_i(label, s1, s2)
-#define jit_bosubr_ui(label, s1, s2)
-#define jit_buneqr_d(label, s1, s2)
-#define jit_bunger_d(label, s1, s2)
-#define jit_bungtr_d(label, s1, s2)
-#define jit_bunler_d(label, s1, s2)
-#define jit_bunltr_d(label, s1, s2)
-#define jit_bunordr_d(label, s1, s2)
-#define jit_calli(label)
-#define jit_callr(label)
-#define jit_ceilr_d_i(rd, rs)
-#define jit_divi_i(d, rs, is)
-#define jit_divi_ui(d, rs, is)
-#define jit_divr_d(rd,s1,s2)
-#define jit_divr_i(d, s1, s2)
-#define jit_divr_ui(d, s1, s2)
-#define jit_eqi_i(d, rs, is)
-#define jit_eqr_d(d, s1, s2)
-#define jit_eqr_i(d, s1, s2)
-#define jit_extr_i_d(rd, rs)
-#define jit_floorr_d_i(rd, rs)
-#define jit_gei_i(d, rs, is)
-#define jit_gei_ui(d, s1, s2)
-#define jit_ger_d(d, s1, s2)
-#define jit_ger_i(d, s1, s2)
-#define jit_ger_ui(d, s1, s2)
-#define jit_gti_i(d, rs, is)
-#define jit_gti_ui(d, s1, s2)
-#define jit_gtr_d(d, s1, s2)
-#define jit_gtr_i(d, s1, s2)
-#define jit_gtr_ui(d, s1, s2)
-#define jit_hmuli_i(d, rs, is)
-#define jit_hmuli_ui(d, rs, is)
-#define jit_hmulr_i(d, s1, s2)
-#define jit_hmulr_ui(d, s1, s2)
-#define jit_jmpi(label)
-#define jit_jmpr(reg)
-#define jit_ldxi_f(rd, rs, is)
-#define jit_ldxr_f(rd, s1, s2)
-#define jit_ldxi_c(d, rs, is)
-#define jit_ldxi_d(rd, rs, is)
-#define jit_ldxi_i(d, rs, is)
-#define jit_ldxi_s(d, rs, is)
-#define jit_ldxi_uc(d, rs, is)
-#define jit_ldxi_us(d, rs, is)
-#define jit_ldxr_c(d, s1, s2)
-#define jit_ldxr_d(rd, s1, s2)
-#define jit_ldxr_i(d, s1, s2)
-#define jit_ldxr_s(d, s1, s2)
-#define jit_ldxr_uc(d, s1, s2)
-#define jit_ldxr_us(d, s1, s2)
-#define jit_lei_i(d, rs, is)
-#define jit_lei_ui(d, s1, s2)
-#define jit_ler_d(d, s1, s2)
-#define jit_ler_i(d, s1, s2)
-#define jit_ler_ui(d, s1, s2)
-#define jit_lshi_i(d, rs, is)
-#define jit_lshr_i(d, r1, r2)
-#define jit_ltgtr_d(d, s1, s2)
-#define jit_lti_i(d, rs, is)
-#define jit_lti_ui(d, s1, s2)
-#define jit_ltr_d(d, s1, s2)
-#define jit_ltr_i(d, s1, s2)
-#define jit_ltr_ui(d, s1, s2)
-#define jit_modi_i(d, rs, is)
-#define jit_modi_ui(d, rs, is)
-#define jit_modr_i(d, s1, s2)
-#define jit_modr_ui(d, s1, s2)
-#define jit_movi_d(rd,immd)
-#define jit_movi_f(rd,immf)
-#define jit_movi_i(d, is)
-#define jit_movi_p(d, is)
-#define jit_movr_d(rd,rs)
-#define jit_movr_i(d, rs)
-#define jit_muli_i(d, rs, is)
-#define jit_muli_ui(d, rs, is)
-#define jit_mulr_d(rd,s1,s2)
-#define jit_mulr_i(d, s1, s2)
-#define jit_mulr_ui(d, s1, s2)
-#define jit_negr_d(rd,rs)
-#define jit_nei_i(d, rs, is)
-#define jit_ner_d(d, s1, s2)
-#define jit_ner_i(d, s1, s2)
-#define jit_nop()
-#define jit_ordr_d(d, s1, s2)
-#define jit_ori_i(d, rs, is)
-#define jit_orr_i(d, s1, s2)
-#define jit_patch_at(jump_pc, value)
-#define jit_patch_movi(jump_pc, value)
-#define jit_prepare_d(numargs)
-#define jit_prepare_f(numargs)
-#define jit_prepare_i(numargs)
-#define jit_pusharg_i(rs)
-#define jit_ret()
-#define jit_retval_i(rd)
-#define jit_roundr_d_i(rd, rs)
-#define jit_rshi_i(d, rs, is)
-#define jit_rshi_ui(d, rs, is)
-#define jit_rshr_i(d, r1, r2)
-#define jit_rshr_ui(d, r1, r2)
-#define jit_sqrt_d(rd,rs)
-#define jit_stxi_c(id, rd, rs)
-#define jit_stxi_d(id, rd, rs)
-#define jit_stxi_f(id, rd, rs)
-#define jit_stxi_i(id, rd, rs)
-#define jit_stxi_s(id, rd, rs)
-#define jit_stxr_c(d1, d2, rs)
-#define jit_stxr_d(d1, d2, rs)
-#define jit_stxr_f(d1, d2, rs)
-#define jit_stxr_i(d1, d2, rs)
-#define jit_stxr_s(d1, d2, rs)
-#define jit_subr_d(rd,s1,s2)
-#define jit_subr_i(d, s1, s2)
-#define jit_subxi_i(d, rs, is)
-#define jit_subxr_i(d, s1, s2)
-#define jit_truncr_d_i(rd, rs)
-#define jit_uneqr_d(d, s1, s2)
-#define jit_unger_d(d, s1, s2)
-#define jit_ungtr_d(d, s1, s2)
-#define jit_unler_d(d, s1, s2)
-#define jit_unltr_d(d, s1, s2)
-#define jit_unordr_d(d, s1, s2)
-#define jit_xori_i(d, rs, is)
-#define jit_xorr_i(d, s1, s2)
-@end example
-
-@item Non mandatory---there should be no need to define them:
-@example
-#define jit_extr_c_ui(d, rs)
-#define jit_extr_s_ui(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_negr_i(d, rs)
-#define jit_negr_l(d, rs)
-@end example
-
-@item Non mandatory---whether to define them depends on the @sc{abi}:
-@example
-#define jit_prolog(n)
-#define jit_finish(sub)
-#define jit_finishr(reg)
-#define jit_leaf(n)
-#define jit_getarg_c(reg, ofs)
-#define jit_getarg_i(reg, ofs)
-#define jit_getarg_l(reg, ofs)
-#define jit_getarg_p(reg, ofs)
-#define jit_getarg_s(reg, ofs)
-#define jit_getarg_uc(reg, ofs)
-#define jit_getarg_ui(reg, ofs)
-#define jit_getarg_ul(reg, ofs)
-#define jit_getarg_us(reg, ofs)
-#define jit_getarg_f(reg, ofs)
-#define jit_getarg_d(reg, ofs)
-@end example
-
-@item Non mandatory---define them if instructions that do this exist:
-@example
-#define jit_extr_c_i(d, rs)
-#define jit_extr_s_i(d, rs)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_rsbi_i(d, rs, is)
-#define jit_rsbi_l(d, rs, is)
-@end example
-
-@item Non mandatory if condition code are always set by add/sub, needed on other systems:
-@example
-#define jit_addci_i(d, rs, is)
-#define jit_addci_l(d, rs, is)
-#define jit_subci_i(d, rs, is)
-#define jit_subci_l(d, rs, is)
-@end example
-
-@item Mandatory on little endian systems---don't define them on other systems:
-@example
-#define jit_ntoh_ui(d, rs)
-#define jit_ntoh_us(d, rs)
-@end example
-
-@item Mandatory if JIT_RZERO not defined---don't define them if it is defined:
-@example
-#define jit_ldi_c(d, is)
-#define jit_ldi_i(d, is)
-#define jit_ldi_s(d, is)
-#define jit_ldr_c(d, rs)
-#define jit_ldr_i(d, rs)
-#define jit_ldr_s(d, rs)
-#define jit_ldi_uc(d, is)
-#define jit_ldi_ui(d, is)
-#define jit_ldi_us(d, is)
-#define jit_ldr_uc(d, rs)
-#define jit_ldr_ui(d, rs)
-#define jit_ldr_us(d, rs)
-#define jit_sti_c(id, rs)
-#define jit_sti_i(id, rs)
-#define jit_sti_s(id, rs)
-#define jit_str_c(rd, rs)
-#define jit_str_i(rd, rs)
-#define jit_str_s(rd, rs)
-#define jit_ldi_f(rd, is)
-#define jit_sti_f(id, rs)
-#define jit_ldi_d(rd, is)
-#define jit_sti_d(id, rs)
-#define jit_ldr_f(rd, rs)
-#define jit_str_f(rd, rs)
-#define jit_ldr_d(rd, rs)
-#define jit_str_d(rd, rs)
-@end example
-
-@item Synonyms---don't define them:
-@example
-#define jit_addi_p(d, rs, is)
-#define jit_addi_ui(d, rs, is)
-#define jit_addi_ul(d, rs, is)
-#define jit_addr_p(d, s1, s2)
-#define jit_addr_ui(d, s1, s2)
-#define jit_addr_ul(d, s1, s2)
-#define jit_andi_ui(d, rs, is)
-#define jit_andi_ul(d, rs, is)
-#define jit_andr_ui(d, s1, s2)
-#define jit_andr_ul(d, s1, s2)
-#define jit_beqi_p(label, rs, is)
-#define jit_beqi_ui(label, rs, is)
-#define jit_beqi_ul(label, rs, is)
-#define jit_beqr_p(label, s1, s2)
-#define jit_beqr_ui(label, s1, s2)
-#define jit_beqr_ul(label, s1, s2)
-#define jit_bmci_ui(label, rs, is)
-#define jit_bmci_ul(label, rs, is)
-#define jit_bmcr_ui(label, s1, s2)
-#define jit_bmcr_ul(label, s1, s2)
-#define jit_bmsi_ui(label, rs, is)
-#define jit_bmsi_ul(label, rs, is)
-#define jit_bmsr_ui(label, s1, s2)
-#define jit_bmsr_ul(label, s1, s2)
-#define jit_bgei_p(label, rs, is)
-#define jit_bger_p(label, s1, s2)
-#define jit_bgti_p(label, rs, is)
-#define jit_bgtr_p(label, s1, s2)
-#define jit_blei_p(label, rs, is)
-#define jit_bler_p(label, s1, s2)
-#define jit_blti_p(label, rs, is)
-#define jit_bltr_p(label, s1, s2)
-#define jit_bnei_p(label, rs, is)
-#define jit_bnei_ui(label, rs, is)
-#define jit_bnei_ul(label, rs, is)
-#define jit_bner_p(label, s1, s2)
-#define jit_bner_ui(label, s1, s2)
-#define jit_bner_ul(label, s1, s2)
-#define jit_eqi_p(d, rs, is)
-#define jit_eqi_ui(d, rs, is)
-#define jit_eqi_ul(d, rs, is)
-#define jit_eqr_p(d, s1, s2)
-#define jit_eqr_ui(d, s1, s2)
-#define jit_eqr_ul(d, s1, s2)
-#define jit_extr_c_s(d, rs)
-#define jit_extr_c_us(d, rs)
-#define jit_extr_uc_s(d, rs)
-#define jit_extr_uc_us(d, rs)
-#define jit_extr_uc_i(d, rs)
-#define jit_extr_uc_ui(d, rs)
-#define jit_extr_us_i(d, rs)
-#define jit_extr_us_ui(d, rs)
-#define jit_extr_uc_l(d, rs)
-#define jit_extr_uc_ul(d, rs)
-#define jit_extr_us_l(d, rs)
-#define jit_extr_us_ul(d, rs)
-#define jit_extr_ui_l(d, rs)
-#define jit_extr_ui_ul(d, rs)
-#define jit_gei_p(d, rs, is)
-#define jit_ger_p(d, s1, s2)
-#define jit_gti_p(d, rs, is)
-#define jit_gtr_p(d, s1, s2)
-#define jit_ldr_p(d, rs)
-#define jit_ldr_ul(d, rs)
-#define jit_ldi_p(d, is)
-#define jit_ldi_ul(d, is)
-#define jit_ldxi_p(d, rs, is)
-#define jit_ldxi_ul(d, rs, is)
-#define jit_ldxr_p(d, s1, s2)
-#define jit_ldxr_ul(d, s1, s2)
-#define jit_lei_p(d, rs, is)
-#define jit_ler_p(d, s1, s2)
-#define jit_lshi_ui(d, rs, is)
-#define jit_lshi_ul(d, rs, is)
-#define jit_lshr_ui(d, s1, s2)
-#define jit_lshr_ul(d, s1, s2)
-#define jit_lti_p(d, rs, is)
-#define jit_ltr_p(d, s1, s2)
-#define jit_movi_p(d, is)
-#define jit_movi_ui(d, rs)
-#define jit_movi_ul(d, rs)
-#define jit_movr_p(d, rs)
-#define jit_movr_ui(d, rs)
-#define jit_movr_ul(d, rs)
-#define jit_nei_p(d, rs, is)
-#define jit_nei_ui(d, rs, is)
-#define jit_nei_ul(d, rs, is)
-#define jit_ner_p(d, s1, s2)
-#define jit_ner_ui(d, s1, s2)
-#define jit_ner_ul(d, s1, s2)
-#define jit_hton_ui(d, rs)
-#define jit_hton_us(d, rs)
-#define jit_ori_ui(d, rs, is)
-#define jit_ori_ul(d, rs, is)
-#define jit_orr_ui(d, s1, s2)
-#define jit_orr_ul(d, s1, s2)
-#define jit_pusharg_c(rs)
-#define jit_pusharg_p(rs)
-#define jit_pusharg_s(rs)
-#define jit_pusharg_uc(rs)
-#define jit_pusharg_ui(rs)
-#define jit_pusharg_ul(rs)
-#define jit_pusharg_us(rs)
-#define jit_retval_c(rd)
-#define jit_retval_p(rd)
-#define jit_retval_s(rd)
-#define jit_retval_uc(rd)
-#define jit_retval_ui(rd)
-#define jit_retval_ul(rd)
-#define jit_retval_us(rd)
-#define jit_rsbi_p(d, rs, is)
-#define jit_rsbi_ui(d, rs, is)
-#define jit_rsbi_ul(d, rs, is)
-#define jit_rsbr_p(d, rs, is)
-#define jit_rsbr_ui(d, s1, s2)
-#define jit_rsbr_ul(d, s1, s2)
-#define jit_sti_p(d, is)
-#define jit_sti_uc(d, is)
-#define jit_sti_ui(d, is)
-#define jit_sti_ul(d, is)
-#define jit_sti_us(d, is)
-#define jit_str_p(d, rs)
-#define jit_str_uc(d, rs)
-#define jit_str_ui(d, rs)
-#define jit_str_ul(d, rs)
-#define jit_str_us(d, rs)
-#define jit_stxi_p(d, rs, is)
-#define jit_stxi_uc(d, rs, is)
-#define jit_stxi_ui(d, rs, is)
-#define jit_stxi_ul(d, rs, is)
-#define jit_stxi_us(d, rs, is)
-#define jit_stxr_p(d, s1, s2)
-#define jit_stxr_uc(d, s1, s2)
-#define jit_stxr_ui(d, s1, s2)
-#define jit_stxr_ul(d, s1, s2)
-#define jit_stxr_us(d, s1, s2)
-#define jit_subi_p(d, rs, is)
-#define jit_subi_ui(d, rs, is)
-#define jit_subi_ul(d, rs, is)
-#define jit_subr_p(d, s1, s2)
-#define jit_subr_ui(d, s1, s2)
-#define jit_subr_ul(d, s1, s2)
-#define jit_subxi_p(d, rs, is)
-#define jit_subxi_ui(d, rs, is)
-#define jit_subxi_ul(d, rs, is)
-#define jit_subxr_p(d, s1, s2)
-#define jit_subxr_ui(d, s1, s2)
-#define jit_subxr_ul(d, s1, s2)
-#define jit_xori_ui(d, rs, is)
-#define jit_xori_ul(d, rs, is)
-#define jit_xorr_ui(d, s1, s2)
-#define jit_xorr_ul(d, s1, s2)
-@end example
-
-@item Shortcuts---don't define them:
-@example
-#define JIT_R0
-#define JIT_R1
-#define JIT_R2
-#define JIT_V0
-#define JIT_V1
-#define JIT_V2
-#define JIT_FPR0
-#define JIT_FPR1
-#define JIT_FPR2
-#define JIT_FPR3
-#define JIT_FPR4
-#define JIT_FPR5
-#define jit_patch(jump_pc)
-#define jit_notr_c(d, rs)
-#define jit_notr_i(d, rs)
-#define jit_notr_l(d, rs)
-#define jit_notr_s(d, rs)
-#define jit_notr_uc(d, rs)
-#define jit_notr_ui(d, rs)
-#define jit_notr_ul(d, rs)
-#define jit_notr_us(d, rs)
-#define jit_rsbr_d(d, s1, s2)
-#define jit_rsbr_i(d, s1, s2)
-#define jit_rsbr_l(d, s1, s2)
-#define jit_subi_i(d, rs, is)
-#define jit_subi_l(d, rs, is)
-@end example
-
-@item Mandatory unless target arithmetic is always done in the same precision:
-@example
-#define jit_abs_f(rd,rs)
-#define jit_addr_f(rd,s1,s2)
-#define jit_beqr_f(label, s1, s2)
-#define jit_bger_f(label, s1, s2)
-#define jit_bgtr_f(label, s1, s2)
-#define jit_bler_f(label, s1, s2)
-#define jit_bltgtr_f(label, s1, s2)
-#define jit_bltr_f(label, s1, s2)
-#define jit_bner_f(label, s1, s2)
-#define jit_bordr_f(label, s1, s2)
-#define jit_buneqr_f(label, s1, s2)
-#define jit_bunger_f(label, s1, s2)
-#define jit_bungtr_f(label, s1, s2)
-#define jit_bunler_f(label, s1, s2)
-#define jit_bunltr_f(label, s1, s2)
-#define jit_bunordr_f(label, s1, s2)
-#define jit_ceilr_f_i(rd, rs)
-#define jit_divr_f(rd,s1,s2)
-#define jit_eqr_f(d, s1, s2)
-#define jit_extr_d_f(rs, rd)
-#define jit_extr_f_d(rs, rd)
-#define jit_extr_i_f(rd, rs)
-#define jit_floorr_f_i(rd, rs)
-#define jit_ger_f(d, s1, s2)
-#define jit_gtr_f(d, s1, s2)
-#define jit_ler_f(d, s1, s2)
-#define jit_ltgtr_f(d, s1, s2)
-#define jit_ltr_f(d, s1, s2)
-#define jit_movr_f(rd,rs)
-#define jit_mulr_f(rd,s1,s2)
-#define jit_negr_f(rd,rs)
-#define jit_ner_f(d, s1, s2)
-#define jit_ordr_f(d, s1, s2)
-#define jit_roundr_f_i(rd, rs)
-#define jit_rsbr_f(d, s1, s2)
-#define jit_sqrt_f(rd,rs)
-#define jit_subr_f(rd,s1,s2)
-#define jit_truncr_f_i(rd, rs)
-#define jit_uneqr_f(d, s1, s2)
-#define jit_unger_f(d, s1, s2)
-#define jit_ungtr_f(d, s1, s2)
-#define jit_unler_f(d, s1, s2)
-#define jit_unltr_f(d, s1, s2)
-#define jit_unordr_f(d, s1, s2)
-@end example
-
-@item Mandatory if sizeof(long) != sizeof(int)---don't define them on other systems:
-@example
-#define jit_addi_l(d, rs, is)
-#define jit_addr_l(d, s1, s2)
-#define jit_andi_l(d, rs, is)
-#define jit_andr_l(d, s1, s2)
-#define jit_beqi_l(label, rs, is)
-#define jit_beqr_l(label, s1, s2)
-#define jit_bgei_l(label, rs, is)
-#define jit_bgei_ul(label, rs, is)
-#define jit_bger_l(label, s1, s2)
-#define jit_bger_ul(label, s1, s2)
-#define jit_bgti_l(label, rs, is)
-#define jit_bgti_ul(label, rs, is)
-#define jit_bgtr_l(label, s1, s2)
-#define jit_bgtr_ul(label, s1, s2)
-#define jit_blei_l(label, rs, is)
-#define jit_blei_ul(label, rs, is)
-#define jit_bler_l(label, s1, s2)
-#define jit_bler_ul(label, s1, s2)
-#define jit_blti_l(label, rs, is)
-#define jit_blti_ul(label, rs, is)
-#define jit_bltr_l(label, s1, s2)
-#define jit_bltr_ul(label, s1, s2)
-#define jit_bosubi_l(label, rs, is)
-#define jit_bosubi_ul(label, rs, is)
-#define jit_bosubr_l(label, s1, s2)
-#define jit_bosubr_ul(label, s1, s2)
-#define jit_boaddi_l(label, rs, is)
-#define jit_boaddi_ul(label, rs, is)
-#define jit_boaddr_l(label, s1, s2)
-#define jit_boaddr_ul(label, s1, s2)
-#define jit_bmci_l(label, rs, is)
-#define jit_bmcr_l(label, s1, s2)
-#define jit_bmsi_l(label, rs, is)
-#define jit_bmsr_l(label, s1, s2)
-#define jit_bnei_l(label, rs, is)
-#define jit_bner_l(label, s1, s2)
-#define jit_divi_l(d, rs, is)
-#define jit_divi_ul(d, rs, is)
-#define jit_divr_l(d, s1, s2)
-#define jit_divr_ul(d, s1, s2)
-#define jit_eqi_l(d, rs, is)
-#define jit_eqr_l(d, s1, s2)
-#define jit_extr_c_l(d, rs)
-#define jit_extr_c_ul(d, rs)
-#define jit_extr_s_l(d, rs)
-#define jit_extr_s_ul(d, rs)
-#define jit_extr_i_l(d, rs)
-#define jit_extr_i_ul(d, rs)
-#define jit_gei_l(d, rs, is)
-#define jit_gei_ul(d, rs, is)
-#define jit_ger_l(d, s1, s2)
-#define jit_ger_ul(d, s1, s2)
-#define jit_gti_l(d, rs, is)
-#define jit_gti_ul(d, rs, is)
-#define jit_gtr_l(d, s1, s2)
-#define jit_gtr_ul(d, s1, s2)
-#define jit_hmuli_l(d, rs, is)
-#define jit_hmuli_ul(d, rs, is)
-#define jit_hmulr_l(d, s1, s2)
-#define jit_hmulr_ul(d, s1, s2)
-#define jit_ldi_l(d, is)
-#define jit_ldi_ui(d, is)
-#define jit_ldr_l(d, rs)
-#define jit_ldr_ui(d, rs)
-#define jit_ldxi_l(d, rs, is)
-#define jit_ldxi_ui(d, rs, is)
-#define jit_ldxr_l(d, s1, s2)
-#define jit_ldxr_ui(d, s1, s2)
-#define jit_lei_l(d, rs, is)
-#define jit_lei_ul(d, rs, is)
-#define jit_ler_l(d, s1, s2)
-#define jit_ler_ul(d, s1, s2)
-#define jit_lshi_l(d, rs, is)
-#define jit_lshr_l(d, s1, s2)
-#define jit_lti_l(d, rs, is)
-#define jit_lti_ul(d, rs, is)
-#define jit_ltr_l(d, s1, s2)
-#define jit_ltr_ul(d, s1, s2)
-#define jit_modi_l(d, rs, is)
-#define jit_modi_ul(d, rs, is)
-#define jit_modr_l(d, s1, s2)
-#define jit_modr_ul(d, s1, s2)
-#define jit_movi_l(d, rs)
-#define jit_movr_l(d, rs)
-#define jit_muli_l(d, rs, is)
-#define jit_muli_ul(d, rs, is)
-#define jit_mulr_l(d, s1, s2)
-#define jit_mulr_ul(d, s1, s2)
-#define jit_nei_l(d, rs, is)
-#define jit_ner_l(d, s1, s2)
-#define jit_ori_l(d, rs, is)
-#define jit_orr_l(d, s1, s2)
-#define jit_pusharg_l(rs)
-#define jit_retval_l(rd)
-#define jit_rshi_l(d, rs, is)
-#define jit_rshi_ul(d, rs, is)
-#define jit_rshr_l(d, s1, s2)
-#define jit_rshr_ul(d, s1, s2)
-#define jit_sti_l(d, is)
-#define jit_str_l(d, rs)
-#define jit_stxi_l(d, rs, is)
-#define jit_stxr_l(d, s1, s2)
-#define jit_subr_l(d, s1, s2)
-#define jit_xori_l(d, rs, is)
-#define jit_xorr_l(d, s1, s2)
-@end example
-@end table
-
-@node Standard functions
-@chapter More complex tasks in the platform-independent layer
-
-There is actually a single function that you @strong{must} define
-in the @file{funcs-@var{suffix}.h} file, that is, @code{jit_flush_code}.
-
-As explained in @usingref{GNU lightning macros, Generating code at
-run-time}, its purpose is to flush part of the processor's
-instruction cache (usually the part of memory that contains the
-generated code), avoiding the processor executing bogus data
-that it happens to find in the cache.  The @code{jit_flush_code}
-function takes the first and the last address to flush.
-
-On many processors (for example, the x86 and the all the processors
-in the 68k family up to the 68030), it is not even necessary to flush
-the cache.  In this case, the contents of the file will simply be
-
-@example
-#ifndef __lightning_funcs_h
-#define __lightning_funcs_h
-
-#define jit_flush_code(dest, end)
-
-#endif @rem{/* __lightning_core_h */}
-@end example
-
-On other processors, flushing the cache is necessary for
-proper behavior of the program; in this case, the file will contain
-a proper definition of the function.  However, we must make yet
-another distinction.
-
-On some processors, flushing the cache is obtained through a call
-to the operating system or to the C run-time library.  In this case,
-the definition of @code{jit_flush_code} will be very simple: two
-examples are the Alpha and the 68040. For the Alpha the code will
-be:
-@example
-#define jit_flush_code(dest, end) \
-        __asm__ __volatile__("call_pal 0x86");
-@end example
-
-@noindent
-and, for the Motorola
-@example
-#define jit_flush_code(start, end) \
-        __clear_cache((start), (end))
-@end example
-
-As you can see, the Alpha does not even need to pass the start and
-end address to the function.  It is good practice to protect usage of
-the @acronym{GNU CC}-specific @code{__asm__} directive by relying
-on the preprocessor.  For example:
-
-@example
-#if !defined(__GNUC__) && !defined(__GNUG__)
-#error Go get GNU C, I do not know how to flush the cache
-#error with this compiler.
-#else
-#define jit_flush_code(dest, end) \
-        __asm__ __volatile__("call_pal 0x86");
-#endif
-@end example
-
-@lightning{}'s configuration process tries to compile a dummy file that
-includes @code{lightning.h}, and gives a warning if there are problem
-with the compiler that is installed on the system.
-
-In more complex cases, you'll need to write a full-fledged function.
-Don't forget to make it @code{static}, otherwise you'll have problems
-linking programs that include @code{lightning.h} multiple times. An
-example, taken from the @file{funcs-ppc.h} file, is:
-
-@example
-#ifndef __lightning_funcs_h
-#define __lightning_funcs_h
-
-#if !defined(__GNUC__) && !defined(__GNUG__)
-#error Go get GNU C, I do not know how to flush the cache
-#error with this compiler.
-#else
-static void
-jit_flush_code(start, end)
-     void       *start;
-     void       *end;
-@{
-  register char *dest = start;
-
-  for (; dest <= end; dest += SIZEOF_CHAR_P)
-    __asm__ __volatile__ 
-      ("dcbst 0,%0; sync; icbi 0,%0; isync"::"r"(dest));
-@}
-#endif
-
-#endif /* __lightning_funcs_h */
-@end example
-
-The @file{funcs-@var{suffix}.h} file is also the right place to put
-helper functions that do complex tasks for the
-@file{core-@var{suffix}.h} file.  For example, the PowerPC assembler
-defines @code{jit_prolog} as a function and puts it in that file (for more
-information, @pxref{Implementing the ABI}).  Take special care when
-defining such a function, as explained in @usingref{Reentrancy,
-Reentrant usage of @lightning{}}.
-
-
-@node Floating-point macros
-@chapter Implementing macros for floating point
-
diff --git a/doc/printf.c b/doc/printf.c
new file mode 100644
index 000000000..52bd2aa1f
--- /dev/null
+++ b/doc/printf.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef void (*pvfi)(int);    /* Pointer to Void Function of Int */
+
+/*
+ * Generate at run time the equivalent of
+ *     void f(int n) { printf("generated %d bytes\n", n); }
+ * and call it with the byte distance between its two bracketing notes.
+ */
+int main(int argc, char *argv[])
+{
+  pvfi          myFunction;             /* ptr to generated code */
+  jit_node_t    *start, *end;           /* a couple of labels */
+  jit_node_t    *in;                    /* to get the argument */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  start = jit_note(__FILE__, __LINE__);
+  jit_prolog();
+  in = jit_arg();
+  jit_getarg(JIT_R1, in);               /* R1 = the int argument */
+  jit_prepare();                        /* start the argument list */
+  jit_pushargi((jit_word_t)"generated %d bytes\n");
+  jit_ellipsis();                       /* what follows is variadic */
+  jit_pushargr(JIT_R1);
+  jit_finishi(printf);
+  jit_ret();
+  jit_epilog();
+  end = jit_note(__FILE__, __LINE__);
+
+  myFunction = jit_emit();
+
+  /* call the generated code, passing its size as argument */
+  myFunction((char*)jit_address(end) - (char*)jit_address(start));
+
+  jit_disassemble();
+
+  finish_jit();
+  return 0;
+}
diff --git a/doc/rfib.c b/doc/rfib.c
new file mode 100644
index 000000000..1ce02d5a7
--- /dev/null
+++ b/doc/rfib.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <lightning.h>
+
+static jit_state_t *_jit;
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+int main(int argc, char *argv[])
+{
+  pifi       fib;                 /* entry point of the generated code */
+  jit_node_t *label;              /* start of the function, target of the calls */
+  jit_node_t *call;               /* call node to be patched to label */
+  jit_node_t *in;                 /* offset of the argument */
+  jit_node_t *ref;                /* to patch the forward reference */
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  label = jit_label();
+        jit_prolog   ();
+  in =  jit_arg      ();
+        jit_getarg   (JIT_V0, in);              /* V0 = n */
+  ref = jit_blti     (JIT_V0, 2);               /* n < 2: branch to the base case */
+        jit_subi     (JIT_V1, JIT_V0, 1);       /* V1 = n-1 */
+        jit_subi     (JIT_V2, JIT_V0, 2);       /* V2 = n-2 */
+        jit_prepare();
+          jit_pushargr(JIT_V1);
+        call = jit_finishi(NULL);               /* target filled in just below */
+        jit_patch_at(call, label);              /* call ourselves */
+        jit_retval(JIT_V1);                     /* V1 = fib(n-1) */
+        jit_prepare();
+          jit_pushargr(JIT_V2);
+        call = jit_finishi(NULL);               /* target filled in just below */
+        jit_patch_at(call, label);              /* call ourselves */
+        jit_retval(JIT_V2);                     /* V2 = fib(n-2) */
+        jit_addi(JIT_V1,  JIT_V1,  1);          /* V1 = fib(n-1) + 1 */
+        jit_addr(JIT_R0, JIT_V1, JIT_V2);       /* R0 = V1 + V2 + 1 */
+        jit_retr(JIT_R0);
+
+  jit_patch(ref);                               /* patch jump */
+        jit_movi(JIT_R0, 1);                    /* R0 = 1 */
+        jit_retr(JIT_R0);
+
+  /* call the generated code, passing 32 as an argument */
+  fib = jit_emit();
+  printf("fib(%d) = %d\n", 32, fib(32));        /* f(n) = f(n-1) + f(n-2) + 1, f(<2) = 1 */
+  finish_jit();
+  return 0;
+}
diff --git a/doc/rpn.c b/doc/rpn.c
new file mode 100644
index 000000000..f02cef35f
--- /dev/null
+++ b/doc/rpn.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <lightning.h>
+
+typedef int (*pifi)(int);       /* Pointer to Int Function of Int */
+
+static jit_state_t *_jit;
+
+void stack_push(int reg, int *sp)
+{
+  jit_stxi_i (*sp, JIT_FP, reg);        /* emit: store reg as int at JIT_FP + *sp */
+  *sp += sizeof (int);                  /* compile-time offset now past that slot */
+}
+
+void stack_pop(int reg, int *sp)
+{
+  *sp -= sizeof (int);                  /* step the offset back to the last slot */
+  jit_ldxi_i (reg, JIT_FP, *sp);        /* emit: load reg from int at JIT_FP + *sp */
+}
+
+jit_node_t *compile_rpn(char *expr)
+{
+  jit_node_t *in, *fn;          /* argument node; note marking the function start */
+  int stack_base, stack_ptr;    /* frame offsets: start of the area, next free slot */
+
+  fn = jit_note(NULL, 0);
+  jit_prolog();
+  in = jit_arg();
+  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));     /* room for 32 ints */
+
+  jit_getarg_i(JIT_R2, in);                     /* R2 = x for the whole function */
+
+  while (*expr) {                               /* one token per iteration */
+    char buf[32];
+    int n;
+    if (sscanf(expr, "%31[0-9]%n", buf, &n)) {  /* width keeps digits within buf */
+      expr += n - 1;                            /* ++expr below skips the last digit */
+      stack_push(JIT_R0, &stack_ptr);           /* spill the old top of stack */
+      jit_movi(JIT_R0, atoi(buf));
+    } else if (*expr == 'x') {
+      stack_push(JIT_R0, &stack_ptr);
+      jit_movr(JIT_R0, JIT_R2);
+    } else if (*expr == '+') {
+      stack_pop(JIT_R1, &stack_ptr);            /* R1 = left operand, R0 = right */
+      jit_addr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '-') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_subr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '*') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_mulr(JIT_R0, JIT_R1, JIT_R0);
+    } else if (*expr == '/') {
+      stack_pop(JIT_R1, &stack_ptr);
+      jit_divr(JIT_R0, JIT_R1, JIT_R0);
+    } else {
+      fprintf(stderr, "cannot compile: %s\n", expr);
+      abort();
+    }
+    ++expr;
+  }
+  jit_retr(JIT_R0);                             /* result is the top of stack */
+  jit_epilog();
+  return fn;
+}
+
+int main(int argc, char *argv[])
+{
+  jit_node_t *nc, *nf;          /* notes returned by compile_rpn */
+  pifi c2f, f2c;                /* entry points of the two generated functions */
+  int i;
+
+  init_jit(argv[0]);
+  _jit = jit_new_state();
+
+  nc = compile_rpn("32x9*5/+");         /* 32 + x*9/5 */
+  nf = compile_rpn("x32-5*9/");         /* (x-32)*5/9 */
+  (void)jit_emit();                     /* one emit covers both functions */
+  c2f = (pifi)jit_address(nc);
+  f2c = (pifi)jit_address(nf);
+
+  printf("\nC:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
+  printf("\nF:");
+  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
+  printf("\n");
+
+  printf("\nF:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", i);
+  printf("\nC:");
+  for (i = 32; i <= 212; i += 18) printf("%3d ", f2c(i));
+  printf("\n");
+
+  finish_jit();
+  return 0;
+}
diff --git a/doc/toc.texi b/doc/toc.texi
deleted file mode 100644
index 193d4f26f..000000000
--- a/doc/toc.texi
+++ /dev/null
@@ -1,76 +0,0 @@
-@c These macros are used because these items could go both in the
-@c short listing (for partial books) and in the detailed listing
-@c (for full books - i.e. using & porting)
-
-@macro usingmenu{}
-@ifset USING
-* Installation::            Configuring and installing GNU lightning
-* The instruction set::     The RISC instruction set used i GNU lightning
-* GNU lightning macros::    GNU lightning's macros
-* Reentrancy::              Re-entrant usage of GNU lightning
-* Bundling GNU lightning::  Using GNU lightning in your programs
-@end ifset
-@end macro
-
-@macro portingmenu{}
-@ifset PORTING
-* Structure of a port::   An overview of the porting process
-* Adjusting configure::   Automatically recognizing the new platform
-* Run-time assemblers::   An internal layer to simplify porting
-* Standard macros::       The platform-independent layer used by clients.
-* Standard functions::    Doing more complex tasks.
-* Floating-point macros:: Implementing macros for floating point.
-@end ifset
-@end macro
-
-@macro standardmacrosmenu{}
-@c This comment is needed because of makeinfo's vagaries...
-* Forward references::    Implementing forward references
-* Common features::       Common features supported by @file{core-common.h}
-* Delay slots::           Supporting scheduling of delay slots
-* Immediate values::      Supporting arbitrarily sized immediate values
-* Implementing the ABI::  Function prologs and epilogs, and argument passing
-* Macro list::            Macros composing the platform-independent layer
-@end macro
-
-@menu
-@ifclear BOTH
-* Overview::              What GNU lightning is
-@usingmenu{}
-@portingmenu{}
-* Future::                Tasks for GNU lightning's subsequent releases
-* Acknowledgements::      Acknowledgements for GNU lightning
-
-@ifset PORTING
-@detailmenu
---- The detailed node listing ---
-
-Standard macros:
-@standardmacrosmenu{}
-@end detailmenu
-@end ifset
-@end ifclear
-
-@ifset BOTH
-* Overview::              What GNU lightning is.
-* Using GNU lightning::   Using GNU lightning in your programs
-* Porting GNU lightning:: Retargeting GNU lightning to a new system
-* Future::                Tasks for GNU lightning's subsequent releases
-* Acknowledgements::      Acknowledgements for GNU lightning
-
-@detailmenu
---- The detailed node listing ---
-
-Using @lightning{}:
-@usingmenu{}
-
-Porting @lightning{}:
-@portingmenu{}
-
-Standard macros:
-@standardmacrosmenu{}
-@end detailmenu
-
-@end ifset
-
-@end menu
diff --git a/doc/u-lightning.texi b/doc/u-lightning.texi
deleted file mode 100644
index 0c2481b3b..000000000
--- a/doc/u-lightning.texi
+++ /dev/null
@@ -1,100 +0,0 @@
-\input texinfo.tex  @c -*- texinfo -*-
-@c %**start of header (This is for running Texinfo on a region.)
-
-@setfilename lightning.info
-
-@set TITLE       Porting @sc{gnu} @i{lightning}
-@set TOPIC       Porting
-@clear BOTH
-@set USING
-@clear PORTING
-
-@settitle @value{TITLE}
-
-@c ---------------------------------------------------------------------
-@c Common macros
-@c ---------------------------------------------------------------------
-
-@macro bulletize{a}
-@item
-\a\
-@end macro
-
-@macro rem{a}
-@r{@i{\a\}}
-@end macro
-
-@macro gnu{}
-@sc{gnu}
-@end macro
-
-@macro lightning{}
-@gnu{} @i{lightning}
-@end macro
-
-@c ---------------------------------------------------------------------
-@c Macros for Texinfo 3.1/4.0 compatibility
-@c ---------------------------------------------------------------------
-
-@c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
-@c compatibility
-@macro hlink{url, link}
-\link\ (\url\)
-@end macro
-
-@c ifhtml can only be true in Texinfo 4.0, which has uref
-@ifhtml
-@unmacro hlink
-
-@macro hlink{url, link}
-@uref{\url\, \link\}
-@end macro
-
-@macro email{mail}
-@uref{mailto:\mail\, , \mail\}
-@end macro
-
-@macro url{url}
-@uref{\url\}
-@end macro
-@end ifhtml
-
-@c ---------------------------------------------------------------------
-@c References to the other half of the manual
-@c ---------------------------------------------------------------------
-
-@ifset USING
-@macro usingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear USING
-@macro usingref{node, name}
-@ref{\node\, , \name\, u-lightning, Using @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@ifset PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\}
-@end macro
-@end ifset
-
-@ifclear PORTING
-@macro portingref{node, name}
-@ref{\node\, , \name\, p-lightning, Porting @sc{gnu} @i{lightning}}
-@end macro
-@end ifclear
-
-@c ---------------------------------------------------------------------
-@c End of macro section
-@c ---------------------------------------------------------------------
-
-@include version.texi
-@include body.texi
-
-@c %**end of header (This is for running Texinfo on a region.)
-
-@c ***********************************************************************
-
diff --git a/doc/using.texi b/doc/using.texi
deleted file mode 100644
index 332383eea..000000000
--- a/doc/using.texi
+++ /dev/null
@@ -1,1273 +0,0 @@
-@node Installation
-@chapter Configuring and installing @lightning{}
-
-The first thing to do to use @lightning{} is to configure the
-program, picking the set of macros to be used on the host
-architecture; this configuration is automatically performed by
-the @file{configure} shell script; to run it, merely type:
-@example
-     ./configure
-@end example
-
-@lightning{} supports cross-compiling in that you can choose a
-different set of macros from the one needed on the computer that
-you are compiling @lightning{} on.  For example,
-@example
-     ./configure --host=sparc-sun-linux
-@end example
-
-@noindent will select the SPARC set of runtime assemblers.  You can use
-configure's ability to make reasonable assumptions about the vendor
-and operating system and simply type
-@example
-     ./configure --host=i386
-     ./configure --host=ppc
-     ./configure --host=sparc
-@end example
-
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers.  These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
-checks tend to slow down your code generator.
-
-After you've configured @lightning{}, you don't have to compile it
-because it is nothing more than a set of include files.  If you want to
-compile the examples, run @file{make} as usual.  The next important
-step is:
-@example
-    make install
-@end example
-
-This ends the process of installing @lightning{}.
-
-@node The instruction set
-@chapter @lightning{}'s instruction set
-
-@lightning{}'s instruction set was designed by deriving instructions
-that closely match those of most existing RISC architectures, or
-that can be easily syntesized if absent.  Each instruction is composed
-of:
-@itemize @bullet
-@item
-an operation, like @code{sub} or @code{mul}
-
-@item
-sometimes, an register/immediate flag (@code{r} or @code{i})
-
-@item
-a type identifier or, occasionally, two
-@end itemize
-
-The second and third field are separated by an underscore; thus,
-examples of legal mnemonics are @code{addr_i} (integer add, with three
-register operands) and @code{muli_l} (long integer multiply, with two
-register operands and an immediate operand).  Each instruction takes
-two or three operands; in most cases, one of them can be an immediate
-value instead of a register.
-
-@lightning{} supports a full range of integer types: operands can be 1,
-2 or 4 bytes long (64-bit architectures might support 8 bytes long
-operands), either signed or unsigned.  The types are listed in the
-following table together with the C types they represent:
-
-@example
-     c          @r{signed char}
-     uc         @r{unsigned char}
-     s          @r{short}
-     us         @r{unsigned short}
-     i          @r{int}
-     ui         @r{unsigned int}
-     l          @r{long}
-     ul         @r{unsigned long}
-     f          @r{float}
-     d          @r{double}
-     p          @r{void *}
-@end example
-
-Some of these types may not be distinct: for example, (e.g., @code{l}
-is equivalent to @code{i} on 32-bit machines, and @code{p} is
-substantially equivalent to @code{ul}).
-
-There are at least seven integer registers, of which six are
-general-purpose, while the last is used to contain the frame pointer
-(@code{FP}).  The frame pointer can be used to allocate and access local
-variables on the stack, using the @code{allocai} instruction.
-
-Of the general-purpose registers, at least three are guaranteed to be
-preserved across function calls (@code{V0}, @code{V1} and
-@code{V2}) and at least three are not (@code{R0}, @code{R1} and
-@code{R2}).  Six registers are not very much, but this
-restriction was forced by the need to target CISC architectures
-which, like the x86, are poor of registers; anyway, backends can
-specify the actual number of available registers with the macros
-@code{JIT_R_NUM} (for caller-save registers) and @code{JIT_V_NUM}
-(for callee-save registers).
-
-In addition, there is a special @code{RET} register which contains
-the return value of the current function (@emph{not} the return value
-of callees---use the @code{retval} instruction for this).  You should
-always remember, however, that writing this register could overwrite
-either a general-purpose register or an incoming parameter, depending
-on the architecture.
-
-There are at least six floating-point registers, named @code{FPR0} to
-@code{FPR5}.  These are caller-save and are separate from the integer
-registers on all the supported architectures; on Intel architectures,
-the register stack is mapped to a flat register file.  As for the
-integer registers, the macro @code{JIT_FPR_NUM} yields the number of
-floating-point registers, and the special @code{FPRET} register contains
-the return value of the current function.
-
-The complete instruction set follows; as you can see, most non-memory
-operations only take integers, long integers (either signed or
-unsigned) and pointers as operands; this was done in order to reduce
-the instruction set, and because most architectures only provide word
-and long word operations on registers.  There are instructions that
-allow operands to be extended to fit a larger data type, both in a
-signed and in an unsigned way.
-
-@table @b
-@item Binary ALU operations
-These accept three operands; the last one can be an immediate
-value for integer operands, or a register for all operand types.
-@code{addx} operations must directly follow @code{addc}, and
-@code{subx} must follow @code{subc}; otherwise, results are undefined.
-@example
-addr     i  ui  l  ul  p  f  d  O1 = O2 + O3
-addi     i  ui  l  ul  p        O1 = O2 + O3
-addxr    i  ui  l  ul           O1 = O2 + (O3 + carry)
-addxi    i  ui  l  ul           O1 = O2 + (O3 + carry)
-addcr    i  ui  l  ul           O1 = O2 + O3, set carry
-addci    i  ui  l  ul           O1 = O2 + O3, set carry
-subr     i  ui  l  ul  p  f  d  O1 = O2 - O3
-subi     i  ui  l  ul  p        O1 = O2 - O3
-subxr    i  ui  l  ul           O1 = O2 - (O3 + carry)
-subxi    i  ui  l  ul           O1 = O2 - (O3 + carry)
-subcr    i  ui  l  ul           O1 = O2 - O3, set carry
-subci    i  ui  l  ul           O1 = O2 - O3, set carry
-rsbr     i  ui  l  ul  p  f  d  O1 = O3 - O2
-rsbi     i  ui  l  ul  p        O1 = O3 - O2
-mulr     i  ui  l  ul     f  d  O1 = O2 * O3
-muli     i  ui  l  ul           O1 = O2 * O3
-hmulr    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
-hmuli    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
-divr     i  ui  l  ul     f  d  O1 = O2 / O3
-divi     i  ui  l  ul           O1 = O2 / O3
-modr     i  ui  l  ul           O1 = O2 % O3
-modi     i  ui  l  ul           O1 = O2 % O3
-andr     i  ui  l  ul           O1 = O2 & O3
-andi     i  ui  l  ul           O1 = O2 & O3
-orr      i  ui  l  ul           O1 = O2 | O3
-ori      i  ui  l  ul           O1 = O2 | O3
-xorr     i  ui  l  ul           O1 = O2 ^ O3
-xori     i  ui  l  ul           O1 = O2 ^ O3
-lshr     i  ui  l  ul           O1 = O2 << O3
-lshi     i  ui  l  ul           O1 = O2 << O3
-rshr     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.}
-rshi     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.}
-@end example
-
-@item Unary ALU operations
-These accept two operands, both of which must be registers.
-@example
-negr     i     l         f  d  O1 = -O2
-notr     i  ui l  ul           O1 = ~O2
-@end example
-
-@item Compare instructions
-These accept three operands; again, the last can be an immediate
-value for integer data types.  The last two operands are compared,
-and the first operand is set to either 0 or 1, according to
-whether the given condition was met or not.
-
-The conditions given below are for the standard behavior of C,
-where the ``unordered'' comparison result is mapped to false.
-
-@example
-ltr      i  ui  l  ul  p  f  d  O1 = (O2 <  O3)
-lti      i  ui  l  ul  p        O1 = (O2 <  O3)
-ler      i  ui  l  ul  p  f  d  O1 = (O2 <= O3)
-lei      i  ui  l  ul  p        O1 = (O2 <= O3)
-gtr      i  ui  l  ul  p  f  d  O1 = (O2 >  O3)
-gti      i  ui  l  ul  p        O1 = (O2 >  O3)
-ger      i  ui  l  ul  p  f  d  O1 = (O2 >= O3)
-gei      i  ui  l  ul  p        O1 = (O2 >= O3)
-eqr      i  ui  l  ul  p  f  d  O1 = (O2 == O3)
-eqi      i  ui  l  ul  p        O1 = (O2 == O3)
-ner      i  ui  l  ul  p  f  d  O1 = (O2 != O3)
-nei      i  ui  l  ul  p        O1 = (O2 != O3)
-unltr                     f  d  O1 = !(O2 >= O3)
-unler                     f  d  O1 = !(O2 >  O3)
-ungtr                     f  d  O1 = !(O2 <= O3)
-unger                     f  d  O1 = !(O2 <  O3)
-uneqr                     f  d  O1 = !(O2 <  O3) && !(O2 >  O3)
-ltgtr                     f  d  O1 = !(O2 >= O3) || !(O2 <= O3)
-ordr                      f  d  O1 =  (O2 == O2) &&  (O3 == O3)
-unordr                    f  d  O1 =  (O2 != O2) ||  (O3 != O3)
-@end example
-
-@item Transfer operations
-These accept two operands; for @code{ext} both of them must be
-registers, while @code{mov} accepts an immediate value as the second
-operand.
-
-Unlike @code{movr} and @code{movi}, the other instructions are applied
-between operands of different data types, and they need @strong{two}
-data type specifications.  You can use @code{extr} to convert between
-integer data types, in which case the first must be smaller in size
-than the second; for example @code{extr_c_ui} is correct while
-@code{extr_ul_us} is not.  You can also use @code{extr} to convert
-an integer to a floating point value: the only available possibilities
-are @code{extr_i_f} and @code{extr_i_d}.  The other instructions
-convert a floating point value to an integer, so the possible
-suffixes are @code{_f_i} and @code{_d_i}.
-
-@example
-movr                      i  ui  l  ul  p  f  d  O1 = O2
-movi                      i  ui  l  ul  p  f  d  O1 = O2
-extr        c  uc  s  us  i  ui  l  ul     f  d  O1 = O2
-roundr                    i                f  d  O1 = round(O2)
-truncr                    i                f  d  O1 = trunc(O2)
-floorr                    i                f  d  O1 = floor(O2)
-ceilr                     i                f  d  O1 = ceil(O2)
-@end example
-
-Note that the order of the arguments is @emph{destination first,
-source second} as for all other @lightning{} instructions, but
-the order of the types is always reversed with respect to that
-of the arguments: @emph{shorter}---source---@emph{first,
-longer}---destination---@emph{second}.  This happens for historical
-reasons.
-
-@item Network extensions
-These accept two operands, both of which must be registers; these
-two instructions actually perform the same task, yet they are
-assigned to two mnemonics for the sake of convenience and
-completeness.  As usual, the first operand is the destination and
-the second is the source.
-@example
-hton       us ui          @r{Host-to-network (big endian) order}
-ntoh       us ui          @r{Network-to-host order }
-@end example
-
-@item Load operations
-@code{ld} accepts two operands while @code{ldx} accepts three;
-in both cases, the last can be either a register or an immediate
-value. Values are extended (with or without sign, according to
-the data type specification) to fit a whole register.
-@example
-ldr     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
-ldi     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
-ldxr    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
-ldxi    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
-@end example
-
-@item Store operations
-@code{st} accepts two operands while @code{stx} accepts three; in
-both cases, the first can be either a register or an immediate
-value. Values are sign-extended to fit a whole register.
-@example
-str     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
-sti     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
-stxr    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
-stxi    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
-@end example
-
-@item Argument management
-These are:
-@example
-prepare                   i                f  d
-pusharg     c  uc  s  us  i  ui  l  ul  p  f  d
-getarg      c  uc  s  us  i  ui  l  ul  p  f  d
-arg         c  uc  s  us  i  ui  l  ul  p  f  d
-retval      c  uc  s  us  i  ui  l  ul  p
-@end example
-
-Of these, the first two are used by the caller, while the last two
-are used by the callee.  A code snippet that wants to call another
-procedure and has to pass registers must, in order: use the
-@code{prepare} instruction, giving the number of arguments to
-be passed to the procedure (once for each data type); use
-@code{pusharg} to push the arguments @strong{in reverse order};
-and use @code{calli} or @code{finish} (explained below) to
-perform the actual call.
-
-@code{arg} and @code{getarg} are used by the callee.
-@code{arg} is different from other instruction in that it does not
-actually generate any code: instead, it is a function which returns
-a value to be passed to @code{getarg}.@footnote{``Return a
-value'' means that @lightning{} macros that compile these
-instructions return a value when expanded.} You should call
-@code{arg} as soon as possible, before any function call or, more
-easily, right after the @code{prolog} or @code{leaf} instructions
-(which are treated later).
-
-@code{getarg} accepts a register argument and a value returned by
-@code{arg}, and will move that argument to the register, extending
-it (with or without sign, according to the data type specification)
-to fit a whole register.  These instructions are more intimately
-related to the usage of the @lightning{} instruction set in code
-that generates other code, so they will be treated more
-specifically in @ref{GNU lightning macros, , Generating code at
-run-time}.
-
-Finally, the @code{retval} instruction fetches the return value of a
-called function in a register.  The @code{retval} instruction takes a
-register argument and copies the return value of the previously called
-function in that register.  A function should put its own return value
-in the @code{RET} register before returning.  @xref{Fibonacci, the
-Fibonacci numbers}, for an example.
-
-You should observe a few rules when using these macros.  First of
-all, it is not allowed to call functions with more than six arguments;
-this was done to simplify and speed up the implementation on
-architectures that use registers for parameter passing.
-
-You should not nest calls to @code{prepare}, nor call zero-argument
-functions (which do not need a call to @code{prepare}) inside a
-@code{prepare/calli} or @code{prepare/finish} block.  Doing this
-might corrupt already pushed arguments.
-
-You @strong{cannot} pass parameters between subroutines using
-the six general-purpose registers.  This might work only when
-targeting particular architectures.
-
-On the other hand, it is possible to assume that callee-saved registers
-(@code{R0} through @code{R2}) are not clobbered by another dynamically
-generated function which does not use them as operands in its code and
-which does not return a value.
-
-@item Branch instructions
-Like @code{arg}, these also return a value which, in this case,
-is to be used to compile forward branches as explained in
-@ref{Fibonacci, , Fibonacci numbers}.  They accept a pointer to the
-destination of the branch and two operands to be compared; of these,
-the last can be either a register or an immediate.  They are:
-@example
-bltr      i  ui  l  ul  p  f  d  @r{if }(O2 <  O3)@r{ goto }O1
-blti      i  ui  l  ul  p        @r{if }(O2 <  O3)@r{ goto }O1
-bler      i  ui  l  ul  p  f  d  @r{if }(O2 <= O3)@r{ goto }O1
-blei      i  ui  l  ul  p        @r{if }(O2 <= O3)@r{ goto }O1
-bgtr      i  ui  l  ul  p  f  d  @r{if }(O2 >  O3)@r{ goto }O1
-bgti      i  ui  l  ul  p        @r{if }(O2 >  O3)@r{ goto }O1
-bger      i  ui  l  ul  p  f  d  @r{if }(O2 >= O3)@r{ goto }O1
-bgei      i  ui  l  ul  p        @r{if }(O2 >= O3)@r{ goto }O1
-beqr      i  ui  l  ul  p  f  d  @r{if }(O2 == O3)@r{ goto }O1
-beqi      i  ui  l  ul  p        @r{if }(O2 == O3)@r{ goto }O1
-bner      i  ui  l  ul  p  f  d  @r{if }(O2 != O3)@r{ goto }O1
-bnei      i  ui  l  ul  p        @r{if }(O2 != O3)@r{ goto }O1
-
-bunltr                     f  d  @r{if }!(O2 >= O3)@r{ goto }O1
-bunler                     f  d  @r{if }!(O2 >  O3)@r{ goto }O1
-bungtr                     f  d  @r{if }!(O2 <= O3)@r{ goto }O1
-bunger                     f  d  @r{if }!(O2 <  O3)@r{ goto }O1
-buneqr                     f  d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
-bltgtr                     f  d  @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
-bordr                      f  d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
-bunordr                    f  d  @r{if }!(O2 != O2) ||  (O3 != O3)@r{ goto }O1
-
-bmsr      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
-bmsi      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
-bmcr      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1
-bmci      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.}
-boaddr    i ui l  ul             O2 += O3@r{, goto }O1@r{ on overflow}
-boaddi    i ui l  ul             O2 += O3@r{, goto }O1@r{ on overflow}
-bosubr    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on overflow}
-bosubi    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on overflow}
-@end example
-
-@item Jump and return operations
-These accept one argument except @code{ret} which has none; the
-difference between @code{finish} and @code{calli} is that the
-latter does not clean the stack from pushed parameters (if any)
-and the former must @strong{always} follow a @code{prepare}
-instruction.
-@example
-calli     (not specified)                  @r{function call to O1}
-callr     (not specified)                  @r{function call to a register}
-finish    (not specified)                  @r{function call to O1}
-finishr   (not specified)                  @r{function call to a register}
-jmpi/jmpr (not specified)                  @r{unconditional jump to O1}
-ret       (not specified)                  @r{return from subroutine}
-retval    c  uc s  us i  ui l  ul p  f  d  @r{move return value}
-                                           @r{to register}
-@end example
-
-Like branch instruction, @code{jmpi} also returns a value which is to
-be used to compile forward branches. @xref{Fibonacci, , Fibonacci
-numbers}.
-
-@item Function prolog
-
-These macros are used to set up the function prolog, in particular to
-declare the number of arguments accepted by a function, and to reserve
-space on the stack to be used for variables.  They accept a single
-numeric argument.
-
-@example
-prolog    (not specified)                  @r{function prolog for O1 args}
-leaf      (not specified)                  @r{the same for leaf functions}
-allocai   (not specified)                  @r{reserve space on the stack}
-@end example
-
-Results are undefined when using function calls in a leaf function.
-
-@code{allocai} receives the number of bytes to allocate and returns
-the offset from the frame pointer register @code{FP} to the base of
-the area.  The area is aligned to an @code{int}; future versions of
-@lightning{} may provide more fine-grained control on the alignment of
-stack-allocated variables.
-@end table
-
-As a small appetizer, here is a small function that adds 1 to the input
-parameter (an @code{int}).  I'm using an assembly-like syntax here which
-is a bit different from the one used when writing real subroutines with
-@lightning{}; the real syntax will be introduced in @xref{GNU lightning
-macros, , Generating code at run-time}.
-
-@example
-incr:
-     leaf      1
-in = arg_i                   @rem{! We have an integer argument}
-     getarg_i  R0, in        @rem{! Move it to R0}
-     addi_i    RET, R0, 1    @rem{! Add 1\, put result in return value}
-     ret                     @rem{! And return the result}
-@end example
-
-And here is another function which uses the @code{printf} function from
-the standard C library to write a number in hexadecimal notation:
-
-@example
-printhex:
-     prolog    1
-in = arg_i                    @rem{! Same as above}
-     getarg_i  R0, in
-     prepare   2              @rem{! Begin call sequence for printf}
-     pusharg_i R0             @rem{! Push second argument}
-     pusharg_p "%x"           @rem{! Push format string}
-     finish    printf         @rem{! Call printf}
-     ret                      @rem{! Return to caller}
-@end example
-
-@node GNU lightning macros
-@chapter Generating code at run-time
-
-To use @lightning{}, you should include the @file{lightning.h} file that
-is put in your include directory by the @samp{make install} command.
-That include files defines about four hundred public macros (plus
-others that are private to @lightning{}), one for each opcode listed
-above.
-
-Each of the instructions above translates to a macro.  All you have to
-do is prepend @code{jit_} (lowercase) to opcode names and @code{JIT_}
-(uppercase) to register names.  Of course, parameters are to be put
-between parentheses, just like with every other @sc{cpp} macro.
-
-This small tutorial presents three examples:
-
-@iftex
-@itemize @bullet
-@item
-The @code{incr} function found in @ref{The instruction set, ,
-@lightning{}'s instruction set}:
-
-@item
-A simple function call to @code{printf}
-
-@item
-An RPN calculator.
-
-@item
-Fibonacci numbers
-@end itemize
-@end iftex
-@ifnottex
-@menu
-* incr::             A function which increments a number by one
-* printf::           A simple function call to printf
-* RPN calculator::   A more complex example, an RPN calculator
-* Fibonacci::        Calculating Fibonacci numbers
-@end menu
-@end ifnottex
-
-@node incr
-@section A function which increments a number by one
-
-Let's see how to create and use the sample @code{incr} function created
-in @ref{The instruction set, , @lightning{}'s instruction set}:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);    @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi  incr = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int   in;
-
-  jit_leaf(1);                     @rem{/* @t{     leaf  1            } */}
-  in = jit_arg_i();                @rem{/* @t{in = arg_i              } */}
-  jit_getarg_i(JIT_R0, in);        @rem{/* @t{     getarg_i R0        } */}
-  jit_addi_i(JIT_RET, JIT_R0, 1);  @rem{/* @t{     addi_i   RET\, R0\, 1} */}
-  jit_ret();                       @rem{/* @t{     ret                } */}
-
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-
-  @rem{/* call the generated code\, passing 5 as an argument */}
-  printf("%d + 1 = %d\n", 5, incr(5));
-  return 0;
-@}
-@end example
-
-Let's examine the code line by line (well, almost@dots{}):
-
-@table @t
-@item #include "lightning.h"
-You already know about this.  It defines all of @lightning{}'s macros.
-
-@item static jit_insn codeBuffer[1024];
-You might wonder about what is @code{jit_insn}.  It is just a type that
-is defined by @lightning{}.  Its exact definition depends on the
-architecture; in general, defining an array of 1024 @code{jit_insn}s
-allows one to write 100 to 400 @lightning{} instructions (depending on
-the architecture and exact instructions).
-
-@item typedef int (*pifi)(int);
-Just a handy typedef for a pointer to a function that takes an
-@code{int} and returns another.
-
-@item pifi incr = (pifi) (jit_set_ip(codeBuffer).iptr);
-This is the first @lightning{} macro we encounter that does not map to
-an instruction.  It is @code{jit_set_ip}, which takes a pointer to an
-area of memory where compiled code will be put and returns the same
-value, cast to a @code{union} type whose members are pointers to
-functions returning different C types.  This union is called
-@code{jit_code} and is defined as follows:
-
-@example
-    typedef union jit_code @{
-      char               *ptr;
-      void               (*vptr)();
-      char               (*cptr)();
-      unsigned char      (*ucptr)();
-      short              (*sptr)();
-      unsigned short     (*usptr)();
-      int                (*iptr)();
-      unsigned int       (*uiptr)();
-      long               (*lptr)();
-      unsigned long      (*ulptr)();
-      void *             (*pptr)();
-      float              (*fptr)();
-      double             (*dptr)();
-    @} jit_code;
-@end example
-
-Any of the members could have been used, since the result is soon casted
-to type @code{pifi} but, for the sake of clarity, the program uses
-@code{iptr}, a pointer to a function with no prototype and returning an
-@code{int}.
-
-Analogous to @code{jit_set_ip} is @code{jit_get_ip}, which does not
-modify the instruction pointer---it is nothing more than a cast of the
-current @sc{ip} to @code{jit_code}.
-
-@item int       in;
-A footnote in @ref{The instruction set, , @lightning{}'s instruction
-set}, under the description of @code{arg}, says that macros implementing
-@code{arg} return a value---we'll be using this variable to store the
-result of @code{arg}.
-
-@item jit_leaf(1);
-Ok, so we start generating code for our beloved function@dots{} it will
-accept one argument and won't call any other function.
-
-@item in = jit_arg_i();
-@itemx jit_getarg_i(JIT_R0, in);
-We retrieve the first (and only) argument, an integer, and store it
-into the general-purpose register @code{R0}.
-
-@item jit_addi_i(JIT_RET, JIT_R0, 1);
-We add one to the content of the register and store the result in the
-return value.
-
-@item jit_ret();
-This instruction generates a standard function epilog that returns
-the contents of the @code{RET} register.
-
-@item jit_flush_code(codeBuffer, jit_get_ip().ptr);
-This instruction is very important.  It flushes the generated code
-area out of the processor's instruction cache, avoiding the processor
-executes bogus data that it happens to find there.  The
-@code{jit_flush_code} function accepts the first and the last address
-to flush; we use @code{jit_get_ip} to find out the latter.
-
-@item printf("%d + 1 = %d", 5, incr(5));
-Calling our function is this simple---it is not distinguishable from
-a normal C function call, the only difference being that @code{incr}
-is a variable.
-@end table
-
-@lightning{} abstracts two phases of dynamic code generation: selecting
-instructions that map the standard representation, and emitting binary
-code for these instructions.  The client program has the responsibility
-of describing the code to be generated using the standard @lightning{}
-instruction set.
-
-Let's examine the code generated for @code{incr} on the SPARC and x86
-architectures (on the right is the code that an assembly-language
-programmer would write):
-
-@table @b
-@item SPARC
-@example
-    save %sp, -96, %sp
-    mov  %i0, %l0                   retl
-    add  %l0, 1,  %i0               add %o0, 1, %o0
-    ret
-    restore
-@end example
-In this case, @lightning{} introduces overhead to create a register
-window (not knowing that the procedure is a leaf procedure) and to
-move the argument to the general purpose register @code{R0} (which
-maps to @code{%l0} on the SPARC).  The former overhead could be
-avoided by teaching @lightning{} about leaf procedures (@pxref{Future});
-the latter could instead be avoided by rewriting the getarg instruction
-as @code{jit_getarg_i(JIT_RET, in)}, which was not done in this
-example.
-
-@item x86
-@example
-    pushl %ebp
-    movl  %esp, %ebp
-    pushl %ebx
-    pushl %esi
-    pushl %edi
-    movl  8(%ebp), %eax        movl 4(%esp), %eax
-    addl  $1, %eax             incl %eax
-    popl  %edi
-    popl  %esi
-    popl  %ebx
-    popl  %ebp
-    ret                        ret
-@end example
-In this case, the main overhead is due to the function's prolog and
-epilog, which is nine instructions long on the x86; a hand-written
-routine would not save unused callee-preserved registers on the stack.
-It is to be said, however, that this is not a problem in more
-complicated uses, because more complex procedure would probably use
-the @code{V0} through @code{V2} registers (@code{%ebx}, @code{%esi},
-@code{%edi}); in this case, a hand-written routine would have included
-the prolog too.  Also, a ten byte prolog would probably be a small
-overhead in a more complex function.
-@end table
-
-In such a simple case, the macros that make up the back-end compile
-reasonably efficient code, with the notable exception of prolog/epilog
-code.
-
-@node printf
-@section A simple function call to @code{printf}
-
-Again, here is the code for the example:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef void (*pvfi)(int);      @rem{/* Pointer to Void Function of Int */}
-
-int main()
-@{
-  pvfi          myFunction;             @rem{/* ptr to generated code */}
-  char          *start, *end;           @rem{/* a couple of labels */}
-  int           in;                     @rem{/* to get the argument */}
-
-  myFunction = (pvfi) (jit_set_ip(codeBuffer).vptr);
-  start = jit_get_ip().ptr;
-  jit_prolog(1);
-  in = jit_arg_i();
-  jit_movi_p(JIT_R0, "generated %d bytes\n");
-  jit_getarg_i(JIT_R1, in);
-  jit_prepare(2);
-    jit_pusharg_i(JIT_R1);              @rem{/* push in reverse order */}
-    jit_pusharg_p(JIT_R0);
-  jit_finish(printf);
-  jit_ret();
-  end = jit_get_ip().ptr;
-
-  @rem{/* call the generated code\, passing its size as argument */}
-  jit_flush_code(start, end);
-  myFunction(end - start);
-@}
-@end example
-
-The function shows how many bytes were generated.  Most of the code
-is not very interesting, as it resembles very closely the program
-presented in @ref{incr, , A function which increments a number by one}.
-
-For this reason, we're going to concentrate on just a few statements.
-
-@table @t
-@item start = jit_get_ip().ptr;
-@itemx @r{@dots{}}
-@itemx end = jit_get_ip().ptr;
-These two instruction call the @code{jit_get_ip} macro which was
-mentioned in @ref{incr, , A function which increments a number by one}
-too.  In this case we use the only field of @code{jit_code} that is
-not a function pointer: @code{ptr}, which is a simple @code{char *}.
-
-@item jit_movi_p(JIT_R0, "generated %d bytes\n");
-Note the use of the @samp{p} type specifier, which automatically
-casts the second parameter to an @code{unsigned long} to make the
-code more clear and less cluttered by typecasts.
-
-@item jit_prepare(2);
-@itemx jit_pusharg_i(JIT_R1);
-@itemx jit_pusharg_p(JIT_R0);
-@itemx jit_finish(printf);
-Once the arguments to @code{printf} have been put in general-purpose
-registers, we can start a prepare/pusharg/finish sequence that
-moves the argument to either the stack or registers, then calls
-@code{printf}, then cleans up the stack.  Note how @lightning{}
-abstracts the differences between different architectures and
-ABI's -- the client program does not know how parameter passing
-works on the host architecture.
-@end table
-
-@node RPN calculator
-@section A more complex example, an RPN calculator
-
-We create a small stack-based RPN calculator which applies a series
-of operators to a given parameter and to other numeric operands.
-Unlike previous examples, the code generator is fully parameterized
-and is able to compile different formulas to different functions.
-Here is the code for the expression compiler; a sample usage will
-follow.
-
-Since @lightning{} does not provide push/pop instruction, this
-example uses a stack-allocated area to store the data.  Such an
-area can be allocated using the macro @code{jit_allocai}, which
-receives the number of bytes to allocate and returns the offset
-from the frame pointer register @code{JIT_FP} to the base of the
-area.  The area is aligned to an @code{int}; future versions
-of @lightning{} may provide more fine-grained control on the
-alignment of stack-allocated variables.
-
-Usually, you will use the @code{ldxi} and @code{stxi} instruction
-to access stack-allocated variables.  However, it is possible to
-use operations such as @code{add} to compute the address of the
-variables, and pass the address around.
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-void stack_push(int reg, int *sp)
-@{
-  jit_stxi_i (*sp, JIT_FP, reg);
-  *sp += sizeof (int);
-@}
-
-void stack_pop(int reg, int *sp)
-@{
-  *sp -= sizeof (int);
-  jit_ldxi_i (reg, JIT_FP, *sp);
-@}
-
-pifi compile_rpn(char *expr)
-@{
-  pifi fn;
-  int stack_base, stack_ptr;
-  int in;
-
-  fn = (pifi) (jit_get_ip().iptr);
-  jit_leaf(1);
-  in = jit_arg_i();
-  stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
-
-  jit_getarg_i(JIT_R2, in);
-
-  while (*expr) @{
-    char buf[32];
-    int n;
-    if (sscanf(expr, "%[0-9]%n", buf, &n)) @{
-      expr += n - 1;
-      stack_push(JIT_R0, &stack_ptr);
-      jit_movi_i(JIT_R0, atoi(buf));
-    @} else if (*expr == 'x') @{
-      stack_push(JIT_R0, &stack_ptr);
-      jit_movi_i(JIT_R0, JIT_R2);
-    @} else if (*expr == '+') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_addr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '-') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_subr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '*') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_mulr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else if (*expr == '/') @{
-      stack_pop(JIT_R1, &stack_ptr);
-      jit_divr_i(JIT_R0, JIT_R1, JIT_R0);
-    @} else @{
-      fprintf(stderr, "cannot compile: %s\n", expr);
-      abort();
-    @}
-    ++expr;
-  @}
-  jit_movr_i(JIT_RET, JIT_R0);
-  jit_ret();
-  return fn;
-@}
-@end example
-
-The principle on which the calculator is based is easy: the stack top
-is held in R0, while the remaining items of the stack are held in the
-memory area that we allocate with @code{allocai}.  Compiling a numeric
-operand or the argument @code{x} pushes the old stack top onto the
-stack and moves the operand into R0; compiling an operator pops the
-second operand off the stack into R1, and compiles the operation so
-that the result goes into R0, thus becoming the new stack top.
-
-This example allocates a fixed area for 32 @code{int}s.  This is not
-a problem when the function is a leaf like in this case; in a full-blown
-compiler you will want to analyze the input and determine the number
-of needed stack slots---a very simple example of register allocation.
-The area is then managed like a stack using @code{stack_push} and
-@code{stack_pop}.
-
-Try to locate a call to @code{jit_set_ip} in the source code.  You
-will not find one; this means that the client has to manually set
-the instruction pointer.  This technique has one advantage and one
-drawback.  The advantage is that the client can simply set the
-instruction pointer once and then generate code for multiple functions,
-one after another, without caring about passing a different instruction
-pointer each time; see @ref{Reentrancy, , Re-entrant usage of
-@lightning{}} for the disadvantage.
-
-Source code for the client (which lies in the same source file) follows:
-
-@example
-static jit_insn codeBuffer[1024];
-
-int main()
-@{
-  pifi c2f, f2c;
-  int i;
-
-  jit_set_ip(codeBuffer);
-  c2f = compile_rpn("32x9*5/+");
-  f2c = compile_rpn("x32-5*9/");
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-
-  printf("\nC:");
-  for (i = 0; i <= 100; i += 10) printf("%3d ", i);
-  printf("\nF:");
-  for (i = 0; i <= 100; i += 10) printf("%3d ", c2f(i));
-  printf("\n");
-
-  printf("\nF:");
-  for (i = 32; i <= 212; i += 10) printf("%3d ", i);
-  printf("\nC:");
-  for (i = 32; i <= 212; i += 10) printf("%3d ", f2c(i));
-  printf("\n");
-  return 0;
-@}
-@end example
-
-The client displays a conversion table between Celsius and Fahrenheit
-degrees (both Celsius-to-Fahrenheit and Fahrenheit-to-Celsius). The
-formulas are, @math{F(c) = c*9/5+32} and @math{C(f) = (f-32)*5/9},
-respectively.
-
-Providing the formula as an argument to @code{compile_rpn} effectively
-parameterizes code generation, making it possible to use the same code
-to compile different functions; this is what makes dynamic code
-generation so powerful.
-
-The @file{rpn.c} file in the @lightning{} distribution includes a more
-complete (and more complex) implementation of @code{compile_rpn},
-which does constant folding and is able to assemble instructions with
-an immediate parameter.  Still, it is based on the same principle and
-also uses @code{allocai} to allocate space for the stack.
-
-@node Fibonacci
-@section Fibonacci numbers
-
-The code in this section calculates a variant of the Fibonacci sequence.
-While the traditional Fibonacci sequence is modeled by the recurrence
-relation:
-@display
-     f(0) = f(1) = 1
-     f(n) = f(n-1) + f(n-2)
-@end display
-
-@noindent
-the functions in this section calculates the following sequence, which
-is more interesting as a benchmark@footnote{That's because, as is
-easily seen, the sequence represents the number of activations of the
-@code{nfibs} procedure that are needed to compute its value through
-recursion.}:
-@display
-     nfibs(0) = nfibs(1) = 1
-     nfibs(n) = nfibs(n-1) + nfibs(n-2) + 1
-@end display
-
-The purpose of this example is to introduce branches.  There are two
-kind of branches: backward branches and forward branches.  We'll
-present the calculation in a recursive and iterative form; the
-former only uses forward branches, while the latter uses both.
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi      nfibs = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int       in;                 @rem{/* offset of the argument */}
-  jit_insn  *ref;               @rem{/* to patch the forward reference */}
-
-        jit_prolog   (1);
-  in =  jit_arg_ui   ();
-        jit_getarg_ui(JIT_V0, in);              @rem{/* V0 = n */}
-  ref = jit_blti_ui  (jit_forward(), JIT_V0, 2);
-        jit_subi_ui  (JIT_V1, JIT_V0, 1);       @rem{/* V1 = n-1 */}
-        jit_subi_ui  (JIT_V2, JIT_V0, 2);       @rem{/* V2 = n-2 */}
-        jit_prepare(1);
-          jit_pusharg_ui(JIT_V1);
-        jit_finish(nfibs);
-        jit_retval(JIT_V1);                     @rem{/* V1 = nfibs(n-1) */}
-        jit_prepare(1);
-          jit_pusharg_ui(JIT_V2);
-        jit_finish(nfibs);
-        jit_retval(JIT_V2);                     @rem{/* V2 = nfibs(n-2) */}
-        jit_addi_ui(JIT_V1,  JIT_V1,  1);
-        jit_addr_ui(JIT_RET, JIT_V1, JIT_V2);   @rem{/* RET = V1 + V2 + 1 */}
-        jit_ret();
-
-  jit_patch(ref);                               @rem{/* patch jump */}
-        jit_movi_i(JIT_RET, 1);                 @rem{/* RET = 1 */}
-        jit_ret();
-
-  @rem{/* call the generated code\, passing 32 as an argument */}
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-  printf("nfibs(%d) = %d", 32, nfibs(32));
-  return 0;
-@}
-@end example
-
-As said above, this is the first example of dynamically compiling
-branches.  Branch instructions have three operands: two contains the
-values to be compared, while the first is a @dfn{label}; @lightning{}
-label's are represented as @code{jit_insn *} values.  Unlike other
-instructions (apart from @code{arg}, which is actually a directive
-rather than an instruction), branch instructions also return a value
-which, as we see in the example above, can be used to compile
-forward references.
-
-Compiling a forward reference is a two-step operation.  First, a
-branch is compiled with a dummy label, since the actual destination
-of the jump is not yet known; the dummy label is returned by the
-@code{jit_forward()} macro.  The value returned by the branch
-instruction is saved to be used later.
-
-Then, when the destination of the jump is reached, another macro
-is used, @code{jit_patch()}. This macro must be called once for
-@strong{every} point in which the code had a forward branch to the
-instruction following @code{jit_patch} (in this case a @code{movi_i}
-instruction).
-
-Now, here is the iterative version:
-
-@example
-#include <stdio.h>
-#include "lightning.h"
-
-static jit_insn codeBuffer[1024];
-
-typedef int (*pifi)(int);       @rem{/* Pointer to Int Function of Int */}
-
-int main()
-@{
-  pifi     nfibs = (pifi) (jit_set_ip(codeBuffer).iptr);
-  int      in;                  @rem{/* offset of the argument */}
-  jit_insn *ref;                @rem{/* to patch the forward reference */}
-  jit_insn *loop;               @rem{/* start of the loop */}
-
-        jit_leaf     (1);
-  in =  jit_arg_ui   ();
-        jit_getarg_ui(JIT_R2, in);              @rem{/* R2 = n */}
-        jit_movi_ui  (JIT_R1, 1);
-  ref = jit_blti_ui  (jit_forward(), JIT_R2, 2);
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);
-        jit_movi_ui  (JIT_R0, 1);
-
-  loop= jit_get_label();
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
-        jit_addr_ui  (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
-        jit_movr_ui  (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
-        jit_addi_ui  (JIT_R1, JIT_V0, 1);       @rem{/* R1 = V0 + 1 */}
-        jit_bnei_ui  (loop, JIT_R2, 0);         @rem{/* if (R2) goto loop; */}
-
-  jit_patch(ref);                               @rem{/* patch forward jump */}
-        jit_movr_ui  (JIT_RET, JIT_R1);         @rem{/* RET = R1 */}
-        jit_ret      ();
-
-  @rem{/* call the generated code\, passing 36 as an argument */}
-  jit_flush_code(codeBuffer, jit_get_ip().ptr);
-  printf("nfibs(%d) = %d", 36, nfibs(36));
-  return 0;
-@}
-@end example
-
-This code calculates the recurrence relation using iteration (a
-@code{for} loop in high-level languages).  There is still a forward
-reference (indicated by the @code{jit_forward}/@code{jit_patch} pair);
-there are no function calls anymore: instead, there is a backward
-jump (the @code{bnei} at the end of the loop).
-
-In this case, the destination address should be known, because the
-jumps lands on an instruction that has already been compiled.
-However the program must make a provision and remember the address
-where the jump will land.  This is achieved with @code{jit_get_label},
-yet another macro that is much similar to @code{jit_get_ip} but,
-instead of a @code{jit_code} union, it answers an @code{jit_insn *}
-that the branch macros accept.
-
-Now, let's make one more change: let's rewrite the loop like this:
-
-@example
-  @r{@dots{}}
-
-  jit_delay(
-        jit_movi_ui  (JIT_R1, 1),
-  ref = jit_blti_ui  (jit_forward(), JIT_R2, 2));
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);
-
-  loop= jit_get_label();
-        jit_subi_ui  (JIT_R2, JIT_R2, 1);       @rem{/* decr. counter */}
-        jit_addr_ui  (JIT_V0, JIT_R0, JIT_R1);  @rem{/* V0 = R0 + R1 */}
-        jit_movr_ui  (JIT_R0, JIT_R1);          @rem{/* R0 = R1 */}
-  jit_delay(
-        jit_addi_ui  (JIT_R1, JIT_V0, 1),       @rem{/* R1 = V0 + 1 */}
-        jit_bnei_ui  (loop, JIT_R2, 0));        @rem{/* if (R2) goto loop; */}
-
-  @r{@dots{}}
-@end example
-
-The @code{jit_delay} macro is used to schedule delay slots in jumps and
-branches.  This is optional, but might lead to performance improvements
-in tight inner loops (of course not in a loop that is executed 35
-times, but this is just an example).
-
-@code{jit_delay} takes two @lightning{} instructions, a @dfn{delay
-instruction} and a @dfn{branch instruction}.  Note that the two
-instructions must be written in execution order (first the delay
-instruction, then the branch instruction), @strong{not} with the branch
-first.  If the current machine has a delay slot, the delay instruction
-(or part of it) is placed in the delay slot after the branch
-instruction; otherwise, it emits the delay instruction before the branch
-instruction.  The delay instruction must not depend on being executed
-before or after the branch.
-
-Instead of @code{jit_patch}, you can use @code{jit_patch_at}, which
-takes two arguments: the first is the same as for @code{jit_patch}, and
-the second is the valued to be patched in.  In other words, these two
-invocations have the same effect:
-
-@example
-  jit_patch (jump_pc);
-  jit_patch_at (jump_pc, jit_get_ip ());
-@end example
-
-Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
-and @code{jit_patch_movi}, which can also be used to implement
-forward references.  @code{jit_movi_p} is carefully implemented
-to use an encoding that is as long as possible, so that it can
-always be patched; in addition, like branches, it will return
-an address which is then passed to @code{jit_patch_movi}.  The
-usage of @code{jit_patch_movi} is similar to @code{jit_patch_at}.
-
-@node Reentrancy
-@chapter Re-entrant usage of @lightning{}
-
-By default, @lightning{} is able to compile different functions at the
-same time as long as it happens in different object files, and on the
-other hand constrains code generation tasks to reside in a single
-object file.
-
-The reason for this is not apparent, but is easily explained:
-the @file{lightning.h} header file defines its state as a
-@code{static} variable, so calls to @code{jit_set_ip} and
-@code{jit_get_ip} residing in different files access different
-instruction pointers.  This was not done without reason: it makes
-the usage of @lightning{} much simpler, as it limits the initialization
-tasks to the bare minimum and removes the need to link the program
-with a separate library.
-
-On the other hand, multi-threaded or otherwise concurrent programs
-require reentrancy in the code generator, so this approach cannot be
-the only one.  In fact, it is possible to define your own copy of
-@lightning{}'s instruction state by defining a variable of type
-@code{jit_state} and @code{#define}-ing @code{_jit} to it:
-
-@example
-    struct jit_state lightning;
-    #define _jit lightning
-@end example
-
-You are free to define the @code{jit_state} variable as you like:
-@code{extern}, @code{static} to a function, @code{auto}, or global.
-
-This feature takes advantage of an aspect of macros (@dfn{cascaded
-macros}), which is documented thus in @acronym{CPP}'s reference manual:
-
-@quotation
-A cascade of macros is when one macro's body contains a reference to
-another macro.  This is very common practice.  For example,
-@example
-#define BUFSIZE 1020
-#define TABLESIZE BUFSIZE
-@end example
-This is not at all the same as defining @code{TABLESIZE} to be
-@samp{1020}.  The @code{#define} for @code{TABLESIZE} uses exactly the
-body you specify---in this case, @code{BUFSIZE}---and does not check to
-see whether it too is the name of a macro; it's only when you use
-@code{TABLESIZE} that the result of its expansion is checked for more
-macro names. 
-
-This makes a difference if you change the definition of @code{BUFSIZE}
-at some point in the source file. @code{TABLESIZE}, defined as shown,
-will always expand using the definition of @code{BUFSIZE} that is
-currently in effect: 
-#define BUFSIZE 1020
-#define TABLESIZE BUFSIZE
-#undef BUFSIZE
-#define BUFSIZE 37
-
-Now @code{TABLESIZE} expands (in two stages) to `37'. (The @code{#undef}
-is to prevent any warning about the nontrivial redefinition of
-@code{BUFSIZE}.)
-@end quotation
-
-@noindent
-In the same way, @code{jit_get_label} will adopt whatever definition of
-@code{_jit} is in effect:
-@example
-#define	jit_get_label()			(_jit.pc)
-@end example
-
-Special care must be taken when functions residing in separate files
-must access the same state.  This could be the case, for example, if a
-special library contained function for strength reduction of
-multiplications to adds & shifts, or maybe of divisions to
-multiplications and shifts.  The function would be compiled using a
-single definition of @code{_jit} and that definition would be used
-whenever the function would be called.
-
-Since @lightning{} uses a feature of the preprocessor to obtain
-re-entrancy, it makes sense to rely on the preprocessor in this case
-too.
-
-The idea is to pass the current @code{struct jit_state} to the
-function:
-
-@example
-static void
-_opt_muli_i(jit, dest, source, n)
-     register struct jit_state *jit;
-     register int		dest, source, n;
-@{
-#define _jit          jit
-@dots{}
-#undef _jit
-@}
-@end example
-
-@noindent
-doing this unbeknownst to the client, using a macro in the header file:
-
-@example
-extern void _opt_muli_i(struct jit_state *, int, int, int);
-
-#define opt_muli_i(rd, rs, n)	_opt_muli_i(&_jit, (rd), (rs), (n))
-@end example
-
-
-@section Registers
-@chapter Accessing the whole register file
-
-As mentioned earlier in this chapter, all @lightning{} back-ends are
-guaranteed to have at least six general-purpose integer registers and
-six floating-point registers, but many back-ends will have more.
-
-To access the entire register files, you can use the
-@code{JIT_R}, @code{JIT_V} and @code{JIT_FPR} macros.  They
-accept a parameter that identifies the register number, which
-must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
-and @code{JIT_FPR_NUM} respectively; the number need not be
-constant.  Of course, expressions like @code{JIT_R0} and
-@code{JIT_R(0)} denote the same register, and likewise for
-integer callee-saved, or floating-point, registers.
-
-@node Bundling GNU lightning
-@chapter Using @lightning{} in your programs
-
-It is very easy to include @lightning{}'s source code (without the
-documentation and examples) into your program's distribution 
-so that people don't need to have it installed in order to use it.
-
-Here is a step by step explanation of what to do:
-
-@enumerate
-@item Run @command{lightningize} from your package's main
-distribution directory.
-@example
-     lightningize
-@end example
-
-@noindent
-This will copy the source code for the @lightning{} back ends
-into the @file{lightning} directory of your package.
-
-@item If you're using Automake, you might be pleased to know that
-@file{Makefile.am} files will be already there.
-
-If you're not using Automake and @code{aclocal}, instead,
-you should delete the @file{Makefile.am} files (they are of no use
-to you) and copy the contents of the @file{lightning.m4} file, found in
-@command{aclocal}'s macro repository (usually @file{/usr/share/aclocal},
-to your @file{configure.in} or @file{acinclude.m4} or @file{aclocal.m4} file.
-
-@item Include a call to the @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND}
-macro in your @file{configure.in} file.
-@end enumerate
-
-@code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} will first look for a
-pre-installed copy of @lightning{} and, if it can be found, it will
-use it; otherwise, it will test if there is a back-end for the host
-system.  If @lightning{} is already installed, or if the system is
-supported by lightning, it will define the @code{HAVE_LIGHTNING}
-symbol.
-
-In addition, an Automake conditional named @code{HAVE_INSTALLED_LIGHTNING}
-will be set if @lightning{} is already installed, which can be used to
-set up include paths appropriately.
-
-Finally, @code{LIGHTNING_CONFIGURE_IF_NOT_FOUND} accepts two
-optional parameters: respectively, an action to be taken if @lightning{}
-is available, and an action to be taken if it is not.
diff --git a/doc/version.texi b/doc/version.texi
index c9347b90b..b7b6751ff 100644
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,4 +1,4 @@
-@set UPDATED 3 June 2009
-@set UPDATED-MONTH June 2009
-@set EDITION 1.2c
-@set VERSION 1.2c
+@set UPDATED 24 January 2013
+@set UPDATED-MONTH January 2013
+@set EDITION 2.0
+@set VERSION 2.0