From ba5044a6684e1af0fc20fc42da8cc5de609c9692 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Oct 2004 16:10:07 +0000 Subject: [PATCH] big merge git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-1 git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-2 --- .cvsignore | 1 + AUTHORS | 3 +- ChangeLog | 214 ++++++++++++++++++++++++++ NEWS | 11 +- THANKS | 8 + config/config.guess | 1 + config/config.sub | 1 + config/depcomp | 1 + config/mdate-sh | 1 + config/missing | 1 + config/texi2dvi | 4 +- config/texinfo.tex | 1 + doc/.cvsignore | 3 + doc/Makefile.am | 2 - doc/body.texi | 8 +- doc/lightning.texi | 5 - doc/porting.texi | 317 ++++++++++++++++++++++++++++++-------- doc/toc.texi | 1 - doc/using.texi | 297 ++++++++++++++++++++++++----------- lightning-inst.h | 5 +- lightning.h.in | 5 +- lightning/Makefile.am | 2 +- lightning/asm-common.h | 12 +- lightning/core-common.h | 98 +++++++++--- lightning/fp-common.h | 274 ++++++--------------------------- lightning/funcs-common.h | 6 + lightning/i386/asm.h | 50 +++--- lightning/i386/core.h | 39 ++--- lightning/i386/fp.h | 323 +++++++++++++++++++++++++-------------- lightning/i386/funcs.h | 53 ++++++- lightning/ppc/asm.h | 80 ++++++++-- lightning/ppc/core.h | 122 +++++++++------ lightning/ppc/fp.h | 217 +++++++++++++++++++------- lightning/ppc/funcs.h | 125 +++++++-------- lightning/sparc/asm.h | 80 ++++++++++ lightning/sparc/core.h | 60 +++++--- lightning/sparc/fp.h | 210 ++++++++++++------------- opcode/Makefile.am | 2 + tests/Makefile.am | 8 +- tests/bp.c | 89 +++++++++++ tests/bp.ok | 1 + tests/fib.c | 8 +- tests/fibit.c | 2 +- tests/funcfp.c | 204 ++++++++++++------------- tests/funcfp.ok | 2 +- tests/printf.c | 2 +- tests/rpnfp.c | 18 +-- tests/testfp.c | 101 ++++++------ tests/testfp.ok | 3 +- 49 files changed, 2022 insertions(+), 1059 deletions(-) create mode 100644 .cvsignore create mode 100644 THANKS create mode 120000 config/config.guess create mode 120000 
config/config.sub create mode 120000 config/depcomp create mode 120000 config/mdate-sh create mode 120000 config/missing create mode 120000 config/texinfo.tex create mode 100644 doc/.cvsignore create mode 100644 tests/bp.c create mode 100644 tests/bp.ok diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 000000000..d89921897 --- /dev/null +++ b/.cvsignore @@ -0,0 +1 @@ +autom4te.cache diff --git a/AUTHORS b/AUTHORS index d19bf6b35..bda81b472 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,3 @@ Paolo Bonzini -i386 and PPC assemblers by Ian Piumarta \ No newline at end of file +i386 and PPC assemblers by Ian Piumarta +Major PPC contributions by Laurent Michel diff --git a/ChangeLog b/ChangeLog index 5fbd089d6..41724988e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,217 @@ +2004-10-12 Paolo Bonzini + + * lightning/i386/fp.h: Fix bugs in conditional branches. + +2004-10-10 Paolo Bonzini + + * lightning/i386/funcs.h: Fix pasto in jit_flush_code. + +2004-10-08 Paolo Bonzini + + * lightning/ppc/fp.h: Optimized conditional branches. + +2004-09-20 Paolo Bonzini + + * lightning/ppc/asm.h: Fix more typos. + +2004-09-20 Paolo Bonzini + + * lightning/ppc/asm.h: Fix typos, replace `26' with JIT_AUX. + +2004-09-20 Paolo Bonzini + + * lightning/ppc/fp.h: Added conditional branches. + +2004-09-18 Laurent Michel + + * lightning/ppc/fp.h (jit_unler_d, jit_unltr_d, jit_unger_d, + jit_ungtr_d, jit_ltgt_d, jit_uneq_d): Implemented missing tests + to fully support testfp. + (jit_floorr_d_i, jit_ceilr_d_i, jit_roundr_d_i, jit_truncr_d_i): + New macros. + * lightning/ppc/asm.h: Added missing opcodes FCTIWZ and MTFSFI. + * lightning/ppc/funcs.h (_jit_prolog): Fixed minor mistake in + the initialization of _jitl.nextarg_geti, relying on the + JIT_AUX macro as well to get the register offset. + +2004-09-07 Paolo Bonzini + + * lightning/ppc/funcs.h: Fix typo. + +2004-09-06 Paolo Bonzini + + * tests/funcfp.c: Use %g. Remove C99 variable declarations. 
+ * tests/testfp.c: Don't use __builtin_nan. + + * lightning/ppc/core.h: Add three V registers. + * lightning/ppc/funcs.h: Adjust. + + * lightning/sparc/core.h: Some fixes related to FP argument passing. + Move R0 to %g2, use %o7 for JIT_BIG2. + * lightning/sparc/fp.h: Some fixes related to FP argument passing. + +2004-09-02 Paolo Bonzini + + * lightning/sparc/core.h: Add another V register, + move R0 to %o7. + +2004-07-15 Paolo Bonzini + + * lightning/i386/funcs.h: Implement jit_flush_code, + in order to support Fedora's exec-shield. + +2004-07-14 Paolo Bonzini + + * lightning/core-common.h: Add more jit_extr_*_* macros. + * lightning/doc/using.texi: Be clearer about the order + of arguments in jit_extr_*_*. + * lightning/doc/porting.texi: Add more jit_extr_*_* macros. + * lightning/i386/fp.h: Fix typo in jit_extr_i_d. + +2004-07-14 Paolo Bonzini + + * lightning/ppc/funcs.h: Adjust offset of LR into + stack frame if running under the Darwin ABI. + +2004-07-13 Paolo Bonzini + + * lightning/i386/fp.h: Rename jit_exti_d to jit_extr_i_d. + +2004-07-13 Paolo Bonzini + + * lightning/ppc/core.h: Fix thinko. + + * lightning/i386/core.h: Fix jit_lti_ui. + * lightning/core-common.h: Add missing macros. + + * lightning/ppc/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/i386/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/sparc/fp.h: Rename jit_neg_* to jit_negr_*. + * lightning/fp-common.h: Rename jit_neg_* to jit_negr_*. + * doc/porting.texi: Add undocumented macros. + +2004-07-12 Paolo Bonzini + + * doc/porting.texi: Add missing macros. + +2004-07-12 Paolo Bonzini + + * lightning/ppc/funcs.h: Don't generate trampolines. + Separate prolog and epilog generation. + * lightning/ppc/core.h: Generate epilog explicitly. + Don't reserve r31 anymore. + * lightning/core-common.h: Remove call to jit_setup_code. + +2004-07-09 Paolo Bonzini + + * lightning/lightning.h.in: Avoid preprocessor warnings. + * lightning/lightning-inst.h: Likewise. 
+ + * lightning/i386/core.h: Define JIT_R, JIT_R_NUM, JIT_V, + JIT_V_NUM. + * lightning/ppc/core.h: Likewise. + * lightning/sparc/core.h: Likewise. + * lightning/i386/fp.h: Define JIT_FPR, JIT_FPR_NUM. + * lightning/ppc/fp.h: Likewise. + * lightning/sparc/fp.h: Likewise. + * lightning/core-common.h: Define fixed register names. + * lightning/fp-common.h: Likewise for FP regs. + +2004-07-09 Paolo Bonzini + + * lightning/ppc/funcs.h: Fix location where return address + is stored. + * lightning/i386/asm.h: Add a trailing _ to opcodes without + any parameter. + * lightning/i386/core.h: Adjust for the above. + +2004-04-15 Paolo Bonzini + + * lightning/i386/fp.h: Change "and" to "_and" + to satisfy C++ compilers. + +2004-04-14 Paolo Bonzini + + * lightning/sparc/fp.h: Use memcpy to implement jit_movi. + * lightning/ppc/fp.h: Use memcpy to implement jit_movi. + Move floating-point opcodes... + * lightning/ppc/asm.h: ... here. + +2004-04-14 Paolo Bonzini + + * lightning/core-common.h: Add jit_finishr. + * lightning/ppc/core.h: Add jit_callr and jit_finishr. + * lightning/i386/core.h: Add jit_callr. + * lightning/sparc/core.h: Add jit_callr. Fix typo. + +2004-04-14 Paolo Bonzini + + * lightning/i386/core.h: Fix pasto in jit_b*_ui. + +2004-03-30 Laurent Michel + + * lightning/ppc: Implement PowerPC floating point + (ChangeLog entry missing). + +2004-03-12 Paolo Bonzini + + * lightning/fp-common.h: Load/store macros are not the + same for floats and doubles anywhere, but jit_retval may be. + * lightning/i386/asm.h: Fix = mistaken for == in ESCrri. + * lightning/i386/core.h: Fix typo in jit_prepare_[fd]. + * lightning/i386/fp.h: Rewritten. + * tests/testfp.c: Add tests for unordered comparisons. + * tests/testfp.ok: Add results. + +2004-03-15 Paolo Bonzini + + Merge changes from Laurent Michel. + + * lightning/asm-common.h: Add _jit_I_noinc. + * lightning/core-common.h: Support jit_init, + jit_setup_code, jit_patch_at. Return patchable IP from + jit_movi_p. 
+ * lightning/funcs-common.h: Provide defaults + for jit_setup_code, jit_start_pfx, jit_end_pfx + * lightning/i386/core.h: Add jit_patch_at, jit_patch_movi. + * lightning/ppc/core.h: Likewise. + * lightning/sparc/core.h: Likewise. + * lightning/ppc/asm.h: Fix generation of branch destination + displacements in _FB and _BB + * lightning/ppc/core.h: Generate trampolines in the user + area. + * lightning/ppc/funcs.h: Add a few casts. + * tests/bc.c: New testcase. + + * lightning/i386/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + * lightning/ppc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + * lightning/sparc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG. + + +2004-03-09 Paolo Bonzini + + * lightning/sparc/fp.h: Rewrite. Move macros for + FP code generation... + * lightning/sparc/asm.h: ... here. + * lightning/sparc/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. + * lightning/ppc/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. + * lightning/i386/core.h: Rename jit_prepare to + jit_prepare_i, jit_retval to jit_retval_i. + * lightning/core-common.h: Provide backwards + compatible synonyms for the above. + * lightning/fp-common.h: Rewrite. + * lightning-inst.h: Include fp unconditionally. + * lightning.h.in: Include fp unconditionally. + * tests/Makefile.am: Enable fp tests. + * tests/fib.c: Use jit_retval_i. + * tests/fibit.c: Cast codeBuffer to char *. + * tests/funcfp.c: Use new fp macros. + * tests/printf.c: Use jit_retval_i. + * tests/rpnfp.c: Use new fp macros. + * tests/testfp.c: Use new fp macros. + 2004-03-02 Paolo Bonzini * lightning/i386/core.h: generate correct code when diff --git a/NEWS b/NEWS index bae18f34f..a8b42b1a2 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,17 @@ NEWS FROM VERSION 1.1.2 TO 1.2 -o Floating-point interface rewritten, uses a common register - file architecture rather than a stack. +o Floating-point interface rewritten, uses a register file + architecture rather than a stack. o Many bug fixes. 
+o jit_prepare and jit_retval are now jit_prepare_i and + jit_retval_i. + +o Support for Fedora Core 1's exec-shield feature. + +o PPC supports both SysV and Darwin ABIs. + o More (and more complete) examples provided. --- diff --git a/THANKS b/THANKS new file mode 100644 index 000000000..76715415f --- /dev/null +++ b/THANKS @@ -0,0 +1,8 @@ +Thanks to all the following people for their help in +improving GNU lightning: + +Tom Tromey +Laurent Michel +Eli Barzilay +Jens Troeger +Basile Starynkevitch diff --git a/config/config.guess b/config/config.guess new file mode 120000 index 000000000..9c6e8c249 --- /dev/null +++ b/config/config.guess @@ -0,0 +1 @@ +/sw/share/automake-1.9/config.guess \ No newline at end of file diff --git a/config/config.sub b/config/config.sub new file mode 120000 index 000000000..9db5449ca --- /dev/null +++ b/config/config.sub @@ -0,0 +1 @@ +/sw/share/automake-1.9/config.sub \ No newline at end of file diff --git a/config/depcomp b/config/depcomp new file mode 120000 index 000000000..20f0b6142 --- /dev/null +++ b/config/depcomp @@ -0,0 +1 @@ +/sw/share/automake-1.9/depcomp \ No newline at end of file diff --git a/config/mdate-sh b/config/mdate-sh new file mode 120000 index 000000000..a3d6cb982 --- /dev/null +++ b/config/mdate-sh @@ -0,0 +1 @@ +/sw/share/automake-1.9/mdate-sh \ No newline at end of file diff --git a/config/missing b/config/missing new file mode 120000 index 000000000..4db5c1b96 --- /dev/null +++ b/config/missing @@ -0,0 +1 @@ +/sw/share/automake-1.9/missing \ No newline at end of file diff --git a/config/texi2dvi b/config/texi2dvi index 010b586b4..fa4d4e070 100755 --- a/config/texi2dvi +++ b/config/texi2dvi @@ -1,6 +1,6 @@ #! /bin/sh # texi2dvi --- produce DVI (or PDF) files from Texinfo (or LaTeX) sources. 
-# $Id: texi2dvi,v 1.14 2003/02/05 00:42:33 karl Exp $ +# $Id: texi2dvi,v 1.1.1.1 2004/03/03 12:51:44 bonzini Exp $ # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, # 2002, 2003 Free Software Foundation, Inc. @@ -27,7 +27,7 @@ # the `--debug' option when making a bug report. # This string is expanded by rcs automatically when this file is checked out. -rcs_revision='$Revision: 1.14 $' +rcs_revision='$Revision: 1.1.1.1 $' rcs_version=`set - $rcs_revision; echo $2` program=`echo $0 | sed -e 's!.*/!!'` version="texi2dvi (GNU Texinfo 4.5) $rcs_version diff --git a/config/texinfo.tex b/config/texinfo.tex new file mode 120000 index 000000000..02f11115b --- /dev/null +++ b/config/texinfo.tex @@ -0,0 +1 @@ +/sw/share/automake-1.9/texinfo.tex \ No newline at end of file diff --git a/doc/.cvsignore b/doc/.cvsignore new file mode 100644 index 000000000..01e2da890 --- /dev/null +++ b/doc/.cvsignore @@ -0,0 +1,3 @@ +*.info* +stamp-* +version.texi diff --git a/doc/Makefile.am b/doc/Makefile.am index 968812fca..10b4a465b 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,5 +1,3 @@ -EXTRA_DIST=lightning.info lightning.info-1 lightning.info-2 lightning.info-3 - TEXI2DVI=$(top_srcdir)/config/texi2dvi HELP2MAN = $(top_srcdir)/config/help2man diff --git a/doc/body.texi b/doc/body.texi index cccadd049..b385aed85 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -51,7 +51,7 @@ There are no Secondary Sections, no Cover Texts and no Invariant Sections Info documentation, constitutes the Title Page. @end titlepage -@ifclear ISTEX +@ifnottex @node Top @top @lightning{} @@ -61,17 +61,17 @@ which are usually either inefficient or non-portable, @lightning{} is both retargetable and very fast. @include toc.texi -@end ifclear +@end ifnottex @node Overview @chapter Introduction to @lightning{} -@ifset ISTEX +@iftex This document describes @value{TOPIC} the @lightning{} library for dynamic code generation. 
Unlike other dynamic code generation systems, which are usually either inefficient or non-portable, @lightning{} is both retargetable and very fast. -@end ifset +@end iftex @ifclear USING This manual assumes that you are pretty comfortable with the usage of diff --git a/doc/lightning.texi b/doc/lightning.texi index dee770e1d..a336a3db0 100644 --- a/doc/lightning.texi +++ b/doc/lightning.texi @@ -36,11 +36,6 @@ @c Macros for Texinfo 3.1/4.0 compatibility @c --------------------------------------------------------------------- -@c Emulate the `@ifnottex' command which is found in Texinfo 4.0 -@iftex -@set ISTEX -@end iftex - @c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1 @c compatibility @macro hlink{url, link} diff --git a/doc/porting.texi b/doc/porting.texi index 74233975d..9f68d86e9 100644 --- a/doc/porting.texi +++ b/doc/porting.texi @@ -353,16 +353,20 @@ that make up the platform-independent interface provided by Implementation of forward references takes place in: @itemize @bullet -@bulletize The branch macros -@bulletize The @code{jit_patch} macros +@item +The branch macros + +@item +The @code{jit_patch_at} macros @end itemize Roughly speaking, the branch macros, as seen in @usingref{GNU lightning macros, Generating code at run-time}, return a value that later calls -to @code{jit_patch} use to complete the assembly of the forward -reference. This value is usually the contents of the program counter -after the branch instruction is compiled (which is accessible in the -@code{_jit.pc} variable). Let's see an example from the x86 back-end: +to @code{jit_patch} or @code{jit_patch_at} use to complete the assembly +of the forward reference. This value is usually the contents of the +program counter after the branch instruction is compiled (which is +accessible in the @code{_jit.pc} variable). 
Let's see an example from +the x86 back-end: @example #define jit_bmsr_i(label, s1, s2) \ @@ -374,7 +378,7 @@ the combination of a @code{TEST} instruction (bit-wise @sc{and} between the two operands) and a @code{JNZ} instruction (jump if non-zero). The macro then returns the final value of the program counter. -@code{jit_patch} is one of the few macros that need to possess a +@code{jit_patch_at} is one of the few macros that need to possess a knowledge of the machine's instruction formats. Its purpose is to patch a branch instruction (identified by the value returned at the moment the branch was compiled) to jump to the current position (that @@ -382,11 +386,11 @@ is, to the address identified by @code{_jit.pc}). On the x86, the displacement between the jump and the landing point is expressed as a 32-bit signed integer lying in the last four bytes of the -jump instruction. The definition of @code{_jit_patch} is: +jump instruction. The definition of @code{jit_patch_at} is: @example -#define jit_patch(jump_pc) (*_PSL((jump_pc) - 4) = \ - _jit.pc - (jump_pc)) +#define jit_patch_at(jump_pc, pv) (*_PSL((jump_pc) - 4) = \ + (pv) - (jump_pc)) @end example The @code{_PSL} macro is nothing more than a cast to @code{long *}, @@ -394,42 +398,69 @@ and is used here to shorten the definition and avoid cluttering it with excessive parentheses. These type-cast macros are: @itemize @bullet -@bulletize @code{_PUC(X)} to cast to a @code{unsigned char *}. -@bulletize @code{_PUS(X)} to cast to a @code{unsigned short *}. -@bulletize @code{_PUI(X)} to cast to a @code{unsigned int *}. -@bulletize @code{_PSL(X)} to cast to a @code{long *}. -@bulletize @code{_PUL(X)} to cast to a @code{unsigned long *}. +@item +@code{_PUC(X)} to cast to a @code{unsigned char *}. + +@item +@code{_PUS(X)} to cast to a @code{unsigned short *}. + +@item +@code{_PUI(X)} to cast to a @code{unsigned int *}. + +@item +@code{_PSL(X)} to cast to a @code{long *}. 
+ +@item +@code{_PUL(X)} to cast to a @code{unsigned long *}. @end itemize On other platforms, notably RISC ones, the displacement is embedded into -the instruction itself. In this case, @code{jit_patch} must first zero +the instruction itself. In this case, @code{jit_patch_at} must first zero out the field, and then @sc{or} in the correct displacement. The SPARC, for example, encodes the displacement in the bottom 22 bits; in addition the right-most two bits are suppressed, which are always zero because instruction have to be word-aligned. @example -#define jit_patch(delay_pc) jit_patch_ ( ((delay_pc) - 1) ) +#define jit_patch_at(delay_pc, pv) jit_patch_ (((delay_pc) - 1), (pv)) @rem{/* branch instructions return the address of the @emph{delay} * instruction---this is just a helper macro that makes the code more * readable. */} -#define jit_patch_(jump_pc) (*jump_pc = \ +#define jit_patch_(jump_pc, pv) (*jump_pc = \ (*jump_pc & ~_MASK(22)) | \ - ((_UL(_jit.pc) - _UL(jump_pc)) >> 2) & _MASK(22)) + ((_UL(pv) - _UL(jump_pc)) >> 2) & _MASK(22)) @end example This introduces more predefined shortcut macros: @itemize @bullet -@bulletize @code{_UC(X)} to cast to a @code{unsigned char}. -@bulletize @code{_US(X)} to cast to a @code{unsigned short}. -@bulletize @code{_UI(X)} to cast to a @code{unsigned int}. -@bulletize @code{_SL(X)} to cast to a @code{long}. -@bulletize @code{_UL(X)} to cast to a @code{unsigned long}. -@bulletize @code{_MASK(N)} gives a binary number made of N ones. +@item +@code{_UC(X)} to cast to a @code{unsigned char}. + +@item +@code{_US(X)} to cast to a @code{unsigned short}. + +@item +@code{_UI(X)} to cast to a @code{unsigned int}. + +@item +@code{_SL(X)} to cast to a @code{long}. + +@item +@code{_UL(X)} to cast to a @code{unsigned long}. + +@item +@code{_MASK(N)} gives a binary number made of N ones. 
@end itemize +Dual to branches and @code{jit_patch_at} are @code{jit_movi_p} +and @code{jit_patch_movi}, since they can also be used to implement +forward references. @code{jit_movi_p} should be carefully implemented +to use an encoding that is as long as possible, and it should return +an address which is then passed to @code{jit_patch_movi}. The +implementation of @code{jit_patch_movi} is similar to +@code{jit_patch_at}. @node Common features @section Common features supported by @file{core-common.h} @@ -448,14 +479,16 @@ avoids compiler warnings about redefined macros, but there should be no need to define them. They are: @example #define jit_extr_c_ui(d, rs) -#define jit_extr_i_ul(d, rs) #define jit_extr_s_ui(d, rs) +#define jit_extr_c_ul(d, rs) +#define jit_extr_s_ul(d, rs) +#define jit_extr_i_ul(d, rs) #define jit_negr_i(d, rs) #define jit_negr_l(d, rs) @end example @item Support for the @sc{abi} -Both @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} are not +All of @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} are not mandatory. If not defined, they will be defined respectively as an empty macro, as a synonym for @code{jit_prolog}, and as a synonym for @code{jit_calli}. Whether to define them or not in the port-specific @@ -471,8 +504,12 @@ and ``reverse subtraction'' (that is, REG2@math{=}IMM@math{-}REG1): @example #define jit_extr_c_i(d, rs) #define jit_extr_s_i(d, rs) +#define jit_extr_c_l(d, rs) +#define jit_extr_s_l(d, rs) +#define jit_extr_i_l(d, rs) #define jit_rsbi_i(d, rs, is) #define jit_rsbi_l(d, rs, is) +#define jit_rsbi_p(d, rs, is) @end example @item Conversion between network and host byte ordering @@ -510,7 +547,7 @@ unsigned integers is exactly the same as adding two signed integers @lightning{} provides both @code{jit_addr_i} and @code{jit_addr_ui} macros. 
Similarly, pointers and unsigned long integers behave in the same way, but @lightning{} has separate instruction for the two data -types---those that operate on pointers usually comprise a typecast +types---those that operate on pointers usually include a typecast that makes programs clearer. @item Shortcuts @@ -553,7 +590,7 @@ instruction to be scheduled in the delay slot with the branch instruction. The only parameter accepted by the macro is a call to a branch macro, which must be expanded @strong{exactly once} by @code{jit_fill_delay_after}. The client must be able to pass the -return value of @code{jit_fill_delay_after} to @code{jit_patch}. +return value of @code{jit_fill_delay_after} to @code{jit_patch_at}. There are two possible approaches that can be used in @code{jit_fill_delay_after}. They are summarized in the following @@ -701,9 +738,9 @@ in @file{core-common.h} (@pxref{Common features, , Common features supported by @file{core-common.h}}). @example -#define jit_prepare(numargs) (_jitl.pusharg = _Ro(numargs)) -#define jit_pusharg_i(rs) (--_jitl.pusharg, \ - MOVrr((rs), _jitl.pusharg)) +#define jit_prepare_i(numargs) (_jitl.pusharg = _Ro(numargs)) +#define jit_pusharg_i(rs) (--_jitl.pusharg, \ + MOVrr((rs), _jitl.pusharg)) @end example Remember that arguments pushing takes place in reverse order, thus @@ -759,18 +796,18 @@ epilog code. @code{jit_pusharg} uses a hardware push operation, which is commonly available on CISC machines (where this approach is most likely followed). Since the stack has to be cleaned up after the call, -@code{jit_prepare} remembers how many parameters have been put there, +@code{jit_prepare_i} remembers how many parameters have been put there, and @code{jit_finish} adjusts the stack pointer after the call. 
@example -#define jit_prepare(numargs) (_jitl.args += (numargs)) -#define jit_pusharg_i(rs) PUSHLr(rs) -#define jit_finish(sub) (jit_calli((sub)), \ - ADDLir(4 * _jitl.args, JIT_SP), \ - _jitl.numargs = 0) +#define jit_prepare_i(numargs) (_jitl.args += (numargs)) +#define jit_pusharg_i(rs) PUSHLr(rs) +#define jit_finish(sub) (jit_calli((sub)), \ + ADDLir(4 * _jitl.args, JIT_SP), \ + _jitl.args = 0) @end example -Note the usage of @code{+=} in @code{jit_prepare}. This is done +Note the usage of @code{+=} in @code{jit_prepare_i}. This is done so that one can defer the popping of the arguments that were saved on the stack (@dfn{stack pollution}). To do so, it is sufficient to use @code{jit_calli} instead of @code{jit_finish} in all but the @@ -823,12 +860,12 @@ operations: @table @b @item Register names (all mandatory but the last two) @example -#define JIT_R0 -#define JIT_R1 -#define JIT_R2 -#define JIT_V0 -#define JIT_V1 -#define JIT_V2 +#define JIT_R +#define JIT_R_NUM +#define JIT_V +#define JIT_V_NUM +#define JIT_FPR +#define JIT_FPR_NUM #define JIT_SP #define JIT_FP #define JIT_RZERO @@ -850,57 +887,81 @@ operations: #define jit_arg_ui() #define jit_arg_ul() #define jit_arg_us() +#define jit_abs_d(rd,rs) #define jit_addi_i(d, rs, is) +#define jit_addr_d(rd,s1,s2) #define jit_addr_i(d, s1, s2) #define jit_addxi_i(d, rs, is) #define jit_addxr_i(d, s1, s2) #define jit_andi_i(d, rs, is) #define jit_andr_i(d, s1, s2) #define jit_beqi_i(label, rs, is) +#define jit_beqr_d(label, s1, s2) #define jit_beqr_i(label, s1, s2) #define jit_bgei_i(label, rs, is) #define jit_bgei_ui(label, rs, is) +#define jit_bger_d(label, s1, s2) #define jit_bger_i(label, s1, s2) #define jit_bger_ui(label, s1, s2) #define jit_bgti_i(label, rs, is) #define jit_bgti_ui(label, rs, is) +#define jit_bgtr_d(label, s1, s2) #define jit_bgtr_i(label, s1, s2) #define jit_bgtr_ui(label, s1, s2) #define jit_blei_i(label, rs, is) #define jit_blei_ui(label, rs, is) +#define jit_bler_d(label, s1, s2) 
#define jit_bler_i(label, s1, s2) #define jit_bler_ui(label, s1, s2) +#define jit_bltgtr_d(label, s1, s2) #define jit_blti_i(label, rs, is) #define jit_blti_ui(label, rs, is) +#define jit_bltr_d(label, s1, s2) #define jit_bltr_i(label, s1, s2) #define jit_bltr_ui(label, s1, s2) -#define jit_boaddi_i(label, rs, is) -#define jit_boaddi_ui(label, rs, is) -#define jit_boaddr_i(label, s1, s2) -#define jit_boaddr_ui(label, s1, s2) -#define jit_bosubi_i(label, rs, is) -#define jit_bosubi_ui(label, rs, is) -#define jit_bosubr_i(label, s1, s2) -#define jit_bosubr_ui(label, s1, s2) #define jit_bmci_i(label, rs, is) #define jit_bmcr_i(label, s1, s2) #define jit_bmsi_i(label, rs, is) #define jit_bmsr_i(label, s1, s2) #define jit_bnei_i(label, rs, is) +#define jit_bner_d(label, s1, s2) #define jit_bner_i(label, s1, s2) +#define jit_boaddi_i(label, rs, is) +#define jit_boaddi_ui(label, rs, is) +#define jit_boaddr_i(label, s1, s2) +#define jit_boaddr_ui(label, s1, s2) +#define jit_bordr_d(label, s1, s2) +#define jit_bosubi_i(label, rs, is) +#define jit_bosubi_ui(label, rs, is) +#define jit_bosubr_i(label, s1, s2) +#define jit_bosubr_ui(label, s1, s2) +#define jit_buneqr_d(label, s1, s2) +#define jit_bunger_d(label, s1, s2) +#define jit_bungtr_d(label, s1, s2) +#define jit_bunler_d(label, s1, s2) +#define jit_bunltr_d(label, s1, s2) +#define jit_bunordr_d(label, s1, s2) #define jit_calli(label) +#define jit_callr(label) +#define jit_ceilr_d_i(rd, rs) #define jit_divi_i(d, rs, is) #define jit_divi_ui(d, rs, is) +#define jit_divr_d(rd,s1,s2) #define jit_divr_i(d, s1, s2) #define jit_divr_ui(d, s1, s2) #define jit_eqi_i(d, rs, is) +#define jit_eqr_d(d, s1, s2) #define jit_eqr_i(d, s1, s2) +#define jit_extr_i_d(rd, rs) +#define jit_floorr_d_i(rd, rs) #define jit_gei_i(d, rs, is) #define jit_gei_ui(d, s1, s2) +#define jit_ger_d(d, s1, s2) #define jit_ger_i(d, s1, s2) #define jit_ger_ui(d, s1, s2) #define jit_gti_i(d, rs, is) #define jit_gti_ui(d, s1, s2) +#define jit_gtr_d(d, s1, s2) 
#define jit_gtr_i(d, s1, s2) #define jit_gtr_ui(d, s1, s2) #define jit_hmuli_i(d, rs, is) @@ -909,61 +970,93 @@ operations: #define jit_hmulr_ui(d, s1, s2) #define jit_jmpi(label) #define jit_jmpr(reg) +#define jit_ldxi_f(rd, rs, is) +#define jit_ldxr_f(rd, s1, s2) #define jit_ldxi_c(d, rs, is) +#define jit_ldxi_d(rd, rs, is) #define jit_ldxi_i(d, rs, is) #define jit_ldxi_s(d, rs, is) #define jit_ldxi_uc(d, rs, is) #define jit_ldxi_us(d, rs, is) #define jit_ldxr_c(d, s1, s2) +#define jit_ldxr_d(rd, s1, s2) #define jit_ldxr_i(d, s1, s2) #define jit_ldxr_s(d, s1, s2) #define jit_ldxr_uc(d, s1, s2) #define jit_ldxr_us(d, s1, s2) #define jit_lei_i(d, rs, is) #define jit_lei_ui(d, s1, s2) +#define jit_ler_d(d, s1, s2) #define jit_ler_i(d, s1, s2) #define jit_ler_ui(d, s1, s2) #define jit_lshi_i(d, rs, is) #define jit_lshr_i(d, r1, r2) +#define jit_ltgtr_d(d, s1, s2) #define jit_lti_i(d, rs, is) #define jit_lti_ui(d, s1, s2) +#define jit_ltr_d(d, s1, s2) #define jit_ltr_i(d, s1, s2) #define jit_ltr_ui(d, s1, s2) #define jit_modi_i(d, rs, is) #define jit_modi_ui(d, rs, is) #define jit_modr_i(d, s1, s2) #define jit_modr_ui(d, s1, s2) +#define jit_movi_d(rd,immd) +#define jit_movi_f(rd,immf) #define jit_movi_i(d, is) +#define jit_movi_p(d, is) +#define jit_movr_d(rd,rs) #define jit_movr_i(d, rs) #define jit_muli_i(d, rs, is) #define jit_muli_ui(d, rs, is) +#define jit_mulr_d(rd,s1,s2) #define jit_mulr_i(d, s1, s2) #define jit_mulr_ui(d, s1, s2) +#define jit_negr_d(rd,rs) #define jit_nei_i(d, rs, is) +#define jit_ner_d(d, s1, s2) #define jit_ner_i(d, s1, s2) #define jit_nop() +#define jit_ordr_d(d, s1, s2) #define jit_ori_i(d, rs, is) #define jit_orr_i(d, s1, s2) -#define jit_patch(jump_pc) +#define jit_patch_at(jump_pc, value) +#define jit_patch_movi(jump_pc, value) #define jit_pop_i(rs) -#define jit_prepare(numargs) +#define jit_prepare_d(numargs) +#define jit_prepare_f(numargs) +#define jit_prepare_i(numargs) #define jit_push_i(rs) #define jit_pusharg_i(rs) #define 
jit_ret() #define jit_retval_i(rd) +#define jit_roundr_d_i(rd, rs) #define jit_rshi_i(d, rs, is) #define jit_rshi_ui(d, rs, is) #define jit_rshr_i(d, r1, r2) #define jit_rshr_ui(d, r1, r2) +#define jit_sqrt_d(rd,rs) #define jit_stxi_c(rd, id, rs) +#define jit_stxi_d(id, rd, rs) +#define jit_stxi_f(id, rd, rs) #define jit_stxi_i(rd, id, rs) #define jit_stxi_s(rd, id, rs) #define jit_stxr_c(d1, d2, rs) +#define jit_stxr_d(d1, d2, rs) +#define jit_stxr_f(d1, d2, rs) #define jit_stxr_i(d1, d2, rs) #define jit_stxr_s(d1, d2, rs) +#define jit_subr_d(rd,s1,s2) #define jit_subr_i(d, s1, s2) #define jit_subxi_i(d, rs, is) #define jit_subxr_i(d, s1, s2) +#define jit_truncr_d_i(rd, rs) +#define jit_uneqr_d(d, s1, s2) +#define jit_unger_d(d, s1, s2) +#define jit_ungtr_d(d, s1, s2) +#define jit_unler_d(d, s1, s2) +#define jit_unltr_d(d, s1, s2) +#define jit_unordr_d(d, s1, s2) #define jit_xori_i(d, rs, is) #define jit_xorr_i(d, s1, s2) @end example @@ -971,17 +1064,20 @@ operations: @item Non mandatory---there should be no need to define them: @example #define jit_extr_c_ui(d, rs) -#define jit_extr_i_ul(d, rs) #define jit_extr_s_ui(d, rs) +#define jit_extr_c_ul(d, rs) +#define jit_extr_s_ul(d, rs) +#define jit_extr_i_ul(d, rs) #define jit_negr_i(d, rs) #define jit_negr_l(d, rs) @end example @item Non mandatory---whether to define them depends on the @sc{abi}: @example -#define jit_prolog() -#define jit_finish() -#define jit_leaf() +#define jit_prolog(n) +#define jit_finish(sub) +#define jit_finishr(reg) +#define jit_leaf(n) #define jit_getarg_c(reg, ofs) #define jit_getarg_i(reg, ofs) #define jit_getarg_l(reg, ofs) @@ -991,12 +1087,17 @@ operations: #define jit_getarg_ui(reg, ofs) #define jit_getarg_ul(reg, ofs) #define jit_getarg_us(reg, ofs) +#define jit_getarg_f(reg, ofs) +#define jit_getarg_d(reg, ofs) @end example @item Non mandatory---define them if instructions that do this exist: @example #define jit_extr_c_i(d, rs) #define jit_extr_s_i(d, rs) +#define jit_extr_c_l(d, 
rs) +#define jit_extr_s_l(d, rs) +#define jit_extr_i_l(d, rs) #define jit_rsbi_i(d, rs, is) #define jit_rsbi_l(d, rs, is) @end example @@ -1037,6 +1138,14 @@ operations: #define jit_str_c(rd, rs) #define jit_str_i(rd, rs) #define jit_str_s(rd, rs) +#define jit_ldi_f(rd, is) +#define jit_sti_f(id, rs) +#define jit_ldi_d(rd, is) +#define jit_sti_d(id, rs) +#define jit_ldr_f(rd, rs) +#define jit_str_f(rd, rs) +#define jit_ldr_d(rd, rs) +#define jit_str_d(rd, rs) @end example @item Synonyms---don't define them: @@ -1085,14 +1194,20 @@ operations: #define jit_eqr_p(d, s1, s2) #define jit_eqr_ui(d, s1, s2) #define jit_eqr_ul(d, s1, s2) +#define jit_extr_c_s(d, rs) +#define jit_extr_c_us(d, rs) +#define jit_extr_uc_s(d, rs) +#define jit_extr_uc_us(d, rs) #define jit_extr_uc_i(d, rs) #define jit_extr_uc_ui(d, rs) -#define jit_extr_ui_l(d, rs) -#define jit_extr_ui_l(d, rs) -#define jit_extr_ui_ul(d, rs) -#define jit_extr_ui_ul(d, rs) #define jit_extr_us_i(d, rs) #define jit_extr_us_ui(d, rs) +#define jit_extr_uc_l(d, rs) +#define jit_extr_uc_ul(d, rs) +#define jit_extr_us_l(d, rs) +#define jit_extr_us_ul(d, rs) +#define jit_extr_ui_l(d, rs) +#define jit_extr_ui_ul(d, rs) #define jit_gei_p(d, rs, is) #define jit_ger_p(d, s1, s2) #define jit_gti_p(d, rs, is) @@ -1145,8 +1260,10 @@ operations: #define jit_retval_ui(rd) #define jit_retval_ul(rd) #define jit_retval_us(rd) +#define jit_rsbi_p(d, rs, is) #define jit_rsbi_ui(d, rs, is) #define jit_rsbi_ul(d, rs, is) +#define jit_rsbr_p(d, rs, is) #define jit_rsbr_ui(d, s1, s2) #define jit_rsbr_ul(d, s1, s2) #define jit_sti_p(d, is) @@ -1175,6 +1292,12 @@ operations: #define jit_subr_p(d, s1, s2) #define jit_subr_ui(d, s1, s2) #define jit_subr_ul(d, s1, s2) +#define jit_subxi_p(d, rs, is) +#define jit_subxi_ui(d, rs, is) +#define jit_subxi_ul(d, rs, is) +#define jit_subxr_p(d, s1, s2) +#define jit_subxr_ui(d, s1, s2) +#define jit_subxr_ul(d, s1, s2) #define jit_xori_ui(d, rs, is) #define jit_xori_ul(d, rs, is) #define jit_xorr_ui(d, 
s1, s2) @@ -1183,6 +1306,19 @@ operations: @item Shortcuts---don't define them: @example +#define JIT_R0 +#define JIT_R1 +#define JIT_R2 +#define JIT_V0 +#define JIT_V1 +#define JIT_V2 +#define JIT_FPR0 +#define JIT_FPR1 +#define JIT_FPR2 +#define JIT_FPR3 +#define JIT_FPR4 +#define JIT_FPR5 +#define jit_patch(jump_pc) #define jit_notr_c(d, rs) #define jit_notr_i(d, rs) #define jit_notr_l(d, rs) @@ -1191,12 +1327,61 @@ operations: #define jit_notr_ui(d, rs) #define jit_notr_ul(d, rs) #define jit_notr_us(d, rs) +#define jit_rsbr_d(d, s1, s2) #define jit_rsbr_i(d, s1, s2) #define jit_rsbr_l(d, s1, s2) #define jit_subi_i(d, rs, is) #define jit_subi_l(d, rs, is) @end example +@item Mandatory unless target arithmetic is always done in the same precision: +@example +#define jit_abs_f(rd,rs) +#define jit_addr_f(rd,s1,s2) +#define jit_beqr_f(label, s1, s2) +#define jit_bger_f(label, s1, s2) +#define jit_bgtr_f(label, s1, s2) +#define jit_bler_f(label, s1, s2) +#define jit_bltgtr_f(label, s1, s2) +#define jit_bltr_f(label, s1, s2) +#define jit_bner_f(label, s1, s2) +#define jit_bordr_f(label, s1, s2) +#define jit_buneqr_f(label, s1, s2) +#define jit_bunger_f(label, s1, s2) +#define jit_bungtr_f(label, s1, s2) +#define jit_bunler_f(label, s1, s2) +#define jit_bunltr_f(label, s1, s2) +#define jit_bunordr_f(label, s1, s2) +#define jit_ceilr_f_i(rd, rs) +#define jit_divr_f(rd,s1,s2) +#define jit_eqr_f(d, s1, s2) +#define jit_extr_d_f(rs, rd) +#define jit_extr_f_d(rs, rd) +#define jit_extr_i_f(rd, rs) +#define jit_floorr_f_i(rd, rs) +#define jit_ger_f(d, s1, s2) +#define jit_gtr_f(d, s1, s2) +#define jit_ler_f(d, s1, s2) +#define jit_ltgtr_f(d, s1, s2) +#define jit_ltr_f(d, s1, s2) +#define jit_movr_f(rd,rs) +#define jit_mulr_f(rd,s1,s2) +#define jit_negr_f(rd,rs) +#define jit_ner_f(d, s1, s2) +#define jit_ordr_f(d, s1, s2) +#define jit_roundr_f_i(rd, rs) +#define jit_rsbr_f(d, s1, s2) +#define jit_sqrt_f(rd,rs) +#define jit_subr_f(rd,s1,s2) +#define jit_truncr_f_i(rd, rs) 
+#define jit_uneqr_f(d, s1, s2) +#define jit_unger_f(d, s1, s2) +#define jit_ungtr_f(d, s1, s2) +#define jit_unler_f(d, s1, s2) +#define jit_unltr_f(d, s1, s2) +#define jit_unordr_f(d, s1, s2) +@end example + @item Mandatory if sizeof(long) != sizeof(int)---don't define them on other systems: @example #define jit_addi_l(d, rs, is) @@ -1241,6 +1426,12 @@ operations: #define jit_divr_ul(d, s1, s2) #define jit_eqi_l(d, rs, is) #define jit_eqr_l(d, s1, s2) +#define jit_extr_c_l(d, rs) +#define jit_extr_c_ul(d, rs) +#define jit_extr_s_l(d, rs) +#define jit_extr_s_ul(d, rs) +#define jit_extr_i_l(d, rs) +#define jit_extr_i_ul(d, rs) #define jit_gei_l(d, rs, is) #define jit_gei_ul(d, rs, is) #define jit_ger_l(d, s1, s2) diff --git a/doc/toc.texi b/doc/toc.texi index 8a3702291..56b2e1049 100644 --- a/doc/toc.texi +++ b/doc/toc.texi @@ -7,7 +7,6 @@ * Installation:: Configuring and installing GNU lightning * The instruction set:: The RISC instruction set used i GNU lightning * GNU lightning macros:: GNU lightning's macros -* Floating-point:: Doing floating point computations. * Reentrancy:: Re-entrant usage of GNU lightning * Autoconf support:: Using @code{autoconf} with GNU lightning @end ifset diff --git a/doc/using.texi b/doc/using.texi index a65a048e5..9a0a7c070 100644 --- a/doc/using.texi +++ b/doc/using.texi @@ -49,9 +49,14 @@ that closely match those of most existing RISC architectures, or that can be easily syntesized if absent. 
Each instruction is composed of: @itemize @bullet -@bulletize an operation (like @code{sub} or @code{mul}) -@bulletize sometimes, an register/immediate flag (@code{r} or @code{i}) -@bulletize a type identifier (occasionally, two) +@item +an operation, like @code{sub} or @code{mul} + +@item +sometimes, a register/immediate flag (@code{r} or @code{i}) + +@item +a type identifier or, occasionally, two @end itemize The second and third field are separated by an underscore; thus, @@ -75,6 +80,8 @@ following table together with the C types they represent: ui @r{unsigned int} l @r{long} ul @r{unsigned long} + f @r{float} + d @r{double} p @r{void *} @end example @@ -82,27 +89,31 @@ Some of these types may not be distinct: for example, (e.g., @code{l} is equivalent to @code{i} on 32-bit machines, and @code{p} is substantially equivalent to @code{ul}). -There are seven registers, of which six are general-purpose, while -the last is used to contain the stack pointer (@code{SP}). The -stack pointer can be used to allocate and access local variables -on the stack (which is supposed to grow downwards in memory on all -architectures). +There are at least seven integer registers, of which six are +general-purpose, while the last is used to contain the stack pointer +(@code{SP}). The stack pointer can be used to allocate and access local +variables on the stack (which is supposed to grow downwards in memory +on all architectures). -Of the six general-purpose registers, three are guaranteed to be +Of the general-purpose registers, at least three are guaranteed to be preserved across function calls (@code{V0}, @code{V1} and -@code{V2}) and three are not (@code{R0}, @code{R1} and -@code{R2}).@footnote{Six registers are not very much, but this +@code{V2}) and at least three are not (@code{R0}, @code{R1} and +@code{R2}). Six registers are not very much, but this restriction was forced by the need to target CISC architectures -which, like the x86, are poor of registers. 
Anyway, consider -that even on a RISC architecture you don't have many more registers -which are not devoted to function calls: on the SPARC, you have nine -(@code{%g1} and the eight registers @code{%l0} through @code{%l7}).} +which, like the x86, are poor of registers; anyway, backends can +specify the actual number of available caller- and callee-save +registers. In addition, there is a special @code{RET} register which contains the return value. You should always remember, however, that writing this register could overwrite either a general-purpose register or an incoming parameter, depending on the architecture. +There are at least six floating-point registers, named @code{FPR0} to +@code{FPR5}. These are separate from the integer registers on +all the supported architectures; on Intel architectures, the +register stack is mapped to a flat register file. + The complete instruction set follows; as you can see, most non-memory operations only take integers, long integers (either signed or unsigned) and pointers as operands; this was done in order to reduce @@ -113,61 +124,117 @@ signed and in an unsigned way. @table @b @item Binary ALU operations -These accept three operands, of which the last can be an immediate -value. @code{addx} operations must directly follow @code{addc}, and +These accept three operands; the last one can be an immediate +value for integer operands, or a register for all operand types. +@code{addx} operations must directly follow @code{addc}, and @code{subx} must follow @code{subc}; otherwise, results are undefined. 
@example -addr/addi i ui l ul p O1 = O2 + O3 -addxr/addxi i ui l ul O1 = O2 + (O3 + carry) -addcr/addci i ui l ul O1 = O2 + O3, set carry -subr/subi i ui l ul p O1 = O2 - O3 -subxr/subxi i ui l ul O1 = O2 - (O3 + carry) -subcr/subci i ui l ul O1 = O2 - O3, set carry -rsbr/rsbi i ui l ul p O1 = O3 - O2 -mulr/muli i ui l ul O1 = O2 * O3 -hmulr/hmuli i ui l ul O1 = @r{high bits of} O2 * O3 -divr/divi i ui l ul O1 = O2 / O3 -modr/modi i ui l ul O1 = O2 % O3 -andr/andi i ui l ul O1 = O2 & O3 -orr/ori i ui l ul O1 = O2 | O3 -xorr/xori i ui l ul O1 = O2 ^ O3 -lshr/lshi i ui l ul O1 = O2 << O3 -rshr/rshi i ui l ul O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.} +addr i ui l ul p f d O1 = O2 + O3 +addi i ui l ul p O1 = O2 + O3 +addxr i ui l ul O1 = O2 + (O3 + carry) +addxi i ui l ul O1 = O2 + (O3 + carry) +addcr i ui l ul O1 = O2 + O3, set carry +addci i ui l ul O1 = O2 + O3, set carry +subr i ui l ul p f d O1 = O2 - O3 +subi i ui l ul p O1 = O2 - O3 +subxr i ui l ul O1 = O2 - (O3 + carry) +subxi i ui l ul O1 = O2 - (O3 + carry) +subcr i ui l ul O1 = O2 - O3, set carry +subci i ui l ul O1 = O2 - O3, set carry +rsbr i ui l ul p f d O1 = O3 - O2 +rsbi i ui l ul p O1 = O3 - O2 +mulr i ui l ul f d O1 = O2 * O3 +muli i ui l ul O1 = O2 * O3 +hmulr i ui l ul O1 = @r{high bits of} O2 * O3 +hmuli i ui l ul O1 = @r{high bits of} O2 * O3 +divr i ui l ul f d O1 = O2 / O3 +divi i ui l ul O1 = O2 / O3 +modr i ui l ul O1 = O2 % O3 +modi i ui l ul O1 = O2 % O3 +andr i ui l ul O1 = O2 & O3 +andi i ui l ul O1 = O2 & O3 +orr i ui l ul O1 = O2 | O3 +ori i ui l ul O1 = O2 | O3 +xorr i ui l ul O1 = O2 ^ O3 +xori i ui l ul O1 = O2 ^ O3 +lshr i ui l ul O1 = O2 << O3 +lshi i ui l ul O1 = O2 << O3 +rshr i ui l ul O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.} +rshi i ui l ul O1 = O2 >> O3@footnote{The sign bit is propagated for signed types.} @end example @item Unary ALU operations These accept two operands, both of which must be registers. 
@example -negr i l O1 = -O2 -notr i ui l ul O1 = ~O2 +negr i l f d O1 = -O2 +notr i ui l ul O1 = ~O2 @end example @item Compare instructions -These accept three operands, of which the last can be an immediate -value. The last two operands are compared, and the first operand is -set to either 0 or 1, according to whether the given condition was -met or not. +These accept three operands; again, the last can be an immediate +value for integer data types. The last two operands are compared, +and the first operand is set to either 0 or 1, according to +whether the given condition was met or not. + +The conditions given below are for the standard behavior of C, +where the ``unordered'' comparison result is mapped to false. @example -ltr/lti i ui l ul p O1 = (O2 < O3) -ler/lei i ui l ul p O1 = (O2 <= O3) -gtr/gti i ui l ul p O1 = (O2 > O3) -ger/gei i ui l ul p O1 = (O2 >= O3) -eqr/eqi i ui l ul p O1 = (O2 == O3) -ner/nei i ui l ul p O1 = (O2 != O3) +ltr i ui l ul p f d O1 = (O2 < O3) +lti i ui l ul p O1 = (O2 < O3) +ler i ui l ul p f d O1 = (O2 <= O3) +lei i ui l ul p O1 = (O2 <= O3) +gtr i ui l ul p f d O1 = (O2 > O3) +gti i ui l ul p O1 = (O2 > O3) +ger i ui l ul p f d O1 = (O2 >= O3) +gei i ui l ul p O1 = (O2 >= O3) +eqr i ui l ul p f d O1 = (O2 == O3) +eqi i ui l ul p O1 = (O2 == O3) +ner i ui l ul p f d O1 = (O2 != O3) +nei i ui l ul p O1 = (O2 != O3) +unltr f d O1 = !(O2 >= O3) +unler f d O1 = !(O2 > O3) +ungtr f d O1 = !(O2 <= O3) +unger f d O1 = !(O2 < O3) +uneqr f d O1 = !(O2 < O3) && !(O2 > O3) +ltgtr f d O1 = !(O2 >= O3) || !(O2 <= O3) +ordr f d O1 = (O2 == O2) && (O3 == O3) +unordr f d O1 = (O2 != O2) || (O3 != O3) @end example @item Transfer operations These accept two operands; for @code{ext} both of them must be registers, while @code{mov} accepts an immediate value as the second -operand. 
@code{ext} needs @strong{two} data type specifications, of -which the first must be smaller in size than the second; for example -@code{extr_c_ui} is correct while @code{extr_ul_us} is not. +operand. + +Unlike @code{movr} and @code{movi}, the other instructions are applied +between operands of different data types, and they need @strong{two} +data type specifications. You can use @code{extr} to convert between +integer data types, in which case the first must be smaller in size +than the second; for example @code{extr_c_ui} is correct while +@code{extr_ul_us} is not. You can also use @code{extr} to convert +an integer to a floating point value: the only available possibilities +are @code{extr_i_f} and @code{extr_i_d}. The other instructions +convert a floating point value to an integer, so the possible +suffixes are @code{_f_i} and @code{_d_i}. + @example -movr/movi i ui l ul p O1 = O2 -extr c uc s us i ui l ul O1 = O2@footnote{Unlike @code{movr} and @code{movi}, @code{extr} is applied between operands of different sizes.} +movr i ui l ul p f d O1 = O2 +movi i ui l ul p f d O1 = O2 +extr c uc s us i ui l ul f d O1 = O2 +roundr i f d O1 = round(O2) +truncr i f d O1 = trunc(O2) +floorr i f d O1 = floor(O2) +ceilr i f d O1 = ceil(O2) @end example +Note that the order of the arguments is @emph{destination first, +source second} as for all other @lightning{} instructions, but +the order of the types is always reversed with respect to that +of the arguments: @emph{shorter}---source---@emph{first, +longer}---destination---@emph{second}. This happens for historical +reasons. + @item Network extensions These accept two operands, both of which must be registers; these two instructions actually perform the same task, yet they are @@ -185,8 +252,10 @@ in both cases, the last can be either a register or an immediate value. Values are extended (with or without sign, according to the data type specification) to fit a whole register. 
@example -ldr/ldi c uc s us i ui l ul p O1 = *O2 -ldxr/ldxi c uc s us i ui l ul p O1 = *(O2+O3) +ldr c uc s us i ui l ul p f d O1 = *O2 +ldi c uc s us i ui l ul p f d O1 = *O2 +ldxr c uc s us i ui l ul p f d O1 = *(O2+O3) +ldxi c uc s us i ui l ul p f d O1 = *(O2+O3) @end example @item Store operations @@ -194,8 +263,10 @@ ldxr/ldxi c uc s us i ui l ul p O1 = *(O2+O3) both cases, the first can be either a register or an immediate value. Values are sign-extended to fit a whole register. @example -str/sti c uc s us i ui l ul p *O1 = O2 -stxr/stxi c uc s us i ui l ul p *(O1+O2) = O3 +str c uc s us i ui l ul p f d *O1 = O2 +sti c uc s us i ui l ul p f d *O1 = O2 +stxr c uc s us i ui l ul p f d *(O1+O2) = O3 +stxi c uc s us i ui l ul p f d *(O1+O2) = O3 @end example @item Stack management @@ -203,26 +274,27 @@ These accept a single register parameter. These operations are not guaranteed to be efficient on all architectures. @example -pushr i ui l ul p @r{push }O1@r{ on the stack} -popr i ui l ul p @r{pop }O1@r{ off the stack} +pushr i ui l ul p @r{push }O1@r{ on the stack} +popr i ui l ul p @r{pop }O1@r{ off the stack} @end example @item Argument management These are: @example -prepare (not specified) -pusharg c uc s us i ui l ul p -getarg c uc s us i ui l ul p -arg c uc s us i ui l ul p +prepare i f d +pusharg c uc s us i ui l ul p f d +getarg c uc s us i ui l ul p f d +arg c uc s us i ui l ul p f d @end example Of these, the first two are used by the caller, while the last two are used by the callee. A code snippet that wants to call another procedure and has to pass registers must, in order: use the @code{prepare} instruction, giving the number of arguments to -be passed to the procedure; use @code{pusharg} to push the arguments -@strong{in reverse order}; and use @code{calli} or @code{finish} -(explained below) to perform the actual call. 
+be passed to the procedure (once for each data type); use +@code{pusharg} to push the arguments @strong{in reverse order}; +and use @code{calli} or @code{finish} (explained below) to +perform the actual call. @code{arg} and @code{getarg} are used by the callee. @code{arg} is different from other instruction in that it does not @@ -269,18 +341,36 @@ is to be used to compile forward branches as explained in destination of the branch and two operands to be compared; of these, the last can be either a register or an immediate. They are: @example -bltr/blti i ui l ul p @r{if }O2 < O3@r{ goto }O1 -bler/blei i ui l ul p @r{if }O2 <= O3@r{ goto }O1 -bgtr/bgti i ui l ul p @r{if }O2 > O3@r{ goto }O1 -bger/bgei i ui l ul p @r{if }O2 >= O3@r{ goto }O1 -beqr/beqi i ui l ul p @r{if }O2 == O3@r{ goto }O1 -bner/bnei i ui l ul p @r{if }O2 != O3@r{ goto }O1 +bltr i ui l ul p f d @r{if }(O2 < O3)@r{ goto }O1 +blti i ui l ul p @r{if }(O2 < O3)@r{ goto }O1 +bler i ui l ul p f d @r{if }(O2 <= O3)@r{ goto }O1 +blei i ui l ul p @r{if }(O2 <= O3)@r{ goto }O1 +bgtr i ui l ul p f d @r{if }(O2 > O3)@r{ goto }O1 +bgti i ui l ul p @r{if }(O2 > O3)@r{ goto }O1 +bger i ui l ul p f d @r{if }(O2 >= O3)@r{ goto }O1 +bgei i ui l ul p @r{if }(O2 >= O3)@r{ goto }O1 +beqr i ui l ul p f d @r{if }(O2 == O3)@r{ goto }O1 +beqi i ui l ul p @r{if }(O2 == O3)@r{ goto }O1 +bner i ui l ul p f d @r{if }(O2 != O3)@r{ goto }O1 +bnei i ui l ul p @r{if }(O2 != O3)@r{ goto }O1 -bmsr/bmsi i ui l ul @r{if }O2 & O3@r{ goto }O1 -bmcr/bmci i ui l ul @r{if }!(O2 & O3)@r{ goto }O1@footnote{These two mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.} +bunltr f d @r{if }!(O2 >= O3)@r{ goto }O1 +bunler f d @r{if }!(O2 > O3)@r{ goto }O1 +bungtr f d @r{if }!(O2 <= O3)@r{ goto }O1 +bunger f d @r{if }!(O2 < O3)@r{ goto }O1 +buneqr f d @r{if }!(O2 < O3) && !(O2 > O3)@r{ goto }O1 +bltgtr f d @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1 +bordr f d @r{if } (O2 == O2) && (O3 == O3)@r{ goto }O1 
+bunordr f d @r{if } (O2 != O2) || (O3 != O3)@r{ goto }O1 -boaddr/boaddi i ui l ul O2 += O3@r{, goto }O1@r{ on overflow} -bosubr/bosubi i ui l ul O2 -= O3@r{, goto }O1@r{ on overflow} +bmsr i ui l ul @r{if }O2 & O3@r{ goto }O1 +bmsi i ui l ul @r{if }O2 & O3@r{ goto }O1 +bmcr i ui l ul @r{if }!(O2 & O3)@r{ goto }O1 +bmci i ui l ul @r{if }!(O2 & O3)@r{ goto }O1@footnote{These mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask cleared}.} +boaddr i ui l ul O2 += O3@r{, goto }O1@r{ on overflow} +boaddi i ui l ul O2 += O3@r{, goto }O1@r{ on overflow} +bosubr i ui l ul O2 -= O3@r{, goto }O1@r{ on overflow} +bosubi i ui l ul O2 -= O3@r{, goto }O1@r{ on overflow} @end example @item Jump and return operations @@ -291,14 +381,16 @@ and the former must @strong{always} follow a @code{prepare} instruction. Results are undefined when using function calls in a leaf function. @example -calli (not specified) @r{function call to O1} -finish (not specified) @r{function call to O1} -jmpi/jmpr (not specified) @r{unconditional jump to O1} -prolog (not specified) @r{function prolog for O1 args} -leaf (not specified) @r{the same for leaf functions} -ret (not specified) @r{return from subroutine} -retval c uc s us i ui l ul p @r{move return value} - @r{to register} +calli (not specified) @r{function call to O1} +callr (not specified) @r{function call to a register} +finish (not specified) @r{function call to O1} +finishr (not specified) @r{function call to a register} +jmpi/jmpr (not specified) @r{unconditional jump to O1} +prolog (not specified) @r{function prolog for O1 args} +leaf (not specified) @r{the same for leaf functions} +ret (not specified) @r{return from subroutine} +retval c uc s us i ui l ul p f d @r{move return value} + @r{to register} @end example Like branch instruction, @code{jmpi} also returns a value which is to @@ -353,7 +445,7 @@ between parentheses, just like with every other @sc{cpp} macro. 
This small tutorial presents three examples: -@ifset ISTEX +@iftex @itemize @bullet @item The @code{incr} function found in @ref{The instruction set, , @@ -368,15 +460,15 @@ An RPN calculator. @item Fibonacci numbers @end itemize -@end ifset -@ifclear ISTEX +@end iftex +@ifnottex @menu * incr:: A function which increments a number by one * printf:: A simple function call to printf * RPN calculator:: A more complex example, an RPN calculator * Fibonacci:: Calculating Fibonacci numbers @end menu -@end ifclear +@end ifnottex @node incr @section A function which increments a number by one @@ -931,8 +1023,23 @@ instruction; otherwise, it emits the delay instruction before the branch instruction. The delay instruction must not depend on being executed before or after the branch. -@node Floating-point -@chapter Doing floating point computations +Instead of @code{jit_patch}, you can use @code{jit_patch_at}, which +takes two arguments: the first is the same as for @code{jit_patch}, and +the second is the value to be patched in. In other words, these two +invocations have the same effect: + +@example + jit_patch (jump_pc); + jit_patch_at (jump_pc, jit_get_ip ()); +@end example + +Dual to branches and @code{jit_patch_at} are @code{jit_movi_p} +and @code{jit_patch_movi}, which can also be used to implement +forward references. @code{jit_movi_p} is carefully implemented +to use an encoding that is as long as possible, so that it can +always be patched; in addition, like branches, it will return +an address which is then passed to @code{jit_patch_movi}. The +usage of @code{jit_patch_movi} is similar to @code{jit_patch_at}. 
@node Reentrancy @chapter Re-entrant usage of @lightning{} @@ -1040,6 +1147,22 @@ extern void _opt_muli_i(struct jit_state *, int, int, int); @end example +@section Registers +@chapter Accessing the whole register file + +As mentioned earlier in this chapter, all @lightning{} back-ends +are guaranteed to have at least six integer registers and six +floating-point registers, but many back-ends will have more. + +To access the entire register files, you can use the +@code{JIT_R}, @code{JIT_V} and @code{JIT_FPR} macros. They +accept a parameter that identifies the register number, which +must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM} +and @code{JIT_FPR_NUM} respectively; the number need not be +constant. Of course, expressions like @code{JIT_R0} and +@code{JIT_R(0)} denote the same register, and likewise for +integer callee-saved, or floating-point, registers. + @node Autoconf support @chapter Using @code{autoconf} with @lightning{} diff --git a/lightning-inst.h b/lightning-inst.h index 5cf1af2ca..96f41a416 100644 --- a/lightning-inst.h +++ b/lightning-inst.h @@ -39,7 +39,6 @@ extern "C" { #endif #include -#include #ifndef LIGHTNING_DEBUG #include @@ -48,11 +47,9 @@ extern "C" { #include #include #include +#include #include - -#ifdef jit_cmp #include -#endif #ifndef JIT_R0 #error GNU lightning does not support the current target diff --git a/lightning.h.in b/lightning.h.in index fd1a4d79c..78367f93c 100644 --- a/lightning.h.in +++ b/lightning.h.in @@ -62,7 +62,6 @@ extern "C" { #endif #include -#include #ifndef LIGHTNING_DEBUG #include @@ -71,11 +70,9 @@ extern "C" { #include #include #include +#include #include - -#ifdef jit_cmp #include -#endif #ifdef LIGHTNING_DISASSEMBLE extern void disassemble(FILE *stream, char *from, char *to); diff --git a/lightning/Makefile.am b/lightning/Makefile.am index d032e2a89..d02dd2950 100644 --- a/lightning/Makefile.am +++ b/lightning/Makefile.am @@ -12,5 +12,5 @@ dist_pkgdata_DATA = Makefile.am 
nobase_dist_lightning_HEADERS = $(LIGHTNING_FILES) nodist_lightning_HEADERS = asm.h core.h funcs.h fp.h else -dist_noinst_HEADERS = $(LIGHTNING_FILES) lightning.h +dist_noinst_HEADERS = $(LIGHTNING_FILES) endif diff --git a/lightning/asm-common.h b/lightning/asm-common.h index b0a11948b..42c8814a9 100644 --- a/lightning/asm-common.h +++ b/lightning/asm-common.h @@ -88,12 +88,11 @@ typedef unsigned int _ui; typedef long _sl; typedef unsigned long _ul; -#define _jit_UC(X) ((_uc )(X)) -#define _jit_US(X) ((_us )(X)) -#define _jit_UI(X) ((_ui )(X)) -#define _jit_SL(X) ((_sl )(X)) -#define _jit_UL(X) ((_ul )(X)) - +#define _jit_UC(X) ((_uc )(X)) +#define _jit_US(X) ((_us )(X)) +#define _jit_UI(X) ((_ui )(X)) +#define _jit_SL(X) ((_sl )(X)) +#define _jit_UL(X) ((_ul )(X)) # define _PUC(X) ((_uc *)(X)) # define _PUS(X) ((_us *)(X)) # define _PUI(X) ((_ui *)(X)) @@ -104,6 +103,7 @@ typedef unsigned long _ul; #define _jit_W(W) _jit_UL(((*_jit.x.us_pc++)= _jit_US((W)&0xffff))) #define _jit_I(I) _jit_UL(((*_jit.x.ui_pc++)= _jit_UI((I) ))) #define _jit_L(L) _jit_UL(((*_jit.x.ul_pc++)= _jit_UL((L) ))) +#define _jit_I_noinc(I) _jit_UL(((*_jit.x.ui_pc)= _jit_UI((I) ))) #define _MASK(N) ((unsigned)((1<<(N)))-1) #define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) diff --git a/lightning/core-common.h b/lightning/core-common.h index 23b474706..9310ee23f 100644 --- a/lightning/core-common.h +++ b/lightning/core-common.h @@ -45,14 +45,24 @@ typedef struct { struct jit_local_state jitl; } jit_state; +#ifdef jit_init +static jit_state _jit = jit_init (); +#else static jit_state _jit; +#endif #define JIT_NOREG (-1) +#define JIT_R0 JIT_R(0) +#define JIT_R1 JIT_R(1) +#define JIT_R2 JIT_R(2) +#define JIT_V0 JIT_V(0) +#define JIT_V1 JIT_V(1) +#define JIT_V2 JIT_V(2) #define _jitl _jit.jitl #define jit_get_ip() (*(jit_code *) &_jit.x.pc) -#define jit_set_ip(ptr) (_jit.x.pc = (jit_insn *) ptr, jit_get_ip()) +#define jit_set_ip(ptr) (_jit.x.pc = (ptr), jit_get_ip ()) #define 
jit_get_label() (_jit.x.pc) #define jit_forward() (_jit.x.pc) @@ -138,16 +148,24 @@ typedef union jit_code { #define jit_subci_ul(d, rs, is) jit_subci_l((d), (rs), (is)) #define jit_subcr_ul(d, s1, s2) jit_subcr_l((d), (s1), (s2)) #define jit_subxi_ui(d, rs, is) jit_subxi_i((d), (rs), (is)) +#define jit_subxi_ul(d, rs, is) jit_subxi_l((d), (rs), (is)) #define jit_subxr_ui(d, s1, s2) jit_subxr_i((d), (s1), (s2)) +#define jit_subxr_ul(d, s1, s2) jit_subxr_i((d), (s1), (s2)) #define jit_xori_ul(d, rs, is) jit_xori_l((d), (rs), (is)) #define jit_xorr_ul(d, s1, s2) jit_xorr_l((d), (s1), (s2)) #define jit_addr_p(d, s1, s2) jit_addr_ul((d), (s1), (s2)) #define jit_addi_p(d, rs, is) jit_addi_ul((d), (rs), (long) (is)) #define jit_movr_p(d, rs) jit_movr_ul((d), (rs)) -#define jit_movi_p(d, is) jit_movi_ul((d), (long) (is)) #define jit_subr_p(d, s1, s2) jit_subr_ul((d), (s1), (s2)) #define jit_subi_p(d, rs, is) jit_subi_ul((d), (rs), (long) (is)) +#define jit_rsbi_p(d, rs, is) jit_rsbi_ul((d), (rs), (long) (is)) + +#ifndef jit_movi_p +#define jit_movi_p(d, is) (jit_movi_ul((d), (long) (is)), _jit.x.pc) +#endif + +#define jit_patch(pv) jit_patch_at ((pv), (_jit.x.pc)) #ifndef jit_addci_i #define jit_addci_i(d, rs, is) jit_addi_i((d), (rs), (is)) @@ -190,8 +208,11 @@ typedef union jit_code { #define jit_subi_l(d, rs, is) jit_addi_l((d), (rs), -(is)) #define jit_subci_i(d, rs, is) jit_addci_i((d), (rs), -(is)) #define jit_subci_l(d, rs, is) jit_addci_l((d), (rs), -(is)) +#define jit_rsbr_f(d, s1, s2) jit_subr_f((d), (s2), (s1)) +#define jit_rsbr_d(d, s1, s2) jit_subr_d((d), (s2), (s1)) #define jit_rsbr_i(d, s1, s2) jit_subr_i((d), (s2), (s1)) #define jit_rsbr_l(d, s1, s2) jit_subr_l((d), (s2), (s1)) +#define jit_rsbr_p(d, s1, s2) jit_subr_p((d), (s2), (s1)) /* Unary */ #define jit_notr_c(d, rs) jit_xori_c((d), (rs), 255) @@ -216,23 +237,43 @@ typedef union jit_code { #define jit_extr_s_i(d, rs) (jit_lshi_i((d), (rs), 16), jit_rshi_i((d), (d), 16)) #endif +#ifdef jit_addi_l /* 
sizeof(long) != sizeof(int) */ +#ifndef jit_extr_c_l +#define jit_extr_c_l(d, rs) (jit_lshi_l((d), (rs), 56), jit_rshi_l((d), (d), 56)) +#endif +#ifndef jit_extr_s_l +#define jit_extr_s_l(d, rs) (jit_lshi_l((d), (rs), 48), jit_rshi_l((d), (d), 48)) +#endif +#ifndef jit_extr_i_l +#define jit_extr_i_l(d, rs) (jit_lshi_l((d), (rs), 32), jit_rshi_l((d), (d), 32)) +#endif +#ifndef jit_extr_c_ul +#define jit_extr_c_ul(d, rs) jit_andi_l((d), (rs), 0xFF) +#endif +#ifndef jit_extr_s_ul +#define jit_extr_s_ul(d, rs) jit_andi_l((d), (rs), 0xFFFF) +#endif +#ifndef jit_extr_i_ul +#define jit_extr_i_ul(d, rs) jit_andi_l((d), (rs), 0xFFFFFFFFUL) +#endif +#endif +#define jit_extr_c_s(d, rs) jit_extr_c_i((d), (rs)) +#define jit_extr_c_us(d, rs) jit_extr_c_ui((d), (rs)) +#define jit_extr_uc_s(d, rs) jit_extr_uc_i((d), (rs)) +#define jit_extr_uc_us(d, rs) jit_extr_uc_ui((d), (rs)) #define jit_extr_uc_i(d, rs) jit_extr_c_ui((d), (rs)) #define jit_extr_uc_ui(d, rs) jit_extr_c_ui((d), (rs)) #define jit_extr_us_i(d, rs) jit_extr_s_ui((d), (rs)) #define jit_extr_us_ui(d, rs) jit_extr_s_ui((d), (rs)) - -#ifndef jit_extr_i_ul -#ifdef jit_addi_l /* sizeof(long) != sizeof(int) */ -#define jit_extr_i_ul(d, rs) jit_andi_ui((d), (rs), 0xFF) -#else /* sizeof(long) == sizeof(int) */ -#define jit_extr_i_ul(d, rs) jit_movr_i(d, rs) -#endif /* sizeof(long) == sizeof(int) */ -#endif - +#define jit_extr_uc_l(d, rs) jit_extr_c_ul((d), (rs)) +#define jit_extr_uc_ul(d, rs) jit_extr_c_ul((d), (rs)) +#define jit_extr_us_l(d, rs) jit_extr_s_ul((d), (rs)) +#define jit_extr_us_ul(d, rs) jit_extr_s_ul((d), (rs)) #define jit_extr_ui_l(d, rs) jit_extr_i_ul((d), (rs)) #define jit_extr_ui_ul(d, rs) jit_extr_i_ul((d), (rs)) + /* NTOH/HTON is not mandatory for big endian architectures */ #ifndef jit_ntoh_ui /* big endian */ #define jit_ntoh_ui(d, rs) ((d) == (rs) ? 
(void)0 : jit_movr_i((d), (rs))) @@ -251,7 +292,7 @@ typedef union jit_code { #define jit_pushr_p(rs) jit_pushr_ul(rs) #define jit_popr_p(rs) jit_popr_ul(rs) -#define jit_prepare(nint) jitfp_prepare((nint), 0, 0) +#define jit_prepare(nint) jit_prepare_i((nint)) #define jit_pusharg_c(rs) jit_pusharg_i(rs) #define jit_pusharg_s(rs) jit_pusharg_i(rs) #define jit_pusharg_uc(rs) jit_pusharg_i(rs) @@ -388,10 +429,17 @@ typedef union jit_code { #define jit_retval_c(rd) jit_retval_i((rd)) #define jit_retval_s(rd) jit_retval_i((rd)) +/* This was a bug, but we keep it. */ +#define jit_retval(rd) jit_retval_i ((rd)) + #ifndef jit_finish #define jit_finish(sub) jit_calli(sub) #endif +#ifndef jit_finishr +#define jit_finishr(reg) jit_callr(reg) +#endif + #ifndef jit_prolog #define jit_prolog(numargs) #endif @@ -412,15 +460,15 @@ typedef union jit_code { #define jit_getarg_ul(reg, ofs) jit_extr_uc_ul((reg), (ofs)) #define jit_getarg_us(reg, ofs) jit_extr_us_ul((reg), (ofs)) #else -#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs)); -#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs)); -#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs)); -#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs)); -#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs)); -#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs)); -#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs)); -#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs)); -#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs)); +#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs)); +#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs)); +#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs)); +#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs)); +#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs)); +#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs)); +#define 
jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs)); +#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs)); +#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs)); #endif #endif @@ -474,6 +522,14 @@ typedef union jit_code { #define jit_rshi_ul(d, rs, is) jit_rshi_ui((d), (rs), (is)) #define jit_rshr_ul(d, s1, s2) jit_rshr_ui((d), (s1), (s2)) +/* Sign/Zero extension */ +#define jit_extr_c_l(d, rs) jit_extr_c_i(d, rs) +#define jit_extr_c_ul(d, rs) jit_extr_c_ui(d, rs) +#define jit_extr_s_l(d, rs) jit_extr_s_i(d, rs) +#define jit_extr_s_ul(d, rs) jit_extr_s_ui(d, rs) +#define jit_extr_i_l(d, rs) jit_movr_i(d, rs) +#define jit_extr_i_ul(d, rs) jit_movr_i(d, rs) + /* Unary */ #define jit_movi_l(d, rs) jit_movi_i((d), (rs)) #define jit_movr_l(d, rs) jit_movr_i((d), (rs)) diff --git a/lightning/fp-common.h b/lightning/fp-common.h index de25fbba8..907fdc4d9 100644 --- a/lightning/fp-common.h +++ b/lightning/fp-common.h @@ -29,232 +29,58 @@ * ***********************************************************************/ -struct jit_fp { - char kind; - char subkind; - union { - struct { - int displ; - char reg1; - char reg2; - } addr; - union { - double number; - long split[sizeof(double) / sizeof(long)]; - } imm; - struct { - struct jit_fp *lhs, *rhs; - } ops; - } d; -}; - -#ifdef jit_trunc - -enum { JIT_NULL, /* unused */ - - JIT_CMP, JIT_FLOOR, JIT_CEIL, JIT_ROUND, JIT_TRUNC, /* integer */ - - JIT_XI, JIT_ADD, JIT_XR, JIT_SUB, /* subkinds */ - JIT_I, JIT_MUL, JIT_R, JIT_DIV, - JIT_INT, - - JIT_ABS, JIT_SIN, JIT_COS, JIT_TAN, JIT_ATN, /* functions */ - JIT_EXP, JIT_LOG, JIT_NEG, JIT_SQRT, - - JIT_OP, JIT_FN, JIT_LD, JIT_IMM }; /* kinds */ - -/* Declarations */ - -static void _jit_emit(jit_state *, struct jit_fp *, - int, int, int, int) JIT_UNUSED; -static struct jit_fp *_jit_op(struct jit_fp *, int, - struct jit_fp *, struct jit_fp *) JIT_UNUSED; -static struct jit_fp *_jit_ld(struct jit_fp *, int, - int, int) JIT_UNUSED; -static struct jit_fp 
*_jit_fn(struct jit_fp *, int, - struct jit_fp *) JIT_UNUSED; -static struct jit_fp *_jit_imm(struct jit_fp *, double) JIT_UNUSED; - -/* Internal function to walk the tree */ - -void -_jit_emit(jit_state *jit, struct jit_fp *head, - int store_kind, int store1, int store2, int reg0) -{ -#define _jit (*jit) - switch (head->kind) { - case JIT_OP: - _jit_emit(jit, head->d.ops.lhs, JIT_NULL, 0, 0, reg0); - _jit_emit(jit, head->d.ops.rhs, JIT_NULL, 0, 0, reg0 + 1); - switch (head->subkind) { - case JIT_ADD: jit_add_two(reg0); break; - case JIT_SUB: jit_sub_two(reg0); break; - case JIT_MUL: jit_mul_two(reg0); break; - case JIT_DIV: jit_div_two(reg0); break; - } - break; - - case JIT_IMM: -#ifdef JIT_LONG_IS_INT - jit_fpimm(reg0, head->d.imm.split[0], head->d.imm.split[1]); -#else - jit_fpimm(reg0, head->d.imm.split[0]); -#endif - break; - - case JIT_FN: - _jit_emit(jit, head->d.ops.lhs, JIT_NULL, 0, 0, reg0); - switch (head->subkind) { - case JIT_ABS: jit_abs(reg0); break; - case JIT_NEG: jit_neg(reg0); break; -#ifdef JIT_TRANSCENDENTAL - case JIT_SIN: jit_sin(reg0); break; - case JIT_SQRT: jit_sqrt(reg0); break; - case JIT_COS: jit_cos(reg0); break; - case JIT_TAN: jit_tan(reg0); break; - case JIT_ATN: jit_atn(reg0); break; - case JIT_EXP: jit_exp(reg0); break; - case JIT_LOG: jit_log(reg0); break; -#endif - } - break; - - case JIT_LD: - switch (head->subkind) { - case JIT_INT: jit_exti_d(reg0, head->d.addr.reg1); break; - case JIT_XI: jit_ldxi_f(reg0, head->d.addr.reg1, head->d.addr.displ); break; - case JIT_XR: jit_ldxr_f(reg0, head->d.addr.reg1, head->d.addr.reg2); break; - case JIT_XI | 1: jit_ldxi_d(reg0, head->d.addr.reg1, head->d.addr.displ); break; - case JIT_XR | 1: jit_ldxr_d(reg0, head->d.addr.reg1, head->d.addr.reg2); break; -#ifndef JIT_RZERO - case JIT_I: jit_ldi_f(reg0, head->d.addr.displ); break; - case JIT_R: jit_ldr_f(reg0, head->d.addr.reg1); break; - case JIT_I | 1: jit_ldi_d(reg0, head->d.addr.displ); break; - case JIT_R | 1: jit_ldr_d(reg0, 
head->d.addr.reg1); break; -#endif - } - break; - } - - switch (store_kind) { - case JIT_FLOOR: jit_floor(store1, reg0); break; - case JIT_CEIL: jit_ceil(store1, reg0); break; - case JIT_TRUNC: jit_trunc(store1, reg0); break; - case JIT_ROUND: jit_round(store1, reg0); break; - case JIT_CMP: jit_cmp(store1, store2, reg0); break; - case JIT_XI: jit_stxi_f(store2, store1, reg0); break; - case JIT_XR: jit_stxr_f(store2, store1, reg0); break; - case JIT_XI | 1: jit_stxi_d(store2, store1, reg0); break; - case JIT_XR | 1: jit_stxr_d(store2, store1, reg0); break; -#ifndef JIT_RZERO - case JIT_I: jit_sti_f(store2, reg0); break; - case JIT_R: jit_str_f(store2, reg0); break; - case JIT_I | 1: jit_sti_d(store2, reg0); break; - case JIT_R | 1: jit_str_d(store2, reg0); break; -#endif - case JIT_NULL: break; - } -#undef _jit -} - -/* Internal functions to build the tree */ - -struct jit_fp * -_jit_op(struct jit_fp *where, int which, - struct jit_fp *op1, struct jit_fp *op2) -{ - where->kind = JIT_OP; - where->subkind = which; - where->d.ops.lhs = op1; - where->d.ops.rhs = op2; - return (where); -} - -struct jit_fp * -_jit_ld(struct jit_fp *where, int which, int op1, int op2) -{ - where->kind = JIT_LD; - where->subkind = which; - switch (which & ~1) { - case JIT_XI: where->d.addr.reg1 = op1; - case JIT_I: where->d.addr.displ = op2; break; - case JIT_XR: where->d.addr.reg2 = op2; - case JIT_INT: - case JIT_R: where->d.addr.reg1 = op1; break; - } - return (where); -} - -struct jit_fp * -_jit_fn(struct jit_fp *where, int which, struct jit_fp *op1) -{ - where->kind = JIT_FN; - where->subkind = which; - where->d.ops.lhs = op1; - return (where); -} - -struct jit_fp * -_jit_imm(struct jit_fp *where, double number) -{ - where->kind = JIT_IMM; - where->d.imm.number = number; - return (where); -} - -#define jitfp_begin(buf) (_jit.fp = (buf), --_jit.fp) -#define jitfp_add(op1, op2) _jit_op(++_jit.fp, JIT_ADD, (op1), (op2)) -#define jitfp_sub(op1, op2) _jit_op(++_jit.fp, JIT_SUB, (op1), 
(op2)) -#define jitfp_mul(op1, op2) _jit_op(++_jit.fp, JIT_MUL, (op1), (op2)) -#define jitfp_div(op1, op2) _jit_op(++_jit.fp, JIT_DIV, (op1), (op2)) -#define jitfp_imm(imm) _jit_imm(++_jit.fp, (imm)) -#define jitfp_exti_d(reg1) _jit_ld(++_jit.fp, JIT_INT, (reg1), 0) -#define jitfp_ldxi_f(reg1, imm) _jit_ld(++_jit.fp, JIT_XI, (reg1), (long)(imm)) -#define jitfp_ldxr_f(reg1, reg2) _jit_ld(++_jit.fp, JIT_XR, (reg1), (reg2)) -#define jitfp_ldxi_d(reg1, imm) _jit_ld(++_jit.fp, JIT_XI | 1, (reg1), (long)(imm)) -#define jitfp_ldxr_d(reg1, reg2) _jit_ld(++_jit.fp, JIT_XR | 1, (reg1), (reg2)) -#define jitfp_abs(op1) _jit_fn(++_jit.fp, JIT_ABS, (op1)) -#define jitfp_sqrt(op1) _jit_fn(++_jit.fp, JIT_SQRT, (op1)) -#define jitfp_neg(op1) _jit_fn(++_jit.fp, JIT_NEG, (op1)) -#define jitfp_stxi_f(imm, reg1, op1) _jit_emit(&_jit, (op1), JIT_XI, (reg1), (long)(imm), 0) -#define jitfp_stxr_f(reg1, reg2, op1) _jit_emit(&_jit, (op1), JIT_XR, (reg1), (reg2), 0) -#define jitfp_stxi_d(imm, reg1, op1) _jit_emit(&_jit, (op1), JIT_XI | 1, (reg1), (long)(imm), 0) -#define jitfp_stxr_d(reg1, reg2, op1) _jit_emit(&_jit, (op1), JIT_XR | 1, (reg1), (reg2), 0) -#define jitfp_cmp(regle, regge, op1) _jit_emit(&_jit, (op1), JIT_CMP, regle, regge, 0) -#define jitfp_floor(reg1, op1) _jit_emit(&_jit, (op1), JIT_FLOOR, reg1, 0, 0) -#define jitfp_ceil(reg1, op1) _jit_emit(&_jit, (op1), JIT_CEIL, reg1, 0, 0) -#define jitfp_trunc(reg1, op1) _jit_emit(&_jit, (op1), JIT_TRUNC, reg1, 0, 0) -#define jitfp_round(reg1, op1) _jit_emit(&_jit, (op1), JIT_ROUND, reg1, 0, 0) - - -#ifdef JIT_TRANSCENDENTAL -#define jitfp_sin(op1) _jit_fn(++_jit.fp, JIT_SIN, (op1)) -#define jitfp_cos(op1) _jit_fn(++_jit.fp, JIT_COS, (op1)) -#define jitfp_tan(op1) _jit_fn(++_jit.fp, JIT_TAN, (op1)) -#define jitfp_atn(op1) _jit_fn(++_jit.fp, JIT_ATN, (op1)) -#define jitfp_exp(op1) _jit_fn(++_jit.fp, JIT_EXP, (op1)) -#define jitfp_log(op1) _jit_fn(++_jit.fp, JIT_LOG, (op1)) -#endif +#define JIT_FPR0 JIT_FPR(0) +#define JIT_FPR1 JIT_FPR(1) 
+#define JIT_FPR2 JIT_FPR(2) +#define JIT_FPR3 JIT_FPR(3) +#define JIT_FPR4 JIT_FPR(4) +#define JIT_FPR5 JIT_FPR(5) #ifdef JIT_RZERO -#define jitfp_ldi_f(imm) _jit_ld(++_jit.fp, JIT_XI, JIT_RZERO, (long)(imm)) -#define jitfp_ldr_f(reg1) _jit_ld(++_jit.fp, JIT_XR, JIT_RZERO, (reg1)) -#define jitfp_ldi_d(imm) _jit_ld(++_jit.fp, JIT_XI | 1, JIT_RZERO, (long)(imm)) -#define jitfp_ldr_d(reg1) _jit_ld(++_jit.fp, JIT_XR | 1, JIT_RZERO, (reg1)) -#define jitfp_sti_f(imm, op1) _jit_emit(&_jit, (op1), JIT_XI, JIT_RZERO, (long)(imm), 0) -#define jitfp_str_f(reg1, op1) _jit_emit(&_jit, (op1), JIT_XR, JIT_RZERO, (reg1), 0) -#define jitfp_sti_d(imm, op1) _jit_emit(&_jit, (op1), JIT_XI | 1, JIT_RZERO, (long)(imm), 0) -#define jitfp_str_d(reg1, op1) _jit_emit(&_jit, (op1), JIT_XR | 1, JIT_RZERO, (reg1), 0) -#else -#define jitfp_ldi_f(imm) _jit_ld(++_jit.fp, JIT_I, 0, (long)(imm)) -#define jitfp_ldr_f(reg1) _jit_ld(++_jit.fp, JIT_R, (reg1), 0) -#define jitfp_ldi_d(imm) _jit_ld(++_jit.fp, JIT_I | 1, 0, (long)(imm)) -#define jitfp_ldr_d(reg1) _jit_ld(++_jit.fp, JIT_R | 1, (reg1), 0) -#define jitfp_sti_f(imm, op1) _jit_emit(&_jit, (op1), JIT_I, 0, (long)(imm), 0) -#define jitfp_str_f(reg1, op1) _jit_emit(&_jit, (op1), JIT_R, 0, (reg1), 0) -#define jitfp_sti_d(imm, op1) _jit_emit(&_jit, (op1), JIT_I | 1, 0, (long)(imm), 0) -#define jitfp_str_d(reg1, op1) _jit_emit(&_jit, (op1), JIT_R | 1, 0, (reg1), 0) +#ifndef jit_ldi_f +#define jit_ldi_f(rd, is) jit_ldxi_f((rd), JIT_RZERO, (is)) +#define jit_sti_f(id, rs) jit_stxi_f((id), JIT_RZERO, (rs)) +#define jit_ldi_d(rd, is) jit_ldxi_d((rd), JIT_RZERO, (is)) +#define jit_sti_d(id, rs) jit_stxi_d((id), JIT_RZERO, (rs)) #endif - +#ifndef jit_ldr_f +#define jit_ldr_f(rd, rs) jit_ldxr_f((rd), JIT_RZERO, (rs)) +#define jit_str_f(rd, rs) jit_stxr_f((rd), JIT_RZERO, (rs)) +#define jit_ldr_d(rd, rs) jit_ldxr_d((rd), JIT_RZERO, (rs)) +#define jit_str_d(rd, rs) jit_stxr_d((rd), JIT_RZERO, (rs)) +#endif +#endif + +#ifndef jit_addr_f +#define 
jit_addr_f(rd,s1,s2) jit_addr_d(rd,s1,s2) +#define jit_subr_f(rd,s1,s2) jit_subr_d(rd,s1,s2) +#define jit_mulr_f(rd,s1,s2) jit_mulr_d(rd,s1,s2) +#define jit_divr_f(rd,s1,s2) jit_divr_d(rd,s1,s2) +#define jit_movr_f(rd,rs) jit_movr_d(rd,rs) +#define jit_abs_f(rd,rs) jit_abs_d(rd,rs) +#define jit_negr_f(rd,rs) jit_negr_d(rd,rs) +#define jit_sqrt_f(rd,rs) jit_sqrt_d(rd,rs) +#define jit_extr_f_d(rs, rd) +#define jit_extr_d_f(rs, rd) +#define jit_extr_i_f(rd, rs) jit_extr_i_d(rd, rs) +#define jit_roundr_f_i(rd, rs) jit_roundr_d_i(rd, rs) +#define jit_floorr_f_i(rd, rs) jit_floorr_d_i(rd, rs) +#define jit_ceilr_f_i(rd, rs) jit_ceilr_d_i(rd, rs) +#define jit_truncr_f_i(rd, rs) jit_truncr_d_i(rd, rs) +#define jit_ltr_f(d, s1, s2) jit_ltr_d(d, s1, s2) +#define jit_ler_f(d, s1, s2) jit_ler_d(d, s1, s2) +#define jit_eqr_f(d, s1, s2) jit_eqr_d(d, s1, s2) +#define jit_ner_f(d, s1, s2) jit_ner_d(d, s1, s2) +#define jit_ger_f(d, s1, s2) jit_ger_d(d, s1, s2) +#define jit_gtr_f(d, s1, s2) jit_gtr_d(d, s1, s2) +#define jit_unltr_f(d, s1, s2) jit_unltr_d(d, s1, s2) +#define jit_unler_f(d, s1, s2) jit_unler_d(d, s1, s2) +#define jit_uneqr_f(d, s1, s2) jit_uneqr_d(d, s1, s2) +#define jit_ltgtr_f(d, s1, s2) jit_ltgtr_d(d, s1, s2) +#define jit_unger_f(d, s1, s2) jit_unger_d(d, s1, s2) +#define jit_ungtr_f(d, s1, s2) jit_ungtr_d(d, s1, s2) +#define jit_ordr_f(d, s1, s2) jit_ordr_d(d, s1, s2) +#define jit_unordr_f(d, s1, s2) jit_unordr_d(d, s1, s2) +#define jit_retval_f(rs) jit_retval_d(rs) #endif diff --git a/lightning/funcs-common.h b/lightning/funcs-common.h index dafae1f9e..278c544f3 100644 --- a/lightning/funcs-common.h +++ b/lightning/funcs-common.h @@ -45,4 +45,10 @@ jit_fail(const char *msg, const char *file, int line, const char *function) abort(); } + +#ifndef jit_start_pfx +#define jit_start_pfx() ( (jit_insn*)0x4) +#define jit_end_pfx() ( (jit_insn*)0x0) +#endif + #endif /* __lightning_funcs_common_h */ diff --git a/lightning/i386/asm.h b/lightning/i386/asm.h index 
d805716b4..fcc364c05 100644 --- a/lightning/i386/asm.h +++ b/lightning/i386/asm.h @@ -43,6 +43,7 @@ typedef _uc jit_insn; +#ifndef LIGHTNING_DEBUG #define _b00 0 #define _b01 1 #define _b10 2 @@ -340,10 +341,10 @@ typedef _uc jit_insn; #define CALLsm(D,B,I,S) _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ) -#define CBW() _O (0x98 ) -#define CLC() _O (0xf8 ) -#define CLTD() _O (0x99 ) -#define CMC() _O (0xf5 ) +#define CBW_() _O (0x98 ) +#define CLC_() _O (0xf8 ) +#define CLTD_() _O (0x99 ) +#define CMC_() _O (0xf5 ) #define CMPBrr(RS, RD) _O_Mrm (0x38 ,_b11,_r1(RS),_r1(RD) ) @@ -365,7 +366,7 @@ typedef _uc jit_insn; #define CMPLim(IM, MD, MB, MI, MS) _O_r_X_L (0x81 ,_b111 ,MD,MB,MI,MS ,IM ) -#define CWD() _O (0x99 ) +#define CWD_() _O (0x99 ) #define CMPXCHGBrr(RS,RD) _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ) @@ -399,7 +400,7 @@ typedef _uc jit_insn; #define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) -#define HLT() _O (0xf4 ) +#define HLT_() _O (0xf4 ) #define IDIVBr(RS) _O_Mrm (0xf6 ,_b11,_b111 ,_r1(RS) ) @@ -443,7 +444,7 @@ typedef _uc jit_insn; #define INCLm(MD,MB,MI,MS) _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ) -#define INVD() _OO (0x0f08 ) +#define INVD_() _OO (0x0f08 ) #define INVLPGm(MD, MB, MI, MS) _OO_r_X (0x0f01 ,_b111 ,MD,MB,MI,MS ) @@ -523,9 +524,9 @@ typedef _uc jit_insn; #define JMPsm(D,B,I,S) _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ) -#define LAHF() _O (0x9f ) +#define LAHF_() _O (0x9f ) #define LEALmr(MD, MB, MI, MS, RD) _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ) -#define LEAVE() _O (0xc9 ) +#define LEAVE_() _O (0xc9 ) #define LMSWr(RS) _OO_Mrm (0x0f01 ,_b11,_b110,_r4(RS) ) @@ -600,7 +601,7 @@ typedef _uc jit_insn; #define NEGLm(MD,MB,MI,MS) _O_r_X (0xf7 ,_b011 ,MD,MB,MI,MS ) -#define NOP() _O (0x90 ) +#define NOP_() _O (0x90 ) #define NOTBr(RD) _O_Mrm (0xf6 ,_b11,_b010 ,_r1(RD) ) @@ -639,11 +640,11 @@ typedef _uc jit_insn; #define POPLm(MD,MB,MI,MS) _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ) -#define POPA() _wO (0x61 ) -#define POPAD() _O (0x61 ) +#define POPA_() _wO (0x61 ) 
+#define POPAD_() _O (0x61 ) -#define POPF() _wO (0x9d ) -#define POPFD() _O (0x9d ) +#define POPF_() _wO (0x9d ) +#define POPFD_() _O (0x9d ) #define PUSHWr(R) _wOr (0x50,_r2(R) ) @@ -655,13 +656,13 @@ typedef _uc jit_insn; #define PUSHLi(IM) _Os_sL (0x68 ,IM ) -#define PUSHA() _wO (0x60 ) -#define PUSHAD() _O (0x60 ) +#define PUSHA_() _wO (0x60 ) +#define PUSHAD_() _O (0x60 ) -#define PUSHF() _O (0x9c ) -#define PUSHFD() _wO (0x9c ) +#define PUSHF_() _O (0x9c ) +#define PUSHFD_() _wO (0x9c ) -#define RET() _O (0xc3 ) +#define RET_() _O (0xc3 ) #define RETi(IM) _O_W (0xc2 ,_su16(IM)) @@ -721,7 +722,7 @@ typedef _uc jit_insn; JITFAIL ("source register must be CL" ) ) -#define SAHF() _O (0x9e ) +#define SAHF_() _O (0x9e ) #define SALBir SHLBir @@ -904,7 +905,7 @@ typedef _uc jit_insn; JITFAIL ("source register must be CL" ) ) -#define STC() _O (0xf9 ) +#define STC_() _O (0xf9 ) #define SUBBrr(RS, RD) _O_Mrm (0x28 ,_b11,_r1(RS),_r1(RD) ) @@ -985,8 +986,8 @@ typedef _uc jit_insn; #define ESCmi(D,B,I,S,OP) _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S) #define ESCri(RD,OP) _O_Mrm(0xd8|(OP >> 3), _b11, (OP & 7), RD) -#define ESCrri(RS,RD,OP) ((RS) = _ST0 ? ESCri(RD,(OP|040)) \ - : (RD) = _ST0 ? ESCri(RS,OP) \ +#define ESCrri(RS,RD,OP) ((RS) == _ST0 ? ESCri(RD,(OP|040)) \ + : (RD) == _ST0 ? ESCri(RS,OP) \ : JITFAIL ("coprocessor instruction without st0")) #define FLDSm(D,B,I,S) ESCmi(D,B,I,S,010) /* fld m32real */ @@ -1036,7 +1037,7 @@ typedef _uc jit_insn; #define FNSTSWr(RD) ((RD == _AX || RD == _EAX) ? _OO (0xdfe0) \ : JITFAIL ("AX or EAX expected")) /* N byte NOPs */ -#define _NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), \ +#define NOPi(N) ((( (N) >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), \ (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \ ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \ ( ((N)&7) == 5) ? 
(_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \ @@ -1056,5 +1057,6 @@ typedef _uc jit_insn; /* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction Set Reference", */ /* Intel Corporation 1997. */ +#endif #endif /* __lightning_asm_h */ diff --git a/lightning/i386/core.h b/lightning/i386/core.h index 79f4d645e..dd9d58a87 100644 --- a/lightning/i386/core.h +++ b/lightning/i386/core.h @@ -34,16 +34,15 @@ #ifndef __lightning_core_h #define __lightning_core_h -#define JIT_R0 _EAX -#define JIT_R1 _ECX -#define JIT_R2 _EDX -#define JIT_V0 _EBX -#define JIT_V1 _ESI -#define JIT_V2 _EDI #define JIT_FP _EBP #define JIT_SP _ESP #define JIT_RET _EAX +#define JIT_R_NUM 3 +#define JIT_V_NUM 3 +#define JIT_R(i) (_EAX + (i)) +#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1) + struct jit_local_state { int framesize; int argssize; @@ -265,10 +264,13 @@ struct jit_local_state { /* The += allows for stack pollution */ -#define jitfp_prepare(ni,nf,nd) ((void) (_jitl.argssize += (ni) + (nf) + 2*(nd))) +#define jit_prepare_i(ni) (_jitl.argssize += (ni)) +#define jit_prepare_f(nf) (_jitl.argssize += (nf)) +#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) #define jit_pusharg_i(rs) PUSHLr(rs) #define jit_finish(sub) (jit_calli((sub)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) -#define jit_retval(rd) jit_movr_i ((rd), _EAX) +#define jit_finishr(reg) (jit_callr((reg)), ADDLir(4 * _jitl.argssize, JIT_SP), _jitl.argssize = 0) +#define jit_retval_i(rd) jit_movr_i ((rd), _EAX) #define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int)) #define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int)) @@ -289,6 +291,8 @@ struct jit_local_state { #define jit_movr_i(d, rs) ((rs) == (d) ? 0 : MOVLrr((rs), (d))) #define jit_movi_i(d, is) ((is) ? 
MOVLir((is), (d)) : XORLrr ((d), (d)) ) +#define jit_movi_p(d, is) (MOVLir((is), (d)), _jit.x.pc) +#define jit_patch_movi(pa,pv) (*_PSL((pa) - 4) = _jit_SL((pv))) #define jit_ntoh_ui(d, rs) jit_op_((d), (rs), BSWAPLr(d)) #define jit_ntoh_us(d, rs) jit_op_((d), (rs), RORWir(8, d)) @@ -311,7 +315,7 @@ struct jit_local_state { #define jit_gei_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETGEr, SETNSr ) #define jit_eqi_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETEr, SETEr ) #define jit_nei_i(d, rs, is) jit_bool_i0((d), (rs), (is), SETNEr, SETNEr ) -#define jit_lti_ui(d, rs, is) jit_bool_i ((d), (rs), (is), SETB ) +#define jit_lti_ui(d, rs, is) jit_bool_i ((d), (rs), (is), SETBr ) #define jit_lei_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETBEr, SETEr ) #define jit_gti_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETAr, SETNEr ) #define jit_gei_ui(d, rs, is) jit_bool_i0((d), (rs), (is), SETAEr, INCLr ) @@ -340,10 +344,10 @@ struct jit_local_state { #define jit_bgei_i(label, rs, is) jit_bra_i0((rs), (is), JGEm(label,0,0,0), JNSm(label,0,0,0) ) #define jit_beqi_i(label, rs, is) jit_bra_i0((rs), (is), JEm(label, 0,0,0), JEm(label, 0,0,0) ) #define jit_bnei_i(label, rs, is) jit_bra_i0((rs), (is), JNEm(label,0,0,0), JNEm(label,0,0,0) ) -#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JLm(label, 0,0,0) ) -#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JLEm(label,0,0,0), JEm(label, 0,0,0) ) -#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JGm(label, 0,0,0), JNEm(label,0,0,0) ) -#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JGEm(label,0,0,0) ) +#define jit_blti_ui(label, rs, is) jit_bra_i ((rs), (is), JBm(label, 0,0,0) ) +#define jit_blei_ui(label, rs, is) jit_bra_i0((rs), (is), JBEm(label,0,0,0), JEm(label, 0,0,0) ) +#define jit_bgti_ui(label, rs, is) jit_bra_i0((rs), (is), JAm(label, 0,0,0), JNEm(label,0,0,0) ) +#define jit_bgei_ui(label, rs, is) jit_bra_i ((rs), (is), JAEm(label,0,0,0) ) #define jit_boaddi_i(label, rs, is) 
(ADDLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) #define jit_bosubi_i(label, rs, is) (SUBLir((is), (rs)), JOm(label,0,0,0), _jit.x.pc) #define jit_boaddi_ui(label, rs, is) (ADDLir((is), (rs)), JCm(label,0,0,0), _jit.x.pc) @@ -354,9 +358,10 @@ struct jit_local_state { #define jit_jmpi(label) (JMPm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) #define jit_calli(label) (CALLm( ((unsigned long) (label)), 0, 0, 0), _jit.x.pc) +#define jit_callr(reg) (CALLsr(reg)) #define jit_jmpr(reg) JMPsr(reg) -#define jit_patch(jump_pc) (*_PSL((jump_pc) - 4) = _jit_SL(_jit.x.pc - (jump_pc))) -#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET()) +#define jit_patch_at(jump_pc,v) (*_PSL((jump_pc) - 4) = _jit_SL((v) - (jump_pc))) +#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_()) /* Memory */ #define jit_ldi_c(d, is) MOVSBLmr((is), 0, 0, 0, (d)) @@ -400,9 +405,9 @@ struct jit_local_state { #define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0) /* Extra */ -#define jit_nop() NOP() +#define jit_nop() NOP_() #define _jit_alignment(pc, n) (((pc ^ _MASK(4)) + 1) & _MASK(n)) -#define jit_align(n) _NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n))) +#define jit_align(n) NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n))) #endif /* __lightning_core_h */ diff --git a/lightning/i386/fp.h b/lightning/i386/fp.h index c347e3d58..0d2725563 100644 --- a/lightning/i386/fp.h +++ b/lightning/i386/fp.h @@ -33,49 +33,48 @@ #ifndef __lightning_asm_fp_h #define __lightning_asm_fp_h -/* Actually, we should redesign the jitfp interface. As a first step, I have - defined the macros for many x87 instructions, and I am using them here. +/* We really must map the x87 stack onto a flat register file. In practice, + we can provide something sensible and make it work on the x86 using the + stack like a file of eight registers. - In practice, we can provide something sensible and make it work on the x86 - using the stack like a file of eight registers. 
Then this awful stuff goes - away, and everything is "beautiful" as the rest of GNU lightning---and we'll - document it, promised. - - Well, let's use six or seven registers so as to have some freedom - for floor, ceil, round, log, tan, atn and exp. + We use six or seven registers so as to have some freedom + for floor, ceil, round, (and log, tan, atn and exp). Not hard at all, basically play with FXCH. FXCH is mostly free, so the generated code is not bad. Of course we special case when one of the operands turns out to be ST0. - - binary ops: + Here are the macros that actually do the trick. */ - add FRR3 to FPR0 - FADD ST0,ST3 +#define JIT_FPR_NUM 6 +#define JIT_FPR(i) (i) - add FPR0 to FPR3 - FADD ST3,ST0 +#define jit_fxch(rs, op) (((rs) != 0 ? FXCHr(rs) : 0), \ + op, ((rs) != 0 ? FXCHr(rs) : 0)) - add FPR3 to FPR7 (I'm using nasm syntax here) - FXCH ST3 - FADD ST7,ST0 - FXCH ST3 +#define jit_fp_unary(rd, s1, op) \ + ((rd) == (s1) ? jit_fxch ((rd), op) \ + : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op) \ + : (FLDr ((s1)), op, FSTPr ((rd)))) - - stores: +#define jit_fp_binary(rd, s1, s2, op, opr) \ + ((rd) == (s1) ? \ + ((s2) == 0 ? opr(0, (rd)) \ + : (s2) == (s1) ? jit_fxch((rd), op(0, 0)) \ + : jit_fxch((rd), op((s2), 0))) \ + : (rd) == (s2) ? jit_fxch((s1), opr(0, (rd) == 0 ? 
(s1) : (rd))) \ + : (FLDr (s1), op(0, (s2)+1), FSTPr((rd)+1))) - store FPR3 +#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) +#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) +#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) +#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) - FXCH ST3 - FST [FUBAR] - FXCH ST3 +#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) +#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) +#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) - store FPR0 - - FST [FUBAR] - - (and similarly for other unary ops like FCHS or FABS) - - - moves: +/* - moves: move FPR0 to FPR3 FST ST3 @@ -85,11 +84,16 @@ FST ST3 move FPR3 to FPR1 - FSTP ST1 Save old st0 into destination register - FLD ST2 Stack is rotated, so FPRn becomes STn-1 - FXCH ST1 Get back old st0 + FLD ST1 + FST ST4 Stack is rotated, so FPRn becomes STn+1 */ - - loads: +#define jit_movr_d(rd,s1) \ + ((s1) == (rd) ? 0 \ + : (s1) == 0 ? FSTr ((rd)) \ + : (rd) == 0 ? 
(FXCHr ((s1)), FSTr ((s1))) \ + : (FLDr ((s1)), FSTr ((rd)+1))) + +/* - loads: load into FPR0 FSTP ST0 @@ -102,53 +106,82 @@ (and similarly for immediates, using the stack) */ -#define jit_add_two(reg0) FADDPr(1) -#define jit_sub_two(reg0) FSUBRPr(1) -#define jit_mul_two(reg0) FMULPr(1) -#define jit_div_two(reg0) FDIVRPr(1) +#define jit_movi_f(rd,immf) \ + (_O (0x68), \ + *((float *) _jit.x.pc) = (float) immf, \ + _jit.x.uc_pc += sizeof (float), \ + jit_ldr_f((rd), _ESP), \ + ADDLir(4, _ESP)) -#define jit_abs(reg0) _OO(0xd9e1) /* fabs */ -#define jit_sqr(reg0) FMULrr(0,0) -#define jit_sqrt(reg0) _OO(0xd9fa) /* fsqrt */ +union jit_double_imm { + double d; + int i[2]; +}; -#define jit_exti_d(reg0, rs) (PUSHLr((rs)), FILDLm(0, _ESP, 0, 0), POPLr((rs))) +#define jit_movi_d(rd,immd) \ + (_O (0x68), \ + _jit.x.uc_pc[4] = 0x68, \ + ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \ + *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \ + _jit.x.uc_pc += 9, \ + jit_ldr_d((rd), _ESP), \ + ADDLir(8, _ESP)) -#define jit_neg(reg0) _OO(0xd9e0) /* fchs */ +#define jit_ldi_f(rd, is) \ + ((rd) == 0 ? 
(FSTPr (0), FLDSm((is), 0, 0, 0)) \ + : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) -#define jit_ldxr_f(reg0, s1, s2) FLDSm(0, (s1), (s2), 1) -#define jit_ldxi_f(reg0, rs, is) FLDSm((is), (rs), 0, 0) -#define jit_ldxr_f(reg0, s1, s2) FLDSm(0, (s1), (s2), 1) -#define jit_ldxi_d(reg0, rs, is) FLDLm((is), (rs), 0, 0) -#define jit_ldxr_d(reg0, s1, s2) FLDLm(0, (s1), (s2), 1) -#define jit_ldi_f(reg0, is) FLDSm((is), 0, 0, 0) -#define jit_ldr_f(reg0, rs) FLDSm(0, (rs), 0, 0) -#define jit_ldi_d(reg0, is) FLDLm((is), 0, 0, 0) -#define jit_ldr_d(reg0, rs) FLDLm(0, (rs), 0, 0) -#define jit_stxi_f(id, rd, reg0) FSTPSm((id), (rd), 0, 0) -#define jit_stxr_f(d1, d2, reg0) FSTPSm(0, (d1), (d2), 1) -#define jit_stxi_d(id, rd, reg0) FSTPLm((id), (rd), 0, 0) -#define jit_stxr_d(d1, d2, reg0) FSTPLm(0, (d1), (d2), 1) -#define jit_sti_f(id, reg0) FSTPSm((id), 0, 0, 0) -#define jit_str_f(rd, reg0) FSTPSm(0, (rd), 0, 0) -#define jit_sti_d(id, reg0) FSTPLm((id), 0, 0, 0) -#define jit_str_d(rd, reg0) FSTPLm(0, (rd), 0, 0) +#define jit_ldi_d(rd, is) \ + ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \ + : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1))) -#define jit_fpimm(reg0, first, second) \ - (PUSHLi(second), \ - PUSHLi(first), \ - FLDLm(0, _ESP, 0, 0), \ - ADDLir(8, _ESP)) +#define jit_ldr_f(rd, rs) \ + ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \ + : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1))) +#define jit_ldr_d(rd, rs) \ + ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \ + : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxi_f(rd, rs, is) \ + ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \ + : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxi_d(rd, rs, is) \ + ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \ + : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1))) + +#define jit_ldxr_f(rd, s1, s2) \ + ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \ + : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) + +#define jit_ldxr_d(rd, s1, s2) \ + ((rd) == 0 ? 
(FSTPr (0), FLDLm(0, (s1), (s2), 1)) \ + : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) + +#define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \ + ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \ + : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \ + POPLr((rs))) + +#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0)) +#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1)) +#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0)) +#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1)) +#define jit_sti_f(id, rs) jit_fxch ((rs), FSTSm((id), 0, 0, 0)) +#define jit_str_f(rd, rs) jit_fxch ((rs), FSTSm(0, (rd), 0, 0)) +#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0)) +#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0)) /* Assume round to near mode */ -#define jit_floor(rd, reg0) \ - jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)) +#define jit_floorr_d_i(rd, rs) \ + (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))) -#define jit_ceil(rd, reg0) \ - jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX)) +#define jit_ceilr_d_i(rd, rs) \ + (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX))) -#define jit_trunc(rd, reg0) \ - jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX)) +#define jit_truncr_d_i(rd, rs) \ + (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX))) #define jit_calc_diff(ofs) \ FISTLm(ofs, _ESP, 0, 0), \ @@ -200,53 +233,115 @@ POPLr(aux)) /* the easy one */ -#define jit_round(rd, reg0) \ - (PUSHLr(_EAX), \ - FISTPLm(0, _ESP, 0, 0), \ +#define jit_roundr_d_i(rd, rs) \ + (PUSHLr(_EAX), \ + jit_fxch ((rs), FISTPLm(0, _ESP, 0, 0)), \ POPLr((rd))) -#define jit_cmp(le, ge, reg0) ( \ - ((le) == _EAX || (ge) == _EAX ? 
0 : PUSHLr(_EAX)), \ - FCOMr(0), \ - FNSTSWr(_AX), \ - TESTBir(0x40, _AH), \ - MOVLir(0, (le)), \ - MOVLrr((le), (ge)), \ - JZSm(_jit.x.pc + 11, 0, 0, 0), \ - _OO(0xd9e4), /* ftst */ /* 2 */ \ - FNSTSWr(_AX), /* 2 */ \ - SAHF(), /* 1 */ \ - SETLEr( ((le) & 15) | 0x10), /* 3 */ \ - SETGEr( ((ge) & 15) | 0x10), /* 3 */ \ - ((le) == _EAX || (ge) == _EAX ? ANDLir (1, _EAX) : POPLr(_EAX)) ) +#define jit_fp_test(d, s1, s2, n, _and, res) \ + (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \ + res, \ + ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ -#define jitfp_getarg_f(ofs) jitfp_ldxi_f(JIT_FP,(ofs)) -#define jitfp_getarg_d(ofs) jitfp_ldxi_d(JIT_FP,(ofs)) -#define jitfp_pusharg_d(op1) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jitfp_str_d(JIT_SP,(op1))) -#define jitfp_pusharg_f(op1) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jitfp_str_f(JIT_SP,(op1))) -#define jitfp_retval(op1) _jit_emit(&_jit, (op1), JIT_NULL, 0, 0, 0) +#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \ + (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + PUSHLr(_EAX), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir ((_and), _EAX) : 0), \ + ((cmp) ? 
CMPLir ((cmp), _AL) : 0), \ + POPLr(_EAX), \ + res ((d), 0, 0, 0)) -#define JIT_TRANSCENDENTAL +#define jit_nothing_needed(x) -#define jit_sin(reg0) _OO(0xd9fe) /* fsin */ -#define jit_cos(reg0) _OO(0xd9ff) /* fcos */ -#define jit_tan(reg0) (_OO(0xd9f2), /* fptan */ \ - FSTPr(0)) /* fstp st */ -#define jit_atn(reg0) (_OO(0xd9e8), /* fld1 */ \ - _OO(0xd9f3)) /* fpatan */ -#define jit_exp(reg0) (_OO(0xd9ea), /* fldl2e */ \ - FMULPr(1), /* fmulp */ \ - _OO(0xd9c0), /* fld st */ \ - _OO(0xd9fc), /* frndint */ \ - _OO(0xdce9), /* fsubr */ \ - FXCHr(1), /* fxch st(1) */ \ - _OO(0xd9f0), /* f2xm1 */ \ - _OO(0xd9e8), /* fld1 */ \ - _OO(0xdec1), /* faddp */ \ - _OO(0xd9fd), /* fscale */ \ - FSTPr(1)) /* fstp st(1) */ -#define jit_log(reg0) (_OO(0xd9ed), /* fldln2 */ \ - FXCHr(1), /* fxch st(1) */ \ - _OO(0xd9f1)) /* fyl2x */ +/* After FNSTSW we have (in hexadecimal, after masking with 45) 1 if <, 40 if =, 0 if >, 45 if unordered. Here + is how to map the values of the status word's high byte to the + conditions. + + < = > unord valid values condition + gt no no yes no 0 STSW & 45 == 0 + lt yes no no no 1 STSW & 45 == 1 + eq no yes no no 40 STSW & 45 == 40 + unord no no no yes 45 bit 2 == 1 + + ge no yes yes no 0, 40 bit 0 == 0 + unlt yes no no yes 1, 45 bit 0 == 1 + ltgt yes no yes no 0, 1 bit 6 == 0 + uneq no yes no yes 40, 45 bit 6 == 1 + le yes yes no no 1, 40 odd parity for STSW & 41 + ungt no no yes yes 0, 45 even parity for STSW & 41 + + unle yes yes no yes 1, 40, 45 STSW & 45 != 0 + unge no yes yes yes 0, 40, 45 STSW & 45 != 1 + ne yes no yes yes 0, 1, 45 STSW & 45 != 40 + ord yes yes yes no 0, 1, 40 bit 2 == 0 + + lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with + the operands swapped; it is more efficient this way.
*/ + +#define jit_gtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETZr (_AL)) +#define jit_ger_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, SBBBir (-1, _AL)) +#define jit_unler_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, SETNZr (_AL)) +#define jit_unltr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 9, 0, ADCBir (0, _AL)) +#define jit_ltr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETZr (_AL)) +#define jit_ler_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, SBBBir (-1, _AL)) +#define jit_unger_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 8, 0x45, SETNZr (_AL)) +#define jit_ungtr_d(d, s1, s2) jit_fp_test((d), (s2), (s1), 9, 0, ADCBir (0, _AL)) +#define jit_eqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETEr (_AL))) +#define jit_ner_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 8, 0x45, (CMPBir (0x40, _AL), SETNEr (_AL))) +#define jit_ltgtr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, SBBBir (-1, _AL)) +#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL)) +#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL)) +#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL)) + +#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm) +#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm) +#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) +#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) +#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm) +#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm) +#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) +#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) +#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm) +#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0x40, JNZm) +#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm) +#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm) +#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm) +#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm) + +#define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs)) +#define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs)) +#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs))) +#define jit_pusharg_f(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jit_str_f(JIT_SP,(rs))) +#define jit_retval_d(op1) jit_movr_d(0, (op1)) + + +#if 0 +#define jit_sin() _OO(0xd9fe) /* fsin */ +#define jit_cos() _OO(0xd9ff) /* fcos */ +#define jit_tan() (_OO(0xd9f2), /* fptan */ \ + FSTPr(0)) /* fstp st */ +#define jit_atn() (_OO(0xd9e8), /* fld1 */ \ + _OO(0xd9f3)) /* fpatan */ +#define jit_exp() (_OO(0xd9ea), /* fldl2e */ \ + FMULPr(1), /* fmulp */ \ + _OO(0xd9c0), /* fld st */ \ + _OO(0xd9fc), /* frndint */ \ + _OO(0xdce9), /* fsubr */ \ + FXCHr(1), /* fxch st(1) */ \ + _OO(0xd9f0), /* f2xm1 */ \ + _OO(0xd9e8), /* fld1 */ \ + _OO(0xdec1), /* faddp */ \ + _OO(0xd9fd), /* fscale */ \ + FSTPr(1)) /* fstp st(1) */ +#define jit_log() (_OO(0xd9ed), /* fldln2 */ \ + FXCHr(1), /* fxch st(1) */ \ + _OO(0xd9f1)) /* fyl2x */ +#endif #endif /* __lightning_asm_h */ diff --git a/lightning/i386/funcs.h b/lightning/i386/funcs.h index a618a8573..1ae48e74e 100644 --- a/lightning/i386/funcs.h +++ b/lightning/i386/funcs.h @@ -34,6 +34,57 @@ #ifndef __lightning_funcs_h #define __lightning_funcs_h -#define jit_flush_code(dest, end) +#ifdef __linux__ +#include <sys/mman.h> +#endif + +static void +jit_flush_code(void *dest, void *end) +{ + /* On the x86, the PROT_EXEC bits are not handled by the MMU. + However, the kernel can emulate this by setting the code + segment's limit to the end address of the highest page + whose PROT_EXEC bit is set.
+ + Linux kernels that do so and that disable by default the + execution of the data and stack segment are becoming more + and more common (Fedora, for example), so we implement our + jit_flush_code as an mprotect. */ +#ifdef __linux__ + static unsigned long prev_page = 0, prev_length = 0; + int page, length; +#ifdef PAGESIZE + const int page_size = PAGESIZE; +#else + static int page_size = -1; + if (page_size == -1) + page_size = sysconf (_SC_PAGESIZE); +#endif + + page = (long) dest & ~(page_size - 1); + length = ((char *) end - (char *) page + page_size - 1) & ~(page_size - 1); + + /* Simple-minded attempt at optimizing the common case where a single + chunk of memory is used to compile multiple functions. */ + if (page >= prev_page && page + length <= prev_page + prev_length) + return; + + mprotect ((void *) page, length, PROT_READ | PROT_WRITE | PROT_EXEC); + + /* See if we can extend the previously mprotect'ed memory area towards + higher addresses: the starting address remains the same as before. */ + if (page >= prev_page && page <= prev_page + prev_length) + prev_length = page + length - prev_page; + + /* See if we can extend the previously mprotect'ed memory area towards + lower addresses: the highest address remains the same as before. */ + else if (page < prev_page && page + length <= prev_page + prev_length) + prev_length += prev_page - page, prev_page = page; + + /* Nothing to do, replace the area. 
*/ + else + prev_page = page, prev_length = length; +#endif +} #endif /* __lightning_funcs_h */ diff --git a/lightning/ppc/asm.h b/lightning/ppc/asm.h index d102283b4..9f3c71dd9 100644 --- a/lightning/ppc/asm.h +++ b/lightning/ppc/asm.h @@ -61,6 +61,7 @@ typedef unsigned int jit_insn; +#ifndef LIGHTNING_DEBUG #define _cr0 0 #define _cr1 1 #define _cr2 2 @@ -81,9 +82,9 @@ typedef unsigned int jit_insn; /* primitive instruction forms [1, Section A.4] */ -#define _FB( OP, BD,AA,LK ) _jit_I((_u6(OP)<<26)| _d26(BD)| (_u1(AA)<<1)|_u1(LK)) +#define _FB( OP, BD,AA,LK ) (_jit_I_noinc((_u6(OP)<<26)| _d26(BD)| (_u1(AA)<<1)|_u1(LK)), _jit.x.pc++, 0) #define _FBA( OP, BD,AA,LK ) _jit_I((_u6(OP)<<26)| (_u26(BD)&~3)| (_u1(AA)<<1)|_u1(LK)) -#define _BB( OP,BO,BI, BD,AA,LK ) _jit_I((_u6(OP)<<26)|(_u5(BO)<<21)|(_u5(BI)<<16)| _d16(BD)| (_u1(AA)<<1)|_u1(LK)) +#define _BB( OP,BO,BI, BD,AA,LK ) (_jit_I_noinc((_u6(OP)<<26)|(_u5(BO)<<21)|(_u5(BI)<<16)| _d16(BD)| (_u1(AA)<<1)|_u1(LK)), _jit.x.pc++, 0) #define _D( OP,RD,RA, DD ) _jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)| _s16(DD) ) #define _Du( OP,RD,RA, DD ) _jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)| _u16(DD) ) #define _Ds( OP,RD,RA, DD ) _jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)| _su16(DD) ) @@ -93,6 +94,7 @@ typedef unsigned int jit_insn; #define _XO( OP,RD,RA,RB,OE,XO,RC ) _jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|( _u5(RB)<<11)|(_u1(OE)<<10)|( _u9(XO)<<1)|_u1(RC)) #define _M( OP,RS,RA,SH,MB,ME,RC ) _jit_I((_u6(OP)<<26)|(_u5(RS)<<21)|(_u5(RA)<<16)|( _u5(SH)<<11)|(_u5(MB)<< 6)|( _u5(ME)<<1)|_u1(RC)) + /* special purpose registers (form XFX) [1, Section 8.2, page 8-138] */ #define SPR_LR ((8<<5)|(0)) @@ -121,7 +123,7 @@ typedef unsigned int jit_insn; #define Bi(BD) _FB (18, BD, 0, 0) #define BAi(BD) _FBA (18, BD, 1, 0) -#define BLi(BD) _FB (18, BD, 0, 1) +#define BLi(BD) _FB (18, BD, 0, 1) #define BLAi(BD) _FBA (18, BD, 1, 1) #define BCiii(BO,BI,BD) _BB (16, BO, BI, BD, 0, 0) @@ -313,10 +315,10 @@ typedef 
unsigned int jit_insn; #define MOVEIri(R,I) (_siP(16,I) ? LIri(R,I) : \ MOVEIri2(R, _HI(I), _LO(I)) ) -#define SUBIrri(RD,RA,IM) ADDIrri(RD,RA,-_jit_L((IM))) /* [1, Section F.2.1] */ -#define SUBISrri(RD,RA,IM) ADDISrri(RD,RA,-_jit_L((IM))) -#define SUBICrri(RD,RA,IM) ADDICrri(RD,RA,-_jit_L((IM))) -#define SUBIC_rri(RD,RA,IM) ADDIC_rri(RD,RA,-_jit_L((IM))) +#define SUBIrri(RD,RA,IM) ADDIrri(RD,RA,-_LO((IM))) /* [1, Section F.2.1] */ +#define SUBISrri(RD,RA,IM) ADDISrri(RD,RA,-_LO((IM))) +#define SUBICrri(RD,RA,IM) ADDICrri(RD,RA,-_LO((IM))) +#define SUBIC_rri(RD,RA,IM) ADDIC_rri(RD,RA,-_LO((IM))) #define SUBrrr(RD,RA,RB) SUBFrrr(RD,RB,RA) /* [1, Section F.2.2] */ #define SUBOrrr(RD,RA,RB) SUBFOrrr(RD,RB,RA) @@ -350,17 +352,21 @@ typedef unsigned int jit_insn; #define CLRRWIrri(RA,RS,N) RLWINMrriii(RA, RS, 0, 0, 31-(N)) #define CLRLSLWIrrii(RA,RS,B,N) RLWINMrriii(RA, RS, N, (B)-(N), 31-(N)) + /* 9 below inverts the branch condition and the branch prediction. - * This has an incestuous knowledge of the fact that register 26 - * is used as auxiliary!!! */ + * This has an incestuous knowledge of JIT_AUX */ #define BC_EXT(A, C, D) (_siP(16, _jit_UL(D)-_jit_UL(_jit.x.pc)) \ ? BCiii((A), (C), (D)) \ - : (BCiii((A)^9, (C), _jit.x.pc+5), LISri(26,_HI(D)), ORIrri(26,26,_LO(D)), \ - MTLRr(26), BLR() )) + : (BCiii((A)^9, (C), _jit.x.pc+5), \ + LISri(JIT_AUX,_HI(D)), \ + ORIrri(JIT_AUX,JIT_AUX,_LO(D)), \ + MTLRr(JIT_AUX), BLR() )) #define B_EXT(D) (_siP(16, _jit_UL(D)-_jit_UL(_jit.x.pc)) \ ? 
Bi((D)) \ - : (LISri(26,_HI(D)), ORIrri(26,26,_LO(D)), MTLRr(26), BLR()) ) + : (LISri(JIT_AUX,_HI(D)), \ + ORIrri(JIT_AUX,JIT_AUX,_LO(D)), \ + MTLRr(JIT_AUX), BLR()) ) #define BTii(C,D) BC_EXT(12, C, D) /* [1, Table F-5] */ #define BFii(C,D) BC_EXT( 4, C, D) @@ -379,7 +385,7 @@ typedef unsigned int jit_insn; #define BLTLRi(CR) BCLRii(12, ((CR)<<2)+0) /* [1, Table F-10] */ -#define BLELRi(CR) BCLRii( 4 ((CR)<<2)+1) +#define BLELRi(CR) BCLRii( 4, ((CR)<<2)+1) #define BEQLRi(CR) BCLRii(12, ((CR)<<2)+2) #define BGELRi(CR) BCLRii( 4, ((CR)<<2)+0) #define BGTLRi(CR) BCLRii(12, ((CR)<<2)+1) @@ -405,7 +411,7 @@ typedef unsigned int jit_insn; #define BNULRLi(CR) BCLRLii( 4, ((CR)<<2)+3) #define BLTCTRi(CR) BCCTRii(12, ((CR)<<2)+0) /* [1, Table F-10] */ -#define BLECTRi(CR) BCCTRii( 4 ((CR)<<2)+1) +#define BLECTRi(CR) BCCTRii( 4, ((CR)<<2)+1) #define BEQCTRi(CR) BCCTRii(12, ((CR)<<2)+2) #define BGECTRi(CR) BCCTRii( 4, ((CR)<<2)+0) #define BGTCTRi(CR) BCCTRii(12, ((CR)<<2)+1) @@ -511,7 +517,7 @@ typedef unsigned int jit_insn; #define BNUi(D) BNUii(0,D) #define BLTLii(C,D) BCLiii(12, ((C)<<2)+0, D) /* [1, Table F-??] 
*/ -#define BLELii(C,D) BCLiii( 4 ((C)<<2)+1, D) +#define BLELii(C,D) BCLiii( 4, ((C)<<2)+1, D) #define BEQLii(C,D) BCLiii(12, ((C)<<2)+2, D) #define BGELii(C,D) BCLiii( 4, ((C)<<2)+0, D) #define BGTLii(C,D) BCLiii(12, ((C)<<2)+1, D) @@ -586,7 +592,50 @@ typedef unsigned int jit_insn; #define _LO(I) (_jit_UL(I) & _MASK(16)) #define _HI(I) (_jit_UL(I) >> (16)) +#define _A(OP,RD,RA,RB,RC,XO,RCx) _jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|( _u5(RB)<<11)|(_u5(RC)<<6)|(_u5(XO)<<1)|_u1(RCx)) +#define LFDrri(RD,RA,imm) _D(50,RD,RA,imm) +#define LFDUrri(RD,RA,imm) _D(51,RD,RA,imm) +#define LFDUxrrr(RD,RA,RB) _X(31,RD,RA,RB,631,0) +#define LFDxrrr(RD,RA,RB) _X(31,RD,RA,RB,599,0) + +#define LFSrri(RD,RA,imm) _D(48,RD,RA,imm) +#define LFSUrri(RD,RA,imm) _D(49,RD,RA,imm) +#define LFSUxrrr(RD,RA,RB) _X(31,RD,RA,RB,567,0) +#define LFSxrrr(RD,RA,RB) _X(31,RD,RA,RB,535,0) + +#define STFDrri(RS,RA,imm) _D(54,RS,RA,imm) +#define STFDUrri(RS,RA,imm) _D(55,RS,RA,imm) +#define STFDUxrrr(RS,RA,RB) _X(31,RS,RA,RB,759,0) +#define STFDxrrr(RS,RA,RB) _X(31,RS,RA,RB,727,0) + +#define STFSrri(RS,RA,imm) _D(52,RS,RA,imm) +#define STFSUrri(RS,RA,imm) _D(53,RS,RA,imm) +#define STFSUxrrr(RS,RA,RB) _X(31,RS,RA,RB,695,0) +#define STFSxrrr(RS,RA,RB) _X(31,RS,RA,RB,663,0) +#define STFIWXrrr(RS,RA,RB) _X(31,RS,RA,RB,983,0) + +#define FADDDrrr(RD,RA,RB) _A(63,RD,RA,RB,0,21,0) +#define FADDSrrr(RD,RA,RB) _A(59,RD,RA,RB,0,21,0) +#define FSUBDrrr(RD,RA,RB) _A(63,RD,RA,RB,0,20,0) +#define FSUBSrrr(RD,RA,RB) _A(59,RD,RA,RB,0,20,0) +#define FMULDrrr(RD,RA,RC) _A(63,RD,RA,0,RC,25,0) +#define FMULSrrr(RD,RA,RC) _A(59,RD,RA,0,RC,25,0) +#define FDIVDrrr(RD,RA,RB) _A(63,RD,RA,RB,0,18,0) +#define FDIVSrrr(RD,RA,RB) _A(59,RD,RA,RB,0,18,0) /* fdivs: primary opcode 59, XO 18 (XO 25 is fmuls) */ +#define FSQRTDrr(RD,RB) _A(63,RD,0,RB,0,22,0) +#define FSQRTSrr(RD,RB) _A(59,RD,0,RB,0,22,0) +#define FSELrrrr(RD,RA,RB,RC) _A(63,RD,RA,RB,RC,23,0) +#define FCTIWrr(RD,RB) _X(63,RD,0,RB,14,0) +#define FCTIWZrr(RD,RB) _X(63,RD,0,RB,15,0) +#define FRSPrr(RD,RB)
_X(63,RD,0,RB,12,0) +#define FABSrr(RD,RB) _X(63,RD,0,RB,264,0) +#define FNABSrr(RD,RB) _X(63,RD,0,RB,136,0) +#define FNEGrr(RD,RB) _X(63,RD,0,RB,40,0) +#define FMOVErr(RD,RB) _X(63,RD,0,RB,72,0) +#define FCMPOrrr(CR,RA,RB) _X(63,_u3((CR)<<2),RA,RB,32,0) +#define FCMPUrrr(CR,RA,RB) _X(63,_u3((CR)<<2),RA,RB,0,0) +#define MTFSFIri(CR,IMM) _X(63,_u5((CR)<<2),0,_u5((IMM)<<1),134,0) /*** References: * @@ -594,4 +643,5 @@ typedef unsigned int jit_insn; */ +#endif #endif /* __ccg_asm_ppc_h */ diff --git a/lightning/ppc/core.h b/lightning/ppc/core.h index 1377e5147..14eaae336 100644 --- a/lightning/ppc/core.h +++ b/lightning/ppc/core.h @@ -36,20 +36,24 @@ #define __lightning_core_h struct jit_local_state { - int nextarg_put; /* Next r3-r8 reg. to be written */ - int nextarg_putfp; /* Next r3-r8 reg. to be written */ - int nextarg_get; /* Next r20-r25 reg. to be read */ + int nextarg_puti; /* number of integer args */ + int nextarg_putf; /* number of float args */ + int nextarg_putd; /* number of double args */ + int nextarg_geti; /* Next r20-r25 reg. to be read */ + int nextarg_getd; /* The FP args are picked up from FPR1 -> FPR10 */ + int nbArgs; /* Number of arguments for the prolog */ }; #define JIT_SP 1 #define JIT_RET 3 -#define JIT_R0 9 -#define JIT_R1 10 -#define JIT_R2 30 /* using r8 would limit argument passing */ -#define JIT_V0 29 -#define JIT_V1 28 -#define JIT_V2 27 -#define JIT_AUX 26 /* for 32-bit operands & shift counts */ +#define JIT_R_NUM 3 +#define JIT_V_NUM 7 +#define JIT_R(i) (9+(i)) +#define JIT_V(i) (31-(i)) +#define JIT_AUX JIT_V(JIT_V_NUM) /* for 32-bit operands & shift counts */ + +#define jit_pfx_start() (_jit.jitl.trampolines) +#define jit_pfx_end() (_jit.jitl.free) /* If possible, use the `small' instruction (rd, rs, imm) * else load imm into r26 and use the `big' instruction (rd, rs, r26) @@ -58,6 +62,9 @@ struct jit_local_state { #define jit_chk_imu(imm, small, big) (_uiP(16,(imm)) ? 
(small) : (MOVEIri(JIT_AUX, imm), (big)) ) #define jit_chk_imu15(imm, small, big) (_uiP(15,(imm)) ? (small) : (MOVEIri(JIT_AUX, imm), (big)) ) +#define jit_big_ims(imm, big) (MOVEIri(JIT_AUX, imm), (big)) +#define jit_big_imu(imm, big) (MOVEIri(JIT_AUX, imm), (big)) + /* Helper macros for branches */ #define jit_s_brai(rs, is, jmp) (jit_chk_ims (is, CMPWIri(rs, is), CMPWrr(rs, JIT_AUX)), jmp, _jit.x.pc) #define jit_s_brar(s1, s2, jmp) ( CMPWrr(s1, s2), jmp, _jit.x.pc) @@ -87,38 +94,48 @@ struct jit_local_state { MULLWrrr(31, 31, JIT_AUX), SUBrrr((rs), (rs), JIT_AUX), \ MFLRr(31)) -/* Emit a 2-instruction MOVEI, even if a 1-instruction one is possible - * (it is a rare case for branches, and a fixed sequence of instructions - * is easier to patch). */ -#define jit_movei(reg, imm) (LISri(reg,_HI(imm)), ORIrri((reg),(reg),_LO(imm))) - /* Patch a movei instruction made of a LIS at lis_pc and an ORI at ori_pc. */ -#define jit_patch_movei(lis_pc, ori_pc) \ - (*(lis_pc) &= ~_MASK(16), *lis_pc |= _HI(_jit.x.pc), \ - *(ori_pc) &= ~_MASK(16), *ori_pc |= _LO(_jit.x.pc)) \ +#define jit_patch_movei(lis_pc, ori_pc, dest) \ + (*(lis_pc) &= ~_MASK(16), *(lis_pc) |= _HI(dest), \ + *(ori_pc) &= ~_MASK(16), *(ori_pc) |= _LO(dest)) \ /* Patch a branch instruction */ -#define jit_patch_branch(jump_pc) \ +#define jit_patch_branch(jump_pc,pv) \ (*(jump_pc) &= ~_MASK(16) | 3, \ - *(jump_pc) |= (_jit_UL(_jit.x.pc) - _jit_UL(jump_pc)) & _MASK(16)) + *(jump_pc) |= (_jit_UL(pv) - _jit_UL(jump_pc)) & _MASK(16)) +#define jit_patch_ucbranch(jump_pc,pv) \ + (*(jump_pc) &= ~_MASK(26) | 3, \ + (*(jump_pc) |= (_jit_UL((pv)) - _jit_UL(jump_pc)) & _MASK(26))) + +#define _jit_b_encoding (18 << 26) #define _jit_blr_encoding ((19 << 26) | (20 << 21) | (00 << 16) | (00 << 11) | (16 << 1)) +#define _jit_is_ucbranch(a) (((*(a) & (63<<26)) == _jit_b_encoding)) -#define jit_patch(jump_pc) ( \ +#define jit_patch_at(jump_pc, value) ( \ ((*(jump_pc - 1) & ~1) == _jit_blr_encoding) \ - ? 
jit_patch_movei(((jump_pc) - 4), ((jump_pc) - 3)) \ - : jit_patch_branch((jump_pc) - 1)) + ? jit_patch_movei(((jump_pc) - 4), ((jump_pc) - 3), (value)) \ + : ( _jit_is_ucbranch((jump_pc) - 1) \ + ? jit_patch_ucbranch((jump_pc) - 1, (value)) \ + : jit_patch_branch((jump_pc) - 1, (value)))) +#define jit_patch_movi(movi_pc, val) \ + jit_patch_movei((movi_pc) - 2, (movi_pc) - 1, (val)) + +#define jit_arg_c() (_jitl.nextarg_geti--) +#define jit_arg_i() (_jitl.nextarg_geti--) +#define jit_arg_l() (_jitl.nextarg_geti--) +#define jit_arg_p() (_jitl.nextarg_geti--) +#define jit_arg_s() (_jitl.nextarg_geti--) +#define jit_arg_uc() (_jitl.nextarg_geti--) +#define jit_arg_ui() (_jitl.nextarg_geti--) +#define jit_arg_ul() (_jitl.nextarg_geti--) +#define jit_arg_us() (_jitl.nextarg_geti--) + +/* Check Mach-O-Runtime documentation: Must skip GPR(s) whenever "corresponding" FPR is used */ +#define jit_arg_f() (_jitl.nextarg_geti-- ,_jitl.nextarg_getd++) +#define jit_arg_d() (_jitl.nextarg_geti-=2,_jitl.nextarg_getd++) -#define jit_arg_c() (_jitl.nextarg_get--) -#define jit_arg_i() (_jitl.nextarg_get--) -#define jit_arg_l() (_jitl.nextarg_get--) -#define jit_arg_p() (_jitl.nextarg_get--) -#define jit_arg_s() (_jitl.nextarg_get--) -#define jit_arg_uc() (_jitl.nextarg_get--) -#define jit_arg_ui() (_jitl.nextarg_get--) -#define jit_arg_ul() (_jitl.nextarg_get--) -#define jit_arg_us() (_jitl.nextarg_get--) #define jit_addi_i(d, rs, is) jit_chk_ims((is), ADDICrri((d), (rs), (is)), ADDrrr((d), (rs), JIT_AUX)) #define jit_addr_i(d, s1, s2) ADDrrr((d), (s1), (s2)) #define jit_addci_i(d, rs, is) jit_chk_ims((is), ADDICrri((d), (rs), (is)), ADDCrrr((d), (rs), JIT_AUX)) @@ -159,11 +176,12 @@ struct jit_local_state { #define jit_bosubi_ui(label, rs, is) (jit_chk_ims ((is), SUBICri((rs), (rs), is), SUBCrr((rs), JIT_AUX)), MCRXRi(0), BEQi((label)), _jit.x.pc) #define jit_boaddr_ui(label, s1, s2) ( ADDCrr((s1), (s1), (s2)), MCRXRi(0), BEQi((label)), _jit.x.pc) #define jit_bosubr_ui(label, s1, s2) 
( SUBCrr((s1), (s1), (s2)), MCRXRi(0), BEQi((label)), _jit.x.pc) -#define jit_calli(label) (jit_movei(JIT_AUX, (label)), MTLRr(JIT_AUX), BLRL(), _jit.x.pc) -#define jit_divi_i(d, rs, is) jit_chk_ims(1111111, 0, DIVWrrr ((d), (rs), JIT_AUX)) -#define jit_divi_ui(d, rs, is) jit_chk_imu(1111111, 0, DIVWUrrr((d), (rs), JIT_AUX)) -#define jit_divr_i(d, s1, s2) DIVWrrr ((d), (s1), (s2)) -#define jit_divr_ui(d, s1, s2) DIVWUrrr((d), (s1), (s2)) +#define jit_calli(label) (jit_movi_p(JIT_AUX, (label)), MTCTRr(JIT_AUX), BCTRL(), _jitl.nextarg_puti = _jitl.nextarg_putf = _jitl.nextarg_putd = 0, _jit.x.pc) +#define jit_callr(reg) (MTCTRr(reg), BCTRL()) +#define jit_divi_i(d, rs, is) jit_big_ims((is), DIVWrrr ((d), (rs), JIT_AUX)) +#define jit_divi_ui(d, rs, is) jit_big_imu((is), DIVWUrrr((d), (rs), JIT_AUX)) +#define jit_divr_i(d, s1, s2) DIVWrrr ((d), (s1), (s2)) +#define jit_divr_ui(d, s1, s2) DIVWUrrr((d), (s1), (s2)) #define jit_eqi_i(d, rs, is) (jit_chk_ims((is), SUBIrri(JIT_AUX, (rs), (is)), SUBrrr(JIT_AUX, (rs), JIT_AUX)), SUBFICrri((d), JIT_AUX, 0), ADDErrr((d), (d), JIT_AUX)) #define jit_eqr_i(d, s1, s2) (SUBrrr(JIT_AUX, (s1), (s2)), SUBFICrri((d), JIT_AUX, 0), ADDErrr((d), (d), JIT_AUX)) #define jit_extr_c_i(d, rs) EXTSBrr((d), (rs)) @@ -176,8 +194,8 @@ struct jit_local_state { #define jit_gti_ui(d, rs, is) jit_ubooli ((d), (rs), (is), _gt) #define jit_gtr_i(d, s1, s2) jit_sboolr ((d), (s1), (s2), _gt) #define jit_gtr_ui(d, s1, s2) jit_uboolr ((d), (s1), (s2), _gt) -#define jit_hmuli_i(d, rs, is) jit_chk_ims(1111111, 0, MULHWrrr ((d), (rs), JIT_AUX)) -#define jit_hmuli_ui(d, rs, is) jit_chk_imu(1111111, 0, MULHWUrrr((d), (rs), JIT_AUX)) +#define jit_hmuli_i(d, rs, is) jit_big_ims((is), MULHWrrr ((d), (rs), JIT_AUX)) +#define jit_hmuli_ui(d, rs, is) jit_big_imu((is), MULHWUrrr((d), (rs), JIT_AUX)) #define jit_hmulr_i(d, s1, s2) MULHWrrr ((d), (s1), (s2)) #define jit_hmulr_ui(d, s1, s2) MULHWUrrr((d), (s1), (s2)) #define jit_jmpi(label) (B_EXT((label)), _jit.x.pc) @@ 
-197,16 +215,18 @@ struct jit_local_state { #define jit_ler_i(d, s1, s2) jit_sboolr2((d), (s1), (s2), _gt ) #define jit_ler_ui(d, s1, s2) jit_uboolr2((d), (s1), (s2), _gt ) #define jit_lshi_i(d, rs, is) SLWIrri((d), (rs), (is)) -#define jit_lshr_i(d, s1, s2) (ANDIrri(JIT_AUX, (s2), 31), SLWrrr ((d), (s1), JIT_AUX)) +#define jit_lshr_i(d, s1, s2) (ANDI_rri(JIT_AUX, (s2), 31), SLWrrr ((d), (s1), JIT_AUX)) #define jit_lti_i(d, rs, is) jit_sbooli ((d), (rs), (is), _lt ) #define jit_lti_ui(d, rs, is) jit_ubooli ((d), (rs), (is), _lt ) #define jit_ltr_i(d, s1, s2) jit_sboolr ((d), (s1), (s2), _lt ) #define jit_ltr_ui(d, s1, s2) jit_uboolr ((d), (s1), (s2), _lt ) -#define jit_modi_i(d, rs, is) _jit_mod(jit_divi_i (31, (rs), JIT_AUX), (is)) -#define jit_modi_ui(d, rs, is) _jit_mod(jit_divi_ui(31, (rs), JIT_AUX), (irs)) +#define jit_modi_i(d, rs, is) _jit_mod(jit_divi_i (31, (rs), JIT_AUX), (rs), (is)) +#define jit_modi_ui(d, rs, is) _jit_mod(jit_divi_ui(31, (rs), JIT_AUX), (rs), (is)) #define jit_modr_i(d, s1, s2) (DIVWrrr(JIT_AUX, (s1), (s2)), MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX)) #define jit_modr_ui(d, s1, s2) (DIVWUrrr(JIT_AUX, (s1), (s2)), MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX)) #define jit_movi_i(d, is) MOVEIri((d), (is)) +#define jit_movi_p(d, is) (LISri((d), _HI((is))),ORIrri((d),(d),_LO((is))),_jit.x.pc) + #define jit_movr_i(d, rs) MRrr((d), (rs)) #define jit_muli_i(d, rs, is) jit_chk_ims ((is), MULLIrri((d), (rs), (is)), MULLWrrr((d), (rs), JIT_AUX)) #define jit_muli_ui(d, rs, is) jit_chk_imu15((is), MULLIrri((d), (rs), (is)), MULLWrrr((d), (rs), JIT_AUX)) @@ -218,17 +238,19 @@ struct jit_local_state { #define jit_ori_i(d, rs, is) jit_chk_imu((is), ORIrri((d), (rs), (is)), ORrrr((d), (rs), JIT_AUX)) #define jit_orr_i(d, s1, s2) ORrrr((d), (s1), (s2)) #define jit_popr_i(rs) (LWZrm((rs), 0, 1), ADDIrri(1, 1, 4)) -#define jitfp_prepare(numi, numf, numd) (_jitl.nextarg_put = 3 + (numi) + (numf) + 2*(numd)) +#define 
jit_prepare_i(numi) (_jitl.nextarg_puti = numi) +#define jit_prepare_f(numf) (_jitl.nextarg_putf = numf) +#define jit_prepare_d(numd) (_jitl.nextarg_putd = numd) #define jit_prolog(n) _jit_prolog(&_jit, (n)) #define jit_pushr_i(rs) STWUrm((rs), -4, 1) -#define jit_pusharg_i(rs) (--_jitl.nextarg_put, MRrr(_jitl.nextarg_put, (rs))) -#define jit_ret() jit_jmpr(31) -#define jit_retval(rd) MRrr((rd), 3) +#define jit_pusharg_i(rs) (--_jitl.nextarg_puti, MRrr((3 + _jitl.nextarg_putd * 2 + _jitl.nextarg_putf + _jitl.nextarg_puti), (rs))) +#define jit_ret() _jit_epilog(&_jit) +#define jit_retval_i(rd) MRrr((rd), 3) #define jit_rsbi_i(d, rs, is) jit_chk_ims((is), SUBFICrri((d), (rs), (is)), SUBFCrrr((d), (rs), JIT_AUX)) #define jit_rshi_i(d, rs, is) SRAWIrri((d), (rs), (is)) #define jit_rshi_ui(d, rs, is) SRWIrri ((d), (rs), (is)) -#define jit_rshr_i(d, s1, s2) (ANDIrrr(JIT_AUX, (s2), 31), SRAWrrr ((d), (s1), JIT_AUX)) -#define jit_rshr_ui(d, s1, s2) (ANDIrrr(JIT_AUX, (s2), 31), SRWrrr ((d), (s1), JIT_AUX)) +#define jit_rshr_i(d, s1, s2) (ANDI_rri(JIT_AUX, (s2), 31), SRAWrrr ((d), (s1), JIT_AUX)) +#define jit_rshr_ui(d, s1, s2) (ANDI_rri(JIT_AUX, (s2), 31), SRWrrr ((d), (s1), JIT_AUX)) #define jit_stxi_c(id, rd, rs) jit_chk_ims((id), STBrm((rs), (id), (rd)), STBrx((rs), (rd), JIT_AUX)) #define jit_stxi_i(id, rd, rs) jit_chk_ims((id), STWrm((rs), (id), (rd)), STWrx((rs), (rd), JIT_AUX)) #define jit_stxi_s(id, rd, rs) jit_chk_ims((id), STHrm((rs), (id), (rd)), STHrx((rs), (rd), JIT_AUX)) @@ -237,8 +259,8 @@ struct jit_local_state { #define jit_stxr_s(d1, d2, rs) STHrx((rs), (d1), (d2)) #define jit_subr_i(d, s1, s2) SUBrrr((d), (s1), (s2)) #define jit_subcr_i(d, s1, s2) SUBCrrr((d), (s1), (s2)) -#define jit_subxi_i(d, rs, is) jit_chk_ims(111111111, 0, SUBErrr((d), (rs), JIT_AUX)) -#define jit_subxr_i(d, s1, s2) SUBErrr((d), (s1), (s2)) +#define jit_subxi_i(d, rs, is) jit_big_ims((is), SUBErrr((d), (rs), JIT_AUX)) +#define jit_subxr_i(d, s1, s2) SUBErrr((d), (s1), (s2)) #define 
jit_xori_i(d, rs, is) jit_chk_imu((is), XORIrri((d), (rs), (is)), XORrrr((d), (rs), JIT_AUX)) #define jit_xorr_i(d, s1, s2) XORrrr((d), (s1), (s2)) diff --git a/lightning/ppc/fp.h b/lightning/ppc/fp.h index 176719539..911882d77 100644 --- a/lightning/ppc/fp.h +++ b/lightning/ppc/fp.h @@ -35,70 +35,177 @@ #ifndef __lightning_asm_fp_h #define __lightning_asm_fp_h -#if 0 -/* dummy for now */ +#define JIT_FPR_NUM 6 +#define JIT_FPR(i) (8+(i)) -#define jit_add_two(reg0) FADDrrr(13 - (reg0), 13 - (reg0), 12 - (reg0)) -#define jit_sub_two(reg0) FSUBrrr(13 - (reg0), 13 - (reg0), 12 - (reg0)) -#define jit_mul_two(reg0) FMULrrr(13 - (reg0), 13 - (reg0), 12 - (reg0)) -#define jit_div_two(reg0) FDIVrrr(13 - (reg0), 13 - (reg0), 12 - (reg0)) - -#define jit_abs(reg0) FABSr(13 - (reg0)) -#define jit_sqrt(reg0) FSQRTr(13 - (reg0)) -#define jit_neg(reg0) FNEGr(13 - (reg0)) - -#define jit_ldxi_f(reg0, rs, is) 0 -#define jit_ldxr_f(reg0, s1, s2) 0 -#define jit_ldxi_d(reg0, rs, is) 0 -#define jit_ldxr_d(reg0, s1, s2) 0 -#define jit_ldi_f(reg0, is) 0 -#define jit_ldr_f(reg0, rs) 0 -#define jit_ldi_d(reg0, is) 0 -#define jit_ldr_d(reg0, rs) 0 -#define jit_stxi_f(id, rd, reg0) 0 -#define jit_stxr_f(d1, d2, reg0) 0 -#define jit_stxi_d(id, rd, reg0) 0 -#define jit_stxr_d(d1, d2, reg0) 0 -#define jit_sti_f(id, reg0) 0 -#define jit_str_f(rd, reg0) 0 -#define jit_sti_d(id, reg0) 0 -#define jit_str_d(rd, reg0) 0 +#define JIT_FPFR 0 /* Make space for 1 or 2 words, store address in REG */ -#define jit_data(REG, D1) (_FBA (18, 8, 0, 1), _jit_L(D1), MFLRr(REG)) -#define jit_data2(REG, D1, D2) (_FBA (18, 12, 0, 1), _jit_L(D1), _jit_L(D2), MFLRr(REG)) +#define jit_data(REG, D1) (_FBA (18, 8, 0, 1), _jit_L(D1), MFLRr(REG)) -#define jit_fpimm(reg0, first, second) \ - (jit_data2(JIT_AUX, (first), (second)), \ - jit_ldxi_d((reg0), JIT_AUX, 0)) +#define jit_addr_d(rd,s1,s2) FADDDrrr((rd),(s1),(s2)) +#define jit_subr_d(rd,s1,s2) FSUBDrrr((rd),(s1),(s2)) +#define jit_mulr_d(rd,s1,s2) 
FMULDrrr((rd),(s1),(s2)) +#define jit_divr_d(rd,s1,s2) FDIVDrrr((rd),(s1),(s2)) -#define jit_floor(rd, reg0) jit_call_fp((rd), (reg0), floor) -#define jit_ceil(rd, reg0) jit_call_fp((rd), (reg0), ceil) +#define jit_addr_f(rd,s1,s2) FADDSrrr((rd),(s1),(s2)) +#define jit_subr_f(rd,s1,s2) FSUBSrrr((rd),(s1),(s2)) +#define jit_mulr_f(rd,s1,s2) FMULSrrr((rd),(s1),(s2)) +#define jit_divr_f(rd,s1,s2) FDIVSrrr((rd),(s1),(s2)) -#define jit_call_fp(rd, reg0, fn) \ - jit_fail(#fn " not supported", __FILE__, __LINE__, __FUNCTION__) -/* pass reg0 as first parameter of rd - bl fn - mr r3, rd */ +#define jit_movr_d(rd,rs) ( (rd) == (rs) ? 0 : FMOVErr((rd),(rs))) +#define jit_movi_d(reg0,d) do { \ + double _v = (d); \ + _FBA (18, 12, 0, 1); \ + memcpy(_jit.x.uc_pc, &_v, sizeof (double)); \ + _jit.x.uc_pc += sizeof (double); \ + MFLRr (JIT_AUX); \ + jit_ldxi_d((reg0), JIT_AUX, 0); \ + } while(0) -#define jit_trunc(rd, reg0) (jit_data((rd), 0), \ - FCTIWZrr(13 - (reg0), 13 - (reg0)), \ - STFIWXrrr(13 - (reg0), 0, (rd)), \ - LWZrm((rd), 0, (rd))) -#define jit_round(rd, reg0) (jit_data((rd), 0), \ - FCTIWrr(13 - (reg0), 13 - (reg0)), \ - STFIWXrrr(13 - (reg0), 0, (rd)), \ - LWZrm((rd), 0, (rd))) - -#define jit_cmp(le, ge, reg0) (FCMPOirr(7, 13 - (reg0), 0), \ - CRORiii(28 + _gt, 28 + _gt, 28 + _eq), \ - CRORiii(28 + _lt, 28 + _lt, 28 + _eq), \ - MFCRr((ge)), \ - EXTRWIrrii((le), (ge), 1, 28 + _lt), \ - EXTRWIrrii((ge), (ge), 1, 28 + _gt)) +#define jit_movr_f(rd,rs) ( (rd) == (rs) ? 0 : FMOVErr((rd),(rs))) +#define jit_movi_f(reg0,f) do { \ + float _v = (f); \ + _FBA (18, 8, 0, 1); \ + memcpy(_jit.x.uc_pc, &_v, sizeof (float)); \ + _jit.x.uc_pc += sizeof (float); \ + MFLRr (JIT_AUX); \ + jit_ldxi_f((reg0), JIT_AUX, 0); \ + } while(0) -#endif + +#define jit_abs_d(rd,rs) FABSrr((rd),(rs)) +#define jit_negr_d(rd,rs) FNEGrr((rd),(rs)) +#define jit_sqrt_d(rd,rs) FSQRTDrr((rd),(rs)) + + +#define jit_ldxi_f(reg0, rs, is) (_siP(16,(is)) ? 
LFSrri((reg0),(rs),(is)) : (MOVEIri(JIT_AUX,(is)),LFSxrrr((reg0),(rs),JIT_AUX))) +#define jit_ldxi_d(reg0, rs, is) (_siP(16,(is)) ? LFDrri((reg0),(rs),(is)) : (MOVEIri(JIT_AUX,(is)),LFDxrrr((reg0),(rs),JIT_AUX))) +#define jit_ldxr_f(reg0, s1, s2) LFSxrrr((reg0),(s1),(s2)) +#define jit_ldxr_d(reg0, s1, s2) LFDxrrr((reg0),(s1),(s2)) +#define jit_ldi_f(reg0, is) (_siP(16,(is)) ? LFSrri((reg0),0,(is)) : (MOVEIri(JIT_AUX,(is)),LFSrri((reg0),JIT_AUX,0))) +#define jit_ldi_d(reg0, is) (_siP(16,(is)) ? LFDrri((reg0),0,(is)) : (MOVEIri(JIT_AUX,(is)),LFDrri((reg0),JIT_AUX,0))) +#define jit_ldr_f(reg0, rs) LFSrri((reg0),(rs),0) +#define jit_ldr_d(reg0, rs) LFDrri((reg0),(rs),0) +#define jit_stxi_f(id, rd, reg0) (_siP(16,(id)) ? STFSrri((reg0),(rd),(id)) : (MOVEIri(JIT_AUX,(id)),STFSrri((reg0),(rd),JIT_AUX))) +#define jit_stxi_d(id, rd, reg0) (_siP(16,(id)) ? STFDrri((reg0),(rd),(id)) : (MOVEIri(JIT_AUX,(id)),STFDrri((reg0),(rd),JIT_AUX))) +#define jit_stxr_f(d1, d2, reg0) STFSxrrr((reg0),(d1),(d2)) +#define jit_stxr_d(d1, d2, reg0) STFDxrrr((reg0),(d1),(d2)) +#define jit_sti_f(id, reg0) (_siP(16,(id)) ? STFSrri((reg0),0,(id)) : (MOVEIri(JIT_AUX,(id)),STFSrri((reg0),JIT_AUX,0))) +#define jit_sti_d(id, reg0) (_siP(16,(id)) ? 
STFDrri((reg0),0,(id)) : (MOVEIri(JIT_AUX,(id)),STFDrri((reg0),JIT_AUX,0))) +#define jit_str_f(rd, reg0) STFSrri((reg0),(rd),0) +#define jit_str_d(rd, reg0) STFDrri((reg0),(rd),0) + +#define jit_fpboolr(d, s1, s2, rcbit) ( \ + FCMPOrrr(_cr0,(s1),(s2)), \ + MFCRr((d)), \ + EXTRWIrrii((d), (d), 1, (rcbit))) + +#define jit_fpboolr_neg(d, s1, s2,rcbit) ( \ + FCMPOrrr(_cr0,(s1),(s2)), \ + MFCRr((d)), \ + EXTRWIrrii((d), (d), 1, (rcbit)), \ + XORIrri((d), (d), 1)) + +#define jit_fpboolur(d, s1, s2, rcbit) ( \ + FCMPUrrr(_cr0,(s1),(s2)), \ + MFCRr((d)), \ + EXTRWIrrii((d), (d), 1, (rcbit))) + +#define jit_fpboolur_neg(d, s1, s2,rcbit) ( \ + FCMPUrrr(_cr0,(s1),(s2)), \ + MFCRr((d)), \ + EXTRWIrrii((d), (d), 1, (rcbit)), \ + XORIrri((d), (d), 1)) + +#define jit_fpboolur_or(d, s1, s2, bit1, bit2) (\ + FCMPUrrr(_cr0,(s1),(s2)), \ + CRORiii((bit1), (bit1), (bit2)), \ + MFCRr((d)), \ + EXTRWIrrii((d), (d), 1, (bit1))) + +#define jit_gtr_d(d, s1, s2) jit_fpboolr ((d),(s1),(s2),_gt) +#define jit_ger_d(d, s1, s2) jit_fpboolr_neg((d),(s1),(s2),_lt) +#define jit_ltr_d(d, s1, s2) jit_fpboolr ((d),(s1),(s2),_lt) +#define jit_ler_d(d, s1, s2) jit_fpboolr_neg((d),(s1),(s2),_gt) +#define jit_eqr_d(d, s1, s2) jit_fpboolr ((d),(s1),(s2),_eq) +#define jit_ner_d(d, s1, s2) jit_fpboolr_neg((d),(s1),(s2),_eq) +#define jit_unordr_d(d, s1, s2) jit_fpboolur ((d),(s1),(s2),_un) +#define jit_ordr_d(d, s1, s2) jit_fpboolur_neg((d),(s1),(s2),_un) +#define jit_unler_d(d, s1, s2) jit_fpboolur_neg ((d), (s1), (s2), _gt) +#define jit_unltr_d(d, s1, s2) jit_fpboolur_or ((d), (s1), (s2), _un, _lt) +#define jit_unger_d(d, s1, s2) jit_fpboolur_neg ((d), (s1), (s2), _lt) +#define jit_ungtr_d(d, s1, s2) jit_fpboolur_or ((d), (s1), (s2), _un, _gt) +#define jit_ltgtr_d(d, s1, s2) jit_fpboolur_or ((d), (s1), (s2), _gt, _lt) +#define jit_uneqr_d(d, s1, s2) jit_fpboolur_or ((d), (s1), (s2), _un, _eq) + +#define jit_fpbr(d, s1, s2, rcbit) ( \ + FCMPOrrr(_cr0,(s1),(s2)), \ + BTii ((rcbit), (d))) + +#define 
jit_fpbr_neg(d, s1, s2,rcbit) ( \ + FCMPOrrr(_cr0,(s1),(s2)), \ + BFii ((rcbit), (d))) + +#define jit_fpbur(d, s1, s2, rcbit) ( \ + FCMPUrrr(_cr0,(s1),(s2)), \ + BTii ((rcbit), (d))) + +#define jit_fpbur_neg(d, s1, s2,rcbit) ( \ + FCMPUrrr(_cr0,(s1),(s2)), \ + BFii ((rcbit), (d))) + +#define jit_fpbur_or(d, s1, s2, bit1, bit2) ( \ + FCMPUrrr(_cr0,(s1),(s2)), \ + CRORiii((bit1), (bit1), (bit2)), \ + BTii ((bit1), (d))) + +#define jit_bgtr_d(d, s1, s2) jit_fpbr ((d),(s1),(s2),_gt) +#define jit_bger_d(d, s1, s2) jit_fpbr_neg((d),(s1),(s2),_lt) +#define jit_bltr_d(d, s1, s2) jit_fpbr ((d),(s1),(s2),_lt) +#define jit_bler_d(d, s1, s2) jit_fpbr_neg((d),(s1),(s2),_gt) +#define jit_beqr_d(d, s1, s2) jit_fpbr ((d),(s1),(s2),_eq) +#define jit_bner_d(d, s1, s2) jit_fpbr_neg((d),(s1),(s2),_eq) +#define jit_bunordr_d(d, s1, s2) jit_fpbur ((d),(s1),(s2),_un) +#define jit_bordr_d(d, s1, s2) jit_fpbur_neg((d),(s1),(s2),_un) +#define jit_bunler_d(d, s1, s2) jit_fpbur_neg ((d), (s1), (s2), _gt) +#define jit_bunltr_d(d, s1, s2) jit_fpbur_or ((d), (s1), (s2), _un, _lt) +#define jit_bunger_d(d, s1, s2) jit_fpbur_neg ((d), (s1), (s2), _lt) +#define jit_bungtr_d(d, s1, s2) jit_fpbur_or ((d), (s1), (s2), _un, _gt) +#define jit_bltgtr_d(d, s1, s2) jit_fpbur_or ((d), (s1), (s2), _gt, _lt) +#define jit_buneqr_d(d, s1, s2) jit_fpbur_or ((d), (s1), (s2), _un, _eq) + +#define jit_getarg_f(rd, ofs) jit_movr_f((rd),(ofs)) +#define jit_getarg_d(rd, ofs) jit_movr_d((rd),(ofs)) +#define jit_pusharg_d(rs) (_jitl.nextarg_putd--,jit_movr_d((_jitl.nextarg_putf+_jitl.nextarg_putd+1), (rs))) +#define jit_pusharg_f(rs) (_jitl.nextarg_putf--,jit_movr_f((_jitl.nextarg_putf+_jitl.nextarg_putd+1), (rs))) +#define jit_retval_d(op1) jit_movr_d(1, (op1)) +#define jit_retval_f(op1) jit_movr_f(1, (op1)) + + +#define jit_floorr_d_i(rd,rs) (MTFSFIri(7,3), \ + FCTIWrr(31,(rs)), \ + MOVEIri(JIT_AUX,-4), \ + STFIWXrrr(31,JIT_SP,JIT_AUX), \ + LWZrm((rd),-4,JIT_SP)) + +#define jit_ceilr_d_i(rd,rs) (MTFSFIri(7,2), \ + 
FCTIWrr(31,(rs)), \ + MOVEIri(JIT_AUX,-4), \ + STFIWXrrr(31,JIT_SP,JIT_AUX), \ + LWZrm((rd),-4,JIT_SP)) + +#define jit_roundr_d_i(rd,rs) (MTFSFIri(7,0), \ + FCTIWrr(31,(rs)), \ + MOVEIri(JIT_AUX,-4), \ + STFIWXrrr(31,JIT_SP,JIT_AUX), \ + LWZrm((rd),-4,JIT_SP)) + +#define jit_truncr_d_i(rd,rs) (FCTIWZrr(31,(rs)), \ + MOVEIri(JIT_AUX,-4), \ + STFIWXrrr(31,JIT_SP,JIT_AUX), \ + LWZrm((rd),-4,JIT_SP)) #endif /* __lightning_asm_h */ diff --git a/lightning/ppc/funcs.h b/lightning/ppc/funcs.h index 38b6a6552..a4a94b8e1 100644 --- a/lightning/ppc/funcs.h +++ b/lightning/ppc/funcs.h @@ -7,7 +7,7 @@ /*********************************************************************** * - * Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. * Written by Paolo Bonzini. * * This file is part of GNU lightning. @@ -69,13 +69,13 @@ jit_flush_code(void *start, void *end) end -= ((long) end) & (cache_line_size - 1); /* Force data cache write-backs */ - for (ddest = start; ddest <= (char *) end; ddest += cache_line_size) { + for (ddest = (char *) start; ddest <= (char *) end; ddest += cache_line_size) { __asm__ __volatile__ ("dcbst 0,%0" : : "r"(ddest)); } __asm__ __volatile__ ("sync" : : ); /* Now invalidate the instruction cache */ - for (idest = start; idest <= (char *) end; idest += cache_line_size) { + for (idest = (char *) start; idest <= (char *) end; idest += cache_line_size) { __asm__ __volatile__ ("icbi 0,%0" : : "r"(idest)); } __asm__ __volatile__ ("isync" : : ); @@ -85,75 +85,78 @@ jit_flush_code(void *start, void *end) #define _jit (*jit) -/* Emit a trampoline for a function. 
- * Upon entrance to the trampoline: - * - R0 = return address for the function - * - LR = address where the real code for the function lies - * - R3-R8 = parameters - * After jumping to the address pointed to by R10: - * - LR = address where the epilog lies (the function must return there) - * - R25-R20 = parameters (order is reversed, 1st argument is R25) - */ -static jit_insn * -_jit_trampoline(jit, n) - register jit_state *jit; - register int n; +static void +_jit_epilog(jit_state *jit) { - static jit_insn trampolines[200]; - static jit_insn *p_trampolines[6], *free = trampolines; - jit_insn *trampo; - int i, ofs, frame_size; + int n = _jitl.nbArgs; + int frame_size, i, ofs; + int first_saved_reg = JIT_AUX - n; + int num_saved_regs = 32 - first_saved_reg; - if (!p_trampolines[n]) { - _jit.x.pc = trampo = p_trampolines[n] = free; + frame_size = 24 + 32 + num_saved_regs * 4; /* r24..r31 + args */ + frame_size += 15; /* the stack must be quad-word */ + frame_size &= ~15; /* aligned */ - frame_size = 24 + (6 + n) * 4; /* r26..r31 + args */ - frame_size += 15; /* the stack must be quad-word */ - frame_size &= ~15; /* aligned */ +#ifdef _CALL_DARWIN + LWZrm(0, frame_size + 8, 1); /* lwz r0, x+8(r1) (ret.addr.) */ +#else + LWZrm(0, frame_size + 4, 1); /* lwz r0, x+4(r1) (ret.addr.) */ +#endif + MTLRr(0); /* mtspr LR, r0 */ - STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */ - - for (ofs = frame_size - (6 + n) * 4, i = 26 - n; i <= 31; ofs += 4, i++) { - STWrm(i, ofs, 1); /* stw rI, ofs(r1) */ - } - STWrm(0, ofs+4, 1); /* stw r0, x(r1) */ - for (i = 0; i < n; i++) { - MRrr(25-i, 3+i); /* save parameters in r25..r20 */ - } - BLRL(); /* blrl */ - LWZrm(0, ofs+4, 1); /* lwz r0, x(r1) (ret.addr.) 
*/ - MTLRr(0); /* mtspr LR, r0 */ - - for (ofs = frame_size - (6 + n) * 4, i = 26 - n; i <= 31; ofs += 4, i++) { - LWZrm(i, ofs, 1); /* lwz rI, ofs(r1) */ - } - ADDIrri(1, 1, frame_size); /* addi r1, r1, x */ - BLR(); /* blr */ - - jit_flush_code(trampo, _jit.x.pc); - free = _jit.x.pc; - } - - return p_trampolines[n]; + ofs = frame_size - num_saved_regs * 4; + LMWrm(first_saved_reg, ofs, 1); /* lmw rI, ofs(r1) */ + ADDIrri(1, 1, frame_size); /* addi r1, r1, x */ + BLR(); /* blr */ } +/* Emit a prolog for a function. + Upon entrance to the trampoline: + - LR = address where the real code for the function lies + - R3-R8 = parameters + Upon finishing the trampoline: + - R0 = return address for the function + - R25-R20 = parameters (order is reversed, 1st argument is R25) + + The +32 in frame_size computation is to account for the parameter area of + a function frame. + + On PPC the frame must have space to host the arguments of any callee. + However, as it currently stands, the argument to jit_trampoline (n) is + the number of arguments of the caller we generate. Therefore, the + callee can overwrite a part of the stack (saved register area) when it + flushes its own parameter on the stack. The addition of a constant + offset = 32 is enough to hold eight 4 bytes arguments. This is less + than perfect but is a reasonable work around for now. + Better solution must be investigated.
*/ static void -_jit_prolog(jit, n) - register jit_state *jit; - register int n; +_jit_prolog(jit_state *jit, int n) { - register jit_insn *save_pc, *trampo; + int frame_size; + int ofs, i; + int first_saved_reg = JIT_AUX - n; + int num_saved_regs = 32 - first_saved_reg; - save_pc = _jit.x.pc; - trampo = _jit_trampoline(jit, n); - _jit.x.pc = save_pc; + _jitl.nextarg_geti = JIT_AUX - 1; + _jitl.nextarg_getd = 1; + _jitl.nbArgs = n; + + frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */ + frame_size += 15; /* the stack must be quad-word */ + frame_size &= ~15; /* aligned */ - _jitl.nextarg_get = 25; MFLRr(0); - MOVEIri(10, trampo); - MTLRr(10); - BLRL(); /* blrl */ - MFLRr(31); /* mflr r31 */ + STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */ + + ofs = frame_size - num_saved_regs * 4; + STMWrm(first_saved_reg, ofs, 1); /* stmw rI, ofs(r1) */ +#ifdef _CALL_DARWIN + STWrm(0, frame_size + 8, 1); /* stw r0, x+8(r1) */ +#else + STWrm(0, frame_size + 4, 1); /* stw r0, x+4(r1) */ +#endif + for (i = 0; i < n; i++) + MRrr(JIT_AUX-1-i, 3+i); /* save parameters below r24 */ } #undef _jit diff --git a/lightning/sparc/asm.h b/lightning/sparc/asm.h index 808bb4ad3..a8c544c4f 100644 --- a/lightning/sparc/asm.h +++ b/lightning/sparc/asm.h @@ -49,6 +49,7 @@ typedef unsigned int jit_insn; +#ifndef LIGHTNING_DEBUG #define _d30(BD) ((_jit_UL(BD) - _jit_UL(_jit.x.pc))>>2) #define _d22(BD) _ck_d(22, _d30(BD)) @@ -82,6 +83,9 @@ typedef unsigned int jit_insn; #define _3( RD, OP3, RS1, I, ASI, RS2) _jit_I((3<<30)| (_u5(RD)<<25)|(_u6(OP3)<<19)|(_u5(RS1)<<14)|(_u1(I)<<13)|(_u8(ASI)<<5)|_u5 (RS2)) #define _3i(RD, OP3, RS1, I, IMM) _jit_I((3<<30)| (_u5(RD)<<25)|(_u6(OP3)<<19)|(_u5(RS1)<<14)|(_u1(I)<<13)| _s13(IMM)) +#define _FP1(RD, RS1, OPF, RS2) _2f((RD), 52, (RS1), (OPF), (RS2)) +#define _FP2(RD, RS1, OPF, RS2) _2f((RD), 53, (RS1), (OPF), (RS2)) + /* basic instructions [Section B, page 87] */ #define ADDrrr(RS1, RS2, RD) _2 ((RD), 0, (RS1), 0, 0, (RS2)) @@ -300,4 +304,80 @@ 
typedef unsigned int jit_insn; #define WRii(IMM, RD) WRrii(0, (IMM), (RD)) #define WRri(RS2, RD) WRrri(0, (RS2), (RD)) +#define LDFSRx(RS1, RS2) _3 (0, 33, (RS1), 0, 0, (RS2)) +#define LDFSRm(RS1, IMM) _3i (0, 33, (RS1), 1, (IMM)) +#define STFSRx(RD1, RD2) _3 (0, 37, (RD1), 0, 0, (RD2)) +#define STFSRm(RD, IMM) _3i (0, 37, (RD), 1, (IMM)) + +#define FITODrr(FRS, FRD) _FP1((FRD), 0, 200, (FRS)) +#define FITOSrr(FRS, FRD) _FP1((FRD), 0, 196, (FRS)) +#define FDTOIrr(FRS, FRD) _FP1((FRD), 0, 210, (FRS)) +#define FSTOIrr(FRS, FRD) _FP1((FRD), 0, 209, (FRS)) +#define FSTODrr(FRS, FRD) _FP1((FRD), 0, 201, (FRS)) +#define FDTOSrr(FRS, FRD) _FP1((FRD), 0, 198, (FRS)) +#define FMOVSrr(FRS, FRD) _FP1((FRD), 0, 1, (FRS)) +#define FNEGSrr(FRS, FRD) _FP1((FRD), 0, 5, (FRS)) +#define FABSSrr(FRS, FRD) _FP1((FRD), 0, 9, (FRS)) +#define FMOVDrr(FRS, FRD) _FP1((FRD), 0, 2, (FRS)) +#define FNEGDrr(FRS, FRD) _FP1((FRD), 0, 6, (FRS)) +#define FABSDrr(FRS, FRD) _FP1((FRD), 0, 10, (FRS)) +#define FSQRTDrr(FRS, FRD) _FP1((FRD), 0, 42, (FRS)) +#define FSQRTSrr(FRS, FRD) _FP1((FRD), 0, 41, (FRS)) + +#define FADDSrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 65, (FRS2)) +#define FSUBSrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 69, (FRS2)) +#define FMULSrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 73, (FRS2)) +#define FDIVSrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 77, (FRS2)) + +#define FADDDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 66, (FRS2)) +#define FSUBDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 70, (FRS2)) +#define FMULDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 74, (FRS2)) +#define FDIVDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 78, (FRS2)) + +#define FCMPSrr(FRS1, FRS2) _FP2(0, (FRS1), 81, (FRS2)) +#define FCMPDrr(FRS1, FRS2) _FP2(0, (FRS1), 82, (FRS2)) + +#define LDFxr(RS1, RS2, RD) _3 ((RD), 32, (RS1), 0, 0, (RS2)) +#define LDFmr(RS1, IMM, RD) _3i ((RD), 32, (RS1), 1, (IMM)) +#define LDDFxr(RS1, RS2, RD) _3 ((RD), 35, (RS1), 0, 0, (RS2)) +#define LDDFmr(RS1, IMM, RD) _3i ((RD), 35, (RS1), 1, (IMM)) 
+#define STFrx(RS, RD1, RD2) _3 ((RS), 36, (RD1), 0, 0, (RD2)) +#define STFrm(RS, RD1, IMM) _3i ((RS), 36, (RD1), 1, (IMM)) +#define STDFrx(RS, RD1, RD2) _3 ((RS), 39, (RD1), 0, 0, (RD2)) +#define STDFrm(RS, RD1, IMM) _3i ((RS), 39, (RD1), 1, (IMM)) + +#define FBNi(DISP) _0 (0, 0, 6, (DISP)) +#define FBN_Ai(DISP) _0 (1, 0, 6, (DISP)) +#define FBNEi(DISP) _0 (0, 1, 6, (DISP)) +#define FBNE_Ai(DISP) _0 (1, 1, 6, (DISP)) +#define FBLGi(DISP) _0 (0, 2, 6, (DISP)) +#define FBLG_Ai(DISP) _0 (1, 2, 6, (DISP)) +#define FBULi(DISP) _0 (0, 3, 6, (DISP)) +#define FBUL_Ai(DISP) _0 (1, 3, 6, (DISP)) +#define FBLi(DISP) _0 (0, 4, 6, (DISP)) +#define FBL_Ai(DISP) _0 (1, 4, 6, (DISP)) +#define FBUGi(DISP) _0 (0, 5, 6, (DISP)) +#define FBUG_Ai(DISP) _0 (1, 5, 6, (DISP)) +#define FBGi(DISP) _0 (0, 6, 6, (DISP)) +#define FBG_Ai(DISP) _0 (1, 6, 6, (DISP)) +#define FBUi(DISP) _0 (0, 7, 6, (DISP)) +#define FBU_Ai(DISP) _0 (1, 7, 6, (DISP)) +#define FBAi(DISP) _0 (0, 8, 6, (DISP)) +#define FBA_Ai(DISP) _0 (1, 8, 6, (DISP)) +#define FBEi(DISP) _0 (0, 9, 6, (DISP)) +#define FBE_Ai(DISP) _0 (1, 9, 6, (DISP)) +#define FBUEi(DISP) _0 (0, 10, 6, (DISP)) +#define FBUE_Ai(DISP) _0 (1, 10, 6, (DISP)) +#define FBGEi(DISP) _0 (0, 11, 6, (DISP)) +#define FBGE_Ai(DISP) _0 (1, 11, 6, (DISP)) +#define FBUGEi(DISP) _0 (0, 12, 6, (DISP)) +#define FBUGE_Ai(DISP) _0 (1, 12, 6, (DISP)) +#define FBLEi(DISP) _0 (0, 13, 6, (DISP)) +#define FBLE_Ai(DISP) _0 (1, 13, 6, (DISP)) +#define FBULEi(DISP) _0 (0, 14, 6, (DISP)) +#define FBULE_Ai(DISP) _0 (1, 14, 6, (DISP)) +#define FBOi(DISP) _0 (0, 15, 6, (DISP)) +#define FBO_Ai(DISP) _0 (1, 15, 6, (DISP)) + +#endif #endif /* __ccg_asm_sparc_h */ diff --git a/lightning/sparc/core.h b/lightning/sparc/core.h index b70259e9b..7912a3b18 100644 --- a/lightning/sparc/core.h +++ b/lightning/sparc/core.h @@ -33,14 +33,13 @@ #ifndef __lightning_core_h #define __lightning_core_h -#define JIT_R0 _Rl(0) -#define JIT_R1 _Rl(1) -#define JIT_R2 _Rl(2) -#define JIT_V0 _Rl(3) -#define 
JIT_V1 _Rl(4) -#define JIT_V2 _Rl(5) +#define JIT_R_NUM 3 +#define JIT_V_NUM 6 +#define JIT_R(i) ((i) ? _Rl((i) - 1) : _Rg(2)) +#define JIT_V(i) _Rl((i)+2) + #define JIT_BIG _Rg(1) /* %g1 used to make 32-bit operands */ -#define JIT_BIG2 _Rg(2) /* %g2 used to make 32-bit compare operands */ +#define JIT_BIG2 _Ro(7) /* %o7 used to make 32-bit compare operands */ #define JIT_SP _Ro(6) #define JIT_RZERO _Rg(0) #define JIT_RET _Ri(0) @@ -94,10 +93,18 @@ struct jit_local_state { #define jit_prepare_y(rs, is) (SRArir(rs, 31, JIT_BIG), WRri(JIT_BIG, _y), NOP(), NOP(), NOP(), _jit.x.pc -= jit_immsize(is)) #define jit_clr_y(rs, is) ( WRri(0, _y), NOP(), NOP(), NOP(), _jit.x.pc -= jit_immsize(is)) -#define jit_mod(div, mul, d, s1, s2) ( \ - div (JIT_BIG2, s1, s2), \ - mul (JIT_BIG2, JIT_BIG2, s2), \ - jit_subr_i (d, s1, JIT_BIG2)) +#define jit_modr(jit_div, jit_mul, d, s1, s2) \ + (jit_div (JIT_BIG, s1, s2), \ + jit_mul (JIT_BIG, JIT_BIG, s2), \ + jit_subr_i (d, s1, JIT_BIG)) + +#define jit_modi(jit_divi, jit_muli, jit_divr, jit_mulr, d, rs, is) \ + (_siP(13,(imm)) \ + ? (jit_divi (JIT_BIG, rs, is), \ + jit_muli (JIT_BIG, JIT_BIG, is), \ + jit_subr_i (d, rs, JIT_BIG)) \ + : (SETir ((is), JIT_BIG2), \ + jit_modr (jit_divr, jit_mulr, d, rs, JIT_BIG2))) /* How many instruction are needed to put imm in a register. */ #define jit_immsize(imm) (!(imm) ? 0 : \ @@ -107,10 +114,16 @@ struct jit_local_state { /* branch instructions return the address of the *delay* instruction -- this * is just a helper macro that makes jit_patch more readable. 
*/ -#define jit_patch_(jump_pc) \ +#define jit_patch_(jump_pc,pv) \ (*jump_pc &= ~_MASK(22), \ - *jump_pc |= ((_jit_UL(_jit.x.pc) - _jit_UL(jump_pc)) >> 2) & _MASK(22)) + *jump_pc |= ((_jit_UL((pv)) - _jit_UL(jump_pc)) >> 2) & _MASK(22)) +#define jit_patch_set(sethi_pc, or_pc, dest) \ + (*(sethi_pc) &= ~_MASK(22), *(sethi_pc) |= _HI(dest), \ + *(or_pc) &= ~_MASK(13), *(or_pc) |= _LO(dest)) \ + +#define jit_patch_movi(movi_pc, val) \ + jit_patch_set((movi_pc) - 2, (movi_pc) - 1, (val)) #define jit_arg_c() (_jitl.nextarg_get++) #define jit_arg_i() (_jitl.nextarg_get++) @@ -162,8 +175,10 @@ struct jit_local_state { #define jit_boaddr_ui(label, s1, s2) ( ADDCCrrr((s1), (s2), (s1)), BCSi((label)), NOP(), _jit.x.pc - 1) #define jit_bosubr_ui(label, s1, s2) ( SUBCCrrr((s1), (s2), (s1)), BCSi((label)), NOP(), _jit.x.pc - 1) #define jit_calli(label) (CALLi(label), NOP(), _jit.x.pc - 1) +#define jit_callr(reg) (CALLx((reg), 0), NOP()) + #define jit_divi_i(d, rs, is) (jit_prepare_y((rs), 0x12345678), SETir((is), JIT_BIG), SDIVrrr((rs), JIT_BIG, (d)) ) -#define jit_divi_ui(d, rs, is) (jit_clr_y((rs)), 0x12345678), SETir((is), JIT_BIG), UDIVrrr((rs), JIT_BIG, (d)) ) +#define jit_divi_ui(d, rs, is) (jit_clr_y((rs), 0x12345678), SETir((is), JIT_BIG), UDIVrrr((rs), JIT_BIG, (d)) ) #define jit_divr_i(d, s1, s2) (jit_prepare_y((s1), 0), SDIVrrr((s1), (s2), (d))) #define jit_divr_ui(d, s1, s2) (jit_clr_y((s1), 0), UDIVrrr((s1), (s2), (d))) #define jit_eqi_i(d, rs, is) jit_chk_imm((is), \ @@ -208,11 +223,12 @@ struct jit_local_state { #define jit_lti_ui(d, rs, is) jit_booli ((d), (rs), (is), BLUi(_jit.x.pc + 3) ) #define jit_ltr_i(d, s1, s2) jit_boolr ((d), (s1), (s2), BLi(_jit.x.pc + 3) ) #define jit_ltr_ui(d, s1, s2) jit_boolr ((d), (s1), (s2), BLUi(_jit.x.pc + 3) ) -#define jit_modi_i(d, rs, is) jit_modi(jit_divi_i, jit_muli_i, (d), (rs), (is)) -#define jit_modi_ui(d, rs, is) jit_modi(jit_divi_i, jit_muli_i, (d), (rs), (is)) -#define jit_modr_i(d, s1, s2) jit_modr(jit_divr_i, 
jit_mulr_i, (d), (s1), (s2)) -#define jit_modr_ui(d, s1, s2) jit_modr(jit_divr_i, jit_mulr_i, (d), (s1), (s2)) +#define jit_modi_i(d, rs, is) jit_modi(jit_divi_i, jit_muli_i, jit_divr_i, jit_mulr_i, (d), (rs), (is)) +#define jit_modi_ui(d, rs, is) jit_modi(jit_divi_ui, jit_muli_ui, jit_divr_ui, jit_mulr_ui, (d), (rs), (is)) +#define jit_modr_i(d, s1, s2) jit_modr(jit_divr_i, jit_mulr_i, (d), (s1), (s2)) +#define jit_modr_ui(d, s1, s2) jit_modr(jit_divr_ui, jit_mulr_ui, (d), (s1), (s2)) #define jit_movi_i(d, is) SETir((is), (d)) +#define jit_movi_p(d, is) (SETir2(_HI((is)), _LO((is)), (d)), _jit.x.pc) #define jit_movr_i(d, rs) MOVrr((rs), (d)) #define jit_muli_i(d, rs, is) jit_chk_imm((is), SMULrir((rs), (is), (d)), SMULrrr((rs), JIT_BIG, (d))) #define jit_muli_ui(d, rs, is) jit_chk_imm((is), UMULrir((rs), (is), (d)), UMULrrr((rs), JIT_BIG, (d))) @@ -221,14 +237,14 @@ struct jit_local_state { #define jit_nop() NOP() #define jit_ori_i(d, rs, is) jit_chk_imm((is), ORrir((rs), (is), (d)), ORrrr((rs), JIT_BIG, (d))) #define jit_orr_i(d, s1, s2) ORrrr((s1), (s2), (d)) -#define jit_patch(delay_pc) jit_patch_ ( ((delay_pc) - 1) ) +#define jit_patch_at(delay_pc, pv) jit_patch_ (((delay_pc) - 1) , (pv)) #define jit_popr_i(rs) (LDmr(JIT_SP, 0, (rs)), ADDrir(JIT_SP, 8, JIT_SP)) -#define jitfp_prepare(numargs, nf, nd) (_jitl.nextarg_put = (numargs)) -#define jit_prolog(numargs) (SAVErir(JIT_SP, -96, JIT_SP), _jitl.nextarg_get = _Ri(0)) +#define jit_prepare_i(num) (_jitl.nextarg_put += (num)) +#define jit_prolog(numargs) (SAVErir(JIT_SP, -120, JIT_SP), _jitl.nextarg_get = _Ri(0)) #define jit_pushr_i(rs) (STrm((rs), JIT_SP, -8), SUBrir(JIT_SP, 8, JIT_SP)) #define jit_pusharg_i(rs) (--_jitl.nextarg_put, MOVrr((rs), _Ro(_jitl.nextarg_put))) #define jit_ret() (RET(), RESTORE()) -#define jit_retval(rd) MOVrr(_Ro(0), (rd)) +#define jit_retval_i(rd) MOVrr(_Ro(0), (rd)) #define jit_rshi_i(d, rs, is) SRArir((rs), (is), (d)) #define jit_rshi_ui(d, rs, is) SRLrir((rs), (is), (d)) #define 
jit_rshr_i(d, r1, r2) SRArrr((r1), (r2), (d)) diff --git a/lightning/sparc/fp.h b/lightning/sparc/fp.h index 660af3de7..5632ac8d9 100644 --- a/lightning/sparc/fp.h +++ b/lightning/sparc/fp.h @@ -7,7 +7,7 @@ /*********************************************************************** * - * Copyright 2000, 2001, 2002 Free Software Foundation, Inc. + * Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. * Written by Paolo Bonzini. * * This file is part of GNU lightning. @@ -35,99 +35,61 @@ #ifndef __lightning_asm_fp_h #define __lightning_asm_fp_h -#if 0 +#define JIT_FPR_NUM 6 +#define JIT_FPR(i) (30-(i)*2) +#define JIT_FPTMP 18 -/* dummy for now */ +#define jit_addr_f(rd,s1,s2) FADDSrrr((s1), (s2), (rd)) +#define jit_subr_f(rd,s1,s2) FSUBSrrr((s1), (s2), (rd)) +#define jit_mulr_f(rd,s1,s2) FMULSrrr((s1), (s2), (rd)) +#define jit_divr_f(rd,s1,s2) FDIVSrrr((s1), (s2), (rd)) -#define _FP1(RD, RS1, OPF, RS2) _2f((RD), 52, (RS1), (OPF), (RS2)) -#define _FP2(RD, RS1, OPF, RS2) _2f((RD), 53, (RS1), (OPF), (RS2)) +#define jit_addr_d(rd,s1,s2) FADDDrrr((s1), (s2), (rd)) +#define jit_subr_d(rd,s1,s2) FSUBDrrr((s1), (s2), (rd)) +#define jit_mulr_d(rd,s1,s2) FMULDrrr((s1), (s2), (rd)) +#define jit_divr_d(rd,s1,s2) FDIVDrrr((s1), (s2), (rd)) -#define FITODrr(FRS, FRD) _FP1((FRD), 0, 200, (FRS)) -#define FDTOIrr(FRS, FRD) _FP1((FRD), 0, 210, (FRS)) -#define FSTODrr(FRS, FRD) _FP1((FRD), 0, 201, (FRS)) -#define FDTOSrr(FRS, FRD) _FP1((FRD), 0, 198, (FRS)) -#define FMOVSrr(FRS, FRD) _FP1((FRD), 0, 1, (FRS)) -#define FNEGSrr(FRS, FRD) _FP1((FRD), 0, 5, (FRS)) -#define FABSSrr(FRS, FRD) _FP1((FRD), 0, 9, (FRS)) -#define FSQRTDrr(FRS, FRD) _FP1((FRD), 0, 74, (FRS)) +#define jit_movr_f(rd,rs) FMOVSrr((rs), (rd)) +#define jit_abs_d(rd,rs) FABSSrr((rs), (rd)) +#define jit_negr_d(rd,rs) FNEGSrr((rs), (rd)) +#define jit_sqrt_d(rd,rs) FSQRTSrr((rs), (rd)) +#define jit_movr_d(rd,rs) FMOVDrr((rs), (rd)) +#define jit_abs_f(rd,rs) FABSDrr((rs), (rd)) +#define jit_negr_f(rd,rs) 
FNEGDrr((rs), (rd)) +#define jit_sqrt_f(rd,rs) FSQRTDrr((rs), (rd)) +#define jit_extr_f_d(rs, rd) FSTODrr((rs), (rd)) +#define jit_extr_d_f(rs, rd) FDTOSrr((rs), (rd)) -#define FADDDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 66, (FRS2)) -#define FSUBDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 70, (FRS2)) -#define FMULDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 82, (FRS2)) -#define FDIVDrrr(FRS1, FRS2, FRD) _FP1((FRD), (FRS1), 86, (FRS2)) +#define jit_movi_f(rd,immf) \ + do { \ + float _v = (immf); \ + _1(_jit.x.pc + 3), LDFmr(_Ro(7), 8, (rd)); \ + memcpy(_jit.x.uc_pc, &_v, sizeof (float)); \ + _jit.x.uc_pc += sizeof (float); \ + } while(0) -#define FCMPDrr(FRS1, FRS2) _FP2(0, (FRS1), 82, (FRS2)) - -#define LDFxr(RS1, RS2, RD) _3 ((RD), 32, (RS1), 0, 0, (RS2)) -#define LDFmr(RS1, IMM, RD) _3i ((RD), 32, (RS1), 1, (IMM)) -#define LDDFxr(RS1, RS2, RD) _3 ((RD), 35, (RS1), 0, 0, (RS2)) -#define LDDFmr(RS1, IMM, RD) _3i ((RD), 35, (RS1), 1, (IMM)) -#define STFrx(RS, RD1, RD2) _3 ((RS), 36, (RD1), 0, 0, (RD2)) -#define STFrm(RS, RD1, IMM) _3i ((RS), 36, (RD1), 1, (IMM)) -#define STDFrx(RS, RD1, RD2) _3 ((RS), 39, (RD1), 0, 0, (RD2)) -#define STDFrm(RS, RD1, IMM) _3i ((RS), 39, (RD1), 1, (IMM)) - -#define FBNi(DISP) _0 (0, 0, 6, (DISP)) -#define FBN_Ai(DISP) _0 (1, 0, 6, (DISP)) -#define FBNEi(DISP) _0 (0, 1, 6, (DISP)) -#define FBNE_Ai(DISP) _0 (1, 1, 6, (DISP)) -#define FBLGi(DISP) _0 (0, 2, 6, (DISP)) -#define FBLG_Ai(DISP) _0 (1, 2, 6, (DISP)) -#define FBULi(DISP) _0 (0, 3, 6, (DISP)) -#define FBUL_Ai(DISP) _0 (1, 3, 6, (DISP)) -#define FBLi(DISP) _0 (0, 4, 6, (DISP)) -#define FBL_Ai(DISP) _0 (1, 4, 6, (DISP)) -#define FBUGi(DISP) _0 (0, 5, 6, (DISP)) -#define FBUG_Ai(DISP) _0 (1, 5, 6, (DISP)) -#define FBGi(DISP) _0 (0, 6, 6, (DISP)) -#define FBG_Ai(DISP) _0 (1, 6, 6, (DISP)) -#define FBUi(DISP) _0 (0, 7, 6, (DISP)) -#define FBU_Ai(DISP) _0 (1, 7, 6, (DISP)) -#define FBAi(DISP) _0 (0, 8, 6, (DISP)) -#define FBA_Ai(DISP) _0 (1, 8, 6, (DISP)) -#define FBEi(DISP) _0 (0, 9, 6, 
(DISP)) -#define FBE_Ai(DISP) _0 (1, 9, 6, (DISP)) -#define FBUEi(DISP) _0 (0, 10, 6, (DISP)) -#define FBUE_Ai(DISP) _0 (1, 10, 6, (DISP)) -#define FBGEi(DISP) _0 (0, 11, 6, (DISP)) -#define FBGE_Ai(DISP) _0 (1, 11, 6, (DISP)) -#define FBUGEi(DISP) _0 (0, 12, 6, (DISP)) -#define FBUGE_Ai(DISP) _0 (1, 12, 6, (DISP)) -#define FBLEi(DISP) _0 (0, 13, 6, (DISP)) -#define FBLE_Ai(DISP) _0 (1, 13, 6, (DISP)) -#define FBULEi(DISP) _0 (0, 14, 6, (DISP)) -#define FBULE_Ai(DISP) _0 (1, 14, 6, (DISP)) -#define FBOi(DISP) _0 (0, 15, 6, (DISP)) -#define FBO_Ai(DISP) _0 (1, 15, 6, (DISP)) - -#define FSKIPUG() _0d (1, 13, 6, 2) /* fble,a .+8 */ -#define FSKIPUL() _0d (1, 11, 6, 2) /* fbge,a .+8 */ - -#define jit_add_two(reg0) FADDDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - (reg0) * 2) -#define jit_sub_two(reg0) FSUBDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - (reg0) * 2) -#define jit_mul_two(reg0) FMULDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - (reg0) * 2) -#define jit_div_two(reg0) FDIVDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - (reg0) * 2) - -#define jit_abs(reg0) FABSSrr(30 - (reg0) * 2, 30 - (reg0) * 2) -#define jit_neg(reg0) FNEGSrr(30 - (reg0) * 2, 30 - (reg0) * 2) -#define jit_sqrt(reg0) FSQRTDrr(30 - (reg0) * 2, 30 - (reg0) * 2) - -#define jit_fpimm(reg0, first, second) \ - (_1(4), NOP(), _jit_L(first), _jit_L(second), \ - jit_ldxi_d((reg0), _Ro(7), 8)) - -#define jit_ldxi_f(reg0, rs, is) (jit_chk_imm((is), LDFmr((rs), (is), 30 - (reg0) * 2), LDFxr((rs), JIT_BIG, 30 - (reg0) * 2)), FSTODrr(30 - (reg0) * 2, 30 - (reg0) * 2)) -#define jit_ldxi_d(reg0, rs, is) jit_chk_imm((is), LDDFmr((rs), (is), 30 - (reg0) * 2), LDDFxr((rs), JIT_BIG, 30 - (reg0) * 2)) -#define jit_ldxr_f(reg0, s1, s2) (LDFxr((s1), (s2), 30 - (reg0) * 2), FSTODrr(30 - (reg0) * 2, 30 - (reg0) * 2)) -#define jit_ldxr_d(reg0, s1, s2) LDDFxr((s1), (s2), 30 - (reg0) * 2) -#define jit_stxi_f(id, rd, reg0) (FDTOSrr(30 - (reg0) * 2, 30 - (reg0) * 2), jit_chk_imm((id), STFrm(30 - (reg0) * 2, (rd), (id)), STFrx(30 - 
(reg0) * 2, (rd), JIT_BIG))) -#define jit_stxi_d(id, rd, reg0) jit_chk_imm((id), STDFrm(30 - (reg0) * 2, (rd), (id)), STDFrx(30 - (reg0) * 2, (rd), JIT_BIG)) -#define jit_stxr_f(d1, d2, reg0) (FDTOSrr(30 - (reg0) * 2, 30 - (reg0) * 2), STFrx (30 - (reg0) * 2, (d1), (d2))) -#define jit_stxr_d(d1, d2, reg0) STDFrx(30 - (reg0) * 2, (d1), (d2)) +#define jit_movi_d(rd,immd) \ + do { \ + double _v = (immd); \ + if ((long)_jit.x.pc & 4) NOP(); \ + _1(_jit.x.pc + 4); \ + LDDFmr(_Ro(7), 8, (rd)); \ + memcpy(_jit.x.uc_pc, &_v, sizeof (double)); \ + _jit.x.uc_pc += sizeof (double); \ + } while(0) -#define jit_do_round(mode, rd, freg) ( \ - _1(3), \ +#define jit_ldxi_f(rd, rs, is) jit_chk_imm((is), LDFmr((rs), (is), (rd)), LDFxr((rs), JIT_BIG, (rd))) +#define jit_ldxi_d(rd, rs, is) jit_chk_imm((is), LDDFmr((rs), (is), (rd)), LDDFxr((rs), JIT_BIG, (rd))) +#define jit_ldxr_f(rd, s1, s2) LDFxr((s1), (s2), (rd)) +#define jit_ldxr_d(rd, s1, s2) LDDFxr((s1), (s2), (rd)) +#define jit_stxi_f(id, rd, rs) jit_chk_imm((id), STFrm((rs), (rd), (id)), STFrx((rs), (rd), JIT_BIG)) +#define jit_stxi_d(id, rd, rs) jit_chk_imm((id), STDFrm((rs), (rd), (id)), STDFrx((rs), (rd), JIT_BIG)) +#define jit_stxr_f(d1, d2, rs) STFrx((rs), (d1), (d2)) +#define jit_stxr_d(d1, d2, rs) STDFrx((rs), (d1), (d2)) + +#define jit_do_round(mode, rd, freg, macro) ( \ + _1(_jit.x.pc + 3), \ SETHIir(_HI(mode << 29), JIT_BIG), \ NOP(), \ STFSRm(_Ro(7), 8), /* store fsr */ \ @@ -135,29 +97,71 @@ XORrrr(rd, JIT_BIG, JIT_BIG), /* adjust mode */ \ STrm(JIT_BIG, _Ro(7), 8), \ LDFSRm(_Ro(7), 8), /* load fsr */ \ - FDTOIrr(freg, freg), /* truncate */ \ + macro, /* truncate */ \ STrm(rd, _Ro(7), 8), /* load old fsr */ \ LDFSRm(_Ro(7), 8), \ - STFrm(freg, _Ro(7), 8), /* store truncated value */ \ + STFrm(JIT_FPTMP, _Ro(7), 8), /* store truncated value */ \ LDmr(_Ro(7), 8, rd)) /* load it into rd */ +#define jit_do_round_no_fsr(macro1, macro2) ( \ + _1(_jit.x.pc + 3), \ + macro1, \ + NOP(), \ + macro2) -/* call delay slot data 
,--- call lands here */ -#define jit_exti_d(reg0, rs) (_1(3), NOP(), NOP(), STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, 30 - (reg0) * 2), FITODrr(30 - (reg0) * 2, 30 - (reg0) * 2)) -#define jit_round(rd, reg0) (_1(3), FDTOIrr(30 - (reg0) * 2, 30 - (reg0) * 2), NOP(), STFrm(30 - (reg0) * 2, _Ro(7), 8), LDmr(_Ro(7), 8, (rd))) -#define jit_floor(rd, reg0) jit_do_round(3, (rd), (30 - (reg0) * 2)) -#define jit_ceil(rd, reg0) jit_do_round(2, (rd), (30 - (reg0) * 2)) -#define jit_trunc(rd, reg0) jit_do_round(1, (rd), (30 - (reg0) * 2)) +#define jit_extr_i_d(rd, rs) jit_do_round_no_fsr (NOP(), (STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, (rd)), FITODrr((rd), (rd)))) +#define jit_extr_i_f(rd, rs) jit_do_round_no_fsr (NOP(), (STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, (rd)), FITOSrr((rd), (rd)))) +#define jit_roundr_d_i(rd, rs) jit_do_round_no_fsr (FDTOIrr((rs), JIT_FPTMP), (STFrm(JIT_FPTMP, _Ro(7), 8), LDmr(_Ro(7), 8, (rd)))) +#define jit_roundr_f_i(rd, rs) jit_do_round_no_fsr (FSTOIrr((rs), JIT_FPTMP), (STFrm(JIT_FPTMP, _Ro(7), 8), LDmr(_Ro(7), 8, (rd)))) +#define jit_floorr_d_i(rd, rs) jit_do_round(3, (rd), (rs), FDTOIrr((rs), JIT_FPTMP)) +#define jit_ceilr_d_i(rd, rs) jit_do_round(2, (rd), (rs), FDTOIrr((rs), JIT_FPTMP)) +#define jit_truncr_d_i(rd, rs) jit_do_round(1, (rd), (rs), FDTOIrr((rs), JIT_FPTMP)) +#define jit_floorr_f_i(rd, rs) jit_do_round(3, (rd), (rs), FSTOIrr((rs), JIT_FPTMP)) +#define jit_ceilr_f_i(rd, rs) jit_do_round(2, (rd), (rs), FSTOIrr((rs), JIT_FPTMP)) +#define jit_truncr_f_i(rd, rs) jit_do_round(1, (rd), (rs), FSTOIrr((rs), JIT_FPTMP)) -static double jit_zero = 0.0; +#define jit_ltr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBLi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ltr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBLi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ler_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBLEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ler_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBLEi(_jit.x.pc + 3), MOVir 
(1, (d)), MOVir (0, (d))) +#define jit_eqr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_eqr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ner_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBNEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ner_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBNEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ger_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ger_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_gtr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_gtr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unltr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBULi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unltr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBULi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unler_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBULEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unler_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBULEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_uneqr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBUEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_uneqr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBUEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ltgtr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBLGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ltgtr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBLGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unger_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBUGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unger_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBUGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ungtr_d(d, s1,
s2) (FCMPDrr ((s1), (s2)), FBUGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ungtr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBUGi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ordr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBOi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_ordr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBOi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unordr_d(d, s1, s2) (FCMPDrr ((s1), (s2)), FBUi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) +#define jit_unordr_f(d, s1, s2) (FCMPSrr ((s1), (s2)), FBUi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) -#define jit_cmp(le, ge, reg0) (SETHIir(_HI(_jit_UL(&jit_zero)), (le)), \ - LDDFmr((le), _LO(_jit_UL(&jit_zero)), 28 - (reg0) * 2), \ - FCMPDrr(30 - (reg0) * 2, 28 - (reg0) * 2), \ - MOVir(0, (le)), MOVir(0, (ge)), \ - FSKIPUL(), MOVir(1, (ge)), \ - FSKIPUG(), MOVir(1, (le))) +#define jit_prepare_f(num) (_jitl.nextarg_put += (num)) +#define jit_prepare_d(num) (_jitl.nextarg_put += 2 * (num)) -#endif +#define jit_arg_f() (_jitl.nextarg_get++) +#define jit_arg_d() (_jitl.nextarg_get += _jitl.nextarg_get & 1, _jitl.nextarg_get += 2, _jitl.nextarg_get - 2) + +#define jit_getarg_f(rd, ofs) (STrm(ofs, _Ri(6), -24), LDFmr (_Ri(6), -24, (rd))) +#define jit_getarg_d(rd, ofs) (STDrm(ofs, _Ri(6), -24), LDDFmr (_Ri(6), -24, (rd))) + +#define jit_pusharg_f(rs) (STFrm((rs), _Ri(6), -24), --_jitl.nextarg_put, LDmr (_Ri(6), -24, _Ro(_jitl.nextarg_put))) +#define jit_pusharg_d(rs) (STDFrm((rs), _Ri(6), -24), _jitl.nextarg_put -= 2, LDmr (_Ri(6), -24, _Ro(_jitl.nextarg_put))) + +#define jit_retval_f(rs) jit_movr_f(0, rs) +#define jit_retval_d(rs) jit_movr_d(0, rs) #endif /* __lightning_asm_fp_h */ diff --git a/opcode/Makefile.am b/opcode/Makefile.am index 198480d4b..ab4e5b028 100644 --- a/opcode/Makefile.am +++ b/opcode/Makefile.am @@ -1,6 +1,8 @@ EXTRA_LIBRARIES = libdisass.a noinst_LIBRARIES = @LIBDISASS@ +AM_CPPFLAGS = -I$(top_srcdir) + libdisass_a_SOURCES = dis-buf.c 
i386-dis.c ppc-dis.c ppc-opc.c sparc-dis.c \ sparc-opc.c disass.c diff --git a/tests/Makefile.am b/tests/Makefile.am index 04abed31f..ba52a1a46 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,8 +1,7 @@ AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir) -EXTRA_PROGRAMS = testfp funcfp rpnfp -noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add -noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok +noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add bp testfp funcfp rpnfp +noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok bp.ok EXTRA_DIST = $(noinst_DATA) run-test if DISASS @@ -10,7 +9,6 @@ LDADD = $(top_builddir)/opcode/libdisass.a endif if REGRESSION_TESTING -TESTS = fib fibit fibdelay incr printf printf2 rpn add \ - #testfp funcfp rpnfp +TESTS = fib fibit fibdelay incr printf printf2 rpn add bp testfp funcfp rpnfp TESTS_ENVIRONMENT=$(srcdir)/run-test endif diff --git a/tests/bp.c b/tests/bp.c new file mode 100644 index 000000000..47e49c2c5 --- /dev/null +++ b/tests/bp.c @@ -0,0 +1,89 @@ +/******************************** -*- C -*- **************************** + * + * Simple example of recursion and forward references + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2000, 2004 Free Software Foundation, Inc. + * Written by Paolo Bonzini and Laurent Michel. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1, or (at your option) + * any later version. 
+ * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GNU lightning; see the file COPYING.LESSER; if not, write to the + * Free Software Foundation, 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + ***********************************************************************/ + +#include <stdio.h> +#include "lightning.h" + +static jit_insn codeBuffer[1024]; + +typedef int (*pifi)(int); /* Pointer to Int Function of Int */ + +int main() +{ + pifi nfibs = (pifi) (jit_set_ip(codeBuffer).iptr); + int in; /* offset of the argument */ + jit_insn *ref; /* to patch the forward reference */ + jit_insn *mref; /* ref of move to backpatch */ + jit_insn *tp; /* location to patch */ + + jit_prolog (1); + in = jit_arg_ui (); + jit_getarg_ui(JIT_V0, in); /* V0 = n */ + mref= jit_movi_p(JIT_V2,jit_forward ()); /* Generate a dumb movi */ + jit_jmpr(JIT_V2); + /* generate some dumb filler that will never be executed!*/ + jit_addi_ui(JIT_V0,JIT_V0,1); + jit_addi_ui(JIT_V0,JIT_V0,1); + jit_addi_ui(JIT_V0,JIT_V0,1); + jit_addi_ui(JIT_V0,JIT_V0,1); + tp = jit_get_label (); + ref = jit_blti_ui (jit_forward(), JIT_V0, 2); + jit_subi_ui (JIT_V1, JIT_V0, 1); /* V1 = n-1 */ + jit_subi_ui (JIT_V2, JIT_V0, 2); /* V2 = n-2 */ + jit_prepare (1); + jit_pusharg_ui(JIT_V1); + jit_finish(nfibs); + jit_retval(JIT_V1); /* V1 = nfibs(n-1) */ + jit_prepare(1); + jit_pusharg_ui(JIT_V2); + jit_finish(nfibs); + jit_retval(JIT_V2); /* V2 = nfibs(n-2) */ + jit_addi_ui(JIT_V1, JIT_V1, 1); + jit_addr_ui(JIT_RET, JIT_V1, JIT_V2); /* RET = V1 + V2 + 1 */ + jit_ret(); + + jit_patch(ref); /* patch jump */ + jit_movi_i(JIT_RET, 1); /* RET = 1 */ + jit_ret(); + + jit_patch_movi(mref,tp); /* Ok.
Do the back-patching */ + + /* call the generated code, passing 32 as an argument */ + jit_flush_code(codeBuffer, jit_get_ip().ptr); + +#ifdef LIGHTNING_DISASSEMBLE + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); +#endif +#ifndef LIGHTNING_CROSS + printf("nfibs(%d) = %d\n", 32, nfibs(32)); +#endif + return 0; +} diff --git a/tests/bp.ok b/tests/bp.ok new file mode 100644 index 000000000..ce73f6e2e --- /dev/null +++ b/tests/bp.ok @@ -0,0 +1 @@ +nfibs(32) = 7049155 diff --git a/tests/fib.c b/tests/fib.c index 647ec8d63..88c50da22 100644 --- a/tests/fib.c +++ b/tests/fib.c @@ -48,14 +48,14 @@ int main() ref = jit_blti_ui (jit_forward(), JIT_V0, 2); jit_subi_ui (JIT_V1, JIT_V0, 1); /* V1 = n-1 */ jit_subi_ui (JIT_V2, JIT_V0, 2); /* V2 = n-2 */ - jit_prepare (1); + jit_prepare_i(1); jit_pusharg_ui(JIT_V1); jit_finish(nfibs); - jit_retval(JIT_V1); /* V1 = nfibs(n-1) */ - jit_prepare(1); + jit_retval_i (JIT_V1); /* V1 = nfibs(n-1) */ + jit_prepare_i(1); jit_pusharg_ui(JIT_V2); jit_finish(nfibs); - jit_retval(JIT_V2); /* V2 = nfibs(n-2) */ + jit_retval_i (JIT_V2); /* V2 = nfibs(n-2) */ jit_addi_ui(JIT_V1, JIT_V1, 1); jit_addr_ui(JIT_RET, JIT_V1, JIT_V2); /* RET = V1 + V2 + 1 */ jit_ret(); diff --git a/tests/fibit.c b/tests/fibit.c index 4281b19e4..4758de03a 100644 --- a/tests/fibit.c +++ b/tests/fibit.c @@ -65,7 +65,7 @@ int main() jit_flush_code(codeBuffer, jit_get_ip().ptr); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *) codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS /* call the generated code, passing 36 as an argument */ diff --git a/tests/funcfp.c b/tests/funcfp.c index a95f3f530..01e15b5f7 100644 --- a/tests/funcfp.c +++ b/tests/funcfp.c @@ -7,7 +7,7 @@ /*********************************************************************** * - * Copyright 2000 Free Software Foundation, Inc. + * Copyright 2000, 2004 Free Software Foundation, Inc. * Written by Paolo Bonzini. 
* * This file is part of GNU lightning. @@ -34,140 +34,140 @@ #include "lightning.h" static jit_insn codeBuffer[300]; -static struct jit_fp buffer[300]; + +typedef int (*intFunc) (int, int); +typedef double (*dblFunc) (double, double); +typedef float (*floatFunc) (float, float); -typedef int (*intFunc)(int,int); -typedef double (*dblFunc)(double,double); -typedef float (*floatFunc)(float,float); - - -dblFunc makeDblFunc() - /* Generate a function that computes and returns the sum of - its two double arguments (return an int) - i.e., double foo(double x,double y) { return x + y;} - */ +/* Generate a function that computes and returns the sum of + its two double arguments (returns a double) + i.e., double foo(double x,double y) { return x + y;} */ +dblFunc +makeDblFunc () { - dblFunc retVal; - int dbl1,dbl2; - jit_set_ip(codeBuffer); - retVal = (dblFunc)jit_get_ip().iptr; - jit_prolog(2); - jitfp_begin(buffer); - dbl1 = jit_arg_d(); - dbl2 = jit_arg_d(); - - - jitfp_retval(jitfp_add(jitfp_getarg_d(dbl1), - jitfp_getarg_d(dbl2))); - - jit_ret(); - jit_flush_code((char*)retVal,jit_get_ip().ptr); - + dblFunc retVal; + int dbl1, dbl2; + retVal = (dblFunc) jit_get_ip ().iptr; + jit_prolog (2); + dbl1 = jit_arg_d (); + dbl2 = jit_arg_d (); + jit_getarg_d (JIT_FPR0, dbl1); + jit_getarg_d (JIT_FPR1, dbl2); + jit_addr_d (JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_retval_d (JIT_FPR0); + jit_ret (); + jit_flush_code ((char *) retVal, jit_get_ip ().ptr); + #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, retVal, jit_get_ip().ptr); + disassemble (stderr, (char *) retVal, jit_get_ip ().ptr); #endif - return retVal; + return retVal; } -floatFunc makeFloatFunc() - /* Generate a function that computes and returns the sum of - its two double arguments (return an int) - i.e., double foo(double x,double y) { return x + y;} - */ +/* Generate a function that computes and returns the sum of + its two float arguments (returns a float) + i.e., float foo(float x,float y) { return x + y;} */
+floatFunc +makeFloatFunc () { - floatFunc retVal; - int dbl1,dbl2; - //jit_set_ip(codeBuffer); - retVal = (floatFunc)jit_get_ip().iptr; - jit_prolog(2); - jitfp_begin(buffer); - dbl1 = jit_arg_f(); - dbl2 = jit_arg_f(); - - - jitfp_retval(jitfp_add(jitfp_getarg_f(dbl1), - jitfp_getarg_f(dbl2))); - - jit_ret(); - jit_flush_code((char*)retVal,jit_get_ip().ptr); - + floatFunc retVal; + int dbl1, dbl2; + retVal = (floatFunc) jit_get_ip ().iptr; + jit_prolog (2); + dbl1 = jit_arg_f (); + dbl2 = jit_arg_f (); + jit_getarg_f (JIT_FPR0, dbl1); + jit_getarg_f (JIT_FPR1, dbl2); + jit_addr_f (JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_retval_f (JIT_FPR0); + jit_ret (); + jit_flush_code ((char *) retVal, jit_get_ip ().ptr); + #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, retVal, jit_get_ip().ptr); + disassemble (stderr, (char *) retVal, jit_get_ip ().ptr); #endif - return retVal; + return retVal; } -dblFunc makeCallFunc(dblFunc theFunc) +dblFunc +makeCallFunc (dblFunc theFunc) { - dblFunc retVal; - int dbl1,dbl2; - //jit_set_ip(codeBuffer); - retVal = (dblFunc)jit_get_ip().iptr; - jit_prolog(2); - jitfp_begin(buffer); - dbl1 = jit_arg_d(); - dbl2 = jit_arg_d(); + dblFunc retVal; + int dbl1, dbl2; + retVal = (dblFunc) jit_get_ip ().iptr; + jit_prolog (2); + dbl1 = jit_arg_d (); + dbl2 = jit_arg_d (); + + jit_prepare_d (2); + jit_getarg_d (JIT_FPR0, dbl1); + jit_getarg_d (JIT_FPR1, dbl2); + jit_mulr_d (JIT_FPR1, JIT_FPR1, JIT_FPR0); + jit_pusharg_d (JIT_FPR1); + jit_pusharg_d (JIT_FPR0); + jit_finish ((void *) theFunc); + jit_ret (); + jit_flush_code ((char *) retVal, jit_get_ip ().ptr); - jitfp_prepare(0,0,2); - jitfp_pusharg_d(jitfp_mul(jitfp_getarg_d(dbl1), - jitfp_getarg_d(dbl2))); - jitfp_pusharg_d(jitfp_getarg_d(dbl1)); - jit_finish((void*)theFunc); - jit_ret(); - jit_flush_code((char*)retVal,jit_get_ip().ptr); - #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, retVal, jit_get_ip().ptr); + disassemble (stderr, (char *) retVal, jit_get_ip ().ptr); #endif - return retVal; 
+ return retVal; } -floatFunc makeCallFloatFunc(floatFunc theFunc) +floatFunc +makeCallFloatFunc (floatFunc theFunc) { - floatFunc retVal; - int dbl1,dbl2; - //jit_set_ip(codeBuffer); - retVal = (floatFunc)jit_get_ip().iptr; - jit_prolog(2); - jitfp_begin(buffer); - dbl1 = jit_arg_f(); - dbl2 = jit_arg_f(); + floatFunc retVal; + int dbl1, dbl2; + retVal = (floatFunc) jit_get_ip ().iptr; + jit_prolog (2); + dbl1 = jit_arg_f (); + dbl2 = jit_arg_f (); + + jit_prepare_f (2); + jit_getarg_f (JIT_FPR0, dbl1); + jit_getarg_f (JIT_FPR1, dbl2); + jit_mulr_f (JIT_FPR1, JIT_FPR1, JIT_FPR0); + jit_pusharg_f (JIT_FPR1); + jit_pusharg_f (JIT_FPR0); + jit_finish ((void *) theFunc); + jit_ret (); + jit_flush_code ((char *) retVal, jit_get_ip ().ptr); - jitfp_prepare(0,2,0); - jitfp_pusharg_f(jitfp_mul(jitfp_getarg_f(dbl1), - jitfp_getarg_f(dbl2))); - jitfp_pusharg_f(jitfp_getarg_f(dbl1)); - jit_finish((void*)theFunc); - jit_ret(); - jit_flush_code((char*)retVal,jit_get_ip().ptr); - #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, retVal, jit_get_ip().ptr); + disassemble (stderr, (char *) retVal, jit_get_ip ().ptr); #endif - return retVal; + return retVal; } -int main(int argc,char* argv[]) +int +main (int argc, char *argv[]) { - dblFunc myFunc2 = makeDblFunc(); - floatFunc myFunc3 = makeFloatFunc(); - dblFunc callIt1 = makeCallFunc(myFunc2); - floatFunc callIt2 = makeCallFloatFunc(myFunc3); + dblFunc myFunc2, callIt1; + floatFunc myFunc3, callIt2; + double y; + float a, b, z; + jit_set_ip (codeBuffer); + myFunc2 = makeDblFunc (); + myFunc3 = makeFloatFunc (); + callIt1 = makeCallFunc (myFunc2); + callIt2 = makeCallFloatFunc (myFunc3); #ifndef LIGHTNING_CROSS - double y = callIt1(10.5,15.3); - float a = 1.5; - float b = 10.5; - float z = callIt2(a,b); - printf("result is %f\t %f\n",y,z); + y = callIt1 (10.5, 15.3); + a = 1.5; + b = 10.5; + z = callIt2 (a, b); + printf ("result is %.5g\t %.5g\n", y, z); #endif - return 0; + return 0; } diff --git a/tests/funcfp.ok 
b/tests/funcfp.ok index 5077368ec..6282c3055 100644 --- a/tests/funcfp.ok +++ b/tests/funcfp.ok @@ -1 +1 @@ -result is 171.150000 17.250000 +result is 171.15 17.25 diff --git a/tests/printf.c b/tests/printf.c index ec27a2f65..e4291e7af 100644 --- a/tests/printf.c +++ b/tests/printf.c @@ -48,7 +48,7 @@ int main() ofs = jit_arg_i(); jit_movi_p(JIT_R0, "looks like %d bytes sufficed\n"); jit_getarg_i(JIT_R1, ofs); - jit_prepare(2); + jit_prepare_i(2); jit_pusharg_i(JIT_R1); /* push in reverse order */ jit_pusharg_p(JIT_R0); jit_finish(printf); diff --git a/tests/rpnfp.c b/tests/rpnfp.c index 85b10af0a..d756fbe46 100644 --- a/tests/rpnfp.c +++ b/tests/rpnfp.c @@ -38,19 +38,18 @@ static jit_insn codeBuffer[1024]; typedef double (*pdfd) (double); /* Pointer to Double Function of Double */ +static int regs[6] = { JIT_FPR0, JIT_FPR1, JIT_FPR2, JIT_FPR3, JIT_FPR4, JIT_FPR5 }; pdfd compile_rpn (char *expr) { pdfd fn; int ofs, sp = 1; - struct jit_fp buffer[300], *stack[10]; - jitfp_begin (buffer); fn = (pdfd) (jit_get_ip ().dptr); jit_leaf (1); ofs = jit_arg_d (); - stack[0] = jitfp_getarg_d (ofs); + jit_getarg_d (regs[0], ofs); while (*expr) { @@ -62,26 +61,27 @@ compile_rpn (char *expr) { double d = strtod (buf, NULL); expr += n - 1; - stack[sp++] = jitfp_imm (d); + jit_movi_d (regs[sp], d); + sp++; } else if (*expr == '+') { - stack[sp - 2] = jitfp_add (stack[sp - 2], stack[sp - 1]); + jit_addr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]); sp--; } else if (*expr == '-') { - stack[sp - 2] = jitfp_sub (stack[sp - 2], stack[sp - 1]); + jit_subr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]); sp--; } else if (*expr == '*') { - stack[sp - 2] = jitfp_mul (stack[sp - 2], stack[sp - 1]); + jit_mulr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]); sp--; } else if (*expr == '/') { - stack[sp - 2] = jitfp_div (stack[sp - 2], stack[sp - 1]); + jit_divr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]); sp--; } else @@ -91,7 +91,7 @@ compile_rpn (char *expr) } ++expr; } - jitfp_retval 
(stack[0]); + jit_retval_d (regs[0]); jit_ret (); jit_flush_code ((char *) fn, jit_get_ip ().ptr); diff --git a/tests/testfp.c b/tests/testfp.c index 83d3edfc3..8012b65a4 100644 --- a/tests/testfp.c +++ b/tests/testfp.c @@ -7,7 +7,7 @@ /*********************************************************************** * - * Copyright 2000, 2002 Free Software Foundation, Inc. + * Copyright 2000, 2002, 2004 Free Software Foundation, Inc. * Written by Paolo Bonzini. * * This file is part of GNU lightning. @@ -34,104 +34,115 @@ #include "lightning.h" static jit_insn codeBuffer[300]; -static struct jit_fp buffer[300]; static double a; void -int_test(what, code) - char *what; - jit_code code; +int_test(char *what, jit_code code, double b, double c, double d, double e, double f) { - a = -2.6; printf("%s\t\t%d ", what, code.iptr()); - a = -2.4; printf("%d ", code.iptr()); - a = 0.0; printf("%d ", code.iptr()); - a = 2.4; printf("%d ", code.iptr()); - a = 2.6; printf("%d\n", code.iptr()); + a = b; printf("%s\t\t%d ", what, code.iptr()); + a = c; printf("%d ", code.iptr()); + a = d; printf("%d ", code.iptr()); + a = e; printf("%d ", code.iptr()); + a = f; printf("%d\n", code.iptr()); } int main() { jit_code code; + volatile double x = 0.0; code.ptr = (char *) codeBuffer; jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); - jitfp_cmp(JIT_R1, JIT_R0, - jitfp_ldi_d(&a) - ); + jit_ldi_d(JIT_FPR0, &a); + jit_movi_d(JIT_FPR1, 0.0); + jit_gtr_d(JIT_R0, JIT_FPR0, JIT_FPR1); + jit_ltr_d(JIT_R1, JIT_FPR0, JIT_FPR1); jit_subr_i(JIT_RET, JIT_R0, JIT_R1); /* [greater] - [less] = -1/0/1 */ jit_ret(); jit_flush_code(codeBuffer, jit_get_ip().ptr); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS - int_test("compare", code); + int_test("compare", code, -2.6, -2.4, 0, 2.4, 2.6); +#endif + +#ifdef __GNUC__ + jit_set_ip(codeBuffer); + jit_leaf(0); + jit_ldi_d(JIT_FPR0, &a); 
+ jit_movi_d(JIT_FPR1, 0.0); + jit_eqr_d(JIT_R0, JIT_FPR0, JIT_FPR1); + jit_ltgtr_d(JIT_R1, JIT_FPR0, JIT_FPR1); + jit_lshi_i(JIT_R1, JIT_R1, 1); + jit_orr_i(JIT_RET, JIT_R0, JIT_R1); + jit_ret(); + + jit_flush_code(codeBuffer, jit_get_ip().ptr); +#ifdef LIGHTNING_DISASSEMBLE + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); +#endif +#ifndef LIGHTNING_CROSS + int_test("nans", code, x / x, 1 / (a - a), -1 / (a - a), 0.0, -2.0); +#endif +#else + printf ("nans\t\t0 2 2 1 2\n"); +#endif jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); - jitfp_trunc(JIT_RET, - jitfp_ldi_d(&a) - ); + jit_ldi_d(JIT_FPR0, &a); + jit_truncr_d_i(JIT_RET, JIT_FPR0); jit_ret(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS - int_test("trunc", code); + int_test("trunc", code, -2.6, -2.4, 0, 2.4, 2.6); #endif jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); - jitfp_ceil(JIT_RET, - jitfp_ldi_d(&a) - ); + jit_ldi_d(JIT_FPR0, &a); + jit_ceilr_d_i(JIT_RET, JIT_FPR0); jit_ret(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS - int_test("ceil", code); + int_test("ceil", code, -2.6, -2.4, 0, 2.4, 2.6); #endif jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); - jitfp_floor(JIT_RET, - jitfp_ldi_d(&a) - ); + jit_ldi_d(JIT_FPR0, &a); + jit_floorr_d_i(JIT_RET, JIT_FPR0); jit_ret(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS - int_test("floor", code); + int_test("floor", code, -2.6, -2.4, 0, 2.4, 2.6); #endif jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); - jitfp_round(JIT_RET, - jitfp_ldi_d(&a) - ); + jit_ldi_d(JIT_FPR0, &a); + jit_roundr_d_i(JIT_RET, JIT_FPR0); jit_ret();
#ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS - int_test("round", code); + int_test("round", code, -2.6, -2.4, 0, 2.4, 2.6); #endif #if 0 && defined JIT_TRANSCENDENTAL jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); jitfp_sti_d(&a, jitfp_log( jitfp_exp(jitfp_imm(1.0)) @@ -140,7 +151,7 @@ main() jit_ret(); code.vptr(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS printf("log e = \t%f\n", a); @@ -148,7 +159,6 @@ main() jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); jitfp_sti_d(&a, jitfp_atn( jitfp_imm(1.732050807657) @@ -157,7 +167,7 @@ main() jit_ret(); code.vptr(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS printf("pi = \t%f\n", a*3); @@ -165,7 +175,6 @@ main() jit_set_ip(codeBuffer); jit_leaf(0); - jitfp_begin(buffer); jitfp_sti_d(&a, jitfp_tan( jitfp_ldi_d(&a) @@ -174,7 +183,7 @@ main() jit_ret(); code.vptr(); #ifdef LIGHTNING_DISASSEMBLE - disassemble(stderr, codeBuffer, jit_get_ip().ptr); + disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr); #endif #ifndef LIGHTNING_CROSS printf("tan^2 pi/3 = \t%f\n", a*a); diff --git a/tests/testfp.ok b/tests/testfp.ok index 8822deee0..e23a31da3 100644 --- a/tests/testfp.ok +++ b/tests/testfp.ok @@ -1,4 +1,5 @@ -compare 1 1 0 1 1 +compare -1 -1 0 1 1 +nans 0 2 2 1 2 trunc -2 -2 0 2 2 ceil -2 -2 0 3 3 floor -3 -3 0 2 2