From 960280decd66fed85d9d64d2442008420c0cf73f Mon Sep 17 00:00:00 2001 From: pcpa Date: Sat, 18 Oct 2014 11:31:18 -0300 Subject: [PATCH] Implement the jit_rsb* interface. * check/alu_rsb.ok, check/alu_rsb.tst: New files implementing tests for jit_rsb*. * check/Makefile.am, check/lightning.c, include/lightning.h, lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, lib/jit_arm-swf.c, lib/jit_arm-sz.c, lib/jit_arm-vfp.c, lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_names.c, lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c, lib/jit_s390x.c, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-sz.c, lib/jit_x86-x87.c, lib/jit_x86.c, lib/lightning.c: Implement jit_rsb*. This lightning 1.x interface was missing. Most backends synthesize it, but on a few backends (hppa and ia64) it can generate better code, because those backends either have an instruction taking an immediate in "rsb" format (immediate as the left operand), or that is their only immediate form. 
--- ChangeLog | 27 +++++++++++++++++ check/Makefile.am | 13 +++++---- check/alu_rsb.ok | 1 + check/alu_rsb.tst | 49 +++++++++++++++++++++++++++++++ check/lightning.c | 9 ++++++ include/lightning.h | 10 ++++++- lib/jit_aarch64-cpu.c | 9 ++++++ lib/jit_aarch64-fpu.c | 8 +++++ lib/jit_aarch64-sz.c | 3 ++ lib/jit_aarch64.c | 3 ++ lib/jit_alpha-cpu.c | 9 ++++++ lib/jit_alpha-fpu.c | 8 +++++ lib/jit_alpha-sz.c | 3 ++ lib/jit_alpha.c | 3 ++ lib/jit_arm-cpu.c | 9 ++++++ lib/jit_arm-swf.c | 68 +++++++++++++++++++++++++++++++++++++++++++ lib/jit_arm-sz.c | 6 ++++ lib/jit_arm-vfp.c | 8 +++++ lib/jit_arm.c | 3 ++ lib/jit_hppa-cpu.c | 18 +++++++++++- lib/jit_hppa-fpu.c | 8 +++++ lib/jit_hppa-sz.c | 3 ++ lib/jit_hppa.c | 3 ++ lib/jit_ia64-cpu.c | 18 ++++++++++++ lib/jit_ia64-fpu.c | 8 +++++ lib/jit_ia64-sz.c | 3 ++ lib/jit_ia64.c | 3 ++ lib/jit_mips-cpu.c | 9 ++++++ lib/jit_mips-fpu.c | 8 +++++ lib/jit_mips-sz.c | 9 ++++++ lib/jit_mips.c | 3 ++ lib/jit_names.c | 1 + lib/jit_ppc-cpu.c | 17 ++++++++--- lib/jit_ppc-fpu.c | 16 +++++----- lib/jit_ppc-sz.c | 9 ++++++ lib/jit_ppc.c | 3 ++ lib/jit_s390x-cpu.c | 9 ++++++ lib/jit_s390x-fpu.c | 6 ++++ lib/jit_s390x-sz.c | 3 ++ lib/jit_s390x.c | 3 ++ lib/jit_sparc-cpu.c | 11 ++++++- lib/jit_sparc-fpu.c | 45 +++++++++++++++++++++------- lib/jit_sparc-sz.c | 3 ++ lib/jit_sparc.c | 3 ++ lib/jit_x86-cpu.c | 9 ++++++ lib/jit_x86-sse.c | 10 +++++++ lib/jit_x86-sz.c | 9 ++++++ lib/jit_x86-x87.c | 9 ++++++ lib/jit_x86.c | 3 ++ lib/lightning.c | 27 +++++++++-------- 50 files changed, 494 insertions(+), 44 deletions(-) create mode 100644 check/alu_rsb.ok create mode 100644 check/alu_rsb.tst diff --git a/ChangeLog b/ChangeLog index eef40a4ec..36ceff9fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +2014-10-17 Paulo Andrade + + * check/alu_rsb.ok, check/alu_rsb.tst: New files implementing + tests for jit_rsb*. 
+ + * check/Makefile.am, check/lightning.c, include/lightning.h, + lib/jit_aarch64-cpu.c, lib/jit_aarch64-fpu.c, lib/jit_aarch64-sz.c, + lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha-fpu.c, + lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, + lib/jit_arm-swf.c, lib/jit_arm-sz.c, lib/jit_arm-vfp.c, + lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa-fpu.c, + lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, + lib/jit_ia64-fpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips-fpu.c, lib/jit_mips-sz.c, + lib/jit_mips.c, lib/jit_names.c, lib/jit_ppc-cpu.c, + lib/jit_ppc-fpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c, + lib/jit_s390x.c, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, + lib/jit_x86-sse.c, lib/jit_x86-sz.c, lib/jit_x86-x87.c, + lib/jit_x86.c, lib/lightning.c: Implement jit_rsb*. This + lightning 1.x interface was missing. Most backends + synthesize it, but on a few backends (hppa and ia64) it can + generate better code, because those backends either have an + instruction taking an immediate in "rsb" format (immediate as + the left operand), or that is their only immediate form. 
+ 2014-10-17 Paulo Andrade * lib/jit_names.c: New file with single definition of string diff --git a/check/Makefile.am b/check/Makefile.am index 5e0b21b3b..59a4f7a1c 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -60,6 +60,7 @@ EXTRA_DIST = \ alux_add.tst alux_add.ok \ alu_sub.tst alu_sub.ok \ alux_sub.tst alux_sub.ok \ + alu_rsb.tst alu_rsb.ok \ alu_mul.tst alu_mul.ok \ alu_div.tst alu_div.ok \ alu_rem.tst alu_rem.ok \ @@ -99,7 +100,7 @@ base_TESTS = \ ldstr-c ldstxr-c ldstxi-c \ cvt branch \ alu_add alux_add \ - alu_sub alux_sub \ + alu_sub alux_sub alu_rsb \ alu_mul alu_div alu_rem \ alu_and alu_or alu_xor \ alu_lsh alu_rsh \ @@ -126,7 +127,7 @@ x87_TESTS = \ ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87 \ cvt.x87 branch.x87 \ alu_add.x87 alux_add.x87 \ - alu_sub.x87 alux_sub.x87 \ + alu_sub.x87 alux_sub.x87 alu_rsb.x87 \ alu_mul.x87 alu_div.x87 alu_rem.x87 \ alu_and.x87 alu_or.x87 alu_xor.x87 \ alu_lsh.x87 alu_rsh.x87 \ @@ -148,7 +149,7 @@ x87_nodata_TESTS = \ ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata \ cvt.x87.nodata branch.x87.nodata \ alu_add.x87.nodata alux_add.x87.nodata \ - alu_sub.x87.nodata alux_sub.x87.nodata \ + alu_sub.x87.nodata alux_sub.x87.nodata alu_rsb.x87.nodata \ alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \ alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ alu_lsh.x87.nodata alu_rsh.x87.nodata \ @@ -172,7 +173,7 @@ arm_TESTS = \ ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \ cvt.arm branch.arm \ alu_add.arm alux_add.arm \ - alu_sub.arm alux_sub.arm \ + alu_sub.arm alux_sub.arm alu_rsb.arm \ alu_mul.arm alu_div.arm alu_rem.arm \ alu_and.arm alu_or.arm alu_xor.arm \ alu_lsh.arm alu_rsh.arm \ @@ -196,7 +197,7 @@ swf_TESTS = \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ cvt.swf branch.swf \ alu_add.swf alux_add.swf \ - alu_sub.swf alux_sub.swf \ + alu_sub.swf alux_sub.swf alu_rsb.swf \ alu_mul.swf alu_div.swf alu_rem.swf \ alu_and.swf alu_or.swf alu_xor.swf \ alu_lsh.swf alu_rsh.swf \ @@ -220,7 +221,7 @@ nodata_TESTS 
= \ ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ cvt.nodata branch.nodata \ alu_add.nodata alux_add.nodata \ - alu_sub.nodata alux_sub.nodata \ + alu_sub.nodata alux_sub.nodata alu_rsb.nodata \ alu_mul.nodata alu_div.nodata alu_rem.nodata \ alu_and.nodata alu_or.nodata alu_xor.nodata \ alu_lsh.nodata alu_rsh.nodata \ diff --git a/check/alu_rsb.ok b/check/alu_rsb.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_rsb.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_rsb.tst b/check/alu_rsb.tst new file mode 100644 index 000000000..00e08c22f --- /dev/null +++ b/check/alu_rsb.tst @@ -0,0 +1,49 @@ +#include "alu.inc" + +.code + prolog + +#define RSB(N, I0, I1, V) ALU(N, , rsb, I0, I1, V) + + RSB(0, 1, 0x7fffffff, 0x7ffffffe) + RSB(2, 1, 0x80000000, 0x7fffffff) + RSB(3, 0x7fffffff, 0x80000000, 1) + RSB(4, 0xffffffff, 0xffffffff, 0) + RSB(5, 0x7fffffff, 0xffffffff, 0x80000000) + RSB(6, 0, 0x7fffffff, 0x7fffffff) +#if __WORDSIZE == 32 + RSB(7, 0x7fffffff, 1, 0x80000002) + RSB(8, 0x80000000, 1, 0x80000001) + RSB(9, 0x80000000, 0x7fffffff, 0xffffffff) + RSB(10, 0xffffffff, 0x7fffffff, 0x80000000) + RSB(11, 0x7fffffff, 0, 0x80000001) +#else + RSB(7, 0x7fffffff, 1, 0xffffffff80000002) + RSB(8, 0xffffffff80000000, 1, 0x80000001) + RSB(9, 0xffffffff80000000, 0x7fffffff, 0xffffffff) + RSB(10, 0xffffffffffffffff, 0xffffffff7fffffff, 0xffffffff80000000) + RSB(11, 0x7fffffff, 0, 0xffffffff80000001) + RSB(12, 1, 0x7fffffffffffffff, 0x7ffffffffffffffe) + RSB(13, 0x7fffffffffffffff, 1, 0x8000000000000002) + RSB(14, 1, 0x8000000000000000, 0x7fffffffffffffff) + RSB(15, 0x8000000000000000, 1, 0x8000000000000001) + RSB(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + RSB(17, 0x7fffffffffffffff, 0x8000000000000000, 1) + RSB(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000) + RSB(19, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000) + RSB(20, 0xffffffffffffffff, 0xffffffffffffffff, 0) +#endif + +#undef RSB +#define RSB(N, 
T, I0, I1, V) FOP(N, T, rsb, I0, I1, V) + RSB(0, _f, 0.5, -0.5, -1.0) + RSB(1, _f, 0.75, 0.25, -0.5) + RSB(0, _d, 0.5, -0.5, -1.0) + RSB(1, _d, 0.75, 0.25, -0.5) + + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index cf08dea7c..5f179fbed 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -286,6 +286,7 @@ static void addcr(void); static void addci(void); static void subr(void); static void subi(void); static void subxr(void); static void subxi(void); static void subcr(void); static void subci(void); +static void rsbr(void); static void rsbi(void); static void mulr(void); static void muli(void); static void qmulr(void); static void qmuli(void); static void qmulr_u(void); static void qmuli_u(void); @@ -392,6 +393,7 @@ static void arg_f(void); static void getarg_f(void); static void addr_f(void); static void addi_f(void); static void subr_f(void); static void subi_f(void); +static void rsbr_f(void); static void rsbi_f(void); static void mulr_f(void); static void muli_f(void); static void divr_f(void); static void divi_f(void); static void negr_f(void); static void absr_f(void); @@ -442,6 +444,7 @@ static void arg_d(void); static void getarg_d(void); static void addr_d(void); static void addi_d(void); static void subr_d(void); static void subi_d(void); +static void rsbr_d(void); static void rsbi_d(void); static void mulr_d(void); static void muli_d(void); static void divr_d(void); static void divi_d(void); static void negr_d(void); static void absr_d(void); @@ -586,6 +589,7 @@ static instr_t instr_vector[] = { entry(subr), entry(subi), entry(subxr), entry(subxi), entry(subcr), entry(subci), + entry(rsbr), entry(rsbi), entry(mulr), entry(muli), entry(qmulr), entry(qmuli), entry(qmulr_u), entry(qmuli_u), @@ -692,6 +696,7 @@ static instr_t instr_vector[] = { entry(getarg_f), entry(addr_f), entry(addi_f), entry(subr_f), entry(subi_f), + entry(rsbr_f), entry(rsbi_f), entry(mulr_f), entry(muli_f), 
entry(divr_f), entry(divi_f), entry(negr_f), entry(absr_f), @@ -742,6 +747,7 @@ static instr_t instr_vector[] = { entry(getarg_d), entry(addr_d), entry(addi_d), entry(subr_d), entry(subi_d), + entry(rsbr_d), entry(rsbi_d), entry(mulr_d), entry(muli_d), entry(divr_d), entry(divi_d), entry(negr_d), entry(absr_d), @@ -1351,6 +1357,7 @@ entry_ir_ir_ir(addcr) entry_ir_ir_im(addci) entry_ir_ir_ir(subr) entry_ir_ir_im(subi) entry_ir_ir_ir(subxr) entry_ir_ir_im(subxi) entry_ir_ir_ir(subcr) entry_ir_ir_im(subci) +entry_ir_ir_ir(rsbr) entry_ir_ir_im(rsbi) entry_ir_ir_ir(mulr) entry_ir_ir_im(muli) entry_ir_ir_ir_ir(qmulr) entry_ir_ir_ir_im(qmuli) entry_ir_ir_ir_ir(qmulr_u) entry_ir_ir_ir_im(qmuli_u) @@ -1504,6 +1511,7 @@ entry_ca(arg_f) entry_fa(getarg_f) entry_fr_fr_fr(addr_f) entry_fr_fr_fm(addi_f) entry_fr_fr_fr(subr_f) entry_fr_fr_fm(subi_f) +entry_fr_fr_fr(rsbr_f) entry_fr_fr_fm(rsbi_f) entry_fr_fr_fr(mulr_f) entry_fr_fr_fm(muli_f) entry_fr_fr_fr(divr_f) entry_fr_fr_fm(divi_f) entry_fr_fr(negr_f) entry_fr_fr(absr_f) @@ -1554,6 +1562,7 @@ entry_ca(arg_d) entry_fa(getarg_d) entry_fr_fr_fr(addr_d) entry_fr_fr_dm(addi_d) entry_fr_fr_fr(subr_d) entry_fr_fr_dm(subi_d) +entry_fr_fr_fr(rsbr_d) entry_fr_fr_dm(rsbi_d) entry_fr_fr_fr(mulr_d) entry_fr_fr_dm(muli_d) entry_fr_fr_fr(divr_d) entry_fr_fr_dm(divi_d) entry_fr_fr(negr_d) entry_fr_fr(absr_d) diff --git a/include/lightning.h b/include/lightning.h index 2e87905be..926d0f18c 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -228,7 +228,9 @@ typedef enum { #define jit_subxr(u,v,w) jit_new_node_www(jit_code_subxr,u,v,w) #define jit_subxi(u,v,w) jit_new_node_www(jit_code_subxi,u,v,w) jit_code_subxr, jit_code_subxi, - +#define jit_rsbr(u,v,w) jit_subr(u,w,v) +#define jit_rsbi(u,v,w) jit_new_node_www(jit_code_rsbi,u,v,w) + jit_code_rsbi, #define jit_mulr(u,v,w) jit_new_node_www(jit_code_mulr,u,v,w) #define jit_muli(u,v,w) jit_new_node_www(jit_code_muli,u,v,w) jit_code_mulr, jit_code_muli, @@ -533,6 +535,9 @@ typedef enum 
{ #define jit_subr_f(u,v,w) jit_new_node_www(jit_code_subr_f,u,v,w) #define jit_subi_f(u,v,w) jit_new_node_wwf(jit_code_subi_f,u,v,w) jit_code_subr_f, jit_code_subi_f, +#define jit_rsbr_f(u,v,w) jit_subr_f(u,w,v) +#define jit_rsbi_f(u,v,w) jit_new_node_wwf(jit_code_rsbi_f,u,v,w) + jit_code_rsbi_f, #define jit_mulr_f(u,v,w) jit_new_node_www(jit_code_mulr_f,u,v,w) #define jit_muli_f(u,v,w) jit_new_node_wwf(jit_code_muli_f,u,v,w) jit_code_mulr_f, jit_code_muli_f, @@ -675,6 +680,9 @@ typedef enum { #define jit_subr_d(u,v,w) jit_new_node_www(jit_code_subr_d,u,v,w) #define jit_subi_d(u,v,w) jit_new_node_wwd(jit_code_subi_d,u,v,w) jit_code_subr_d, jit_code_subi_d, +#define jit_rsbr_d(u,v,w) jit_subr_d(u,w,v) +#define jit_rsbi_d(u,v,w) jit_new_node_wwd(jit_code_rsbi_d,u,v,w) + jit_code_rsbi_d, #define jit_mulr_d(u,v,w) jit_new_node_www(jit_code_mulr_d,u,v,w) #define jit_muli_d(u,v,w) jit_new_node_wwd(jit_code_muli_d,u,v,w) jit_code_mulr_d, jit_code_muli_d, diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index fd25f7f30..29834199b 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -520,6 +520,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define subxr(r0,r1,r2) SBCS(r0,r1,r2) # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) MUL(r0,r1,r2) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1121,6 +1123,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/lib/jit_aarch64-fpu.c 
b/lib/jit_aarch64-fpu.c index ad20714ce..ea8c1cd52 100644 --- a/lib/jit_aarch64-fpu.c +++ b/lib/jit_aarch64-fpu.c @@ -86,6 +86,9 @@ static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define subr_f(r0,r1,r2) FSUBS(r0,r1,r2) # define subi_f(r0,r1,i0) _subi_f(_jit,r0,r1,i0) static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) _rsbi_f(_jit, r0, r1, i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define mulr_f(r0,r1,r2) FMULS(r0,r1,r2) # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); @@ -198,6 +201,9 @@ static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define subr_d(r0,r1,r2) FSUBD(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) _rsbi_d(_jit, r0, r1, i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define mulr_d(r0,r1,r2) FMULD(r0,r1,r2) # define muli_d(r0,r1,i0) _muli_d(_jit,r0,r1,i0) static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); @@ -420,6 +426,7 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -636,6 +643,7 @@ fbopi(ltgt) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index 98a30f322..bfb096045 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 20, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -178,6 +179,7 @@ 12, /* addi_f */ 4, /* subr_f */ 12, /* subi_f */ + 12, /* rsbi_f */ 4, /* mulr_f */ 12, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 12, /* addi_d */ 4, /* subr_d */ 12, /* 
subi_d */ + 12, /* rsbi_d */ 4, /* mulr_d */ 12, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 6de141c2b..52d5c4f55 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -744,6 +744,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -913,6 +914,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add); case_rrr(sub, _f); case_rrf(sub); + case_rrf(rsb); case_rrr(mul, _f); case_rrf(mul); case_rrr(div, _f); @@ -995,6 +997,7 @@ _emit_code(jit_state_t *_jit) case_rrd(add); case_rrr(sub, _d); case_rrd(sub); + case_rrd(rsb); case_rrr(mul, _d); case_rrd(mul); case_rrr(div, _d); diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index 8787b0ffd..03513fcd7 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -335,6 +335,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t, jit_word_t); # define mulr(r0,r1,r2) MULQ(r1,r2,r0) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -954,6 +956,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/lib/jit_alpha-fpu.c b/lib/jit_alpha-fpu.c index 026fd4127..e6f92cf92 100644 --- a/lib/jit_alpha-fpu.c +++ b/lib/jit_alpha-fpu.c @@ -340,6 +340,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void 
_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) _rsbi_f(_jit, r0, r1, i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) _rsbi_d(_jit, r0, r1, i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) @@ -726,6 +732,7 @@ _subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) TRAPB(); } fopi(sub) +fopi(rsb) static void _subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) @@ -734,6 +741,7 @@ _subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) TRAPB(); } dopi(sub) +dopi(rsb) static void _mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index a5c9ad227..5bcd24ed1 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -23,6 +23,7 @@ 40, /* subci */ 28, /* subxr */ 28, /* subxi */ + 36, /* rsbi */ 4, /* mulr */ 32, /* muli */ 44, /* qmulr */ @@ -178,6 +179,7 @@ 32, /* addi_f */ 8, /* subr_f */ 32, /* subi_f */ + 32, /* rsbi_f */ 8, /* mulr_f */ 32, /* muli_f */ 8, /* divr_f */ @@ -260,6 +262,7 @@ 28, /* addi_d */ 8, /* subr_d */ 28, /* subi_d */ + 28, /* rsbi_d */ 8, /* mulr_d */ 28, /* muli_d */ 8, /* divr_d */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index 6d78c70d4..6fbdf6d46 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -771,6 +771,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -940,6 +941,7 @@ 
_emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1022,6 +1024,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 3f7db0559..552bce3d8 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -868,6 +868,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -1948,6 +1950,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c index f07d65d1c..8afd9e54a 100644 --- a/lib/jit_arm-swf.c +++ b/lib/jit_arm-swf.c @@ -152,6 +152,12 @@ static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define swf_subi_f(r0,r1,i0) swf_fff_(__aeabi_fsub,r0,r1,i0) # define swf_subr_d(r0,r1,r2) swf_ddd(__aeabi_dsub,r0,r1,r2) # define swf_subi_d(r0,r1,i0) swf_ddd_(__aeabi_dsub,r0,r1,i0) +# define swf_rsbr_f(r0, r1, r2) swf_subr_f(r0, r2, r1) +# define swf_rsbi_f(r0, r1, i0) _swf_rsbi_f(_jit, r0, r1, i0) +static void _swf_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define 
swf_rsbr_d(r0, r1, r2) swf_subr_d(r0, r2, r1) +# define swf_rsbi_d(r0, r1, i0) _swf_rsbi_d(_jit, r0, r1, i0) +static void _swf_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define swf_mulr_f(r0,r1,r2) swf_fff(__aeabi_fmul,r0,r1,r2) # define swf_muli_f(r0,r1,i0) swf_fff_(__aeabi_fmul,r0,r1,i0) # define swf_mulr_d(r0,r1,r2) swf_ddd(__aeabi_dmul,r0,r1,r2) @@ -690,6 +696,28 @@ _swf_fff_(jit_state_t *_jit, float (*i0)(float, float), jit_unget_reg_args(); } +static void +_swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_get_reg_args(); + data.f = i0; + movi(_R0_REGNO, data.i); + if (jit_fpr_p(r1)) + swf_ldrin(_R1_REGNO, _FP_REGNO, swf_off(r1) + 8); + else + movr(_R1_REGNO, r1); + swf_call(__aeabi_fsub, fallback, _R3_REGNO); + if (jit_fpr_p(r0)) + swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + else + movr(r0, _R0_REGNO); + jit_unget_reg_args(); +} + static void _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_int32_t r0, jit_int32_t r1, jit_float64_t i1) @@ -699,6 +727,7 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_float64_t d; } data; jit_get_reg_args(); + data.d = i1; if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -730,6 +759,45 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_unget_reg_args(); } +static void +_swf_rsbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) +{ + union { + jit_int32_t i[2]; + jit_float64_t d; + } data; + jit_get_reg_args(); + data.d = i0; + movi(_R0_REGNO, data.i[0]); + movi(_R1_REGNO, data.i[1]); + if (jit_fpr_p(r1)) { + if (!jit_thumb_p() && jit_armv5e_p()) + LDRDIN(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8); + else { + swf_ldrin(_R2_REGNO, _FP_REGNO, swf_off(r1) + 8); + swf_ldrin(_R3_REGNO, _FP_REGNO, swf_off(r1) + 4); + } + } + else { + movr(_R2_REGNO, r1); + movr(_R3_REGNO, r1 + 1); + } + swf_call_with_get_reg(__aeabi_dsub, fallback); + if 
(jit_fpr_p(r0)) { + if (!jit_thumb_p() && jit_armv5e_p()) + STRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + else { + swf_strin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); + swf_strin(_R1_REGNO, _FP_REGNO, swf_off(r0) + 4); + } + } + else { + movr(r0, _R0_REGNO); + movr(r0 + 1, _R1_REGNO); + } + jit_unget_reg_args(); +} + static void _swf_iff(jit_state_t *_jit, int (*i0)(float, float), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 988cc0a36..5d4502893 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -24,6 +24,7 @@ 8, /* subci */ 4, /* subxr */ 4, /* subxi */ + 12, /* rsbi */ 4, /* mulr */ 8, /* muli */ 4, /* qmulr */ @@ -179,6 +180,7 @@ 8, /* addi_f */ 4, /* subr_f */ 8, /* subi_f */ + 8, /* rsbi_f */ 4, /* mulr_f */ 8, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 20, /* addi_d */ 4, /* subr_d */ 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ 20, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 8, /* subci */ 4, /* subxr */ 4, /* subxi */ + 12, /* rsbi */ 4, /* mulr */ 8, /* muli */ 4, /* qmulr */ @@ -532,6 +536,7 @@ 38, /* addi_f */ 38, /* subr_f */ 38, /* subi_f */ + 38, /* rsbi_f */ 38, /* mulr_f */ 38, /* muli_f */ 38, /* divr_f */ @@ -614,6 +619,7 @@ 52, /* addi_d */ 50, /* subr_d */ 52, /* subi_d */ + 52, /* rsbi_d */ 50, /* mulr_d */ 52, /* muli_d */ 50, /* divr_d */ diff --git a/lib/jit_arm-vfp.c b/lib/jit_arm-vfp.c index 9b61ddaa0..27e0ccf4a 100644 --- a/lib/jit_arm-vfp.c +++ b/lib/jit_arm-vfp.c @@ -504,6 +504,12 @@ static void _vfp_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); # define vfp_subr_d(r0,r1,r2) VSUB_F64(r0,r1,r2) # define vfp_subi_d(r0,r1,i0) _vfp_subi_d(_jit,r0,r1,i0) static void _vfp_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define vfp_rsbr_f(r0,r1,r2) vfp_subr_f(r0,r2,r1) +# define vfp_rsbi_f(r0,r1,i0) _vfp_rsbi_f(_jit,r0,r1,i0) +static void _vfp_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define vfp_rsbr_d(r0,r1,r2) 
vfp_subr_d(r0,r2,r1) +# define vfp_rsbi_d(r0,r1,i0) _vfp_rsbi_d(_jit,r0,r1,i0) +static void _vfp_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); # define vfp_mulr_f(r0,r1,r2) VMUL_F32(r0,r1,r2) # define vfp_muli_f(r0,r1,i0) _vfp_muli_f(_jit,r0,r1,i0) static void _vfp_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); @@ -1455,6 +1461,8 @@ _vfp_b##name##i_d(jit_state_t *_jit, \ fopi(add) dopi(add) fopi(sub) +fopi(rsb) +dopi(rsb) dopi(sub) fopi(mul) dopi(mul) diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 179bf68f6..ed086dd5f 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -1074,6 +1074,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1227,6 +1228,7 @@ _emit_code(jit_state_t *_jit) case_vvf(add); case_vvv(sub, _f); case_vvf(sub); + case_vvf(rsb); case_vvv(mul, _f); case_vvf(mul); case_vvv(div, _f); @@ -1312,6 +1314,7 @@ _emit_code(jit_state_t *_jit) case_vvd(add); case_vvv(sub, _d); case_vvd(sub); + case_vvd(rsb); case_vvv(mul, _d); case_vvd(mul); case_vvv(div, _d); diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 3fec62310..59008cbe1 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -677,6 +677,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define subxr(r0,r1,r2) SUB_B(r1,r2,r0) #define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -1629,7 +1631,7 @@ static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (i0 >= -2048 && i0 <= 2047) + if (i0 >= -1024 && i0 <= 1023) ADDI(i0, r1, r0); else { reg = 
jit_get_reg(jit_class_gpr); @@ -1689,6 +1691,20 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 >= -1024 && i0 <= 1023) + SUBI(i0, r1, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} + static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index d1b6efec0..6b6b8bfe5 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -301,6 +301,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); #define subr_d(r0,r1,r2) FSUB_D(r1,r2,r0) #define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +#define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +#define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +#define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +#define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); #define mulr_f(r0,r1,r2) FMPY_S(r1,r2,r0) #define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -741,6 +747,8 @@ fopi(add) dopi(add) fopi(sub) dopi(sub) +fopi(rsb) +dopi(rsb) fopi(mul) dopi(mul) fopi(div) diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 2c6916f8a..663162b88 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 16, /* rsbi */ 28, /* mulr */ 36, /* muli */ 40, /* qmulr */ @@ -178,6 +179,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* 
rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 636f36403..bc277bcb3 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -749,6 +749,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -924,8 +925,10 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(mul, _d); diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index bf701418d..9baeafab7 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -1197,6 +1197,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -3621,6 +3623,22 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 >= -128 && i0 <= 127) + SUBI(r0, i0, r1); + else if (!(r1 & ~3) && i0 >= -2097151 && i0 <= 2097152) + ADDL(r1, -i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + subr(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} + static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index 5557d7b5d..1c5057ca4 100644 --- a/lib/jit_ia64-fpu.c +++ 
b/lib/jit_ia64-fpu.c @@ -474,6 +474,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); #define subr_d(r0,r1,r2) FSUB_D(r0,r1,r2) #define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +#define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +#define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +#define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +#define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); #define mulr_f(r0,r1,r2) FMPY_S(r0,r1,r2) #define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -1109,10 +1115,12 @@ _b##name##i_##type(jit_state_t *_jit, \ fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index 4711f6160..8059d8cbe 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -23,6 +23,7 @@ 48, /* subci */ 64, /* subxr */ 64, /* subxi */ + 48, /* rsbi */ 32, /* mulr */ 48, /* muli */ 96, /* qmulr */ @@ -178,6 +179,7 @@ 32, /* addi_f */ 16, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 0, /* mulr_f */ 16, /* muli_f */ 144, /* divr_f */ @@ -260,6 +262,7 @@ 32, /* addi_d */ 16, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 0, /* mulr_d */ 16, /* muli_d */ 128, /* divr_d */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index e8eee8a03..f35f396dc 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -884,6 +884,7 @@ _emit_code(jit_state_t *_jit) case_rrw(sub,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(subc,); case_rrw(subc,); case_rrr(mul,); @@ -1055,6 +1056,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1137,6 +1139,7 
@@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 9e9778fc7..1f48ecf08 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -419,6 +419,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -959,6 +961,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(t0); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c index bd83103b8..ff31609b1 100644 --- a/lib/jit_mips-fpu.c +++ b/lib/jit_mips-fpu.c @@ -193,6 +193,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define subr_d(r0,r1,r2) SUB_D(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) MUL_S(r0,r1,r2) # define 
muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -585,6 +591,7 @@ _b##name##i_##type(jit_state_t *_jit, \ fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -746,6 +753,7 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) dopi(add) dopi(sub) +dopi(rsb) dopi(mul) dopi(div) diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 2efe8f822..517bedc28 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -24,6 +24,7 @@ 20, /* subci */ 28, /* subxr */ 28, /* subxi */ + 16, /* rsbi */ 8, /* mulr */ 16, /* muli */ 12, /* qmulr */ @@ -179,6 +180,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 16, /* addi_d */ 4, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 4, /* mulr_d */ 16, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 20, /* subci */ 28, /* subxr */ 28, /* subxi */ + 16, /* rsbi */ 8, /* mulr */ 16, /* muli */ 12, /* qmulr */ @@ -532,6 +536,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -614,6 +619,7 @@ 20, /* addi_d */ 4, /* subr_d */ 20, /* subi_d */ + 20, /* rsbi_d */ 4, /* mulr_d */ 20, /* muli_d */ 4, /* divr_d */ @@ -729,6 +735,7 @@ 36, /* subci */ 28, /* subxr */ 28, /* subxi */ + 32, /* rsbi */ 8, /* mulr */ 32, /* muli */ 12, /* qmulr */ @@ -884,6 +891,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -966,6 +974,7 @@ 16, /* addi_d */ 4, /* subr_d */ 16, /* subi_d */ + 16, /* rsbi_d */ 4, /* mulr_d */ 16, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index c31d43bfa..aed730c8c 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -1005,6 +1005,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); 
@@ -1186,6 +1187,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1268,6 +1270,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_names.c b/lib/jit_names.c index 171fbfcd5..6af443383 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -31,6 +31,7 @@ static char *code_name[] = { "subr", "subi", "subcr", "subci", "subxr", "subxi", + "rsbi", "mulr", "muli", "qmulr", "qmuli", "qmulr_u", "qmuli_u", diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 844c8b132..2f65d36b5 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -447,10 +447,10 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define TWI(t,a,s) FDs(3,t,a,s) # define TWGTI(a,s) TWI(8,a,s) # define TWLLEI(a,s) TWI(6,a,s) -# define XOR(d,a,b) FXO(31,a,d,b,0,316) -# define XOR_(d,a,b) FXO_(31,a,d,b,0,316) -# define XORI(s,a,u) FDu(26,s,a,u) -# define XORIS(s,a,u) FDu(27,s,a,u) +# define XOR(d,a,b) FX(31,a,d,b,316) +# define XOR_(d,a,b) FX_(31,a,d,b,316) +# define XORI(s,a,u) FDu(26,a,s,u) +# define XORIS(s,a,u) FDu(27,a,s,u) # define nop(c) _nop(_jit,c) static void _nop(jit_state_t*,jit_int32_t); # define movr(r0,r1) _movr(_jit,r0,r1) @@ -492,6 +492,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define subxr(r0,r1,r2) SUBFE(r0,r2,r1) # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define mulr(r0,r1,r2) MULLW(r0,r1,r2) # define mullr(r0,r1,r2) MULLW(r0,r1,r2) @@ -1119,6 +1121,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) 
jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index fcfca82eb..4bef6c729 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -24,7 +24,6 @@ static void _FA(jit_state_t*,int,int,int,int,int,int,int); #define FXFL(o,m,b,x) _FXFL(_jit,o,m,b,x,0) #define FXFL_(o,m,b,x) _FXFL(_jit,o,m,b,x,1) static void _FXFL(jit_state_t*,int,int,int,int,int) maybe_unused; - # define FABS(d,b) FX(63,d,0,b,264) # define FABS_(d,b) FX_(63,d,0,b,264) # define FADD(d,a,b) FA(63,d,a,b,0,21) @@ -116,7 +115,6 @@ static void _FXFL(jit_state_t*,int,int,int,int,int) maybe_unused; # define STFSU(s,a,d) FDs(53,s,a,d) # define STFSUX(s,a,b) FX(31,s,a,b,695) # define STFSX(s,a,b) FX(31,s,a,b,663) - # define movr_f(r0,r1) movr_d(r0,r1) # define movr_d(r0,r1) _movr_d(_jit,r0,r1) static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); @@ -127,7 +125,6 @@ static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*); # define extr_f(r0,r1) extr_d(r0,r1) # define extr_d(r0,r1) _extr_d(_jit,r0,r1) static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t); - # define truncr_f(r0,r1) truncr_d(r0,r1) # define truncr_f_i(r0,r1) truncr_d_i(r0,r1) # define truncr_d_i(r0,r1) _truncr_d_i(_jit,r0,r1) @@ -140,17 +137,14 @@ static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t); # define truncr_d_l(r0,r1) _truncr_d_l(_jit,r0,r1) static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t); # endif - # define extr_d_f(r0,r1) FRSP(r0,r1) # define extr_f_d(r0,r1) movr_d(r0,r1) - # define absr_f(r0,r1) absr_d(r0,r1) # define absr_d(r0,r1) FABS(r0,r1) # define negr_f(r0,r1) negr_d(r0,r1) # define negr_d(r0,r1) FNEG(r0,r1) # define sqrtr_f(r0,r1) FSQRTS(r0,r1) # define sqrtr_d(r0,r1) FSQRT(r0,r1) - # define addr_f(r0,r1,r2) FADDS(r0,r1,r2) # define addr_d(r0,r1,r2) 
FADD(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) @@ -163,6 +157,12 @@ static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define subr_d(r0,r1,r2) FSUB(r0,r1,r2) # define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) _rsbi_f(_jit,r0,r1,i0) +static void _rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) _rsbi_d(_jit,r0,r1,i0) +static void _rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define mulr_f(r0,r1,r2) FMULS(r0,r1,r2) # define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -175,7 +175,6 @@ static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); # define divr_d(r0,r1,r2) FDIV(r0,r1,r2) # define divi_d(r0,r1,i0) _divi_d(_jit,r0,r1,i0) static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); - # define ltr_f(r0,r1,r2) ltr_d(r0,r1,r2) # define ltr_d(r0,r1,r2) _ltr_d(_jit,r0,r1,r2) static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); @@ -372,7 +371,6 @@ static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*); # define bunordi_d(i0,r0,i1) _bunordi_d(_jit,i0,r0,i1) static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*); - # define ldr_f(r0,r1) LFSX(r0, _R0_REGNO, r1) # define ldi_f(r0,i0) _ldi_f(_jit,r0,i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); @@ -565,6 +563,8 @@ fopi(add) dopi(add) fopi(sub) dopi(sub) +fopi(rsb) +dopi(rsb) fopi(mul) dopi(mul) fopi(div) diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 7fae39436..ba7881306 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -24,6 +24,7 @@ 12, /* subci */ 4, /* subxr */ 
8, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -179,6 +180,7 @@ 12, /* addi_f */ 4, /* subr_f */ 12, /* subi_f */ + 12, /* rsbi_f */ 4, /* mulr_f */ 12, /* muli_f */ 4, /* divr_f */ @@ -261,6 +263,7 @@ 12, /* addi_d */ 4, /* subr_d */ 12, /* subi_d */ + 12, /* rsbi_d */ 4, /* mulr_d */ 12, /* muli_d */ 4, /* divr_d */ @@ -377,6 +380,7 @@ 12, /* subci */ 4, /* subxr */ 8, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 12, /* qmulr */ @@ -532,6 +536,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -614,6 +619,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ @@ -730,6 +736,7 @@ 28, /* subci */ 4, /* subxr */ 8, /* subxi */ + 32, /* rsbi */ 4, /* mulr */ 28, /* muli */ 12, /* qmulr */ @@ -885,6 +892,7 @@ 28, /* addi_f */ 4, /* subr_f */ 28, /* subi_f */ + 28, /* rsbi_f */ 4, /* mulr_f */ 28, /* muli_f */ 4, /* divr_f */ @@ -967,6 +975,7 @@ 28, /* addi_d */ 4, /* subr_d */ 28, /* subi_d */ + 28, /* rsbi_d */ 4, /* mulr_d */ 28, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 88488f091..cbc5ddaab 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -902,6 +902,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1089,6 +1090,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -1171,6 +1173,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index dfa4e56e5..0030bac02 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ 
-934,6 +934,8 @@ static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) _mulr(_jit,r0,r1,r2) static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) @@ -2446,6 +2448,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_s390x-fpu.c b/lib/jit_s390x-fpu.c index 25c2b6ff5..d36f0fbfc 100644 --- a/lib/jit_s390x-fpu.c +++ b/lib/jit_s390x-fpu.c @@ -372,6 +372,10 @@ static void _subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subr_d(r0,r1,r2) _subr_d(_jit,r0,r1,r2) static void _subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define subi_d(r0,r1,i0) dp(sub,r0,r1,i0) +# define rsbr_f(r0,r1,r2) subr_f(r0,r2,r1) +# define rsbi_f(r0,r1,i0) fp(rsb,r0,r1,i0) +# define rsbr_d(r0,r1,r2) subr_d(r0,r2,r1) +# define rsbi_d(r0,r1,i0) dp(rsb,r0,r1,i0) # define mulr_f(r0,r1,r2) _mulr_f(_jit,r0,r1,r2) static void _mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli_f(r0,r1,i0) fp(mul,r0,r1,i0) @@ -597,6 +601,7 @@ _fp(jit_state_t *_jit, jit_code_t code, switch (code) { case jit_code_addi_f: addr_f(r0, r1, rn(reg)); break; case jit_code_subi_f: subr_f(r0, r1, rn(reg)); break; + case jit_code_rsbi_f: rsbr_f(r0, r1, rn(reg)); break; case jit_code_muli_f: mulr_f(r0, r1, rn(reg)); break; case jit_code_divi_f: divr_f(r0, r1, rn(reg)); break; case jit_code_uneqi_f: 
uneqr_f(r0, r1, rn(reg)); break; @@ -616,6 +621,7 @@ _dp(jit_state_t *_jit, jit_code_t code, switch (code) { case jit_code_addi_d: addr_d(r0, r1, rn(reg)); break; case jit_code_subi_d: subr_d(r0, r1, rn(reg)); break; + case jit_code_rsbi_d: rsbr_d(r0, r1, rn(reg)); break; case jit_code_muli_d: mulr_d(r0, r1, rn(reg)); break; case jit_code_divi_d: divr_d(r0, r1, rn(reg)); break; case jit_code_uneqi_d: uneqr_d(r0, r1, rn(reg)); break; diff --git a/lib/jit_s390x-sz.c b/lib/jit_s390x-sz.c index a80b97346..a933e1553 100644 --- a/lib/jit_s390x-sz.c +++ b/lib/jit_s390x-sz.c @@ -23,6 +23,7 @@ 20, /* subci */ 12, /* subxr */ 12, /* subxi */ + 28, /* rsbi */ 8, /* mulr */ 24, /* muli */ 60, /* qmulr */ @@ -178,6 +179,7 @@ 26, /* addi_f */ 8, /* subr_f */ 26, /* subi_f */ + 26, /* rsbi_f */ 6, /* mulr_f */ 26, /* muli_f */ 8, /* divr_f */ @@ -260,6 +262,7 @@ 26, /* addi_d */ 8, /* subr_d */ 26, /* subi_d */ + 26, /* rsbi_d */ 6, /* mulr_d */ 26, /* muli_d */ 8, /* divr_d */ diff --git a/lib/jit_s390x.c b/lib/jit_s390x.c index 4c45db0b0..460d793c5 100644 --- a/lib/jit_s390x.c +++ b/lib/jit_s390x.c @@ -727,6 +727,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -896,6 +897,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add); case_rrr(sub, _f); case_rrf(sub); + case_rrf(rsb); case_rrr(mul, _f); case_rrf(mul); case_rrr(div, _f); @@ -978,6 +980,7 @@ _emit_code(jit_state_t *_jit) case_rrd(add); case_rrr(sub, _d); case_rrd(sub); + case_rrd(rsb); case_rrr(mul, _d); case_rrd(mul); case_rrr(div, _d); diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 095c36466..96150d3f9 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -452,6 +452,8 @@ static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, 
jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0, r1, r2) UMUL(r1, r2, r0) # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); @@ -927,6 +929,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1325,7 +1334,7 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - ldxr_c(r0, r1, rn(reg)); + ldxr_s(r0, r1, rn(reg)); jit_unget_reg(reg); } } diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index 52d760d9f..dc4d574de 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -143,7 +143,6 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FCMPEQ(rs1, rs2) FPop2(0, rs1, SPARC_FCMPEQ, rs2) # define CPop1(rd, rs1, opc, rs2) f3f(rd, 54, rs1, opf, rs2) # define CPop2(rd, rs1, opc, rs2) f3f(rd, 55, rs1, opf, rs2) - # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_f(r0, r1) truncr_f_i(r0, r1) @@ -156,7 +155,6 @@ static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); # define negr_f(r0, r1) FNEGS(r1, r0) # define absr_f(r0, r1) FABSS(r1, r0) # define sqrtr_f(r0, r1) FSQRTS(r1, r0) - # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_d(r0, r1) truncr_d_i(r0, r1) @@ -172,32 +170,40 @@ static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define absr_d(r0, r1) _absr_d(_jit, r0, r1) static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define sqrtr_d(r0, r1) 
FSQRTD(r1, r0) - # define fop1f(op, r0, r1, i0) _fop1f(_jit, op, r0, r1, i0) static void _fop1f(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float32_t*); +# define rfop1f(op, r0, r1, i0) _rfop1f(_jit, op, r0, r1, i0) +static void _rfop1f(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_float32_t*); # define fop1d(op, r0, r1, i0) _fop1d(_jit, op, r0, r1, i0) static void _fop1d(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float64_t*); - +# define rfop1d(op, r0, r1, i0) _rfop1d(_jit, op, r0, r1, i0) +static void _rfop1d(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_float64_t*); # define addr_f(r0, r1, r2) FADDS(r1, r2, r0) # define addi_f(r0, r1, i0) fop1f(SPARC_FADDS, r0, r1, i0) # define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) # define subi_f(r0, r1, i0) fop1f(SPARC_FSUBS, r0, r1, i0) +# define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) +# define rsbi_f(r0, r1, i0) rfop1f(SPARC_FSUBS, r0, r1, i0) +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) # define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) # define muli_f(r0, r1, i0) fop1f(SPARC_FMULS, r0, r1, i0) # define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) # define divi_f(r0, r1, i0) fop1f(SPARC_FDIVS, r0, r1, i0) - # define addr_d(r0, r1, r2) FADDD(r1, r2, r0) # define addi_d(r0, r1, i0) fop1d(SPARC_FADDD, r0, r1, i0) # define subr_d(r0, r1, r2) FSUBD(r1, r2, r0) # define subi_d(r0, r1, i0) fop1d(SPARC_FSUBD, r0, r1, i0) +# define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) +# define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) # define mulr_d(r0, r1, r2) FMULD(r1, r2, r0) # define muli_d(r0, r1, i0) fop1d(SPARC_FMULD, r0, r1, i0) # define divr_d(r0, r1, r2) FDIVD(r1, r2, r0) # define divi_d(r0, r1, i0) fop1d(SPARC_FDIVD, r0, r1, i0) - #define fcr(cc, r0, r1, r2) _fcr(_jit, cc, r0, r1, r2) static void _fcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define fcw(cc, r0, r1, i0) _fcw(_jit, cc, r0, r1, i0) @@ -231,7 +237,6 @@ 
_fcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t*); # define ordi_f(r0, r1, i0) fcw(SPARC_FBO, r0, r1, i0) # define unordr_f(r0, r1, r2) fcr(SPARC_FBU, r0, r1, r2) # define unordi_f(r0, r1, i0) fcw(SPARC_FBU, r0, r1, i0) - #define dcr(cc, r0, r1, r2) _dcr(_jit, cc, r0, r1, r2) static void _dcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define dcw(cc, r0, r1, i0) _dcw(_jit, cc, r0, r1, i0) @@ -265,7 +270,6 @@ _dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*); # define ordi_d(r0, r1, i0) dcw(SPARC_FBO, r0, r1, i0) # define unordr_d(r0, r1, r2) dcr(SPARC_FBU, r0, r1, r2) # define unordi_d(r0, r1, i0) dcw(SPARC_FBU, r0, r1, i0) - # define ldr_f(r0, r1) LDF(r1, 0, r0) # define ldi_f(r0, i0) _ldi_f(_jit, r0, i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); @@ -278,7 +282,6 @@ static void _sti_f(jit_state_t*,jit_int32_t,jit_word_t); # define stxr_f(r0, r1, r2) STF(r2, r1, r0) # define stxi_f(r0, r1, i0) _stxi_f(_jit, r0, r1, i0) static void _stxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); - # define ldr_d(r0, r1) LDDF(r1, 0, r0) # define ldi_d(r0, i0) _ldi_d(_jit, r0, i0) static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); @@ -291,7 +294,6 @@ static void _sti_d(jit_state_t*,jit_int32_t,jit_word_t); # define stxr_d(r0, r1, r2) STDF(r2, r1, r0) # define stxi_d(r0, r1, i0) _stxi_d(_jit, r0, r1, i0) static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); - # define fbr(cc, i0, r0, r1) _fbr(_jit, cc, i0, r0, r1) static jit_word_t _fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); @@ -360,7 +362,6 @@ _dbw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t*); # define bordi_d(i0, r0, i1) dbw(SPARC_FBO, i0, r0, i1) # define bunordr_d(i0, r0, r1) dbr(SPARC_FBU, i0, r0, r1) # define bunordi_d(i0, r0, i1) dbw(SPARC_FBU, i0, r0, i1) - #endif #if CODE @@ -469,6 +470,17 @@ _fop1f(jit_state_t *_jit, jit_int32_t op, jit_unget_reg(reg); } +static void 
+_rfop1f(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i0); + FPop1(r0, rn(reg), op, r1); + jit_unget_reg(reg); +} + static void _fop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) @@ -480,6 +492,17 @@ _fop1d(jit_state_t *_jit, jit_int32_t op, jit_unget_reg(reg); } +static void +_rfop1d(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i0); + FPop1(r0, rn(reg), op, r1); + jit_unget_reg(reg); +} + static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index df52ca357..6acfd0989 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -23,6 +23,7 @@ 12, /* subci */ 4, /* subxr */ 4, /* subxi */ + 16, /* rsbi */ 4, /* mulr */ 12, /* muli */ 8, /* qmulr */ @@ -178,6 +179,7 @@ 16, /* addi_f */ 4, /* subr_f */ 16, /* subi_f */ + 16, /* rsbi_f */ 4, /* mulr_f */ 16, /* muli_f */ 4, /* divr_f */ @@ -260,6 +262,7 @@ 24, /* addi_d */ 4, /* subr_d */ 24, /* subi_d */ + 24, /* rsbi_d */ 4, /* mulr_d */ 24, /* muli_d */ 4, /* divr_d */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index a8488e2c5..dc1557cc4 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -745,6 +745,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(subx,); case_rrw(subx,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -898,6 +899,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _f, 32); case_rrr(sub, _f); case_rrf(sub, _f, 32); + case_rrf(rsb, _f, 32); case_rrr(mul, _f); case_rrf(mul, _f, 32); case_rrr(div, _f); @@ -980,6 +982,7 @@ _emit_code(jit_state_t *_jit) case_rrf(add, _d, 64); case_rrr(sub, _d); case_rrf(sub, _d, 64); + case_rrf(rsb, _d, 64); case_rrr(mul, _d); case_rrf(mul, _d, 64); case_rrr(div, _d); diff --git 
a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 0a5d3302a..2e284f917 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -202,6 +202,8 @@ static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define isubxi(r0, i0) alui(X86_SBB, r0, i0) # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) +static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define imulr(r0, r1) _imulr(_jit, r0, r1) static void _imulr(jit_state_t*, jit_int32_t, jit_int32_t); # define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0) @@ -1109,6 +1111,13 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + subi(r0, r1, i0); + negr(r0, r0); +} + static void _imulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index d3bd7aeea..6a3d996e6 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -131,6 +131,12 @@ static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0) static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1) +# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0) +static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1) +# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0) +static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2) static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0) 
@@ -592,6 +598,10 @@ _sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) dopi(sub) +fopi(rsb) + +dopi(rsb) + static void _sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index 70f8a03ff..1c73ea3e3 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -23,6 +23,7 @@ 6, /* subci */ 6, /* subxr */ 5, /* subxi */ + 10, /* rsbi */ 5, /* mulr */ 7, /* muli */ 20, /* qmulr */ @@ -178,6 +179,7 @@ 19, /* addi_f */ 12, /* subr_f */ 19, /* subi_f */ + 19, /* rsbi_f */ 8, /* mulr_f */ 19, /* muli_f */ 12, /* divr_f */ @@ -260,6 +262,7 @@ 26, /* addi_d */ 12, /* subr_d */ 26, /* subi_d */ + 26, /* rsbi_d */ 8, /* mulr_d */ 26, /* muli_d */ 12, /* divr_d */ @@ -375,6 +378,7 @@ 13, /* subci */ 9, /* subxr */ 7, /* subxi */ + 19, /* rsbi */ 7, /* mulr */ 14, /* muli */ 20, /* qmulr */ @@ -530,6 +534,7 @@ 21, /* addi_f */ 15, /* subr_f */ 21, /* subi_f */ + 21, /* rsbi_f */ 10, /* mulr_f */ 21, /* muli_f */ 15, /* divr_f */ @@ -612,6 +617,7 @@ 25, /* addi_d */ 15, /* subr_d */ 25, /* subi_d */ + 25, /* rsbi_d */ 10, /* mulr_d */ 25, /* muli_d */ 15, /* divr_d */ @@ -725,6 +731,7 @@ 13, /* subci */ 9, /* subxr */ 7, /* subxi */ + 19, /* rsbi */ 7, /* mulr */ 14, /* muli */ 20, /* qmulr */ @@ -880,6 +887,7 @@ 20, /* addi_f */ 15, /* subr_f */ 20, /* subi_f */ + 20, /* rsbi_f */ 10, /* mulr_f */ 20, /* muli_f */ 15, /* divr_f */ @@ -962,6 +970,7 @@ 25, /* addi_d */ 15, /* subr_d */ 25, /* subi_d */ + 25, /* rsbi_d */ 10, /* mulr_d */ 25, /* muli_d */ 15, /* divr_d */ diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c index 8faa1401c..2677d4349 100644 --- a/lib/jit_x86-x87.c +++ b/lib/jit_x86-x87.c @@ -83,6 +83,12 @@ static void _x87_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); static void _x87_subr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define x87_subi_d(r0, r1, i0) _x87_subi_d(_jit, r0, r1, i0) static void 
_x87_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define x87_rsbr_f(r0, r1, r2) x87_subr_f(r0, r2, r1) +# define x87_rsbi_f(r0, r1, i0) _x87_rsbi_f(_jit, r0, r1, i0) +static void _x87_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define x87_rsbr_d(r0, r1, r2) x87_subr_d(r0, r2, r1) +# define x87_rsbi_d(r0, r1, i0) _x87_rsbi_d(_jit, r0, r1, i0) +static void _x87_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define x87_mulr_f(r0, r1, r2) _x87_mulr_d(_jit, r0, r1, r2) # define x87_muli_f(r0, r1, i0) _x87_muli_f(_jit, r0, r1, i0) static void _x87_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -446,6 +452,7 @@ _x87rri(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1) fopi(add) fopi(sub) +fopi(rsb) fopi(mul) fopi(div) @@ -517,6 +524,8 @@ _x87_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) dopi(sub) +dopi(rsb) + static void _x87_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 20dcc211d..6f55ebd1d 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -1297,6 +1297,7 @@ _emit_code(jit_state_t *_jit) case_rrw(subx,); case_rrr(subc,); case_rrw(subc,); + case_rrw(rsb,); case_rrr(mul,); case_rrw(mul,); case_rrrr(qmul,); @@ -1478,6 +1479,7 @@ _emit_code(jit_state_t *_jit) case_ffw(add, _f, 32); case_fff(sub, _f); case_ffw(sub, _f, 32); + case_ffw(rsb, _f, 32); case_fff(mul, _f); case_ffw(mul, _f, 32); case_fff(div, _f); @@ -1576,6 +1578,7 @@ _emit_code(jit_state_t *_jit) case_ffw(add, _d, 64); case_fff(sub, _d); case_ffw(sub, _d, 64); + case_ffw(rsb, _d, 64); case_fff(mul, _d); case_ffw(mul, _d, 64); case_fff(div, _d); diff --git a/lib/lightning.c b/lib/lightning.c index 9ee261d79..20aa1bd74 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1237,6 +1237,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_addi: case jit_code_addxi: case jit_code_addci: case 
jit_code_subi: case jit_code_subxi: case jit_code_subci: + case jit_code_rsbi: case jit_code_muli: case jit_code_divi: case jit_code_divi_u: case jit_code_remi: case jit_code_remi_u: case jit_code_andi: case jit_code_ori: case jit_code_xori: case jit_code_lshi: @@ -1254,20 +1255,22 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| jit_cc_a1_reg|jit_cc_a2_int; break; - case jit_code_addi_f: case jit_code_subi_f: case jit_code_muli_f: - case jit_code_divi_f: case jit_code_lti_f: case jit_code_lei_f: - case jit_code_eqi_f: case jit_code_gei_f: case jit_code_gti_f: - case jit_code_nei_f: case jit_code_unlti_f: case jit_code_unlei_f: - case jit_code_uneqi_f: case jit_code_ungei_f: case jit_code_ungti_f: - case jit_code_ltgti_f: case jit_code_ordi_f: case jit_code_unordi_f: + case jit_code_addi_f: case jit_code_subi_f: case jit_code_rsbi_f: + case jit_code_muli_f: case jit_code_divi_f: case jit_code_lti_f: + case jit_code_lei_f: case jit_code_eqi_f: case jit_code_gei_f: + case jit_code_gti_f: case jit_code_nei_f: case jit_code_unlti_f: + case jit_code_unlei_f: case jit_code_uneqi_f: case jit_code_ungei_f: + case jit_code_ungti_f: case jit_code_ltgti_f: case jit_code_ordi_f: + case jit_code_unordi_f: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_flt; break; - case jit_code_addi_d: case jit_code_subi_d: case jit_code_muli_d: - case jit_code_divi_d: case jit_code_lti_d: case jit_code_lei_d: - case jit_code_eqi_d: case jit_code_gei_d: case jit_code_gti_d: - case jit_code_nei_d: case jit_code_unlti_d: case jit_code_unlei_d: - case jit_code_uneqi_d: case jit_code_ungei_d: case jit_code_ungti_d: - case jit_code_ltgti_d: case jit_code_ordi_d: case jit_code_unordi_d: + case jit_code_addi_d: case jit_code_subi_d: case jit_code_rsbi_d: + case jit_code_muli_d: case jit_code_divi_d: case jit_code_lti_d: + case jit_code_lei_d: case jit_code_eqi_d: case jit_code_gei_d: + case jit_code_gti_d: case jit_code_nei_d: case 
jit_code_unlti_d: + case jit_code_unlei_d: case jit_code_uneqi_d: case jit_code_ungei_d: + case jit_code_ungti_d: case jit_code_ltgti_d: case jit_code_ordi_d: + case jit_code_unordi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_dbl; break; case jit_code_addr: case jit_code_addxr: case jit_code_addcr: