From d91b25d1bed7fa285af4009d661a7f9537e422f3 Mon Sep 17 00:00:00 2001 From: pcpa Date: Mon, 4 Feb 2013 18:54:37 -0200 Subject: [PATCH] Implement the qmul and qdiv instructions. 2013-02-04 Paulo Andrade * include/lightning.h, include/lightning/jit_private.h, lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_x86-cpu.c, lib/jit_x86.c, lib/lightning.c: Implement the new qmul and qdiv instructions that return signed and unsigned lo/hi multiplication result and div/rem division result. These should be useful for jit translation of code that needs to know if a multiplication overflows (no branch opcode added) or if a division is exact (easy check if remainder is zero). * check/lightning.c, lib/jit_print.c, check/Makefile.am, check/all.tst: Update for the new qmul and qdiv instructions. * check/qalu.inc, check/qalu_div.ok, check/qalu_div.tst, check/qalu_mul.ok, check/qalu_mul.tst: New files implementing simple test cases for qmul and qdiv. --- ChangeLog | 19 ++ TODO | 16 ++ check/Makefile.am | 6 +- check/all.tst | 8 + check/lightning.c | 28 +++ check/qalu.inc | 97 ++++++++++ check/qalu_div.ok | 1 + check/qalu_div.tst | 18 ++ check/qalu_mul.ok | 1 + check/qalu_mul.tst | 25 +++ include/lightning.h | 16 ++ include/lightning/jit_private.h | 11 +- lib/jit_arm-cpu.c | 128 +++++++++++++ lib/jit_arm.c | 18 ++ lib/jit_mips-cpu.c | 66 +++++++ lib/jit_mips.c | 18 ++ lib/jit_ppc-cpu.c | 99 ++++++++++ lib/jit_ppc.c | 18 ++ lib/jit_print.c | 22 ++- lib/jit_x86-cpu.c | 312 +++++++++++++++++++++++++++----- lib/jit_x86.c | 18 ++ lib/lightning.c | 223 ++++++++++++++++++----- 22 files changed, 1073 insertions(+), 95 deletions(-) create mode 100644 check/qalu.inc create mode 100644 check/qalu_div.ok create mode 100644 check/qalu_div.tst create mode 100644 check/qalu_mul.ok create mode 100644 check/qalu_mul.tst diff --git a/ChangeLog b/ChangeLog index fe58a5027..ee46e0bfd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2013-02-04 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_mips-cpu.c, + lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c, + lib/jit_x86-cpu.c, lib/jit_x86.c, lib/lightning.c: + Implement the new qmul and qdiv instructions that return signed + and unsigned lo/hi multiplication result and div/rem division result. + These should be useful for jit translation of code that needs to + know if a multiplication overflows (no branch opcode added) or if + a division is exact (easy check if remainder is zero). + + * check/lightning.c, lib/jit_print.c, check/Makefile.am, + check/all.tst: Update for the new qmul and qdiv instructions. + + * check/qalu.inc, check/qalu_div.ok, check/qalu_div.tst, + check/qalu_mul.ok, check/qalu_mul.tst: New files implementing + simple test cases for qmul and qdiv. + 2013-01-30 Paulo Andrade * doc/body.texi: Correct "jmpi" description that incorrectly diff --git a/TODO b/TODO index 30c42c236..5fd2df361 100644 --- a/TODO +++ b/TODO @@ -12,3 +12,19 @@ Suggested names for now are "qmul" and "qdiv", with "r" and "i" variants, and possibly unsigned version. Branches would use "bo" and "bx" prefix. + + * Convert retr to an actual node, otherwise, code like: + movi %r0 1 + divr %r1 %r2 %r3 + retr %r0 + will fail in x86 because, besides "divr" telling it clobbers + %rax (r0) it ends being ignored because retr is a noop there + (removed "mov %rax,%rax" expansion) and the code checking for + live registers ends up not knowing about %rax being live after + the "divr". This affects only x86. + + * Validate that divrem in jit_x86-cpu.c is not modifying + the non result arguments. This is not verified by clobber.tst, + as it only checks registers not involved in the operation + (because it does not know about values being set as input + for the the operation). diff --git a/check/Makefile.am b/check/Makefile.am index 33ce2134e..34fbbcf59 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -66,6 +66,9 @@ EXTRA_DIST = \ carry.tst carry.ok \ call.tst call.ok \ float.tst float.ok \ + qalu.inc \ + qalu_mul.tst qalu_mul.ok \ + qalu_div.tst qalu_div.ok \ ccall.ok \ check.sh \ check.x87.sh \ @@ -88,7 +91,8 @@ base_TESTS = \ fop_abs fop_sqrt \ varargs stack \ clobber carry call \ - float + float \ + qalu_mul qalu_div $(base_TESTS): check.sh $(LN_S) $(srcdir)/check.sh $@ diff --git a/check/all.tst b/check/all.tst index d8d476969..9be38eb4f 100644 --- a/check/all.tst +++ b/check/all.tst @@ -34,10 +34,18 @@ subxi %r0 %r1 2 mulr %r0 %r1 %r2 muli %r0 %r1 2 + qmulr %r0 %r1 %r2 %v0 + qmuli %r0 %r1 %r2 3 + qmulr_u %r0 %r1 %r2 %v0 + qmuli_u %r0 %r1 %r2 3 divr %r0 %r1 %r2 divi %r0 %r1 2 divr_u %r0 %r1 %r2 divi_u %r0 %r1 2 + qdivr %r0 %r1 %r2 %v0 + qdivi %r0 %r1 %r2 3 + qdivr_u %r0 %r1 %r2 %v0 + qdivi_u %r0 %r1 %r2 3 remr %r0 %r1 %r2 remi %r0 %r1 2 remr_u %r0 %r1 %r2 diff --git a/check/lightning.c b/check/lightning.c index 13abbfbd4..9722011fe 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -260,8 +260,12 @@ static void subr(void); static void subi(void); static void subxr(void); static void subxi(void); static void subcr(void); static void subci(void); static void mulr(void); static void muli(void); +static void qmulr(void); static void qmuli(void); +static void qmulr_u(void); static void qmuli_u(void); static void divr(void); static void divi(void); static void divr_u(void); static void divi_u(void); +static void qdivr(void); static void qdivi(void); +static void qdivr_u(void); static void qdivi_u(void); static void remr(void); static void remi(void); static void remr_u(void); static void remi_u(void); static void andr(void); static void andi(void); @@ -552,8 +556,12 @@ static instr_t instr_vector[] = { entry(subxr), entry(subxi), entry(subcr), entry(subci), entry(mulr), entry(muli), + entry(qmulr), entry(qmuli), + entry(qmulr_u), entry(qmuli_u), entry(divr), entry(divi), entry(divr_u), entry(divi_u), + entry(qdivr), entry(qdivi), + entry(qdivr_u), entry(qdivi_u), entry(remr), entry(remi), entry(remr_u), entry(remi_u), entry(andr), entry(andi), @@ -920,6 +928,22 @@ name(void) \ jit_word_t im = get_imm(); \ jit_##name(r0, r1, im); \ } +#define entry_ir_ir_ir_ir(name) \ +static void \ +name(void) \ +{ \ + jit_gpr_t r0 = get_ireg(), r1 = get_ireg(), \ + r2 = get_ireg(), r3 = get_ireg(); \ + jit_##name(r0, r1, r2, r3); \ +} +#define entry_ir_ir_ir_im(name) \ +static void \ +name(void) \ +{ \ + jit_gpr_t r0 = get_ireg(), r1 = get_ireg(), r2 = get_ireg(); \ + jit_word_t im = get_imm(); \ + jit_##name(r0, r1, r2, im); \ +} #define entry_ir_ir(name) \ static void \ name(void) \ @@ -1244,8 +1268,12 @@ entry_ir_ir_ir(subr) entry_ir_ir_im(subi) entry_ir_ir_ir(subxr) entry_ir_ir_im(subxi) entry_ir_ir_ir(subcr) entry_ir_ir_im(subci) entry_ir_ir_ir(mulr) entry_ir_ir_im(muli) +entry_ir_ir_ir_ir(qmulr) entry_ir_ir_ir_im(qmuli) +entry_ir_ir_ir_ir(qmulr_u) entry_ir_ir_ir_im(qmuli_u) entry_ir_ir_ir(divr) entry_ir_ir_im(divi) entry_ir_ir_ir(divr_u) entry_ir_ir_im(divi_u) +entry_ir_ir_ir_ir(qdivr) entry_ir_ir_ir_im(qdivi) +entry_ir_ir_ir_ir(qdivr_u) entry_ir_ir_ir_im(qdivi_u) entry_ir_ir_ir(remr) entry_ir_ir_im(remi) entry_ir_ir_ir(remr_u) entry_ir_ir_im(remi_u) entry_ir_ir_ir(andr) entry_ir_ir_im(andi) diff --git a/check/qalu.inc b/check/qalu.inc new file mode 100644 index 000000000..a5e893f20 --- /dev/null +++ b/check/qalu.inc @@ -0,0 +1,97 @@ +.data 8 +ok: +.c "ok\n" + +/* r0,r1 = r2 op r3 */ +#define QALUR(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R2 I0 \ + movi %R3 I1 \ + OP##r##T %R0 %R1 %R2 %R3 \ + bnei OP##T##N##rlo##R0##R1##R2##R3 %R0 LO \ + beqi OP##T##N##rhi##R0##R1##R2##R3 %R1 HI \ +OP##T##N##rlo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##rhi##R0##R1##R2##R3: + +/* r0,r1 = r2 op i0 */ +#define QALUI(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R2 I0 \ + movi %R3 HI \ + OP##i##T %R0 %R1 %R2 I1 \ + bnei OP##T##N##ilo##R0##R1##R2##R3 %R0 LO \ + beqr OP##T##N##ihi##R0##R1##R2##R3 %R1 %R3 \ +OP##T##N##ilo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##ihi##R0##R1##R2##R3: + +/* r0,r1 = r0 op r1 */ +#define QALUX(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R0 I0 \ + movi %R1 I1 \ + movi %R2 LO \ + movi %R3 HI \ + OP##r##T %R0 %R1 %R0 %R1 \ + bner OP##T##N##0lo##R0##R1##R2##R3 %R0 %R2 \ + beqr OP##T##N##0hi##R0##R1##R2##R3 %R1 %R3 \ +OP##T##N##0lo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##0hi##R0##R1##R2##R3: + +/* r0,r1 = r1 op r0 */ +#define QALUY(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R1 I0 \ + movi %R0 I1 \ + movi %R2 LO \ + movi %R3 HI \ + OP##r##T %R0 %R1 %R1 %R0 \ + bner OP##T##N##1lo##R0##R1##R2##R3 %R0 %R2 \ + beqr OP##T##N##1hi##R0##R1##R2##R3 %R1 %R3 \ +OP##T##N##1lo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##1hi##R0##R1##R2##R3: + +/* r0,r1 = r0 op r3 */ +#define QALUZ(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R0 I0 \ + movi %R3 I1 \ + movi %R2 LO \ + OP##r##T %R0 %R1 %R0 %R3 \ + bner OP##T##N##2lo##R0##R1##R2##R3 %R0 %R2 \ + beqi OP##T##N##2hi##R0##R1##R2##R3 %R1 HI \ +OP##T##N##2lo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##2hi##R0##R1##R2##R3: + +/* r0,r1 = r2 op r1 */ +#define QALUW(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + movi %R2 I0 \ + movi %R1 I1 \ + movi %R3 LO \ + OP##r##T %R0 %R1 %R2 %R1 \ + bner OP##T##N##3lo##R0##R1##R2##R3 %R0 %R3 \ + beqi OP##T##N##3hi##R0##R1##R2##R3 %R1 HI \ +OP##T##N##3lo##R0##R1##R2##R3: \ + calli @abort \ +OP##T##N##3hi##R0##R1##R2##R3: + +#define QALU2(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUR(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUI(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUX(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUY(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUZ(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALUW(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) + +#define QALU1(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALU2(N, T, OP, I0, I1, LO, HI, R0, R1, R2, R3) \ + QALU2(N, T, OP, I0, I1, LO, HI, R1, R2, R3, R0) \ + QALU2(N, T, OP, I0, I1, LO, HI, R2, R3, R0, R1) \ + QALU2(N, T, OP, I0, I1, LO, HI, R3, R0, R1, R2) + +#define QALU(N, T, OP, I0, I1, LO, HI) \ + QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r0) \ + QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r1) \ + QALU1(N, T, OP, I0, I1, LO, HI, v0, v1, v2, r2) \ + QALU1(N, T, OP, I0, I1, LO, HI, v1, v2, r0, r1) \ + QALU1(N, T, OP, I0, I1, LO, HI, v1, v2, r0, r2) \ + QALU1(N, T, OP, I0, I1, LO, HI, v2, r0, r1, r2) diff --git a/check/qalu_div.ok b/check/qalu_div.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/qalu_div.ok @@ -0,0 +1 @@ +ok diff --git a/check/qalu_div.tst b/check/qalu_div.tst new file mode 100644 index 000000000..198dfbbfe --- /dev/null +++ b/check/qalu_div.tst @@ -0,0 +1,18 @@ +#include "qalu.inc" + +.code + prolog +#define QDIV(N, I0, I1, LO, HI) QALU(N, , qdiv, I0, I1, LO, HI) +#define UQDIV(N, I0, I1, LO, HI) QALU(N, _u, qdiv, I0, I1, LO, HI) + QDIV(0, 10, 3, 3, 1) + QDIV(1, -33, 9, -3, -6) + QDIV(2, -41, -7, 5, -6) + QDIV(3, 65536, 4096, 16, 0) + UQDIV(4, -1, -2, 1, 1) + UQDIV(5, -2, -5, 1, 3) + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/check/qalu_mul.ok b/check/qalu_mul.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/qalu_mul.ok @@ -0,0 +1 @@ +ok diff --git a/check/qalu_mul.tst b/check/qalu_mul.tst new file mode 100644 index 000000000..378d38373 --- /dev/null +++ b/check/qalu_mul.tst @@ -0,0 +1,25 @@ +#include "qalu.inc" + +.code + prolog +#define QMUL(N, I0, I1, LO, HI) QALU(N, , qmul, I0, I1, LO, HI) +#define UQMUL(N, I0, I1, LO, HI) QALU(N, _u, qmul, I0, I1, LO, HI) + QMUL(0, -2, -1, 2, 0) + QMUL(1, 0, -1, 0, 0) + QMUL(2, -1, 0, 0, 0) + QMUL(3, 1, -1, -1, -1) +#if __WORDSIZE == 32 + QMUL(4, 0x7ffff, 0x7ffff, 0xfff00001, 0x3f) + UQMUL(5, 0xffffff, 0xffffff, 0xfe000001, 0xffff) + QMUL(6, 0x80000000, -2, 0, 1) +#else + QMUL(4, 0x7ffffffff, 0x7ffffffff, 0xfffffff000000001, 0x3f) + UQMUL(5, 0xffffffffff, 0xffffffffff, 0xfffffe0000000001, 0xffff) + QMUL(6, 0x8000000000000000, -2, 0, 1) +#endif + prepare + pushargi ok + ellipsis + finishi @printf + ret + epilog diff --git a/include/lightning.h b/include/lightning.h index 52ff539fa..3694616cb 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -153,12 +153,24 @@ typedef enum { #define jit_mulr(u,v,w) jit_new_node_www(jit_code_mulr,u,v,w) #define jit_muli(u,v,w) jit_new_node_www(jit_code_muli,u,v,w) jit_code_mulr, jit_code_muli, +#define jit_qmulr(l,h,v,w) jit_new_node_qww(jit_code_qmulr,l,h,v,w) +#define jit_qmuli(l,h,v,w) jit_new_node_qww(jit_code_qmuli,l,h,v,w) + jit_code_qmulr, jit_code_qmuli, +#define jit_qmulr_u(l,h,v,w) jit_new_node_qww(jit_code_qmulr_u,l,h,v,w) +#define jit_qmuli_u(l,h,v,w) jit_new_node_qww(jit_code_qmuli_u,l,h,v,w) + jit_code_qmulr_u, jit_code_qmuli_u, #define jit_divr(u,v,w) jit_new_node_www(jit_code_divr,u,v,w) #define jit_divi(u,v,w) jit_new_node_www(jit_code_divi,u,v,w) jit_code_divr, jit_code_divi, #define jit_divr_u(u,v,w) jit_new_node_www(jit_code_divr_u,u,v,w) #define jit_divi_u(u,v,w) jit_new_node_www(jit_code_divi_u,u,v,w) jit_code_divr_u, jit_code_divi_u, +#define jit_qdivr(l,h,v,w) jit_new_node_qww(jit_code_qdivr,l,h,v,w) +#define jit_qdivi(l,h,v,w) jit_new_node_qww(jit_code_qdivi,l,h,v,w) + jit_code_qdivr, jit_code_qdivi, +#define jit_qdivr_u(l,h,v,w) jit_new_node_qww(jit_code_qdivr_u,l,h,v,w) +#define jit_qdivi_u(l,h,v,w) jit_new_node_qww(jit_code_qdivi_u,l,h,v,w) + jit_code_qdivr_u, jit_code_qdivi_u, #define jit_remr(u,v,w) jit_new_node_www(jit_code_remr,u,v,w) #define jit_remi(u,v,w) jit_new_node_www(jit_code_remi,u,v,w) jit_code_remr, jit_code_remi, @@ -839,6 +851,10 @@ extern jit_node_t *_jit_new_node_wd(jit_state_t*, jit_code_t, #define jit_new_node_www(c,u,v,w) _jit_new_node_www(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_word_t); +#define jit_new_node_qww(c,l,h,v,w) _jit_new_node_qww(_jit,c,l,h,v,w) +extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, + jit_int32_t, jit_int32_t, + jit_word_t, jit_word_t); #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float32_t); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index b0be3d90c..202fc67f5 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -103,6 +103,7 @@ #define jit_cc_a0_reg 0x00000001 /* arg0 is a register */ #define jit_cc_a0_chg 0x00000002 /* arg0 is modified */ #define jit_cc_a0_jmp 0x00000004 /* arg0 is a jump target */ +#define jit_cc_a0_rlh 0x00000008 /* arg0 is a register pair */ #define jit_cc_a0_int 0x00000010 /* arg0 is immediate word */ #define jit_cc_a0_flt 0x00000020 /* arg0 is immediate float */ #define jit_cc_a0_dbl 0x00000040 /* arg0 is immediate double */ @@ -170,13 +171,13 @@ typedef struct jit_data_info jit_data_info_t; union jit_data { struct { #if __BYTE_ORDER == __LITTLE_ENDIAN - jit_int32_t l; - jit_int32_t h; + jit_int32_t l; + jit_int32_t h; #else - jit_int32_t h; - jit_int32_t l; + jit_int32_t h; + jit_int32_t l; #endif - } pair; + } q; jit_word_t w; jit_float32_t f; jit_float64_t d; diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index cf1be0fae..499616dd8 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -132,7 +132,9 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB_MUL 0x4340 # define THUMB2_MUL 0xfb00f000 # define ARM_UMULL 0x00800090 +# define THUMB2_UMULL 0xfba00000 # define ARM_SMULL 0x00c00090 +# define THUMB2_SMULL 0xfb800000 # define THUMB2_SDIV 0xfb90f0f0 # define THUMB2_UDIV 0xfbb0f0f0 # define ARM_AND 0x00000000 @@ -868,6 +870,16 @@ static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) +# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) +# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) +static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qmuli(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,1) +# define qmuli_u(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,0) +# define iqmuli(r0,r1,r2,i0,cc) _iqmuli(_jit,r0,r1,r2,i0,cc) +static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define divrem(d,s,r0,r1,r2) _divrem(_jit,d,s,r0,r1,r2) static void _divrem(jit_state_t*,int,int,jit_int32_t,jit_int32_t,jit_int32_t); # define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) @@ -878,6 +890,16 @@ static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qdivr(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,1) +# define qdivr_u(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,0) +# define iqdivr(r0,r1,r2,r3,cc) _iqdivr(_jit,r0,r1,r2,r3,cc) +static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qdivi(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,1) +# define qdivi_u(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,0) +# define iqdivi(r0,r1,r2,i0,cc) _iqdivi(_jit,r0,r1,r2,i0,cc) +static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) @@ -1962,6 +1984,70 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t reg; + if (jit_thumb_p()) { + if (r2 == r3) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r2); + if (sign) + T2_SMULL(r0, r1, rn(reg), r2); + else + T2_UMULL(r0, r1, rn(reg), r2); + jit_unget_reg(reg); + } + else if (r0 != r2 && r1 != r2) { + if (sign) + T2_SMULL(r0, r1, r2, r3); + else + T2_UMULL(r0, r1, r2, r3); + } + else { + if (sign) + T2_SMULL(r0, r1, r3, r2); + else + T2_UMULL(r0, r1, r3, r2); + } + } + else { + if (r2 == r3) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r2); + if (sign) + SMULL(r0, r1, rn(reg), r2); + else + UMULL(r0, r1, rn(reg), r2); + jit_unget_reg(reg); + } + else if (r0 != r2 && r1 != r2) { + if (sign) + SMULL(r0, r1, r2, r3); + else + UMULL(r0, r1, r2, r3); + } + else { + if (sign) + SMULL(r0, r1, r3, r2); + else + UMULL(r0, r1, r3, r2); + } + } +} + +static void +_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqmulr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _divrem(jit_state_t *_jit, int div, int sign, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) @@ -2031,6 +2117,48 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_word_t d; + jit_word_t w; + jit_get_reg_args(); + movr(_R0_REGNO, r2); + movr(_R1_REGNO, r3); + if (sign) w = (jit_word_t)__aeabi_idivmod; + else w = (jit_word_t)__aeabi_uidivmod; + if (!jit_exchange_p()) { + if (jit_thumb_p()) d = ((w - _jit->pc.w) >> 1) - 2; + else d = ((w - _jit->pc.w) >> 2) - 2; + if (_s24P(d)) { + if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d)); + else BLI(d & 0x00ffffff); + } + else goto fallback; + } + else { + fallback: + movi(_R2_REGNO, w); + if (jit_thumb_p()) T1_BLX(_R2_REGNO); + else BLX(_R2_REGNO); + } + movr(r0, _R0_REGNO); + movr(r1, _R1_REGNO); + jit_unget_reg_args(); +} + +static void +_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqdivr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 95cbfac30..d27d79678 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -849,6 +849,11 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break #define case_vvv(name, type) \ case jit_code_##name##r##type: \ if (jit_swf_p()) \ @@ -862,6 +867,11 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break #define case_vvw(name, type) \ case jit_code_##name##i##type: \ if (jit_swf_p()) \ @@ -1036,10 +1046,18 @@ _emit_code(jit_state_t *_jit) case_rrw(subx,); case_rrr(mul,); case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); case_rrr(div,); case_rrw(div,); case_rrr(div, _u); case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); case_rrr(rem,); case_rrw(rem,); case_rrr(rem, _u); diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 311326032..fc5356b54 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -392,6 +392,16 @@ static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) +# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) +# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) +static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qmuli(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,1) +# define qmuli_u(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,0) +# define iqmuli(r0,r1,r2,i0,cc) _iqmuli(_jit,r0,r1,r2,i0,cc) +static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define divr(r0,r1,r2) _divr(_jit,r0,r1,r2) static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) @@ -400,6 +410,16 @@ static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qdivr(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,1) +# define qdivr_u(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,0) +# define iqdivr(r0,r1,r2,r3,cc) _iqdivr(_jit,r0,r1,r2,r3,cc) +static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qdivi(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,1) +# define qdivi_u(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,0) +# define iqdivi(r0,r1,r2,i0,cc) _iqdivi(_jit,r0,r1,r2,i0,cc) +static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) @@ -914,6 +934,29 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + if (sign) + MULT(r2, r3); + else + MULTU(r2, r3); + MFLO(r0); + MFHI(r1); +} + +static void +_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqmulr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -948,6 +991,29 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + if (sign) + DIV(r2, r3); + else + DIVU(r2, r3); + MFLO(r0); + MFHI(r1); +} + +static void +_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqdivr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 57be725e8..4c606c4bc 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -704,6 +704,16 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break #define case_rrf(name, type, size) \ case jit_code_##name##i##type: \ assert(node->flag & jit_flag_data); \ @@ -785,10 +795,18 @@ _emit_code(jit_state_t *_jit) case_rrw(subx,); case_rrr(mul,); case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); case_rrr(div,); case_rrw(div,); case_rrr(div, _u); case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); case_rrr(rem,); case_rrw(rem,); case_rrr(rem, _u); diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 7f8a83552..56bf56aab 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -390,12 +390,32 @@ static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define mulr(r0,r1,r2) MULLW(r0,r1,r2) # define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) +# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) +# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) +static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qmuli(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,1) +# define qmuli_u(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,0) +# define iqmuli(r0,r1,r2,i0,cc) _iqmuli(_jit,r0,r1,r2,i0,cc) +static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define divr(r0,r1,r2) DIVW(r0,r1,r2) # define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define divr_u(r0,r1,r2) DIVWU(r0,r1,r2) # define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qdivr(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,1) +# define qdivr_u(r0,r1,r2,r3) iqdivr(r0,r1,r2,r3,0) +# define iqdivr(r0,r1,r2,r3,cc) _iqdivr(_jit,r0,r1,r2,r3,cc) +static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_bool_t); +# define qdivi(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,1) +# define qdivi_u(r0,r1,r2,i0) iqdivi(r0,r1,r2,i0,0) +# define iqdivi(r0,r1,r2,i0,cc) _iqdivi(_jit,r0,r1,r2,i0,cc) +static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t,jit_bool_t); # define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) @@ -871,6 +891,38 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t reg; + if (r0 == r2 || r0 == r3) { + reg = jit_get_reg(jit_class_gpr); + MULLW(rn(reg), r2, r3); + } + else + MULLW(r0, r2, r3); + if (sign) + MULLH(r1, r2, r3); + else + MULLHU(r1, r2, r3); + if (r0 == r2 || r0 == r3) { + movr(r0, rn(reg)); + reg = jit_unget_reg(reg); + } +} + +static void +_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqmulr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -891,6 +943,53 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t sv0, rg0; + jit_int32_t sv1, rg1; + + if (r0 == r2 || r0 == r3) { + sv0 = jit_get_reg(jit_class_gpr); + rg0 = rn(sv0); + } + else + rg0 = r0; + if (r1 == r2 || r1 == r3) { + sv1 = jit_get_reg(jit_class_gpr); + rg1 = rn(sv1); + } + else + rg1 = r1; + + if (sign) + divr(rg0, r2, r3); + else + divr_u(rg0, r2, r3); + mulr(rg1, r2, r0); + subr(rg1, r2, r0); + if (rg0 != r0) { + movr(r0, rg0); + jit_unget_reg(sv0); + } + if (rg1 != r1) { + movr(r1, rg1); + jit_unget_reg(sv1); + } +} + +static void +_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + iqdivr(r0, r1, r2, rn(reg), sign); + jit_unget_reg(reg); +} + static void _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index aaa2035a1..dafc36b84 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -723,10 +723,20 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break #define case_rrw(name, type) \ case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break #define case_rrf(name, type, size) \ case jit_code_##name##i##type: \ assert(node->flag & jit_flag_data); \ @@ -808,10 +818,18 @@ _emit_code(jit_state_t *_jit) case_rrw(subx,); case_rrr(mul,); case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); case_rrr(div,); case_rrw(div,); case_rrr(div, _u); case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); case_rrr(rem,); case_rrw(rem,); case_rrr(rem, _u); diff --git a/lib/jit_print.c b/lib/jit_print.c index fe4b16509..ee7e08d23 100644 --- a/lib/jit_print.c +++ b/lib/jit_print.c @@ -48,8 +48,12 @@ static char *code_name[] = { "subcr", "subci", "subxr", "subxi", "mulr", "muli", + "qmulr", "qmuli", + "qmulr_u", "qmuli_u", "divr", "divi", "divr_u", "divi_u", + "qdivr", "qdivi", + "qdivr_u", "qdivi_u", "remr", "remi", "remr_u", "remi_u", "andr", "andi", @@ -248,7 +252,7 @@ _jit_print(jit_state_t *_jit) continue; } value = jit_classify(node->code) & - (jit_cc_a0_int|jit_cc_a0_jmp|jit_cc_a0_reg| + (jit_cc_a0_int|jit_cc_a0_jmp|jit_cc_a0_reg|jit_cc_a0_rlh| jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl| jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl); if (value & jit_cc_a0_jmp) @@ -303,6 +307,16 @@ _jit_print(jit_state_t *_jit) print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); print_chr(' '); print_hex(node->w.w); continue; + q_r_r: + print_str(" ("); print_reg(node->u.q.l); + print_chr(' '); print_reg(node->u.q.h); + print_str(") "); print_reg(node->v.w); + print_chr(' '); print_reg(node->w.w); continue; + q_r_w: + print_str(" ("); print_reg(node->u.q.l); + print_chr(' '); print_reg(node->u.q.h); + print_str(") "); print_reg(node->v.w); + print_chr(' '); print_hex(node->w.w); continue; r_r_f: print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -419,6 +433,12 @@ _jit_print(jit_state_t *_jit) goto r_r_r; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_int: goto r_r_w; + case jit_cc_a0_reg|jit_cc_a0_rlh| + jit_cc_a1_reg|jit_cc_a2_reg: + goto q_r_r; + case jit_cc_a0_reg|jit_cc_a0_rlh| + jit_cc_a1_reg|jit_cc_a2_int: + goto q_r_w; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt: goto r_r_f; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl: diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 9ba93c302..708c68053 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -158,6 +158,10 @@ static void _alur(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define alui(code, r0, i0) _alui(_jit, code, r0, i0) static void _alui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define iaddr(r0, r1) alur(X86_ADD, r0, r1) +# define save(r0) _save(_jit, r0) +static void _save(jit_state_t*, jit_int32_t); +# define load(r0) _load(_jit, r0) +static void _load(jit_state_t*, jit_int32_t); # define addr(r0, r1, r2) _addr(_jit, r0, r1, r2) static void _addr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define iaddi(r0, i0) alui(X86_ADD, r0, i0) @@ -197,10 +201,22 @@ static void _imuli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); static void _mulr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define idivr(r0) unr(X86_IDIV, r0) -# define idivr_u(r0) unr(X86_DIV, r0) +# define umulr(r0) unr(X86_IMUL, r0) +# define umulr_u(r0) unr(X86_MUL, r0) +# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1) +# define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0) +# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign) +static void _iqmulr(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t,jit_int32_t, jit_bool_t); +# define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1) +# define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0) +# define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign) +static void _iqmuli(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t,jit_word_t, jit_bool_t); # define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit) static void _sign_extend_rdx_rax(jit_state_t*); +# define idivr(r0) unr(X86_IDIV, r0) +# define idivr_u(r0) unr(X86_DIV, r0) # define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1) static void _divremr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, @@ -212,6 +228,16 @@ _divremi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t,jit_bool_t,jit_bool_t); # define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1) # define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1) # define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1) +# define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1) +# define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0) +# define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign) +static void _iqdivr(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t,jit_int32_t, jit_bool_t); +# define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1) +# define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0) +# define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign) +static void _iqdivi(jit_state_t*, jit_int32_t, jit_int32_t, + jit_int32_t,jit_word_t, jit_bool_t); # define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0) # define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0) # define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0) @@ -834,6 +860,27 @@ _alui(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_word_t i0) } } +static void +_save(jit_state_t *_jit, jit_int32_t r0) +{ + if (!_jit->function->regoff[r0]) { + _jit->function->regoff[r0] = jit_allocai(sizeof(jit_word_t)); + _jit->again = 1; + } + assert(!jit_regset_tstbit(_jit->regsav, r0)); + jit_regset_setbit(_jit->regsav, r0); + stxi(_jit->function->regoff[r0], _RBP_REGNO, r0); +} + +static void +_load(jit_state_t *_jit, jit_int32_t r0) +{ + assert(_jit->function->regoff[r0]); + assert(jit_regset_tstbit(_jit->regsav, r0)); + jit_regset_clrbit(_jit->regsav, r0); + ldxi(r0, _RBP_REGNO, _jit->function->regoff[r0]); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -1130,6 +1177,99 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +#define savset(rn) \ + if (r0 != rn) { \ + sav |= 1 << rn; \ + if (r1 != rn && r2 != rn) \ + set |= 1 << rn; \ + } +#define isavset(rn) \ + if (r0 != rn) { \ + sav |= 1 << rn; \ + if (r1 != rn) \ + set |= 1 << rn; \ + } +#define qsavset(rn) \ + if (r0 != rn && r1 != rn) { \ + sav |= 1 << rn; \ + if (r2 != rn && r3 != rn) \ + set |= 1 << rn; \ + } +#define allocr(rn, rv) \ + if (set & (1 << rn)) \ + (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \ + if (sav & (1 << rn)) { \ + if ( jit_regset_tstbit(_jit->regsav, rv) || \ + !jit_regset_tstbit(_jit->reglive, rv)) \ + sav &= ~(1 << rn); \ + else \ + save(rv); \ + } +#define clear(rn, rv) \ + if (set & (1 << rn)) \ + jit_unget_reg(rv); \ + if (sav & (1 << rn)) \ + load(rv); +static void +_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t mul; + jit_int32_t sav; + jit_int32_t set; + + sav = set = 0; + qsavset(_RDX_REGNO); + qsavset(_RAX_REGNO); + allocr(_RDX_REGNO, _RDX); + allocr(_RAX_REGNO, _RAX); + + if (r3 == _RAX_REGNO) + mul = r2; + else { + mul = r3; + movr(_RAX_REGNO, r2); + } + if (sign) + umulr(mul); + else + umulr_u(mul); + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) + xchgr(_RAX_REGNO, _RDX_REGNO); + else { + if (r0 != _RDX_REGNO) + movr(r0, _RAX_REGNO); + movr(r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(r0, _RAX_REGNO); + } + + clear(_RDX_REGNO, _RDX); + clear(_RAX_REGNO, _RAX); +} + +static void +_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + + if (i0 == 0) { + ixorr(r0, r0); + ixorr(r1, r1); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + if (sign) + qmulr(r0, r1, r2, rn(reg)); + else + qmulr_u(r0, r1, r2, rn(reg)); + jit_unget_reg(reg); + } +} + static void _sign_extend_rdx_rax(jit_state_t *_jit) { @@ -1144,17 +1284,14 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t div; jit_int32_t reg; jit_int32_t set; + jit_int32_t sav; jit_int32_t use; - set = use = 0; - if (r0 != _RDX_REGNO && r1 != _RDX_REGNO && r2 != _RDX_REGNO) - set |= 1 << _RDX_REGNO; - if (r0 != _RAX_REGNO && r1 != _RAX_REGNO && r2 != _RAX_REGNO) - set |= 1 << _RAX_REGNO; - if (set & (1 <<_RDX_REGNO)) - (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named); - if (set & (1 << _RAX_REGNO)) - (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named); + sav = set = use = 0; + savset(_RDX_REGNO); + savset(_RAX_REGNO); + allocr(_RDX_REGNO, _RDX); + allocr(_RAX_REGNO, _RAX); if (r2 == _RAX_REGNO) { if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { @@ -1215,18 +1352,13 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, if (use) jit_unget_reg(reg); - if (r0 != _RAX_REGNO) { - if (divide) - movr(r0, _RAX_REGNO); - } - if (r0 != _RDX_REGNO) { - if (!divide) - movr(r0, _RDX_REGNO); - } - if (set & (1 <<_RDX_REGNO)) - jit_unget_reg(_RDX); - if (set & (1 << _RAX_REGNO)) - jit_unget_reg(_RAX); + if (divide) + movr(r0, _RAX_REGNO); + else + movr(r0, _RDX_REGNO); + + clear(_RDX_REGNO, _RDX); + clear(_RAX_REGNO, _RAX); } static void @@ -1235,6 +1367,7 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, { jit_int32_t reg; jit_int32_t div; + jit_int32_t sav; jit_int32_t set; jit_int32_t use; @@ -1283,15 +1416,11 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, return; } - set = use = 0; - if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) - set |= 1 << _RDX_REGNO; - if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) - set |= 1 << _RAX_REGNO; - if (set & (1 <<_RDX_REGNO)) - (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named); - if (set & (1 << _RAX_REGNO)) - (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named); + sav = set = use = 0; + isavset(_RDX_REGNO); + isavset(_RAX_REGNO); + allocr(_RDX_REGNO, _RDX); + allocr(_RAX_REGNO, _RAX); if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) { if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) @@ -1318,20 +1447,119 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, if (use) jit_unget_reg(reg); - if (r0 != _RAX_REGNO) { - if (divide) + if (divide) + movr(r0, _RAX_REGNO); + else + movr(r0, _RDX_REGNO); + + clear(_RDX_REGNO, _RDX); + clear(_RAX_REGNO, _RAX); +} + +static void +_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) +{ + jit_int32_t div; + jit_int32_t reg; + jit_int32_t sav; + jit_int32_t set; + jit_int32_t use; + + sav = set = use = 0; + qsavset(_RDX_REGNO); + qsavset(_RAX_REGNO); + allocr(_RDX_REGNO, _RDX); + allocr(_RAX_REGNO, _RAX); + if (r3 == _RAX_REGNO) { + if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { + if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) + reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | + jit_class_gpr|jit_class_named); + use = 1; + div = rn(reg); + movr(div, _RAX_REGNO); + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + } + else { + if (r0 == r2) + xchgr(r0, _RAX_REGNO); + else { + if (r0 != _RAX_REGNO) + movr(r0, _RAX_REGNO); + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + } + div = r0; + } + } + else if (r3 == _RDX_REGNO) { + if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { + if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) + reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | + jit_class_gpr|jit_class_named); + use = 1; + div = rn(reg); + movr(div, _RDX_REGNO); + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + } + else { + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + movr(r0, _RDX_REGNO); + div = r0; + } + } + else { + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + div = r3; + } + if (sign) { + sign_extend_rdx_rax(); + idivr(div); + } + else { + ixorr(_RDX_REGNO, _RDX_REGNO); + idivr_u(div); + } + if (use) + jit_unget_reg(reg); + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) + xchgr(_RAX_REGNO, _RDX_REGNO); + else { + if (r0 != _RDX_REGNO) + movr(r0, _RAX_REGNO); + movr(r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) movr(r0, _RAX_REGNO); } - if (r0 != _RDX_REGNO) { - if (!divide) - movr(r0, _RDX_REGNO); - } - if (set & (1 <<_RDX_REGNO)) - jit_unget_reg(_RDX); - if (set & (1 << _RAX_REGNO)) - jit_unget_reg(_RAX); + + clear(_RDX_REGNO, _RDX); + clear(_RAX_REGNO, _RAX); } +static void +_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + jit_int32_t reg; + + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + if (sign) + qdivr(r0, r1, r2, rn(reg)); + else + qdivr_u(r0, r1, r2, rn(reg)); + jit_unget_reg(reg); +} +#undef clear +#undef allocr +#undef savset + static void _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 4ff1dabbb..bac645964 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -941,6 +941,11 @@ _emit_code(jit_state_t *_jit) name##r##type(rn(node->u.w), \ rn(node->v.w), rn(node->w.w)); \ break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break #define case_frr(name, type) \ case jit_code_##name##r##type: \ if (jit_x87_reg_p(node->u.w)) \ @@ -963,6 +968,11 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break #define case_frw(name, type) \ case jit_code_##name##i##type: \ if (jit_x87_reg_p(node->u.w)) \ @@ -1139,10 +1149,18 @@ _emit_code(jit_state_t *_jit) case_rrw(subc,); case_rrr(mul,); case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); case_rrr(div,); case_rrw(div,); case_rrr(div, _u); case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); case_rrr(rem,); case_rrw(rem,); case_rrr(rem, _u); diff --git a/lib/lightning.c b/lib/lightning.c index e9de59d0a..c2ee09078 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -634,6 +634,21 @@ _jit_new_node_www(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_qww(jit_state_t *_jit, jit_code_t code, + jit_int32_t l, jit_int32_t h, + jit_word_t v, jit_word_t w) +{ + jit_node_t *node = new_node(code); + assert(!_jit->emit); + assert(l != h); + node->u.q.l = l; + node->u.q.h = h; + node->v.w = v; + node->w.w = w; + return (link_node(node)); +} + jit_node_t * _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float32_t w) @@ -857,6 +872,11 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_ldxi_l: case jit_code_ldxi_f: case jit_code_ldxi_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_int; break; + case jit_code_qmuli: case jit_code_qmuli_u: + case jit_code_qdivi: case jit_code_qdivi_u: + mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a2_int; + break; case jit_code_addi_f: case jit_code_subi_f: case jit_code_muli_f: case jit_code_divi_f: case jit_code_lti_f: case jit_code_lei_f: case jit_code_eqi_f: case jit_code_gei_f: case jit_code_gti_f: @@ -900,6 +920,11 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_movr_ww_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|jit_cc_a2_reg; break; + case jit_code_qmulr: case jit_code_qmulr_u: + case jit_code_qdivr: case jit_code_qdivr_u: + mask = jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_chg| + jit_cc_a1_reg|jit_cc_a2_reg; + break; case jit_code_sti_c: case jit_code_sti_s: case jit_code_sti_i: case jit_code_sti_l: case jit_code_sti_f: case jit_code_sti_d: mask = jit_cc_a0_int|jit_cc_a1_reg; @@ -1185,13 +1210,35 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) break; default: value = jit_classify(node->code); - if ((value & jit_cc_a0_reg) && !(node->u.w & jit_regno_patch)) { - if (value & jit_cc_a0_chg) { - jit_regset_clrbit(_jit->reglive, node->u.w); - jit_regset_setbit(_jit->regmask, node->u.w); + if (value & jit_cc_a0_reg) { + if (value & jit_cc_a0_rlh) { + if (!(node->u.q.l & jit_regno_patch)) { + if (value & jit_cc_a0_chg) { + jit_regset_clrbit(_jit->reglive, node->u.q.l); + jit_regset_setbit(_jit->regmask, node->u.q.l); + } + else + jit_regset_setbit(_jit->reglive, node->u.q.l); + } + if (!(node->u.q.h & jit_regno_patch)) { + if (value & jit_cc_a0_chg) { + jit_regset_clrbit(_jit->reglive, node->u.q.h); + jit_regset_setbit(_jit->regmask, node->u.q.h); + } + else + jit_regset_setbit(_jit->reglive, node->u.q.h); + } + } + else { + if (!(node->u.w & jit_regno_patch)) { + if (value & jit_cc_a0_chg) { + jit_regset_clrbit(_jit->reglive, node->u.w); + jit_regset_setbit(_jit->regmask, node->u.w); + } + else + jit_regset_setbit(_jit->reglive, node->u.w); + } } - else - jit_regset_setbit(_jit->reglive, node->u.w); } if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch)) { if (value & jit_cc_a1_chg) { @@ -1219,8 +1266,14 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) void _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) { - if (value & jit_cc_a0_reg) - jit_regset_setbit(_jit->regarg, jit_regno(node->u.w)); + if (value & jit_cc_a0_reg) { + if (value & jit_cc_a0_rlh) { + jit_regset_setbit(_jit->regarg, jit_regno(node->u.q.l)); + jit_regset_setbit(_jit->regarg, jit_regno(node->u.q.h)); + } + else + jit_regset_setbit(_jit->regarg, jit_regno(node->u.w)); + } if (value & jit_cc_a1_reg) jit_regset_setbit(_jit->regarg, jit_regno(node->v.w)); if (value & jit_cc_a2_reg) @@ -1230,8 +1283,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) void _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) { - if (value & jit_cc_a0_reg) - jit_regset_clrbit(_jit->regarg, jit_regno(node->u.w)); + if (value & jit_cc_a0_reg) { + if (value & jit_cc_a0_rlh) { + jit_regset_clrbit(_jit->regarg, jit_regno(node->u.q.l)); + jit_regset_clrbit(_jit->regarg, jit_regno(node->u.q.h)); + } + else + jit_regset_clrbit(_jit->regarg, jit_regno(node->u.w)); + } if (value & jit_cc_a1_reg) jit_regset_clrbit(_jit->regarg, jit_regno(node->v.w)); if (value & jit_cc_a2_reg) @@ -1325,14 +1384,33 @@ _jit_setup(jit_state_t *_jit, jit_block_t *block) return; default: value = jit_classify(node->code); - if ((value & jit_cc_a0_reg) && - !(node->u.w & jit_regno_patch) && - jit_regset_tstbit(regmask, node->u.w)) { + if (value & jit_cc_a0_reg) { live = !(value & jit_cc_a0_chg); - if (live || !jump) - jit_regset_clrbit(regmask, node->u.w); - if (live) - jit_regset_setbit(reglive, node->u.w); + if (value & jit_cc_a0_rlh) { + if (!(node->u.q.l & jit_regno_patch) && + jit_regset_tstbit(regmask, node->u.q.l)) { + if (live || !jump) + jit_regset_clrbit(regmask, node->u.q.l); + if (live) + jit_regset_setbit(reglive, node->u.q.l); + } + if (!(node->u.q.h & jit_regno_patch) && + jit_regset_tstbit(regmask, node->u.q.h)) { + if (live || !jump) + jit_regset_clrbit(regmask, node->u.q.h); + if (live) + jit_regset_setbit(reglive, node->u.q.h); + } + } + else { + if (!(node->u.w & jit_regno_patch) && + jit_regset_tstbit(regmask, node->u.w)) { + if (live || !jump) + jit_regset_clrbit(regmask, node->u.w); + if (live) + jit_regset_setbit(reglive, node->u.w); + } + } } if ((value & jit_cc_a1_reg) && !(node->v.w & jit_regno_patch) && @@ -1453,11 +1531,29 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, } } if (value & jit_cc_a0_reg) { - if (!(node->u.w & jit_regno_patch)) { - if (jit_regset_tstbit(*mask, node->u.w)) { - jit_regset_clrbit(*mask, node->u.w); - if (!(value & jit_cc_a0_chg)) - jit_regset_setbit(*live, node->u.w); + if (value & jit_cc_a0_rlh) { + if (!(node->u.q.l & jit_regno_patch)) { + if (jit_regset_tstbit(*mask, node->u.q.l)) { + jit_regset_clrbit(*mask, node->u.q.l); + if (!(value & jit_cc_a0_chg)) + jit_regset_setbit(*live, node->u.q.l); + } + } + if (!(node->u.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(*mask, node->u.q.h)) { + jit_regset_clrbit(*mask, node->u.q.h); + if (!(value & jit_cc_a0_chg)) + jit_regset_setbit(*live, node->u.q.h); + } + } + } + else { + if (!(node->u.w & jit_regno_patch)) { + if (jit_regset_tstbit(*mask, node->u.w)) { + jit_regset_clrbit(*mask, node->u.w); + if (!(value & jit_cc_a0_chg)) + jit_regset_setbit(*live, node->u.w); + } } } } @@ -1887,14 +1983,25 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) break; default: spec = jit_classify(iter->code); - if ((spec & jit_cc_a0_jmp) || - (((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) == - (jit_cc_a0_reg|jit_cc_a0_chg)) && - regno == jit_regno(iter->u.w)) || - (((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) == - (jit_cc_a1_reg|jit_cc_a1_chg)) && - regno == jit_regno(iter->v.w))) + if (spec & jit_cc_a0_jmp) return; + if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) == + (jit_cc_a0_reg|jit_cc_a0_chg)) { + if (spec & jit_cc_a0_rlh) { + if (regno == jit_regno(iter->u.q.l) || + regno == jit_regno(iter->u.q.h)) + return; + } + else { + if (regno == jit_regno(iter->u.w)) + return; + } + } + if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) == + (jit_cc_a1_reg|jit_cc_a1_chg)) { + if (regno == jit_regno(iter->v.w)) + return; + } break; } } @@ -1912,8 +2019,8 @@ _simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node, right = jit_regno(node->v.w); value = _jit->values + regno; if ((value->kind == jit_kind_register && - jit_regno(value->base.pair.l) == right && - value->base.pair.h == _jit->gen[right]) || + jit_regno(value->base.q.l) == right && + value->base.q.h == _jit->gen[right]) || (value->kind == kind && _jit->values[right].kind == kind && memcmp(&value->base.w, &_jit->values[right].base.w, size) == 0)) { del_node(prev, node); @@ -1923,8 +2030,8 @@ _simplify_movr(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node, memcpy(value, _jit->values + right, sizeof(jit_value_t)); else { value->kind = jit_kind_register; - value->base.pair.l = right; - value->base.pair.h = _jit->gen[regno]; + value->base.q.l = right; + value->base.q.h = _jit->gen[regno]; } ++_jit->gen[regno]; @@ -1996,15 +2103,15 @@ _simplify_ldxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) right = jit_regno(node->v.w); value = _jit->values + regno; if (value->kind == jit_kind_code && value->code == node->code && - value->base.pair.l == right && value->base.pair.h == _jit->gen[right] && + value->base.q.l == right && value->base.q.h == _jit->gen[right] && node->w.w == value->disp.w) { del_node(prev, node); return (1); } value->kind = jit_kind_code; value->code = node->code; - value->base.pair.l = right; - value->base.pair.h = _jit->gen[right]; + value->base.q.l = right; + value->base.q.h = _jit->gen[right]; value->disp.w = node->w.w; ++_jit->gen[regno]; @@ -2025,7 +2132,7 @@ _simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) /* check for redundant store after load */ if (value->kind == jit_kind_code && value->code == node->code && - value->base.pair.l == right && value->base.pair.h == _jit->gen[right] && + value->base.q.l == right && value->base.q.h == _jit->gen[right] && node->w.w == value->disp.w) { del_node(prev, node); return (1); @@ -2055,8 +2162,8 @@ _simplify_stxi(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) } value->kind = jit_kind_code; value->code = node->code; - value->base.pair.l = right; - value->base.pair.h = _jit->gen[right]; + value->base.q.l = right; + value->base.q.h = _jit->gen[right]; value->disp.w = node->u.w; } @@ -2188,9 +2295,19 @@ _simplify(jit_state_t *_jit) * a conditional branch */ goto reset; if (info & jit_cc_a0_chg) { - regno = jit_regno(node->u.w); - _jit->values[regno].kind = 0; - ++_jit->gen[regno]; + if (info & jit_cc_a0_rlh) { + regno = jit_regno(node->u.q.l); + _jit->values[regno].kind = 0; + ++_jit->gen[regno]; + regno = jit_regno(node->u.q.h); + _jit->values[regno].kind = 0; + ++_jit->gen[regno]; + } + else { + regno = jit_regno(node->u.w); + _jit->values[regno].kind = 0; + ++_jit->gen[regno]; + } } if (info & jit_cc_a1_chg) { regno = jit_regno(node->v.w); @@ -2222,8 +2339,12 @@ _register_change_p(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, /* lack of extra information */ if (value & jit_cc_a0_jmp) return (jit_reg_change); - else if ((value & jit_cc_a0_reg) && node->u.w == regno && - (value & jit_cc_a0_chg)) + else if ((value & (jit_cc_a0_reg|jit_cc_a0_chg)) == + (jit_cc_a0_reg|jit_cc_a0_chg) && + (((value & jit_cc_a0_rlh) && + (node->u.q.l == regno || node->u.q.h == regno)) || + (!(value & jit_cc_a0_rlh) && + node->u.w == regno))) return (jit_reg_change); else if ((value & jit_cc_a1_reg) && node->v.w == regno && (value & jit_cc_a1_chg)) @@ -2380,8 +2501,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, for (; node != link; node = node->next) { value = jit_classify(node->code); - if ((value & jit_cc_a0_reg) && node->u.w == regno) - node->u.w = patch; + if (value & jit_cc_a0_reg) { + if (value & jit_cc_a0_rlh) { + if (node->u.q.l == regno) + node->u.q.l = patch; + if (node->u.q.h == regno) + node->u.q.h = patch; + } + else { + if (node->u.w == regno) + node->u.w = patch; + } + } if ((value & jit_cc_a1_reg) && node->v.w == regno) node->v.w = patch; if ((value & jit_cc_a2_reg) && node->w.w == regno)