diff --git a/ChangeLog b/ChangeLog index 0a4b57367..2415689e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +2012-12-11 Paulo Andrade + + * lib/jit_ppc-cpu.c: Make movr a function that checks arguments + so that other code can safely assume it is a noop if src and dst + are the same register. + Implement rem{r,i}{,_u} as a div{,u}/mul/sub. + Correct ANDIS, ORIS and XORIS calls to cast the argument to + unsigned before the shift to avoid an assertion if the argument + had the topmost bit set. + Implement lshi, rshi and rshi_u as functions to test for a + zero argument, that would otherwise trigger an assertion when + computing the shift value. + Do a simple implementation of bm{s,c}{r,i} with a temporary, + "andr" of arguments and jump based on comparison with zero. + Correct typo in ldi_c. + + * lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*. + + * lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer + arguments in registers. If calling a varargs function and + passing a float or double argument, also either store the + value in the stack or in integer registers, as varargs functions + do not fetch it from float registers. + Add "case" for new functions and incorrectly missing ones. + Call libgcc's __clear_cache, that should know what to do + if the hardware needs flushing cache before execution. + + * lib/lightning.c: Do a simple/trivial logic in jit_regset_scan1, + that should make it easier for the compiler to optimize it, and + that also corrects the previously wrong code for big endian, and + that was causing problems in ppc due to not saving all callee save + registers as it was not "finding" them in the regset due to the + little endian assumption bug. 
+ 2012-12-11 Paulo Andrade * configure.ac: Only default to using the builtin disassembler diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 729d27528..05bebf2bc 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -349,7 +349,8 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int); # define XOR_(d,a,b) FXO_(31,a,d,b,0,316) # define XORI(s,a,u) FDu(26,s,a,u) # define XORIS(s,a,u) FDu(27,s,a,u) -# define movr(r0,r1) MR(r0,r1) +# define movr(r0,r1) _movr(_jit,r0,r1) +static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) @@ -387,7 +388,14 @@ static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define divr_u(r0,r1,r2) DIVWU(r0,r1,r2) # define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); - +# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) +static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) +static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2) +static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0) +static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define andr(r0,r1,r2) AND(r0,r1,r2) # define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -398,12 +406,14 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define lshr(r0,r1,r2) SLW(r0,r1,r2) -# define lshi(r0,r1,i0) SLWI(r0,r1,i0) +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshr(r0,r1,r2) SRAW(r0,r1,r2) -# 
define rshi(r0,r1,i0) SRAWI(r0,r1,i0) +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define rshr_u(r0,r1,r2) SRW(r0,r1,r2) -# define rshi_u(r0,r1,i0) SRWI(r0,r1,i0) - +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2) static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define lti(r0,r1,i0) _lti(_jit,r0,r1,i0) @@ -485,7 +495,14 @@ static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1) static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); - +#define bmsr(i0,r0,r1) _bmsr(_jit,i0,r0,r1) +static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +#define bmsi(i0,r0,i1) _bmsi(_jit,i0,r0,i1) +static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +#define bmcr(i0,r0,r1) _bmcr(_jit,i0,r0,r1) +static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +#define bmci(i0,r0,i1) _bmci(_jit,i0,r0,i1) +static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); #define boaddr(i0,r0,r1) _boaddr(_jit,i0,r0,r1) static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define boaddi(i0,r0,i1) _boaddi(_jit,i0,r0,i1) @@ -727,6 +744,13 @@ _FM(jit_state_t *_jit, int o, int s, int a, int h, int b, int e, int r) ii((o<<26)|(s<<21)|(a<<16)|(h<<11)|(b<<6)|(e<<1)|r); } +static void +_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + MR(r0,r1); +} + static void _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -859,6 +883,62 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +static void +_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t 
reg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + divr(rn(reg), r1, r2); + mulr(rn(reg), r2, rn(reg)); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + else { + divr(r0, r1, r2); + mulr(r0, r2, r0); + subr(r0, r1, r0); + } +} + +static void +_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + remr(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + divr_u(rn(reg), r1, r2); + mulr(rn(reg), r2, rn(reg)); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + else { + divr_u(r0, r1, r2); + mulr(r0, r2, r0); + subr(r0, r1, r0); + } +} + +static void +_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + remr_u(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -866,7 +946,7 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) if (can_zero_extend_short_p(i0)) ANDI_(r0, r1, i0); else if (!(i0 & 0x0000ffff)) - ANDIS_(r0, r1, i0 >> 16); + ANDIS_(r0, r1, (jit_uword_t)i0 >> 16); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -882,7 +962,7 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) if (can_zero_extend_short_p(i0)) ORI(r0, r1, i0); else if (!(i0 & 0x0000ffff)) - ORIS(r0, r1, i0 >> 16); + ORIS(r0, r1, (jit_uword_t)i0 >> 16); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -898,7 +978,7 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) if (can_zero_extend_short_p(i0)) XORI(r0, r1, i0); else if (!(i0 & 0x0000ffff)) - XORIS(r0, r1, i0 >> 16); + XORIS(r0, r1, (jit_uword_t)i0 >> 16); else { reg = 
jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -907,6 +987,33 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else + SLWI(r0,r1,i0); +} + +static void +_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else + SRAWI(r0,r1,i0); +} + +static void +_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else + SRWI(r0,r1,i0); +} + static void _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -1465,6 +1572,54 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) return (w); } +static jit_word_t +_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + andr(rn(reg), r0, r1); + w = bnei(i0, rn(reg), 0); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + andi(rn(reg), r0, i1); + w = bnei(i0, rn(reg), 0); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + andr(rn(reg), r0, r1); + w = beqi(i0, rn(reg), 0); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + andi(rn(reg), r0, i1); + w = beqi(i0, rn(reg), 0); + jit_unget_reg(reg); + return (w); +} + static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { @@ -1683,7 +1838,7 @@ _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1) static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { - ldi_c(r0, i0); + ldi_uc(r0, i0); extr_c(r0, r0); } diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index 8217f39e2..d22d5870b 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -36,10 +36,10 @@ static void _FXFL(jit_state_t*,int,int,int,int,int); # define FCTIW_(d,b) FX_(63,d,0,b,14) # define FCTIWZ(d,b) FX(63,d,0,b,15) # define FCTIWZ_(d,b) FX_(63,d,0,b,15) -# define FDIV(d,a,b) FA(63,d,0,b,0,18) -# define FDIV_(d,a,b) FA_(63,d,0,b,0,18) -# define FDIVS(d,a,b) FA(59,d,0,b,0,18) -# define FDIVS_(d,a,b) FA_(59,d,0,b,0,18) +# define FDIV(d,a,b) FA(63,d,a,b,0,18) +# define FDIV_(d,a,b) FA_(63,d,a,b,0,18) +# define FDIVS(d,a,b) FA(59,d,a,b,0,18) +# define FDIVS_(d,a,b) FA_(59,d,a,b,0,18) # define FMADD(d,a,b,c) FA(63,d,a,b,c,29) # define FMADD_(d,a,b,c) FA(63,d,a,b,c,29) # define FMADDS(d,a,b,c) FA(59,d,a,b,c,29) @@ -103,13 +103,13 @@ static void _FXFL(jit_state_t*,int,int,int,int,int); # define MTFSFI_(d,i) FX_(63,d<<2,0,i<<1,134) # define STFD(s,a,d) FDs(54,s,a,d) # define STFDU(s,a,d) FDs(55,s,a,d) -# define STFDUX(s,a,b) FX(31,a,a,b,759) -# define STFDX(s,a,b) FX(31,a,a,b,727) -# define STFIWX(s,a,b) FX(31,a,a,b,983) +# define STFDUX(s,a,b) FX(31,s,a,b,759) +# define STFDX(s,a,b) FX(31,s,a,b,727) +# define STFIWX(s,a,b) FX(31,s,a,b,983) # define STFS(s,a,d) FDs(52,s,a,d) # define STFSU(s,a,d) FDs(53,s,a,d) -# define STFSUX(s,a,b) FX(31,a,a,b,695) -# define STFSX(s,a,b) FX(31,a,a,b,663) +# define STFSUX(s,a,b) FX(31,s,a,b,695) +# define STFSX(s,a,b) FX(31,s,a,b,663) # define movr_f(r0,r1) movr_d(r0,r1) # define movr_d(r0,r1) _movr_d(_jit,r0,r1) diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 35b0f91e8..19eeca623 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -24,6 +24,9 @@ #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); +/* libgcc */ +extern void __clear_cache(void *, void *); + #define PROTO 1 # include 
"jit_ppc-cpu.c" # include "jit_ppc-fpu.c" @@ -380,7 +383,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) { jit_int32_t regno; assert(_jit->function); - if (_jit->function->call.argi < 6) { + if (_jit->function->call.argi < 8) { jit_movi(JIT_RA0 - _jit->function->call.argi, u); ++_jit->function->call.argi; } @@ -412,6 +415,15 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) if (_jit->function->call.argf < 8) { jit_movr_d(JIT_FA0 - _jit->function->call.argf, u); ++_jit->function->call.argf; + if (!(_jit->function->call.kind & jit_call_varargs)) + return; + } + if (_jit->function->call.argi < 6) { + jit_stxi_d(-8, JIT_FP, u); + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -8); + _jit->function->call.argi++; + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -4); + _jit->function->call.argi++; } else { jit_stxi_d(_jit->function->call.size, JIT_SP, u); @@ -428,14 +440,23 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) if (_jit->function->call.argf < 8) { jit_movi_d(JIT_FA0 - _jit->function->call.argf, u); ++_jit->function->call.argf; + if (!(_jit->function->call.kind & jit_call_varargs)) + return; + } + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + if (_jit->function->call.argi < 6) { + jit_stxi_d(-8, JIT_FP, regno); + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -8); + _jit->function->call.argi++; + jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -4); + _jit->function->call.argi++; } else { - regno = jit_get_reg(jit_class_fpr); - jit_movi_d(regno, u); jit_stxi_d(_jit->function->call.size, JIT_SP, regno); _jit->function->call.size += (sizeof(jit_float64_t) + 8) & -8; - jit_unget_reg(regno); } + jit_unget_reg(regno); } jit_bool_t @@ -692,6 +713,10 @@ _jit_emit(jit_state_t *_jit) case_rrw(div,); case_rrr(div, _u); case_rrw(div, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -699,6 +724,7 @@ _jit_emit(jit_state_t *_jit) 
case_rrr(xor,); case_rrw(xor,); case_rrr(lsh,); + case_rrw(lsh,); case_rrr(rsh,); case_rrw(rsh,); case_rrr(rsh, _u); @@ -771,6 +797,10 @@ _jit_emit(jit_state_t *_jit) case_brw(bgt, _u); case_brr(bne,); case_brw(bne,); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); case_brr(boadd,); case_brw(boadd,); case_brr(boadd, _u); @@ -1064,6 +1094,8 @@ _jit_emit(jit_state_t *_jit) patch_at(_jit->patches.ptr[offset].inst, word); } + __clear_cache(_jit->code.ptr, _jit->pc.uc); + return (_jit->code.ptr); } diff --git a/lib/lightning.c b/lib/lightning.c index 7bef394e1..a416872ff 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -262,31 +262,10 @@ _jit_unget_reg(jit_state_t *_jit, jit_int32_t regno) unsigned long jit_regset_scan1(jit_regset_t set, jit_int32_t offset) { - jit_int32_t index; - jit_int32_t length; - union { - jit_uint64_t ul; - jit_uint8_t uc[8]; - } data; - assert(offset >= 0 && offset <= 63); - data.ul = set; - if (data.uc[index = offset >> 3]) { - length = (index + 1) << 3; - for (; offset < length; offset++) { - if (set & (1LL << offset)) - return (offset); - } - } - for (index++; index < 8; index++) { - if (data.uc[index]) { - offset = index << 3; - length = (index + 1) << 3; - for (; offset < length; offset++) { - if (set & (1LL << offset)) - return (offset); - } - } + for (; offset < 64; offset++) { + if (set & (1ULL << offset)) /* unsigned mask: 1LL << 63 is undefined */ + return (offset); } return (ULONG_MAX); }