1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 04:10:18 +02:00

Make all current test cases pass in Darwin PowerPC.

* lib/jit_ppc-cpu.c: Make movr a function that checks arguments
	so that other code can safely assume it is a noop if src and dst
	are the same register.
	  Implement rem{r,i}{,_u} as a div{,u}/mul/sub.
	  Correct ANDIS, ORIS and XORIS calls to cast the argument to
	unsigned before the shift to avoid an assertion if the argument
	had the topmost bit set.
	  Implement lshi, rshi and rshi_u as functions to test for a
	zero argument, that would otherwise trigger an assertion when
	computing the shift value.
	  Do a simple implementation of bm{s,c}{r,i} with a temporary,
	"andr" of arguments and jump based on comparison with zero.
	  Correct typo in ldi_c.

	* lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*.

	* lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer
	arguments in registers. If calling a varargs function and
	passing a float or double argument, also either store the
	value in the stack or in integer registers, as varargs functions
	do not fetch it from float registers.
	  Add "case" for new functions and incorrectly missing ones.
	  Call libgcc's __clear_cache, that should know what to do
	if the hardware needs flushing cache before execution.

	* lib/lightning.c: Use a simple, trivial loop in jit_regset_scan1.
	This should be easier for the compiler to optimize, and it also
	corrects the previous code, which was wrong on big endian: on ppc
	not all callee save registers were being saved, because the scan
	was not "finding" them in the regset due to its little endian
	assumption.
This commit is contained in:
pcpa 2012-12-11 13:14:09 -02:00
parent 7e3d863767
commit a04df966c0
5 changed files with 248 additions and 48 deletions

View file

@ -1,3 +1,37 @@
2012-12-11 Paulo Andrade <pcpa@gnu.org>
* lib/jit_ppc-cpu.c: Make movr a function that checks arguments
so that other code can safely assume it is a noop if src and dst
are the same register.
Implement rem{r,i}{,_u} as a div{,u}/mul/sub.
Correct ANDIS, ORIS and XORIS calls to cast the argument to
unsigned before the shift to avoid an assertion if the argument
had the topmost bit set.
Implement lshi, rshi and rshi_u as functions to test for a
zero argument, that would otherwise trigger an assertion when
computing the shift value.
Do a simple implementation of bm{s,c}{r,i} with a temporary,
"andr" of arguments and jump based on comparison with zero.
Correct typo in ldi_c.
* lib/jit_ppc-fpu.c: Correct wrong arguments to FDIV* and STF*.
* lib/jit_ppc.c: Correct wrong check for 6 instead of 8 integer
arguments in registers. If calling a varargs function and
passing a float or double argument, also either store the
value in the stack or in integer registers, as varargs functions
do not fetch it from float registers.
Add "case" for new functions and incorrectly missing ones.
Call libgcc's __clear_cache, that should know what to do
if the hardware needs flushing cache before execution.
* lib/lightning.c: Use a simple, trivial loop in jit_regset_scan1.
This should be easier for the compiler to optimize, and it also
corrects the previous code, which was wrong on big endian: on ppc
not all callee save registers were being saved, because the scan
was not "finding" them in the regset due to its little endian
assumption.
2012-12-11 Paulo Andrade <pcpa@gnu.org>
* configure.ac: Only default to using the builtin disassembler

View file

@ -349,7 +349,8 @@ static void _FM(jit_state_t*,int,int,int,int,int,int,int);
# define XOR_(d,a,b) FXO_(31,a,d,b,0,316)
# define XORI(s,a,u) FDu(26,s,a,u)
# define XORIS(s,a,u) FDu(27,s,a,u)
# define movr(r0,r1) MR(r0,r1)
# define movr(r0,r1) _movr(_jit,r0,r1)
static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi(r0,i0) _movi(_jit,r0,i0)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
@ -387,7 +388,14 @@ static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define divr_u(r0,r1,r2) DIVWU(r0,r1,r2)
# define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0)
static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2)
static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0)
static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2)
static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0)
static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define andr(r0,r1,r2) AND(r0,r1,r2)
# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@ -398,12 +406,14 @@ static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0)
static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define lshr(r0,r1,r2) SLW(r0,r1,r2)
# define lshi(r0,r1,i0) SLWI(r0,r1,i0)
# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define rshr(r0,r1,r2) SRAW(r0,r1,r2)
# define rshi(r0,r1,i0) SRAWI(r0,r1,i0)
# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define rshr_u(r0,r1,r2) SRW(r0,r1,r2)
# define rshi_u(r0,r1,i0) SRWI(r0,r1,i0)
# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define ltr(r0,r1,r2) _ltr(_jit,r0,r1,r2)
static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define lti(r0,r1,i0) _lti(_jit,r0,r1,i0)
@ -485,7 +495,14 @@ static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1)
static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
#define bmsr(i0,r0,r1) _bmsr(_jit,i0,r0,r1)
static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define bmsi(i0,r0,i1) _bmsi(_jit,i0,r0,i1)
static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
#define bmcr(i0,r0,r1) _bmcr(_jit,i0,r0,r1)
static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define bmci(i0,r0,i1) _bmci(_jit,i0,r0,i1)
static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
#define boaddr(i0,r0,r1) _boaddr(_jit,i0,r0,r1)
static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define boaddi(i0,r0,i1) _boaddi(_jit,i0,r0,i1)
@ -727,6 +744,13 @@ _FM(jit_state_t *_jit, int o, int s, int a, int h, int b, int e, int r)
ii((o<<26)|(s<<21)|(a<<16)|(h<<11)|(b<<6)|(e<<1)|r);
}
/*
 * Register-to-register move of r1 into r0, emitted with MR.
 * Nothing is emitted when both arguments name the same register, so
 * callers may treat movr(r, r) as a no-op.
 */
static void
_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 == r1)
        return;
    MR(r0,r1);
}
static void
_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
@ -859,6 +883,62 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
jit_unget_reg(reg);
}
/*
 * Remainder of r1 by r2 into r0, computed as r1 - (r1 / r2) * r2 using
 * divr, mulr and subr.
 * Both r1 and r2 are still read after the division, so when r0 is the
 * same register as either of them the intermediate quotient/product is
 * kept in a scratch register instead of r0.
 */
static void
_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t		tmp;

    if (r0 != r1 && r0 != r2) {
        /* r0 does not alias a source: use it as the work register */
        divr(r0, r1, r2);
        mulr(r0, r2, r0);
        subr(r0, r1, r0);
        return;
    }

    tmp = jit_get_reg(jit_class_gpr);
    divr(rn(tmp), r1, r2);
    mulr(rn(tmp), r2, rn(tmp));
    subr(r0, r1, rn(tmp));
    jit_unget_reg(tmp);
}
/*
 * Remainder of r1 by the immediate i0 into r0.
 * The immediate is loaded into a scratch register and the register
 * form (remr) does the actual work.
 */
static void
_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    jit_int32_t		divisor = jit_get_reg(jit_class_gpr);

    movi(rn(divisor), i0);
    remr(r0, r1, rn(divisor));
    jit_unget_reg(divisor);
}
/*
 * Remainder of r1 by r2 into r0 using the unsigned division (divr_u):
 * r0 = r1 - (r1 / r2) * r2.
 * Both r1 and r2 are still read after the division, so when r0 is the
 * same register as either of them the intermediate quotient/product is
 * kept in a scratch register instead of r0.
 */
static void
_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t		tmp;

    if (r0 != r1 && r0 != r2) {
        /* r0 does not alias a source: use it as the work register */
        divr_u(r0, r1, r2);
        mulr(r0, r2, r0);
        subr(r0, r1, r0);
        return;
    }

    tmp = jit_get_reg(jit_class_gpr);
    divr_u(rn(tmp), r1, r2);
    mulr(rn(tmp), r2, rn(tmp));
    subr(r0, r1, rn(tmp));
    jit_unget_reg(tmp);
}
/*
 * Remainder of r1 by the immediate i0 into r0, unsigned variant.
 * The immediate is loaded into a scratch register and the register
 * form (remr_u) does the actual work.
 */
static void
_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    jit_int32_t		divisor = jit_get_reg(jit_class_gpr);

    movi(rn(divisor), i0);
    remr_u(r0, r1, rn(divisor));
    jit_unget_reg(divisor);
}
static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
@ -866,7 +946,7 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
if (can_zero_extend_short_p(i0))
ANDI_(r0, r1, i0);
else if (!(i0 & 0x0000ffff))
ANDIS_(r0, r1, i0 >> 16);
ANDIS_(r0, r1, (jit_uword_t)i0 >> 16);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
@ -882,7 +962,7 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
if (can_zero_extend_short_p(i0))
ORI(r0, r1, i0);
else if (!(i0 & 0x0000ffff))
ORIS(r0, r1, i0 >> 16);
ORIS(r0, r1, (jit_uword_t)i0 >> 16);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
@ -898,7 +978,7 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
if (can_zero_extend_short_p(i0))
XORI(r0, r1, i0);
else if (!(i0 & 0x0000ffff))
XORIS(r0, r1, i0 >> 16);
XORIS(r0, r1, (jit_uword_t)i0 >> 16);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
@ -907,6 +987,33 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
}
}
/*
 * Left shift of r1 by the immediate i0 into r0.
 * A zero shift count is emitted as a plain move rather than SLWI;
 * per the change description, SLWI with a zero count would trigger an
 * assertion while the shift value is being computed.
 */
static void
_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    if (i0 != 0) {
        SLWI(r0,r1,i0);
        return;
    }
    movr(r0, r1);
}
/*
 * Right shift of r1 by the immediate i0 into r0, emitted with SRAWI.
 * A zero shift count is emitted as a plain move rather than SRAWI;
 * per the change description, a zero count would trigger an assertion
 * while the shift value is being computed.
 */
static void
_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    if (i0 != 0) {
        SRAWI(r0,r1,i0);
        return;
    }
    movr(r0, r1);
}
/*
 * Right shift of r1 by the immediate i0 into r0, emitted with SRWI
 * (the _u variant).
 * A zero shift count is emitted as a plain move rather than SRWI;
 * per the change description, a zero count would trigger an assertion
 * while the shift value is being computed.
 */
static void
_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    if (i0 != 0) {
        SRWI(r0,r1,i0);
        return;
    }
    movr(r0, r1);
}
static void
_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
@ -1465,6 +1572,54 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
return (w);
}
/*
 * "Branch if mask set", register form: ANDs r0 with r1 into a scratch
 * register and emits a bnei of that result against zero.
 * i0 is handed to bnei unchanged (presumably the branch target --
 * confirm against bnei); the value bnei returns is returned as is.
 */
static jit_word_t
_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		branch;
    jit_int32_t		masked = jit_get_reg(jit_class_gpr);

    andr(rn(masked), r0, r1);
    branch = bnei(i0, rn(masked), 0);
    /* the scratch register is only needed for the comparison */
    jit_unget_reg(masked);
    return (branch);
}
/*
 * "Branch if mask set", immediate form: ANDs r0 with the immediate i1
 * into a scratch register and emits a bnei of that result against zero.
 * i0 is handed to bnei unchanged (presumably the branch target --
 * confirm against bnei); the value bnei returns is returned as is.
 */
static jit_word_t
_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
    jit_word_t		branch;
    jit_int32_t		masked = jit_get_reg(jit_class_gpr);

    andi(rn(masked), r0, i1);
    branch = bnei(i0, rn(masked), 0);
    /* the scratch register is only needed for the comparison */
    jit_unget_reg(masked);
    return (branch);
}
/*
 * "Branch if mask clear", register form: ANDs r0 with r1 into a scratch
 * register and emits a beqi of that result against zero.
 * i0 is handed to beqi unchanged (presumably the branch target --
 * confirm against beqi); the value beqi returns is returned as is.
 */
static jit_word_t
_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t		branch;
    jit_int32_t		masked = jit_get_reg(jit_class_gpr);

    andr(rn(masked), r0, r1);
    branch = beqi(i0, rn(masked), 0);
    /* the scratch register is only needed for the comparison */
    jit_unget_reg(masked);
    return (branch);
}
/*
 * "Branch if mask clear", immediate form: ANDs r0 with the immediate i1
 * into a scratch register and emits a beqi of that result against zero.
 * i0 is handed to beqi unchanged (presumably the branch target --
 * confirm against beqi); the value beqi returns is returned as is.
 */
static jit_word_t
_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
    jit_word_t		branch;
    jit_int32_t		masked = jit_get_reg(jit_class_gpr);

    andi(rn(masked), r0, i1);
    branch = beqi(i0, rn(masked), 0);
    /* the scratch register is only needed for the comparison */
    jit_unget_reg(masked);
    return (branch);
}
static jit_word_t
_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
@ -1683,7 +1838,7 @@ _ldr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
/*
 * Load a char from address i0 into r0: the byte is fetched with ldi_uc
 * and then extended in place with extr_c.
 * The fetch must use ldi_uc. Using ldi_c here would call back into this
 * function (assuming ldi_c is the macro wrapper for _ldi_c, as with the
 * other name/_name pairs in this file) and never terminate.
 */
static void
_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
    ldi_uc(r0, i0);
    extr_c(r0, r0);
}

View file

@ -36,10 +36,10 @@ static void _FXFL(jit_state_t*,int,int,int,int,int);
# define FCTIW_(d,b) FX_(63,d,0,b,14)
# define FCTIWZ(d,b) FX(63,d,0,b,15)
# define FCTIWZ_(d,b) FX_(63,d,0,b,15)
# define FDIV(d,a,b) FA(63,d,0,b,0,18)
# define FDIV_(d,a,b) FA_(63,d,0,b,0,18)
# define FDIVS(d,a,b) FA(59,d,0,b,0,18)
# define FDIVS_(d,a,b) FA_(59,d,0,b,0,18)
# define FDIV(d,a,b) FA(63,d,a,b,0,18)
# define FDIV_(d,a,b) FA_(63,d,a,b,0,18)
# define FDIVS(d,a,b) FA(59,d,a,b,0,18)
# define FDIVS_(d,a,b) FA_(59,d,a,b,0,18)
# define FMADD(d,a,b,c) FA(63,d,a,b,c,29)
/* '_' form goes through FA_, like FDIV_ and FDIVS_; with FA it was identical to FMADD */
# define FMADD_(d,a,b,c) FA_(63,d,a,b,c,29)
# define FMADDS(d,a,b,c) FA(59,d,a,b,c,29)
@ -103,13 +103,13 @@ static void _FXFL(jit_state_t*,int,int,int,int,int);
# define MTFSFI_(d,i) FX_(63,d<<2,0,i<<1,134)
# define STFD(s,a,d) FDs(54,s,a,d)
# define STFDU(s,a,d) FDs(55,s,a,d)
# define STFDUX(s,a,b) FX(31,a,a,b,759)
# define STFDX(s,a,b) FX(31,a,a,b,727)
# define STFIWX(s,a,b) FX(31,a,a,b,983)
# define STFDUX(s,a,b) FX(31,s,a,b,759)
# define STFDX(s,a,b) FX(31,s,a,b,727)
# define STFIWX(s,a,b) FX(31,s,a,b,983)
# define STFS(s,a,d) FDs(52,s,a,d)
# define STFSU(s,a,d) FDs(53,s,a,d)
# define STFSUX(s,a,b) FX(31,a,a,b,695)
# define STFSX(s,a,b) FX(31,a,a,b,663)
# define STFSUX(s,a,b) FX(31,s,a,b,695)
# define STFSX(s,a,b) FX(31,s,a,b,663)
# define movr_f(r0,r1) movr_d(r0,r1)
# define movr_d(r0,r1) _movr_d(_jit,r0,r1)

View file

@ -24,6 +24,9 @@
#define patch(instr, node) _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
/* libgcc */
extern void __clear_cache(void *, void *);
#define PROTO 1
# include "jit_ppc-cpu.c"
# include "jit_ppc-fpu.c"
@ -380,7 +383,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
{
jit_int32_t regno;
assert(_jit->function);
if (_jit->function->call.argi < 6) {
if (_jit->function->call.argi < 8) {
jit_movi(JIT_RA0 - _jit->function->call.argi, u);
++_jit->function->call.argi;
}
@ -412,6 +415,15 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
if (_jit->function->call.argf < 8) {
jit_movr_d(JIT_FA0 - _jit->function->call.argf, u);
++_jit->function->call.argf;
if (!(_jit->function->call.kind & jit_call_varargs))
return;
}
if (_jit->function->call.argi < 6) {
jit_stxi_d(-8, JIT_FP, u);
jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -8);
_jit->function->call.argi++;
jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -4);
_jit->function->call.argi++;
}
else {
jit_stxi_d(_jit->function->call.size, JIT_SP, u);
@ -428,14 +440,23 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
if (_jit->function->call.argf < 8) {
jit_movi_d(JIT_FA0 - _jit->function->call.argf, u);
++_jit->function->call.argf;
if (!(_jit->function->call.kind & jit_call_varargs))
return;
}
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
if (_jit->function->call.argi < 6) {
jit_stxi_d(-8, JIT_FP, regno);
jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -8);
_jit->function->call.argi++;
jit_ldxi(JIT_RA0 - _jit->function->call.argi, JIT_FP, -4);
_jit->function->call.argi++;
}
else {
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
jit_stxi_d(_jit->function->call.size, JIT_SP, regno);
_jit->function->call.size += (sizeof(jit_float64_t) + 8) & -8;
jit_unget_reg(regno);
}
jit_unget_reg(regno);
}
jit_bool_t
@ -692,6 +713,10 @@ _jit_emit(jit_state_t *_jit)
case_rrw(div,);
case_rrr(div, _u);
case_rrw(div, _u);
case_rrr(rem,);
case_rrw(rem,);
case_rrr(rem, _u);
case_rrw(rem, _u);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
@ -699,6 +724,7 @@ _jit_emit(jit_state_t *_jit)
case_rrr(xor,);
case_rrw(xor,);
case_rrr(lsh,);
case_rrw(lsh,);
case_rrr(rsh,);
case_rrw(rsh,);
case_rrr(rsh, _u);
@ -771,6 +797,10 @@ _jit_emit(jit_state_t *_jit)
case_brw(bgt, _u);
case_brr(bne,);
case_brw(bne,);
case_brr(bms,);
case_brw(bms,);
case_brr(bmc,);
case_brw(bmc,);
case_brr(boadd,);
case_brw(boadd,);
case_brr(boadd, _u);
@ -1064,6 +1094,8 @@ _jit_emit(jit_state_t *_jit)
patch_at(_jit->patches.ptr[offset].inst, word);
}
__clear_cache(_jit->code.ptr, _jit->pc.uc);
return (_jit->code.ptr);
}

View file

@ -262,31 +262,10 @@ _jit_unget_reg(jit_state_t *_jit, jit_int32_t regno)
/*
 * Return the index of the lowest bit set in `set` at position `offset`
 * or above, or ULONG_MAX when no bit in [offset, 63] is set.
 * `offset` must be in the range 0..63 (asserted).
 *
 * This is a plain bit-by-bit scan; it does not look at the in-memory
 * byte layout of `set`, so the result does not depend on endianness.
 */
unsigned long
jit_regset_scan1(jit_regset_t set, jit_int32_t offset)
{
    assert(offset >= 0 && offset <= 63);
    for (; offset < 64; offset++) {
        /* unsigned constant: 1LL << 63 is not representable in a
         * signed long long, so the shift would be undefined */
        if (set & (1ULL << offset))
            return (offset);
    }
    return (ULONG_MAX);
}