diff --git a/ChangeLog b/ChangeLog index bd809e625..18f6440b0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2013-04-26 Paulo Andrade + + * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c, + lib/jit_ia64-fpu.c, lib/jit_ia64.c: + Relocate JIT_Rn registers to the local registers, as, like + float registers, div/rem and sqrt are implemented as function + calls, and may overwrite non saved scratch registers. + Change patch_at to receive a jit_code_t instead of a + jit_node_t, so that it is easier to "inline" patches when + some instruction requires complex code to implement, e.g. + uneq and ltgt. + Correct arguments to FMA and FMA like instructions that, + due to a cut&paste error were passing the wrong argument + to the related F- implementation function. + Rewrite ltgt to return the proper result if one (or both) + of the arguments is unordered. + 2013-04-26 Paulo Andrade * include/lightning/jit_ia64.h, include/lightning/jit_private.h, diff --git a/include/lightning/jit_ia64.h b/include/lightning/jit_ia64.h index 0e91252f3..d81b8bcf2 100644 --- a/include/lightning/jit_ia64.h +++ b/include/lightning/jit_ia64.h @@ -26,16 +26,18 @@ */ #define JIT_FP _R4 /* Not abi specific */ typedef enum { -#define JIT_R0 _R14 -#define JIT_R1 _R15 -#define JIT_R2 _R16 -#define JIT_V0 _R40 -#define JIT_V1 _R41 -#define JIT_V2 _R42 -#define jit_r_num() 16 -#define jit_r(n) (_R14 - (n)) -#define jit_v_num() 8 -#define jit_v(n) (_R40 + (n)) +#define JIT_R0 _R40 +#define JIT_R1 _R41 +#define JIT_R2 _R42 +#define JIT_R3 _R43 +#define JIT_V0 _R44 +#define JIT_V1 _R45 +#define JIT_V2 _R46 +#define JIT_V3 _R47 +#define jit_r_num() 4 +#define jit_r(n) (_R40 + (n)) /* jit_r(i) must equal JIT_Ri */ +#define jit_v_num() 4 +#define jit_v(n) (_R44 + (n)) _R0, /* constant - Always 0 */ _R1, /* special - Global Data pointer (gp) */ /* r2-r3 - scratch - Use with 22-bit immediate add - scratch */ diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 6ca1d4fd3..009b350b7 100644 --- a/lib/jit_ia64-cpu.c +++ 
b/lib/jit_ia64-cpu.c @@ -1423,7 +1423,7 @@ static void _prolog(jit_state_t*,jit_node_t*); #define epilog(node) _epilog(_jit,node) static void _epilog(jit_state_t*,jit_node_t*); #define patch_at(node,instr,label) _patch_at(_jit,node,instr,label) -static void _patch_at(jit_state_t*,jit_node_t*,jit_word_t,jit_word_t); +static void _patch_at(jit_state_t*,jit_code_t,jit_word_t,jit_word_t); #endif #if CODE @@ -3414,7 +3414,7 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - addr(r0, r1, rn(reg)); + subr(r0, r1, rn(reg)); jit_unget_reg(reg); } @@ -3607,8 +3607,7 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) return; } else if (i0 > 0 && !(i0 & (i0 - 1))) { - movr(r0, r1); - andi(r0, r0, i0 - 1); + andi(r0, r1, i0 - 1); return; } reg = jit_get_reg(jit_class_gpr); @@ -4926,6 +4925,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node) } rout = reg - _OUT0; + /* Do not know if will call div/mod functions (sqrt) */ + if (rout < 2) + rout = 2; + /* Match gcc prolog */ ALLOC(_jitc->breg + 1, ruse, rout); MOV(_jitc->breg + 2, GR_12); @@ -4999,7 +5002,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node) } static void -_patch_at(jit_state_t *_jit, jit_node_t *node, +_patch_at(jit_state_t *_jit, jit_code_t code, jit_word_t instr, jit_word_t label) { inst_lo_t l; @@ -5013,7 +5016,7 @@ _patch_at(jit_state_t *_jit, jit_node_t *node, c.w = instr; l.w = c.p[0]; h.w = c.p[1]; get_tm(tm); get_s0(s0); get_s1(s1); get_s2(s2); - switch (node->code) { + switch (code) { case jit_code_movi: case jit_code_calli: i1 = (label >> 63) & 0x1L; diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index 214f980ff..ee58cacbd 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -157,12 +157,12 @@ static void F16_(jit_state_t*,jit_word_t, /* fcvt.fxuf */ #define FCVT_XUF(f1,f3) FMA(f1,f3,1,0) /* fma */ -#define FMA(f1,f3,f4,f2) F1(0x8,0,SF_S0,f4,f3,f3,f1) -#define 
FMA_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x8,0,sf,f4,f3,f3,f1) -#define FMA_S(f1,f3,f4,f2) F1(0x8,1,SF_S0,f4,f3,f3,f1) -#define FMA_S_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x8,1,sf,f4,f3,f3,f1) -#define FMA_D(f1,f3,f4,f2) F1(0x9,0,SF_S0,f4,f3,f3,f1) -#define FMA_D_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x9,0,sf,f4,f3,f3,f1) +#define FMA(f1,f3,f4,f2) F1(0x8,0,SF_S0,f4,f3,f2,f1) +#define FMA_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x8,0,sf,f4,f3,f2,f1) +#define FMA_S(f1,f3,f4,f2) F1(0x8,1,SF_S0,f4,f3,f2,f1) +#define FMA_S_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x8,1,sf,f4,f3,f2,f1) +#define FMA_D(f1,f3,f4,f2) F1(0x9,0,SF_S0,f4,f3,f2,f1) +#define FMA_D_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0x9,0,sf,f4,f3,f2,f1) /* fmax */ #define FMAX(f1,f2,f3) F8(0,SF_S0,0x15,f3,f2,f1) /* fmerge */ @@ -183,18 +183,18 @@ static void F16_(jit_state_t*,jit_word_t, #define FMPY_D(f1,f3,f4) FMA_D(f1,f3,f4,0) #define FMPY_D_p(f1,f3,f4,sf,_p) FMA_D_p(f1,f3,f4,0,sf,_p) /* fms */ -#define FMS(f1,f3,f4,f2) F1(0xa,0,SF_S0,f4,f3,f3,f1) -#define FMS_S(f1,f3,f4,f2) F1(0xa,1,SF_S0,f4,f3,f3,f1) -#define FMS_D(f1,f3,f4,f2) F1(0xb,0,SF_S0,f4,f3,f3,f1) +#define FMS(f1,f3,f4,f2) F1(0xa,0,SF_S0,f4,f3,f2,f1) +#define FMS_S(f1,f3,f4,f2) F1(0xa,1,SF_S0,f4,f3,f2,f1) +#define FMS_D(f1,f3,f4,f2) F1(0xb,0,SF_S0,f4,f3,f2,f1) /* fneg */ #define FNEG(f1,f3) FMERGE_NS(f1,f3,f3) /* fnegabs */ #define FNEGABS(f1,f3) FMERGE_NS(f1,0,f3) /* fnma */ -#define FNMA(f1,f3,f4,f2) F1(0xc,0,SF_S0,f4,f3,f3,f1) -#define FNMA_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0xc,0,sf,f4,f3,f3,f1) -#define FNMA_S(f1,f3,f4,f2) F1(0xc,1,SF_S0,f4,f3,f3,f1) -#define FNMA_D(f1,f3,f4,f2) F1(0xd,0,SF_S0,f4,f3,f3,f1) +#define FNMA(f1,f3,f4,f2) F1(0xc,0,SF_S0,f4,f3,f2,f1) +#define FNMA_p(f1,f3,f4,f2,sf,_p) F1_(_jit,_p,0xc,0,sf,f4,f3,f2,f1) +#define FNMA_S(f1,f3,f4,f2) F1(0xc,1,SF_S0,f4,f3,f2,f1) +#define FNMA_D(f1,f3,f4,f2) F1(0xd,0,SF_S0,f4,f3,f2,f1) /* fnmpy */ #define FNMPY(f1,f3,f4) FNMA(f1,f3,f4,0) /* fnorm */ @@ -496,7 +496,7 @@ static void 
_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); #define gei_d(r0,r1,i0) _gei_d(_jit,r0,r1,i0) static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); #define gtr_f(r0,r1,r2) gtr_d(r0,r1,r2) -#define gtr_d(r0,r1,r2) _gtr_d(_jit,r0,r2,r1) +#define gtr_d(r0,r1,r2) _gtr_d(_jit,r0,r1,r2) static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define gti_f(r0,r1,i0) _gti_f(_jit,r0,r1,i0) static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -1191,11 +1191,11 @@ dopi(ungt) static void _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - MOV(r0, GR_0); - FCMP_LE(PR_8, PR_9, r1, r2); - FCMP_LE(PR_6, PR_7, r2, r1); - MOVI_p(r0, 1, PR_9); /* !(r1 >= r2) || !(r2 >= r1) */ - MOVI_p(r0, 1, PR_7); + MOVI(r0, 1); + FCMP_EQ(PR_8, PR_9, r1, r2); + FCMP_UNORD(PR_6, PR_7, r1, r2); + MOV_p(r0, GR_0, PR_8); + MOV_p(r0, GR_0, PR_6); } fopi(ltgt) dopi(ltgt) @@ -1444,20 +1444,25 @@ static jit_word_t _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; + jit_word_t junord, jne; FCMP_UNORD(PR_8, PR_9, r0, r1); - /* junord L1 */ sync(); + /* junord L1 */ + junord = _jit->pc.w; BRI_COND(3, PR_8); FCMP_EQ(PR_6, PR_7, r0, r1); - /* jne L2 */ sync(); + /* jne L2 */ + jne = _jit->pc.w; BRI_COND(2, PR_7); sync(); w = _jit->pc.w; /* L1: */ + patch_at(jit_code_bunordr_d, junord, _jit->pc.w); BRI((i0 - w) >> 4); /* unconditional jump to patch */ sync(); /* L2: */ + patch_at(jit_code_bner_d, jne, _jit->pc.w); return (w); } fbopi(uneq) @@ -1467,9 +1472,9 @@ static jit_word_t _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; + FCMP_LT(PR_6, PR_7, r0, r1); sync(); w = _jit->pc.w; - FCMP_LT(PR_6, PR_7, r0, r1); BRI_COND((i0 - w) >> 4, PR_7); return (w); } @@ -1493,19 +1498,24 @@ static jit_word_t _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; + jit_word_t jeq, junord; FCMP_EQ(PR_8, PR_9, 
r0, r1); /* jeq L1 */ sync(); + jeq = _jit->pc.w; BRI_COND(4, PR_8); FCMP_UNORD(PR_6, PR_7, r0, r1); - /* jord L1 */ + /* junord L1 */ sync(); - BRI_COND(2, PR_7); + junord = _jit->pc.w; + BRI_COND(2, PR_6); sync(); w = _jit->pc.w; BRI((i0 - w) >> 4); /* unconditional jump to patch */ /* L1 */ sync(); + patch_at(jit_code_beqr_d, jeq, _jit->pc.w); + patch_at(jit_code_bordr_d, junord, _jit->pc.w); return (w); } fbopi(ltgt) diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 098febb75..659320fd8 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -67,7 +67,6 @@ jit_register_t _rvs[] = { { rc(gpr)|23, "r23" }, { rc(gpr)|22, "r22" }, { rc(gpr)|21, "r21" }, { rc(gpr)|20, "r20" }, { rc(gpr)|19, "r19" }, { rc(gpr)|18, "r18" }, - /* JIT_R4-JIT_R0 */ { rc(gpr)|17, "r17" }, { rc(gpr)|16, "r16" }, { rc(gpr)|15, "r15" }, { rc(gpr)|14, "r14" }, /* Do not allow allocating r32-r41 as temoraries for the sake of @@ -76,12 +75,12 @@ jit_register_t _rvs[] = { { rc(arg)|34, "r34" }, { rc(arg)|35, "r35" }, { rc(arg)|36, "r36" }, { rc(arg)|37, "r37" }, { rc(arg)|38, "r38" }, { rc(arg)|39, "r39" }, - /* JIT_V0-JIT_V3 */ + /* JIT_R0-JIT_V3 */ { rc(gpr)|40, "r40" }, { rc(gpr)|41, "r41" }, { rc(gpr)|42, "r42" }, { rc(gpr)|43, "r43" }, - /* Temporaries/locals */ { rc(gpr)|44, "r44" }, { rc(gpr)|45, "r45" }, { rc(gpr)|46, "r46" }, { rc(gpr)|47, "r47" }, + /* Temporaries/locals */ { rc(gpr)|48, "r48" }, { rc(gpr)|49, "r49" }, { rc(gpr)|50, "r50" }, { rc(gpr)|51, "r51" }, { rc(gpr)|52, "r52" }, { rc(gpr)|53, "r53" }, @@ -648,6 +647,11 @@ _emit_code(jit_state_t *_jit) _jitc->function = NULL; + /* If did resize the code buffer, these were not reset */ + _jitc->ioff = 0; + jit_regset_set_ui(&_jitc->regs, 0); + _jitc->pred = 0; + jit_reglive_setup(); undo.word = 0; @@ -1229,7 +1233,7 @@ _emit_code(jit_state_t *_jit) for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = node->code == jit_code_movi ? 
node->v.n->u.w : node->u.n->u.w; - patch_at(node, _jitc->patches.ptr[offset].inst, word); + patch_at(node->code, _jitc->patches.ptr[offset].inst, word); } word = sysconf(_SC_PAGE_SIZE);