diff --git a/ChangeLog b/ChangeLog index 746863192..0f51dea97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +2013-04-27 Paulo Andrade + + * include/lightning.h: Add new backend-specific movr_w_d, + movr_d_w and movi_d_w codes as helpers to ia64 varargs + function arguments. + + * lib/jit_ia64-cpu.c: + Correct wrong encoding of A5 small integers. + Correct definition of "mux" instruction modifiers. + Correct ordering of arguments and predicates of cmp_xy + implementation with immediate arguments; like most other + codes with an immediate, the immediate is the second, not + the third argument. + + * lib/jit_ia64-fpu.c: Actual implementation of the code + to move to/from gpr to/from fpr, to implement varargs abi. + + * lib/jit_ia64.c: Make fpr argument registers not allocatable + as temporaries, no need for the extra checks when there are + plenty of registers. + + * lib/jit_print.c, lib/lightning.c: Minor updates for the + new movr_w_d, movr_d_w and movi_d_w codes. + 2013-04-26 Paulo Andrade * lib/jit_ia64-cpu.c, lib/jit_ia64-fpu.c: Correct code to diff --git a/include/lightning.h b/include/lightning.h index 88029b861..7cbaf3011 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -739,6 +739,8 @@ typedef enum { jit_code_movr_w_f, jit_code_movr_ww_d, /* w* -> f|d */ #define jit_movr_w_f(u, v) jit_new_node_ww(jit_code_movr_w_f, u, v) #define jit_movr_ww_d(u, v, w) jit_new_node_www(jit_code_movr_ww_d, u, v, w) + jit_code_movr_w_d, /* w -> d */ +#define jit_movr_w_d(u, v) jit_new_node_ww(jit_code_movr_w_d, u, v) jit_code_movr_f_w, jit_code_movi_f_w, /* f|d -> w* */ #define jit_movr_f_w(u, v) jit_new_node_ww(jit_code_movr_f_w, u, v) @@ -746,6 +748,11 @@ typedef enum { jit_code_movr_d_ww, jit_code_movi_d_ww, #define jit_movr_d_ww(u, v, w) jit_new_node_www(jit_code_movr_d_ww, u, v, w) #define jit_movi_d_ww(u, v, w) jit_new_node_wwd(jit_code_movi_d_ww, u, v, w) + + jit_code_movr_d_w, jit_code_movi_d_w, /* d -> w */ +#define jit_movr_d_w(u, v) 
jit_new_node_ww(jit_code_movr_d_w, u, v) +#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) + jit_code_x86_retval_f, jit_code_x86_retval_d, } jit_code_t; diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index dc4f4f1d7..0182d95d7 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -202,11 +202,10 @@ typedef enum { #define BR_INDWH_DPTK 2 #define MUX_BRCST 0 -/* FIXME only @name values in manual and only @brcst disassembled by binutils */ -#define MUX_REV 1 /* not disassembled as @rev */ -#define MUX_MIX 2 /* not disassembled as @mix */ -#define MUX_SHUF 3 /* not disassembled as @shuf */ -#define MUX_ALT 4 /* not disassembled as @alt */ +#define MUX_REV 11 /* @rev */ +#define MUX_MIX 8 /* @mix */ +#define MUX_SHUF 9 /* @shuf */ +#define MUX_ALT 10 /* @alt */ #define ldr(r0,r1) ldr_l(r0,r1) #define ldi(r0,i0) ldi_l(r0,i0) @@ -855,8 +854,8 @@ static void _X5(jit_state_t*,jit_word_t, #define MOV(r0,r1) ADDS(r0,0,r1) #define MOV_p(r0,r1,_p) ADDS_p(r0,0,r1,_p) /* mov - Move Immediate */ -#define MOVI(r1,im) ADDL(r1,im,0) -#define MOVI_p(r1,im,_p) ADDL_p(r1,im,0,_p) +#define MOVI(r1,im) ADDL(r1,im,GR_0) +#define MOVI_p(r1,im,_p) ADDL_p(r1,im,GR_0,_p) /* mov - Move Indirect Register */ #define MOV_rn_RR(r1,r3) M43(0x10,r3,r1) #define MOV_rn_DBR(r1,r3) M43(0x11,r3,r1) @@ -2075,15 +2074,21 @@ static void _A5(jit_state_t *_jit, jit_word_t _p, jit_word_t r3, jit_word_t im, jit_word_t r1) { + jit_word_t s, i5, i9, i7; assert(!(_p & ~0x3fL)); assert(!(r3 & ~0x3L)); assert(im >= -2097152 && im < 2097151); assert(!(r1 & ~0x7fL)); + /* imm22 = sign_ext(s << 21 | imm5c << 16 | imm9d << 7 | imm7b, 22); the four fields are not contiguous in the instruction word: below, s is placed at bit 36, imm9d at bit 27, imm5c at bit 22 and imm7b at bit 13 */ + s = (im & 0x200000) >> 21; + i5 = (im & 0x1f0000) >> 16; + i9 = (im & 0xff80) >> 7; + i7 = im & 0x7f; TSTREG1(r3); TSTPRED(_p); TSTREG1(r1); - inst((9L<<37)|(((im>>7)&0x7fffL)<<22)|(r3<<20)| - ((im&0x7fL)<<13)|(r1<<6)|_p, INST_A); + inst((9L<<37)|(s<<36)|(i9<<27)|(i5<<22)| + (r3<<20)|(i7<<13)|(r1<<6)|_p, INST_A); SETREG(r1); } @@ -3685,7 +3690,7 @@ _iqmulr(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1, } else mulr(r0, r2, r3); - mulh(sign, r1, r2, r3); + mulh(r1, r2, r3, sign); if (r0 == r2 || r0 == r3) { movr(r0, rn(reg)); jit_unget_reg(reg); @@ -3837,7 +3842,7 @@ _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (i0 >= -128 && i0 <= 127) - CMPI_LT(PR_6, PR_7, i0, r1); + CMPI_LT(PR_7, PR_6, i0 - 1, r1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -3861,7 +3866,7 @@ _lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (i0 >= -128 && i0 <= 127) - CMPI_LTU(PR_6, PR_7, i0, r1); + CMPI_LTU(PR_7, PR_6, i0 - 1, r1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -3945,7 +3950,7 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (i0 >= -128 && i0 <= 127) - CMPI_LT(PR_6, PR_7, i0, r1); + CMPI_LT(PR_7, PR_6, i0 - 1, r1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -3969,7 +3974,7 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (i0 >= -128 && i0 <= 127) - CMPI_LTU(PR_6, PR_7, i0, r1); + CMPI_LTU(PR_7, PR_6, i0 - 1, r1); /* same i0 - 1 bias as _gei, _lti_u and _bgei_u */ else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -4387,7 +4392,7 @@ _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t reg; if (i1 >= -128 && i1 <= 127) - CMPI_LT(PR_6, PR_7, i1, r0); + CMPI_LT(PR_7, PR_6, i1 - 1, r0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); @@ -4417,7 +4422,7 @@ _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t reg; if (i1 >= -128 && i1 <= 127) - CMPI_LTU(PR_6, PR_7, i1, r0); + CMPI_LTU(PR_7, PR_6, i1 - 1, r0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); @@ -4523,7 +4528,7 @@ _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t reg; if (i1 >= -128 && i1 <= 127) - CMPI_LT(PR_6, PR_7, i1, r0); + CMPI_LT(PR_7, PR_6, 
i1 - 1, r0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); @@ -4553,7 +4558,7 @@ _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_word_t w; jit_int32_t reg; if (i1 >= -128 && i1 <= 127) - CMPI_LTU(PR_6, PR_7, i1, r0); + CMPI_LTU(PR_7, PR_6, i1 - 1, r0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index 1c24df8fd..eb12ab57a 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -423,6 +423,15 @@ static void F16_(jit_state_t*,jit_word_t, #define movi_f(r0,i0) ldi_f(r0,(jit_word_t)i0) #define movi_d(r0,i0) ldi_d(r0,(jit_word_t)i0) +#define movr_f_w(r0,r1) _movr_f_w(_jit,r0,r1) +static void _movr_f_w(jit_state_t*,jit_int32_t,jit_int32_t); +#define movr_d_w(r0,r1) _movr_d_w(_jit,r0,r1) +static void _movr_d_w(jit_state_t*,jit_int32_t,jit_int32_t); +#define movi_f_w(r0,i0) _movi_f_w(_jit,r0,i0) +static void _movi_f_w(jit_state_t*,jit_int32_t,jit_word_t); +#define movi_d_w(r0,i0) _movi_d_w(_jit,r0,i0) +static void _movi_d_w(jit_state_t*,jit_int32_t,jit_word_t); + #define absr_f(r0,r1) absr_d(r0,r1) #define absr_d(r0,r1) FABS(r0,r1) #define negr_f(r0,r1) negr_d(r0,r1) @@ -978,6 +987,42 @@ F16_(jit_state_t* _jit, jit_word_t _p, inst((((im>>20)&1L)<<36)|(y<<27)|(1L<<26)|((im&0xffffL)<<6)|_p, INST_F); } +static void +_movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + /* Should actually be used only in this case (with out0 == 120): a destination numbered 120 or above is rebased onto _jitc->rout before the float is moved from fpr r1 with GETF_S */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); + GETF_S(r0, r1); +} + +static void +_movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + /* Should actually be used only in this case (with out0 == 120): a destination numbered 120 or above is rebased onto _jitc->rout before ldi_i loads from i0 */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); + ldi_i(r0, i0); +} + +static void +_movr_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + /* Should actually be used only in this case (with out0 == 120): a destination numbered 120 or above is rebased onto _jitc->rout before the double is moved from fpr r1 with GETF_D */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); + GETF_D(r0, r1); +} + +static void 
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + /* Should actually be used only in this case (with out0 == 120): a destination numbered 120 or above is rebased onto _jitc->rout before ldi_l loads from i0 */ + if (r0 >= 120) + r0 = _jitc->rout + (r0 - 120); + ldi_l(r0, i0); +} + #define fpr_opi(name, type, size) \ static void \ _##name##i_##type(jit_state_t *_jit, \ @@ -1333,7 +1378,7 @@ _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) static void _sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - //MOV(_jitc->rout, r1); + movr_f(GR_8, r1); calli((jit_word_t)sqrtf); MOVF(r0, GR_8); } @@ -1341,7 +1386,7 @@ _sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - //MOV(_jitc->rout, r1); + movr_d(GR_8, r1); calli((jit_word_t)sqrt); MOVF(r0, GR_8); } diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 659320fd8..d574d24ce 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -133,12 +133,12 @@ jit_register_t _rvs[] = { /* Scratch */ { rc(fpr)|6, "f6" }, { rc(fpr)|7, "f7" }, /* Do not allocate for the sake of simplification */ - { 8, "f8" }, + { rc(arg)|8, "f8" }, /* Scratch - Argument/return registers */ - { rc(fpr)|9, "f9" }, - { rc(fpr)|10, "f10" }, { rc(fpr)|11, "f11" }, - { rc(fpr)|12, "f12" }, { rc(fpr)|13, "f13" }, - { rc(fpr)|14, "f14" }, { rc(fpr)|15, "f15" }, + { rc(arg)|9, "f9" }, + { rc(arg)|10, "f10" }, { rc(arg)|11, "f11" }, + { rc(arg)|12, "f12" }, { rc(arg)|13, "f13" }, + { rc(arg)|14, "f14" }, { rc(arg)|15, "f15" }, /* Do not touch callee save registers not automatically spill/reloaded */ { rc(sav)|16, "f16" }, { rc(sav)|17, "f17" }, { rc(sav)|18, "f18" }, { rc(sav)|19, "f19" }, @@ -456,23 +456,19 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { -/* if (v->u.w < 8) - jit_movr_f(u, _R32 + v->u.w); + jit_movr_f(u, _F8 + v->u.w); else jit_ldxi_f(u, JIT_FP, v->u.w); -*/ } void _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, 
jit_node_t *v) { -/* if (v->u.w < 8) - jit_movr_d(u, _R32 + v->u.w); + jit_movr_d(u, _F8 + v->u.w); else jit_ldxi_d(u, JIT_FP, v->u.w); -*/ } void @@ -511,6 +507,17 @@ void _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); + if (_jitc->function->call.argi < 8) { + if (!(_jitc->function->call.call & jit_call_varargs)) + jit_movr_f(_F8 + _jitc->function->call.argi, u); + else + jit_movr_f_w(_OUT0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); /* call.size is a byte offset (immediate), not a register: stxi as in _jit_pushargi_f */ + _jitc->function->call.size += sizeof(jit_word_t); + } } void @@ -518,16 +525,37 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) { jit_int32_t regno; assert(_jitc->function); - /* FIXME move to OUTn or stack */ - regno = jit_get_reg(jit_class_fpr); - jit_movi_f(regno, u); - jit_unget_reg(regno); + if (_jitc->function->call.argi < 8) { + if (!(_jitc->function->call.call & jit_call_varargs)) + jit_movi_f(_F8 + _jitc->function->call.argi, u); + else + jit_movi_f_w(_OUT0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_word_t); + jit_unget_reg(regno); + } } void _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); + if (_jitc->function->call.argi < 8) { + if (!(_jitc->function->call.call & jit_call_varargs)) + jit_movr_d(_F8 + _jitc->function->call.argi, u); + else + jit_movr_d_w(_OUT0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi_d(_jitc->function->call.size, JIT_SP, u); /* call.size is a byte offset (immediate), not a register: stxi as in _jit_pushargi_f */ + _jitc->function->call.size += sizeof(jit_word_t); + } } void @@ -535,10 +563,20 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) { jit_int32_t regno; assert(_jitc->function); - /* FIXME move to OUTn or stack */ - regno = jit_get_reg(jit_class_fpr); - jit_movi_d(regno, u); - 
jit_unget_reg(regno); + if (_jitc->function->call.argi < 8) { + if (!(_jitc->function->call.call & jit_call_varargs)) + jit_movi_d(_F8 + _jitc->function->call.argi, u); + else + jit_movi_d_w(_OUT0 + _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_word_t); + jit_unget_reg(regno); + } } jit_bool_t @@ -633,6 +671,7 @@ _jit_retval_d(jit_state_t *_jit, jit_int32_t r0) jit_pointer_t _emit_code(jit_state_t *_jit) { + jit_uint8_t *end; jit_node_t *node; jit_node_t *temp; jit_word_t word; @@ -745,8 +784,11 @@ _emit_code(jit_state_t *_jit) patch(word, node); \ } \ break + /* default of 64 bytes is too low for some possible + * quite long code generation sequences, e.g. qdivi */ + end = _jit->code.ptr + _jit->code.length - 4096; for (node = _jitc->head; node; node = node->next) { - if (_jit->pc.uc >= _jitc->code.end && !jit_remap()) + if (_jit->pc.uc >= end && !jit_remap()) return (NULL); value = jit_classify(node->code); @@ -1195,6 +1237,18 @@ _emit_code(jit_state_t *_jit) case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: break; + case jit_code_movr_f_w: + movr_f_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movr_d_w: + movr_d_w(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_movi_f_w: + movi_f_w(rn(node->u.w), node->v.n->u.w); + break; + case jit_code_movi_d_w: + movi_d_w(rn(node->u.w), node->v.n->u.w); + break; default: abort(); } diff --git a/lib/jit_print.c b/lib/jit_print.c index f692e3c1f..fa3cee805 100644 --- a/lib/jit_print.c +++ b/lib/jit_print.c @@ -209,8 +209,10 @@ static char *code_name[] = { "bordr_d", "bordi_d", "bunordr_d", "bunordi_d", "movr_w_f", "movr_ww_d", + "movr_w_d", "movr_f_w", "movi_f_w", "movr_d_ww", "movi_d_ww", + "movr_d_w", "movi_d_w", "x86_retval_f", "x86_retval_d", }; diff --git a/lib/lightning.c b/lib/lightning.c index 
f3e0d9b77..6525b12b7 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1153,7 +1153,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_movi_f: case jit_code_movi_f_w: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_flt; break; - case jit_code_movi_d: + case jit_code_movi_d: case jit_code_movi_d_w: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_dbl; break; case jit_code_movi_d_ww: @@ -1174,6 +1174,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_movr_d: case jit_code_extr_d: case jit_code_extr_f_d: case jit_code_ldr_d: case jit_code_movr_w_f: case jit_code_movr_f_w: + case jit_code_movr_w_d: case jit_code_movr_d_w: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg; break; case jit_code_movr_d_ww: