From fe9f114745e8a0297a7fb86746d6b2b619b11ca7 Mon Sep 17 00:00:00 2001 From: PCPA Date: Wed, 25 Aug 2010 07:02:52 -0300 Subject: [PATCH] Implement x86_64 support for multiple integer and float arguments. This changes the usage of some of the jit_local_state fields, and adds two extra ones. Since jit_arg_x now returns either a register offset or a stack offset, it does not add _XMM0 to nextarg_putfp, so that register offsets can be distinguished from stack offsets. The nextarg_puti field is used to know when the integer register arguments have run out and further arguments should be pushed on the stack. The new framesize field is used to keep track of the jit_arg_x offset on the stack when jit_arg_x runs out of registers. The argssize field is used in a slightly different way now: it keeps track of the number of stack slots used to pass arguments, while nextarg_puti takes over its previous semantics, which also matches the nextarg_putfp logic. --- lightning/i386/core-64.h | 96 ++++++++++++++++++++++++++++------------ lightning/i386/fp-64.h | 41 +++++++++++------ 2 files changed, 96 insertions(+), 41 deletions(-) diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h index fb1189632..ea5384563 100644 --- a/lightning/i386/core-64.h +++ b/lightning/i386/core-64.h @@ -54,6 +54,8 @@ struct jit_local_state { int nextarg_getfp; int nextarg_putfp; int nextarg_geti; + int nextarg_puti; + int framesize; int argssize; int fprssize; int alloca_offset; @@ -131,44 +133,82 @@ struct jit_local_state { #define jit_popr_i(rs) POPQr(rs) /* A return address is 8 bytes, plus 5 registers = 40 bytes, total = 48 bytes. 
*/ -#define jit_prolog(n) (_jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, \ +#define jit_prolog(n) (_jitl.framesize = 48, _jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, \ PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_R14), PUSHQr(_EBP), MOVQrr(_ESP, _EBP)) #define jit_calli(sub) (MOVQir((long) (sub), JIT_REXTMP), CALLsr(JIT_REXTMP)) #define jit_callr(reg) CALLsr((reg)) -/* Stack isn't used for arguments: */ -#if !defined(_ASM_SAFETY) -#define jit_prepare_i(ni) (_jitl.argssize = (ni)) -#else -#define jit_prepare_i(ni) ((ni) <= JIT_ARG_MAX ? _jitl.argssize = (ni) : JITFAIL("too many integer arguments")) -#endif - +#define jit_prepare_i(ni) (_jitl.nextarg_puti = (ni), \ + _jitl.argssize = _jitl.nextarg_puti > JIT_ARG_MAX \ + ? _jitl.nextarg_puti - JIT_ARG_MAX : 0) +#define jit_pusharg_i(rs) (--_jitl.nextarg_puti >= JIT_ARG_MAX \ + ? PUSHQr(rs) : MOVQrr(rs, jit_arg_reg_order[_jitl.nextarg_puti])) -#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize])) -#define jit_finish(sub) (MOVBir(_jitl.fprssize < JIT_FP_ARG_MAX \ - ? _jitl.fprssize \ - : JIT_FP_ARG_MAX, _AL), \ - jit_calli(sub)) +#define jit_finish(sub) (_jitl.fprssize \ + ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \ + : MOVBir(0, _AL), \ + ((_jitl.argssize & 1) \ + ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \ + jit_calli(sub), \ + (_jitl.argssize \ + ? (ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \ + : 0)) #define jit_reg_is_arg(reg) ((reg) == _ECX || (reg) == _EDX) -#define jit_finishr(reg) (MOVBir(_jitl.fprssize < JIT_FP_ARG_MAX \ - ? _jitl.fprssize \ - : JIT_FP_ARG_MAX, _AL), \ + +#define jit_finishr(reg) (_jitl.fprssize \ + ? (MOVBir(_jitl.fprssize, _AL), _jitl.fprssize = 0) \ + : MOVBir(0, _AL), \ + ((_jitl.argssize & 1) \ + ? (PUSHQr(_EAX), ++_jitl.argssize) : 0), \ (jit_reg_is_arg((reg)) \ - ? (MOVQrr(reg, JIT_REXTMP), \ - jit_callr(JIT_REXTMP)) \ - : jit_callr(reg))) + ? 
(MOVQrr(reg, JIT_REXTMP), \ + jit_callr(JIT_REXTMP)) \ + : jit_callr(reg)), \ + (_jitl.argssize \ + ? (ADDQir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) \ + : 0)) #define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX)) -#define jit_arg_c() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_uc() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_s() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_us() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_i() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_ui() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_l() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_ul() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_p() (jit_arg_reg_order[_jitl.nextarg_geti++]) +#define jit_arg_i() (_jitl.nextarg_geti < JIT_ARG_MAX \ + ? _jitl.nextarg_geti++ \ + : ((_jitl.framesize += sizeof(long)) - sizeof(long))) +#define jit_arg_c() jit_arg_i() +#define jit_arg_uc() jit_arg_i() +#define jit_arg_s() jit_arg_i() +#define jit_arg_us() jit_arg_i() +#define jit_arg_ui() jit_arg_i() +#define jit_arg_l() jit_arg_i() +#define jit_arg_ul() jit_arg_i() +#define jit_arg_p() jit_arg_i() + +#define jit_getarg_c(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_c_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_c((reg), JIT_FP, (ofs))) +#define jit_getarg_uc(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_uc_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_uc((reg), JIT_FP, (ofs))) +#define jit_getarg_s(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_s_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_s((reg), JIT_FP, (ofs))) +#define jit_getarg_us(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_extr_us_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_us((reg), JIT_FP, (ofs))) +#define jit_getarg_i(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? 
jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_i((reg), JIT_FP, (ofs))) +#define jit_getarg_ui(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_ui((reg), JIT_FP, (ofs))) +#define jit_getarg_l(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_l((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_l((reg), JIT_FP, (ofs))) +#define jit_getarg_ul(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_ul((reg), JIT_FP, ofs)) +#define jit_getarg_p(reg, ofs) ((ofs) < JIT_ARG_MAX \ + ? jit_movr_p((reg), jit_arg_reg_order[(ofs)]) \ + : jit_ldxi_p((reg), JIT_FP, (ofs))) static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D }; diff --git a/lightning/i386/fp-64.h b/lightning/i386/fp-64.h index d775474be..22308a7f1 100644 --- a/lightning/i386/fp-64.h +++ b/lightning/i386/fp-64.h @@ -290,21 +290,36 @@ union jit_double_imm { #define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d)))) #define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d)))) -#if !defined(_ASM_SAFETY) -#define jit_prepare_f(num) (_jitl.fprssize += (num), _jitl.nextarg_putfp = _XMM0 + _jitl.fprssize) -#define jit_prepare_d(num) (_jitl.fprssize += (num), _jitl.nextarg_putfp = _XMM0 + _jitl.fprssize) -#else -#define jit_prepare_f(num) (_jitl.fprssize += (num) <= JIT_FP_ARG_MAX ? (_jitl.nextarg_putfp = _XMM0 + _jitl.fprssize) : JITFAIL("too many float arguments")) -#define jit_prepare_d(num) (_jitl.fprssize += (num) <= JIT_FP_ARG_MAX ? (_jitl.nextarg_putfp = _XMM0 + _jitl.fprssize) : JITFAIL("too many float arguments")) -#endif +#define jit_prepare_f(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \ + ? 
(_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \ + _jitl.fprssize = JIT_FP_ARG_MAX) \ + : (_jitl.fprssize += (num))), \ + _jitl.nextarg_putfp += (num)) +#define jit_prepare_d(num) ((_jitl.nextarg_putfp + (num) > JIT_FP_ARG_MAX \ + ? (_jitl.argssize += _jitl.nextarg_putfp + (num) - JIT_FP_ARG_MAX, \ + _jitl.fprssize = JIT_FP_ARG_MAX) \ + : (_jitl.fprssize += (num))), \ + _jitl.nextarg_putfp += (num)) -#define jit_arg_f() (_XMM0 + _jitl.nextarg_getfp++) -#define jit_arg_d() (_XMM0 + _jitl.nextarg_getfp++) +#define jit_arg_f() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \ + ? _jitl.nextarg_getfp++ \ + : ((_jitl.framesize += sizeof(double)) - sizeof(double))) +#define jit_arg_d() (_jitl.nextarg_getfp < JIT_FP_ARG_MAX \ + ? _jitl.nextarg_getfp++ \ + : ((_jitl.framesize += sizeof(double)) - sizeof(double))) -#define jit_getarg_f(rd, ofs) (jit_movr_f ((rd), (ofs))) -#define jit_getarg_d(rd, ofs) (jit_movr_d ((rd), (ofs))) +#define jit_getarg_f(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \ + ? jit_movr_f((reg), _XMM0 + (ofs)) \ + : jit_ldxi_f((reg), JIT_FP, (ofs))) +#define jit_getarg_d(reg, ofs) ((ofs) < JIT_FP_ARG_MAX \ + ? jit_movr_d((reg), _XMM0 + (ofs)) \ + : jit_ldxi_d((reg), JIT_FP, (ofs))) -#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp, jit_movr_f (_jitl.nextarg_putfp, (rs))) -#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp, jit_movr_d (_jitl.nextarg_putfp, (rs))) +#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \ + ? (SUBQir(sizeof(double), JIT_SP), jit_str_f(JIT_SP,(rs))) \ + : jit_movr_f(_XMM0 + _jitl.nextarg_putfp, (rs))) +#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp >= JIT_FP_ARG_MAX \ + ? (SUBQir(sizeof(double), JIT_SP), jit_str_d(JIT_SP,(rs))) \ + : jit_movr_d(_XMM0 + _jitl.nextarg_putfp, (rs))) #endif /* __lightning_fp_h */