From 7dd18bddd7ec28ee4c6e1591c9ab56ceb594f5f2 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 20 May 2019 11:02:21 +0200 Subject: [PATCH] ARMv7 backend compiling without warnings --- lightening/arm-cpu.c | 214 ++++--- lightening/arm-vfp.c | 1309 ++++++++++++++------------------------- lightening/arm.h | 14 +- lightening/lightening.c | 4 +- 4 files changed, 606 insertions(+), 935 deletions(-) diff --git a/lightening/arm-cpu.c b/lightening/arm-cpu.c index 44988976e..012b73ae8 100644 --- a/lightening/arm-cpu.c +++ b/lightening/arm-cpu.c @@ -32,6 +32,7 @@ #define ARM_CC_NE 0x10000000 /* Z=0 */ #define ARM_CC_HS 0x20000000 /* C=1 */ #define ARM_CC_LO 0x30000000 /* C=0 */ +#define ARM_CC_MI 0x40000000 /* N=1 */ #define ARM_CC_VS 0x60000000 /* V=1 */ #define ARM_CC_VC 0x70000000 /* V=0 */ #define ARM_CC_HI 0x80000000 /* C=1 && Z=0 */ @@ -241,8 +242,23 @@ encode_thumb_word_immediate(unsigned int v) return (-1); } +static uint32_t +read_wide_thumb(uint32_t *loc) +{ + uint16_t *sloc = (uint16_t*)loc; + return (((uint32_t)sloc[0]) << 16) | sloc[1]; +} + +static void +write_wide_thumb(uint32_t *loc, uint32_t v) +{ + uint16_t *sloc = (uint16_t *)loc; + sloc[0] = v >> 16; + sloc[1] = v & 0xffff; +} + static int -offset_in_thumb_jump_range(int32_t offset) +offset_in_jmp_range(int32_t offset) { return -0x800000 <= offset && offset <= 0x7fffff; } @@ -272,7 +288,7 @@ static const uint32_t thumb_jump_mask = 0xf800d000; static uint32_t encode_thumb_jump(int32_t v) { - ASSERT(offset_in_thumb_jump_range(v)); + ASSERT(offset_in_jmp_range(v)); uint32_t s = !!(v & 0x800000); uint32_t i1 = !!(v & 0x400000); uint32_t i2 = !!(v & 0x200000); @@ -290,6 +306,18 @@ patch_thumb_jump(uint32_t inst, int32_t v) return (inst & thumb_jump_mask) | encode_thumb_jump(v); } +static int32_t +read_jmp_offset(uint32_t *loc) +{ + return decode_thumb_jump(read_wide_thumb(loc)); +} + +static void +patch_jmp_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_thumb_jump(read_wide_thumb(loc), 
v)); +} + static jit_reloc_t emit_thumb_jump(jit_state_t *_jit, uint32_t inst) { @@ -305,7 +333,7 @@ emit_thumb_jump(jit_state_t *_jit, uint32_t inst) } static int -offset_in_thumb_cc_jump_range(int32_t v) +offset_in_jcc_range(int32_t v) { return -0x80000 <= v && v <= 0x7ffff; } @@ -335,7 +363,7 @@ static const uint32_t thumb_cc_jump_mask = 0xfbc0d000; static uint32_t encode_thumb_cc_jump(int32_t v) { - ASSERT(offset_in_thumb_cc_jump_range(v)); + ASSERT(offset_in_jcc_range(v)); uint32_t s = !!(v & 0x80000); uint32_t j1 = !!(v & 0x40000); uint32_t j2 = !!(v & 0x20000); @@ -351,6 +379,18 @@ patch_thumb_cc_jump(uint32_t inst, int32_t v) return (inst & thumb_cc_jump_mask) | encode_thumb_cc_jump(v); } +static int32_t +read_jcc_offset(uint32_t *loc) +{ + return decode_thumb_cc_jump(read_wide_thumb(loc)); +} + +static void +patch_jcc_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_thumb_cc_jump(read_wide_thumb(loc), v)); +} + static jit_reloc_t emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst) { @@ -365,19 +405,6 @@ emit_thumb_cc_jump(jit_state_t *_jit, uint32_t inst) return ret; } -static int -encode_thumb_shift(int v, int type) -{ - switch (type) { - case ARM_ASR: - case ARM_LSL: - case ARM_LSR: type >>= 1; break; - default: assert(!"handled shift"); - } - assert(v >= 0 && v <= 31); - return (((v & 0x1c) << 10) | ((v & 3) << 6) | type); -} - static void torrr(jit_state_t *_jit, int o, int rn, int rd, int rm) { @@ -385,14 +412,6 @@ torrr(jit_state_t *_jit, int o, int rn, int rd, int rm) emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rd)<<8)|_u4(rm)); } -static void -torrrs(jit_state_t *_jit, int o, int rn, int rd, int rm, int im) -{ - assert(!(o & 0x000f0f0f)); - assert(!(im & 0xffff8f0f)); - emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rd)<<8)|im|_u4(rm)); -} - static void torxr(jit_state_t *_jit, int o, int rn, int rt, int rm) { @@ -407,14 +426,6 @@ torrrr(jit_state_t *_jit, int o, int rn, int rl, int rh, int rm) emit_wide_thumb(_jit, 
o|(_u4(rn)<<16)|(_u4(rl)<<12)|(_u4(rh)<<8)|_u4(rm)); } -static void -torrri8(jit_state_t *_jit, int o, int rn, int rt, int rt2, int im) -{ - assert(!(o & 0x000fffff)); - assert(!(im & 0xffffff00)); - emit_wide_thumb(_jit, o|(_u4(rn)<<16)|(_u4(rt)<<12)|(_u4(rt2)<<8)|im); -} - static void torri(jit_state_t *_jit, int o, int rn, int rd, int im) { @@ -454,22 +465,6 @@ toriw(jit_state_t *_jit, int o, int rd, int im) emit_wide_thumb(_jit, o|((im&0xf000)<<4)|((im&0x800)<<15)|((im&0x700)<<4)|(_u4(rd)<<8)|(im&0xff)); } -static void -tc8(jit_state_t *_jit, int cc, int im) -{ - assert(!(cc & 0x0fffffff)); - assert(cc != ARM_CC_AL && cc != ARM_CC_NV); - assert(im >= -128 && im <= 127); - emit_u16(_jit, THUMB_CC_B|(cc>>20)|(im&0xff)); -} - -static void -t11(jit_state_t *_jit, int im) -{ - assert(!(im & 0xfffff800)); - emit_u16(_jit, THUMB_B|im); -} - static jit_reloc_t tcb(jit_state_t *_jit, int cc) { @@ -486,25 +481,6 @@ tb(jit_state_t *_jit, int o) return emit_thumb_jump(_jit, o); } -static void -tpp(jit_state_t *_jit, int o, int im) -{ - assert(!(o & 0x0000ffff)); - if (o == THUMB2_PUSH) - assert(!(im & 0x8000)); - assert(__builtin_popcount(im & 0x1fff) > 1); - emit_wide_thumb(_jit, o|im); -} - -static void -torl(jit_state_t *_jit, int o, int rn, int im) -{ - assert(!(o & 0xf1fff)); - assert(rn != _NOREG || !im || ((o & 0xc000) == 0xc000)); - assert(!(o & THUMB2_LDM_W) || !(im & (1 << rn))); - emit_wide_thumb(_jit, o | (_u4(rn)<<16)|_u13(im)); -} - static void T1_ORR(jit_state_t *_jit, int32_t rdn, int32_t rm) { @@ -547,12 +523,6 @@ T1_MOV(jit_state_t *_jit, int32_t rd, int32_t rm) return emit_u16(_jit, THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7)); } -static void -T2_MOV(jit_state_t *_jit, int32_t rd, int32_t rm) -{ - return T2_ORR(_jit, rd,_NOREG,rm); -} - static void T1_MOVI(jit_state_t *_jit, int32_t rd, int32_t im) { @@ -1015,14 +985,8 @@ T2_TSTI(jit_state_t *_jit, int32_t rn, int32_t im) return torri(_jit, THUMB2_TSTI,rn,_NOREG,im); } -static void -T1_B(jit_state_t 
*_jit, int32_t im) -{ - return t11(_jit, im); -} - static jit_reloc_t -T2_CC_B(jit_state_t *_jit, uint8_t cc) +T2_CC_B(jit_state_t *_jit, uint32_t cc) { return tcb(_jit, cc); } @@ -1372,6 +1336,12 @@ movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) return _movi(_jit, r0, i0, FLAGS_UNIMPORTANT); } +static int +offset_in_load_from_pool_range(int32_t offset) +{ + return -0xfff <= offset && offset <= 0xfff; +} + static int32_t decode_load_from_pool_offset(uint32_t inst) { @@ -1382,7 +1352,7 @@ decode_load_from_pool_offset(uint32_t inst) static uint32_t encode_load_from_pool_offset(int32_t off) { - ASSERT(-0xfff <= off && off <= 0xfff); + ASSERT(offset_in_load_from_pool_range(off)); uint32_t u; if (off >= 0) u = 1; @@ -1400,6 +1370,18 @@ patch_load_from_pool(uint32_t inst, int32_t off) return (inst & load_from_pool_mask) | encode_load_from_pool_offset(off); } +static int32_t +read_load_from_pool_offset(uint32_t *loc) +{ + return decode_load_from_pool_offset(read_wide_thumb(loc)); +} + +static void +patch_load_from_pool_offset(uint32_t *loc, int32_t v) +{ + write_wide_thumb(loc, patch_load_from_pool(read_wide_thumb(loc), v)); +} + static jit_reloc_t emit_load_from_pool(jit_state_t *_jit, uint32_t inst) { @@ -2632,7 +2614,7 @@ ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) T1_LDRI(_jit, r0, r1, i0 >> 2); - else if (r1 == jit_gpr_regno(_SP) && r0 < 8 && + else if (r1 == jit_gpr_regno(JIT_SP) && r0 < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) T1_LDRISP(_jit, r0, i0 >> 2); else if (jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) @@ -2779,7 +2761,7 @@ stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { if ((r0|r1) < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) < 0x20) T1_STRI(_jit, r1, r0, i0 >> 2); - else if (r0 == jit_gpr_regno(_SP) && r1 < 8 && + else if (r0 == jit_gpr_regno(JIT_SP) && r1 < 8 && i0 >= 0 && !(i0 & 3) && (i0 >> 2) <= 255) T1_STRISP(_jit, r1, i0 >> 2); else if 
(jit_ldrt_strt_p() && i0 >= 0 && i0 <= 255) @@ -2870,6 +2852,70 @@ calli(jit_state_t *_jit, jit_word_t i0) jit_patch_there(_jit, T2_BLI(_jit), (void*)i0); } +static void +ret(jit_state_t *_jit) +{ + movr(_jit, jit_gpr_regno(_PC), jit_gpr_regno(_LR)); +} + +static void +reti(jit_state_t *_jit, int32_t i0) +{ + movi(_jit, jit_gpr_regno(_R0), i0); + ret(_jit); +} + +static void +retr(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, jit_gpr_regno(_R0), r0); + ret(_jit); +} + +static void +retval_c(jit_state_t *_jit, int32_t r0) +{ + extr_c(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_uc(jit_state_t *_jit, int32_t r0) +{ + extr_uc(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, jit_gpr_regno(_R0)); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, r0, jit_gpr_regno(_R0)); +} + +struct veneer +{ + uint16_t ldr; + uint16_t br; + uint32_t addr; +}; + +static void +patch_veneer(uint32_t *loc, jit_pointer_t addr) +{ + struct veneer *v = (struct veneer*) loc; + v->addr = (uintptr_t) addr; +} + static void emit_veneer(jit_state_t *_jit, jit_pointer_t target) { diff --git a/lightening/arm-vfp.c b/lightening/arm-vfp.c index 885f2db47..208edc316 100644 --- a/lightening/arm-vfp.c +++ b/lightening/arm-vfp.c @@ -17,9 +17,6 @@ * Paulo Cesar Pereira de Andrade */ -/* as per vfp_regno macro, required due to "support" to soft float registers - * or using integer registers as arguments to float operations */ -#define _D8_REGNO 32 #define ARM_V_F64 0x00000100 #define ARM_VADD_F 0x0e300a00 #define ARM_VSUB_F 0x0e300a40 @@ -31,7 +28,6 @@ #define ARM_VMOV_F 0x0eb00a40 #define ARM_VMOV_A_S 0x0e100a10 /* vmov rn, sn */ #define ARM_VMOV_S_A 0x0e000a10 /* vmov sn, rn */ -#define ARM_VMOV_AA_D 0x0c500b10 /* vmov rn,rn, dn */ #define ARM_VMOV_D_AA 0x0c400b10 /* vmov dn, rn,rn */ #define ARM_VCMP 
0x0eb40a40 #define ARM_VMRS 0x0ef10a10 @@ -46,6 +42,7 @@ #define ARM_VCVT_F 0x0eb70ac0 #define ARM_VCVT_F32_F64 ARM_VCVT_F #define ARM_VCVT_F64_F32 ARM_VCVT_F|ARM_V_F64 +#define ARM_P 0x00800000 /* positive offset */ #define ARM_V_D 0x00400000 #define ARM_V_N 0x00000080 #define ARM_V_M 0x00000020 @@ -58,148 +55,81 @@ #define ARM_VMOV_A_D 0x0e100b10 #define ARM_VMOV_D_A 0x0e000b10 -#define vfp_regno(rn) (((rn) - 16) >> 1) +#define vfp_regno(rn) ((rn) >> 1) static void vodi(jit_state_t *_jit, int oi, int r0) { - jit_thumb_t thumb; - assert(!(oi & 0x0000f000)); - assert(!(r0 & 1)); r0 = vfp_regno(r0); - thumb.i = oi|(_u4(r0)<<12); - iss(thumb.s[0], thumb.s[1]); -} - -static void -_voqi(jit_state_t *_jit, int oi, int r0) -{ - jit_thumb_t thumb; - assert(!(oi & 0x0000f000)); - assert(!(r0 & 3)); r0 = vfp_regno(r0); - thumb.i = oi|(_u4(r0)<<12); - iss(thumb.s[0], thumb.s[1]); + ASSERT(!(oi & 0x0000f000)); + ASSERT(!(r0 & 1)); + r0 >>= 1; + emit_wide_thumb(_jit, oi|(_u4(r0)<<12)); } static void vo_ss(jit_state_t *_jit, int o, int r0, int r1) { - assert(!(o & 0xf000f00f)); - if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0); - if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1); + ASSERT(!(o & 0xf000f00f)); + if (r0 & 1) o |= ARM_V_D; + if (r1 & 1) o |= ARM_V_M; + r0 >>= 1; r1 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); } static void vo_dd(jit_state_t *_jit, int o, int r0, int r1) { - assert(!(o & 0xf000f00f)); - assert(!(r0 & 1) && !(r1 & 1)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); + ASSERT(!(o & 0xf000f00f)); + ASSERT(!(r0 & 1) && !(r1 & 1)); + r0 >>= 1; r1 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); } -static void -vo_qd(jit_state_t *_jit, int o, int r0, int r1) -{ - assert(!(o & 0xf000f00f)); - assert(!(r0 & 3) && !(r1 & 1)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); -} - -static void -vo_qq(jit_state_t *_jit, int o, int r0, int r1) -{ - assert(!(o & 0xf000f00f)); - assert(!(r0 
& 3) && !(r1 & 3)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<12)|_u4(r1)); -} - -static void -vorr_(jit_state_t *_jit, int o, int r0, int r1) -{ - assert(!(o & 0xf000f00f)); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); -} - static void vors_(jit_state_t *_jit, int o, int r0, int r1) { - assert(!(o & 0xf000f00f)); - if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); -} - -static void -vorv_(jit_state_t *_jit, int o, int r0, int r1) -{ - assert(!(o & 0xf000f00f)); - if (r1 & 1) o |= ARM_V_M; r1 = vfp_regno(r1); + ASSERT(!(o & 0xf000f00f)); + if (r1 & 1) o |= ARM_V_N; + r1 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); } static void vori_(jit_state_t *_jit, int o, int r0, int r1) { - assert(!(o & 0xf000f00f)); + ASSERT(!(o & 0xf000f00f)); /* use same bit pattern, to set opc1... */ - if (r1 & 1) o |= ARM_V_I32; r1 = vfp_regno(r1); + if (r1 & 1) o |= ARM_V_I32; + r1 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)); } static void vorrd(jit_state_t *_jit, int o, int r0, int r1, int r2) { - assert(!(o & 0xf00ff00f)); - assert(!(r2 & 1)); - r2 = vfp_regno(r2); + ASSERT(!(o & 0xf00ff00f)); + ASSERT(!(r2 & 1)); + r2 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); } static void vosss(jit_state_t *_jit, int o, int r0, int r1, int r2) { - assert(!(o & 0xf00ff00f)); - if (r0 & 1) o |= ARM_V_D; r0 = vfp_regno(r0); - if (r1 & 1) o |= ARM_V_N; r1 = vfp_regno(r1); - if (r2 & 1) o |= ARM_V_M; r2 = vfp_regno(r2); + ASSERT(!(o & 0xf00ff00f)); + if (r0 & 1) o |= ARM_V_D; + if (r1 & 1) o |= ARM_V_N; + if (r2 & 1) o |= ARM_V_M; + r0 >>= 1; r1 >>= 1; r2 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); } static void voddd(jit_state_t *_jit, int o, int r0, int r1, int r2) { - assert(!(o & 0xf00ff00f)); - assert(!(r0 & 1) && !(r1 & 1) && !(r2 & 1)); - r0 = 
vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); -} - -static void -voqdd(jit_state_t *_jit, int o, int r0, int r1, int r2) -{ - assert(!(o & 0xf00ff00f)); - assert(!(r0 & 3) && !(r1 & 1) && !(r2 & 1)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); -} - -static void -voqqd(jit_state_t *_jit, int o, int r0, int r1, int r2) -{ - assert(!(o & 0xf00ff00f)); - assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 1)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); -} - -static void -voqqq(jit_state_t *_jit, int o, int r0, int r1, int r2) -{ - assert(!(o & 0xf00ff00f)); - assert(!(r0 & 3) && !(r1 & 3) && !(r2 & 3)); - r0 = vfp_regno(r0); r1 = vfp_regno(r1); r2 = vfp_regno(r2); + ASSERT(!(o & 0xf00ff00f)); + ASSERT(!(r0 & 1) && !(r1 & 1) && !(r2 & 1)); + r0 >>= 1; r1 >>= 1; r2 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u4(r2)); } @@ -207,545 +137,630 @@ static void vldst(jit_state_t *_jit, int o, int r0, int r1, int i0) { /* i0 << 2 is byte offset */ - assert(!(o & 0xf00ff0ff)); + ASSERT(!(o & 0xf00ff0ff)); if (r0 & 1) { - assert(!(o & ARM_V_F64)); + ASSERT(!(o & ARM_V_F64)); o |= ARM_V_D; } - r0 = vfp_regno(r0); + r0 >>= 1; emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r1)<<16)|(_u4(r0)<<12)|_u8(i0)); } static void -vorsl(jit_state_t *_jit, int o, int r0, int r1, int i0) -{ - assert(!(o & 0xf00ff0ff)); - /* save i0 double precision registers */ - if (o & ARM_V_F64) i0 <<= 1; - /* if (r1 & 1) cc & ARM_V_F64 must be false */ - if (r1 & 1) o |= ARM_V_D; r1 = vfp_regno(r1); - assert(i0 && !(i0 & 1) && r1 + i0 <= 32); - emit_wide_thumb(_jit, ARM_CC_AL|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0)); -} - -static void -VADD_F32(jit_state_t *_jit, int32_t r0,r1,r2) +VADD_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t 
r2) { vosss(_jit,ARM_VADD_F,r0,r1,r2); } static void -VADD_F64(jit_state_t *_jit, int32_t r0,r1,r2) +VADD_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { voddd(_jit,ARM_VADD_F|ARM_V_F64,r0,r1,r2); } static void -VSUB_F32(jit_state_t *_jit, int32_t r0,r1,r2) +VSUB_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { vosss(_jit,ARM_VSUB_F,r0,r1,r2); } static void -VSUB_F64(jit_state_t *_jit, int32_t r0,r1,r2) +VSUB_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { voddd(_jit,ARM_VSUB_F|ARM_V_F64,r0,r1,r2); } static void -VMUL_F32(jit_state_t *_jit, int32_t r0,r1,r2) +VMUL_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { vosss(_jit,ARM_VMUL_F,r0,r1,r2); } static void -VMUL_F64(jit_state_t *_jit, int32_t r0,r1,r2) +VMUL_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { voddd(_jit,ARM_VMUL_F|ARM_V_F64,r0,r1,r2); } static void -VDIV_F32(jit_state_t *_jit, int32_t r0,r1,r2) +VDIV_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { vosss(_jit,ARM_VDIV_F,r0,r1,r2); } static void -VDIV_F64(jit_state_t *_jit, int32_t r0,r1,r2) +VDIV_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { voddd(_jit,ARM_VDIV_F|ARM_V_F64,r0,r1,r2); } static void -VABS_F32(jit_state_t *_jit, int32_t r0,r1) +VABS_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VABS_F,r0,r1); } static void -VABS_F64(jit_state_t *_jit, int32_t r0,r1) +VABS_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_dd(_jit,ARM_VABS_F|ARM_V_F64,r0,r1); } static void -VNEG_F32(jit_state_t *_jit, int32_t r0,r1) +VNEG_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VNEG_F,r0,r1); } static void -VNEG_F64(jit_state_t *_jit, int32_t r0,r1) +VNEG_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_dd(_jit,ARM_VNEG_F|ARM_V_F64,r0,r1); } static void -VSQRT_F32(jit_state_t *_jit, int32_t r0,r1) +VSQRT_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VSQRT_F,r0,r1); } static void -VSQRT_F64(jit_state_t *_jit, int32_t 
r0,r1) +VSQRT_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_dd(_jit,ARM_VSQRT_F|ARM_V_F64,r0,r1); } static void -VMOV_F32(jit_state_t *_jit, int32_t r0,r1) +VMOV_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VMOV_F,r0,r1); } static void -VMOV_F64(jit_state_t *_jit, int32_t r0,r1) +VMOV_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_dd(_jit,ARM_VMOV_F|ARM_V_F64,r0,r1); } static void -VMOV_AA_D(jit_state_t *_jit, int32_t r0,r1,r2) -{ - vorrd(_jit,ARM_VMOV_AA_D,r0,r1,r2); -} - -static void -VMOV_D_AA(jit_state_t *_jit, int32_t r0,r1,r2) +VMOV_D_AA(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { vorrd(_jit,ARM_VMOV_D_AA,r1,r2,r0); } static void -VMOV_A_S(jit_state_t *_jit, int32_t r0,r1) -{ - vors_(_jit,ARM_VMOV_A_S,r0,r1); -} - -static void -VMOV_S_A(jit_state_t *_jit, int32_t r0,r1) +VMOV_S_A(jit_state_t *_jit, int32_t r0, int32_t r1) { vors_(_jit,ARM_VMOV_S_A,r1,r0); } static void -VCMP_F32(jit_state_t *_jit, int32_t r0,r1) +VCMP_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCMP,r0,r1); } static void -VCMP_F64(jit_state_t *_jit, int32_t r0,r1) +VCMP_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_dd(_jit,ARM_VCMP|ARM_V_F64,r0,r1); } static void -VMRS(jit_state_t *_jit, int32_t r0) +VMRS(jit_state_t *_jit) { - vorr_(_jit,ARM_VMRS,r0,0); + emit_wide_thumb(_jit, ARM_CC_AL|ARM_VMRS|(0xf<<12)); } static void -VCVT_S32_F32(jit_state_t *_jit, int32_t r0,r1) +VCVT_S32_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_S32_F32,r0,r1); } static void -VCVT_S32_F64(jit_state_t *_jit, int32_t r0,r1) +VCVT_S32_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_S32_F64,r0,r1); } static void -VCVT_F32_S32(jit_state_t *_jit, int32_t r0,r1) +VCVT_F32_S32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_F32_S32,r0,r1); } static void -VCVT_F64_S32(jit_state_t *_jit, int32_t r0,r1) +VCVT_F64_S32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_F64_S32,r0,r1); 
} static void -VCVT_F32_F64(jit_state_t *_jit, int32_t r0,r1) +VCVT_F32_F64(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_F32_F64,r0,r1); } static void -VCVT_F64_F32(jit_state_t *_jit, int32_t r0,r1) +VCVT_F64_F32(jit_state_t *_jit, int32_t r0, int32_t r1) { vo_ss(_jit,ARM_VCVT_F64_F32,r0,r1); } static void -VMOV_A_S32(jit_state_t *_jit, int32_t r0,r1) +VMOV_A_S32(jit_state_t *_jit, int32_t r0, int32_t r1) { vori_(_jit,ARM_VMOV_A_D,r0,r1); } static void -VMOV_V_I32(jit_state_t *_jit, int32_t r0,r1) +VMOV_V_I32(jit_state_t *_jit, int32_t r0, int32_t r1) { vori_(_jit,ARM_VMOV_D_A,r1,r0); } /* "oi" should be the result of encode_vfp_double */ static void -VIMM(jit_state_t *_jit, int32_t oi,r0) +VIMM(jit_state_t *_jit, int32_t oi, int32_t r0) { vodi(_jit, oi,r0); } /* index is multipled by four */ static void -VLDRN_F32(jit_state_t *_jit, int32_t r0,r1,i0) +VLDRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VLDR,r0,r1,i0); } static void -VLDR_F32(jit_state_t *_jit, int32_t r0,r1,i0) +VLDR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VLDR|ARM_P,r0,r1,i0); } static void -VLDRN_F64(jit_state_t *_jit, int32_t r0,r1,i0) +VLDRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VLDR|ARM_V_F64,r0,r1,i0); } static void -VLDR_F64(jit_state_t *_jit, int32_t r0,r1,i0) +VLDR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0); } static void -VSTRN_F32(jit_state_t *_jit, int32_t r0,r1,i0) +VSTRN_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VSTR,r0,r1,i0); } static void -VSTR_F32(jit_state_t *_jit, int32_t r0,r1,i0) +VSTR_F32(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VSTR|ARM_P,r0,r1,i0); } static void -VSTRN_F64(jit_state_t *_jit, int32_t r0,r1,i0) +VSTRN_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VSTR|ARM_V_F64,r0,r1,i0); } static 
void -VSTR_F64(jit_state_t *_jit, int32_t r0,r1,i0) +VSTR_F64(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) { vldst(_jit,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0); } static void -vfp_absr_f(jit_state_t *_jit, int32_t r0,r1) +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { VABS_F32(_jit, r0,r1); } static void -vfp_absr_d(jit_state_t *_jit, int32_t r0,r1) +absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { VABS_F64(_jit, r0,r1); } static void -vfp_negr_f(jit_state_t *_jit, int32_t r0,r1) +negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { VNEG_F32(_jit, r0,r1); } static void -vfp_negr_d(jit_state_t *_jit, int32_t r0,r1) +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { VNEG_F64(_jit, r0,r1); } static void -vfp_sqrtr_f(jit_state_t *_jit, int32_t r0,r1) +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { VSQRT_F32(_jit, r0,r1); } static void -vfp_sqrtr_d(jit_state_t *_jit, int32_t r0,r1) +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { VSQRT_F64(_jit, r0,r1); } static void -vfp_addr_f(jit_state_t *_jit, int32_t r0,r1,r2) +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VADD_F32(_jit, r0,r1,r2); } static void -vfp_addr_d(jit_state_t *_jit, int32_t r0,r1,r2) +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VADD_F64(_jit, r0,r1,r2); } static void -vfp_subr_f(jit_state_t *_jit, int32_t r0,r1,r2) +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VSUB_F32(_jit, r0,r1,r2); } static void -vfp_subr_d(jit_state_t *_jit, int32_t r0,r1,r2) +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VSUB_F64(_jit, r0,r1,r2); } static void -vfp_mulr_f(jit_state_t *_jit, int32_t r0,r1,r2) +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VMUL_F32(_jit, r0,r1,r2); } static void -vfp_mulr_d(jit_state_t *_jit, int32_t r0,r1,r2) +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VMUL_F64(_jit, r0,r1,r2); } static void -vfp_divr_f(jit_state_t *_jit, int32_t r0,r1,r2) +divr_f(jit_state_t 
*_jit, int32_t r0, int32_t r1, int32_t r2) { VDIV_F32(_jit, r0,r1,r2); } static void -vfp_divr_d(jit_state_t *_jit, int32_t r0,r1,r2) +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { VDIV_F64(_jit, r0,r1,r2); } +static void +cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCMP_F32(_jit, r0, r1); +} + +static void +cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCMP_F64(_jit, r0, r1); +} + static jit_reloc_t -vfp_bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +vbcmp_x(jit_state_t *_jit, int cc) +{ + VMRS(_jit); + return T2_CC_B(_jit, cc); +} + +static jit_reloc_t +vbcmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return vbcmp_x(_jit, cc); +} + +static jit_reloc_t +vbcmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return vbcmp_x(_jit, cc); +} + +static jit_reloc_t +vbncmp_x(jit_state_t *_jit, int cc) +{ + VMRS(_jit); + jit_reloc_t cont = T2_CC_B(_jit, cc); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, cont); + return ret; +} + +static jit_reloc_t +vbncmp_f(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return vbncmp_x(_jit, cc); +} + +static jit_reloc_t +vbncmp_d(jit_state_t *_jit, int cc, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return vbncmp_x(_jit, cc); +} + +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_MI, r0, r1); } static jit_reloc_t -vfp_bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_MI, r0, r1); } static jit_reloc_t -vfp_bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_LS, r0, r1); } static jit_reloc_t -vfp_bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_LS, r0, r1); } static jit_reloc_t 
-vfp_beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_EQ, r0, r1); } static jit_reloc_t -vfp_beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_EQ, r0, r1); } static jit_reloc_t -vfp_bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_GE, r0, r1); } static jit_reloc_t -vfp_bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_GE, r0, r1); } static jit_reloc_t -vfp_bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_GT, r0, r1); } static jit_reloc_t -vfp_bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_GT, r0, r1); } static jit_reloc_t -vfp_bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_NE, r0, r1); } static jit_reloc_t -vfp_bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_NE, r0, r1); } static jit_reloc_t -vfp_bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbncmp_f(_jit, ARM_CC_GE, r0, r1); } static jit_reloc_t -vfp_bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbncmp_d(_jit, ARM_CC_GE, r0, r1); } static jit_reloc_t -vfp_bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbncmp_f(_jit, ARM_CC_GT, r0, r1); } static jit_reloc_t -vfp_bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) { 
return vbncmp_d(_jit, ARM_CC_GT, r0, r1); } static jit_reloc_t -vfp_bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_HI, r0, r1); } static jit_reloc_t -vfp_bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_HI, r0, r1); } static jit_reloc_t -vfp_bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_VC, r0, r1); } static jit_reloc_t -vfp_bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_VC, r0, r1); } static jit_reloc_t -vfp_bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_f(_jit, ARM_CC_VS, r0, r1); } static jit_reloc_t -vfp_bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { return vbcmp_d(_jit, ARM_CC_VS, r0, r1); } +static jit_reloc_t +buneqr_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS); + jit_reloc_t b = T2_CC_B(_jit, ARM_CC_NE); + jit_patch_here(_jit, a); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, b); + return ret; +} + +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return buneqr_x(_jit); +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return buneqr_x(_jit); +} + +static jit_reloc_t +bunger_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_MI); + jit_reloc_t ret = T2_CC_B(_jit, ARM_CC_HS); + jit_patch_here(_jit, a); + return ret; +} + +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return bunger_x(_jit); +} + +static jit_reloc_t +bunger_d(jit_state_t *_jit, int32_t r0, 
int32_t r1) +{ + cmp_d(_jit, r0, r1); + return bunger_x(_jit); +} + +static jit_reloc_t +bltgtr_x(jit_state_t *_jit) +{ + VMRS(_jit); + jit_reloc_t a = T2_CC_B(_jit, ARM_CC_VS); + jit_reloc_t b = T2_CC_B(_jit, ARM_CC_EQ); + jit_reloc_t ret = T2_B(_jit); + jit_patch_here(_jit, a); + jit_patch_here(_jit, b); + return ret; +} + +static jit_reloc_t +bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_f(_jit, r0, r1); + return bltgtr_x(_jit); +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + cmp_d(_jit, r0, r1); + return bltgtr_x(_jit); +} + static void -vfp_ldr_f(jit_state_t *_jit, int32_t r0,r1) +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { VLDR_F32(_jit, r0,r1,0); } static void -vfp_ldr_d(jit_state_t *_jit, int32_t r0,r1) +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { VLDR_F64(_jit, r0,r1,0); } static void -vfp_str_f(jit_state_t *_jit, int32_t r0,r1) +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) { VSTR_F32(_jit, r1,r0,0); } static void -vfp_str_d(jit_state_t *_jit, int32_t r0,r1) +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) { VSTR_F64(_jit, r1,r0,0); } static void -vfp_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 != r1) { - if (jit_fpr_p(r1)) { - if (jit_fpr_p(r0)) - VMOV_F32(r0, r1); - else - VMOV_A_S(r0, r1); - } - else if (jit_fpr_p(r0)) - VMOV_S_A(r0, r1); - else - movr(r0, r1); - } + if (r0 != r1) + VMOV_F32(_jit, r0, r1); } static void -vfp_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 != r1) { - if (jit_fpr_p(r1)) { - if (jit_fpr_p(r0)) - VMOV_F64(r0, r1); - else - VMOV_AA_D(r0, r0 + 1, r1); - } - else if (jit_fpr_p(r0)) - VMOV_D_AA(r0, r1, r1 + 1); - else { - /* minor consistency check */ - assert(r0 + 1 != r1 && r0 -1 != r1); - movr(r0, r1); - movr(r0 + 1, r1 + 1); - } - } + if (r0 != r1) + VMOV_F64(_jit, r0, r1); } static int @@ -831,7 +846,7 @@ 
encode_vfp_double(int mov, int inv, unsigned lo, unsigned hi) fail: /* need another approach (load from memory, move from arm register, etc) */ - return (-1); + return -1; success: code = inv ? ARM_VMVNI : ARM_VMOVI; @@ -843,19 +858,19 @@ success: break; case 0x1: case 0x3: case 0x5: case 0x7: /* should actually not reach here */ - assert(!inv); + ASSERT(!inv); case 0x9: case 0xb: - assert(!mov); + ASSERT(!mov); break; case 0xc: case 0xd: /* should actually not reach here */ - assert(inv); + ASSERT(inv); case 0xe: - assert(mode & 0x20); - assert(mov && !inv); + ASSERT(mode & 0x20); + ASSERT(mov && !inv); break; default: - assert(!(mode & 0x20)); + ASSERT(!(mode & 0x20)); break; } imm = ((imm & 0x80) << 17) | ((imm & 0x70) << 12) | (imm & 0x0f); @@ -866,678 +881,288 @@ success: else code |= 0xef000000; - return (code); + return code; } static void -_vfp_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) +movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) { - union { - int32_t i; - jit_float32_t f; - } u; - int32_t reg; - int32_t code; - u.f = i0; - if (jit_fpr_p(r0)) { - /* float arguments are packed, for others, - * lightning only address even registers */ - if (!(r0 & 1) && (r0 - 16) >= 0 && - ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 || - (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1)) - VIMM(code, r0); + union { int32_t i; jit_float32_t f; } u = { .f = i0 }; + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), u.i); + VMOV_S_A(_jit, r0, jit_gpr_regno(reg)); + unget_temp_gpr(_jit); +} + +static void +movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) +{ + union { int32_t i[2]; jit_float64_t d; } u = { .d = i0 }; + int32_t code; + if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 || + (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1) + VIMM(_jit, code, r0); + else { + jit_gpr_t rg0 = get_temp_gpr(_jit); + jit_gpr_t rg1 = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(rg0), u.i[0]); + movi(_jit, 
jit_gpr_regno(rg1), u.i[1]); + VMOV_D_AA(_jit, r0, jit_gpr_regno(rg0), jit_gpr_regno(rg1)); + unget_temp_gpr(_jit); + unget_temp_gpr(_jit); + } +} + +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCVT_F64_F32(_jit, r0, r1); +} + +static void +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VCVT_F32_F64(_jit, r0, r1); +} + +static void +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VMOV_V_I32(_jit, r0, r1); + VCVT_F32_S32(_jit, r0, r0); +} + +static void +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + VMOV_V_I32(_jit, r0, r1); + VCVT_F64_S32(_jit, r0, r0); +} + +static void +truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + VCVT_S32_F32(_jit, jit_fpr_regno(reg), r1); + VMOV_A_S32(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); +} + +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + jit_fpr_t reg = get_temp_fpr(_jit); + VCVT_S32_F64(_jit, jit_fpr_regno(reg), r1); + VMOV_A_S32(_jit, r0, jit_fpr_regno(reg)); + unget_temp_fpr(_jit); +} + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t gpr = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(gpr), i0); + VLDR_F32(_jit, r0, jit_gpr_regno(gpr), 0); + unget_temp_gpr(_jit); +} + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r1, r2); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r1, r2); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); +} + +static void 
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDR_F32(_jit, r0, r1, i0 >> 2); else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), u.i); - VMOV_S_A(r0, rn(reg)); - jit_unget_reg(reg); + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } - else - movi(r0, u.i); -} - -static void -_vfp_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) -{ - union { - int32_t i[2]; - jit_float64_t d; - } u; - int32_t code; - int32_t rg0, rg1; - u.d = i0; - if (jit_fpr_p(r0)) { - if ((code = encode_vfp_double(1, 0, u.i[0], u.i[1])) != -1 || - (code = encode_vfp_double(1, 1, ~u.i[0], ~u.i[1])) != -1) - VIMM(code, r0); + else { + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDRN_F32(_jit, r0, r1, i0 >> 2); else { - rg0 = jit_get_reg(jit_class_gpr); - rg1 = jit_get_reg(jit_class_gpr); - movi(rn(rg0), u.i[0]); - movi(rn(rg1), u.i[1]); - VMOV_D_AA(r0, rn(rg0), rn(rg1)); - jit_unget_reg(rg1); - jit_unget_reg(rg0); + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } - else { - movi(r0, u.i[0]); - movi(r0 + 1, u.i[1]); - } } static void -_vfp_extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (jit_fpr_p(r1)) { - if (jit_fpr_p(r0)) - VCVT_F64_F32(r0, r1); + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDR_F64(_jit, r0, r1, i0 >> 2); else { - reg = jit_get_reg(jit_class_fpr); - VCVT_F64_F32(rn(reg), r1); - VMOV_A_S(r0, rn(reg)); - jit_unget_reg(reg); + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } else { - reg = jit_get_reg(jit_class_fpr); - VMOV_S_A(rn(reg), r1); - VCVT_F64_F32(rn(reg), rn(reg)); - 
if (jit_fpr_p(r0)) - VMOV_F32(r0, rn(reg)); - else - VMOV_A_S(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_vfp_extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - int32_t reg; - if (jit_fpr_p(r1)) { - if (jit_fpr_p(r0)) - VCVT_F32_F64(r0, r1); + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VLDRN_F64(_jit, r0, r1, i0 >> 2); else { - reg = jit_get_reg(jit_class_fpr); - VCVT_F32_F64(rn(reg), r1); - VMOV_AA_D(r0, r0 + 1, rn(reg)); - jit_unget_reg(reg); + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r1, i0); + VLDR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } - else { - reg = jit_get_reg(jit_class_fpr); - VMOV_D_AA(rn(reg), r1, r1 + 1); - VCVT_F32_F64(rn(reg), rn(reg)); - if (jit_fpr_p(r0)) - VMOV_F64(r0, rn(reg)); - else - VMOV_AA_D(r0, r0 + 1, rn(reg)); - jit_unget_reg(reg); - } } static void -_vfp_extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) { - int32_t reg; - if (jit_fpr_p(r0)) { - VMOV_V_I32(r0, r1); - VCVT_F32_S32(r0, r0); - } - else { - reg = jit_get_reg(jit_class_fpr); - VMOV_V_I32(rn(reg), r1); - VCVT_F32_S32(rn(reg), rn(reg)); - VMOV_F32(r0, rn(reg)); - jit_unget_reg(reg); - } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VSTR_F32(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } static void -_vfp_extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) { - int32_t reg; - if (jit_fpr_p(r0)) { - VMOV_V_I32(r0, r1); - VCVT_F64_S32(r0, r0); - } - else { - reg = jit_get_reg(jit_class_fpr); - VMOV_V_I32(rn(reg), r1); - VCVT_F64_S32(rn(reg), rn(reg)); - VMOV_F64(r0, rn(reg)); - jit_unget_reg(reg); - } + jit_gpr_t reg = get_temp_gpr(_jit); + movi(_jit, jit_gpr_regno(reg), i0); + VSTR_F64(_jit, r0, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } static void -_vfp_truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +stxr_f(jit_state_t *_jit, int32_t r0, 
int32_t r1, int32_t r2) { - int32_t reg; - reg = jit_get_reg(jit_class_fpr); - if (jit_fpr_p(r1)) - VCVT_S32_F32(rn(reg), r1); - else { - VMOV_V_I32(rn(reg), r1); - VCVT_S32_F32(rn(reg), rn(reg)); - } - VMOV_A_S32(r0, rn(reg)); - jit_unget_reg(reg); + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r0, r1); + VSTR_F32(_jit, r2, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } static void -_vfp_truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1) +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t reg; - reg = jit_get_reg(jit_class_fpr); - if (jit_fpr_p(r1)) - VCVT_S32_F64(rn(reg), r1); - else { - VMOV_V_I32(rn(reg), r1); - VCVT_S32_F64(rn(reg), rn(reg)); - } - VMOV_A_S32(r0, rn(reg)); - jit_unget_reg(reg); + jit_gpr_t reg = get_temp_gpr(_jit); + addr(_jit, jit_gpr_regno(reg), r0, r1); + VSTR_F64(_jit, r2, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } static void -_vfp_cmp_f(jit_state_t *_jit, int32_t r0, int32_t r1) +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - int32_t rg0, rg1; - if (jit_fpr_p(r0)) { - if (jit_fpr_p(r1)) - VCMP_F32(r0, r1); + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTR_F32(_jit, r1, r0, i0 >> 2); else { - rg1 = jit_get_reg(jit_class_fpr); - VMOV_S_A(rn(rg1), r1); - VCMP_F32(r0, rn(rg1)); - jit_unget_reg(rg1); + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } else { - rg0 = jit_get_reg(jit_class_fpr); - VMOV_S_A(rn(rg0), r0); - if (jit_fpr_p(r1)) - VCMP_F32(rn(rg0), r1); + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTRN_F32(_jit, r1, r0, i0 >> 2); else { - rg1 = jit_get_reg(jit_class_fpr); - VMOV_S_A(rn(rg1), r1); - VCMP_F32(rn(rg0), rn(rg1)); - jit_unget_reg(rg1); + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F32(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } - jit_unget_reg(rg0); } } static void 
-_vfp_cmp_d(jit_state_t *_jit, int32_t r0, int32_t r1) +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - int32_t rg0, rg1; - if (jit_fpr_p(r0)) { - if (jit_fpr_p(r1)) - VCMP_F64(r0, r1); + if (i0 >= 0) { + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTR_F64(_jit, r1, r0, i0 >> 2); else { - rg1 = jit_get_reg(jit_class_fpr); - VMOV_D_AA(rn(rg1), r1, r1 + 1); - VCMP_F64(r0, rn(rg1)); - jit_unget_reg(rg1); + jit_gpr_t reg = get_temp_gpr(_jit); + addi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } else { - rg0 = jit_get_reg(jit_class_fpr); - VMOV_D_AA(rn(rg0), r0, r0 + 1); - if (jit_fpr_p(r1)) - VCMP_F64(rn(rg0), r1); + i0 = -i0; + ASSERT(!(i0 & 3)); + if (i0 < 1024) + VSTRN_F64(_jit, r1, r0, i0 >> 2); else { - rg1 = jit_get_reg(jit_class_fpr); - VMOV_D_AA(rn(rg1), r1, r1 + 1); - VCMP_F64(rn(rg0), rn(rg1)); - jit_unget_reg(rg1); - } - jit_unget_reg(rg0); - } -} - -static jit_word_t -_vbcmp_x(jit_state_t *_jit, int cc, jit_word_t i0) -{ - jit_word_t d, w; - VMRS(_R15_REGNO); - w = _jit->pc.w; - - d = ((i0 - w) >> 1) - 2; - assert(_s20P(d)); - T2_CC_B(cc, encode_thumb_cc_jump(d)); - - return (w); -} - - -static jit_word_t -_vbcmp_f(jit_state_t *_jit, int cc, - jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_f(r0, r1); - return (vbcmp_x(cc, i0)); -} - -static jit_word_t -_vbcmp_d(jit_state_t *_jit, int cc, - jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_d(r0, r1); - return (vbcmp_x(cc, i0)); -} - -static jit_word_t -_vbncmp_x(jit_state_t *_jit, int cc, jit_word_t i0) -{ - jit_word_t d, p, w; - VMRS(_R15_REGNO); - p = _jit->pc.w; - - T2_CC_B(cc, 0); - w = _jit->pc.w; - d = ((i0 - w) >> 1) - 2; - assert(_s20P(d)); - T2_B(encode_thumb_jump(d)); - - patch_at(arm_patch_jump, p, _jit->pc.w); - return (w); -} - -static jit_word_t -_vbncmp_f(jit_state_t *_jit, int cc, - jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_f(r0, r1); - return (vbncmp_x(cc, i0)); -} - -static jit_word_t
-_vbncmp_d(jit_state_t *_jit, int cc, - jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_d(r0, r1); - return (vbncmp_x(cc, i0)); -} - -static jit_word_t -_vfp_buneqr_x(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t d, p, q, w; - VMRS(_R15_REGNO); - p = _jit->pc.w; - - T2_CC_B(ARM_CC_VS, 0); - q = _jit->pc.w; - T2_CC_B(ARM_CC_NE, 0); - patch_at(arm_patch_jump, p, _jit->pc.w); - w = _jit->pc.w; - d = ((i0 - w) >> 1) - 2; - assert(_s20P(d)); - T2_B(encode_thumb_jump(d)); - - patch_at(arm_patch_jump, q, _jit->pc.w); - return (w); -} - -static jit_word_t -_vfp_buneqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_f(r0, r1); - return (vfp_buneqr_x(i0)); -} - -static jit_word_t -_vfp_buneqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_d(r0, r1); - return (vfp_buneqr_x(i0)); -} - -static jit_word_t -_vfp_bunger_x(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t d, p, w; - VMRS(_R15_REGNO); - p = _jit->pc.w; - - T2_CC_B(ARM_CC_MI, 0); - w = _jit->pc.w; - d = ((i0 - w) >> 1) - 2; - assert(_s20P(d)); - T2_CC_B(ARM_CC_HS, encode_thumb_cc_jump(d)); - - patch_at(arm_patch_jump, p, _jit->pc.w); - return (w); -} - -static jit_word_t -_vfp_bunger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_f(r0, r1); - return (vfp_bunger_x(i0)); -} - -static jit_word_t -_vfp_bunger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_d(r0, r1); - return (vfp_bunger_x(i0)); -} - -static jit_word_t -_vfp_bltgtr_x(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t d, p, q, w; - VMRS(_R15_REGNO); - p = _jit->pc.w; - - T2_CC_B(ARM_CC_VS, 0); - q = _jit->pc.w; - T2_CC_B(ARM_CC_EQ, 0); - w = _jit->pc.w; - d = ((i0 - w) >> 1) - 2; - assert(_s20P(d)); - T2_B(encode_thumb_jump(d)); - - patch_at(arm_patch_jump, p, _jit->pc.w); - patch_at(arm_patch_jump, q, _jit->pc.w); - return (w); -} - -static jit_word_t -_vfp_bltgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_f(r0, r1); 
- return (vfp_bltgtr_x(i0)); -} - -static jit_word_t -_vfp_bltgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - vfp_cmp_d(r0, r1); - return (vfp_bltgtr_x(i0)); -} - -static void -_vfp_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - int32_t gpr; - if (jit_fpr_p(r0)) { - gpr = jit_get_reg(jit_class_gpr); - movi(rn(gpr), i0); - VLDR_F32(r0, rn(gpr), 0); - jit_unget_reg(gpr); - } - else - ldi_i(r0, i0); -} - -static void -_vfp_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - if (jit_fpr_p(r0)) - VLDR_F64(r0, rn(reg), 0); - else { - ldr_i(r0, rn(reg)); - ldxi_i(r0 + 1, rn(reg), 4); - } - jit_unget_reg(reg); -} - -static void -_vfp_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (jit_fpr_p(r0)) { - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - VLDR_F32(r0, rn(reg), 0); - jit_unget_reg(reg); - } - else - ldxr_i(r0, r1, r2); -} - -static void -_vfp_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - if (jit_fpr_p(r0)) - VLDR_F64(r0, rn(reg), 0); - else { - ldr_i(r0, rn(reg)); - ldxi_i(r0 + 1, rn(reg), 4); - } - jit_unget_reg(reg); -} - -static void -_vfp_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - if (jit_fpr_p(r0)) { - if (i0 >= 0) { - assert(!(i0 & 3)); - if (i0 < 1024) - VLDR_F32(r0, r1, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - VLDR_F32(r0, rn(reg), 0); - jit_unget_reg(reg); - } - } - else { - i0 = -i0; - assert(!(i0 & 3)); - if (i0 < 1024) - VLDRN_F32(r0, r1, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - subi(rn(reg), r1, i0); - VLDR_F32(r0, rn(reg), 0); - jit_unget_reg(reg); - } + jit_gpr_t reg = get_temp_gpr(_jit); + subi(_jit, jit_gpr_regno(reg), r0, i0); + VSTR_F64(_jit, r1, jit_gpr_regno(reg), 0); + unget_temp_gpr(_jit); } } - 
else - ldxi_i(r0, r1, i0); } static void -_vfp_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +retr_d(jit_state_t *_jit, int32_t r) { - int32_t reg; - if (jit_fpr_p(r0)) { - if (i0 >= 0) { - assert(!(i0 & 3)); - if (i0 < 1024) - VLDR_F64(r0, r1, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - VLDR_F64(r0, rn(reg), 0); - jit_unget_reg(reg); - } - } - else { - i0 = -i0; - assert(!(i0 & 3)); - if (i0 < 1024) - VLDRN_F64(r0, r1, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - subi(rn(reg), r1, i0); - VLDR_F64(r0, rn(reg), 0); - jit_unget_reg(reg); - } - } - } - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_i(r0, rn(reg)); - ldxi_i(r0 + 1, rn(reg), 4); - jit_unget_reg(reg); - } + movr_d(_jit, jit_fpr_regno(_D0), r); + ret(_jit); } static void -_vfp_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +retr_f(jit_state_t *_jit, int32_t r) { - int32_t reg; - if (jit_fpr_p(r0)) { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - VSTR_F32(r0, rn(reg), 0); - jit_unget_reg(reg); - } - else - sti_i(i0, r0); + movr_f(_jit, jit_fpr_regno(_S0), r); + ret(_jit); } static void -_vfp_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +retval_f(jit_state_t *_jit, int32_t r0) { - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - if (jit_fpr_p(r0)) - VSTR_F64(r0, rn(reg), 0); - else { - str_i(rn(reg), r0); - stxi_i(4, rn(reg), r0 + 1); - } - jit_unget_reg(reg); + movr_f(_jit, r0, jit_fpr_regno(_S0)); } static void -_vfp_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +retval_d(jit_state_t *_jit, int32_t r0) { - int32_t reg; - if (jit_fpr_p(r2)) { - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - VSTR_F32(r2, rn(reg), 0); - jit_unget_reg(reg); - } - else - stxr_i(r0, r1, r2); -} - -static void -_vfp_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - if 
(jit_fpr_p(r2)) - VSTR_F64(r2, rn(reg), 0); - else { - str_i(rn(reg), r2); - stxi_i(4, rn(reg), r2 + 1); - } - jit_unget_reg(reg); -} - -static void -_vfp_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (jit_fpr_p(r1)) { - if (i0 >= 0) { - assert(!(i0 & 3)); - if (i0 < 1024) - VSTR_F32(r1, r0, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r0, i0); - VSTR_F32(r1, rn(reg), 0); - jit_unget_reg(reg); - } - } - else { - i0 = -i0; - assert(!(i0 & 3)); - if (i0 < 1024) - VSTRN_F32(r1, r0, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - subi(rn(reg), r0, i0); - VSTR_F32(r1, rn(reg), 0); - jit_unget_reg(reg); - } - } - } - else - stxi_i(i0, r0, r1); -} - -static void -_vfp_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (jit_fpr_p(r1)) { - if (i0 >= 0) { - assert(!(i0 & 3)); - if (i0 < 0124) - VSTR_F64(r1, r0, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r0, i0); - VSTR_F64(r1, rn(reg), 0); - jit_unget_reg(reg); - } - } - else { - i0 = -i0; - assert(!(i0 & 3)); - if (i0 < 1024) - VSTRN_F64(r1, r0, i0 >> 2); - else { - reg = jit_get_reg(jit_class_gpr); - subi(rn(reg), r0, i0); - VSTR_F64(r1, rn(reg), 0); - jit_unget_reg(reg); - } - } - } - else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r0, i0); - str_i(rn(reg), r1); - stxi_i(4, rn(reg), r1 + 1); - jit_unget_reg(reg); - } -} - -static void -_vfp_vaarg_d(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - int32_t reg; - - assert(_jitc->function->self.call & jit_call_varargs); - - /* Adjust pointer. */ - reg = jit_get_reg(jit_class_gpr); - andi(rn(reg), r1, 7); - addr(r1, r1, rn(reg)); - jit_unget_reg(reg); - - /* Load argument. */ - vfp_ldr_d(r0, r1); - - /* Update stack pointer. 
*/ - addi(r1, r1, sizeof(jit_float64_t)); + movr_d(_jit, r0, jit_fpr_regno(_D0)); } diff --git a/lightening/arm.h b/lightening/arm.h index b4f64667d..8db672ea0 100644 --- a/lightening/arm.h +++ b/lightening/arm.h @@ -99,13 +99,13 @@ #define JIT_V0 _R4 #define JIT_V1 _R5 #define JIT_V2 _R6 -#define JIT_V3 _R7 -#define JIT_V4 _R8 -#define JIT_V5 _R9 -#define JIT_V6 _R10 -#define JIT_V7 _R11 +#define JIT_TMP1 _R7 +#define JIT_V3 _R8 +#define JIT_V4 _R9 +#define JIT_V5 _R10 +#define JIT_V6 _R11 -#define _SP _R13 +#define JIT_SP _R13 #define _LR _R14 #define _PC _R15 @@ -127,7 +127,7 @@ #define JIT_VF6 _D14 #define JIT_VF7 _D15 -#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR +#define JIT_PLATFORM_CALLEE_SAVE_GPRS _LR, JIT_TMP1 #endif /* _jit_arm_h */ diff --git a/lightening/lightening.c b/lightening/lightening.c index e03dbda4c..2a1d282a6 100644 --- a/lightening/lightening.c +++ b/lightening/lightening.c @@ -1328,11 +1328,11 @@ emit_literal_pool(jit_state_t *_jit, enum guard_pool guard) switch (entry->reloc.kind) { case JIT_RELOC_JMP_WITH_VENEER: patch_jmp_offset((uint32_t*) loc, diff); - emit_veneer(_jit, (void*) entry->value); + emit_veneer(_jit, (void*) (uintptr_t) entry->value); break; case JIT_RELOC_JCC_WITH_VENEER: patch_jcc_offset((uint32_t*) loc, diff); - emit_veneer(_jit, (void*) entry->value); + emit_veneer(_jit, (void*) (uintptr_t) entry->value); break; case JIT_RELOC_LOAD_FROM_POOL: patch_load_from_pool_offset((uint32_t*) loc, diff);