From 2cea99361bfb2281eb4304b1a8b47b54c70bbbbc Mon Sep 17 00:00:00 2001 From: Paulo Andrade Date: Fri, 20 Apr 2018 10:37:37 -0300 Subject: [PATCH] Build and pass all tests on 32 and 64 bit sparc * include/lightning/jit_private.h: Add new register classes to flag float registers and double only registers, required for sparc64 where only low 32 bit fpr registers can be used for single precision operations. Add new 128 bit jit_regset_t type for sparc64 register set. * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc. * lib/lightning.c: Update for new jit_regset_t required for sparc64. --- ChangeLog | 13 + include/lightning/jit_private.h | 26 +- include/lightning/jit_sparc.h | 56 +- lib/jit_sparc-cpu.c | 1049 +++++++++++++++++++++++++++---- lib/jit_sparc-fpu.c | 867 ++++++++++++++++++++++--- lib/jit_sparc-sz.c | 405 +++++++++++- lib/jit_sparc.c | 470 +++++++++++++- lib/lightning.c | 114 ++++ 8 files changed, 2754 insertions(+), 246 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3ace35a11..19b3335f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2018-04-20 Paulo Andrade + + * include/lightning/jit_private.h: Add new register classes to + flag float registers and double only registers, required for sparc64 + where only low 32 bit fpr registers can be used for single precision + operations. + Add new 128 bit jit_regset_t type for sparc64 register set. + + * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64 bits sparc. + + * lib/lightning.c: Update for new jit_regset_t required for sparc64. 
+ 2018-02-26 Paulo Andrade * check/lightning.c, include/lightning.h: Add the new jit_va_push diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 68b0571a3..f06f1c8a1 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -95,7 +95,14 @@ typedef jit_uint64_t jit_regset_t; # define JIT_SP _SP # define JIT_RET _I0 # define JIT_FRET _F0 +# if __WORDSIZE == 32 typedef jit_uint64_t jit_regset_t; +# else +typedef struct { + jit_uint64_t rl; + jit_uint64_t rh; +} jit_regset_t; +# endif #elif defined(__ia64__) # define JIT_SP _R12 # define JIT_RET _R8 @@ -217,6 +224,10 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_class_sft 0x01000000 /* not a hardware register */ #define jit_class_rg8 0x04000000 /* x86 8 bits */ #define jit_class_xpr 0x80000000 /* float / vector */ +/* Used on sparc64 where %f0-%f31 can be encoded for single float + * but %f32 to %f62 only as double precision */ +#define jit_class_sng 0x10000000 /* Single precision float */ +#define jit_class_dbl 0x20000000 /* Only double precision float */ #define jit_regno_patch 0x00008000 /* this is a register * returned by a "user" call * to jit_get_reg() */ @@ -250,7 +261,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */ #define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */ -#if __ia64__ +#if __ia64__ || (__sparc__ && __WORDSIZE == 64) extern void jit_regset_com(jit_regset_t*, jit_regset_t*); @@ -286,10 +297,17 @@ jit_regset_setbit(jit_regset_t*, jit_int32_t); extern jit_bool_t jit_regset_tstbit(jit_regset_t*, jit_int32_t); -# define jit_regset_new(set) \ +# if __sparc__ && __WORDSIZE == 64 +# define jit_regset_new(set) \ + do { (set)->rl = (set)->rh = 0; } while (0) +# define jit_regset_del(set) \ + do { (set)->rl = (set)->rh = 0; } while (0) +# else +# define jit_regset_new(set) \ do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0) 
-# define jit_regset_del(set) \ +# define jit_regset_del(set) \ do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0) +# endif #else # define jit_regset_com(u, v) (*(u) = ~*(v)) # define jit_regset_and(u, v, w) (*(u) = *(v) & *(w)) @@ -457,7 +475,7 @@ struct jit_compiler { jit_int32_t rout; /* first output register */ jit_int32_t breg; /* base register for prolog/epilog */ #endif -#if __mips__ || __ia64__ || __alpha__ +#if __mips__ || __ia64__ || __alpha__ || (__sparc__ && __WORDSIZE == 64) jit_int32_t carry; #define jit_carry _jitc->carry #endif diff --git a/include/lightning/jit_sparc.h b/include/lightning/jit_sparc.h index a16f140da..f74f5ff30 100644 --- a/include/lightning/jit_sparc.h +++ b/include/lightning/jit_sparc.h @@ -32,8 +32,13 @@ typedef enum { #define jit_r_num() 3 #define jit_v(i) (_L0 + (i)) #define jit_v_num() 8 -#define jit_f(i) (_F0 + ((i) << 1)) -#define jit_f_num() 8 +#if __WORDSIZE == 32 +# define jit_f(i) (_F0 + ((i) << 1)) +# define jit_f_num() 8 +#else +# define jit_f(i) (_F32 - (i)) +# define jit_f_num() 16 +#endif #define JIT_R0 _G2 #define JIT_R1 _G3 #define JIT_R2 _G4 @@ -49,16 +54,47 @@ typedef enum { _O0, _O1, _O2, _O3, _O4, _O5, _SP, _O7, _L0, _L1, _L2, _L3, _L4, _L5, _L6, _L7, _I0, _I1, _I2, _I3, _I4, _I5, _FP, _I7, -#define JIT_F0 _F0 -#define JIT_F1 _F2 -#define JIT_F2 _F4 -#define JIT_F3 _F6 -#define JIT_F4 _F8 -#define JIT_F5 _F10 -#define JIT_F6 _F12 -#define JIT_F7 _F14 +#if __WORDSIZE == 32 +# define JIT_F0 _F0 +# define JIT_F1 _F2 +# define JIT_F2 _F4 +# define JIT_F3 _F6 +# define JIT_F4 _F8 +# define JIT_F5 _F10 +# define JIT_F6 _F12 +# define JIT_F7 _F14 _F0, _F1, _F2, _F3, _F4, _F5, _F6, _F7, _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15, +#else + /* All single precision operations have a high cost due to being + * stored on registers only encodable as double precision. + * The cost is due to needing to move values to a register with + * value <= 31. 
+ * This is a limitation due to using fixed named registers in + * lightning. */ +# define JIT_F0 _F32 +# define JIT_F1 _F34 +# define JIT_F2 _F36 +# define JIT_F3 _F38 +# define JIT_F4 _F40 +# define JIT_F5 _F42 +# define JIT_F6 _F44 +# define JIT_F7 _F46 +# define JIT_F8 _F48 +# define JIT_F9 _F50 +# define JIT_F10 _F52 +# define JIT_F11 _F54 +# define JIT_F12 _F56 +# define JIT_F13 _F58 +# define JIT_F14 _F60 +# define JIT_F15 _F62 + _F62, _F60, _F58, _F56, _F54, _F52, _F50, _F48, + _F46, _F44, _F42, _F40, _F38, _F36, _F34, _F32, + _F31, _F30, _F29, _F28, _F27, _F26, _F25, _F24, + _F23, _F22, _F21, _F20, _F19, _F18, _F17, _F16, + _F15, _F14, _F13, _F12, _F11, _F10, _F9, _F8, + _F7, _F6, _F5, _F4, _F3, _F2, _F1, _F0, +#endif #define JIT_NOREG _NOREG _NOREG, } jit_reg_t; diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 508137758..a4d88d1ca 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -18,6 +18,11 @@ */ #if PROTO +# define _G2_REGNO 0x02 +# define _G3_REGNO 0x03 +# define _G4_REGNO 0x04 +# define _O0_REGNO 0x08 +# define _O1_REGNO 0x09 # define _SP_REGNO 0x0e # define _FP_REGNO 0x1e # define _O7_REGNO 0x0f @@ -37,12 +42,12 @@ * fp- alloca * sp+ stack arguments * sp+ 6 words to save register arguments - * sp+ 1 word for hidden address of aggregate return value + * sp+ 1 word for hidden address of aggregate return value (32 bits only) * sp+ 16 words for in and local registers * sp ---- * decreasing memory address - next stack frame (not yet allocated) */ -# define stack_framesize ((16 + 1 + 6) * 4) +# define stack_framesize ((16 + (__WORDSIZE == 32) + 6) * sizeof(jit_word_t)) typedef union { struct { jit_uint32_t b: 2; } op; struct { jit_uint32_t _: 2; jit_uint32_t b: 1; } a; @@ -51,21 +56,30 @@ typedef union { struct { jit_uint32_t _: 3; jit_uint32_t b: 4; } cond; struct { jit_uint32_t _: 7; jit_uint32_t b: 3; } op2; struct { jit_uint32_t _: 7; jit_uint32_t b: 6; } op3; + struct { jit_uint32_t _: 10; jit_uint32_t b: 1; } cc1; struct { 
jit_uint32_t _: 10; jit_uint32_t b: 22; } imm22; struct { jit_uint32_t _: 10; jit_uint32_t b: 22; } disp22; + struct { jit_uint32_t _: 11; jit_uint32_t b: 1; } cc0; + struct { jit_uint32_t _: 12; jit_uint32_t b: 1; } p; + struct { jit_uint32_t _: 13; jit_uint32_t b: 19; } disp19; struct { jit_uint32_t _: 13; jit_uint32_t b: 5; } rs1; struct { jit_uint32_t _: 18; jit_uint32_t b: 1; } i; struct { jit_uint32_t _: 18; jit_uint32_t b: 9; } opf; + struct { jit_uint32_t _: 19; jit_uint32_t b: 1; } x; struct { jit_uint32_t _: 19; jit_uint32_t b: 8; } asi; struct { jit_uint32_t _: 19; jit_uint32_t b: 6; } res; struct { jit_uint32_t _: 19; jit_uint32_t b: 13; } simm13; - struct { jit_uint32_t _: 27; jit_uint32_t b: 5; } rs2; + struct { jit_uint32_t _: 20; jit_uint32_t b: 7; } asix; + struct { jit_uint32_t _: 20; jit_uint32_t b: 6; } asis; + struct { jit_uint32_t _: 26; jit_uint32_t b: 6; } shim; struct { jit_uint32_t _: 25; jit_uint32_t b: 7; } imm7; + struct { jit_uint32_t _: 27; jit_uint32_t b: 5; } rs2; jit_int32_t v; } jit_instr_t; # define ii(i) *_jit->pc.ui++ = i # define s7_p(imm) ((imm) <= 63 && (imm) >= -64) # define s13_p(imm) ((imm) <= 4095 && (imm) >= -4096) +# define s19_p(imm) ((imm) <= 262143 && (imm) >= -262144) -# define s22_p(imm) ((imm) <= 2097151 && (imm) >= -20971512) +# define s22_p(imm) ((imm) <= 2097151 && (imm) >= -2097152) # define s30_p(imm) ((imm) <= 536870911 && (imm) >= -536870912) # define f1(op, disp30) _f1(_jit, op, disp30) @@ -75,12 +89,27 @@ static void _f2r(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f2b(op, a, cond, op2, disp22) _f2b(_jit, op, a, cond, op2, disp22) static void _f2b(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define f2bp(op,a,cond,op2,cc1,cc0,p,disp19) \ + _f2bp(_jit,op,a,cond,op2,cc1,cc0,p,disp19) +static void +_f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define f3r(op, rd, op3, rs1, 
rs1, rs2) static void _f3r(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2) +static void _f3rx(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define f3i(op, rd, op3, rs1, simm13) _f3i(_jit, op, rd, op3, rs1, simm13) static void _f3i(jit_state_t*, - jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define f3s(op, rd, op3, rs1, simm13) _f3s(_jit, op, rd, op3, rs1, simm13) +static void _f3s(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri) static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; @@ -96,16 +125,31 @@ static void _f3a(jit_state_t*,jit_int32_t, # define LDUBI(rs1, imm, rd) f3i(3, rd, 1, rs1, imm) # define LDUH(rs1, rs2, rd) f3r(3, rd, 2, rs1, rs2) # define LDUHI(rs1, imm, rd) f3i(3, rd, 2, rs1, imm) -# define LD(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) -# define LDI(rs1, imm, rd) f3i(3, rd, 0, rs1, imm) -# define LDD(rs1, rs2, rd) f3r(3, rd, 3, rs1, rs2) -# define LDDI(rs1, imm, rd) f3i(3, rd, 3, rs1, imm) +# if __WORDSIZE == 32 +# define LD(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) +# define LDI(rs1, imm, rd) f3i(3, rd, 0, rs1, imm) +# define LDD(rs1, rs2, rd) f3r(3, rd, 3, rs1, rs2) +# define LDDI(rs1, imm, rd) f3i(3, rd, 3, rs1, imm) +# else +# define LDSW(rs1, rs2, rd) f3r(3, rd, 8, rs1, rs2) +# define LDSWI(rs1, imm, rd) f3i(3, rd, 8, rs1, imm) +# define LDUW(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) +# define LDUWI(rs1, imm, rd) f3i(3, rd, 0, rs1, imm) +# define LDX(rs1, rs2, rd) f3r(3, rd, 11, rs1, rs2) +# define LDXI(rs1, imm, rd) f3i(3, rd, 11, rs1, imm) +# endif # define LDSBA(rs1, rs2, asi, rd) f3a(3, rd, 25, rs1, asi, rs2) # define LDSHA(rs1, rs2, asi, rd) f3a(3, rd, 26, rs1, asi, rs2) # 
define LDUBA(rs1, rs2, asi, rd) f3a(3, rd, 17, rs1, asi, rs2) # define LDUHA(rs1, rs2, asi, rd) f3a(3, rd, 18, rs1, asi, rs2) -# define LDA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) -# define LDDA(rs1, rs2, asi, rd) f3a(3, rd, 19, rs1, asi, rs2) +# if __WORDSIZE == 32 +# define LDA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) +# define LDDA(rs1, rs2, asi, rd) f3a(3, rd, 19, rs1, asi, rs2) +# else +# define LDSWA(rs1, rs2, asi, rd) f3a(3, rd, 24, rs1, asi, rs2) +# define LDUWA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) +# define LDXA(rs1, rs2, asi, rd) f3a(3, rd, 27, rs1, asi, rs2) +# endif # define LDC(rs1, rs2, rd) f3r(3, rd, 48, rs1, rs2) # define LDCI(rs1, imm, rd) f3i(3, rd, 48, rs1, imm) # define LDDC(rs1, rs2, rd) f3r(3, rd, 51, rs1, rs2) @@ -116,14 +160,26 @@ static void _f3a(jit_state_t*,jit_int32_t, # define STBI(rd, rs1, imm) f3i(3, rd, 5, rs1, imm) # define STH(rd, rs1, rs2) f3r(3, rd, 6, rs1, rs2) # define STHI(rd, rs1, imm) f3i(3, rd, 6, rs1, imm) -# define ST(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) -# define STI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) -# define STD(rrd, s1, rs2) f3r(3, rd, 7, rs1, rs2) -# define STDI(rd, rs1, imm) f3i(3, rd, 7, rs1, imm) +# if __WORDSIZE == 32 +# define ST(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) +# define STI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) +# define STD(rd, rs1, rs2) f3r(3, rd, 7, rs1, rs2) +# define STDI(rd, rs1, imm) f3i(3, rd, 7, rs1, imm) +# else +# define STW(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) +# define STWI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) +# define STX(rd, rs1, rs2) f3r(3, rd, 14, rs1, rs2) +# define STXI(rd, rs1, imm) f3i(3, rd, 14, rs1, imm) +# endif # define STBA(rd, rs1, rs2) f3a(3, rd, 21, rs1, asi, rs2) # define STHA(rd, rs1, rs2) f3a(3, rd, 22, rs1, asi, rs2) -# define STA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) -# define STDA(rd, rs1, rs2) f3a(3, rd, 23, rs1, asi, rs2) +# if __WORDSIZE == 32 +# define STA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) +# define STDA(rd, rs1, 
rs2) f3a(3, rd, 23, rs1, asi, rs2) +# else +# define STWA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) +# define STXA(rd, rs1, rs2) f3a(3, rd, 30, rs1, asi, rs2) +# endif # define STC(rd, rs1, rs2) f3r(3, rd, 52, rs1, rs2) # define STCI(rd, rs1, imm) f3i(3, rd, 52, rs1, imm) # define STDC(rd, rs1, rs2) f3r(3, rd, 55, rs1, rs2) @@ -174,6 +230,14 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SRLI(rs1, imm, rd) f3i(2, rd, 38, rs1, imm) # define SRA(rs1, rs2, rd) f3r(2, rd, 39, rs1, rs2) # define SRAI(rs1, imm, rd) f3i(2, rd, 39, rs1, imm) +# if __WORDSIZE == 64 +# define SLLX(rs1, rs2, rd) f3rx(2, rd, 37, rs1, rs2) +# define SLLXI(rs1, imm, rd) f3s(2, rd, 37, rs1, imm) +# define SRLX(rs1, rs2, rd) f3rx(2, rd, 38, rs1, rs2) +# define SRLXI(rs1, imm, rd) f3s(2, rd, 38, rs1, imm) +# define SRAX(rs1, rs2, rd) f3rx(2, rd, 39, rs1, rs2) +# define SRAXI(rs1, imm, rd) f3s(2, rd, 39, rs1, imm) +# endif # define ADD(rs1, rs2, rd) f3r(2, rd, 0, rs1, rs2) # define ADDI(rs1, imm, rd) f3i(2, rd, 0, rs1, imm) # define ADDcc(rs1, rs2, rd) f3r(2, rd, 16, rs1, rs2) @@ -219,6 +283,14 @@ static void _f3a(jit_state_t*,jit_int32_t, # define UDIVIcc(rs1, imm, rd) f3i(2, rd, 30, rs1, imm) # define SDIVcc(rs1, rs2, rd) f3r(2, rd, 31, rs1, rs2) # define SDIVIcc(rs1, imm, rd) f3i(2, rd, 31, rs1, imm) +# if __WORDSIZE == 64 +# define MULX(rs1, rs2, rd) f3r(2, rd, 9, rs1, rs2) +# define MULXI(rs1, imm, rd) f3i(2, rd, 9, rs1, imm) +# define SDIVX(rs1, rs2, rd) f3r(2, rd, 45, rs1, rs2) +# define SDIVXI(rs1, imm, rd) f3i(2, rd, 45, rs1, imm) +# define UDIVX(rs1, rs2, rd) f3r(2, rd, 13, rs1, rs2) +# define UDIVXI(rs1, imm, rd) f3i(2, rd, 13, rs1, imm) +# endif # define SAVE(rs1, rs2, rd) f3r(2, rd, 60, rs1, rs2) # define SAVEI(rs1, imm, rd) f3i(2, rd, 60, rs1, imm) # define RESTORE(rs1, rs2, rd) f3r(2, rd, 61, rs1, rs2) @@ -243,6 +315,25 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SPARC_BNEG 6 /* negative - N */ # define SPARC_BVC 15 /* overflow clear - not V */ # define SPARC_BVS 
7 /* overflow set - V */ +/* Preferred BPcc integer branch opcodes */ +# if __WORDSIZE == 64 +# define SPARC_BPA 8 /* always - 1 */ +# define SPARC_BPN 0 /* never - 0 */ +# define SPARC_BPNE 9 /* not equal - not Z */ +# define SPARC_BPE 1 /* equal - Z */ +# define SPARC_BPG 10 /* greater - not (Z or (N xor V)) */ +# define SPARC_BPLE 2 /* less or equal - Z or (N xor V) */ +# define SPARC_BPGE 11 /* greater or equal - not (N xor V) */ +# define SPARC_BPL 3 /* less - N xor V */ +# define SPARC_BPGU 12 /* greater unsigned - not (C or Z) */ +# define SPARC_BPLEU 4 /* less or equal unsigned - C or Z */ +# define SPARC_BPCC 13 /* carry clear (greater than or equal, unsigned) - not C */ +# define SPARC_BPCS 5 /* carry set (less than, unsigned) - C */ +# define SPARC_BPPOS 14 /* positive - not N */ +# define SPARC_BPNEG 6 /* negative - N */ +# define SPARC_BPVC 15 /* overflow clear - not V */ +# define SPARC_BPVS 7 /* overflow set - V */ +# endif # define B(cc, imm) f2b(0, 0, cc, 2, imm) # define Ba(cc, imm) f2b(0, 1, cc, 2, imm) # define BA(imm) B(SPARC_BA, imm) @@ -285,6 +376,28 @@ static void _f3a(jit_state_t*,jit_int32_t, # define BVCa(imm) Ba(SPARC_BVC, imm) # define BVS(imm) B(SPARC_BVS, imm) # define BVSa(imm) Ba(SPARC_BVS, imm) +# if __WORDSIZE == 64 +# define BPccap(cc,a,cc1, cc0,p,imm) f2bp(0, a, cc, 1, cc1, cc0, p, imm) +# define BPap(cc, imm) f2bp(0, 1, cc, 1, 1, 0, p, imm) +# define BPa(cc, imm) f2bp(0, 1, cc, 1, 1, 0, 1, imm) +# define BP(cc, imm) f2bp(0, 0, cc, 1, 1, 0, 1, imm) +# define BPA(imm) BP(SPARC_BPA, imm) +# define BPN(imm) BP(SPARC_BPN, imm) +# define BNPE(imm) BP(SPARC_BPNE, imm) +# define BPE(imm) BP(SPARC_BPE, imm) +# define BPG(imm) BP(SPARC_BPG, imm) +# define BPLE(imm) BP(SPARC_BPLE, imm) +# define BPGE(imm) BP(SPARC_BPGE, imm) +# define BPL(imm) BP(SPARC_BPL, imm) +# define BPGU(imm) BP(SPARC_BPGU, imm) +# define BPLEU(imm) BP(SPARC_BPLEU, imm) +# define BPCC(imm) BP(SPARC_BPCC, imm) +# define BPCS(imm) BP(SPARC_BPCS, imm) +# define 
BPPOS(imm) BP(SPARC_BPPOS, imm) +# define BPNEG(imm) BP(SPARC_BPNEG, imm) +# define BPVC(imm) BP(SPARC_BPVC, imm) +# define BPVS(imm) BP(SPARC_BPVS, imm) +# endif # define SPARC_CBA 8 /* always */ # define SPARC_CBN 0 /* never */ # define SPARC_CB3 7 /* 3 */ @@ -437,29 +550,54 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); # define addr(r0, r1, r2) ADD(r1, r2, r0) # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define addcr(r0, r1, r2) ADDcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define addcr(r0, r1, r2) ADDcc(r1, r2, r0) +# else +# define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2) +static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define addci(r0, r1, i0) _addci(_jit, r0, r1, i0) static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define addxr(r0, r1, r2) ADDXcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define addxr(r0, r1, r2) ADDXcc(r1, r2, r0) +# else +# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2) +static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0) static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define subr(r0, r1, r2) SUB(r1, r2, r0) # define subi(r0, r1, i0) _subi(_jit, r0, r1, i0) static void _subi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define subcr(r0, r1, r2) SUBcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define subcr(r0, r1, r2) SUBcc(r1, r2, r0) +# else +# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2) +static void _subcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define subci(r0, r1, i0) _subci(_jit, r0, r1, i0) static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) +# else +# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, 
r2) +static void _subxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define mulr(r0, r1, r2) UMUL(r1, r2, r0) +# if __WORDSIZE == 32 +# define mulr(r0, r1, r2) UMUL(r1, r2, r0) +# else +# define mulr(r0, r1, r2) MULX(r1, r2, r0) +# endif # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) -# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) -# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) +# if __WORDSIZE == 32 +# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) +# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) +# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t,jit_bool_t); # define qmuli(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,1) @@ -467,6 +605,20 @@ static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, # define iqmuli(r0,r1,r2,i0,cc) _iqmuli(_jit,r0,r1,r2,i0,cc) static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t, jit_int32_t,jit_word_t,jit_bool_t); +# else +# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) +static void _qmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t); +# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t); +# endif # define divr(r0, r1, r2) _divr(_jit, r0, r1, r2) static 
void _divr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define divi(r0, r1, i0) _divi(_jit, r0, r1, i0) @@ -502,14 +654,22 @@ static void _ori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define xorr(r0, r1, r2) XOR(r1, r2, r0) # define xori(r0, r1, i0) _xori(_jit, r0, r1, i0) static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define lshr(r0, r1, r2) SLL(r1, r2, r0) -# define lshi(r0, r1, i0) SLLI(r1, i0, r0) -# define rshr(r0, r1, r2) SRA(r1, r2, r0) -# define rshi(r0, r1, i0) SRAI(r1, i0, r0) -# define rshr_u(r0, r1, r2) SRL(r1, r2, r0) -# define rshi_u(r0, r1, i0) SRLI(r1, i0, r0) +# if __WORDSIZE == 32 +# define lshr(r0, r1, r2) SLL(r1, r2, r0) +# define lshi(r0, r1, i0) SLLI(r1, i0, r0) +# define rshr(r0, r1, r2) SRA(r1, r2, r0) +# define rshi(r0, r1, i0) SRAI(r1, i0, r0) +# define rshr_u(r0, r1, r2) SRL(r1, r2, r0) +# define rshi_u(r0, r1, i0) SRLI(r1, i0, r0) +# else +# define lshr(r0, r1, r2) SLLX(r1, r2, r0) +# define lshi(r0, r1, i0) SLLXI(r1, i0, r0) +# define rshr(r0, r1, r2) SRAX(r1, r2, r0) +# define rshi(r0, r1, i0) SRAXI(r1, i0, r0) +# define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) +# define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) +# endif # define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -517,30 +677,63 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 32 +# define htonr_ui(r0,r1) movr(r0,r1) +# else +# define htonr_ui(r0,r1) extr_ui(r0,r1) +# define htonr_ul(r0,r1) movr(r0,r1) +# define extr_i(r0,r1) _extr_i(_jit,r0,r1) +static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); +# define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) +static void 
_extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define cr(cc, r0, r1, r2) _cr(_jit, cc, r0, r1, r2) static void _cr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define cw(cc, r0, r1, i0) _cw(_jit, cc, r0, r1, i0) static void _cw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); -# define ltr(r0, r1, r2) cr(SPARC_BL, r0, r1, r2) -# define lti(r0, r1, i0) cw(SPARC_BL, r0, r1, i0) -# define ltr_u(r0, r1, r2) cr(SPARC_BLU, r0, r1, r2) -# define lti_u(r0, r1, i0) cw(SPARC_BLU, r0, r1, i0) -# define ler(r0, r1, r2) cr(SPARC_BLE, r0, r1, r2) -# define lei(r0, r1, i0) cw(SPARC_BLE, r0, r1, i0) -# define ler_u(r0, r1, r2) cr(SPARC_BLEU, r0, r1, r2) -# define lei_u(r0, r1, i0) cw(SPARC_BLEU, r0, r1, i0) -# define eqr(r0, r1, r2) cr(SPARC_BE, r0, r1, r2) -# define eqi(r0, r1, i0) cw(SPARC_BE, r0, r1, i0) -# define ger(r0, r1, r2) cr(SPARC_BGE, r0, r1, r2) -# define gei(r0, r1, i0) cw(SPARC_BGE, r0, r1, i0) -# define ger_u(r0, r1, r2) cr(SPARC_BGEU, r0, r1, r2) -# define gei_u(r0, r1, i0) cw(SPARC_BGEU, r0, r1, i0) -# define gtr(r0, r1, r2) cr(SPARC_BG, r0, r1, r2) -# define gti(r0, r1, i0) cw(SPARC_BG, r0, r1, i0) -# define gtr_u(r0, r1, r2) cr(SPARC_BGU, r0, r1, r2) -# define gti_u(r0, r1, i0) cw(SPARC_BGU, r0, r1, i0) -# define ner(r0, r1, r2) cr(SPARC_BNE, r0, r1, r2) -# define nei(r0, r1, i0) cw(SPARC_BNE, r0, r1, i0) +# if __WORDSIZE == 32 +# define ltr(r0, r1, r2) cr(SPARC_BL, r0, r1, r2) +# define lti(r0, r1, i0) cw(SPARC_BL, r0, r1, i0) +# define ltr_u(r0, r1, r2) cr(SPARC_BLU, r0, r1, r2) +# define lti_u(r0, r1, i0) cw(SPARC_BLU, r0, r1, i0) +# define ler(r0, r1, r2) cr(SPARC_BLE, r0, r1, r2) +# define lei(r0, r1, i0) cw(SPARC_BLE, r0, r1, i0) +# define ler_u(r0, r1, r2) cr(SPARC_BLEU, r0, r1, r2) +# define lei_u(r0, r1, i0) cw(SPARC_BLEU, r0, r1, i0) +# define eqr(r0, r1, r2) cr(SPARC_BE, r0, r1, r2) +# define eqi(r0, r1, i0) cw(SPARC_BE, r0, r1, i0) +# define ger(r0, r1, r2) cr(SPARC_BGE, r0, r1, r2) +# define gei(r0, r1, 
i0) cw(SPARC_BGE, r0, r1, i0) +# define ger_u(r0, r1, r2) cr(SPARC_BGEU, r0, r1, r2) +# define gei_u(r0, r1, i0) cw(SPARC_BGEU, r0, r1, i0) +# define gtr(r0, r1, r2) cr(SPARC_BG, r0, r1, r2) +# define gti(r0, r1, i0) cw(SPARC_BG, r0, r1, i0) +# define gtr_u(r0, r1, r2) cr(SPARC_BGU, r0, r1, r2) +# define gti_u(r0, r1, i0) cw(SPARC_BGU, r0, r1, i0) +# define ner(r0, r1, r2) cr(SPARC_BNE, r0, r1, r2) +# define nei(r0, r1, i0) cw(SPARC_BNE, r0, r1, i0) +# else +# define ltr(r0, r1, r2) cr(SPARC_BPL, r0, r1, r2) +# define lti(r0, r1, i0) cw(SPARC_BPL, r0, r1, i0) +# define ltr_u(r0, r1, r2) cr(SPARC_BPCS, r0, r1, r2) +# define lti_u(r0, r1, i0) cw(SPARC_BPCS, r0, r1, i0) +# define ler(r0, r1, r2) cr(SPARC_BPLE, r0, r1, r2) +# define lei(r0, r1, i0) cw(SPARC_BPLE, r0, r1, i0) +# define ler_u(r0, r1, r2) cr(SPARC_BPLEU, r0, r1, r2) +# define lei_u(r0, r1, i0) cw(SPARC_BPLEU, r0, r1, i0) +# define eqr(r0, r1, r2) cr(SPARC_BPE, r0, r1, r2) +# define eqi(r0, r1, i0) cw(SPARC_BPE, r0, r1, i0) +# define ger(r0, r1, r2) cr(SPARC_BPGE, r0, r1, r2) +# define gei(r0, r1, i0) cw(SPARC_BPGE, r0, r1, i0) +# define ger_u(r0, r1, r2) cr(SPARC_BPCC, r0, r1, r2) +# define gei_u(r0, r1, i0) cw(SPARC_BPCC, r0, r1, i0) +# define gtr(r0, r1, r2) cr(SPARC_BPG, r0, r1, r2) +# define gti(r0, r1, i0) cw(SPARC_BPG, r0, r1, i0) +# define gtr_u(r0, r1, r2) cr(SPARC_BPGU, r0, r1, r2) +# define gti_u(r0, r1, i0) cw(SPARC_BPGU, r0, r1, i0) +# define ner(r0, r1, r2) cr(SPARC_BPNE, r0, r1, r2) +# define nei(r0, r1, i0) cw(SPARC_BPNE, r0, r1, i0) +# endif # define ldr_c(r0, r1) LDSB(r1, 0, r0) # define ldi_c(r0, i0) _ldi_c(_jit, r0, i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); @@ -553,11 +746,25 @@ static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t); # define ldr_us(r0, r1) LDUH(r1, 0, r0) # define ldi_us(r0, i0) _ldi_us(_jit, r0, i0) static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t); -# define ldr(u, v) ldr_i(u, v) -# define ldr_i(r0, r1) LD(r1, 0, r0) -# define ldi(u, v) 
ldi_i(u, v) +# if __WORDSIZE == 32 +# define ldr_i(r0, r1) LD(r1, 0, r0) +# define ldr(u, v) ldr_i(u, v) +# define ldi(u, v) ldi_i(u, v) +# else +# define ldr_i(r0, r1) LDSW(r1, 0, r0) +# define ldr_ui(r0, r1) LDUW(r1, 0, r0) +# define ldr_l(r0, r1) LDX(r1, 0, r0) +# define ldr(u, v) ldr_l(u, v) +# define ldi(u, v) ldi_l(u, v) +# endif # define ldi_i(r0, i0) _ldi_i(_jit, r0, i0) static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0) +static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t); +# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0) +static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t); +# endif # define ldxr_c(r0, r1, r2) LDSB(r1, r2, r0) # define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0) static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -570,59 +777,118 @@ static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldxr_us(r0, r1, r2) LDUH(r1, r2, r0) # define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0) static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define ldxr(u, v, w) ldxr_i(u, v, w) -# define ldxr_i(r0, r1, r2) LD(r1, r2, r0) -# define ldxi(u, v, w) ldxi_i(u, v, w) +# if __WORDSIZE == 32 +# define ldxr(u, v, w) ldxr_i(u, v, w) +# define ldxr_i(r0, r1, r2) LD(r1, r2, r0) +# define ldxi(u, v, w) ldxi_i(u, v, w) +# else +# define ldxr(u, v, w) ldxr_l(u, v, w) +# define ldxr_i(r0, r1, r2) LDSW(r1, r2, r0) +# define ldxr_ui(r0, r1, r2) LDUW(r1, r2, r0) +# define ldxr_l(r0, r1, r2) LDX(r1, r2, r0) +# define ldxi(u, v, w) ldxi_l(u, v, w) +# endif # define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0) static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0) +static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0) +static void 
_ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# endif # define str_c(r0, r1) STB(r1, r0, 0) # define sti_c(i0, r0) _sti_c(_jit, i0, r0) static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); # define str_s(r0, r1) STH(r1, r0, 0) # define sti_s(i0, r0) _sti_s(_jit, i0, r0) static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t); -# define str(u, v) str_i(u, v) -# define str_i(r0, r1) STI(r1, r0, 0) -# define sti(u, v) sti_i(u, v) +# if __WORDSIZE == 32 +# define str(u, v) str_i(u, v) +# define str_i(r0, r1) STI(r1, r0, 0) +# define sti(u, v) sti_i(u, v) +# else +# define str(u, v) str_l(u, v) +# define str_i(r0, r1) STW(r1, r0, 0) +# define str_l(r0, r1) STX(r1, r0, 0) +# define sti(u, v) sti_l(u, v) +# endif # define sti_i(i0, r0) _sti_i(_jit, i0, r0) static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); +# if __WORDSIZE == 64 +# define sti_l(i0, r0) _sti_l(_jit, i0, r0) +static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t); +# endif # define stxr_c(r0, r1, r2) STB(r2, r1, r0) # define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1) static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxr_s(r0, r1, r2) STH(r2, r1, r0) # define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1) static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# define stxr(u, v, w) stxr_i(u, v, w) -# define stxr_i(r0, r1, r2) ST(r2, r1, r0) -# define stxi(u, v, w) stxi_i(u, v, w) +# if __WORDSIZE == 32 +# define stxr(u, v, w) stxr_i(u, v, w) +# define stxr_i(r0, r1, r2) ST(r2, r1, r0) +# define stxi(u, v, w) stxi_i(u, v, w) +# else +# define stxr(u, v, w) stxr_l(u, v, w) +# define stxr_i(r0, r1, r2) STW(r2, r1, r0) +# define stxi(u, v, w) stxi_l(u, v, w) +# define stxr_l(r0, r1, r2) STX(r2, r1, r0) +# endif # define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1) static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) +static void 
_stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# endif # define br(cc, i0, r0, r1) _br(_jit, cc, i0, r0, r1) static jit_word_t _br(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); # define bw(cc, i0, r0, i1) _bw(_jit, cc, i0, r0, i1) static jit_word_t _bw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); -# define bltr(i0, r0, r1) br(SPARC_BL, i0, r0, r1) -# define blti(i0, r0, i1) bw(SPARC_BL, i0, r0, i1) -# define bltr_u(i0, r0, r1) br(SPARC_BLU, i0, r0, r1) -# define blti_u(i0, r0, i1) bw(SPARC_BLU, i0, r0, i1) -# define bler(i0, r0, r1) br(SPARC_BLE, i0, r0, r1) -# define blei(i0, r0, i1) bw(SPARC_BLE, i0, r0, i1) -# define bler_u(i0, r0, r1) br(SPARC_BLEU, i0, r0, r1) -# define blei_u(i0, r0, i1) bw(SPARC_BLEU, i0, r0, i1) -# define beqr(i0, r0, r1) br(SPARC_BE, i0, r0, r1) -# define beqi(i0, r0, i1) bw(SPARC_BE, i0, r0, i1) -# define bger(i0, r0, r1) br(SPARC_BGE, i0, r0, r1) -# define bgei(i0, r0, i1) bw(SPARC_BGE, i0, r0, i1) -# define bger_u(i0, r0, r1) br(SPARC_BGEU, i0, r0, r1) -# define bgei_u(i0, r0, i1) bw(SPARC_BGEU, i0, r0, i1) -# define bgtr(i0, r0, r1) br(SPARC_BG, i0, r0, r1) -# define bgti(i0, r0, i1) bw(SPARC_BG, i0, r0, i1) -# define bgtr_u(i0, r0, r1) br(SPARC_BGU, i0, r0, r1) -# define bgti_u(i0, r0, i1) bw(SPARC_BGU, i0, r0, i1) -# define bner(i0, r0, r1) br(SPARC_BNE, i0, r0, r1) -# define bnei(i0, r0, i1) bw(SPARC_BNE, i0, r0, i1) +# if __WORDSIZE == 32 +# define bltr(i0, r0, r1) br(SPARC_BL, i0, r0, r1) +# define blti(i0, r0, i1) bw(SPARC_BL, i0, r0, i1) +# define bltr_u(i0, r0, r1) br(SPARC_BLU, i0, r0, r1) +# define blti_u(i0, r0, i1) bw(SPARC_BLU, i0, r0, i1) +# define bler(i0, r0, r1) br(SPARC_BLE, i0, r0, r1) +# define blei(i0, r0, i1) bw(SPARC_BLE, i0, r0, i1) +# define bler_u(i0, r0, r1) br(SPARC_BLEU, i0, r0, r1) +# define blei_u(i0, r0, i1) bw(SPARC_BLEU, i0, r0, i1) +# define beqr(i0, r0, r1) br(SPARC_BE, i0, r0, r1) +# define beqi(i0, r0, i1) bw(SPARC_BE, i0, r0, i1) +# define bger(i0, r0, 
r1) br(SPARC_BGE, i0, r0, r1) +# define bgei(i0, r0, i1) bw(SPARC_BGE, i0, r0, i1) +# define bger_u(i0, r0, r1) br(SPARC_BGEU, i0, r0, r1) +# define bgei_u(i0, r0, i1) bw(SPARC_BGEU, i0, r0, i1) +# define bgtr(i0, r0, r1) br(SPARC_BG, i0, r0, r1) +# define bgti(i0, r0, i1) bw(SPARC_BG, i0, r0, i1) +# define bgtr_u(i0, r0, r1) br(SPARC_BGU, i0, r0, r1) +# define bgti_u(i0, r0, i1) bw(SPARC_BGU, i0, r0, i1) +# define bner(i0, r0, r1) br(SPARC_BNE, i0, r0, r1) +# define bnei(i0, r0, i1) bw(SPARC_BNE, i0, r0, i1) +# else +# define bltr(i0, r0, r1) br(SPARC_BPL, i0, r0, r1) +# define blti(i0, r0, i1) bw(SPARC_BPL, i0, r0, i1) +# define bltr_u(i0, r0, r1) br(SPARC_BPCS, i0, r0, r1) +# define blti_u(i0, r0, i1) bw(SPARC_BPCS, i0, r0, i1) +# define bler(i0, r0, r1) br(SPARC_BPLE, i0, r0, r1) +# define blei(i0, r0, i1) bw(SPARC_BPLE, i0, r0, i1) +# define bler_u(i0, r0, r1) br(SPARC_BPLEU, i0, r0, r1) +# define blei_u(i0, r0, i1) bw(SPARC_BPLEU, i0, r0, i1) +# define beqr(i0, r0, r1) br(SPARC_BPE, i0, r0, r1) +# define beqi(i0, r0, i1) bw(SPARC_BPE, i0, r0, i1) +# define bger(i0, r0, r1) br(SPARC_BPGE, i0, r0, r1) +# define bgei(i0, r0, i1) bw(SPARC_BPGE, i0, r0, i1) +# define bger_u(i0, r0, r1) br(SPARC_BPCC, i0, r0, r1) +# define bgei_u(i0, r0, i1) bw(SPARC_BPCC, i0, r0, i1) +# define bgtr(i0, r0, r1) br(SPARC_BPG, i0, r0, r1) +# define bgti(i0, r0, i1) bw(SPARC_BPG, i0, r0, i1) +# define bgtr_u(i0, r0, r1) br(SPARC_BPGU, i0, r0, r1) +# define bgti_u(i0, r0, i1) bw(SPARC_BPGU, i0, r0, i1) +# define bner(i0, r0, r1) br(SPARC_BPNE, i0, r0, r1) +# define bnei(i0, r0, i1) bw(SPARC_BPNE, i0, r0, i1) +# endif # define b_asr(jif,add,sgn,i0,r0,r1) _b_asr(_jit,jif,add,sgn,i0,r0,r1) static jit_word_t _b_asr(jit_state_t*,jit_bool_t,jit_bool_t,jit_bool_t, @@ -717,6 +983,30 @@ _f2b(jit_state_t *_jit, ii(v.v); } +# if __WORDSIZE == 64 +static void +_f2bp(jit_state_t *_jit, + jit_int32_t op, jit_int32_t a, jit_int32_t cond, jit_int32_t op2, + jit_int32_t cc1, jit_int32_t cc0, 
jit_int32_t p, jit_int32_t disp19) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(a & 0xfffffffe)); + assert(!(cond & 0xfffffff0)); + assert(!(op2 & 0xfffffff8)); + assert(s19_p(disp19)); + v.op.b = op; + v.a.b = a; + v.cond.b = cond; + v.op2.b = op2; + v.cc1.b = cc1; + v.cc0.b = cc0; + v.p.b = p; + v.disp19.b = disp19; + ii(v.v); +} +# endif + static void _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) @@ -727,16 +1017,60 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, assert(!(op3 & 0xffffffc0)); assert(!(rs1 & 0xffffffe0)); assert(!(rs2 & 0xffffffe0)); - v.op.b = op; - v.rd.b = rd; - v.op3.b = op3; - v.rs1.b = rs1; - v.i.b = 0; - v.asi.b = 0; - v.rs2.b = rs2; + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 0; + v.asi.b = 0; + v.rs2.b = rs2; ii(v.v); } +# if __WORDSIZE == 64 +static void +_f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(rs2 & 0xffffffe0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 0; + v.x.b = 1; + v.asix.b = 0; + v.rs2.b = rs2; + ii(v.v); +} + +static void +_f3s(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t shim) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(shim & 0xffffffc0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 1; + v.x.b = 1; + v.asis.b = 0; + v.shim.b = shim; + ii(v.v); +} +# endif + static void _f3i(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t simm13) @@ -834,9 +1168,23 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) if (s13_p(i0)) 
ORI(0, i0, r0); else { - SETHI(HI(i0), r0); - if (LO(i0)) - ORI(r0, LO(i0), r0); +# if __WORDSIZE == 64 + if (i0 & 0xffffffff00000000) { + jit_int32_t reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), (i0 >> 32) & 0xffffffff); + movi(r0, i0 & 0xffffffff); + lshi(rn(reg), rn(reg), 32); + OR(rn(reg), r0, r0); + jit_unget_reg(reg); + } + else { +# endif + SETHI(HI((int)i0), r0); + if (LO(i0)) + ORI(r0, LO(i0), r0); +# if __WORDSIZE == 64 + } +# endif } } @@ -844,9 +1192,24 @@ static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_word_t w; +# if __WORDSIZE == 64 + jit_int32_t reg; +# endif w = _jit->pc.w; +# if __WORDSIZE == 64 + reg = jit_get_reg(jit_class_gpr); + SETHI(HI((int)i0), r0); + ORI(r0, LO(i0), r0); + i0 = (int)(i0 >> 32); + SETHI(HI(i0), rn(reg)); + ORI(rn(reg), LO(i0), rn(reg)); + SLLXI(rn(reg), 32, rn(reg)); + OR(rn(reg), r0, r0); + jit_unget_reg(reg); +# else SETHI(HI(i0), r0); ORI(r0, LO(i0), r0); +# endif return (w); } @@ -864,9 +1227,31 @@ _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r1, r2); + ltr_u(rn(jit_carry), rn(reg), r1); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addr(r0, r1, r2); + ltr_u(rn(jit_carry), r0, r1); + } +} +# endif + static void _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) ADDIcc(r1, i0, r0); @@ -876,11 +1261,42 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) addcr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + 
ltr_u(rn(jit_carry), rn(reg), r1); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addi(r0, r1, i0); + ltr_u(rn(jit_carry), r0, r1); + } +# endif } +# if __WORDSIZE == 64 +static void +_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + addcr(r0, r1, r2); + addcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +} +# endif + static void _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) ADDXIcc(r1, i0, r0); @@ -890,6 +1306,15 @@ _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) addxr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + addci(r0, r1, i0); + addcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +# endif } static void @@ -906,9 +1331,31 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + subr(rn(reg), r1, r2); + ltr_u(rn(jit_carry), r1, rn(reg)); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + subr(r0, r1, r2); + ltr_u(rn(jit_carry), r1, r0); + } +} +# endif + static void _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) SUBIcc(r1, i0, r0); @@ -918,11 +1365,42 @@ _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) subcr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + 
addi(rn(reg), r1, -i0); + ltr_u(rn(jit_carry), r1, rn(reg)); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addi(r0, r1, -i0); + ltr_u(rn(jit_carry), r1, r0); + } +# endif } +# if __WORDSIZE == 64 +static void +_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + subcr(r0, r1, r2); + subcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +} +#endif + static void _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) SUBXIcc(r1, i0, r0); @@ -932,6 +1410,15 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) subxr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + subci(r0, r1, i0); + subcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +# endif } static void @@ -945,8 +1432,13 @@ static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 UMULI(r1, i0, r0); +# else + MULXI(r1, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -955,6 +1447,7 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 32 static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) @@ -986,39 +1479,149 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, } } +# else +static __int128_t __llmul(jit_word_t a, jit_word_t b) +{ + return (__int128_t)a * (__int128_t)b; +} + +# define QMUL_PROLOG() \ + do { \ + (void)jit_get_reg(_O0|jit_class_gpr|jit_class_named); \ + (void)jit_get_reg(_O1|jit_class_gpr|jit_class_named); \ + if (r0 != _G2_REGNO && r1 != _G2_REGNO) \ + stxi(BIAS(-8), _FP_REGNO, _G2_REGNO); 
\ + if (r0 != _G3_REGNO && r1 != _G3_REGNO) \ + stxi(BIAS(-16), _FP_REGNO, _G3_REGNO); \ + if (r0 != _G4_REGNO && r1 != _G4_REGNO) \ + stxi(BIAS(-24), _FP_REGNO, _G4_REGNO); \ + } while (0) + +# define QMUL_EPILOG() \ + do { \ + if (r0 != _G2_REGNO && r1 != _G2_REGNO) \ + ldxi(_G2_REGNO, _FP_REGNO, BIAS(-8)); \ + if (r0 != _G3_REGNO && r1 != _G3_REGNO) \ + ldxi(_G3_REGNO, _FP_REGNO, BIAS(-16)); \ + if (r0 != _G4_REGNO && r1 != _G4_REGNO) \ + ldxi(_G4_REGNO, _FP_REGNO, BIAS(-24)); \ + (void)jit_unget_reg(_O0); \ + (void)jit_unget_reg(_O1); \ + } while (0) + +static void +_qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + QMUL_PROLOG(); + movr(_O0_REGNO, r3); + movr(_O1_REGNO, r2); + calli((jit_word_t)__llmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} + +static void +_qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0) +{ + QMUL_PROLOG(); + movi(_O0_REGNO, i0); + movr(_O1_REGNO, r2); + calli((jit_word_t)__llmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} + +static __uint128_t __ullmul(jit_uword_t a, jit_uword_t b) +{ + return (__uint128_t)a * (__uint128_t)b; +} + +static void +_qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + QMUL_PROLOG(); + movr(_O0_REGNO, r3); + movr(_O1_REGNO, r2); + calli((jit_word_t)__ullmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} + +static void +_qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0) +{ + QMUL_PROLOG(); + movi(_O0_REGNO, i0); + movr(_O1_REGNO, r2); + calli((jit_word_t)__ullmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} +# endif + static void _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 32 jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); rshi(rn(reg), r1, 31); WRY(rn(reg), 0); SDIV(r1, r2, r0); 
jit_unget_reg(reg); +# else + SDIVX(r1, r2, r0); +# endif } static void _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; +# if __WORDSIZE == 32 reg = jit_get_reg(jit_class_gpr); +# endif if (s13_p(i0)) { +# if __WORDSIZE == 32 rshi(rn(reg), r1, 31); WRY(rn(reg), 0); SDIVI(r1, i0, r0); +# else + SDIVXI(r1, i0, r0); +# endif } else { +# if __WORDSIZE == 64 + reg = jit_get_reg(jit_class_gpr); +# endif movi(rn(reg), i0); divr(r0, r1, rn(reg)); +# if __WORDSIZE == 64 + jit_unget_reg(reg); +# endif } +# if __WORDSIZE == 32 jit_unget_reg(reg); +# endif } static void _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 32 WRYI(0, 0); UDIV(r1, r2, r0); +# else + UDIVX(r1, r2, r0); +# endif } static void @@ -1026,8 +1629,12 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (s13_p(i0)) { +# if __WORDSIZE == 32 WRYI(0, 0); UDIVI(r1, i0, r0); +# else + UDIVXI(r1, i0, r0); +# endif } else { reg = jit_get_reg(jit_class_gpr); @@ -1185,30 +1792,50 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 24); - rshi(r0, r0, 24); + lshi(r0, r1, __WORDSIZE - 8); + rshi(r0, r0, __WORDSIZE - 8); } static void _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 16); - rshi(r0, r0, 16); + lshi(r0, r1, __WORDSIZE - 16); + rshi(r0, r0, __WORDSIZE - 16); } static void _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 16); - rshi_u(r0, r0, 16); + lshi(r0, r1, __WORDSIZE - 16); + rshi_u(r0, r0, __WORDSIZE - 16); } +#if __WORDSIZE == 64 +static void +_extr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + lshi(r0, r1, __WORDSIZE - 32); + rshi(r0, r0, __WORDSIZE - 32); +} + +static void +_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + lshi(r0, r1, __WORDSIZE - 32); + rshi_u(r0, r0, 
__WORDSIZE - 32); +} +#endif + static void _cr(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { CMP(r1, r2); +# if __WORDSIZE == 32 Ba(cc, 3); +# else + BPa(cc, 3); +# endif movi(r0, 1); movi(r0, 0); } @@ -1220,7 +1847,11 @@ _cw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t reg; if (s13_p(i0)) { CMPI(r1, i0); +# if __WORDSIZE == 32 Ba(cc, 3); +# else + BPa(cc, 3); +# endif movi(r0, 1); movi(r0, 0); } @@ -1292,8 +1923,13 @@ static void _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 LDI(0, i0, r0); +# else + LDSWI(0, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1302,6 +1938,36 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDUWI(0, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_ui(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDXI(0, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_l(r0, rn(reg)); + jit_unget_reg(reg); + } +} +# endif + static void _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1362,8 +2028,13 @@ static void _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 LDI(r1, i0, r0); +# else + LDSWI(r1, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1372,6 +2043,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + 
LDUWI(r1, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_ui(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDXI(r1, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_l(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} +# endif + static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -1404,8 +2105,13 @@ static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 STI(r0, 0, i0); +# else + STWI(r0, 0, i0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1414,6 +2120,22 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } } +# if __WORDSIZE == 64 +static void +_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + if (s13_p(i0)) + STXI(r0, 0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_l(rn(reg), r0); + jit_unget_reg(reg); + } +} +# endif + static void _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { @@ -1446,8 +2168,13 @@ static void _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 STI(r1, r0, i0); +# else + STWI(r1, r0, i0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1456,6 +2183,22 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +# if __WORDSIZE == 64 +static void +_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (s13_p(i0)) + STXI(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxr_l(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} +# endif + static jit_word_t _br(jit_state_t *_jit, jit_int32_t cc, 
jit_word_t i0, jit_int32_t r0, jit_int32_t r1) @@ -1463,7 +2206,11 @@ _br(jit_state_t *_jit, jit_int32_t cc, jit_word_t w; CMP(r0, r1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); +# else + BP(cc, (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1477,7 +2224,11 @@ _bw(jit_state_t *_jit, jit_int32_t cc, if (s13_p(i1)) { CMPI(r0, i1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); +# else + BP(cc, (i0 - w) >> 2); /* same branch form as _br on 64 bit */ +# endif NOP(); } else { @@ -1499,10 +2250,17 @@ _b_asr(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn, else SUBcc(r0, r1, r0); w = _jit->pc.w; +# if __WORDSIZE == 32 B(sgn ? (jif ? SPARC_BVS : SPARC_BVC) : (jif ? SPARC_BCS : SPARC_BCC), (i0 - w) >> 2); +# else + BP(sgn ? + (jif ? SPARC_BPVS : SPARC_BPVC) : + (jif ? SPARC_BPCS : SPARC_BPCC), + (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1519,10 +2277,17 @@ _b_asw(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn, else SUBIcc(r0, i1, r0); w = _jit->pc.w; +# if __WORDSIZE == 32 B(sgn ? (jif ? SPARC_BVS : SPARC_BVC) : (jif ? SPARC_BCS : SPARC_BCC), (i0 - w) >> 2); +# else + BP(sgn ? + (jif ? SPARC_BPVS : SPARC_BPVC) : + (jif ? SPARC_BPCS : SPARC_BPCC), + (i0 - w) >> 2); +# endif NOP(); } else { @@ -1541,7 +2306,11 @@ _bm_r(jit_state_t *_jit, jit_bool_t set, jit_word_t w; BTST(r0, r1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2); +# else + BP(set ? SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1555,7 +2324,11 @@ _bm_w(jit_state_t *_jit, jit_bool_t set, if (s13_p(i1)) { BTSTI(r0, i1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2); +# else + BP(set ?
SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2); +# endif NOP(); } else { @@ -1632,6 +2405,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) return (w); } +#define OFF(n) BIAS(((n) * sizeof(jit_word_t))) static void _prolog(jit_state_t *_jit, jit_node_t *node) { @@ -1654,32 +2428,34 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - stxi(0, _SP_REGNO, _L0_REGNO); + stxi(OFF(0), _SP_REGNO, _L0_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - stxi(4, _SP_REGNO, _L1_REGNO); + stxi(OFF(1), _SP_REGNO, _L1_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - stxi(8, _SP_REGNO, _L2_REGNO); + stxi(OFF(2), _SP_REGNO, _L2_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - stxi(12, _SP_REGNO, _L3_REGNO); + stxi(OFF(3), _SP_REGNO, _L3_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - stxi(16, _SP_REGNO, _L4_REGNO); + stxi(OFF(4), _SP_REGNO, _L4_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - stxi(20, _SP_REGNO, _L5_REGNO); + stxi(OFF(5), _SP_REGNO, _L5_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - stxi(24, _SP_REGNO, _L6_REGNO); + stxi(OFF(6), _SP_REGNO, _L6_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - stxi(28, _SP_REGNO, _L7_REGNO); + stxi(OFF(7), _SP_REGNO, _L7_REGNO); if (_jitc->function->allocar) { reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), _jitc->function->self.aoff); + movi(rn(reg), BIAS(_jitc->function->self.aoff)); + /* Already "biased" by allocai */ stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); jit_unget_reg(reg); } if (_jitc->function->self.call & jit_call_varargs) { for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) - stxi(68 + reg * 4, _SP_REGNO, rn(_I0 + reg)); + stxi(BIAS((16 + (__WORDSIZE == 32)) * sizeof(jit_word_t) + + reg * sizeof(jit_word_t)), _FP_REGNO, rn(_I0 + reg)); } } @@ -1691,21 +2467,21 @@ 
_epilog(jit_state_t *_jit, jit_node_t *node) /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - ldxi(_L0_REGNO, _SP_REGNO, 0); + ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0)); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - ldxi(_L1_REGNO, _SP_REGNO, 4); + ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1)); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - ldxi(_L2_REGNO, _SP_REGNO, 8); + ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2)); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - ldxi(_L3_REGNO, _SP_REGNO, 12); + ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3)); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - ldxi(_L4_REGNO, _SP_REGNO, 16); + ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4)); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - ldxi(_L5_REGNO, _SP_REGNO, 20); + ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5)); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - ldxi(_L6_REGNO, _SP_REGNO, 24); + ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6)); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - ldxi(_L7_REGNO, _SP_REGNO, 28); + ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7)); RESTOREI(0, 0, 0); RETL(); NOP(); @@ -1715,7 +2491,12 @@ static void _vastart(jit_state_t *_jit, jit_int32_t r0) { /* Initialize stack pointer to the first stack argument. */ - addi(r0, _SP_REGNO, 68 + _jitc->function->vagp * 4); + if (jit_arg_reg_p(_jitc->function->vagp)) + addi(r0, _FP_REGNO, BIAS((16 + (__WORDSIZE == 32) + + _jitc->function->vagp) * + sizeof(jit_word_t))); + else + addi(r0, _FP_REGNO, BIAS(_jitc->function->self.size)); } static void @@ -1727,7 +2508,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) ldr(r0, r1); /* Update vararg stack pointer. 
*/ - addi(r1, r1, 4); + addi(r1, r1, sizeof(jit_word_t)); } static void @@ -1747,15 +2528,33 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) i.disp22.b = (label - instr) >> 2; u.i[0] = i.v; } +# if __WORDSIZE == 64 + else if (i.op2.b == 1) { + i.disp19.b = (label - instr) >> 2; + u.i[0] = i.v; + } +# endif else if (i.op2.b == 4) { /* movi_p */ /* SETHI */ - i.imm22.b = HI(label); + i.imm22.b = HI((int)label); u.i[0] = i.v; i.v = u.i[1]; if (i.op.b == 2 && i.op3.b == 2) { /* ORI */ i.simm13.b = LO(label); u.i[1] = i.v; +# if __WORDSIZE == 64 + i.v = u.i[2]; + assert(i.op2.b == 4); + label = (label >> 32) & 0xffffffff; + i.imm22.b = HI((int)label); + u.i[2] = i.v; + i.v = u.i[3]; + assert(i.op.b == 2 && i.op3.b == 2); + /* ORI */ + i.simm13.b = LO(label); + u.i[3] = i.v; +# endif } else abort(); diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index e8dea3412..c56b211e9 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -18,20 +18,30 @@ */ #if PROTO -# define LDF(rs1, rs2, rd) f3r(3, rd, 32, rs1, rs2) -# define LDFI(rs1, imm, rd) f3i(3, rd, 32, rs1, imm) -# define LDDF(rs1, rs2, rd) f3r(3, rd, 35, rs1, rs2) -# define LDDFI(rs1, imm, rd) f3i(3, rd, 35, rs1, imm) -# define LDFSR(rs1, rs2, rd) f3r(3, rd, 33, rs1, rs2) -# define LDFSRI(rs1, imm, rd) f3i(3, rd, 33, rs1, imm) -# define STF(rd, rs1, rs2) f3r(3, rd, 36, rs1, rs2) -# define STFI(rd, rs1, imm) f3i(3, rd, 36, rs1, imm) -# define STDF(rd, rs1, rs2) f3r(3, rd, 39, rs1, rs2) -# define STDFI(rd, rs1, imm) f3i(3, rd, 39, rs1, imm) -# define STFSR(rd, rs1, rs2) f3r(3, rd, 37, rs1, rs2) -# define STFSRI(rd, rs1, imm) f3i(3, rd, 37, rs1, imm) -# define STDFQ(rd, rs1, rs2) f3r(3, rd, 38, rs1, rs2) -# define STFDFQ(rd, rs1, imm) f3i(3, rd, 38, rs1, imm) +# if __WORDSIZE == 32 +# define FPR(r) (r) +# define CLASS_SNG jit_class_fpr +# define CLASS_DBL jit_class_fpr +# else +# define single_precision_p(r) ((r) >= 0 && (r) <= 31) +# define FPR(r) ((r) > 31 ? 
(r) - 31 : (r)) +# define CLASS_SNG (jit_class_fpr | jit_class_sng) +# define CLASS_DBL (jit_class_fpr | jit_class_dbl) +# endif +# define LDF(rs1, rs2, rd) f3r(3, FPR(rd), 32, FPR(rs1), FPR(rs2)) +# define LDFI(rs1, imm, rd) f3i(3, FPR(rd), 32, FPR(rs1), imm) +# define LDDF(rs1, rs2, rd) f3r(3, FPR(rd), 35, FPR(rs1), FPR(rs2)) +# define LDDFI(rs1, imm, rd) f3i(3, FPR(rd), 35, FPR(rs1), imm) +# define LDFSR(rs1, rs2, rd) f3r(3, FPR(rd), 33, FPR(rs1), FPR(rs2)) +# define LDFSRI(rs1, imm, rd) f3i(3, FPR(rd), 33, FPR(rs1), imm) +# define STF(rd, rs1, rs2) f3r(3, FPR(rd), 36, FPR(rs1), FPR(rs2)) +# define STFI(rd, rs1, imm) f3i(3, FPR(rd), 36, FPR(rs1), imm) +# define STDF(rd, rs1, rs2) f3r(3, FPR(rd), 39, FPR(rs1), FPR(rs2)) +# define STDFI(rd, rs1, imm) f3i(3, FPR(rd), 39, FPR(rs1), imm) +# define STFSR(rd, rs1, rs2) f3r(3, FPR(rd), 37, FPR(rs1), FPR(rs2)) +# define STFSRI(rd, rs1, imm) f3i(3, FPR(rd), 37, FPR(rs1), imm) +# define STDFQ(rd, rs1, rs2) f3r(3, FPR(rd), 38, FPR(rs1), FPR(rs2)) +# define STFDFQ(rd, rs1, imm) f3i(3, FPR(rd), 38, FPR(rs1), imm) # define SPARC_FBA 8 /* always - 1 */ # define SPARC_FBN 0 /* never - 0 */ # define SPARC_FBU 7 /* unordered - U */ @@ -86,9 +96,17 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FITOS(rs2, rd) FPop1(rd, 0, 196, rs2) # define FITOD(rs2, rd) FPop1(rd, 0, 200, rs2) # define FITOQ(rs2, rd) FPop1(rd, 0, 204, rs2) +# if __WORDSIZE == 64 +# define FXTOS(rs2, rd) FPop1(rd, 0, 132, rs2) +# define FXTOD(rs2, rd) FPop1(rd, 0, 136, rs2) +# define FxTOQ(rs2, rd) FPop1(rd, 0, 140, rs2) +# endif # define FSTOI(rs2, rd) FPop1(rd, 0, 209, rs2) # define FDTOI(rs2, rd) FPop1(rd, 0, 210, rs2) # define FQTOI(rs2, rd) FPop1(rd, 0, 211, rs2) +# define FSTOX(rs2, rd) FPop1(rd, 0, 129, rs2) +# define FDTOX(rs2, rd) FPop1(rd, 0, 130, rs2) +# define FQTOX(rs2, rd) FPop1(rd, 0, 131, rs2) # define FSTOD(rs2, rd) FPop1(rd, 0, 201, rs2) # define FSTOQ(rs2, rd) FPop1(rd, 0, 205, rs2) # define 
FDTOS(rs2, rd) FPop1(rd, 0, 198, rs2) @@ -96,8 +114,14 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FQTOS(rs2, rd) FPop1(rd, 0, 199, rs2) # define FQTOD(rs2, rd) FPop1(rd, 0, 203, rs2) # define FMOVS(rs2, rd) FPop1(rd, 0, 1, rs2) +# define FMOVD(rs2, rd) FPop1(rd, 0, 2, rs2) +# define FMOVQ(rs2, rd) FPop1(rd, 0, 3, rs2) # define FNEGS(rs2, rd) FPop1(rd, 0, 5, rs2) +# define FNEGD(rs2, rd) FPop1(rd, 0, 6, rs2) +# define FNEGQ(rs2, rd) FPop1(rd, 0, 7, rs2) # define FABSS(rs2, rd) FPop1(rd, 0, 9, rs2) +# define FABSD(rs2, rd) FPop1(rd, 0, 10, rs2) +# define FABSQ(rs2, rd) FPop1(rd, 0, 11, rs2) # define FSQRTS(rs2, rd) FPop1(rd, 0, 41, rs2) # define FSQRTD(rs2, rd) FPop1(rd, 0, 42, rs2) # define FSQRTQ(rs2, rd) FPop1(rd, 0, 43, rs2) @@ -145,30 +169,73 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define CPop2(rd, rs1, opc, rs2) f3f(rd, 55, rs1, opf, rs2) # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); -# define truncr_f(r0, r1) truncr_f_i(r0, r1) +# if __WORDSIZE == 32 +# define truncr_f(r0, r1) truncr_f_i(r0, r1) +# define truncr_d(r0, r1) truncr_d_i(r0, r1) +# else +# define truncr_f(r0, r1) truncr_f_l(r0, r1) +# define truncr_d(r0, r1) truncr_d_l(r0, r1) +# endif # define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1) static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t); -# define extr_d_f(r0, r1) FDTOS(r1, r0) +# if __WORDSIZE == 64 +# define truncr_f_l(r0, r1) _truncr_f_l(_jit, r0, r1) +static void _truncr_f_l(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# if __WORDSIZE == 32 +# define extr_d_f(r0, r1) FDTOS(r1, r0) +# else +# define extr_d_f(r0, r1) _extr_d_f(_jit, r0, r1) +static void _extr_d_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define movi_f(r0, i0) _movi_f(_jit, r0, i0) +# if __WORDSIZE == 32 +# define movr_f(r0, r1) FMOVS(r1, r0) +# else +# define movr_f(r0, r1) _movr_f(_jit, r0,
r1) +static void _movr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); -# define movr_f(r0, r1) FMOVS(r1, r0) -# define negr_f(r0, r1) FNEGS(r1, r0) -# define absr_f(r0, r1) FABSS(r1, r0) -# define sqrtr_f(r0, r1) FSQRTS(r1, r0) +# if __WORDSIZE == 32 +# define negr_f(r0, r1) FNEGS(r1, r0) +# define absr_f(r0, r1) FABSS(r1, r0) +# define sqrtr_f(r0, r1) FSQRTS(r1, r0) +# else +# define negr_f(r0, r1) _negr_f(_jit, r0, r1) +static void _negr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# define absr_f(r0, r1) _absr_f(_jit, r0, r1) +static void _absr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# define sqrtr_f(r0, r1) _sqrtr_f(_jit, r0, r1) +static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); -# define truncr_d(r0, r1) truncr_d_i(r0, r1) # define truncr_d_i(r0, r1) _truncr_d_i(_jit, r0, r1) static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t); -# define extr_f_d(r0, r1) FSTOD(r1, r0) +# if __WORDSIZE == 64 +# define truncr_d_l(r0, r1) _truncr_d_l(_jit, r0, r1) +static void _truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# if __WORDSIZE == 32 +# define extr_f_d(r0, r1) FSTOD(r1, r0) +# else +# define extr_f_d(r0, r1) _extr_f_d(_jit, r0, r1) +static void _extr_f_d(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define movi_d(r0, i0) _movi_d(_jit, r0, i0) static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*); +# if __WORDSIZE == 32 # define movr_d(r0, r1) _movr_d(_jit, r0, r1) static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define negr_d(r0, r1) _negr_d(_jit, r0, r1) static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define absr_d(r0, r1) _absr_d(_jit, r0, r1) static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t); +# else +# define movr_d(r0, r1) FMOVD(r1, r0) +# define negr_d(r0, r1) FNEGD(r1, r0) +# define 
absr_d(r0, r1) FABSD(r1, r0) +# endif # define sqrtr_d(r0, r1) FSQRTD(r1, r0) # define fop1f(op, r0, r1, i0) _fop1f(_jit, op, r0, r1, i0) static void _fop1f(jit_state_t*,jit_int32_t, @@ -182,17 +249,27 @@ static void _fop1d(jit_state_t*,jit_int32_t, # define rfop1d(op, r0, r1, i0) _rfop1d(_jit, op, r0, r1, i0) static void _rfop1d(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float64_t*); -# define addr_f(r0, r1, r2) FADDS(r1, r2, r0) +# if __WORDSIZE == 32 +# define addr_f(r0, r1, r2) FADDS(r1, r2, r0) +# define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) +# define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) +# define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) +# else +# define fop2f(op, r0, r1, r2) _fop2f(_jit, op, r0, r1, r2) +static void _fop2f(jit_state_t*, jit_int32_t, + jit_int32_t, jit_int32_t, jit_int32_t); +# define addr_f(r0, r1, r2) fop2f(SPARC_FADDS, r0, r1, r2) +# define subr_f(r0, r1, r2) fop2f(SPARC_FSUBS, r0, r1, r2) +# define mulr_f(r0, r1, r2) fop2f(SPARC_FMULS, r0, r1, r2) +# define divr_f(r0, r1, r2) fop2f(SPARC_FDIVS, r0, r1, r2) +# endif # define addi_f(r0, r1, i0) fop1f(SPARC_FADDS, r0, r1, i0) -# define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) # define subi_f(r0, r1, i0) fop1f(SPARC_FSUBS, r0, r1, i0) # define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) # define rsbi_f(r0, r1, i0) rfop1f(SPARC_FSUBS, r0, r1, i0) # define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) # define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) -# define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) # define muli_f(r0, r1, i0) fop1f(SPARC_FMULS, r0, r1, i0) -# define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) # define divi_f(r0, r1, i0) fop1f(SPARC_FDIVS, r0, r1, i0) # define addr_d(r0, r1, r2) FADDD(r1, r2, r0) # define addi_d(r0, r1, i0) fop1d(SPARC_FADDD, r0, r1, i0) @@ -270,30 +347,50 @@ _dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*); # define ordi_d(r0, r1, i0) dcw(SPARC_FBO, r0, r1, i0) # define unordr_d(r0, r1, r2) dcr(SPARC_FBU, r0, r1, r2) # define unordi_d(r0, r1, i0) 
dcw(SPARC_FBU, r0, r1, i0) -# define ldr_f(r0, r1) LDF(r1, 0, r0) +# if __WORDSIZE == 32 +# define ldr_f(r0, r1) LDF(r1, 0, r0) +# else +# define ldr_f(r0, r1) _ldr_f(_jit, r0, r1) +static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define ldi_f(r0, i0) _ldi_f(_jit, r0, i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); -# define ldxr_f(r0, r1, r2) LDF(r1, r2, r0) +# if __WORDSIZE == 32 +# define ldxr_f(r0, r1, r2) LDF(r1, r2, r0) +# else +# define ldxr_f(r0, r1, r2) _ldxr_f(_jit, r0, r1, r2) +static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define ldxi_f(r0, r1, i0) _ldxi_f(_jit, r0, r1, i0) static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define str_f(r0, r1) STF(r1, r0, 0) +# if __WORDSIZE == 32 +# define str_f(r0, r1) STF(r1, r0, 0) +# else +# define str_f(r0, r1) _str_f(_jit, r0, r1) +static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define sti_f(r0, i0) _sti_f(_jit, r0, i0) -static void _sti_f(jit_state_t*,jit_int32_t,jit_word_t); -# define stxr_f(r0, r1, r2) STF(r2, r1, r0) +static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); +# if __WORDSIZE == 32 +# define stxr_f(r0, r1, r2) STF(r2, r1, r0) +# else +# define stxr_f(r0, r1, r2) _stxr_f(_jit, r0, r1, r2) +static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define stxi_f(r0, r1, i0) _stxi_f(_jit, r0, r1, i0) -static void _stxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define ldr_d(r0, r1) LDDF(r1, 0, r0) # define ldi_d(r0, i0) _ldi_d(_jit, r0, i0) static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); # define ldxr_d(r0, r1, r2) LDDF(r1, r2, r0) # define ldxi_d(r0, r1, i0) _ldxi_d(_jit, r0, r1, i0) -static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define str_d(r0, r1) STDF(r1, r0, 0) # define 
sti_d(r0, i0) _sti_d(_jit, r0, i0) -static void _sti_d(jit_state_t*,jit_int32_t,jit_word_t); +static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); # define stxr_d(r0, r1, r2) STDF(r2, r1, r0) # define stxi_d(r0, r1, i0) _stxi_d(_jit, r0, r1, i0) -static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define fbr(cc, i0, r0, r1) _fbr(_jit, cc, i0, r0, r1) static jit_word_t _fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); @@ -372,6 +469,20 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2) { jit_instr_t v; +# if __WORDSIZE == 64 + if (rd > 31) { + assert(rd <= 63 && (rd & 1) == 0); + rd -= 31; + } + if (rs1 > 31) { + assert(rs1 <= 63 && (rs1 & 1) == 0); + rs1 -= 31; + } + if (rs2 > 31) { + assert(rs2 <= 63 && (rs2 & 1) == 0); + rs2 -= 31; + } +# endif assert(!(rd & 0xffffffe0)); assert(!(op3 & 0xffffffc0)); assert(!(rs1 & 0xffffffe0)); @@ -386,6 +497,151 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, ii(v.v); } +# if __WORDSIZE == 64 +static void +_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (r0 != r1) { + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FMOVS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FMOVS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FMOVS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FMOVS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } + } +} + +static void +_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FNEGS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FNEGS(rn(t1), r0); + jit_unget_reg(t1); + } + } + 
else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FNEGS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FNEGS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} + +static void +_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FABSS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FABSS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FABSS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FABSS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} + +static void +_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FSQRTS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FSQRTS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FSQRTS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FSQRTS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} +# endif + +# if __WORDSIZE == 64 +static void +_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r0); + FDTOS(r1, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + FDTOS(r1, r0); +} +# endif + static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { @@ -399,19 +655,39 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) data.f = *i0; reg = jit_get_reg(jit_class_gpr); movi(rn(reg), data.i & 0xffffffff); 
- stxi_i(-8, _FP_REGNO, rn(reg)); + stxi_i(BIAS(-8), _FP_REGNO, rn(reg)); jit_unget_reg(reg); - ldxi_f(r0, _FP_REGNO, -8); + ldxi_f(r0, _FP_REGNO, BIAS(-8)); } else ldi_f(r0, (jit_word_t)i0); } +# if __WORDSIZE == 64 +static void +_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + FSTOD(rn(reg), r0); + jit_unget_reg(reg); + } + else + FSTOD(r1, r0); +} +# endif + static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { union { +# if __WORDSIZE == 32 jit_int32_t i[2]; +# else + jit_word_t w; +# endif jit_float64_t d; } data; jit_int32_t reg; @@ -419,17 +695,24 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) if (_jitc->no_data) { data.d = *i0; reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 32 movi(rn(reg), data.i[0]); - stxi_i(-8, _FP_REGNO, rn(reg)); +# else + movi(rn(reg), data.w); +# endif + stxi(BIAS(-8), _FP_REGNO, rn(reg)); +# if __WORDSIZE == 32 movi(rn(reg), data.i[1]); - stxi_i(-4, _FP_REGNO, rn(reg)); + stxi_i(BIAS(-4), _FP_REGNO, rn(reg)); +# endif jit_unget_reg(reg); - ldxi_d(r0, _FP_REGNO, -8); + ldxi_d(r0, _FP_REGNO, BIAS(-8)); } else ldi_d(r0, (jit_word_t)i0); } +# if __WORDSIZE == 32 static void _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -460,15 +743,162 @@ _absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) if (r0 != r1) FMOVS(r1 + 1, r0 + 1); } +# endif + +# if __WORDSIZE == 64 +# define single_rrr(NAME, CODE) \ +static void \ +NAME(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \ +{ \ + jit_int32_t x0, t0, x1, t1, x2, t2, mask = 0; \ + if (!single_precision_p(r0)) { \ + mask |= 1; \ + t0 = jit_get_reg(CLASS_SNG); \ + x0 = rn(t0); \ + if (r0 == r1) { \ + x1 = x0; \ + movr_d(x1, r1); \ + if (r0 == r2) \ + x2 = x0; \ + } \ + else if (r0 == r2) { \ + x2 = x0; \ + movr_d(x2, r2); \ + } \ + } \ + else \ + x0 = r0; \ + if 
(!single_precision_p(r1)) { \ + if (r0 != r1) { \ + mask |= 2; \ + t1 = jit_get_reg(CLASS_SNG); \ + x1 = rn(t1); \ + movr_d(x1, r1); \ + if (r1 == r2) \ + x2 = x1; \ + } \ + } \ + else \ + x1 = r1; \ + if (!single_precision_p(r2)) { \ + if (r0 != r2 && r1 != r2) { \ + mask |= 4; \ + t2 = jit_get_reg(CLASS_SNG); \ + x2 = rn(t2); \ + movr_d(x2, r2); \ + } \ + } \ + else \ + x2 = r2; \ + CODE(x1, x2, x0); \ + if (mask & 1) { \ + movr_d(r0, x0); \ + jit_unget_reg(t0); \ + } \ + if (mask & 2) \ + jit_unget_reg(t1); \ + if (mask & 4) \ + jit_unget_reg(t2); \ +} + +static void +_fop2f(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t x0, t0, x1, t1, x2, t2, mask = 0; + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + if (r0 == r2) + x2 = x0; + } + else if (r0 == r2) { + x2 = x0; + movr_d(x2, r2); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if (r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + if (r1 == r2) + x2 = x1; + } + } + else + x1 = r1; + if (!single_precision_p(r2)) { + if (r0 != r2 && r1 != r2) { + mask |= 4; + t2 = jit_get_reg(CLASS_SNG); + x2 = rn(t2); + movr_d(x2, r2); + } + } + else + x2 = r2; + FPop1(x0, x1, op, x2); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); + if (mask & 4) + jit_unget_reg(t2); +} +# endif static void _fop1f(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if 
(r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + } + else + x1 = r1; + FPop1(x0, x1, op, rn(reg)); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); +# else FPop1(r0, r1, op, rn(reg)); +# endif jit_unget_reg(reg); } @@ -477,9 +907,43 @@ _rfop1f(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if (r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + } + else + x1 = r1; + FPop1(x0, rn(reg), op, x1); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); +# else FPop1(r0, rn(reg), op, r1); +# endif jit_unget_reg(reg); } @@ -488,7 +952,7 @@ _fop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i0); FPop1(r0, r1, op, rn(reg)); jit_unget_reg(reg); @@ -499,7 +963,7 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i0); FPop1(r0, rn(reg), op, r1); jit_unget_reg(reg); @@ -508,27 +972,95 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op, static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - stxi(-8, _FP_REGNO, r1); - ldxi_f(r0, _FP_REGNO, -8); + stxi(BIAS(-8), _FP_REGNO, r1); +# if __WORDSIZE == 32 + ldxi_f(r0, _FP_REGNO, BIAS(-8)); FITOS(r0, r0); +# else + 
ldxi_d(r0, _FP_REGNO, BIAS(-8)); + if (!single_precision_p(r0)) { + jit_int32_t reg; + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r0); + FXTOS(rn(reg), rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + FXTOS(r0, r0); +# endif } static void _truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); - FSTOI(r1, rn(reg)); - stxi_f(-8, _FP_REGNO, rn(reg)); - ldxi_i(r0, _FP_REGNO, -8); + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FSTOI(rn(reg), rn(reg)); + } + else +# endif + FSTOI(r1, rn(reg)); + stxi_f(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_i(r0, _FP_REGNO, BIAS(-8)); jit_unget_reg(reg); } +# if __WORDSIZE == 64 +static void +_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FSTOX(rn(reg), rn(reg)); + } + else +# endif + FSTOX(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_l(r0, _FP_REGNO, BIAS(-8)); + jit_unget_reg(reg); +} +# endif + static void _fcr(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; + if (!single_precision_p(r1)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r1); + } + else + x0 = r1; + if (r1 == r2) + x1 = x0; + else if (!single_precision_p(r2)) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r2); + } + else + x1 = r2; + FCMPS(x0, x1); + if (mask & 1) + jit_unget_reg(t0); + if (mask & 2) + jit_unget_reg(t1); +# else FCMPS(r1, r2); +# endif FBa(cc, 3); movi(r0, 1); movi(r0, 0); @@ -539,9 +1071,26 @@ _fcw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, mask = 
0; + if (!single_precision_p(r1)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r1); + } + else + x0 = r1; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + FCMPS(x0, rn(reg)); + if (mask & 1) + jit_unget_reg(t0); +# else FCMPS(r1, rn(reg)); +# endif jit_unget_reg(reg); FBa(cc, 3); movi(r0, 1); @@ -563,7 +1112,7 @@ _dcw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i0); FCMPD(r1, rn(reg)); jit_unget_reg(reg); @@ -572,12 +1121,38 @@ _dcw(jit_state_t *_jit, jit_int32_t cc, movi(r0, 0); } +# if __WORDSIZE == 64 +static void +_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDF(r1, 0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + LDF(r1, 0, r0); +} +# endif + static void _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) - LDFI(0, i0, r0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDFI(0, i0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else +# endif + LDFI(0, i0, r0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -586,12 +1161,38 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDF(r1, r2, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + LDF(r1, r2, r0); +} +# endif + static void _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) - LDFI(r1, i0, r0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if 
(!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDFI(r1, i0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else +# endif + LDFI(r1, i0, r0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -600,12 +1201,38 @@ _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + STF(rn(reg), r0, 0); + jit_unget_reg(reg); + } + else + STF(r1, r0, 0); +} +# endif + static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (s13_p(i0)) - STFI(r0, 0, i0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r0); + STFI(rn(reg), 0, i0); + jit_unget_reg(reg); + } + else +# endif + STFI(r0, 0, i0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -614,12 +1241,38 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } } +# if __WORDSIZE == 64 +static void +_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (!single_precision_p(r2)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r2); + STF(rn(reg), r1, r0); + jit_unget_reg(reg); + } + else + STF(r2, r1, r0); +} +# endif + static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - if (s13_p(i0)) - STFI(r1, r0, i0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + STFI(rn(reg), r0, i0); + jit_unget_reg(reg); + } + else +# endif + STFI(r1, r0, i0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -631,23 +1284,49 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) static void _extr_d(jit_state_t *_jit, jit_int32_t 
r0, jit_int32_t r1) { - stxi(-8, _FP_REGNO, r1); - stxi(-4, _FP_REGNO, 0); - ldxi_d(r0, _FP_REGNO, -8); + stxi(BIAS(-8), _FP_REGNO, r1); +# if __WORDSIZE == 32 + stxi(BIAS(-4), _FP_REGNO, 0); +# endif + ldxi_d(r0, _FP_REGNO, BIAS(-8)); +# if __WORDSIZE == 32 FITOD(r0, r0); +# else + FXTOD(r0, r0); +# endif } static void _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); - FDTOI(r1, rn(reg)); - stxi_d(-8, _FP_REGNO, rn(reg)); - ldxi_i(r0, _FP_REGNO, -8); + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FDTOI(rn(reg), rn(reg)); + } + else +# endif + FDTOI(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_i(r0, _FP_REGNO, BIAS(-8)); jit_unget_reg(reg); } +# if __WORDSIZE == 64 +static void +_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(CLASS_DBL); + FDTOX(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_l(r0, _FP_REGNO, BIAS(-8)); + jit_unget_reg(reg); +} +# endif + static void _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -663,7 +1342,7 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void -_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t i0) { jit_int32_t reg; if (s13_p(i0)) @@ -691,7 +1370,7 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } static void -_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +_stxi_d(jit_state_t *_jit, jit_int32_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (s13_p(i0)) @@ -708,8 +1387,37 @@ static jit_word_t _fbr(jit_state_t *_jit, jit_int32_t cc, jit_word_t i0, jit_int32_t r0,jit_int32_t r1) { +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif jit_word_t w; +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + 
t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r0); + } + else + x0 = r0; + if (r0 == r1) + x1 = x0; + else if (!single_precision_p(r1)) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + else + x1 = r1; + FCMPS(x0, x1); + if (mask & 1) + jit_unget_reg(t0); + if (mask & 2) + jit_unget_reg(t1); +# else FCMPS(r0, r1); +# endif w = _jit->pc.w; FB(cc, (i0 - w) >> 2); NOP(); @@ -722,9 +1430,26 @@ _fbw(jit_state_t *_jit, jit_int32_t cc, { jit_word_t w; jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, mask = 0; + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r0); + } + else + x0 = r0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i1); +# if __WORDSIZE == 64 + FCMPS(x0, rn(reg)); + if (mask & 1) + jit_unget_reg(t0); +# else FCMPS(r0, rn(reg)); +# endif jit_unget_reg(reg); w = _jit->pc.w; FB(cc, (i0 - w) >> 2); @@ -750,7 +1475,7 @@ _dbw(jit_state_t *_jit, jit_int32_t cc, { jit_word_t w; jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i1); FCMPD(r0, rn(reg)); jit_unget_reg(reg); diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 654e34e11..2f828ea66 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -1,5 +1,4 @@ - -#if __WORDSIZE == 32 +#if __WORDSIZE == 32 #define JIT_INSTR_MAX 40 0, /* data */ 0, /* live */ @@ -400,3 +399,405 @@ 0, /* movr_d_w */ 0, /* movi_d_w */ #endif /* __WORDSIZE */ + +#if __WORDSIZE == 64 +#define JIT_INSTR_MAX 64 + 0, /* data */ + 0, /* live */ + 4, /* align */ + 0, /* save */ + 0, /* load */ + 0, /* #name */ + 0, /* #note */ + 4, /* label */ + 36, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr */ + 0, /*
putargi */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 28, /* addi */ + 24, /* addcr */ + 48, /* addci */ + 52, /* addxr */ + 52, /* addxi */ + 4, /* subr */ + 28, /* subi */ + 24, /* subcr */ + 48, /* subci */ + 52, /* subxr */ + 52, /* subxi */ + 32, /* rsbi */ + 4, /* mulr */ + 28, /* muli */ + 48, /* qmulr */ + 64, /* qmuli */ + 48, /* qmulr_u */ + 64, /* qmuli_u */ + 4, /* divr */ + 28, /* divi */ + 4, /* divr_u */ + 28, /* divi_u */ + 20, /* qdivr */ + 16, /* qdivi */ + 20, /* qdivr_u */ + 16, /* qdivi_u */ + 12, /* remr */ + 36, /* remi */ + 12, /* remr_u */ + 36, /* remi_u */ + 4, /* andr */ + 28, /* andi */ + 4, /* orr */ + 28, /* ori */ + 4, /* xorr */ + 28, /* xori */ + 4, /* lshr */ + 4, /* lshi */ + 4, /* rshr */ + 4, /* rshi */ + 4, /* rshr_u */ + 4, /* rshi_u */ + 4, /* negr */ + 4, /* comr */ + 16, /* ltr */ + 16, /* lti */ + 16, /* ltr_u */ + 16, /* lti_u */ + 16, /* ler */ + 16, /* lei */ + 16, /* ler_u */ + 16, /* lei_u */ + 16, /* eqr */ + 16, /* eqi */ + 16, /* ger */ + 16, /* gei */ + 16, /* ger_u */ + 16, /* gei_u */ + 16, /* gtr */ + 16, /* gti */ + 16, /* gtr_u */ + 16, /* gti_u */ + 16, /* ner */ + 16, /* nei */ + 4, /* movr */ + 24, /* movi */ + 8, /* extr_c */ + 4, /* extr_uc */ + 8, /* extr_s */ + 8, /* extr_us */ + 8, /* extr_i */ + 8, /* extr_ui */ + 8, /* htonr_us */ + 8, /* htonr_ui */ + 4, /* htonr_ul */ + 4, /* ldr_c */ + 28, /* ldi_c */ + 4, /* ldr_uc */ + 28, /* ldi_uc */ + 4, /* ldr_s */ + 28, /* ldi_s */ + 4, /* ldr_us */ + 28, /* ldi_us */ + 4, /* ldr_i */ + 28, /* ldi_i */ + 4, /* ldr_ui */ + 28, /* ldi_ui */ + 4, /* ldr_l */ + 28, /* ldi_l */ + 4, /* ldxr_c */ + 24, /* ldxi_c */ + 4, /* ldxr_uc */ + 24, /* ldxi_uc */ + 4, /* ldxr_s */ + 24, /* ldxi_s */ + 4, /* ldxr_us */ + 24, /* ldxi_us */ + 4, /* ldxr_i */ + 24, /* ldxi_i */ + 4, /* ldxr_ui */ + 24, /* ldxi_ui */ + 4, /* ldxr_l */ + 24, /* ldxi_l */ + 4, /* str_c */ + 28, /* sti_c */ + 4, /* str_s */ + 28, /* sti_s */ + 
4, /* str_i */ + 28, /* sti_i */ + 4, /* str_l */ + 28, /* sti_l */ + 4, /* stxr_c */ + 24, /* stxi_c */ + 4, /* stxr_s */ + 24, /* stxi_s */ + 4, /* stxr_i */ + 24, /* stxi_i */ + 4, /* stxr_l */ + 24, /* stxi_l */ + 12, /* bltr */ + 12, /* blti */ + 12, /* bltr_u */ + 12, /* blti_u */ + 12, /* bler */ + 12, /* blei */ + 12, /* bler_u */ + 12, /* blei_u */ + 12, /* beqr */ + 36, /* beqi */ + 12, /* bger */ + 12, /* bgei */ + 12, /* bger_u */ + 12, /* bgei_u */ + 12, /* bgtr */ + 12, /* bgti */ + 12, /* bgtr_u */ + 12, /* bgti_u */ + 12, /* bner */ + 36, /* bnei */ + 12, /* bmsr */ + 12, /* bmsi */ + 12, /* bmcr */ + 12, /* bmci */ + 12, /* boaddr */ + 12, /* boaddi */ + 12, /* boaddr_u */ + 12, /* boaddi_u */ + 12, /* bxaddr */ + 12, /* bxaddi */ + 12, /* bxaddr_u */ + 12, /* bxaddi_u */ + 12, /* bosubr */ + 12, /* bosubi */ + 12, /* bosubr_u */ + 12, /* bosubi_u */ + 12, /* bxsubr */ + 12, /* bxsubi */ + 12, /* bxsubr_u */ + 12, /* bxsubi_u */ + 8, /* jmpr */ + 32, /* jmpi */ + 8, /* callr */ + 32, /* calli */ + 0, /* prepare */ + 0, /* pushargr */ + 0, /* pushargi */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr */ + 0, /* reti */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 44, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 16, /* addr_f */ + 40, /* addi_f */ + 24, /* subr_f */ + 40, /* subi_f */ + 40, /* rsbi_f */ + 16, /* mulr_f */ + 40, /* muli_f */ + 16, /* divr_f */ + 40, /* divi_f */ + 12, /* negr_f */ + 12, /* absr_f */ + 12, /* sqrtr_f */ + 24, /* ltr_f */ + 48, /* lti_f */ + 24, /* ler_f */ + 48, /* lei_f */ + 24, /* eqr_f */ + 48, /* eqi_f */ + 24, /* ger_f */ + 48, /* gei_f */ + 24, /* gtr_f */ + 48, /* gti_f */ + 24, /* ner_f */ + 48, /* nei_f */ + 24, /* unltr_f */ + 48, /* unlti_f */ + 24, /* unler_f */ + 48, /* unlei_f */ + 24, /* uneqr_f */ + 48, /* uneqi_f */ + 24, /* unger_f */ + 
48, /* ungei_f */ + 24, /* ungtr_f */ + 48, /* ungti_f */ + 24, /* ltgtr_f */ + 48, /* ltgti_f */ + 24, /* ordr_f */ + 48, /* ordi_f */ + 24, /* unordr_f */ + 48, /* unordi_f */ + 16, /* truncr_f_i */ + 16, /* truncr_f_l */ + 20, /* extr_f */ + 12, /* extr_d_f */ + 16, /* movr_f */ + 32, /* movi_f */ + 8, /* ldr_f */ + 32, /* ldi_f */ + 8, /* ldxr_f */ + 28, /* ldxi_f */ + 8, /* str_f */ + 32, /* sti_f */ + 8, /* stxr_f */ + 28, /* stxi_f */ + 20, /* bltr_f */ + 44, /* blti_f */ + 20, /* bler_f */ + 44, /* blei_f */ + 28, /* beqr_f */ + 60, /* beqi_f */ + 20, /* bger_f */ + 44, /* bgei_f */ + 20, /* bgtr_f */ + 44, /* bgti_f */ + 20, /* bner_f */ + 44, /* bnei_f */ + 20, /* bunltr_f */ + 44, /* bunlti_f */ + 20, /* bunler_f */ + 44, /* bunlei_f */ + 20, /* buneqr_f */ + 44, /* buneqi_f */ + 20, /* bunger_f */ + 44, /* bungei_f */ + 20, /* bungtr_f */ + 44, /* bungti_f */ + 20, /* bltgtr_f */ + 44, /* bltgti_f */ + 20, /* bordr_f */ + 44, /* bordi_f */ + 20, /* bunordr_f */ + 44, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 32, /* addi_d */ + 4, /* subr_d */ + 32, /* subi_d */ + 32, /* rsbi_d */ + 4, /* mulr_d */ + 32, /* muli_d */ + 4, /* divr_d */ + 32, /* divi_d */ + 4, /* negr_d */ + 4, /* absr_d */ + 4, /* sqrtr_d */ + 16, /* ltr_d */ + 48, /* lti_d */ + 16, /* ler_d */ + 48, /* lei_d */ + 16, /* eqr_d */ + 48, /* eqi_d */ + 16, /* ger_d */ + 48, /* gei_d */ + 16, /* gtr_d */ + 48, /* gti_d */ + 16, /* ner_d */ + 48, /* nei_d */ + 16, /* unltr_d */ + 48, /* unlti_d */ + 16, /* unler_d */ + 48, /* unlei_d */ + 16, /* uneqr_d */ + 48, /* uneqi_d */ + 16, /* unger_d */ + 48, /* ungei_d */ + 16, /* ungtr_d */ + 48, /* ungti_d */ + 16, /* ltgtr_d */ + 48, /* ltgti_d */ + 16, /* ordr_d */ + 48, /* ordi_d */ + 16, /* unordr_d */ + 48, /* unordi_d */ + 16, /* truncr_d_i */ + 12, /* truncr_d_l */ + 12, 
/* extr_d */ + 8, /* extr_f_d */ + 4, /* movr_d */ + 32, /* movi_d */ + 4, /* ldr_d */ + 28, /* ldi_d */ + 4, /* ldxr_d */ + 24, /* ldxi_d */ + 4, /* str_d */ + 28, /* sti_d */ + 4, /* stxr_d */ + 24, /* stxi_d */ + 12, /* bltr_d */ + 40, /* blti_d */ + 12, /* bler_d */ + 40, /* blei_d */ + 12, /* beqr_d */ + 40, /* beqi_d */ + 12, /* bger_d */ + 40, /* bgei_d */ + 12, /* bgtr_d */ + 40, /* bgti_d */ + 12, /* bner_d */ + 44, /* bnei_d */ + 12, /* bunltr_d */ + 44, /* bunlti_d */ + 12, /* bunler_d */ + 44, /* bunlei_d */ + 12, /* buneqr_d */ + 44, /* buneqi_d */ + 12, /* bunger_d */ + 44, /* bungei_d */ + 12, /* bungtr_d */ + 44, /* bungti_d */ + 12, /* bltgtr_d */ + 40, /* bltgti_d */ + 12, /* bordr_d */ + 40, /* bordi_d */ + 12, /* bunordr_d */ + 44, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 0, /* movr_w_f */ + 0, /* movr_ww_d */ + 0, /* movr_w_d */ + 0, /* movr_f_w */ + 0, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 0, /* movr_d_w */ + 0, /* movi_d_w */ +#endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 16d2610f8..3ac80d489 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -18,7 +18,13 @@ */ #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) -#define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5) +#if __WORDSIZE == 32 +# define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5) +# define BIAS(n) (n) +#else +# define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 16) +# define BIAS(n) ((n) + 2047) +#endif /* * Types @@ -72,6 +78,7 @@ jit_register_t _rvs[] = { { 0x1d, "%i5" }, { rc(sav) | 0x1e, "%fp" }, { 0x1f, "%i7" }, +# if __WORDSIZE == 32 { rc(fpr) | 0x00, "%f0" }, { 0x01, "%f1" }, { rc(fpr) | 0x02, "%f2" }, @@ -79,7 +86,7 @@ jit_register_t _rvs[] = { { rc(fpr) | 0x04, "%f4" }, { 0x05, "%f5" }, { rc(fpr) | 0x06, "%f6" }, - { 0x06, "%f7" }, + { 0x07, "%f7" }, { rc(fpr) | 0x08, "%f8" }, { 0x09, "%f9" }, { rc(fpr) | 0x0a, "%f10" }, @@ -88,6 +95,56 @@ jit_register_t _rvs[] 
= { { 0x0d, "%f13" }, { rc(fpr) | 0x0e, "%f14" }, { 0x0f, "%f15" }, +# else + { rc(fpr) | rc(dbl) | 0x3e, "%f62" }, + { rc(fpr) | rc(dbl) | 0x3c, "%f60" }, + { rc(fpr) | rc(dbl) | 0x3a, "%f58" }, + { rc(fpr) | rc(dbl) | 0x38, "%f56" }, + { rc(fpr) | rc(dbl) | 0x36, "%f54" }, + { rc(fpr) | rc(dbl) | 0x34, "%f52" }, + { rc(fpr) | rc(dbl) | 0x32, "%f50" }, + { rc(fpr) | rc(dbl) | 0x30, "%f48" }, + { rc(fpr) | rc(dbl) | 0x2e, "%f46" }, + { rc(fpr) | rc(dbl) | 0x2c, "%f44" }, + { rc(fpr) | rc(dbl) | 0x2a, "%f42" }, + { rc(fpr) | rc(dbl) | 0x28, "%f40" }, + { rc(fpr) | rc(dbl) | 0x26, "%f38" }, + { rc(fpr) | rc(dbl) | 0x24, "%f36" }, + { rc(fpr) | rc(dbl) | 0x22, "%f34" }, + { rc(fpr) | rc(dbl) | 0x20, "%f32" }, + { 0x1f, "%f31" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1e, "%f30" }, + { 0x1d, "%f29" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1c, "%f28" }, + { 0x1b, "%f27" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1a, "%f26" }, + { 0x19, "%f25" }, + { rc(arg)|rc(fpr)|rc(sng)|0x18, "%f24" }, + { 0x17, "%f23" }, + { rc(arg)|rc(fpr)|rc(sng)|0x16, "%f22" }, + { 0x15, "%f21" }, + { rc(arg)|rc(fpr)|rc(sng)|0x14, "%f20" }, + { 0x13, "%f19" }, + { rc(arg)|rc(fpr)|rc(sng)|0x12, "%f18" }, + { 0x11, "%f17" }, + { rc(arg)|rc(fpr)|rc(sng)|0x10, "%f16" }, + { 0x0f, "%f15" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0e, "%f14" }, + { 0x0d, "%f13" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0c, "%f12" }, + { 0x0b, "%f11" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0a, "%f10" }, + { 0x09, "%f9" }, + { rc(arg)|rc(fpr)|rc(sng)|0x08, "%f8" }, + { 0x07, "%f7" }, + { rc(arg)|rc(fpr)|rc(sng)|0x06, "%f6" }, + { 0x05, "%f5" }, + { rc(arg)|rc(fpr)|rc(sng)|0x04, "%f4" }, + { 0x03, "%f3" }, + { rc(arg)|rc(fpr)|rc(sng)|0x02, "%f2" }, + { 0x01, "%f1" }, + { rc(arg)|rc(fpr)|rc(sng)|0x00, "%f0" }, +# endif { _NOREG, "" }, }; @@ -103,6 +160,9 @@ void _jit_init(jit_state_t *_jit) { _jitc->reglen = jit_size(_rvs) - 1; +# if __WORDSIZE == 64 + jit_carry = _NOREG; +# endif } void @@ -126,7 +186,12 @@ _jit_prolog(jit_state_t *_jit) _jitc->function->self.argi = 
_jitc->function->self.argf = _jitc->function->self.aoff = _jitc->function->self.alen = 0; /* float conversion */ +# if __WORDSIZE == 32 _jitc->function->self.aoff = -8; +# else + /* extra slots in case qmul is called */ + _jitc->function->self.aoff = -24; +# endif _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -163,7 +228,7 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); jit_dec_synth(); } - return (_jitc->function->self.aoff); + return (BIAS(_jitc->function->self.aoff)); } void @@ -273,10 +338,17 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { +# if __WORDSIZE == 32 if (u->code == jit_code_arg || u->code == jit_code_arg_f) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d); return (jit_arg_d_reg_p(u->u.w)); +# else + if (u->code == jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f); + return (jit_arg_d_reg_p(u->u.w)); +# endif } void @@ -315,12 +387,16 @@ _jit_arg(jit_state_t *_jit) if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { - offset = _jitc->function->self.size; +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + ++_jitc->function->self.argi; +# endif + offset = BIAS(_jitc->function->self.size); _jitc->function->self.size += sizeof(jit_word_t); } node = jit_new_node_ww(jit_code_arg, offset, ++_jitc->function->self.argn); - jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -329,16 +405,29 @@ _jit_arg_f(jit_state_t *_jit) { jit_node_t *node; jit_int32_t offset; +# if __WORDSIZE == 64 + jit_bool_t inc; +# endif assert(_jitc->function); +# if __WORDSIZE == 32 if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; - 
_jitc->function->self.size += sizeof(jit_float32_t); + _jitc->function->self.size += sizeof(jit_word_t); } +# else + inc = !jit_arg_reg_p(_jitc->function->self.argi); + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else + offset = BIAS(_jitc->function->self.size); + if (inc) + _jitc->function->self.size += sizeof(jit_word_t); +# endif node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); - jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -347,7 +436,11 @@ _jit_arg_d(jit_state_t *_jit) { jit_node_t *node; jit_int32_t offset; +# if __WORDSIZE == 64 + jit_bool_t inc; +# endif assert(_jitc->function); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi; _jitc->function->self.argi += 2; @@ -360,9 +453,18 @@ _jit_arg_d(jit_state_t *_jit) offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_float64_t); } +# else + inc = !jit_arg_reg_p(_jitc->function->self.argi); + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else + offset = BIAS(_jitc->function->self.size); + if (inc) + _jitc->function->self.size += sizeof(jit_word_t); +# endif node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); - jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -420,15 +522,48 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) { +# if __WORDSIZE == 64 + jit_extr_i(u, _I0 + v->u.w); +# else + jit_movr(u, _I0 + v->u.w); +# endif + } + else + jit_ldxi_i(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t)); + jit_dec_synth(); +} + +# if __WORDSIZE == 64 +void +_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + 
jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, _I0 + v->u.w); + else + jit_ldxi_ui(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t)); + jit_dec_synth(); +} + +void +_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { assert(v->code == jit_code_arg); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _I0 + v->u.w); else - jit_ldxi_i(u, JIT_FP, v->u.w); + jit_ldxi_l(u, JIT_FP, v->u.w); jit_dec_synth(); } +# endif void _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) @@ -465,12 +600,20 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(v->code == jit_code_arg_f); assert(_jitc->function); jit_inc_synth_wp(getarg_f, u, v); +# if __WORDSIZE == 32 if (jit_arg_reg_p(v->u.w)) { - jit_stxi(-4, JIT_FP, _I0 + v->u.w); + jit_stxi_i(-4, JIT_FP, _I0 + v->u.w); jit_ldxi_f(u, JIT_FP, -4); } +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + jit_movr_f(u, (_F0 - (v->u.w << 1)) - 1); + } +# endif else - jit_ldxi_f(u, JIT_FP, v->u.w); + jit_ldxi_f(u, JIT_FP, v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t)); jit_dec_synth(); } @@ -479,12 +622,20 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { assert(v->code == jit_code_arg_f); jit_inc_synth_wp(putargr_f, u, v); +# if __WORDSIZE == 32 if (jit_arg_reg_p(v->u.w)) { jit_stxi_f(-4, JIT_FP, u); - jit_ldxi(_I0 + v->u.w, JIT_FP, -4); + jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4); } +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + jit_movr_f((_F0 - (v->u.w << 1)) - 1, u); + } +# endif else - jit_stxi_f(v->u.w, JIT_FP, u); + jit_stxi_f(v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t), JIT_FP, u); jit_dec_synth(); } @@ -494,15 +645,29 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_int32_t regno; assert(v->code == jit_code_arg_f); jit_inc_synth_fp(putargi_f, u, v); +# if 
__WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); if (jit_arg_reg_p(v->u.w)) { jit_stxi_f(-4, JIT_FP, regno); - jit_ldxi(_I0 + v->u.w, JIT_FP, -4); + jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4); } else jit_stxi_f(v->u.w, JIT_FP, regno); jit_unget_reg(regno); +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + jit_movi_f((_F0 - (v->u.w << 1)) - 1, u); + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t), JIT_FP, regno); + jit_unget_reg(regno); + } +# endif jit_dec_synth(); } @@ -513,18 +678,28 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(_jitc->function); jit_inc_synth_wp(getarg_d, u, v); if (jit_arg_d_reg_p(v->u.w)) { +# if __WORDSIZE == 32 jit_stxi(-8, JIT_FP, _I0 + v->u.w); jit_stxi(-4, JIT_FP, _I0 + v->u.w + 1); jit_ldxi_d(u, JIT_FP, -8); +# else + jit_movr_d(u, _F0 - (v->u.w << 1)); +# endif } +# if __WORDSIZE == 32 else if (jit_arg_reg_p(v->u.w)) { jit_stxi(-8, JIT_FP, _I0 + v->u.w); jit_ldxi_f(u, JIT_FP, -8); jit_ldxi_f(u + 1, JIT_FP, stack_framesize); } +# endif else { +# if __WORDSIZE == 32 jit_ldxi_f(u, JIT_FP, v->u.w); jit_ldxi_f(u + 1, JIT_FP, v->u.w + 4); +# else + jit_ldxi_d(u, JIT_FP, v->u.w); +# endif } jit_dec_synth(); } @@ -532,9 +707,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - jit_int32_t regno; + jit_int32_t regno; assert(v->code == jit_code_arg_d); jit_inc_synth_wp(putargr_d, u, v); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(v->u.w)) { jit_stxi_d(-8, JIT_FP, u); jit_ldxi(_I0 + v->u.w, JIT_FP, -8); @@ -559,15 +735,25 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_stxi(v->u.w + 4, JIT_FP, regno); jit_unget_reg(regno); } +# else + if (jit_arg_d_reg_p(v->u.w)) + jit_movr_d(_F0 - (v->u.w << 1), u); + else + jit_stxi_d(v->u.w, 
JIT_FP, u); +# endif jit_dec_synth(); } void _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) { - jit_int32_t regno, gpr; +# if __WORDSIZE == 32 + jit_int32_t gpr; +# endif + jit_int32_t regno; assert(v->code == jit_code_arg_d); jit_inc_synth_dp(putargi_d, u, v); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); if (jit_arg_d_reg_p(v->u.w)) { @@ -595,6 +781,16 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_unget_reg(gpr); } jit_unget_reg(regno); +# else + if (jit_arg_d_reg_p(v->u.w)) + jit_movi_d(_F0 - (v->u.w << 1), u); + else { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } +# endif jit_dec_synth(); } @@ -608,7 +804,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; } else { - jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u); +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->call.argi)) + ++_jitc->function->call.argi; +# endif + jit_stxi(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); } jit_dec_synth(); @@ -625,9 +826,14 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) ++_jitc->function->call.argi; } else { +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->call.argi)) + ++_jitc->function->call.argi; +# endif regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_stxi(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); } @@ -639,15 +845,39 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { jit_inc_synth_w(pushargr_f, u); jit_link_prepare(); +# if __WORDSIZE == 32 if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_stxi_f(-4, JIT_FP, u); - jit_ldxi(_O0 + _jitc->function->call.argi, 
JIT_FP, -4); + jit_stxi_f(-8, JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); ++_jitc->function->call.argi; } else { - jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, u); + jit_stxi_f(_jitc->function->call.size + stack_framesize, + JIT_SP, u); _jitc->function->call.size += sizeof(jit_float32_t); } +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + jit_stxi_f(BIAS(-8), JIT_FP, u); + jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + /* pair of registers is live */ + jit_live(_F0 - (_jitc->function->call.argi << 1)); + jit_movr_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4), + JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -657,18 +887,48 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_int32_t regno; jit_inc_synth_f(pushargi_f, u); jit_link_prepare(); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_stxi_f(-4, JIT_FP, regno); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -4); - ++_jitc->function->call.argi; + jit_stxi_f(-8, JIT_FP, regno); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); + _jitc->function->call.argi++; } else { - jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_stxi_f(_jitc->function->call.size + stack_framesize, + JIT_SP, regno); _jitc->function->call.size += sizeof(jit_float32_t); } jit_unget_reg(regno); +# else + if ((_jitc->function->call.call & jit_call_varargs) && + 
jit_arg_reg_p(_jitc->function->call.argi)) { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(BIAS(-8), JIT_FP, regno); + jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + jit_unget_reg(regno); + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + /* pair of registers is live */ + jit_live(_F0 - (_jitc->function->call.argi << 1)); + jit_movi_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4), + JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -677,9 +937,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { jit_inc_synth_w(pushargr_d, u); jit_link_prepare(); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(_jitc->function->call.argi)) { - jit_stxi_d(-8, JIT_FP, u); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); + jit_stxi_d(BIAS(-8), JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4); _jitc->function->call.argi += 2; } @@ -697,6 +958,26 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) JIT_SP, u + 1); _jitc->function->call.size += sizeof(jit_float64_t); } +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + jit_stxi_d(BIAS(-8), JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + jit_movr_d(_F0 - 
(_jitc->function->call.argi << 1), u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -706,11 +987,12 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_int32_t regno; jit_inc_synth_d(pushargi_d, u); jit_link_prepare(); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); if (jit_arg_d_reg_p(_jitc->function->call.argi)) { - jit_stxi_d(-8, JIT_FP, regno); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); + jit_stxi_d(BIAS(-8), JIT_FP, regno); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4); _jitc->function->call.argi += 2; } @@ -718,7 +1000,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_f(-8, JIT_FP, regno); jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); ++_jitc->function->call.argi; - jit_stxi_f(stack_framesize, JIT_SP, regno + 1); + jit_stxi_f(stack_framesize, JIT_SP, u + 1); _jitc->function->call.size += sizeof(jit_float32_t); } else { @@ -729,6 +1011,32 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) _jitc->function->call.size += sizeof(jit_float64_t); } jit_unget_reg(regno); +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(BIAS(-8), JIT_FP, regno); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + jit_unget_reg(regno); + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + jit_movi_d(_F0 - (_jitc->function->call.argi << 1), u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + 
_jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -740,10 +1048,18 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) spec = jit_class(_rvs[regno].spec); if ((spec & (jit_class_arg|jit_class_gpr)) == (jit_class_arg|jit_class_gpr)) { - regno = _O0 - regno; + regno -= _O0; if (regno >= 0 && regno < node->v.w) return (1); } +# if __WORDSIZE == 64 + if ((spec & (jit_class_arg|jit_class_fpr)) == + (jit_class_arg|jit_class_fpr)) { + regno = _F0 - (regno >> 1); + if (regno >= 0 && regno < node->v.w) + return (1); + } +# endif return (0); } @@ -817,12 +1133,36 @@ _jit_retval_us(jit_state_t *_jit, jit_int32_t r0) void _jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); +# if __WORDSIZE == 32 + if (r0 != _O0) + jit_movr(r0, _O0); +# else + jit_extr_i(r0, _O0); +# endif + jit_dec_synth(); +} + +# if __WORDSIZE == 64 +void +_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); + if (r0 != _O0) + jit_extr_ui(r0, _O0); + jit_dec_synth(); +} + +void +_jit_retval_l(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_i, r0); if (r0 != _O0) jit_movr(r0, _O0); jit_dec_synth(); } +# endif void _jit_retval_f(jit_state_t *_jit, jit_int32_t r0) @@ -1043,6 +1383,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(trunc, _f_i); case_rr(trunc, _d_i); +#if __WORDSIZE == 64 + case_rr(trunc, _f_l); + case_rr(trunc, _d_l); +#endif case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1073,6 +1417,12 @@ _emit_code(jit_state_t *_jit) case_rw(ld, _us); case_rr(ld, _i); case_rw(ld, _i); +#if __WORDSIZE == 64 + case_rr(ld, _ui); + case_rw(ld, _ui); + case_rr(ld, _l); + case_rw(ld, 
_l); +#endif case_rrr(ldx, _c); case_rrw(ldx, _c); case_rrr(ldx, _uc); @@ -1083,24 +1433,45 @@ _emit_code(jit_state_t *_jit) case_rrw(ldx, _us); case_rrr(ldx, _i); case_rrw(ldx, _i); +#if __WORDSIZE == 64 + case_rrr(ldx, _ui); + case_rrw(ldx, _ui); + case_rrr(ldx, _l); + case_rrw(ldx, _l); +#endif case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); case_wr(st, _s); case_rr(st, _i); case_wr(st, _i); +#if __WORDSIZE == 64 + case_rr(st, _l); + case_wr(st, _l); +#endif case_rrr(stx, _c); case_wrr(stx, _c); case_rrr(stx, _s); case_wrr(stx, _s); case_rrr(stx, _i); case_wrr(stx, _i); +#if __WORDSIZE == 64 + case_rrr(stx, _l); + case_wrr(stx, _l); +#endif case_rr(hton, _us); case_rr(hton, _ui); +#if __WORDSIZE == 64 + case_rr(hton, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); +#if __WORDSIZE == 64 + case_rr(ext, _i); + case_rr(ext, _ui); +#endif case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1418,6 +1789,9 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: +#if __WORDSIZE == 64 + case jit_code_getarg_ui: case jit_code_getarg_l: +#endif case jit_code_getarg_f: case jit_code_getarg_d: case jit_code_putargr: case jit_code_putargi: case jit_code_putargr_f: case jit_code_putargi_f: @@ -1428,6 +1802,9 @@ _emit_code(jit_state_t *_jit) case jit_code_retval_c: case jit_code_retval_uc: case jit_code_retval_s: case jit_code_retval_us: case jit_code_retval_i: +#if __WORDSIZE == 64 + case jit_code_retval_ui: case jit_code_retval_l: +#endif case jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: @@ -1435,9 +1812,34 @@ _emit_code(jit_state_t *_jit) default: abort(); } +# if __WORDSIZE == 64 + if (jit_carry != _NOREG) { + switch (node->code) { + case jit_code_note: + case jit_code_addcr: case jit_code_addci: + case jit_code_addxr: case jit_code_addxi: + case 
jit_code_subcr: case jit_code_subci: + case jit_code_subxr: case jit_code_subxi: + break; + default: + jit_unget_reg(jit_carry); + jit_carry = _NOREG; + break; + } + } +# endif jit_regarg_clr(node, value); +# if __WORDSIZE == 64 + if (jit_carry == _NOREG) + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + else { + assert(jit_regset_scan1(&_jitc->regarg, 0) == jit_carry); + assert(jit_regset_scan1(&_jitc->regarg, jit_carry + 1) == ULONG_MAX); + } + assert(_jitc->synth == 0); +# else assert(_jitc->regarg == 0 && _jitc->synth == 0); - /* update register live state */ +# endif jit_reglive(node); } #undef case_brf @@ -1478,13 +1880,13 @@ jit_flush(void *fptr, void *tptr) void _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0) { - ldxi_i(rn(r0), rn(r1), i0); + ldxi(rn(r0), rn(r1), i0); } void _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1) { - stxi_i(i0, rn(r0), rn(r1)); + stxi(i0, rn(r0), rn(r1)); } void diff --git a/lib/lightning.c b/lib/lightning.c index 31205034b..bce5968c3 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -497,6 +497,120 @@ jit_regset_scan1(jit_regset_t *set, jit_int32_t offset) } return (ULONG_MAX); } + +#elif __sparc__ && __WORDSIZE == 64 +void +jit_regset_com(jit_regset_t *u, jit_regset_t *v) +{ + u->rl = ~v->rl; u->rh = ~v->rh; +} + +void +jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl & w->rl; u->rh = v->rh & w->rh; +} + +void +jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl | w->rl; u->rh = v->rh | w->rh; +} + +void +jit_regset_xor(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl ^ w->rl; u->rh = v->rh ^ w->rh; +} + +void +jit_regset_set(jit_regset_t *u, jit_regset_t *v) +{ + u->rl = v->rl; u->rh = v->rh; +} + +void +jit_regset_set_mask(jit_regset_t *u, jit_int32_t v) +{ + jit_bool_t w = !!(v & (v - 1)); + + assert(v >= 0 && v <= 128); + if (v == 0) + u->rl = u->rh = -1LL; + else if (v <= 64) { 
+ u->rl = w ? (1LL << v) - 1 : -1LL; + u->rh = 0; + } + else { + u->rl = -1LL; + u->rh = w ? (1LL << (v - 64)) - 1 : -1LL; + } +} + +jit_bool_t +jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v) +{ + return !((u->rl == v && u->rh == 0)); +} + +void +jit_regset_set_ui(jit_regset_t *u, jit_word_t v) +{ + u->rl = v; + u->rh = 0; +} + +jit_bool_t +jit_regset_set_p(jit_regset_t *u) +{ + return (u->rl || u->rh); +} + +void +jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 128); + if (bit < 64) + set->rl &= ~(1LL << bit); + else + set->rh &= ~(1LL << (bit - 64)); +} + +void +jit_regset_setbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 127); + if (bit < 64) + set->rl |= 1LL << bit; + else + set->rh |= 1LL << (bit - 64); +} + +jit_bool_t +jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 127); + if (bit < 64) + return (!!(set->rl & (1LL << bit))); + else + return (!!(set->rh & (1LL << (bit - 64)))); +} + +unsigned long +jit_regset_scan1(jit_regset_t *set, jit_int32_t offset) +{ + assert(offset >= 0 && offset <= 127); + for (; offset < 64; offset++) { + if (set->rl & (1LL << offset)) + return (offset); + } + for (; offset < 128; offset++) { + if (set->rh & (1LL << (offset - 64))) + return (offset); + } + return (ULONG_MAX); +} + #else unsigned long jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)