diff --git a/ChangeLog b/ChangeLog index 3ace35a11..19b3335f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2018-04-20 Paulo Andrade + + * include/lightning/jit_private.h: Add new register classes to + flag float registers and double only registers, required for sparc64 + where only low 32 bit fpr registers can be used for single precision + operations. + Add new 128 bit jit_regset_t type for sparc64 register set. + + * include/lightning/jit_sparc.h, lib/jit_sparc-cpu.c, lib/jit_sparc-fpu.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Update for 64-bit sparc. + + * lib/lightning.c: Update for new jit_regset_t required for sparc64. + 2018-02-26 Paulo Andrade * check/lightning.c, include/lightning.h: Add the new jit_va_push diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 68b0571a3..f06f1c8a1 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -95,7 +95,14 @@ typedef jit_uint64_t jit_regset_t; # define JIT_SP _SP # define JIT_RET _I0 # define JIT_FRET _F0 +# if __WORDSIZE == 32 typedef jit_uint64_t jit_regset_t; +# else +typedef struct { + jit_uint64_t rl; + jit_uint64_t rh; +} jit_regset_t; +# endif #elif defined(__ia64__) # define JIT_SP _R12 # define JIT_RET _R8 @@ -217,6 +224,10 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_class_sft 0x01000000 /* not a hardware register */ #define jit_class_rg8 0x04000000 /* x86 8 bits */ #define jit_class_xpr 0x80000000 /* float / vector */ +/* Used on sparc64 where %f0-%f31 can be encoded as single float + * but %f32 to %f62 only as double precision */ +#define jit_class_sng 0x10000000 /* Single precision float */ +#define jit_class_dbl 0x20000000 /* Only double precision float */ #define jit_regno_patch 0x00008000 /* this is a register * returned by a "user" call * to jit_get_reg() */ @@ -250,7 +261,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */ 
#define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */ -#if __ia64__ +#if __ia64__ || (__sparc__ && __WORDSIZE == 64) extern void jit_regset_com(jit_regset_t*, jit_regset_t*); @@ -286,10 +297,17 @@ jit_regset_setbit(jit_regset_t*, jit_int32_t); extern jit_bool_t jit_regset_tstbit(jit_regset_t*, jit_int32_t); -# define jit_regset_new(set) \ +# if __sparc__ && __WORDSIZE == 64 +# define jit_regset_new(set) \ + do { (set)->rl = (set)->rh = 0; } while (0) +# define jit_regset_del(set) \ + do { (set)->rl = (set)->rh = 0; } while (0) +# else +# define jit_regset_new(set) \ do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0) -# define jit_regset_del(set) \ +# define jit_regset_del(set) \ do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0) +# endif #else # define jit_regset_com(u, v) (*(u) = ~*(v)) # define jit_regset_and(u, v, w) (*(u) = *(v) & *(w)) @@ -457,7 +475,7 @@ struct jit_compiler { jit_int32_t rout; /* first output register */ jit_int32_t breg; /* base register for prolog/epilog */ #endif -#if __mips__ || __ia64__ || __alpha__ +#if __mips__ || __ia64__ || __alpha__ || (__sparc__ && __WORDSIZE == 64) jit_int32_t carry; #define jit_carry _jitc->carry #endif diff --git a/include/lightning/jit_sparc.h b/include/lightning/jit_sparc.h index a16f140da..f74f5ff30 100644 --- a/include/lightning/jit_sparc.h +++ b/include/lightning/jit_sparc.h @@ -32,8 +32,13 @@ typedef enum { #define jit_r_num() 3 #define jit_v(i) (_L0 + (i)) #define jit_v_num() 8 -#define jit_f(i) (_F0 + ((i) << 1)) -#define jit_f_num() 8 +#if __WORDSIZE == 32 +# define jit_f(i) (_F0 + ((i) << 1)) +# define jit_f_num() 8 +#else +# define jit_f(i) (_F32 - (i)) +# define jit_f_num() 16 +#endif #define JIT_R0 _G2 #define JIT_R1 _G3 #define JIT_R2 _G4 @@ -49,16 +54,47 @@ typedef enum { _O0, _O1, _O2, _O3, _O4, _O5, _SP, _O7, _L0, _L1, _L2, _L3, _L4, _L5, _L6, _L7, _I0, _I1, _I2, _I3, _I4, _I5, _FP, _I7, -#define JIT_F0 _F0 -#define JIT_F1 _F2 -#define JIT_F2 _F4 
-#define JIT_F3 _F6 -#define JIT_F4 _F8 -#define JIT_F5 _F10 -#define JIT_F6 _F12 -#define JIT_F7 _F14 +#if __WORDSIZE == 32 +# define JIT_F0 _F0 +# define JIT_F1 _F2 +# define JIT_F2 _F4 +# define JIT_F3 _F6 +# define JIT_F4 _F8 +# define JIT_F5 _F10 +# define JIT_F6 _F12 +# define JIT_F7 _F14 _F0, _F1, _F2, _F3, _F4, _F5, _F6, _F7, _F8, _F9, _F10, _F11, _F12, _F13, _F14, _F15, +#else + /* All single precision operations have a high cost due to being + * stored on registers only encodable as double precision. + * The cost is due to needing to move values to a register with + * value <= 31. + * This is a limitation due to using fixed named registers in + * lightning. */ +# define JIT_F0 _F32 +# define JIT_F1 _F34 +# define JIT_F2 _F36 +# define JIT_F3 _F38 +# define JIT_F4 _F40 +# define JIT_F5 _F42 +# define JIT_F6 _F44 +# define JIT_F7 _F46 +# define JIT_F8 _F48 +# define JIT_F9 _F50 +# define JIT_F10 _F52 +# define JIT_F11 _F54 +# define JIT_F12 _F56 +# define JIT_F13 _F58 +# define JIT_F14 _F60 +# define JIT_F15 _F62 + _F62, _F60, _F58, _F56, _F54, _F52, _F50, _F48, + _F46, _F44, _F42, _F40, _F38, _F36, _F34, _F32, + _F31, _F30, _F29, _F28, _F27, _F26, _F25, _F24, + _F23, _F22, _F21, _F20, _F19, _F18, _F17, _F16, + _F15, _F14, _F13, _F12, _F11, _F10, _F9, _F8, + _F7, _F6, _F5, _F4, _F3, _F2, _F1, _F0, +#endif #define JIT_NOREG _NOREG _NOREG, } jit_reg_t; diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 508137758..a4d88d1ca 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -18,6 +18,11 @@ */ #if PROTO +# define _G2_REGNO 0x02 +# define _G3_REGNO 0x03 +# define _G4_REGNO 0x04 +# define _O0_REGNO 0x08 +# define _O1_REGNO 0x09 # define _SP_REGNO 0x0e # define _FP_REGNO 0x1e # define _O7_REGNO 0x0f @@ -37,12 +42,12 @@ * fp- alloca * sp+ stack arguments * sp+ 6 words to save register arguments - * sp+ 1 word for hidden address of aggregate return value + * sp+ 1 word for hidden address of aggregate return value (32 bits only) * sp+ 16 words 
for in and local registers * sp ---- * decreasing memory address - next stack frame (not yet allocated) */ -# define stack_framesize ((16 + 1 + 6) * 4) +# define stack_framesize ((16 + (__WORDSIZE == 32) + 6) * sizeof(jit_word_t)) typedef union { struct { jit_uint32_t b: 2; } op; struct { jit_uint32_t _: 2; jit_uint32_t b: 1; } a; @@ -51,21 +56,30 @@ typedef union { struct { jit_uint32_t _: 3; jit_uint32_t b: 4; } cond; struct { jit_uint32_t _: 7; jit_uint32_t b: 3; } op2; struct { jit_uint32_t _: 7; jit_uint32_t b: 6; } op3; + struct { jit_uint32_t _: 10; jit_uint32_t b: 1; } cc1; struct { jit_uint32_t _: 10; jit_uint32_t b: 22; } imm22; struct { jit_uint32_t _: 10; jit_uint32_t b: 22; } disp22; + struct { jit_uint32_t _: 11; jit_uint32_t b: 1; } cc0; + struct { jit_uint32_t _: 12; jit_uint32_t b: 1; } p; + struct { jit_uint32_t _: 13; jit_uint32_t b: 19; } disp19; struct { jit_uint32_t _: 13; jit_uint32_t b: 5; } rs1; struct { jit_uint32_t _: 18; jit_uint32_t b: 1; } i; struct { jit_uint32_t _: 18; jit_uint32_t b: 9; } opf; + struct { jit_uint32_t _: 19; jit_uint32_t b: 1; } x; struct { jit_uint32_t _: 19; jit_uint32_t b: 8; } asi; struct { jit_uint32_t _: 19; jit_uint32_t b: 6; } res; struct { jit_uint32_t _: 19; jit_uint32_t b: 13; } simm13; - struct { jit_uint32_t _: 27; jit_uint32_t b: 5; } rs2; + struct { jit_uint32_t _: 20; jit_uint32_t b: 7; } asix; + struct { jit_uint32_t _: 20; jit_uint32_t b: 6; } asis; + struct { jit_uint32_t _: 26; jit_uint32_t b: 6; } shim; struct { jit_uint32_t _: 25; jit_uint32_t b: 7; } imm7; + struct { jit_uint32_t _: 27; jit_uint32_t b: 5; } rs2; jit_int32_t v; } jit_instr_t; # define ii(i) *_jit->pc.ui++ = i # define s7_p(imm) ((imm) <= 63 && (imm) >= -64) # define s13_p(imm) ((imm) <= 4095 && (imm) >= -4096) +# define s19_p(imm) ((imm) <= 262143 && (imm) >= -262144) # define s22_p(imm) ((imm) <= 2097151 && (imm) >= -20971512) # define s30_p(imm) ((imm) <= 536870911 && (imm) >= -536870912) # define f1(op, disp30) _f1(_jit, op, 
disp30) @@ -75,12 +89,27 @@ static void _f2r(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f2b(op, a, cond, op2, disp22) _f2b(_jit, op, a, cond, op2, disp22) static void _f2b(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define f2bp(op,a,cond,op2,cc1,cc0,p,disp19) \ + _f2bp(_jit,op,a,cond,op2,cc1,cc0,p,disp19) +static void +_f2bp(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define f3r(op, rd, op3, rs1, rs2) _f3r(_jit, op, rd, op3, rs1, rs2) static void _f3r(jit_state_t*, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define f3rx(op, rd, op3, rs1, rs2) _f3rx(_jit, op, rd, op3, rs1, rs2) +static void _f3rx(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define f3i(op, rd, op3, rs1, simm13) _f3i(_jit, op, rd, op3, rs1, simm13) static void _f3i(jit_state_t*, - jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define f3s(op, rd, op3, rs1, simm13) _f3s(_jit, op, rd, op3, rs1, simm13) +static void _f3s(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define f3t(cond, rs1, i, ri) _f3t(_jit, cond, rs1, i, ri) static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; @@ -96,16 +125,31 @@ static void _f3a(jit_state_t*,jit_int32_t, # define LDUBI(rs1, imm, rd) f3i(3, rd, 1, rs1, imm) # define LDUH(rs1, rs2, rd) f3r(3, rd, 2, rs1, rs2) # define LDUHI(rs1, imm, rd) f3i(3, rd, 2, rs1, imm) -# define LD(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) -# define LDI(rs1, imm, rd) f3i(3, rd, 0, rs1, imm) -# define LDD(rs1, rs2, rd) f3r(3, rd, 3, rs1, rs2) -# define LDDI(rs1, imm, rd) f3i(3, rd, 3, rs1, imm) +# if __WORDSIZE == 32 +# define LD(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) +# define LDI(rs1, imm, rd) f3i(3, 
rd, 0, rs1, imm) +# define LDD(rs1, rs2, rd) f3r(3, rd, 3, rs1, rs2) +# define LDDI(rs1, imm, rd) f3i(3, rd, 3, rs1, imm) +# else +# define LDSW(rs1, rs2, rd) f3r(3, rd, 8, rs1, rs2) +# define LDSWI(rs1, imm, rd) f3i(3, rd, 8, rs1, imm) +# define LDUW(rs1, rs2, rd) f3r(3, rd, 0, rs1, rs2) +# define LDUWI(rs1, imm, rd) f3i(3, rd, 0, rs1, imm) +# define LDX(rs1, rs2, rd) f3r(3, rd, 11, rs1, rs2) +# define LDXI(rs1, imm, rd) f3i(3, rd, 11, rs1, imm) +# endif # define LDSBA(rs1, rs2, asi, rd) f3a(3, rd, 25, rs1, asi, rs2) # define LDSHA(rs1, rs2, asi, rd) f3a(3, rd, 26, rs1, asi, rs2) # define LDUBA(rs1, rs2, asi, rd) f3a(3, rd, 17, rs1, asi, rs2) # define LDUHA(rs1, rs2, asi, rd) f3a(3, rd, 18, rs1, asi, rs2) -# define LDA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) -# define LDDA(rs1, rs2, asi, rd) f3a(3, rd, 19, rs1, asi, rs2) +# if __WORDSIZE == 32 +# define LDA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) +# define LDDA(rs1, rs2, asi, rd) f3a(3, rd, 19, rs1, asi, rs2) +# else +# define LDSWA(rs1, rs2, asi, rd) f3a(3, rd, 24, rs1, asi, rs2) +# define LDUWA(rs1, rs2, asi, rd) f3a(3, rd, 16, rs1, asi, rs2) +# define LDXA(rs1, rs2, asi, rd) f3a(3, rd, 27, rs1, asi, rs2) +# endif # define LDC(rs1, rs2, rd) f3r(3, rd, 48, rs1, rs2) # define LDCI(rs1, imm, rd) f3i(3, rd, 48, rs1, imm) # define LDDC(rs1, rs2, rd) f3r(3, rd, 51, rs1, rs2) @@ -116,14 +160,26 @@ static void _f3a(jit_state_t*,jit_int32_t, # define STBI(rd, rs1, imm) f3i(3, rd, 5, rs1, imm) # define STH(rd, rs1, rs2) f3r(3, rd, 6, rs1, rs2) # define STHI(rd, rs1, imm) f3i(3, rd, 6, rs1, imm) -# define ST(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) -# define STI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) -# define STD(rrd, s1, rs2) f3r(3, rd, 7, rs1, rs2) -# define STDI(rd, rs1, imm) f3i(3, rd, 7, rs1, imm) +# if __WORDSIZE == 32 +# define ST(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) +# define STI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) +# define STD(rrd, s1, rs2) f3r(3, rd, 7, rs1, rs2) +# define STDI(rd, rs1, imm) 
f3i(3, rd, 7, rs1, imm) +# else +# define STW(rd, rs1, rs2) f3r(3, rd, 4, rs1, rs2) +# define STWI(rd, rs1, imm) f3i(3, rd, 4, rs1, imm) +# define STX(rd, rs1, rs2) f3r(3, rd, 14, rs1, rs2) +# define STXI(rd, rs1, imm) f3i(3, rd, 14, rs1, imm) +# endif # define STBA(rd, rs1, rs2) f3a(3, rd, 21, rs1, asi, rs2) # define STHA(rd, rs1, rs2) f3a(3, rd, 22, rs1, asi, rs2) -# define STA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) -# define STDA(rd, rs1, rs2) f3a(3, rd, 23, rs1, asi, rs2) +# if __WORDSIZE == 32 +# define STA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) +# define STDA(rd, rs1, rs2) f3a(3, rd, 23, rs1, asi, rs2) +# else +# define STWA(rd, rs1, rs2) f3a(3, rd, 20, rs1, asi, rs2) +# define STXA(rd, rs1, rs2) f3a(3, rd, 30, rs1, asi, rs2) +# endif # define STC(rd, rs1, rs2) f3r(3, rd, 52, rs1, rs2) # define STCI(rd, rs1, imm) f3i(3, rd, 52, rs1, imm) # define STDC(rd, rs1, rs2) f3r(3, rd, 55, rs1, rs2) @@ -174,6 +230,14 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SRLI(rs1, imm, rd) f3i(2, rd, 38, rs1, imm) # define SRA(rs1, rs2, rd) f3r(2, rd, 39, rs1, rs2) # define SRAI(rs1, imm, rd) f3i(2, rd, 39, rs1, imm) +# if __WORDSIZE == 64 +# define SLLX(rs1, rs2, rd) f3rx(2, rd, 37, rs1, rs2) +# define SLLXI(rs1, imm, rd) f3s(2, rd, 37, rs1, imm) +# define SRLX(rs1, rs2, rd) f3rx(2, rd, 38, rs1, rs2) +# define SRLXI(rs1, imm, rd) f3s(2, rd, 38, rs1, imm) +# define SRAX(rs1, rs2, rd) f3rx(2, rd, 39, rs1, rs2) +# define SRAXI(rs1, imm, rd) f3s(2, rd, 39, rs1, imm) +# endif # define ADD(rs1, rs2, rd) f3r(2, rd, 0, rs1, rs2) # define ADDI(rs1, imm, rd) f3i(2, rd, 0, rs1, imm) # define ADDcc(rs1, rs2, rd) f3r(2, rd, 16, rs1, rs2) @@ -219,6 +283,14 @@ static void _f3a(jit_state_t*,jit_int32_t, # define UDIVIcc(rs1, imm, rd) f3i(2, rd, 30, rs1, imm) # define SDIVcc(rs1, rs2, rd) f3r(2, rd, 31, rs1, rs2) # define SDIVIcc(rs1, imm, rd) f3i(2, rd, 31, rs1, imm) +# if __WORDSIZE == 64 +# define MULX(rs1, rs2, rd) f3r(2, rd, 9, rs1, rs2) +# define MULXI(rs1, imm, rd) 
f3i(2, rd, 9, rs1, imm) +# define SDIVX(rs1, rs2, rd) f3r(2, rd, 45, rs1, rs2) +# define SDIVXI(rs1, imm, rd) f3i(2, rd, 45, rs1, imm) +# define UDIVX(rs1, rs2, rd) f3r(2, rd, 13, rs1, rs2) +# define UDIVXI(rs1, imm, rd) f3i(2, rd, 13, rs1, imm) +# endif # define SAVE(rs1, rs2, rd) f3r(2, rd, 60, rs1, rs2) # define SAVEI(rs1, imm, rd) f3i(2, rd, 60, rs1, imm) # define RESTORE(rs1, rs2, rd) f3r(2, rd, 61, rs1, rs2) @@ -243,6 +315,25 @@ static void _f3a(jit_state_t*,jit_int32_t, # define SPARC_BNEG 6 /* negative - N */ # define SPARC_BVC 15 /* overflow clear - not V */ # define SPARC_BVS 7 /* overflow set - V */ +/* Preferred BPcc integer branch opcodes */ +# if __WORDSIZE == 64 +# define SPARC_BPA 8 /* always - 1 */ +# define SPARC_BPN 0 /* never - 0 */ +# define SPARC_BPNE 9 /* not equal - not Z */ +# define SPARC_BPE 1 /* equal - Z */ +# define SPARC_BPG 10 /* greater - not (Z or (N xor V)) */ +# define SPARC_BPLE 2 /* less or equal - Z or (N xor V) */ +# define SPARC_BPGE 11 /* greater or equal - not (N xor V) */ +# define SPARC_BPL 3 /* less - N xor V */ +# define SPARC_BPGU 12 /* greater unsigned - not (C or V) */ +# define SPARC_BPLEU 4 /* less or equal unsigned - C or Z */ +# define SPARC_BPCC 13 /* carry clear (greater than or equal, unsigned) - not C */ +# define SPARC_BPCS 5 /* carry set (less than, unsigned) - C */ +# define SPARC_BPPOS 14 /* positive - not N */ +# define SPARC_BPNEG 6 /* negative - N */ +# define SPARC_BPVC 15 /* overflow clear - not V */ +# define SPARC_BPVS 7 /* overflow set - V */ +# endif # define B(cc, imm) f2b(0, 0, cc, 2, imm) # define Ba(cc, imm) f2b(0, 1, cc, 2, imm) # define BA(imm) B(SPARC_BA, imm) @@ -285,6 +376,28 @@ static void _f3a(jit_state_t*,jit_int32_t, # define BVCa(imm) Ba(SPARC_BVC, imm) # define BVS(imm) B(SPARC_BVS, imm) # define BVSa(imm) Ba(SPARC_BVS, imm) +# if __WORDSIZE == 64 +# define BPccap(cc,a,cc1, cc2,p,imm) f2bp(0, a, cc, 1, cc1, cc0, p, imm) +# define BPap(cc, imm) f2bp(0, 1, cc, 1, 1, 0, p, imm) +# 
define BPa(cc, imm) f2bp(0, 1, cc, 1, 1, 0, 1, imm) +# define BP(cc, imm) f2bp(0, 0, cc, 1, 1, 0, 1, imm) +# define BPA(imm) BP(SPARC_BPA, imm) +# define BPN(imm) BP(SPARC_BPN, imm) +# define BNPE(imm) BP(SPARC_BPNE, imm) +# define BPE(imm) BP(SPARC_BPE, imm) +# define BPG(imm) BP(SPARC_BPG, imm) +# define BPLE(imm) BP(SPARC_BPLE, imm) +# define BPGE(imm) BP(SPARC_BPGE, imm) +# define BPL(imm) BP(SPARC_BPL, imm) +# define BPGU(imm) BP(SPARC_BPGU, imm) +# define BPLEU(imm) BP(SPARC_BPLEU, imm) +# define BPCC(imm) BP(SPARC_BPCC, imm) +# define BPCS(imm) BP(SPARC_BPCS, imm) +# define BPPOS(imm) BP(SPARC_BPPOS, imm) +# define BPNEG(imm) BP(SPARC_BPNEG, imm) +# define BPVC(imm) BP(SPARC_BPVC, imm) +# define BPVS(imm) BP(SPARC_BPVS, imm) +# endif # define SPARC_CBA 8 /* always */ # define SPARC_CBN 0 /* never */ # define SPARC_CB3 7 /* 3 */ @@ -437,29 +550,54 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); # define addr(r0, r1, r2) ADD(r1, r2, r0) # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define addcr(r0, r1, r2) ADDcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define addcr(r0, r1, r2) ADDcc(r1, r2, r0) +# else +# define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2) +static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define addci(r0, r1, i0) _addci(_jit, r0, r1, i0) static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define addxr(r0, r1, r2) ADDXcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define addxr(r0, r1, r2) ADDXcc(r1, r2, r0) +# else +# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2) +static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0) static void _addxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define subr(r0, r1, r2) SUB(r1, r2, r0) # define subi(r0, r1, i0) _subi(_jit, r0, r1, i0) static void _subi(jit_state_t*, 
jit_int32_t, jit_int32_t, jit_word_t); -# define subcr(r0, r1, r2) SUBcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define subcr(r0, r1, r2) SUBcc(r1, r2, r0) +# else +# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2) +static void _subcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define subci(r0, r1, i0) _subci(_jit, r0, r1, i0) static void _subci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) +# if __WORDSIZE == 32 +# define subxr(r0, r1, r2) SUBXcc(r1, r2, r0) +# else +# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2) +static void _subxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# endif # define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) static void _subxi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) static void _rsbi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define mulr(r0, r1, r2) UMUL(r1, r2, r0) +# if __WORDSIZE == 32 +# define mulr(r0, r1, r2) UMUL(r1, r2, r0) +# else +# define mulr(r0, r1, r2) MULX(r1, r2, r0) +# endif # define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) static void _muli(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) -# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) -# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) +# if __WORDSIZE == 32 +# define qmulr(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,1) +# define qmulr_u(r0,r1,r2,r3) iqmulr(r0,r1,r2,r3,0) +# define iqmulr(r0,r1,r2,r3,cc) _iqmulr(_jit,r0,r1,r2,r3,cc) static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t,jit_bool_t); # define qmuli(r0,r1,r2,i0) iqmuli(r0,r1,r2,i0,1) @@ -467,6 +605,20 @@ static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t, # define iqmuli(r0,r1,r2,i0,cc) _iqmuli(_jit,r0,r1,r2,i0,cc) static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t, jit_int32_t,jit_word_t,jit_bool_t); +# else +# define qmulr(r0,r1,r2,r3) 
_qmulr(_jit,r0,r1,r2,r3) +static void _qmulr(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t); +# define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_word_t); +# endif # define divr(r0, r1, r2) _divr(_jit, r0, r1, r2) static void _divr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define divi(r0, r1, i0) _divi(_jit, r0, r1, i0) @@ -502,14 +654,22 @@ static void _ori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); # define xorr(r0, r1, r2) XOR(r1, r2, r0) # define xori(r0, r1, i0) _xori(_jit, r0, r1, i0) static void _xori(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define lshr(r0, r1, r2) SLL(r1, r2, r0) -# define lshi(r0, r1, i0) SLLI(r1, i0, r0) -# define rshr(r0, r1, r2) SRA(r1, r2, r0) -# define rshi(r0, r1, i0) SRAI(r1, i0, r0) -# define rshr_u(r0, r1, r2) SRL(r1, r2, r0) -# define rshi_u(r0, r1, i0) SRLI(r1, i0, r0) +# if __WORDSIZE == 32 +# define lshr(r0, r1, r2) SLL(r1, r2, r0) +# define lshi(r0, r1, i0) SLLI(r1, i0, r0) +# define rshr(r0, r1, r2) SRA(r1, r2, r0) +# define rshi(r0, r1, i0) SRAI(r1, i0, r0) +# define rshr_u(r0, r1, r2) SRL(r1, r2, r0) +# define rshi_u(r0, r1, i0) SRLI(r1, i0, r0) +# else +# define lshr(r0, r1, r2) SLLX(r1, r2, r0) +# define lshi(r0, r1, i0) SLLXI(r1, i0, r0) +# define rshr(r0, r1, r2) SRAX(r1, r2, r0) +# define rshi(r0, r1, i0) SRAXI(r1, i0, r0) +# define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) +# define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) +# endif # define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); # 
define extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -517,30 +677,63 @@ static void _extr_c(jit_state_t*,jit_int32_t,jit_int32_t); static void _extr_s(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 32 +# define htonr_ui(r0,r1) movr(r0,r1) +# else +# define htonr_ui(r0,r1) extr_ui(r0,r1) +# define htonr_ul(r0,r1) movr(r0,r1) +# define extr_i(r0,r1) _extr_i(_jit,r0,r1) +static void _extr_i(jit_state_t*,jit_int32_t,jit_int32_t); +# define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) +static void _extr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define cr(cc, r0, r1, r2) _cr(_jit, cc, r0, r1, r2) static void _cr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); # define cw(cc, r0, r1, i0) _cw(_jit, cc, r0, r1, i0) static void _cw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); -# define ltr(r0, r1, r2) cr(SPARC_BL, r0, r1, r2) -# define lti(r0, r1, i0) cw(SPARC_BL, r0, r1, i0) -# define ltr_u(r0, r1, r2) cr(SPARC_BLU, r0, r1, r2) -# define lti_u(r0, r1, i0) cw(SPARC_BLU, r0, r1, i0) -# define ler(r0, r1, r2) cr(SPARC_BLE, r0, r1, r2) -# define lei(r0, r1, i0) cw(SPARC_BLE, r0, r1, i0) -# define ler_u(r0, r1, r2) cr(SPARC_BLEU, r0, r1, r2) -# define lei_u(r0, r1, i0) cw(SPARC_BLEU, r0, r1, i0) -# define eqr(r0, r1, r2) cr(SPARC_BE, r0, r1, r2) -# define eqi(r0, r1, i0) cw(SPARC_BE, r0, r1, i0) -# define ger(r0, r1, r2) cr(SPARC_BGE, r0, r1, r2) -# define gei(r0, r1, i0) cw(SPARC_BGE, r0, r1, i0) -# define ger_u(r0, r1, r2) cr(SPARC_BGEU, r0, r1, r2) -# define gei_u(r0, r1, i0) cw(SPARC_BGEU, r0, r1, i0) -# define gtr(r0, r1, r2) cr(SPARC_BG, r0, r1, r2) -# define gti(r0, r1, i0) cw(SPARC_BG, r0, r1, i0) -# define gtr_u(r0, r1, r2) cr(SPARC_BGU, r0, r1, r2) -# define gti_u(r0, r1, i0) cw(SPARC_BGU, r0, r1, i0) -# define ner(r0, r1, r2) cr(SPARC_BNE, r0, r1, r2) -# define nei(r0, r1, i0) cw(SPARC_BNE, r0, r1, i0) +# if __WORDSIZE == 32 +# 
define ltr(r0, r1, r2) cr(SPARC_BL, r0, r1, r2) +# define lti(r0, r1, i0) cw(SPARC_BL, r0, r1, i0) +# define ltr_u(r0, r1, r2) cr(SPARC_BLU, r0, r1, r2) +# define lti_u(r0, r1, i0) cw(SPARC_BLU, r0, r1, i0) +# define ler(r0, r1, r2) cr(SPARC_BLE, r0, r1, r2) +# define lei(r0, r1, i0) cw(SPARC_BLE, r0, r1, i0) +# define ler_u(r0, r1, r2) cr(SPARC_BLEU, r0, r1, r2) +# define lei_u(r0, r1, i0) cw(SPARC_BLEU, r0, r1, i0) +# define eqr(r0, r1, r2) cr(SPARC_BE, r0, r1, r2) +# define eqi(r0, r1, i0) cw(SPARC_BE, r0, r1, i0) +# define ger(r0, r1, r2) cr(SPARC_BGE, r0, r1, r2) +# define gei(r0, r1, i0) cw(SPARC_BGE, r0, r1, i0) +# define ger_u(r0, r1, r2) cr(SPARC_BGEU, r0, r1, r2) +# define gei_u(r0, r1, i0) cw(SPARC_BGEU, r0, r1, i0) +# define gtr(r0, r1, r2) cr(SPARC_BG, r0, r1, r2) +# define gti(r0, r1, i0) cw(SPARC_BG, r0, r1, i0) +# define gtr_u(r0, r1, r2) cr(SPARC_BGU, r0, r1, r2) +# define gti_u(r0, r1, i0) cw(SPARC_BGU, r0, r1, i0) +# define ner(r0, r1, r2) cr(SPARC_BNE, r0, r1, r2) +# define nei(r0, r1, i0) cw(SPARC_BNE, r0, r1, i0) +# else +# define ltr(r0, r1, r2) cr(SPARC_BPL, r0, r1, r2) +# define lti(r0, r1, i0) cw(SPARC_BPL, r0, r1, i0) +# define ltr_u(r0, r1, r2) cr(SPARC_BPCS, r0, r1, r2) +# define lti_u(r0, r1, i0) cw(SPARC_BPCS, r0, r1, i0) +# define ler(r0, r1, r2) cr(SPARC_BPLE, r0, r1, r2) +# define lei(r0, r1, i0) cw(SPARC_BPLE, r0, r1, i0) +# define ler_u(r0, r1, r2) cr(SPARC_BPLEU, r0, r1, r2) +# define lei_u(r0, r1, i0) cw(SPARC_BPLEU, r0, r1, i0) +# define eqr(r0, r1, r2) cr(SPARC_BPE, r0, r1, r2) +# define eqi(r0, r1, i0) cw(SPARC_BPE, r0, r1, i0) +# define ger(r0, r1, r2) cr(SPARC_BPGE, r0, r1, r2) +# define gei(r0, r1, i0) cw(SPARC_BPGE, r0, r1, i0) +# define ger_u(r0, r1, r2) cr(SPARC_BPCC, r0, r1, r2) +# define gei_u(r0, r1, i0) cw(SPARC_BPCC, r0, r1, i0) +# define gtr(r0, r1, r2) cr(SPARC_BPG, r0, r1, r2) +# define gti(r0, r1, i0) cw(SPARC_BPG, r0, r1, i0) +# define gtr_u(r0, r1, r2) cr(SPARC_BPGU, r0, r1, r2) +# define gti_u(r0, r1, i0) 
cw(SPARC_BPGU, r0, r1, i0) +# define ner(r0, r1, r2) cr(SPARC_BPNE, r0, r1, r2) +# define nei(r0, r1, i0) cw(SPARC_BPNE, r0, r1, i0) +# endif # define ldr_c(r0, r1) LDSB(r1, 0, r0) # define ldi_c(r0, i0) _ldi_c(_jit, r0, i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); @@ -553,11 +746,25 @@ static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t); # define ldr_us(r0, r1) LDUH(r1, 0, r0) # define ldi_us(r0, i0) _ldi_us(_jit, r0, i0) static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t); -# define ldr(u, v) ldr_i(u, v) -# define ldr_i(r0, r1) LD(r1, 0, r0) -# define ldi(u, v) ldi_i(u, v) +# if __WORDSIZE == 32 +# define ldr_i(r0, r1) LD(r1, 0, r0) +# define ldr(u, v) ldr_i(u, v) +# define ldi(u, v) ldi_i(u, v) +# else +# define ldr_i(r0, r1) LDSW(r1, 0, r0) +# define ldr_ui(r0, r1) LDUW(r1, 0, r0) +# define ldr_l(r0, r1) LDX(r1, 0, r0) +# define ldr(u, v) ldr_l(u, v) +# define ldi(u, v) ldi_l(u, v) +# endif # define ldi_i(r0, i0) _ldi_i(_jit, r0, i0) static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0) +static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t); +# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0) +static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t); +# endif # define ldxr_c(r0, r1, r2) LDSB(r1, r2, r0) # define ldxi_c(r0, r1, i0) _ldxi_c(_jit, r0, r1, i0) static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -570,59 +777,118 @@ static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # define ldxr_us(r0, r1, r2) LDUH(r1, r2, r0) # define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0) static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define ldxr(u, v, w) ldxr_i(u, v, w) -# define ldxr_i(r0, r1, r2) LD(r1, r2, r0) -# define ldxi(u, v, w) ldxi_i(u, v, w) +# if __WORDSIZE == 32 +# define ldxr(u, v, w) ldxr_i(u, v, w) +# define ldxr_i(r0, r1, r2) LD(r1, r2, r0) +# define ldxi(u, v, w) ldxi_i(u, v, w) +# else +# define 
ldxr(u, v, w) ldxr_l(u, v, w) +# define ldxr_i(r0, r1, r2) LDSW(r1, r2, r0) +# define ldxr_ui(r0, r1, r2) LDUW(r1, r2, r0) +# define ldxr_l(r0, r1, r2) LDX(r1, r2, r0) +# define ldxi(u, v, w) ldxi_l(u, v, w) +# endif # define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0) static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# if __WORDSIZE == 64 +# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0) +static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0) +static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# endif # define str_c(r0, r1) STB(r1, r0, 0) # define sti_c(i0, r0) _sti_c(_jit, i0, r0) static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); # define str_s(r0, r1) STH(r1, r0, 0) # define sti_s(i0, r0) _sti_s(_jit, i0, r0) static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t); -# define str(u, v) str_i(u, v) -# define str_i(r0, r1) STI(r1, r0, 0) -# define sti(u, v) sti_i(u, v) +# if __WORDSIZE == 32 +# define str(u, v) str_i(u, v) +# define str_i(r0, r1) STI(r1, r0, 0) +# define sti(u, v) sti_i(u, v) +# else +# define str(u, v) str_l(u, v) +# define str_i(r0, r1) STW(r1, r0, 0) +# define str_l(r0, r1) STX(r1, r0, 0) +# define sti(u, v) sti_l(u, v) +# endif # define sti_i(i0, r0) _sti_i(_jit, i0, r0) static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); +# if __WORDSIZE == 64 +# define sti_l(i0, r0) _sti_l(_jit, i0, r0) +static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t); +# endif # define stxr_c(r0, r1, r2) STB(r2, r1, r0) # define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1) static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define stxr_s(r0, r1, r2) STH(r2, r1, r0) # define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1) static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -# define stxr(u, v, w) stxr_i(u, v, w) -# define stxr_i(r0, r1, r2) ST(r2, r1, r0) -# define stxi(u, v, w) stxi_i(u, v, w) +# if 
__WORDSIZE == 32 +# define stxr(u, v, w) stxr_i(u, v, w) +# define stxr_i(r0, r1, r2) ST(r2, r1, r0) +# define stxi(u, v, w) stxi_i(u, v, w) +# else +# define stxr(u, v, w) stxr_l(u, v, w) +# define stxr_i(r0, r1, r2) STW(r2, r1, r0) +# define stxi(u, v, w) stxi_l(u, v, w) +# define stxr_l(r0, r1, r2) STX(r2, r1, r0) +# endif # define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1) static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) +static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# endif # define br(cc, i0, r0, r1) _br(_jit, cc, i0, r0, r1) static jit_word_t _br(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); # define bw(cc, i0, r0, i1) _bw(_jit, cc, i0, r0, i1) static jit_word_t _bw(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); -# define bltr(i0, r0, r1) br(SPARC_BL, i0, r0, r1) -# define blti(i0, r0, i1) bw(SPARC_BL, i0, r0, i1) -# define bltr_u(i0, r0, r1) br(SPARC_BLU, i0, r0, r1) -# define blti_u(i0, r0, i1) bw(SPARC_BLU, i0, r0, i1) -# define bler(i0, r0, r1) br(SPARC_BLE, i0, r0, r1) -# define blei(i0, r0, i1) bw(SPARC_BLE, i0, r0, i1) -# define bler_u(i0, r0, r1) br(SPARC_BLEU, i0, r0, r1) -# define blei_u(i0, r0, i1) bw(SPARC_BLEU, i0, r0, i1) -# define beqr(i0, r0, r1) br(SPARC_BE, i0, r0, r1) -# define beqi(i0, r0, i1) bw(SPARC_BE, i0, r0, i1) -# define bger(i0, r0, r1) br(SPARC_BGE, i0, r0, r1) -# define bgei(i0, r0, i1) bw(SPARC_BGE, i0, r0, i1) -# define bger_u(i0, r0, r1) br(SPARC_BGEU, i0, r0, r1) -# define bgei_u(i0, r0, i1) bw(SPARC_BGEU, i0, r0, i1) -# define bgtr(i0, r0, r1) br(SPARC_BG, i0, r0, r1) -# define bgti(i0, r0, i1) bw(SPARC_BG, i0, r0, i1) -# define bgtr_u(i0, r0, r1) br(SPARC_BGU, i0, r0, r1) -# define bgti_u(i0, r0, i1) bw(SPARC_BGU, i0, r0, i1) -# define bner(i0, r0, r1) br(SPARC_BNE, i0, r0, r1) -# define bnei(i0, r0, i1) bw(SPARC_BNE, i0, r0, i1) +# if __WORDSIZE == 32 +# define 
bltr(i0, r0, r1) br(SPARC_BL, i0, r0, r1) +# define blti(i0, r0, i1) bw(SPARC_BL, i0, r0, i1) +# define bltr_u(i0, r0, r1) br(SPARC_BLU, i0, r0, r1) +# define blti_u(i0, r0, i1) bw(SPARC_BLU, i0, r0, i1) +# define bler(i0, r0, r1) br(SPARC_BLE, i0, r0, r1) +# define blei(i0, r0, i1) bw(SPARC_BLE, i0, r0, i1) +# define bler_u(i0, r0, r1) br(SPARC_BLEU, i0, r0, r1) +# define blei_u(i0, r0, i1) bw(SPARC_BLEU, i0, r0, i1) +# define beqr(i0, r0, r1) br(SPARC_BE, i0, r0, r1) +# define beqi(i0, r0, i1) bw(SPARC_BE, i0, r0, i1) +# define bger(i0, r0, r1) br(SPARC_BGE, i0, r0, r1) +# define bgei(i0, r0, i1) bw(SPARC_BGE, i0, r0, i1) +# define bger_u(i0, r0, r1) br(SPARC_BGEU, i0, r0, r1) +# define bgei_u(i0, r0, i1) bw(SPARC_BGEU, i0, r0, i1) +# define bgtr(i0, r0, r1) br(SPARC_BG, i0, r0, r1) +# define bgti(i0, r0, i1) bw(SPARC_BG, i0, r0, i1) +# define bgtr_u(i0, r0, r1) br(SPARC_BGU, i0, r0, r1) +# define bgti_u(i0, r0, i1) bw(SPARC_BGU, i0, r0, i1) +# define bner(i0, r0, r1) br(SPARC_BNE, i0, r0, r1) +# define bnei(i0, r0, i1) bw(SPARC_BNE, i0, r0, i1) +# else +# define bltr(i0, r0, r1) br(SPARC_BPL, i0, r0, r1) +# define blti(i0, r0, i1) bw(SPARC_BPL, i0, r0, i1) +# define bltr_u(i0, r0, r1) br(SPARC_BPCS, i0, r0, r1) +# define blti_u(i0, r0, i1) bw(SPARC_BPCS, i0, r0, i1) +# define bler(i0, r0, r1) br(SPARC_BPLE, i0, r0, r1) +# define blei(i0, r0, i1) bw(SPARC_BPLE, i0, r0, i1) +# define bler_u(i0, r0, r1) br(SPARC_BPLEU, i0, r0, r1) +# define blei_u(i0, r0, i1) bw(SPARC_BPLEU, i0, r0, i1) +# define beqr(i0, r0, r1) br(SPARC_BPE, i0, r0, r1) +# define beqi(i0, r0, i1) bw(SPARC_BPE, i0, r0, i1) +# define bger(i0, r0, r1) br(SPARC_BPGE, i0, r0, r1) +# define bgei(i0, r0, i1) bw(SPARC_BPGE, i0, r0, i1) +# define bger_u(i0, r0, r1) br(SPARC_BPCC, i0, r0, r1) +# define bgei_u(i0, r0, i1) bw(SPARC_BPCC, i0, r0, i1) +# define bgtr(i0, r0, r1) br(SPARC_BPG, i0, r0, r1) +# define bgti(i0, r0, i1) bw(SPARC_BPG, i0, r0, i1) +# define bgtr_u(i0, r0, r1) br(SPARC_BPGU, i0, r0, r1) 
+# define bgti_u(i0, r0, i1) bw(SPARC_BPGU, i0, r0, i1) +# define bner(i0, r0, r1) br(SPARC_BPNE, i0, r0, r1) +# define bnei(i0, r0, i1) bw(SPARC_BPNE, i0, r0, i1) +# endif # define b_asr(jif,add,sgn,i0,r0,r1) _b_asr(_jit,jif,add,sgn,i0,r0,r1) static jit_word_t _b_asr(jit_state_t*,jit_bool_t,jit_bool_t,jit_bool_t, @@ -717,6 +983,30 @@ _f2b(jit_state_t *_jit, ii(v.v); } +# if __WORDSIZE == 64 +static void +_f2bp(jit_state_t *_jit, + jit_int32_t op, jit_int32_t a, jit_int32_t cond, jit_int32_t op2, + jit_int32_t cc1, jit_int32_t cc0, jit_int32_t p, jit_int32_t disp19) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(a & 0xfffffffe)); + assert(!(cond & 0xfffffff0)); + assert(!(op2 & 0xfffffff8)); + assert(s19_p(disp19)); + v.op.b = op; + v.a.b = a; + v.cond.b = cond; + v.op2.b = op2; + v.cc1.b = cc1; + v.cc0.b = cc0; + v.p.b = p; + v.disp19.b = disp19; + ii(v.v); +} +# endif + static void _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) @@ -727,16 +1017,60 @@ _f3r(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, assert(!(op3 & 0xffffffc0)); assert(!(rs1 & 0xffffffe0)); assert(!(rs2 & 0xffffffe0)); - v.op.b = op; - v.rd.b = rd; - v.op3.b = op3; - v.rs1.b = rs1; - v.i.b = 0; - v.asi.b = 0; - v.rs2.b = rs2; + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 0; + v.asi.b = 0; + v.rs2.b = rs2; ii(v.v); } +# if __WORDSIZE == 64 +static void +_f3rx(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(rs2 & 0xffffffe0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 0; + v.x.b = 1; + v.asix.b = 0; + v.rs2.b = rs2; + ii(v.v); +} + +static void +_f3s(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t shim) +{ 
+ jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(op3 & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(shim & 0xffffffc0)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.i.b = 1; + v.x.b = 1; + v.asis.b = 0; + v.shim.b = shim; + ii(v.v); +} +# endif + static void _f3i(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t simm13) @@ -834,9 +1168,23 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) if (s13_p(i0)) ORI(0, i0, r0); else { - SETHI(HI(i0), r0); - if (LO(i0)) - ORI(r0, LO(i0), r0); +# if __WORDSIZE == 64 + if (i0 & 0xffffffff00000000) { + jit_int32_t reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), (i0 >> 32) & 0xffffffff); + movi(r0, i0 & 0xffffffff); + lshi(rn(reg), rn(reg), 32); + OR(rn(reg), r0, r0); + jit_unget_reg(reg); + } + else { +# endif + SETHI(HI((int)i0), r0); + if (LO(i0)) + ORI(r0, LO(i0), r0); +# if __WORDSIZE == 64 + } +# endif } } @@ -844,9 +1192,24 @@ static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_word_t w; +# if __WORDSIZE == 64 + jit_int32_t reg; +# endif w = _jit->pc.w; +# if __WORDSIZE == 64 + reg = jit_get_reg(jit_class_gpr); + SETHI(HI((int)i0), r0); + ORI(r0, LO(i0), r0); + i0 = (int)(i0 >> 32); + SETHI(HI(i0), rn(reg)); + ORI(rn(reg), LO(i0), rn(reg)); + SLLXI(rn(reg), 32, rn(reg)); + OR(rn(reg), r0, r0); + jit_unget_reg(reg); +# else SETHI(HI(i0), r0); ORI(r0, LO(i0), r0); +# endif return (w); } @@ -864,9 +1227,31 @@ _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r1, r2); + ltr_u(rn(jit_carry), rn(reg), r1); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addr(r0, r1, 
r2); + ltr_u(rn(jit_carry), r0, r1); + } +} +# endif + static void _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) ADDIcc(r1, i0, r0); @@ -876,11 +1261,42 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) addcr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + ltr_u(rn(jit_carry), rn(reg), r1); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addi(r0, r1, i0); + ltr_u(rn(jit_carry), r0, r1); + } +# endif } +# if __WORDSIZE == 64 +static void +_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + addcr(r0, r1, r2); + addcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +} +# endif + static void _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) ADDXIcc(r1, i0, r0); @@ -890,6 +1306,15 @@ _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) addxr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + addci(r0, r1, i0); + addcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +# endif } static void @@ -906,9 +1331,31 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + subr(rn(reg), r1, r2); + ltr_u(rn(jit_carry), r1, rn(reg)); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + 
else { + subr(r0, r1, r2); + ltr_u(rn(jit_carry), r1, r0); + } +} +# endif + static void _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) SUBIcc(r1, i0, r0); @@ -918,11 +1365,42 @@ _subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) subcr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + if (jit_carry == _NOREG) + jit_carry = jit_get_reg(jit_class_gpr); + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, -i0); + ltr_u(rn(jit_carry), r1, rn(reg)); + movr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + addi(r0, r1, -i0); + ltr_u(rn(jit_carry), r1, r0); + } +# endif } +# if __WORDSIZE == 64 +static void +_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + subcr(r0, r1, r2); + subcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +} +#endif + static void _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { +# if __WORDSIZE == 32 jit_int32_t reg; if (s13_p(i0)) SUBXIcc(r1, i0, r0); @@ -932,6 +1410,15 @@ _subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) subxr(r0, r1, rn(reg)); jit_unget_reg(reg); } +# else + jit_int32_t reg; + assert(jit_carry != _NOREG); + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), rn(jit_carry)); + subci(r0, r1, i0); + subcr(r0, r0, rn(reg)); + jit_unget_reg(reg); +# endif } static void @@ -945,8 +1432,13 @@ static void _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 UMULI(r1, i0, r0); +# else + MULXI(r1, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -955,6 +1447,7 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 32 static void 
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) @@ -986,39 +1479,149 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, } } +# else +static __int128_t __llmul(jit_word_t a, jit_word_t b) +{ + return (__int128_t)a * (__int128_t)b; +} + +# define QMUL_PROLOG() \ + do { \ + (void)jit_get_reg(_O0|jit_class_gpr|jit_class_named); \ + (void)jit_get_reg(_O1|jit_class_gpr|jit_class_named); \ + if (r0 != _G2_REGNO && r1 != _G2_REGNO) \ + stxi(BIAS(-8), _FP_REGNO, _G2_REGNO); \ + if (r0 != _G3_REGNO && r1 != _G3_REGNO) \ + stxi(BIAS(-16), _FP_REGNO, _G3_REGNO); \ + if (r0 != _G4_REGNO && r1 != _G4_REGNO) \ + stxi(BIAS(-24), _FP_REGNO, _G4_REGNO); \ + } while (0) + +# define QMUL_EPILOG() \ + do { \ + if (r0 != _G2_REGNO && r1 != _G2_REGNO) \ + ldxi(_G2_REGNO, _FP_REGNO, BIAS(-8)); \ + if (r0 != _G3_REGNO && r1 != _G3_REGNO) \ + ldxi(_G3_REGNO, _FP_REGNO, BIAS(-16)); \ + if (r0 != _G4_REGNO && r1 != _G4_REGNO) \ + ldxi(_G4_REGNO, _FP_REGNO, BIAS(-24)); \ + (void)jit_unget_reg(_O0); \ + (void)jit_unget_reg(_O1); \ + } while (0) + +static void +_qmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + QMUL_PROLOG(); + movr(_O0_REGNO, r3); + movr(_O1_REGNO, r2); + calli((jit_word_t)__llmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} + +static void +_qmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0) +{ + QMUL_PROLOG(); + movi(_O0_REGNO, i0); + movr(_O1_REGNO, r2); + calli((jit_word_t)__llmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} + +static __uint128_t __ullmul(jit_uword_t a, jit_uword_t b) +{ + return (__uint128_t)a * (__uint128_t)b; +} + +static void +_qmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3) +{ + QMUL_PROLOG(); + movr(_O0_REGNO, r3); + movr(_O1_REGNO, r2); + calli((jit_word_t)__ullmul); + movr(r0, _O1_REGNO); + movr(r1, 
_O0_REGNO); + QMUL_EPILOG(); +} + +static void +_qmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_word_t i0) +{ + QMUL_PROLOG(); + movi(_O0_REGNO, i0); + movr(_O1_REGNO, r2); + calli((jit_word_t)__ullmul); + movr(r0, _O1_REGNO); + movr(r1, _O0_REGNO); + QMUL_EPILOG(); +} +# endif + static void _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 32 jit_int32_t reg; reg = jit_get_reg(jit_class_gpr); rshi(rn(reg), r1, 31); WRY(rn(reg), 0); SDIV(r1, r2, r0); jit_unget_reg(reg); +# else + SDIVX(r1, r2, r0); +# endif } static void _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; +# if __WORDSIZE == 32 reg = jit_get_reg(jit_class_gpr); +# endif if (s13_p(i0)) { +# if __WORDSIZE == 32 rshi(rn(reg), r1, 31); WRY(rn(reg), 0); SDIVI(r1, i0, r0); +# else + SDIVXI(r1, i0, r0); +# endif } else { +# if __WORDSIZE == 64 + reg = jit_get_reg(jit_class_gpr); +# endif movi(rn(reg), i0); divr(r0, r1, rn(reg)); +# if __WORDSIZE == 64 + jit_unget_reg(reg); +# endif } +# if __WORDSIZE == 32 jit_unget_reg(reg); +# endif } static void _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 32 WRYI(0, 0); UDIV(r1, r2, r0); +# else + UDIVX(r1, r2, r0); +# endif } static void @@ -1026,8 +1629,12 @@ _divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (s13_p(i0)) { +# if __WORDSIZE == 32 WRYI(0, 0); UDIVI(r1, i0, r0); +# else + UDIVXI(r1, i0, r0); +# endif } else { reg = jit_get_reg(jit_class_gpr); @@ -1185,30 +1792,50 @@ _xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _extr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 24); - rshi(r0, r0, 24); + lshi(r0, r1, __WORDSIZE - 8); + rshi(r0, r0, __WORDSIZE - 8); } static void _extr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 16); - rshi(r0, r0, 16); + lshi(r0, r1, 
__WORDSIZE - 16); + rshi(r0, r0, __WORDSIZE - 16); } static void _extr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - lshi(r0, r1, 16); - rshi_u(r0, r0, 16); + lshi(r0, r1, __WORDSIZE - 16); + rshi_u(r0, r0, __WORDSIZE - 16); } +#if __WORDSIZE == 64 +static void +_extr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + lshi(r0, r1, __WORDSIZE - 32); + rshi(r0, r0, __WORDSIZE - 32); +} + +static void +_extr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + lshi(r0, r1, __WORDSIZE - 32); + rshi_u(r0, r0, __WORDSIZE - 32); +} +#endif + static void _cr(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { CMP(r1, r2); +# if __WORDSIZE == 32 Ba(cc, 3); +# else + BPa(cc, 3); +# endif movi(r0, 1); movi(r0, 0); } @@ -1220,7 +1847,11 @@ _cw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t reg; if (s13_p(i0)) { CMPI(r1, i0); +# if __WORDSIZE == 32 Ba(cc, 3); +# else + BPa(cc, 3); +# endif movi(r0, 1); movi(r0, 0); } @@ -1292,8 +1923,13 @@ static void _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 LDI(0, i0, r0); +# else + LDSWI(0, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1302,6 +1938,36 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDUWI(0, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_ui(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDXI(0, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_l(r0, rn(reg)); + jit_unget_reg(reg); + } +} +# endif + static void _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1362,8 +2028,13 @@ static 
void _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 LDI(r1, i0, r0); +# else + LDSWI(r1, i0, r0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1372,6 +2043,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDUWI(r1, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_ui(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (s13_p(i0)) + LDXI(r1, i0, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldxr_l(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +} +# endif + static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { @@ -1404,8 +2105,13 @@ static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 STI(r0, 0, i0); +# else + STWI(r0, 0, i0); +# endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1414,6 +2120,22 @@ _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } } +# if __WORDSIZE == 64 +static void +_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + if (s13_p(i0)) + STXI(r0, 0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_l(rn(reg), r0); + jit_unget_reg(reg); + } +} +# endif + static void _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { @@ -1446,8 +2168,13 @@ static void _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - if (s13_p(i0)) + if (s13_p(i0)) { +# if __WORDSIZE == 32 STI(r1, r0, i0); +# else + STWI(r1, r0, i0); +# 
endif + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -1456,6 +2183,22 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +# if __WORDSIZE == 64 +static void +_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (s13_p(i0)) + STXI(r1, r0, i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + stxr_l(r0, rn(reg), r1); + jit_unget_reg(reg); + } +} +# endif + static jit_word_t _br(jit_state_t *_jit, jit_int32_t cc, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) @@ -1463,7 +2206,11 @@ _br(jit_state_t *_jit, jit_int32_t cc, jit_word_t w; CMP(r0, r1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); +# else + BP(cc, (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1477,7 +2224,11 @@ _bw(jit_state_t *_jit, jit_int32_t cc, if (s13_p(i1)) { CMPI(r0, i1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); +# else + BP(cc, (i0 - w) >> 2); /* BP form on 64 bits, matching _br */ +# endif NOP(); } else { @@ -1499,10 +2250,17 @@ _b_asr(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn, else SUBcc(r0, r1, r0); w = _jit->pc.w; +# if __WORDSIZE == 32 B(sgn ? (jif ? SPARC_BVS : SPARC_BVC) : (jif ? SPARC_BCS : SPARC_BCC), (i0 - w) >> 2); +# else + BP(sgn ? + (jif ? SPARC_BPVS : SPARC_BPVC) : + (jif ? SPARC_BPCS : SPARC_BPCC), + (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1519,10 +2277,17 @@ _b_asw(jit_state_t *_jit, jit_bool_t jif, jit_bool_t add, jit_bool_t sgn, else SUBIcc(r0, i1, r0); w = _jit->pc.w; +# if __WORDSIZE == 32 B(sgn ? (jif ? SPARC_BVS : SPARC_BVC) : (jif ? SPARC_BCS : SPARC_BCC), (i0 - w) >> 2); +# else + BP(sgn ? + (jif ? SPARC_BPVS : SPARC_BPVC) : + (jif ? SPARC_BPCS : SPARC_BPCC), + (i0 - w) >> 2); +# endif NOP(); } else { @@ -1541,7 +2306,11 @@ _bm_r(jit_state_t *_jit, jit_bool_t set, jit_word_t w; BTST(r0, r1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2); +# else + BP(set ?
SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2); +# endif NOP(); return (w); } @@ -1555,7 +2324,11 @@ _bm_w(jit_state_t *_jit, jit_bool_t set, if (s13_p(i1)) { BTSTI(r0, i1); w = _jit->pc.w; +# if __WORDSIZE == 32 B(set ? SPARC_BNZ : SPARC_BZ, (i0 - w) >> 2); +# else + BP(set ? SPARC_BPNE : SPARC_BPE, (i0 - w) >> 2); +# endif NOP(); } else { @@ -1632,6 +2405,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) return (w); } +#define OFF(n) BIAS(((n) * sizeof(jit_word_t))) static void _prolog(jit_state_t *_jit, jit_node_t *node) { @@ -1654,32 +2428,34 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - stxi(0, _SP_REGNO, _L0_REGNO); + stxi(OFF(0), _SP_REGNO, _L0_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - stxi(4, _SP_REGNO, _L1_REGNO); + stxi(OFF(1), _SP_REGNO, _L1_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - stxi(8, _SP_REGNO, _L2_REGNO); + stxi(OFF(2), _SP_REGNO, _L2_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - stxi(12, _SP_REGNO, _L3_REGNO); + stxi(OFF(3), _SP_REGNO, _L3_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - stxi(16, _SP_REGNO, _L4_REGNO); + stxi(OFF(4), _SP_REGNO, _L4_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - stxi(20, _SP_REGNO, _L5_REGNO); + stxi(OFF(5), _SP_REGNO, _L5_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - stxi(24, _SP_REGNO, _L6_REGNO); + stxi(OFF(6), _SP_REGNO, _L6_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - stxi(28, _SP_REGNO, _L7_REGNO); + stxi(OFF(7), _SP_REGNO, _L7_REGNO); if (_jitc->function->allocar) { reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), _jitc->function->self.aoff); + movi(rn(reg), BIAS(_jitc->function->self.aoff)); + /* Already "biased" by allocai */ stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); jit_unget_reg(reg); } if (_jitc->function->self.call & 
jit_call_varargs) { for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) - stxi(68 + reg * 4, _SP_REGNO, rn(_I0 + reg)); + stxi(BIAS((16 + (__WORDSIZE == 32)) * sizeof(jit_word_t) + + reg * sizeof(jit_word_t)), _FP_REGNO, rn(_I0 + reg)); } } @@ -1691,21 +2467,21 @@ _epilog(jit_state_t *_jit, jit_node_t *node) /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - ldxi(_L0_REGNO, _SP_REGNO, 0); + ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0)); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - ldxi(_L1_REGNO, _SP_REGNO, 4); + ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1)); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - ldxi(_L2_REGNO, _SP_REGNO, 8); + ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2)); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - ldxi(_L3_REGNO, _SP_REGNO, 12); + ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3)); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - ldxi(_L4_REGNO, _SP_REGNO, 16); + ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4)); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - ldxi(_L5_REGNO, _SP_REGNO, 20); + ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5)); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - ldxi(_L6_REGNO, _SP_REGNO, 24); + ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6)); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - ldxi(_L7_REGNO, _SP_REGNO, 28); + ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7)); RESTOREI(0, 0, 0); RETL(); NOP(); @@ -1715,7 +2491,12 @@ static void _vastart(jit_state_t *_jit, jit_int32_t r0) { /* Initialize stack pointer to the first stack argument. 
*/ - addi(r0, _SP_REGNO, 68 + _jitc->function->vagp * 4); + if (jit_arg_reg_p(_jitc->function->vagp)) + addi(r0, _FP_REGNO, BIAS((16 + (__WORDSIZE == 32) + + _jitc->function->vagp) * + sizeof(jit_word_t))); + else + addi(r0, _FP_REGNO, BIAS(_jitc->function->self.size)); } static void @@ -1727,7 +2508,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) ldr(r0, r1); /* Update vararg stack pointer. */ - addi(r1, r1, 4); + addi(r1, r1, sizeof(jit_word_t)); } static void @@ -1747,15 +2528,33 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) i.disp22.b = (label - instr) >> 2; u.i[0] = i.v; } +# if __WORDSIZE == 64 + else if (i.op2.b == 1) { + i.disp19.b = (label - instr) >> 2; + u.i[0] = i.v; + } +# endif else if (i.op2.b == 4) { /* movi_p */ /* SETHI */ - i.imm22.b = HI(label); + i.imm22.b = HI((int)label); u.i[0] = i.v; i.v = u.i[1]; if (i.op.b == 2 && i.op3.b == 2) { /* ORI */ i.simm13.b = LO(label); u.i[1] = i.v; +# if __WORDSIZE == 64 + i.v = u.i[2]; + assert(i.op2.b == 4); + label = (label >> 32) & 0xffffffff; + i.imm22.b = HI((int)label); + u.i[2] = i.v; + i.v = u.i[3]; + assert(i.op.b == 2 && i.op3.b == 2); + /* ORI */ + i.simm13.b = LO(label); + u.i[3] = i.v; +# endif } else abort(); diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index e8dea3412..c56b211e9 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -18,20 +18,30 @@ */ #if PROTO -# define LDF(rs1, rs2, rd) f3r(3, rd, 32, rs1, rs2) -# define LDFI(rs1, imm, rd) f3i(3, rd, 32, rs1, imm) -# define LDDF(rs1, rs2, rd) f3r(3, rd, 35, rs1, rs2) -# define LDDFI(rs1, imm, rd) f3i(3, rd, 35, rs1, imm) -# define LDFSR(rs1, rs2, rd) f3r(3, rd, 33, rs1, rs2) -# define LDFSRI(rs1, imm, rd) f3i(3, rd, 33, rs1, imm) -# define STF(rd, rs1, rs2) f3r(3, rd, 36, rs1, rs2) -# define STFI(rd, rs1, imm) f3i(3, rd, 36, rs1, imm) -# define STDF(rd, rs1, rs2) f3r(3, rd, 39, rs1, rs2) -# define STDFI(rd, rs1, imm) f3i(3, rd, 39, rs1, imm) -# define STFSR(rd, rs1, rs2) f3r(3, rd, 37, 
rs1, rs2) -# define STFSRI(rd, rs1, imm) f3i(3, rd, 37, rs1, imm) -# define STDFQ(rd, rs1, rs2) f3r(3, rd, 38, rs1, rs2) -# define STFDFQ(rd, rs1, imm) f3i(3, rd, 38, rs1, imm) +# if __WORDSIZE == 32 +# define FPR(r) (r) +# define CLASS_SNG jit_class_fpr +# define CLASS_DBL jit_class_fpr +# else +# define single_precision_p(r) ((r) >= 0 && (r) <= 31) +# define FPR(r) ((r) > 31 ? (r) - 31 : (r)) +# define CLASS_SNG (jit_class_fpr | jit_class_sng) +# define CLASS_DBL (jit_class_fpr | jit_class_dbl) +# endif +# define LDF(rs1, rs2, rd) f3r(3, FPR(rd), 32, FPR(rs1), FPR(rs2)) +# define LDFI(rs1, imm, rd) f3i(3, FPR(rd), 32, FPR(rs1), imm) +# define LDDF(rs1, rs2, rd) f3r(3, FPR(rd), 35, FPR(rs1), FPR(rs2)) +# define LDDFI(rs1, imm, rd) f3i(3, FPR(rd), 35, FPR(rs1), imm) +# define LDFSR(rs1, rs2, rd) f3r(3, FPR(rd), 33, FPR(rs1), FPR(rs2)) +# define LDFSRI(rs1, imm, rd) f3i(3, FPR(rd), 33, FPR(rs1), imm) +# define STF(rd, rs1, rs2) f3r(3, FPR(rd), 36, FPR(rs1), FPR(rs2)) +# define STFI(rd, rs1, imm) f3i(3, FPR(rd), 36, FPR(rs1), imm) +# define STDF(rd, rs1, rs2) f3r(3, FPR(rd), 39, FPR(rs1), FPR(rs2)) +# define STDFI(rd, rs1, imm) f3i(3, FPR(rd), 39, FPR(rs1), imm) +# define STFSR(rd, rs1, rs2) f3r(3, FPR(rd), 37, FPR(rs1), FPR(rs2)) +# define STFSRI(rd, rs1, imm) f3i(3, FPR(rd), 37, FPR(rs1), imm) +# define STDFQ(rd, rs1, rs2) f3r(3, FPR(rd), 38, FPR(rs1), FPR(rs2)) +# define STFDFQ(rd, rs1, imm) f3i(3, FPR(rd), 38, FPR(rs1), imm) # define SPARC_FBA 8 /* always - 1 */ # define SPARC_FBN 0 /* never - 0 */ # define SPARC_FBU 7 /* unordered - U */ @@ -86,9 +96,17 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FITOS(rs2, rd) FPop1(rd, 0, 196, rs2) # define FITOD(rs2, rd) FPop1(rd, 0, 200, rs2) # define FITOQ(rs2, rd) FPop1(rd, 0, 204, rs2) +# if __WORDSIZE == 64 +# define FXTOS(rs2, rd) FPop1(rd, 0, 132, rs2) +# define FXTOD(rs2, rd) FPop1(rd, 0, 136, rs2) +# define FxTOQ(rs2, rd) FPop1(rd, 0, 140, rs2) +# endif # define 
FSTOI(rs2, rd) FPop1(rd, 0, 209, rs2) # define FDTOI(rs2, rd) FPop1(rd, 0, 210, rs2) # define FQTOI(rs2, rd) FPop1(rd, 0, 211, rs2) +# define FSTOX(rs2, rd) FPop1(rd, 0, 129, rs2) +# define FDTOX(rs2, rd) FPop1(rd, 0, 130, rs2) +# define FQTOX(rs2, rd) FPop1(rd, 0, 131, rs2) # define FSTOD(rs2, rd) FPop1(rd, 0, 201, rs2) # define FSTOQ(rs2, rd) FPop1(rd, 0, 205, rs2) # define FDTOS(rs2, rd) FPop1(rd, 0, 198, rs2) @@ -96,8 +114,14 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define FQTOS(rs2, rd) FPop1(rd, 0, 199, rs2) # define FQTOD(rs2, rd) FPop1(rd, 0, 203, rs2) # define FMOVS(rs2, rd) FPop1(rd, 0, 1, rs2) +# define FMOVD(rs2, rd) FPop1(rd, 0, 2, rs2) +# define FMOVQ(rs2, rd) FPop1(rd, 0, 3, rs2) # define FNEGS(rs2, rd) FPop1(rd, 0, 5, rs2) +# define FNEGD(rs2, rd) FPop1(rd, 0, 6, rs2) +# define FNEGQ(rs2, rd) FPop1(rd, 0, 7, rs2) # define FABSS(rs2, rd) FPop1(rd, 0, 9, rs2) +# define FABSD(rs2, rd) FPop1(rd, 0, 10, rs2) +# define FABSQ(rs2, rd) FPop1(rd, 0, 11, rs2) # define FSQRTS(rs2, rd) FPop1(rd, 0, 41, rs2) # define FSQRTD(rs2, rd) FPop1(rd, 0, 42, rs2) # define FSQRTQ(rs2, rd) FPop1(rd, 0, 43, rs2) @@ -145,30 +169,73 @@ _f3f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); # define CPop2(rd, rs1, opc, rs2) f3f(rd, 55, rs1, opf, rs2) # define extr_f(r0, r1) _extr_f(_jit, r0, r1) static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); -# define truncr_f(r0, r1) truncr_f_i(r0, r1) +# if __WORDSIZE == 32 +# define truncr_f(r0, r1) truncr_f_i(r0, r1) +# define truncr_d(r0, r1) truncr_d_i(r0, r1) +# else +# define truncr_f(r0, r1) truncr_f_l(r0, r1) +# define truncr_d(r0, r1) truncr_d_l(r0, r1) +# endif # define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1) static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t); -# define extr_d_f(r0, r1) FDTOS(r1, r0) +# if __WORDSIZE == 64 +# define truncr_f_l(r0, r1) _truncr_f_l(_jit, r0, r1) +static void _truncr_f_l(jit_state_t*, jit_int32_t,
jit_int32_t); +# endif +# if __WORDSIZE == 32 +# define extr_d_f(r0, r1) FDTOS(r1, r0) +# else +# define extr_d_f(r0, r1) _extr_d_f(_jit, r0, r1) +static void _extr_d_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define movi_f(r0, i0) _movi_f(_jit, r0, i0) +# if __WORDSIZE == 32 +# define movr_f(r0, r1) FMOVS(r1, r0) +# else +# define movr_f(r0, r1) _movr_f(_jit, r0, r1) +static void _movr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); -# define movr_f(r0, r1) FMOVS(r1, r0) -# define negr_f(r0, r1) FNEGS(r1, r0) -# define absr_f(r0, r1) FABSS(r1, r0) -# define sqrtr_f(r0, r1) FSQRTS(r1, r0) +# if __WORDSIZE == 32 +# define negr_f(r0, r1) FNEGS(r1, r0) +# define absr_f(r0, r1) FABSS(r1, r0) +# define sqrtr_f(r0, r1) FSQRTS(r1, r0) +# else +# define negr_f(r0, r1) _negr_f(_jit, r0, r1) +static void _negr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# define absr_f(r0, r1) _absr_f(_jit, r0, r1) +static void _absr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# define sqrtr_f(r0, r1) _sqrtr_f(_jit, r0, r1) +static void _sqrtr_f(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define extr_d(r0, r1) _extr_d(_jit, r0, r1) static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); -# define truncr_d(r0, r1) truncr_d_i(r0, r1) # define truncr_d_i(r0, r1) _truncr_d_i(_jit, r0, r1) static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t); -# define extr_f_d(r0, r1) FSTOD(r1, r0) +# if __WORDSIZE == 64 +# define truncr_d_l(r0, r1) _truncr_d_l(_jit, r0, r1) +static void _truncr_d_l(jit_state_t*, jit_int32_t, jit_int32_t); +# endif +# if __WORDSIZE == 32 +# define extr_f_d(r0, r1) FSTOD(r1, r0) +# else +# define extr_f_d(r0, r1) _extr_f_d(_jit, r0, r1) +static void _extr_f_d(jit_state_t*, jit_int32_t, jit_int32_t); +# endif # define movi_d(r0, i0) _movi_d(_jit, r0, i0) static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*); +# if __WORDSIZE == 32 # define movr_d(r0, r1) _movr_d(_jit, 
r0, r1) static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define negr_d(r0, r1) _negr_d(_jit, r0, r1) static void _negr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define absr_d(r0, r1) _absr_d(_jit, r0, r1) static void _absr_d(jit_state_t*, jit_int32_t, jit_int32_t); +# else +# define movr_d(r0, r1) FMOVD(r1, r0) +# define negr_d(r0, r1) FNEGD(r1, r0) +# define absr_d(r0, r1) FABSD(r1, r0) +# endif # define sqrtr_d(r0, r1) FSQRTD(r1, r0) # define fop1f(op, r0, r1, i0) _fop1f(_jit, op, r0, r1, i0) static void _fop1f(jit_state_t*,jit_int32_t, @@ -182,17 +249,27 @@ static void _fop1d(jit_state_t*,jit_int32_t, # define rfop1d(op, r0, r1, i0) _rfop1d(_jit, op, r0, r1, i0) static void _rfop1d(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_float64_t*); -# define addr_f(r0, r1, r2) FADDS(r1, r2, r0) +# if __WORDSIZE == 32 +# define addr_f(r0, r1, r2) FADDS(r1, r2, r0) +# define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) +# define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) +# define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) +# else +# define fop2f(op, r0, r1, r2) _fop2f(_jit, op, r0, r1, r2) +static void _fop2f(jit_state_t*, jit_int32_t, + jit_int32_t, jit_int32_t, jit_int32_t); +# define addr_f(r0, r1, r2) fop2f(SPARC_FADDS, r0, r1, r2) +# define subr_f(r0, r1, r2) fop2f(SPARC_FSUBS, r0, r1, r2) +# define mulr_f(r0, r1, r2) fop2f(SPARC_FMULS, r0, r1, r2) +# define divr_f(r0, r1, r2) fop2f(SPARC_FDIVS, r0, r1, r2) +# endif # define addi_f(r0, r1, i0) fop1f(SPARC_FADDS, r0, r1, i0) -# define subr_f(r0, r1, r2) FSUBS(r1, r2, r0) # define subi_f(r0, r1, i0) fop1f(SPARC_FSUBS, r0, r1, i0) # define rsbr_f(r0, r1, r2) subr_f(r0, r2, r1) # define rsbi_f(r0, r1, i0) rfop1f(SPARC_FSUBS, r0, r1, i0) # define rsbr_d(r0, r1, r2) subr_d(r0, r2, r1) # define rsbi_d(r0, r1, i0) rfop1d(SPARC_FSUBD, r0, r1, i0) -# define mulr_f(r0, r1, r2) FMULS(r1, r2, r0) # define muli_f(r0, r1, i0) fop1f(SPARC_FMULS, r0, r1, i0) -# define divr_f(r0, r1, r2) FDIVS(r1, r2, r0) # define divi_f(r0, r1, 
i0) fop1f(SPARC_FDIVS, r0, r1, i0) # define addr_d(r0, r1, r2) FADDD(r1, r2, r0) # define addi_d(r0, r1, i0) fop1d(SPARC_FADDD, r0, r1, i0) @@ -270,30 +347,50 @@ _dcw(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t*); # define ordi_d(r0, r1, i0) dcw(SPARC_FBO, r0, r1, i0) # define unordr_d(r0, r1, r2) dcr(SPARC_FBU, r0, r1, r2) # define unordi_d(r0, r1, i0) dcw(SPARC_FBU, r0, r1, i0) -# define ldr_f(r0, r1) LDF(r1, 0, r0) +# if __WORDSIZE == 32 +# define ldr_f(r0, r1) LDF(r1, 0, r0) +# else +# define ldr_f(r0, r1) _ldr_f(_jit, r0, r1) +static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define ldi_f(r0, i0) _ldi_f(_jit, r0, i0) static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); -# define ldxr_f(r0, r1, r2) LDF(r1, r2, r0) +# if __WORDSIZE == 32 +# define ldxr_f(r0, r1, r2) LDF(r1, r2, r0) +# else +# define ldxr_f(r0, r1, r2) _ldxr_f(_jit, r0, r1, r2) +static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define ldxi_f(r0, r1, i0) _ldxi_f(_jit, r0, r1, i0) static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# define str_f(r0, r1) STF(r1, r0, 0) +# if __WORDSIZE == 32 +# define str_f(r0, r1) STF(r1, r0, 0) +# else +# define str_f(r0, r1) _str_f(_jit, r0, r1) +static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define sti_f(r0, i0) _sti_f(_jit, r0, i0) -static void _sti_f(jit_state_t*,jit_int32_t,jit_word_t); -# define stxr_f(r0, r1, r2) STF(r2, r1, r0) +static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); +# if __WORDSIZE == 32 +# define stxr_f(r0, r1, r2) STF(r2, r1, r0) +# else +# define stxr_f(r0, r1, r2) _stxr_f(_jit, r0, r1, r2) +static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# endif # define stxi_f(r0, r1, i0) _stxi_f(_jit, r0, r1, i0) -static void _stxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define ldr_d(r0, r1) LDDF(r1, 0, r0) # define ldi_d(r0, 
i0) _ldi_d(_jit, r0, i0) static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); # define ldxr_d(r0, r1, r2) LDDF(r1, r2, r0) # define ldxi_d(r0, r1, i0) _ldxi_d(_jit, r0, r1, i0) -static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define str_d(r0, r1) STDF(r1, r0, 0) # define sti_d(r0, i0) _sti_d(_jit, r0, i0) -static void _sti_d(jit_state_t*,jit_int32_t,jit_word_t); +static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); # define stxr_d(r0, r1, r2) STDF(r2, r1, r0) # define stxi_d(r0, r1, i0) _stxi_d(_jit, r0, r1, i0) -static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +static void _stxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define fbr(cc, i0, r0, r1) _fbr(_jit, cc, i0, r0, r1) static jit_word_t _fbr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); @@ -372,6 +469,20 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2) { jit_instr_t v; +# if __WORDSIZE == 64 + if (rd > 31) { + assert(rd <= 63 && (rd & 1) == 0); + rd -= 31; + } + if (rs1 > 31) { + assert(rs1 <= 63 && (rs1 & 1) == 0); + rs1 -= 31; + } + if (rs2 > 31) { + assert(rs2 <= 63 && (rs2 & 1) == 0); + rs2 -= 31; + } +# endif assert(!(rd & 0xffffffe0)); assert(!(op3 & 0xffffffc0)); assert(!(rs1 & 0xffffffe0)); @@ -386,6 +497,151 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, ii(v.v); } +# if __WORDSIZE == 64 +static void +_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (r0 != r1) { + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FMOVS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FMOVS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FMOVS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + 
FMOVS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } + } +} + +static void +_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FNEGS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FNEGS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FNEGS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FNEGS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} + +static void +_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FABSS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FABSS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FABSS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FABSS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} + +static void +_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1; + if (single_precision_p(r0)) { + if (single_precision_p(r1)) + FSQRTS(r1, r0); + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FSQRTS(rn(t1), r0); + jit_unget_reg(t1); + } + } + else { + if (single_precision_p(r1)) { + t0 = jit_get_reg(CLASS_SNG); + FSQRTS(r1, rn(t0)); + movr_d(r0, rn(t0)); + jit_unget_reg(t0); + } + else { + t1 = jit_get_reg(CLASS_SNG); + movr_d(rn(t1), r1); + FSQRTS(rn(t1), rn(t1)); + movr_d(r0, rn(t1)); + jit_unget_reg(t1); + } + } +} +# endif + +# if __WORDSIZE == 64 +static void +_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + 
movr_d(rn(reg), r0); + FDTOS(r1, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + FDTOS(r1, r0); +} +# endif + static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { @@ -399,19 +655,39 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) data.f = *i0; reg = jit_get_reg(jit_class_gpr); movi(rn(reg), data.i & 0xffffffff); - stxi_i(-8, _FP_REGNO, rn(reg)); + stxi_i(BIAS(-8), _FP_REGNO, rn(reg)); jit_unget_reg(reg); - ldxi_f(r0, _FP_REGNO, -8); + ldxi_f(r0, _FP_REGNO, BIAS(-8)); } else ldi_f(r0, (jit_word_t)i0); } +# if __WORDSIZE == 64 +static void +_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + FSTOD(rn(reg), r0); + jit_unget_reg(reg); + } + else + FSTOD(r1, r0); +} +# endif + static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { union { +# if __WORDSIZE == 32 jit_int32_t i[2]; +# else + jit_word_t w; +# endif jit_float64_t d; } data; jit_int32_t reg; @@ -419,17 +695,24 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) if (_jitc->no_data) { data.d = *i0; reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 32 movi(rn(reg), data.i[0]); - stxi_i(-8, _FP_REGNO, rn(reg)); +# else + movi(rn(reg), data.w); +# endif + stxi(BIAS(-8), _FP_REGNO, rn(reg)); +# if __WORDSIZE == 32 movi(rn(reg), data.i[1]); - stxi_i(-4, _FP_REGNO, rn(reg)); + stxi_i(BIAS(-4), _FP_REGNO, rn(reg)); +# endif jit_unget_reg(reg); - ldxi_d(r0, _FP_REGNO, -8); + ldxi_d(r0, _FP_REGNO, BIAS(-8)); } else ldi_d(r0, (jit_word_t)i0); } +# if __WORDSIZE == 32 static void _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -460,15 +743,162 @@ _absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) if (r0 != r1) FMOVS(r1 + 1, r0 + 1); } +# endif + +# if __WORDSIZE == 64 +# define single_rrr(NAME, CODE) \ +static void \ +NAME(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_int32_t r2) \ +{ \ + jit_int32_t x0, t0, x1, t1, x2, t2, mask = 0; \ + if (!single_precision_p(r0)) { \ + mask |= 1; \ + t0 = jit_get_reg(CLASS_SNG); \ + x0 = rn(t0); \ + if (r0 == r1) { \ + x1 = x0; \ + movr_d(x1, r1); \ + if (r0 == r2) \ + x2 = x0; \ + } \ + else if (r0 == r2) { \ + x2 = x0; \ + movr_d(x2, r2); \ + } \ + } \ + else \ + x0 = r0; \ + if (!single_precision_p(r1)) { \ + if (r0 != r1) { \ + mask |= 2; \ + t1 = jit_get_reg(CLASS_SNG); \ + x1 = rn(t1); \ + movr_d(x1, r1); \ + if (r1 == r2) \ + x2 = x1; \ + } \ + } \ + else \ + x1 = r1; \ + if (!single_precision_p(r2)) { \ + if (r0 != r2 && r1 != r2) { \ + mask |= 4; \ + t2 = jit_get_reg(CLASS_SNG); \ + x2 = rn(t2); \ + movr_d(x2, r2); \ + } \ + } \ + else \ + x2 = r2; \ + CODE(x1, x2, x0); \ + if (mask & 1) { \ + movr_d(r0, x0); \ + jit_unget_reg(t0); \ + } \ + if (mask & 2) \ + jit_unget_reg(t1); \ + if (mask & 4) \ + jit_unget_reg(t2); \ +} + +static void +_fop2f(jit_state_t *_jit, jit_int32_t op, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t x0, t0, x1, t1, x2, t2, mask = 0; + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + if (r0 == r2) + x2 = x0; + } + else if (r0 == r2) { + x2 = x0; + movr_d(x2, r2); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if (r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + if (r1 == r2) + x2 = x1; + } + } + else + x1 = r1; + if (!single_precision_p(r2)) { + if (r0 != r2 && r1 != r2) { + mask |= 4; + t2 = jit_get_reg(CLASS_SNG); + x2 = rn(t2); + movr_d(x2, r2); + } + } + else + x2 = r2; + FPop1(x0, x1, op, x2); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); + if (mask & 4) + jit_unget_reg(t2); +} +# endif static void _fop1f(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = 
jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if (r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + } + else + x1 = r1; + FPop1(x0, x1, op, rn(reg)); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); +# else FPop1(r0, r1, op, rn(reg)); +# endif jit_unget_reg(reg); } @@ -477,9 +907,43 @@ _rfop1f(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + if (r0 == r1) { + x1 = x0; + movr_d(x1, r1); + } + } + else + x0 = r0; + if (!single_precision_p(r1)) { + if (r0 != r1) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + } + else + x1 = r1; + FPop1(x0, rn(reg), op, x1); + if (mask & 1) { + movr_d(r0, x0); + jit_unget_reg(t0); + } + if (mask & 2) + jit_unget_reg(t1); +# else FPop1(r0, rn(reg), op, r1); +# endif jit_unget_reg(reg); } @@ -488,7 +952,7 @@ _fop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i0); FPop1(r0, r1, op, rn(reg)); jit_unget_reg(reg); @@ -499,7 +963,7 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); 
movi_d(rn(reg), i0); FPop1(r0, rn(reg), op, r1); jit_unget_reg(reg); @@ -508,27 +972,95 @@ _rfop1d(jit_state_t *_jit, jit_int32_t op, static void _extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - stxi(-8, _FP_REGNO, r1); - ldxi_f(r0, _FP_REGNO, -8); + stxi(BIAS(-8), _FP_REGNO, r1); +# if __WORDSIZE == 32 + ldxi_f(r0, _FP_REGNO, BIAS(-8)); FITOS(r0, r0); +# else + ldxi_d(r0, _FP_REGNO, BIAS(-8)); + if (!single_precision_p(r0)) { + jit_int32_t reg; + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r0); + FXTOS(rn(reg), rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + FXTOS(r0, r0); +# endif } static void _truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); - FSTOI(r1, rn(reg)); - stxi_f(-8, _FP_REGNO, rn(reg)); - ldxi_i(r0, _FP_REGNO, -8); + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FSTOI(rn(reg), rn(reg)); + } + else +# endif + FSTOI(r1, rn(reg)); + stxi_f(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_i(r0, _FP_REGNO, BIAS(-8)); jit_unget_reg(reg); } +# if __WORDSIZE == 64 +static void +_truncr_f_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FSTOX(rn(reg), rn(reg)); + } + else +# endif + FSTOX(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_l(r0, _FP_REGNO, BIAS(-8)); + jit_unget_reg(reg); +} +# endif + static void _fcr(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; + if (!single_precision_p(r1)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r1); + } + else + x0 = r1; + if (r1 == r2) + x1 = x0; + else if (!single_precision_p(r2)) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r2); + } + else + x1 = r2; + 
FCMPS(x0, x1); + if (mask & 1) + jit_unget_reg(t0); + if (mask & 2) + jit_unget_reg(t1); +# else FCMPS(r1, r2); +# endif FBa(cc, 3); movi(r0, 1); movi(r0, 0); @@ -539,9 +1071,26 @@ _fcw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_float32_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, mask = 0; + if (!single_precision_p(r1)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r1); + } + else + x0 = r1; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i0); +# if __WORDSIZE == 64 + FCMPS(x0, rn(reg)); + if (mask & 1) + jit_unget_reg(t0); +# else FCMPS(r1, rn(reg)); +# endif jit_unget_reg(reg); FBa(cc, 3); movi(r0, 1); @@ -563,7 +1112,7 @@ _dcw(jit_state_t *_jit, jit_int32_t cc, jit_int32_t r0, jit_int32_t r1, jit_float64_t *i0) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i0); FCMPD(r1, rn(reg)); jit_unget_reg(reg); @@ -572,12 +1121,38 @@ _dcw(jit_state_t *_jit, jit_int32_t cc, movi(r0, 0); } +# if __WORDSIZE == 64 +static void +_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDF(r1, 0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + LDF(r1, 0, r0); +} +# endif + static void _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) - LDFI(0, i0, r0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDFI(0, i0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else +# endif + LDFI(0, i0, r0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -586,12 +1161,38 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if 
(!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDF(r1, r2, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else + LDF(r1, r2, r0); +} +# endif + static void _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (s13_p(i0)) - LDFI(r1, i0, r0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + LDFI(r1, i0, rn(reg)); + movr_d(r0, rn(reg)); + jit_unget_reg(reg); + } + else +# endif + LDFI(r1, i0, r0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -600,12 +1201,38 @@ _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +# if __WORDSIZE == 64 +static void +_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + STF(rn(reg), r0, 0); + jit_unget_reg(reg); + } + else + STF(r1, r0, 0); +} +# endif + static void _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (s13_p(i0)) - STFI(r0, 0, i0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r0); + STFI(rn(reg), 0, i0); + jit_unget_reg(reg); + } + else +# endif + STFI(r0, 0, i0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -614,12 +1241,38 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } } +# if __WORDSIZE == 64 +static void +_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (!single_precision_p(r2)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r2); + STF(rn(reg), r1, r0); + jit_unget_reg(reg); + } + else + STF(r2, r1, r0); +} +# endif + static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - if (s13_p(i0)) - STFI(r1, r0, i0); + if (s13_p(i0)) { +# if __WORDSIZE == 64 + if 
(!single_precision_p(r1)) { + reg = jit_get_reg(CLASS_SNG); + movr_d(rn(reg), r1); + STFI(rn(reg), r0, i0); + jit_unget_reg(reg); + } + else +# endif + STFI(r1, r0, i0); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); @@ -631,23 +1284,49 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) static void _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { - stxi(-8, _FP_REGNO, r1); - stxi(-4, _FP_REGNO, 0); - ldxi_d(r0, _FP_REGNO, -8); + stxi(BIAS(-8), _FP_REGNO, r1); +# if __WORDSIZE == 32 + stxi(BIAS(-4), _FP_REGNO, 0); +# endif + ldxi_d(r0, _FP_REGNO, BIAS(-8)); +# if __WORDSIZE == 32 FITOD(r0, r0); +# else + FXTOD(r0, r0); +# endif } static void _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); - FDTOI(r1, rn(reg)); - stxi_d(-8, _FP_REGNO, rn(reg)); - ldxi_i(r0, _FP_REGNO, -8); + reg = jit_get_reg(CLASS_SNG); +# if __WORDSIZE == 64 + if (!single_precision_p(r1)) { + movr_d(rn(reg), r1); + FDTOI(rn(reg), rn(reg)); + } + else +# endif + FDTOI(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_i(r0, _FP_REGNO, BIAS(-8)); jit_unget_reg(reg); } +# if __WORDSIZE == 64 +static void +_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(CLASS_DBL); + FDTOX(r1, rn(reg)); + stxi_d(BIAS(-8), _FP_REGNO, rn(reg)); + ldxi_l(r0, _FP_REGNO, BIAS(-8)); + jit_unget_reg(reg); +} +# endif + static void _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -663,7 +1342,7 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void -_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t i0) { jit_int32_t reg; if (s13_p(i0)) @@ -691,7 +1370,7 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } static void -_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) 
+_stxi_d(jit_state_t *_jit, jit_int32_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (s13_p(i0)) @@ -708,8 +1387,37 @@ static jit_word_t _fbr(jit_state_t *_jit, jit_int32_t cc, jit_word_t i0, jit_int32_t r0,jit_int32_t r1) { +# if __WORDSIZE == 64 + jit_int32_t x0, t0, x1, t1, mask = 0; +# endif jit_word_t w; +# if __WORDSIZE == 64 + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r0); + } + else + x0 = r0; + if (r0 == r1) + x1 = x0; + else if (!single_precision_p(r1)) { + mask |= 2; + t1 = jit_get_reg(CLASS_SNG); + x1 = rn(t1); + movr_d(x1, r1); + } + else + x1 = r1; + FCMPS(x0, x1); + if (mask & 1) + jit_unget_reg(t0); + if (mask & 2) + jit_unget_reg(t1); +# else FCMPS(r0, r1); +# endif w = _jit->pc.w; FB(cc, (i0 - w) >> 2); NOP(); @@ -722,9 +1430,26 @@ _fbw(jit_state_t *_jit, jit_int32_t cc, { jit_word_t w; jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); +# if __WORDSIZE == 64 + jit_int32_t x0, t0, mask = 0; + if (!single_precision_p(r0)) { + mask |= 1; + t0 = jit_get_reg(CLASS_SNG); + x0 = rn(t0); + movr_d(x0, r0); + } + else + x0 = r0; +# endif + reg = jit_get_reg(CLASS_SNG); movi_f(rn(reg), i1); +# if __WORDSIZE == 64 + FCMPS(x0, rn(reg)); + if (mask & 1) + jit_unget_reg(t0); +# else FCMPS(r0, rn(reg)); +# endif jit_unget_reg(reg); w = _jit->pc.w; FB(cc, (i0 - w) >> 2); @@ -750,7 +1475,7 @@ _dbw(jit_state_t *_jit, jit_int32_t cc, { jit_word_t w; jit_int32_t reg; - reg = jit_get_reg(jit_class_fpr); + reg = jit_get_reg(CLASS_DBL); movi_d(rn(reg), i1); FCMPD(r0, rn(reg)); jit_unget_reg(reg); diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 654e34e11..2f828ea66 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -1,5 +1,4 @@ - #if __WORDSIZE == 32 #define JIT_INSTR_MAX 40 0, /* data */ 0, /* live */ @@ -400,3 +399,405 @@ 0, /* movr_d_w */ 0, /* movi_d_w */ #endif /* __WORDSIZE */ + +#if __WORDSIZE == 64 +#define JIT_INSTR_MAX 64 + 0, /* data */ + 0, /*
live */ + 4, /* align */ + 0, /* save */ + 0, /* load */ + 0, /* #name */ + 0, /* #note */ + 4, /* label */ + 36, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr */ + 0, /* putargi */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 28, /* addi */ + 24, /* addcr */ + 48, /* addci */ + 52, /* addxr */ + 52, /* addxi */ + 4, /* subr */ + 28, /* subi */ + 24, /* subcr */ + 48, /* subci */ + 52, /* subxr */ + 52, /* subxi */ + 32, /* rsbi */ + 4, /* mulr */ + 28, /* muli */ + 48, /* qmulr */ + 64, /* qmuli */ + 48, /* qmulr_u */ + 64, /* qmuli_u */ + 4, /* divr */ + 28, /* divi */ + 4, /* divr_u */ + 28, /* divi_u */ + 20, /* qdivr */ + 16, /* qdivi */ + 20, /* qdivr_u */ + 16, /* qdivi_u */ + 12, /* remr */ + 36, /* remi */ + 12, /* remr_u */ + 36, /* remi_u */ + 4, /* andr */ + 28, /* andi */ + 4, /* orr */ + 28, /* ori */ + 4, /* xorr */ + 28, /* xori */ + 4, /* lshr */ + 4, /* lshi */ + 4, /* rshr */ + 4, /* rshi */ + 4, /* rshr_u */ + 4, /* rshi_u */ + 4, /* negr */ + 4, /* comr */ + 16, /* ltr */ + 16, /* lti */ + 16, /* ltr_u */ + 16, /* lti_u */ + 16, /* ler */ + 16, /* lei */ + 16, /* ler_u */ + 16, /* lei_u */ + 16, /* eqr */ + 16, /* eqi */ + 16, /* ger */ + 16, /* gei */ + 16, /* ger_u */ + 16, /* gei_u */ + 16, /* gtr */ + 16, /* gti */ + 16, /* gtr_u */ + 16, /* gti_u */ + 16, /* ner */ + 16, /* nei */ + 4, /* movr */ + 24, /* movi */ + 8, /* extr_c */ + 4, /* extr_uc */ + 8, /* extr_s */ + 8, /* extr_us */ + 8, /* extr_i */ + 8, /* extr_ui */ + 8, /* htonr_us */ + 8, /* htonr_ui */ + 4, /* htonr_ul */ + 4, /* ldr_c */ + 28, /* ldi_c */ + 4, /* ldr_uc */ + 28, /* ldi_uc */ + 4, /* ldr_s */ + 28, /* ldi_s */ + 4, /* ldr_us */ + 28, /* ldi_us */ + 4, /* ldr_i */ + 28, /* ldi_i */ + 4, /* ldr_ui */ + 
28, /* ldi_ui */ + 4, /* ldr_l */ + 28, /* ldi_l */ + 4, /* ldxr_c */ + 24, /* ldxi_c */ + 4, /* ldxr_uc */ + 24, /* ldxi_uc */ + 4, /* ldxr_s */ + 24, /* ldxi_s */ + 4, /* ldxr_us */ + 24, /* ldxi_us */ + 4, /* ldxr_i */ + 24, /* ldxi_i */ + 4, /* ldxr_ui */ + 24, /* ldxi_ui */ + 4, /* ldxr_l */ + 24, /* ldxi_l */ + 4, /* str_c */ + 28, /* sti_c */ + 4, /* str_s */ + 28, /* sti_s */ + 4, /* str_i */ + 28, /* sti_i */ + 4, /* str_l */ + 28, /* sti_l */ + 4, /* stxr_c */ + 24, /* stxi_c */ + 4, /* stxr_s */ + 24, /* stxi_s */ + 4, /* stxr_i */ + 24, /* stxi_i */ + 4, /* stxr_l */ + 24, /* stxi_l */ + 12, /* bltr */ + 12, /* blti */ + 12, /* bltr_u */ + 12, /* blti_u */ + 12, /* bler */ + 12, /* blei */ + 12, /* bler_u */ + 12, /* blei_u */ + 12, /* beqr */ + 36, /* beqi */ + 12, /* bger */ + 12, /* bgei */ + 12, /* bger_u */ + 12, /* bgei_u */ + 12, /* bgtr */ + 12, /* bgti */ + 12, /* bgtr_u */ + 12, /* bgti_u */ + 12, /* bner */ + 36, /* bnei */ + 12, /* bmsr */ + 12, /* bmsi */ + 12, /* bmcr */ + 12, /* bmci */ + 12, /* boaddr */ + 12, /* boaddi */ + 12, /* boaddr_u */ + 12, /* boaddi_u */ + 12, /* bxaddr */ + 12, /* bxaddi */ + 12, /* bxaddr_u */ + 12, /* bxaddi_u */ + 12, /* bosubr */ + 12, /* bosubi */ + 12, /* bosubr_u */ + 12, /* bosubi_u */ + 12, /* bxsubr */ + 12, /* bxsubi */ + 12, /* bxsubr_u */ + 12, /* bxsubi_u */ + 8, /* jmpr */ + 32, /* jmpi */ + 8, /* callr */ + 32, /* calli */ + 0, /* prepare */ + 0, /* pushargr */ + 0, /* pushargi */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr */ + 0, /* reti */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 44, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 16, /* addr_f */ + 40, /* addi_f */ + 24, /* subr_f */ + 40, /* subi_f */ + 40, /* rsbi_f */ + 16, /* mulr_f */ + 40, /* muli_f */ + 16, /* divr_f */ + 40, /* divi_f */ + 12, /* negr_f */ + 12, 
/* absr_f */ + 12, /* sqrtr_f */ + 24, /* ltr_f */ + 48, /* lti_f */ + 24, /* ler_f */ + 48, /* lei_f */ + 24, /* eqr_f */ + 48, /* eqi_f */ + 24, /* ger_f */ + 48, /* gei_f */ + 24, /* gtr_f */ + 48, /* gti_f */ + 24, /* ner_f */ + 48, /* nei_f */ + 24, /* unltr_f */ + 48, /* unlti_f */ + 24, /* unler_f */ + 48, /* unlei_f */ + 24, /* uneqr_f */ + 48, /* uneqi_f */ + 24, /* unger_f */ + 48, /* ungei_f */ + 24, /* ungtr_f */ + 48, /* ungti_f */ + 24, /* ltgtr_f */ + 48, /* ltgti_f */ + 24, /* ordr_f */ + 48, /* ordi_f */ + 24, /* unordr_f */ + 48, /* unordi_f */ + 16, /* truncr_f_i */ + 16, /* truncr_f_l */ + 20, /* extr_f */ + 12, /* extr_d_f */ + 16, /* movr_f */ + 32, /* movi_f */ + 8, /* ldr_f */ + 32, /* ldi_f */ + 8, /* ldxr_f */ + 28, /* ldxi_f */ + 8, /* str_f */ + 32, /* sti_f */ + 8, /* stxr_f */ + 28, /* stxi_f */ + 20, /* bltr_f */ + 44, /* blti_f */ + 20, /* bler_f */ + 44, /* blei_f */ + 28, /* beqr_f */ + 60, /* beqi_f */ + 20, /* bger_f */ + 44, /* bgei_f */ + 20, /* bgtr_f */ + 44, /* bgti_f */ + 20, /* bner_f */ + 44, /* bnei_f */ + 20, /* bunltr_f */ + 44, /* bunlti_f */ + 20, /* bunler_f */ + 44, /* bunlei_f */ + 20, /* buneqr_f */ + 44, /* buneqi_f */ + 20, /* bunger_f */ + 44, /* bungei_f */ + 20, /* bungtr_f */ + 44, /* bungti_f */ + 20, /* bltgtr_f */ + 44, /* bltgti_f */ + 20, /* bordr_f */ + 44, /* bordi_f */ + 20, /* bunordr_f */ + 44, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 32, /* addi_d */ + 4, /* subr_d */ + 32, /* subi_d */ + 32, /* rsbi_d */ + 4, /* mulr_d */ + 32, /* muli_d */ + 4, /* divr_d */ + 32, /* divi_d */ + 4, /* negr_d */ + 4, /* absr_d */ + 4, /* sqrtr_d */ + 16, /* ltr_d */ + 48, /* lti_d */ + 16, /* ler_d */ + 48, /* lei_d */ + 16, /* eqr_d */ + 48, /* eqi_d */ + 16, /* ger_d */ + 48, /* gei_d */ + 16, /* gtr_d */ + 48, /* gti_d */ + 16, /* ner_d */ 
+ 48, /* nei_d */ + 16, /* unltr_d */ + 48, /* unlti_d */ + 16, /* unler_d */ + 48, /* unlei_d */ + 16, /* uneqr_d */ + 48, /* uneqi_d */ + 16, /* unger_d */ + 48, /* ungei_d */ + 16, /* ungtr_d */ + 48, /* ungti_d */ + 16, /* ltgtr_d */ + 48, /* ltgti_d */ + 16, /* ordr_d */ + 48, /* ordi_d */ + 16, /* unordr_d */ + 48, /* unordi_d */ + 16, /* truncr_d_i */ + 12, /* truncr_d_l */ + 12, /* extr_d */ + 8, /* extr_f_d */ + 4, /* movr_d */ + 32, /* movi_d */ + 4, /* ldr_d */ + 28, /* ldi_d */ + 4, /* ldxr_d */ + 24, /* ldxi_d */ + 4, /* str_d */ + 28, /* sti_d */ + 4, /* stxr_d */ + 24, /* stxi_d */ + 12, /* bltr_d */ + 40, /* blti_d */ + 12, /* bler_d */ + 40, /* blei_d */ + 12, /* beqr_d */ + 40, /* beqi_d */ + 12, /* bger_d */ + 40, /* bgei_d */ + 12, /* bgtr_d */ + 40, /* bgti_d */ + 12, /* bner_d */ + 44, /* bnei_d */ + 12, /* bunltr_d */ + 44, /* bunlti_d */ + 12, /* bunler_d */ + 44, /* bunlei_d */ + 12, /* buneqr_d */ + 44, /* buneqi_d */ + 12, /* bunger_d */ + 44, /* bungei_d */ + 12, /* bungtr_d */ + 44, /* bungti_d */ + 12, /* bltgtr_d */ + 40, /* bltgti_d */ + 12, /* bordr_d */ + 40, /* bordi_d */ + 12, /* bunordr_d */ + 44, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 0, /* movr_w_f */ + 0, /* movr_ww_d */ + 0, /* movr_w_d */ + 0, /* movr_f_w */ + 0, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 0, /* movr_d_w */ + 0, /* movi_d_w */ +#endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 16d2610f8..3ac80d489 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -18,7 +18,13 @@ */ #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) -#define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5) +#if __WORDSIZE == 32 +# define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5) +# define BIAS(n) (n) +#else +# define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 16) +# define BIAS(n) ((n) + 2047) +#endif /* * Types @@ -72,6 +78,7 @@ jit_register_t _rvs[] = { { 0x1d, "%i5" }, { rc(sav) | 
0x1e, "%fp" }, { 0x1f, "%i7" }, +# if __WORDSIZE == 32 { rc(fpr) | 0x00, "%f0" }, { 0x01, "%f1" }, { rc(fpr) | 0x02, "%f2" }, @@ -79,7 +86,7 @@ jit_register_t _rvs[] = { { rc(fpr) | 0x04, "%f4" }, { 0x05, "%f5" }, { rc(fpr) | 0x06, "%f6" }, - { 0x06, "%f7" }, + { 0x07, "%f7" }, { rc(fpr) | 0x08, "%f8" }, { 0x09, "%f9" }, { rc(fpr) | 0x0a, "%f10" }, @@ -88,6 +95,56 @@ jit_register_t _rvs[] = { { 0x0d, "%f13" }, { rc(fpr) | 0x0e, "%f14" }, { 0x0f, "%f15" }, +# else + { rc(fpr) | rc(dbl) | 0x3e, "%f62" }, + { rc(fpr) | rc(dbl) | 0x3c, "%f60" }, + { rc(fpr) | rc(dbl) | 0x3a, "%f58" }, + { rc(fpr) | rc(dbl) | 0x38, "%f56" }, + { rc(fpr) | rc(dbl) | 0x36, "%f54" }, + { rc(fpr) | rc(dbl) | 0x34, "%f52" }, + { rc(fpr) | rc(dbl) | 0x32, "%f50" }, + { rc(fpr) | rc(dbl) | 0x30, "%f48" }, + { rc(fpr) | rc(dbl) | 0x2e, "%f46" }, + { rc(fpr) | rc(dbl) | 0x2c, "%f44" }, + { rc(fpr) | rc(dbl) | 0x2a, "%f42" }, + { rc(fpr) | rc(dbl) | 0x28, "%f40" }, + { rc(fpr) | rc(dbl) | 0x26, "%f38" }, + { rc(fpr) | rc(dbl) | 0x24, "%f36" }, + { rc(fpr) | rc(dbl) | 0x22, "%f34" }, + { rc(fpr) | rc(dbl) | 0x20, "%f32" }, + { 0x1f, "%f31" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1e, "%f30" }, + { 0x1d, "%f29" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1c, "%f28" }, + { 0x1b, "%f27" }, + { rc(arg)|rc(fpr)|rc(sng)|0x1a, "%f26" }, + { 0x19, "%f25" }, + { rc(arg)|rc(fpr)|rc(sng)|0x18, "%f24" }, + { 0x17, "%f23" }, + { rc(arg)|rc(fpr)|rc(sng)|0x16, "%f22" }, + { 0x15, "%f21" }, + { rc(arg)|rc(fpr)|rc(sng)|0x14, "%f20" }, + { 0x13, "%f19" }, + { rc(arg)|rc(fpr)|rc(sng)|0x12, "%f18" }, + { 0x11, "%f17" }, + { rc(arg)|rc(fpr)|rc(sng)|0x10, "%f16" }, + { 0x0f, "%f15" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0e, "%f14" }, + { 0x0d, "%f13" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0c, "%f12" }, + { 0x0b, "%f11" }, + { rc(arg)|rc(fpr)|rc(sng)|0x0a, "%f10" }, + { 0x09, "%f9" }, + { rc(arg)|rc(fpr)|rc(sng)|0x08, "%f8" }, + { 0x07, "%f7" }, + { rc(arg)|rc(fpr)|rc(sng)|0x06, "%f6" }, + { 0x05, "%f5" }, + { rc(arg)|rc(fpr)|rc(sng)|0x04, "%f4" }, + 
{ 0x03, "%f3" }, + { rc(arg)|rc(fpr)|rc(sng)|0x02, "%f2" }, + { 0x01, "%f1" }, + { rc(arg)|rc(fpr)|rc(sng)|0x00, "%f0" }, +# endif { _NOREG, "" }, }; @@ -103,6 +160,9 @@ void _jit_init(jit_state_t *_jit) { _jitc->reglen = jit_size(_rvs) - 1; +# if __WORDSIZE == 64 + jit_carry = _NOREG; +# endif } void @@ -126,7 +186,12 @@ _jit_prolog(jit_state_t *_jit) _jitc->function->self.argi = _jitc->function->self.argf = _jitc->function->self.aoff = _jitc->function->self.alen = 0; /* float conversion */ +# if __WORDSIZE == 32 _jitc->function->self.aoff = -8; +# else + /* extra slots in case qmul is called */ + _jitc->function->self.aoff = -24; +# endif _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -163,7 +228,7 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); jit_dec_synth(); } - return (_jitc->function->self.aoff); + return (BIAS(_jitc->function->self.aoff)); } void @@ -273,10 +338,17 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { +# if __WORDSIZE == 32 if (u->code == jit_code_arg || u->code == jit_code_arg_f) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d); return (jit_arg_d_reg_p(u->u.w)); +# else + if (u->code == jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f); + return (jit_arg_d_reg_p(u->u.w)); +# endif } void @@ -315,12 +387,16 @@ _jit_arg(jit_state_t *_jit) if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { - offset = _jitc->function->self.size; +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + ++_jitc->function->self.argi; +# endif + offset = BIAS(_jitc->function->self.size); _jitc->function->self.size += sizeof(jit_word_t); } node = jit_new_node_ww(jit_code_arg, offset, ++_jitc->function->self.argn); - 
jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -329,16 +405,29 @@ _jit_arg_f(jit_state_t *_jit) { jit_node_t *node; jit_int32_t offset; +# if __WORDSIZE == 64 + jit_bool_t inc; +# endif assert(_jitc->function); +# if __WORDSIZE == 32 if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; - _jitc->function->self.size += sizeof(jit_float32_t); + _jitc->function->self.size += sizeof(jit_word_t); } +# else + inc = !jit_arg_reg_p(_jitc->function->self.argi); + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else + offset = BIAS(_jitc->function->self.size); + if (inc) + _jitc->function->self.size += sizeof(jit_word_t); +# endif node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); - jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -347,7 +436,11 @@ _jit_arg_d(jit_state_t *_jit) { jit_node_t *node; jit_int32_t offset; +# if __WORDSIZE == 64 + jit_bool_t inc; +# endif assert(_jitc->function); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi; _jitc->function->self.argi += 2; @@ -360,9 +453,18 @@ _jit_arg_d(jit_state_t *_jit) offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_float64_t); } +# else + inc = !jit_arg_reg_p(_jitc->function->self.argi); + if (jit_arg_d_reg_p(_jitc->function->self.argi)) + offset = _jitc->function->self.argi++; + else + offset = BIAS(_jitc->function->self.size); + if (inc) + _jitc->function->self.size += sizeof(jit_word_t); +# endif node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); - jit_link_prepare(); + jit_link_prolog(); return (node); } @@ -420,15 +522,48 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_i, u, v); + 
if (jit_arg_reg_p(v->u.w)) { +# if __WORDSIZE == 64 + jit_extr_i(u, _I0 + v->u.w); +# else + jit_movr(u, _I0 + v->u.w); +# endif + } + else + jit_ldxi_i(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t)); + jit_dec_synth(); +} + +# if __WORDSIZE == 64 +void +_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + assert(v->code == jit_code_arg); + jit_inc_synth_wp(getarg_i, u, v); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, _I0 + v->u.w); + else + jit_ldxi_ui(u, JIT_FP, + v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t)); + jit_dec_synth(); +} + +void +_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { assert(v->code == jit_code_arg); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _I0 + v->u.w); else - jit_ldxi_i(u, JIT_FP, v->u.w); + jit_ldxi_l(u, JIT_FP, v->u.w); jit_dec_synth(); } +# endif void _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) @@ -465,12 +600,20 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(v->code == jit_code_arg_f); assert(_jitc->function); jit_inc_synth_wp(getarg_f, u, v); +# if __WORDSIZE == 32 if (jit_arg_reg_p(v->u.w)) { - jit_stxi(-4, JIT_FP, _I0 + v->u.w); + jit_stxi_i(-4, JIT_FP, _I0 + v->u.w); jit_ldxi_f(u, JIT_FP, -4); } +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + jit_movr_f(u, (_F0 - (v->u.w << 1)) - 1); + } +# endif else - jit_ldxi_f(u, JIT_FP, v->u.w); + jit_ldxi_f(u, JIT_FP, v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t)); jit_dec_synth(); } @@ -479,12 +622,20 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { assert(v->code == jit_code_arg_f); jit_inc_synth_wp(putargr_f, u, v); +# if __WORDSIZE == 32 if (jit_arg_reg_p(v->u.w)) { jit_stxi_f(-4, JIT_FP, u); - jit_ldxi(_I0 + v->u.w, JIT_FP, -4); + jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4); } +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + 
jit_movr_f((_F0 - (v->u.w << 1)) - 1, u); + } +# endif else - jit_stxi_f(v->u.w, JIT_FP, u); + jit_stxi_f(v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t), JIT_FP, u); jit_dec_synth(); } @@ -494,15 +645,29 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_int32_t regno; assert(v->code == jit_code_arg_f); jit_inc_synth_fp(putargi_f, u, v); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); if (jit_arg_reg_p(v->u.w)) { jit_stxi_f(-4, JIT_FP, regno); - jit_ldxi(_I0 + v->u.w, JIT_FP, -4); + jit_ldxi_i(_I0 + v->u.w, JIT_FP, -4); } else jit_stxi_f(v->u.w, JIT_FP, regno); jit_unget_reg(regno); +# else + if (jit_arg_d_reg_p(v->u.w)) { + jit_live(_F0 - (v->u.w << 1)); /* pair of registers is live */ + jit_movi_f((_F0 - (v->u.w << 1)) - 1, u); + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(v->u.w + (__WORDSIZE >> 3) - + sizeof(jit_float32_t), JIT_FP, regno); + jit_unget_reg(regno); + } +# endif jit_dec_synth(); } @@ -513,18 +678,28 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(_jitc->function); jit_inc_synth_wp(getarg_d, u, v); if (jit_arg_d_reg_p(v->u.w)) { +# if __WORDSIZE == 32 jit_stxi(-8, JIT_FP, _I0 + v->u.w); jit_stxi(-4, JIT_FP, _I0 + v->u.w + 1); jit_ldxi_d(u, JIT_FP, -8); +# else + jit_movr_d(u, _F0 - (v->u.w << 1)); +# endif } +# if __WORDSIZE == 32 else if (jit_arg_reg_p(v->u.w)) { jit_stxi(-8, JIT_FP, _I0 + v->u.w); jit_ldxi_f(u, JIT_FP, -8); jit_ldxi_f(u + 1, JIT_FP, stack_framesize); } +# endif else { +# if __WORDSIZE == 32 jit_ldxi_f(u, JIT_FP, v->u.w); jit_ldxi_f(u + 1, JIT_FP, v->u.w + 4); +# else + jit_ldxi_d(u, JIT_FP, v->u.w); +# endif } jit_dec_synth(); } @@ -532,9 +707,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - jit_int32_t regno; + jit_int32_t regno; assert(v->code == jit_code_arg_d); jit_inc_synth_wp(putargr_d, u, 
v); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(v->u.w)) { jit_stxi_d(-8, JIT_FP, u); jit_ldxi(_I0 + v->u.w, JIT_FP, -8); @@ -559,15 +735,25 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_stxi(v->u.w + 4, JIT_FP, regno); jit_unget_reg(regno); } +# else + if (jit_arg_d_reg_p(v->u.w)) + jit_movr_d(_F0 - (v->u.w << 1), u); + else + jit_stxi_d(v->u.w, JIT_FP, u); +# endif jit_dec_synth(); } void _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) { - jit_int32_t regno, gpr; +# if __WORDSIZE == 32 + jit_int32_t gpr; +# endif + jit_int32_t regno; assert(v->code == jit_code_arg_d); jit_inc_synth_dp(putargi_d, u, v); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); if (jit_arg_d_reg_p(v->u.w)) { @@ -595,6 +781,16 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_unget_reg(gpr); } jit_unget_reg(regno); +# else + if (jit_arg_d_reg_p(v->u.w)) + jit_movi_d(_F0 - (v->u.w << 1), u); + else { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(v->u.w, JIT_FP, regno); + jit_unget_reg(regno); + } +# endif jit_dec_synth(); } @@ -608,7 +804,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; } else { - jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, u); +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->call.argi)) + ++_jitc->function->call.argi; +# endif + jit_stxi(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); } jit_dec_synth(); @@ -625,9 +826,14 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) ++_jitc->function->call.argi; } else { +# if __WORDSIZE == 64 + if (jit_arg_d_reg_p(_jitc->function->call.argi)) + ++_jitc->function->call.argi; +# endif regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_stxi(BIAS(_jitc->function->call.size + 
stack_framesize), + JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); } @@ -639,15 +845,39 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { jit_inc_synth_w(pushargr_f, u); jit_link_prepare(); +# if __WORDSIZE == 32 if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_stxi_f(-4, JIT_FP, u); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -4); + jit_stxi_f(-8, JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); ++_jitc->function->call.argi; } else { - jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, u); + jit_stxi_f(_jitc->function->call.size + stack_framesize, + JIT_SP, u); _jitc->function->call.size += sizeof(jit_float32_t); } +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + jit_stxi_f(BIAS(-8), JIT_FP, u); + jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + /* pair of registers is live */ + jit_live(_F0 - (_jitc->function->call.argi << 1)); + jit_movr_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4), + JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -657,18 +887,48 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_int32_t regno; jit_inc_synth_f(pushargi_f, u); jit_link_prepare(); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_stxi_f(-4, JIT_FP, regno); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -4); - ++_jitc->function->call.argi; + jit_stxi_f(-8, JIT_FP, regno); + jit_ldxi(_O0 + 
_jitc->function->call.argi, JIT_FP, -8); + _jitc->function->call.argi++; } else { - jit_stxi_f(_jitc->function->call.size + stack_framesize, JIT_SP, regno); + jit_stxi_f(_jitc->function->call.size + stack_framesize, + JIT_SP, regno); _jitc->function->call.size += sizeof(jit_float32_t); } jit_unget_reg(regno); +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(BIAS(-8), JIT_FP, regno); + jit_ldxi_i(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + jit_unget_reg(regno); + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + /* pair of registers is live */ + jit_live(_F0 - (_jitc->function->call.argi << 1)); + jit_movi_f((_F0 - (_jitc->function->call.argi << 1)) - 1, u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_sng); + jit_movi_f(regno, u); + jit_stxi_f(BIAS(_jitc->function->call.size + stack_framesize + 4), + JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -677,9 +937,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { jit_inc_synth_w(pushargr_d, u); jit_link_prepare(); +# if __WORDSIZE == 32 if (jit_arg_d_reg_p(_jitc->function->call.argi)) { - jit_stxi_d(-8, JIT_FP, u); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); + jit_stxi_d(BIAS(-8), JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4); _jitc->function->call.argi += 2; } @@ -697,6 +958,26 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) JIT_SP, u + 1); _jitc->function->call.size += sizeof(jit_float64_t); } +# else + if 
((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + jit_stxi_d(BIAS(-8), JIT_FP, u); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + jit_movr_d(_F0 - (_jitc->function->call.argi << 1), u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -706,11 +987,12 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_int32_t regno; jit_inc_synth_d(pushargi_d, u); jit_link_prepare(); +# if __WORDSIZE == 32 regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); if (jit_arg_d_reg_p(_jitc->function->call.argi)) { - jit_stxi_d(-8, JIT_FP, regno); - jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); + jit_stxi_d(BIAS(-8), JIT_FP, regno); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); jit_ldxi(_O0 + _jitc->function->call.argi + 1, JIT_FP, -4); _jitc->function->call.argi += 2; } @@ -718,7 +1000,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_f(-8, JIT_FP, regno); jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, -8); ++_jitc->function->call.argi; - jit_stxi_f(stack_framesize, JIT_SP, regno + 1); + jit_stxi_f(stack_framesize, JIT_SP, u + 1); _jitc->function->call.size += sizeof(jit_float32_t); } else { @@ -729,6 +1011,32 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) _jitc->function->call.size += sizeof(jit_float64_t); } jit_unget_reg(regno); +# else + if ((_jitc->function->call.call & jit_call_varargs) && + jit_arg_reg_p(_jitc->function->call.argi)) { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(BIAS(-8), 
JIT_FP, regno); + jit_ldxi(_O0 + _jitc->function->call.argi, JIT_FP, BIAS(-8)); + ++_jitc->function->call.argi; + jit_unget_reg(regno); + } + else if (!(_jitc->function->call.call & jit_call_varargs) && + jit_arg_d_reg_p(_jitc->function->call.argi)) { + jit_movi_d(_F0 - (_jitc->function->call.argi << 1), u); + if (!jit_arg_reg_p(_jitc->function->call.argi)) + _jitc->function->call.size += sizeof(jit_float64_t); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_fpr | jit_class_dbl); + jit_movi_d(regno, u); + jit_stxi_d(BIAS(_jitc->function->call.size + stack_framesize), + JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_float64_t); + } +# endif jit_dec_synth(); } @@ -740,10 +1048,18 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) spec = jit_class(_rvs[regno].spec); if ((spec & (jit_class_arg|jit_class_gpr)) == (jit_class_arg|jit_class_gpr)) { - regno = _O0 - regno; + regno -= _O0; if (regno >= 0 && regno < node->v.w) return (1); } +# if __WORDSIZE == 64 + if ((spec & (jit_class_arg|jit_class_fpr)) == + (jit_class_arg|jit_class_fpr)) { + regno = _F0 - (regno >> 1); + if (regno >= 0 && regno < node->v.w) + return (1); + } +# endif return (0); } @@ -817,12 +1133,36 @@ _jit_retval_us(jit_state_t *_jit, jit_int32_t r0) void _jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); +# if __WORDSIZE == 32 + if (r0 != _O0) + jit_movr(r0, _O0); +# else + jit_extr_i(r0, _O0); +# endif + jit_dec_synth(); +} + +# if __WORDSIZE == 64 +void +_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) +{ + jit_inc_synth_w(retval_i, r0); + if (r0 != _O0) + jit_extr_ui(r0, _O0); + jit_dec_synth(); +} + +void +_jit_retval_l(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_i, r0); if (r0 != _O0) jit_movr(r0, _O0); jit_dec_synth(); } +# endif void _jit_retval_f(jit_state_t *_jit, jit_int32_t r0) @@ -1043,6 +1383,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); 
case_rr(trunc, _f_i); case_rr(trunc, _d_i); +#if __WORDSIZE == 64 + case_rr(trunc, _f_l); + case_rr(trunc, _d_l); +#endif case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1073,6 +1417,12 @@ _emit_code(jit_state_t *_jit) case_rw(ld, _us); case_rr(ld, _i); case_rw(ld, _i); +#if __WORDSIZE == 64 + case_rr(ld, _ui); + case_rw(ld, _ui); + case_rr(ld, _l); + case_rw(ld, _l); +#endif case_rrr(ldx, _c); case_rrw(ldx, _c); case_rrr(ldx, _uc); @@ -1083,24 +1433,45 @@ _emit_code(jit_state_t *_jit) case_rrw(ldx, _us); case_rrr(ldx, _i); case_rrw(ldx, _i); +#if __WORDSIZE == 64 + case_rrr(ldx, _ui); + case_rrw(ldx, _ui); + case_rrr(ldx, _l); + case_rrw(ldx, _l); +#endif case_rr(st, _c); case_wr(st, _c); case_rr(st, _s); case_wr(st, _s); case_rr(st, _i); case_wr(st, _i); +#if __WORDSIZE == 64 + case_rr(st, _l); + case_wr(st, _l); +#endif case_rrr(stx, _c); case_wrr(stx, _c); case_rrr(stx, _s); case_wrr(stx, _s); case_rrr(stx, _i); case_wrr(stx, _i); +#if __WORDSIZE == 64 + case_rrr(stx, _l); + case_wrr(stx, _l); +#endif case_rr(hton, _us); case_rr(hton, _ui); +#if __WORDSIZE == 64 + case_rr(hton, _ul); +#endif case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); +#if __WORDSIZE == 64 + case_rr(ext, _i); + case_rr(ext, _ui); +#endif case_rr(mov,); case jit_code_movi: if (node->flag & jit_flag_node) { @@ -1418,6 +1789,9 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: +#if __WORDSIZE == 64 + case jit_code_getarg_ui: case jit_code_getarg_l: +#endif case jit_code_getarg_f: case jit_code_getarg_d: case jit_code_putargr: case jit_code_putargi: case jit_code_putargr_f: case jit_code_putargi_f: @@ -1428,6 +1802,9 @@ _emit_code(jit_state_t *_jit) case jit_code_retval_c: case jit_code_retval_uc: case jit_code_retval_s: case jit_code_retval_us: case jit_code_retval_i: +#if __WORDSIZE == 64 + case jit_code_retval_ui: case jit_code_retval_l: +#endif case 
jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: @@ -1435,9 +1812,34 @@ _emit_code(jit_state_t *_jit) default: abort(); } +# if __WORDSIZE == 64 + if (jit_carry != _NOREG) { + switch (node->code) { + case jit_code_note: + case jit_code_addcr: case jit_code_addci: + case jit_code_addxr: case jit_code_addxi: + case jit_code_subcr: case jit_code_subci: + case jit_code_subxr: case jit_code_subxi: + break; + default: + jit_unget_reg(jit_carry); + jit_carry = _NOREG; + break; + } + } +# endif jit_regarg_clr(node, value); +# if __WORDSIZE == 64 + if (jit_carry == _NOREG) + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + else { + assert(jit_regset_scan1(&_jitc->regarg, 0) == jit_carry); + assert(jit_regset_scan1(&_jitc->regarg, jit_carry + 1) == ULONG_MAX); + } + assert(_jitc->synth == 0); +# else assert(_jitc->regarg == 0 && _jitc->synth == 0); - /* update register live state */ +# endif jit_reglive(node); } #undef case_brf @@ -1478,13 +1880,13 @@ jit_flush(void *fptr, void *tptr) void _emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0) { - ldxi_i(rn(r0), rn(r1), i0); + ldxi(rn(r0), rn(r1), i0); } void _emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1) { - stxi_i(i0, rn(r0), rn(r1)); + stxi(i0, rn(r0), rn(r1)); } void diff --git a/lib/lightning.c b/lib/lightning.c index 31205034b..bce5968c3 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -497,6 +497,120 @@ jit_regset_scan1(jit_regset_t *set, jit_int32_t offset) } return (ULONG_MAX); } + +#elif __sparc__ && __WORDSIZE == 64 +void +jit_regset_com(jit_regset_t *u, jit_regset_t *v) +{ + u->rl = ~v->rl; u->rh = ~v->rh; +} + +void +jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl & w->rl; u->rh = v->rh & w->rh; +} + +void +jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl | w->rl; u->rh = v->rh | w->rh; +} + +void +jit_regset_xor(jit_regset_t 
*u, jit_regset_t *v, jit_regset_t *w) +{ + u->rl = v->rl ^ w->rl; u->rh = v->rh ^ w->rh; +} + +void +jit_regset_set(jit_regset_t *u, jit_regset_t *v) +{ + u->rl = v->rl; u->rh = v->rh; +} + +void +jit_regset_set_mask(jit_regset_t *u, jit_int32_t v) +{ + jit_bool_t w = !!(v & (v - 1)); + + assert(v >= 0 && v <= 128); + if (v == 0) + u->rl = u->rh = -1LL; + else if (v <= 64) { + u->rl = w ? (1LL << v) - 1 : -1LL; + u->rh = 0; + } + else { + u->rl = -1LL; + u->rh = w ? (1LL << (v - 64)) - 1 : -1LL; + } +} + +jit_bool_t +jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v) +{ + return !((u->rl == v && u->rh == 0)); +} + +void +jit_regset_set_ui(jit_regset_t *u, jit_word_t v) +{ + u->rl = v; + u->rh = 0; +} + +jit_bool_t +jit_regset_set_p(jit_regset_t *u) +{ + return (u->rl || u->rh); +} + +void +jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 128); + if (bit < 64) + set->rl &= ~(1LL << bit); + else + set->rh &= ~(1LL << (bit - 64)); +} + +void +jit_regset_setbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 127); + if (bit < 64) + set->rl |= 1LL << bit; + else + set->rh |= 1LL << (bit - 64); +} + +jit_bool_t +jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit) +{ + assert(bit >= 0 && bit <= 127); + if (bit < 64) + return (!!(set->rl & (1LL << bit))); + else + return (!!(set->rh & (1LL << (bit - 64)))); +} + +unsigned long +jit_regset_scan1(jit_regset_t *set, jit_int32_t offset) +{ + assert(offset >= 0 && offset <= 127); + for (; offset < 64; offset++) { + if (set->rl & (1LL << offset)) + return (offset); + } + for (; offset < 128; offset++) { + if (set->rh & (1LL << (offset - 64))) + return (offset); + } + return (ULONG_MAX); +} + #else unsigned long jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)