diff --git a/ChangeLog b/ChangeLog
index ebff1f877..3e6e0dbe8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,31 @@
+2014-09-24 Paulo Andrade
+
+	* check/lightning.c: Add a hopefully temporary kludge to not use
+	sprintf and sscanf returned by dlsym. This is required to pass
+	the varargs test.
+
+	* include/lightning/jit_private.h: Use symbolic name for first
+	integer register argument, as this is different in sysv and
+	win64 abi.
+
+	* include/lightning/jit_x86.h: Add conditionals and definitions
+	for Windows x64 (under __CYGWIN__ preprocessor conditional).
+
+	* lib/jit_x86-cpu.c: Correct one instruction encoding bug, that
+	was working by accident. Only use rax to rdx for some byte
+	operations to work on compatibility mode (that is, to generate
+	the proper encoding, instead of actually generating encoding
+	for high byte registers, e.g. %bh).
+	Add proper prolog and epilog for windows x64.
+
+	* lib/jit_x86-sse.c: Correct a swapped rex prefix for float
+	operations.
+
+	* lib/jit_x86.c: Adjust to support Windows x64 abi.
+
+	* check/check.x87.nodata.sh: New file, previously used but that
+	was missing from git.
+
 2014-09-07 Paulo Andrade
 
 	* lib/lightning.c: Mark all registers advertised as live, as
diff --git a/check/check.x87.nodata.sh b/check/check.x87.nodata.sh
new file mode 100755
index 000000000..1582e9f62
--- /dev/null
+++ b/check/check.x87.nodata.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+test=`basename $0 | sed -e 's|\.x87.nodata$||'`
+./lightning -data=0 -mx87=1 $srcdir/$test.tst | tr -d \\r > $test.out
+if test $? != 0; then
+  exit 1
+fi
+
+cmp -s $srcdir/$test.ok $test.out
+result=$?
+if test $result != 0; then + diff $srcdir/$test.ok $test.out + rm $test.out + exit 1 +fi +rm $test.out diff --git a/check/lightning.c b/check/lightning.c index 72555be12..47e39a3c3 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -2384,9 +2384,20 @@ dynamic(void) char *string; (void)identifier('@'); if ((label = get_label_by_name(parser.string)) == NULL) { - value = dlsym(DL_HANDLE, parser.string + 1); - if ((string = dlerror())) - error("%s", string); +#if __CYGWIN__ + /* FIXME kludge to pass varargs test case, otherwise, + * will not print/scan float values */ + if (strcmp(parser.string + 1, "sprintf") == 0) + value = sprintf; + else if (strcmp(parser.string + 1, "sscanf") == 0) + value = sscanf; + else +#endif + { + value = dlsym(DL_HANDLE, parser.string + 1); + if ((string = dlerror())) + error("%s", string); + } label = new_label(label_kind_dynamic, parser.string, value); } parser.type = type_p; diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 9b50d1f86..b620a1b06 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -54,7 +54,11 @@ # define JIT_FRET _ST0 typedef jit_uint32_t jit_regset_t; # else -# define JIT_RA0 _RDI +# if __CYGWIN__ +# define JIT_RA0 _RCX +# else +# define JIT_RA0 _RDI +# endif # define JIT_FA0 _XMM0 # define JIT_FRET _XMM0 typedef jit_uint64_t jit_regset_t; diff --git a/include/lightning/jit_x86.h b/include/lightning/jit_x86.h index bf33125c2..b96eab607 100644 --- a/include/lightning/jit_x86.h +++ b/include/lightning/jit_x86.h @@ -60,37 +60,89 @@ typedef enum { _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7, # define jit_sse_reg_p(reg) ((reg) >= _XMM0 && (reg) <= _XMM7) #else -# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) -# define jit_r(i) (_RAX + (i)) -# define jit_r_num() 4 -# define jit_v(i) (_RBX + (i)) -# define jit_v_num() 4 -# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) -# define jit_f(index) (_XMM8 + (index)) -# define jit_f_num() 8 -# 
define JIT_R0 _RAX -# define JIT_R1 _R10 -# define JIT_R2 _R11 -# define JIT_R3 _R12 +# if __CYGWIN__ +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 3 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 7 +# define jit_arg_f_reg_p(i) jit_arg_reg_p(i) +# define jit_f(index) (_XMM4 + (index)) +# define jit_f_num() 12 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_V0 _RBX +# define JIT_V1 _RDI +# define JIT_V2 _RSI +# define JIT_V3 _R12 +# define JIT_V4 _R13 +# define JIT_V5 _R14 +# define JIT_V6 _R15 + /* Volatile - Return value register */ + _RAX, + /* Volatile */ + _R10, _R11, + /* Nonvolatile */ + _RBX, _RDI, _RSI, + _R12, _R13, _R14, _R15, + /* Volatile - Integer arguments (4 to 1) */ + _R9, _R8, _RDX, _RCX, + /* Nonvolatile */ + _RSP, _RBP, +# define JIT_F0 _XMM4 +# define JIT_F1 _XMM5 +# define JIT_F2 _XMM6 +# define JIT_F3 _XMM7 +# define JIT_F4 _XMM8 +# define JIT_F5 _XMM9 +# define JIT_F6 _XMM10 +# define JIT_F7 _XMM11 +# define JIT_F8 _XMM12 +# define JIT_F9 _XMM13 +# define JIT_F10 _XMM14 +# define JIT_F11 _XMM15 + /* Volatile */ + _XMM4, _XMM5, + /* Nonvolatile */ + _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, + _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, + /* Volatile - FP arguments (4 to 1) */ + _XMM3, _XMM2, _XMM1, _XMM0, +# define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0) +# else +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 4 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 4 +# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +# define jit_f(index) (_XMM8 + (index)) +# define jit_f_num() 8 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_R3 _R12 _RAX, _R10, _R11, _R12, -# define JIT_V0 _RBX -# define JIT_V1 _R13 -# define JIT_V2 _R14 -# define JIT_V3 _R15 +# define JIT_V0 _RBX +# define JIT_V1 _R13 +# define JIT_V2 _R14 +# define JIT_V3 _R15 _RBX, _R13, _R14, _R15, _R9, 
_R8, _RCX, _RDX, _RSI, _RDI, _RSP, _RBP, -# define JIT_F0 _XMM8 -# define JIT_F1 _XMM9 -# define JIT_F2 _XMM10 -# define JIT_F3 _XMM11 -# define JIT_F4 _XMM12 -# define JIT_F5 _XMM13 -# define JIT_F6 _XMM14 -# define JIT_F7 _XMM15 +# define JIT_F0 _XMM8 +# define JIT_F1 _XMM9 +# define JIT_F2 _XMM10 +# define JIT_F3 _XMM11 +# define JIT_F4 _XMM12 +# define JIT_F5 _XMM13 +# define JIT_F6 _XMM14 +# define JIT_F7 _XMM15 _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, _XMM7, _XMM6, _XMM5, _XMM4, _XMM3, _XMM2, _XMM1, _XMM0, -# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) +# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) +# endif #endif _ST0, _ST1, _ST2, _ST3, _ST4, _ST5, _ST6, _ST7, # define JIT_NOREG _NOREG diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index ce67c20b1..c24ad16e8 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -39,10 +39,15 @@ # define can_sign_extend_int_p(im) \ (((im) >= 0 && (long)(im) <= 0x7fffffffL) || \ ((im) < 0 && (long)(im) > -0x80000000L)) -# define can_zero_extend_int_p(im) \ - ((im) >= 0 && (im) < 0x80000000L) -# define fits_uint32_p(im) ((im & 0xffffffff00000000L) == 0) -# define reg8_p(rn) 1 +# define can_zero_extend_int_p(im) \ + ((im) >= 0 && (im) < 0x80000000L) +# define fits_uint32_p(im) (((im) & 0xffffffff00000000L) == 0) +# if __CYGWIN__ +# define reg8_p(rn) \ + (r7(rn) >= _RAX_REGNO && r7(rn) <= _RBX_REGNO) +# else +# define reg8_p(rn) 1 +# endif # endif # define _RAX_REGNO 0 # define _RCX_REGNO 1 @@ -60,8 +65,8 @@ # define _R13_REGNO 13 # define _R14_REGNO 14 # define _R15_REGNO 15 -# define r7(reg) (reg & 7) -# define r8(reg) (reg & 15) +# define r7(reg) ((reg) & 7) +# define r8(reg) ((reg) & 15) # define _SCL1 0x00 # define _SCL2 0x01 # define _SCL4 0x02 @@ -634,6 +639,9 @@ static void _prolog(jit_state_t*, jit_node_t*); static void _epilog(jit_state_t*, jit_node_t*); # define patch_at(node, instr, label) _patch_at(_jit, node, instr, label) static void 
_patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); +# if __WORDSIZE == 64 && !defined(HAVE_FFSL) +static int ffsl(long); +# endif #endif #if CODE @@ -1871,7 +1879,7 @@ _ci(jit_state_t *_jit, reg = jit_get_reg(jit_class_gpr|jit_class_rg8); ixorr(rn(reg), rn(reg)); icmpi(r1, i0); - cc(code, reg); + cc(code, rn(reg)); movr(r0, rn(reg)); jit_unget_reg(reg); } @@ -2596,6 +2604,7 @@ _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r1); + rex(0, 0, rn(reg), _NOREG, r0); ic(0x88); rx(rn(reg), 0, r0, _NOREG, _SCL1); jit_unget_reg(reg); @@ -2615,6 +2624,7 @@ _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r0); + rex(0, 0, rn(reg), _NOREG, _NOREG); ic(0x88); rx(rn(reg), i0, _NOREG, _NOREG, _SCL1); jit_unget_reg(reg); @@ -2719,6 +2729,7 @@ _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r2); + rex(0, 0, rn(reg), r1, r0); ic(0x88); rx(rn(reg), 0, r0, r1, _SCL1); jit_unget_reg(reg); @@ -2738,6 +2749,7 @@ _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r1); + rex(0, 0, rn(reg), _NOREG, r0); ic(0x88); rx(rn(reg), i0, r0, _NOREG, _SCL1); jit_unget_reg(reg); @@ -3310,14 +3322,18 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { -#if __WORDSIZE == 32 - _jitc->function->stack = (((_jitc->function->self.alen - - _jitc->function->self.aoff) + 15) & -16) + 12; +#if __WORDSIZE == 64 && __CYGWIN__ + _jitc->function->stack = (((/* first 32 bytes must be allocated */ + (_jitc->function->self.alen > 32 ? 
+ _jitc->function->self.alen : 32) - + /* align stack at 16 bytes */ + _jitc->function->self.aoff) + 15) & -16) + + stack_adjust; #else _jitc->function->stack = (((_jitc->function->self.alen - - _jitc->function->self.aoff) + 15) & -16) + 8; + _jitc->function->self.aoff) + 15) & -16) + + stack_adjust; #endif - /* callee save registers */ subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t)); #if __WORDSIZE == 32 @@ -3328,6 +3344,42 @@ _prolog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) stxi( 4, _RSP_REGNO, _RBX_REGNO); #else +# if __CYGWIN__ + if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) + sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) + sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) + sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) + sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) + sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) + sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) + sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM8)) + sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) + sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) + sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R15)) + stxi(56, _RSP_REGNO, _R15_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R14)) + stxi(48, _RSP_REGNO, _R14_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R13)) + stxi(40, _RSP_REGNO, _R13_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R12)) + stxi(32, _RSP_REGNO, 
_R12_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) + stxi(24, _RSP_REGNO, _RSI_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) + stxi(16, _RSP_REGNO, _RDI_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) + stxi( 8, _RSP_REGNO, _RBX_REGNO); +# else if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) stxi(40, _RSP_REGNO, _RBX_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _R12)) @@ -3338,6 +3390,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi(16, _RSP_REGNO, _R14_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _R15)) stxi( 8, _RSP_REGNO, _R15_REGNO); +# endif #endif stxi(0, _RSP_REGNO, _RBP_REGNO); movr(_RBP_REGNO, _RSP_REGNO); @@ -3359,6 +3412,42 @@ _epilog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) ldxi(_RBX_REGNO, _RSP_REGNO, 4); #else +# if __CYGWIN__ + if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) + sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) + sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) + sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) + sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) + sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) + sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) + sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM8)) + sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) + sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) + sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64); + if (jit_regset_tstbit(&_jitc->function->regset, _R15)) + ldxi(_R15_REGNO, 
_RSP_REGNO, 56); + if (jit_regset_tstbit(&_jitc->function->regset, _R14)) + ldxi(_R14_REGNO, _RSP_REGNO, 48); + if (jit_regset_tstbit(&_jitc->function->regset, _R13)) + ldxi(_R13_REGNO, _RSP_REGNO, 40); + if (jit_regset_tstbit(&_jitc->function->regset, _R12)) + ldxi(_R12_REGNO, _RSP_REGNO, 32); + if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) + ldxi(_RSI_REGNO, _RSP_REGNO, 24); + if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) + ldxi(_RDI_REGNO, _RSP_REGNO, 16); + if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) + ldxi(_RBX_REGNO, _RSP_REGNO, 8); +# else if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) ldxi(_RBX_REGNO, _RSP_REGNO, 40); if (jit_regset_tstbit(&_jitc->function->regset, _R12)) @@ -3369,6 +3458,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node) ldxi(_R14_REGNO, _RSP_REGNO, 16); if (jit_regset_tstbit(&_jitc->function->regset, _R15)) ldxi(_R15_REGNO, _RSP_REGNO, 8); +# endif #endif ldxi(_RBP_REGNO, _RSP_REGNO, 0); addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t)); @@ -3392,4 +3482,23 @@ _patch_at(jit_state_t *_jit, jit_node_t *node, break; } } + +# if __WORDSIZE == 64 && !defined(HAVE_FFSL) +static int +ffsl(long i) +{ + int bit; +# if __CYGWIN__ + /* Bug workaround */ + if ((int)i == (int)0x80000000) + bit = 32; + else +# endif + if ((bit = ffs((int)i)) == 0) { + if ((bit = ffs((int)((unsigned long)i >> 32)))) + bit += 32; + } + return (bit); +} +# endif #endif diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index edac0fcba..d3bd7aeea 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -18,6 +18,16 @@ */ #if PROTO +# define _XMM6_REGNO 6 +# define _XMM7_REGNO 7 +# define _XMM8_REGNO 8 +# define _XMM9_REGNO 9 +# define _XMM10_REGNO 10 +# define _XMM11_REGNO 11 +# define _XMM12_REGNO 12 +# define _XMM13_REGNO 13 +# define _XMM14_REGNO 14 +# define _XMM15_REGNO 15 #define X86_SSE_MOV 0x10 #define X86_SSE_MOV1 0x11 #define X86_SSE_MOVLP 0x12 @@ -457,7 +467,7 @@ _sse_b##name##i_##type(jit_state_t *_jit, \ 
static void _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1) { - rex(0, 0, r1, 0, r0); + rex(0, 0, r0, 0, r1); ic(0x0f); ic(c); mrm(0x03, r7(r0), r7(r1)); diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 5eaad68e9..dc386bea2 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -24,13 +24,17 @@ #define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) #if __WORDSIZE == 32 -# define stack_alignment 4 # define stack_framesize 20 -# define CVT_OFFSET -12 +# define stack_adjust 12 +# define CVT_OFFSET -12 #else -# define stack_alignment 8 -# define stack_framesize 56 -# define CVT_OFFSET -8 +# if __CYGWIN__ +# define stack_framesize 152 +# else +# define stack_framesize 56 +# endif +# define stack_adjust 8 +# define CVT_OFFSET -8 #endif /* @@ -83,6 +87,40 @@ jit_register_t _rvs[] = { { rc(fpr) | 5, "st(5)" }, { rc(fpr) | 6, "st(6)" }, { rc(fpr) | 7, "st(7)" }, +#else +# if __CYGWIN__ + { rc(gpr) | rc(rg8) | 0, "%rax" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" }, + { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, + { rc(sav) | rc(gpr) | 7, "%rdi" }, + { rc(sav) | rc(gpr) | 6, "%rsi" }, + { rc(sav) | rc(gpr) | 12, "%r12" }, + { rc(sav) | rc(gpr) | 13, "%r13" }, + { rc(sav) | rc(gpr) | 14, "%r14" }, + { rc(sav) | rc(gpr) | 15, "%r15" }, + { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, + { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, + { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, + { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, + { rc(sav) | 4, "%rsp" }, + { rc(sav) | 5, "%rbp" }, + { rc(xpr) | rc(fpr) | 4, "%xmm4" }, + { rc(xpr) | rc(fpr) | 5, "%xmm5" }, + { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" }, + { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" }, + { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" }, + { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" }, + { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" }, + { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" }, + { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" }, + { rc(sav) | rc(xpr) | 
rc(fpr) | 13, "%xmm13" }, + { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" }, + { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" }, + { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, + { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, + { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, + { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, #else /* %rax is a pseudo flag argument for varargs functions */ { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" }, @@ -117,6 +155,7 @@ jit_register_t _rvs[] = { { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, +# endif { rc(fpr) | 0, "st(0)" }, { rc(fpr) | 1, "st(1)" }, { rc(fpr) | 2, "st(2)" }, @@ -429,8 +468,12 @@ _jit_arg(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->self.argi < 6) + if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; +# if __CYGWIN__ + _jitc->function->self.size += sizeof(jit_word_t); +# endif + } else #endif { @@ -446,7 +489,11 @@ _jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset) #if __WORDSIZE == 32 return (0); #else +# if __CYGWIN__ + return (offset >= 0 && offset < 4); +# else return (offset >= 0 && offset < 6); +# endif #endif } @@ -457,8 +504,15 @@ _jit_arg_f(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->self.argf < 8) +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + _jitc->function->self.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; +# endif else #endif { @@ -478,7 +532,11 @@ _jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset) #if __WORDSIZE == 32 return (0); #else +# if __CYGWIN__ + return (offset >= 0 && offset < 4); +# else return (offset >= 0 && offset < 8); +# endif #endif } @@ -489,8 +547,15 @@ _jit_arg_d(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 
- if (_jitc->function->self.argf < 8) +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + _jitc->function->self.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; +# endif else #endif { @@ -510,8 +575,8 @@ void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_c(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_c(u, _RBP, v->u.w); @@ -521,8 +586,8 @@ void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_uc(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_uc(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_uc(u, _RBP, v->u.w); @@ -532,8 +597,8 @@ void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_s(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_s(u, _RBP, v->u.w); @@ -543,8 +608,8 @@ void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_us(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_us(u, _RBP, v->u.w); @@ -554,8 +619,8 @@ void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_i(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_i(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_i(u, _RBP, v->u.w); @@ -565,8 +630,8 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - if (v->u.w < 6) - jit_extr_ui(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, JIT_RA0 - v->u.w); else jit_ldxi_ui(u, _RBP, v->u.w); } @@ -574,8 +639,8 @@ 
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - if (v->u.w < 6) - jit_movr(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_movr(u, JIT_RA0 - v->u.w); else jit_ldxi_l(u, _RBP, v->u.w); } @@ -585,7 +650,7 @@ void _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 8) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(u, _XMM0 - v->u.w); else #endif @@ -596,7 +661,7 @@ void _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 8) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(u, _XMM0 - v->u.w); else #endif @@ -608,9 +673,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argi < 6) { - jit_movr(_RDI - _jitc->function->call.argi, u); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(JIT_RA0 - _jitc->function->call.argi, u); ++_jitc->function->call.argi; +# if __CYGWIN__ + _jitc->function->call.size += sizeof(jit_word_t); +# endif } else #endif @@ -627,9 +695,12 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argi < 6) { - jit_movi(_RDI - _jitc->function->call.argi, u); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(JIT_RA0 - _jitc->function->call.argi, u); ++_jitc->function->call.argi; +# if __CYGWIN__ + _jitc->function->call.size += sizeof(jit_word_t); +# endif } else #endif @@ -647,10 +718,24 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr_f(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_f(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_i(JIT_RA0 - 
_jitc->function->call.argi, _RSP,
+		       _jitc->function->call.size);
+	}
+	++_jitc->function->call.argi;
+	_jitc->function->call.size += sizeof(jit_word_t);
+    }
+# else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
 	jit_movr_f(_XMM0 - _jitc->function->call.argf, u);
 	++_jitc->function->call.argf;
     }
+# endif
     else
 #endif
     {
@@ -666,10 +751,24 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
 
     assert(_jitc->function);
 #if __WORDSIZE == 64
-    if (_jitc->function->call.argf < 8) {
+# if __CYGWIN__
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+	jit_movi_f(_XMM0 - _jitc->function->call.argi, u);
+	if (_jitc->function->call.call & jit_call_varargs) {
+	    jit_stxi_f(_jitc->function->call.size, _RSP,
+		       _XMM0 - _jitc->function->call.argi);
+	    jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP,
+		       _jitc->function->call.size);
+	}
+	++_jitc->function->call.argi;
+	_jitc->function->call.size += sizeof(jit_word_t);
+    }
+# else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
 	jit_movi_f(_XMM0 - _jitc->function->call.argf, u);
 	++_jitc->function->call.argf;
     }
+# endif
     else
 #endif
     {
@@ -686,10 +785,24 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
 #if __WORDSIZE == 64
-    if (_jitc->function->call.argf < 8) {
+# if __CYGWIN__
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+	jit_movr_d(_XMM0 - _jitc->function->call.argi, u);
+	if (_jitc->function->call.call & jit_call_varargs) {
+	    jit_stxi_d(_jitc->function->call.size, _RSP,
+		       _XMM0 - _jitc->function->call.argi);
+	    jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP,
+		       _jitc->function->call.size);
+	}
+	++_jitc->function->call.argi;
+	_jitc->function->call.size += sizeof(jit_word_t);
+    }
+# else
+    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
 	jit_movr_d(_XMM0 - _jitc->function->call.argf, u);
 	++_jitc->function->call.argf;
     }
+# endif
     else
 #endif
     {
@@ -705,10 +818,24 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
 
     assert(_jitc->function);
 #if __WORDSIZE == 64
-    if
(_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi_d(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_d(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP, + _jitc->function->call.size); + } + ++_jitc->function->call.argi; + _jitc->function->call.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_d(_XMM0 - _jitc->function->call.argf, u); ++_jitc->function->call.argf; } +# endif else #endif { @@ -729,7 +856,7 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) spec = jit_class(_rvs[regno].spec); if (spec & jit_class_arg) { if (spec & jit_class_gpr) { - regno = _RDI - regno; + regno = JIT_RA0 - regno; if (regno >= 0 && regno < node->v.w) return (1); } @@ -754,6 +881,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; #if __WORDSIZE == 64 +# if !__CYGWIN__ if (_jitc->function->call.call & jit_call_varargs) { if (jit_regno(reg) == _RAX) { reg = jit_get_reg(jit_class_gpr); @@ -766,6 +894,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) if (reg != r0) jit_unget_reg(reg); } +# endif #endif call = jit_callr(reg); call->v.w = _jitc->function->call.argi;