From f469a5c87b0fc73765e65ad853b4e5868f4f31b4 Mon Sep 17 00:00:00 2001 From: Paulo Andrade Date: Wed, 24 Sep 2014 11:03:20 -0300 Subject: [PATCH] x86: Build and pass all tests under cygwin64 (Windows x64) * check/lightning.c: Add a hopefully temporary kludge to not use sprintf and sscanf returned by dlsym. This is required to pass the varargs test. * include/lightning/jit_private.h: Use symbolic name for first integer register argument, as this is different in sysv and win64 abi. * include/lightning/jit_x86.h: Add conditionals and definitions for Windows x64 (under __CYGWIN__ preprocessor conditional). * lib/jit_x86-cpu.c: Correct one instruction encoding bug, that was working by accident. Only use rax to rdx for some byte operations to work on compatibility mode (that is, to generate the proper encoding, instead of actually generating encoding for high byte registers, e.g. %bh). Add proper prolog and epilog for windows x64. * lib/jit_x86-sse.c: Correct a swapped rex prefix for float operations. * lib/jit_x86.c: Adjust to support Windows x64 abi. * check/check.x87.nodata.sh: New file, previously used but that was missing from git. --- ChangeLog | 28 +++++ check/check.x87.nodata.sh | 15 +++ check/lightning.c | 17 ++- include/lightning/jit_private.h | 6 +- include/lightning/jit_x86.h | 102 +++++++++++++---- lib/jit_x86-cpu.c | 133 ++++++++++++++++++++-- lib/jit_x86-sse.c | 12 +- lib/jit_x86.c | 195 ++++++++++++++++++++++++++------ 8 files changed, 433 insertions(+), 75 deletions(-) create mode 100755 check/check.x87.nodata.sh diff --git a/ChangeLog b/ChangeLog index ebff1f877..3e6e0dbe8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,31 @@ +2014-09-24 Paulo Andrade + + * check/lightning.c: Add a hopefully temporary kludge to not use + sprintf and sscanf returned by dlsym. This is required to pass + the varargs test. + + * include/lightning/jit_private.h: Use symbolic name for first + integer register argument, as this is different in sysv and + win64 abi. 
+ + * include/lightning/jit_x86.h: Add conditionals and definitions + for Windows x64 (under __CYGWIN__ preprocessor conditional). + + * lib/jit_x86-cpu.c: Correct one instruction encoding bug, that + was working by accident. Only use rax to rdx for some byte + operations to work on compatibility mode (that is, to generate + the proper encoding, instead of actually generating encoding + for high byte registers, e.g. %bh). + Add proper prolog and epilog for windows x64. + + * lib/jit_x86-sse.c: Correct a swapped rex prefix for float + operations. + + * lib/jit_x86.c: Adjust to support Windows x64 abi. + + * check/check.x87.nodata.sh: New file, previously used but that + was missing from git. + 2014-09-07 Paulo Andrade * lib/lightning.c: Mark all registers advertised as live, as diff --git a/check/check.x87.nodata.sh b/check/check.x87.nodata.sh new file mode 100755 index 000000000..1582e9f62 --- /dev/null +++ b/check/check.x87.nodata.sh @@ -0,0 +1,15 @@ +#!/bin/sh +test=`basename $0 | sed -e 's|\.x87.nodata$||'` +./lightning -data=0 -mx87=1 $srcdir/$test.tst | tr -d \\r > $test.out +if test $? != 0; then + exit 1 +fi + +cmp -s $srcdir/$test.ok $test.out +result=$? 
+if test $result != 0; then + diff $srcdir/$test.ok $test.out + rm $test.out + exit 1 +fi +rm $test.out diff --git a/check/lightning.c b/check/lightning.c index 72555be12..47e39a3c3 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -2384,9 +2384,20 @@ dynamic(void) char *string; (void)identifier('@'); if ((label = get_label_by_name(parser.string)) == NULL) { - value = dlsym(DL_HANDLE, parser.string + 1); - if ((string = dlerror())) - error("%s", string); +#if __CYGWIN__ + /* FIXME kludge to pass varargs test case, otherwise, + * will not print/scan float values */ + if (strcmp(parser.string + 1, "sprintf") == 0) + value = sprintf; + else if (strcmp(parser.string + 1, "sscanf") == 0) + value = sscanf; + else +#endif + { + value = dlsym(DL_HANDLE, parser.string + 1); + if ((string = dlerror())) + error("%s", string); + } label = new_label(label_kind_dynamic, parser.string, value); } parser.type = type_p; diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 9b50d1f86..b620a1b06 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -54,7 +54,11 @@ # define JIT_FRET _ST0 typedef jit_uint32_t jit_regset_t; # else -# define JIT_RA0 _RDI +# if __CYGWIN__ +# define JIT_RA0 _RCX +# else +# define JIT_RA0 _RDI +# endif # define JIT_FA0 _XMM0 # define JIT_FRET _XMM0 typedef jit_uint64_t jit_regset_t; diff --git a/include/lightning/jit_x86.h b/include/lightning/jit_x86.h index bf33125c2..b96eab607 100644 --- a/include/lightning/jit_x86.h +++ b/include/lightning/jit_x86.h @@ -60,37 +60,89 @@ typedef enum { _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7, # define jit_sse_reg_p(reg) ((reg) >= _XMM0 && (reg) <= _XMM7) #else -# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) -# define jit_r(i) (_RAX + (i)) -# define jit_r_num() 4 -# define jit_v(i) (_RBX + (i)) -# define jit_v_num() 4 -# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) -# define jit_f(index) (_XMM8 + (index)) -# define jit_f_num() 8 -# 
define JIT_R0 _RAX -# define JIT_R1 _R10 -# define JIT_R2 _R11 -# define JIT_R3 _R12 +# if __CYGWIN__ +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 3 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 7 +# define jit_arg_f_reg_p(i) jit_arg_reg_p(i) +# define jit_f(index) (_XMM4 + (index)) +# define jit_f_num() 12 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_V0 _RBX +# define JIT_V1 _RDI +# define JIT_V2 _RSI +# define JIT_V3 _R12 +# define JIT_V4 _R13 +# define JIT_V5 _R14 +# define JIT_V6 _R15 + /* Volatile - Return value register */ + _RAX, + /* Volatile */ + _R10, _R11, + /* Nonvolatile */ + _RBX, _RDI, _RSI, + _R12, _R13, _R14, _R15, + /* Volatile - Integer arguments (4 to 1) */ + _R9, _R8, _RDX, _RCX, + /* Nonvolatile */ + _RSP, _RBP, +# define JIT_F0 _XMM4 +# define JIT_F1 _XMM5 +# define JIT_F2 _XMM6 +# define JIT_F3 _XMM7 +# define JIT_F4 _XMM8 +# define JIT_F5 _XMM9 +# define JIT_F6 _XMM10 +# define JIT_F7 _XMM11 +# define JIT_F8 _XMM12 +# define JIT_F9 _XMM13 +# define JIT_F10 _XMM14 +# define JIT_F11 _XMM15 + /* Volatile */ + _XMM4, _XMM5, + /* Nonvolatile */ + _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, + _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, + /* Volatile - FP arguments (4 to 1) */ + _XMM3, _XMM2, _XMM1, _XMM0, +# define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0) +# else +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 4 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 4 +# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +# define jit_f(index) (_XMM8 + (index)) +# define jit_f_num() 8 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_R3 _R12 _RAX, _R10, _R11, _R12, -# define JIT_V0 _RBX -# define JIT_V1 _R13 -# define JIT_V2 _R14 -# define JIT_V3 _R15 +# define JIT_V0 _RBX +# define JIT_V1 _R13 +# define JIT_V2 _R14 +# define JIT_V3 _R15 _RBX, _R13, _R14, _R15, _R9, 
_R8, _RCX, _RDX, _RSI, _RDI, _RSP, _RBP, -# define JIT_F0 _XMM8 -# define JIT_F1 _XMM9 -# define JIT_F2 _XMM10 -# define JIT_F3 _XMM11 -# define JIT_F4 _XMM12 -# define JIT_F5 _XMM13 -# define JIT_F6 _XMM14 -# define JIT_F7 _XMM15 +# define JIT_F0 _XMM8 +# define JIT_F1 _XMM9 +# define JIT_F2 _XMM10 +# define JIT_F3 _XMM11 +# define JIT_F4 _XMM12 +# define JIT_F5 _XMM13 +# define JIT_F6 _XMM14 +# define JIT_F7 _XMM15 _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, _XMM7, _XMM6, _XMM5, _XMM4, _XMM3, _XMM2, _XMM1, _XMM0, -# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) +# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) +# endif #endif _ST0, _ST1, _ST2, _ST3, _ST4, _ST5, _ST6, _ST7, # define JIT_NOREG _NOREG diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index ce67c20b1..c24ad16e8 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -39,10 +39,15 @@ # define can_sign_extend_int_p(im) \ (((im) >= 0 && (long)(im) <= 0x7fffffffL) || \ ((im) < 0 && (long)(im) > -0x80000000L)) -# define can_zero_extend_int_p(im) \ - ((im) >= 0 && (im) < 0x80000000L) -# define fits_uint32_p(im) ((im & 0xffffffff00000000L) == 0) -# define reg8_p(rn) 1 +# define can_zero_extend_int_p(im) \ + ((im) >= 0 && (im) < 0x80000000L) +# define fits_uint32_p(im) (((im) & 0xffffffff00000000L) == 0) +# if __CYGWIN__ +# define reg8_p(rn) \ + (r7(rn) >= _RAX_REGNO && r7(rn) <= _RBX_REGNO) +# else +# define reg8_p(rn) 1 +# endif # endif # define _RAX_REGNO 0 # define _RCX_REGNO 1 @@ -60,8 +65,8 @@ # define _R13_REGNO 13 # define _R14_REGNO 14 # define _R15_REGNO 15 -# define r7(reg) (reg & 7) -# define r8(reg) (reg & 15) +# define r7(reg) ((reg) & 7) +# define r8(reg) ((reg) & 15) # define _SCL1 0x00 # define _SCL2 0x01 # define _SCL4 0x02 @@ -634,6 +639,9 @@ static void _prolog(jit_state_t*, jit_node_t*); static void _epilog(jit_state_t*, jit_node_t*); # define patch_at(node, instr, label) _patch_at(_jit, node, instr, label) static void 
_patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); +# if __WORDSIZE == 64 && !defined(HAVE_FFSL) +static int ffsl(long); +# endif #endif #if CODE @@ -1871,7 +1879,7 @@ _ci(jit_state_t *_jit, reg = jit_get_reg(jit_class_gpr|jit_class_rg8); ixorr(rn(reg), rn(reg)); icmpi(r1, i0); - cc(code, reg); + cc(code, rn(reg)); movr(r0, rn(reg)); jit_unget_reg(reg); } @@ -2596,6 +2604,7 @@ _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r1); + rex(0, 0, rn(reg), _NOREG, r0); ic(0x88); rx(rn(reg), 0, r0, _NOREG, _SCL1); jit_unget_reg(reg); @@ -2615,6 +2624,7 @@ _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r0); + rex(0, 0, rn(reg), _NOREG, _NOREG); ic(0x88); rx(rn(reg), i0, _NOREG, _NOREG, _SCL1); jit_unget_reg(reg); @@ -2719,6 +2729,7 @@ _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r2); + rex(0, 0, rn(reg), r1, r0); ic(0x88); rx(rn(reg), 0, r0, r1, _SCL1); jit_unget_reg(reg); @@ -2738,6 +2749,7 @@ _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) else { reg = jit_get_reg(jit_class_gpr|jit_class_rg8); movr(rn(reg), r1); + rex(0, 0, rn(reg), _NOREG, r0); ic(0x88); rx(rn(reg), i0, r0, _NOREG, _SCL1); jit_unget_reg(reg); @@ -3310,14 +3322,18 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { -#if __WORDSIZE == 32 - _jitc->function->stack = (((_jitc->function->self.alen - - _jitc->function->self.aoff) + 15) & -16) + 12; +#if __WORDSIZE == 64 && __CYGWIN__ + _jitc->function->stack = (((/* first 32 bytes must be allocated */ + (_jitc->function->self.alen > 32 ? 
+ _jitc->function->self.alen : 32) - + /* align stack at 16 bytes */ + _jitc->function->self.aoff) + 15) & -16) + + stack_adjust; #else _jitc->function->stack = (((_jitc->function->self.alen - - _jitc->function->self.aoff) + 15) & -16) + 8; + _jitc->function->self.aoff) + 15) & -16) + + stack_adjust; #endif - /* callee save registers */ subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t)); #if __WORDSIZE == 32 @@ -3328,6 +3344,42 @@ _prolog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) stxi( 4, _RSP_REGNO, _RBX_REGNO); #else +# if __CYGWIN__ + if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) + sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) + sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) + sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) + sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) + sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) + sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) + sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM8)) + sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) + sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) + sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R15)) + stxi(56, _RSP_REGNO, _R15_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R14)) + stxi(48, _RSP_REGNO, _R14_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R13)) + stxi(40, _RSP_REGNO, _R13_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _R12)) + stxi(32, _RSP_REGNO, 
_R12_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) + stxi(24, _RSP_REGNO, _RSI_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) + stxi(16, _RSP_REGNO, _RDI_REGNO); + if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) + stxi( 8, _RSP_REGNO, _RBX_REGNO); +# else if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) stxi(40, _RSP_REGNO, _RBX_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _R12)) @@ -3338,6 +3390,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi(16, _RSP_REGNO, _R14_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _R15)) stxi( 8, _RSP_REGNO, _R15_REGNO); +# endif #endif stxi(0, _RSP_REGNO, _RBP_REGNO); movr(_RBP_REGNO, _RSP_REGNO); @@ -3359,6 +3412,42 @@ _epilog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) ldxi(_RBX_REGNO, _RSP_REGNO, 4); #else +# if __CYGWIN__ + if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) + sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) + sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) + sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) + sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) + sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) + sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) + sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM8)) + sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) + sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72); + if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) + sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64); + if (jit_regset_tstbit(&_jitc->function->regset, _R15)) + ldxi(_R15_REGNO, 
_RSP_REGNO, 56); + if (jit_regset_tstbit(&_jitc->function->regset, _R14)) + ldxi(_R14_REGNO, _RSP_REGNO, 48); + if (jit_regset_tstbit(&_jitc->function->regset, _R13)) + ldxi(_R13_REGNO, _RSP_REGNO, 40); + if (jit_regset_tstbit(&_jitc->function->regset, _R12)) + ldxi(_R12_REGNO, _RSP_REGNO, 32); + if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) + ldxi(_RSI_REGNO, _RSP_REGNO, 24); + if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) + ldxi(_RDI_REGNO, _RSP_REGNO, 16); + if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) + ldxi(_RBX_REGNO, _RSP_REGNO, 8); +# else if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) ldxi(_RBX_REGNO, _RSP_REGNO, 40); if (jit_regset_tstbit(&_jitc->function->regset, _R12)) @@ -3369,6 +3458,7 @@ _epilog(jit_state_t *_jit, jit_node_t *node) ldxi(_R14_REGNO, _RSP_REGNO, 16); if (jit_regset_tstbit(&_jitc->function->regset, _R15)) ldxi(_R15_REGNO, _RSP_REGNO, 8); +# endif #endif ldxi(_RBP_REGNO, _RSP_REGNO, 0); addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - sizeof(jit_word_t)); @@ -3392,4 +3482,23 @@ _patch_at(jit_state_t *_jit, jit_node_t *node, break; } } + +# if __WORDSIZE == 64 && !defined(HAVE_FFSL) +static int +ffsl(long i) +{ + int bit; +# if __CYGWIN__ + /* Bug workaround */ + if ((int)i == (int)0x80000000) + bit = 32; + else +# endif + if ((bit = ffs((int)i)) == 0) { + if ((bit = ffs((int)((unsigned long)i >> 32)))) + bit += 32; + } + return (bit); +} +# endif #endif diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index edac0fcba..d3bd7aeea 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -18,6 +18,16 @@ */ #if PROTO +# define _XMM6_REGNO 6 +# define _XMM7_REGNO 7 +# define _XMM8_REGNO 8 +# define _XMM9_REGNO 9 +# define _XMM10_REGNO 10 +# define _XMM11_REGNO 11 +# define _XMM12_REGNO 12 +# define _XMM13_REGNO 13 +# define _XMM14_REGNO 14 +# define _XMM15_REGNO 15 #define X86_SSE_MOV 0x10 #define X86_SSE_MOV1 0x11 #define X86_SSE_MOVLP 0x12 @@ -457,7 +467,7 @@ _sse_b##name##i_##type(jit_state_t *_jit, \ 
static void _sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1) { - rex(0, 0, r1, 0, r0); + rex(0, 0, r0, 0, r1); ic(0x0f); ic(c); mrm(0x03, r7(r0), r7(r1)); diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 5eaad68e9..dc386bea2 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -24,13 +24,17 @@ #define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) #if __WORDSIZE == 32 -# define stack_alignment 4 # define stack_framesize 20 -# define CVT_OFFSET -12 +# define stack_adjust 12 +# define CVT_OFFSET -12 #else -# define stack_alignment 8 -# define stack_framesize 56 -# define CVT_OFFSET -8 +# if __CYGWIN__ +# define stack_framesize 152 +# else +# define stack_framesize 56 +# endif +# define stack_adjust 8 +# define CVT_OFFSET -8 #endif /* @@ -83,6 +87,40 @@ jit_register_t _rvs[] = { { rc(fpr) | 5, "st(5)" }, { rc(fpr) | 6, "st(6)" }, { rc(fpr) | 7, "st(7)" }, +#else +# if __CYGWIN__ + { rc(gpr) | rc(rg8) | 0, "%rax" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" }, + { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, + { rc(sav) | rc(gpr) | 7, "%rdi" }, + { rc(sav) | rc(gpr) | 6, "%rsi" }, + { rc(sav) | rc(gpr) | 12, "%r12" }, + { rc(sav) | rc(gpr) | 13, "%r13" }, + { rc(sav) | rc(gpr) | 14, "%r14" }, + { rc(sav) | rc(gpr) | 15, "%r15" }, + { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, + { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, + { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, + { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, + { rc(sav) | 4, "%rsp" }, + { rc(sav) | 5, "%rbp" }, + { rc(xpr) | rc(fpr) | 4, "%xmm4" }, + { rc(xpr) | rc(fpr) | 5, "%xmm5" }, + { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" }, + { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" }, + { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" }, + { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" }, + { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" }, + { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" }, + { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" }, + { rc(sav) | rc(xpr) | 
rc(fpr) | 13, "%xmm13" }, + { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" }, + { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" }, + { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, + { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, + { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, + { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, #else /* %rax is a pseudo flag argument for varargs functions */ { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" }, @@ -117,6 +155,7 @@ jit_register_t _rvs[] = { { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, +# endif { rc(fpr) | 0, "st(0)" }, { rc(fpr) | 1, "st(1)" }, { rc(fpr) | 2, "st(2)" }, @@ -429,8 +468,12 @@ _jit_arg(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->self.argi < 6) + if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; +# if __CYGWIN__ + _jitc->function->self.size += sizeof(jit_word_t); +# endif + } else #endif { @@ -446,7 +489,11 @@ _jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset) #if __WORDSIZE == 32 return (0); #else +# if __CYGWIN__ + return (offset >= 0 && offset < 4); +# else return (offset >= 0 && offset < 6); +# endif #endif } @@ -457,8 +504,15 @@ _jit_arg_f(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->self.argf < 8) +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + _jitc->function->self.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; +# endif else #endif { @@ -478,7 +532,11 @@ _jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset) #if __WORDSIZE == 32 return (0); #else +# if __CYGWIN__ + return (offset >= 0 && offset < 4); +# else return (offset >= 0 && offset < 8); +# endif #endif } @@ -489,8 +547,15 @@ _jit_arg_d(jit_state_t *_jit) assert(_jitc->function); #if __WORDSIZE == 64 
- if (_jitc->function->self.argf < 8) +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->self.argi)) { + offset = _jitc->function->self.argi++; + _jitc->function->self.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; +# endif else #endif { @@ -510,8 +575,8 @@ void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_c(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_c(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_c(u, _RBP, v->u.w); @@ -521,8 +586,8 @@ void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_uc(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_uc(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_uc(u, _RBP, v->u.w); @@ -532,8 +597,8 @@ void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_s(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_s(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_s(u, _RBP, v->u.w); @@ -543,8 +608,8 @@ void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_us(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_us(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_us(u, _RBP, v->u.w); @@ -554,8 +619,8 @@ void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 6) - jit_extr_i(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_i(u, JIT_RA0 - v->u.w); else #endif jit_ldxi_i(u, _RBP, v->u.w); @@ -565,8 +630,8 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - if (v->u.w < 6) - jit_extr_ui(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_extr_ui(u, JIT_RA0 - v->u.w); else jit_ldxi_ui(u, _RBP, v->u.w); } @@ -574,8 +639,8 @@ 
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - if (v->u.w < 6) - jit_movr(u, _RDI - v->u.w); + if (jit_arg_reg_p(v->u.w)) + jit_movr(u, JIT_RA0 - v->u.w); else jit_ldxi_l(u, _RBP, v->u.w); } @@ -585,7 +650,7 @@ void _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 8) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(u, _XMM0 - v->u.w); else #endif @@ -596,7 +661,7 @@ void _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { #if __WORDSIZE == 64 - if (v->u.w < 8) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(u, _XMM0 - v->u.w); else #endif @@ -608,9 +673,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argi < 6) { - jit_movr(_RDI - _jitc->function->call.argi, u); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr(JIT_RA0 - _jitc->function->call.argi, u); ++_jitc->function->call.argi; +# if __CYGWIN__ + _jitc->function->call.size += sizeof(jit_word_t); +# endif } else #endif @@ -627,9 +695,12 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argi < 6) { - jit_movi(_RDI - _jitc->function->call.argi, u); + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi(JIT_RA0 - _jitc->function->call.argi, u); ++_jitc->function->call.argi; +# if __CYGWIN__ + _jitc->function->call.size += sizeof(jit_word_t); +# endif } else #endif @@ -647,10 +718,24 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr_f(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_f(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_i(JIT_RA0 - 
_jitc->function->call.argi, _RSP, + _jitc->function->call.size); + } + ++_jitc->function->call.argi; + _jitc->function->call.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_f(_XMM0 - _jitc->function->call.argf, u); ++_jitc->function->call.argf; } +# endif else #endif { @@ -666,10 +751,24 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi_f(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_f(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP, + _jitc->function->call.size); + } + ++_jitc->function->call.argi; + _jitc->function->call.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_f(_XMM0 - _jitc->function->call.argf, u); ++_jitc->function->call.argf; } +# endif else #endif { @@ -686,10 +785,24 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { assert(_jitc->function); #if __WORDSIZE == 64 - if (_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movr_d(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_d(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP, + _jitc->function->call.size); + } + ++_jitc->function->call.argi; + _jitc->function->call.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_d(_XMM0 - _jitc->function->call.argf, u); ++_jitc->function->call.argf; } +# endif else #endif { @@ -705,10 +818,24 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) assert(_jitc->function); #if __WORDSIZE == 64 - if 
(_jitc->function->call.argf < 8) { +# if __CYGWIN__ + if (jit_arg_reg_p(_jitc->function->call.argi)) { + jit_movi_d(_XMM0 - _jitc->function->call.argi, u); + if (_jitc->function->call.call & jit_call_varargs) { + jit_stxi_d(_jitc->function->call.size, _RSP, + _XMM0 - _jitc->function->call.argi); + jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP, + _jitc->function->call.size); + } + ++_jitc->function->call.argi; + _jitc->function->call.size += sizeof(jit_word_t); + } +# else + if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_d(_XMM0 - _jitc->function->call.argf, u); ++_jitc->function->call.argf; } +# endif else #endif { @@ -729,7 +856,7 @@ _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) spec = jit_class(_rvs[regno].spec); if (spec & jit_class_arg) { if (spec & jit_class_gpr) { - regno = _RDI - regno; + regno = JIT_RA0 - regno; if (regno >= 0 && regno < node->v.w) return (1); } @@ -754,6 +881,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; #if __WORDSIZE == 64 +# if !__CYGWIN__ if (_jitc->function->call.call & jit_call_varargs) { if (jit_regno(reg) == _RAX) { reg = jit_get_reg(jit_class_gpr); @@ -766,6 +894,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) if (reg != r0) jit_unget_reg(reg); } +# endif #endif call = jit_callr(reg); call->v.w = _jitc->function->call.argi;