From 58c4dcea4396193ec4ac18b54ff84dba0c12719c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 23 Nov 2006 09:01:19 +0000 Subject: [PATCH] add floating-point for x86-64 git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-49 --- ChangeLog | 14 ++ NEWS | 5 + lightning/core-common.h | 24 ++-- lightning/i386/asm-32.h | 7 +- lightning/i386/asm-64.h | 36 +++++ lightning/i386/asm-i386.h | 279 ++++++++++++++++++++++++++++++++++++- lightning/i386/core-64.h | 44 +++--- lightning/i386/core-i386.h | 5 - lightning/i386/fp-32.h | 5 + lightning/i386/fp-64.h | 260 +++++++++++++++++++++++++++++++++- lightning/sparc/fp.h | 4 +- 11 files changed, 639 insertions(+), 44 deletions(-) diff --git a/ChangeLog b/ChangeLog index a749cebbd..79dbe9124 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2006-11-23 Paolo Bonzini + + * lightning/core-common.h: Add casts in "*i_p" variants. + * lightning/i386/asm-32.h: Add _r1. + * lightning/i386/asm-64.h: Likewise, and add SSE instructions. + * lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole. + Use short form for 16-bit AX instructions. Remove _r1 + * lightning/i386/core-64.h: Add FP ABI support in its infancy. + * lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d... + * lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d... + * lightning/i386/fp-32.h: ... here. + * lightning/i386/fp-64.h: Write the code. + * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order. + 2006-11-22 Paolo Bonzini * lightning/i386/asm-i386.h: Move x86-64 instructions... diff --git a/NEWS b/NEWS index 58698673d..be756755d 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,11 @@ NEWS FROM VERSION 1.2 TO 1.3 o Initial support for x86-64 back-end (mostly untested). +o lightning is more strict on casts from integer to pointer. + Be sure to use the _p variants when your immediates are + of pointer type. This was done to ease 64-bit cleanliness + tests. + o Many bug fixes. 
o JIT_FPRET is used as JIT_RET to move return values. diff --git a/lightning/core-common.h b/lightning/core-common.h index d9edaabd8..1a90c576b 100644 --- a/lightning/core-common.h +++ b/lightning/core-common.h @@ -396,30 +396,30 @@ typedef union jit_code { #define jit_bmsi_ul(label, rs, is) jit_bmsi_l((label), (rs), (is)) #define jit_ltr_p(d, s1, s2) jit_ltr_ul((d), (s1), (s2)) -#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (is)) +#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (long)(is)) #define jit_ler_p(d, s1, s2) jit_ler_ul((d), (s1), (s2)) -#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (is)) +#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (long)(is)) #define jit_gtr_p(d, s1, s2) jit_gtr_ul((d), (s1), (s2)) -#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (is)) +#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (long)(is)) #define jit_ger_p(d, s1, s2) jit_ger_ul((d), (s1), (s2)) -#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (is)) +#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (long)(is)) #define jit_eqr_p(d, s1, s2) jit_eqr_ul((d), (s1), (s2)) -#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (is)) +#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (long)(is)) #define jit_ner_p(d, s1, s2) jit_ner_ul((d), (s1), (s2)) -#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (is)) +#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (long)(is)) #define jit_bltr_p(label, s1, s2) jit_bltr_ul((label), (s1), (s2)) -#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (is)) +#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (long)(is)) #define jit_bler_p(label, s1, s2) jit_bler_ul((label), (s1), (s2)) -#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (is)) +#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (long)(is)) #define jit_bgtr_p(label, s1, s2) jit_bgtr_ul((label), (s1), (s2)) -#define jit_bgti_p(label, rs, is) jit_bgti_ul((label), (rs), (is)) +#define jit_bgti_p(label, rs, is) 
jit_bgti_ul((label), (rs), (long)(is)) #define jit_bger_p(label, s1, s2) jit_bger_ul((label), (s1), (s2)) -#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (is)) +#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (long)(is)) #define jit_beqr_p(label, s1, s2) jit_beqr_ul((label), (s1), (s2)) -#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (is)) +#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (long)(is)) #define jit_bner_p(label, s1, s2) jit_bner_ul((label), (s1), (s2)) -#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (is)) +#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (long)(is)) #define jit_retval_ui(rd) jit_retval_i((rd)) #define jit_retval_uc(rd) jit_retval_i((rd)) diff --git a/lightning/i386/asm-32.h b/lightning/i386/asm-32.h index 1945a4974..d336cb2c7 100644 --- a/lightning/i386/asm-32.h +++ b/lightning/i386/asm-32.h @@ -45,6 +45,7 @@ #include "asm-i386.h" +#define _r1(R) ( ((R) & ~3) == _AL || ((R) & ~3) == _AH ? _rN(R) : JITFAIL( "8-bit register required")) #define _rA(R) _r4(R) /* Use RIP-addressing in 64-bit mode, if possible */ @@ -58,14 +59,14 @@ #define _m64only(X) JITFAIL("invalid instruction in 32-bit mode") #define _m64(X) ((void)0) -#define CALLsr(R) CALLLsr(R) -#define JMPsr(R) JMPLsr(R) - #define _AH 0x24 #define _CH 0x25 #define _DH 0x26 #define _BH 0x27 +#define CALLsr(R) CALLLsr(R) +#define JMPsr(R) JMPLsr(R) + #define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) )) #define DECLr(RD) _Or (0x48,_r4(RD) ) #define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) )) diff --git a/lightning/i386/asm-64.h b/lightning/i386/asm-64.h index 2280c8391..9f5431cf1 100644 --- a/lightning/i386/asm-64.h +++ b/lightning/i386/asm-64.h @@ -127,6 +127,8 @@ #define _R15 0x4F #define _RIP -2 +#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required")) + #if 0 #define _r8(R) ( (_rC(R) == 0x50) ? 
_rN(R) : JITFAIL("64-bit register required")) #else @@ -335,6 +337,40 @@ #define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) )) + + +#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSEQ1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) +#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSEQ1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQ1rm(OP, RS, RSA, MD, MB, MI, MS)) + +#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) + +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8) +#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM) +#define MOVDQMmr(MD, MB, MI, MS, RD) 
__SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM) +#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8) +#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) + + + #define CALLsr(R) CALLQsr(R) #define JMPsr(R) JMPQsr(R) diff --git a/lightning/i386/asm-i386.h b/lightning/i386/asm-i386.h index ffe870efb..94d944c17 100644 --- a/lightning/i386/asm-i386.h +++ b/lightning/i386/asm-i386.h @@ -129,7 +129,6 @@ typedef _uc jit_insn; #define _rM(R) _rN(R) #define _rX(R) _rN(R) #else -#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required")) #define _r2(R) ( (_rC(R) == 0x30) ? _rN(R) : JITFAIL("16-bit register required")) #define _r4(R) ( (_rC(R) == 0x40) ? _rN(R) : JITFAIL("32-bit register required")) #define _rM(R) ( (_rC(R) == 0x60) ? _rN(R) : JITFAIL("MMX register required")) @@ -314,7 +313,7 @@ enum { #define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) #define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) #define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) -#define _ALUWir(OP, IM, RD) (!_s8P(IM) && (RD) == _AX ? \ +#define _ALUWir(OP, IM, RD) ((RD) == _AX ? \ (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \ (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) #define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) @@ -1084,7 +1083,7 @@ enum { #define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) )) #define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS )) -#define TESTWir(IM, RD) (!_s8P(IM) && (RD) == _AX ? \ +#define TESTWir(IM, RD) ((RD) == _AX ? 
\ (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \ (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) ) #define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM))) @@ -1302,6 +1301,280 @@ enum { JITFAIL(".align argument too large"))) +/* --- Media 128-bit instructions ------------------------------------------ */ + +enum { + X86_SSE_MOV = 0x10, + X86_SSE_MOVLP = 0x12, + X86_SSE_MOVHP = 0x16, + X86_SSE_MOVA = 0x28, + X86_SSE_CVTIS = 0x2a, + X86_SSE_CVTSI = 0x2d, + X86_SSE_UCOMI = 0x2e, + X86_SSE_COMI = 0x2f, + X86_SSE_SQRT = 0x51, + X86_SSE_RSQRT = 0x52, + X86_SSE_RCP = 0x53, + X86_SSE_AND = 0x54, + X86_SSE_ANDN = 0x55, + X86_SSE_OR = 0x56, + X86_SSE_XOR = 0x57, + X86_SSE_ADD = 0x58, + X86_SSE_MUL = 0x59, + X86_SSE_CVTSD = 0x5a, + X86_SSE_CVTDT = 0x5b, + X86_SSE_SUB = 0x5c, + X86_SSE_MIN = 0x5d, + X86_SSE_DIV = 0x5e, + X86_SSE_MAX = 0x5f, + X86_SSE_MOV2 = 0xd6 +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSEL1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) +#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSEL1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEL1rm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEPSrr(OP,RS,RD) __SSELrr ( OP, RS,_rX, RD,_rX) +#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr ( OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm ( OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __SSEL1rm( OP, RS,_rX, MD, MB, MI, MS) + +#define _SSEPDrr(OP,RS,RD) _SSELrr (0x66, OP, RS,_rX, RD,_rX) +#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0x66, OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0x66, OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0x66, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESSrr(OP,RS,RD) _SSELrr (0xf3, OP, RS,_rX, RD,_rX) +#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf3, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf3, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf3, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESDrr(OP,RS,RD) _SSELrr (0xf2, OP, RS,_rX, RD,_rX) +#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf2, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf2, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf2, OP, RS,_rX, 
MD, MB, MI, MS) + +#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) +#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD) +#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD) +#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD) +#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) +#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) +#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD) +#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) + +#define ANDNSSrr ANDNPSrr +#define ANDNSSmr ANDNPSrr +#define ANDNSDrr ANDNPDrr +#define ANDNSDmr ANDNPDrr + +#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) +#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD) +#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) +#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) + +#define ANDSSrr ANDPSrr +#define ANDSSmr ANDPSrr +#define ANDSDrr ANDPDrr +#define ANDSDmr ANDPDrr + +#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) +#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) +#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD) +#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD) +#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD) +#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) 
+#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD) +#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD) +#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD) +#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD) +#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD) +#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD) +#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD) +#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD) +#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD) +#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD) +#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD) +#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) +#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD) +#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD) +#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD) + +#define ORSSrr ORPSrr +#define ORSSmr ORPSrr +#define ORSDrr ORPDrr +#define ORSDmr ORPDrr + +#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) +#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) +#define RCPSSrr(RS, 
RD) _SSESSrr(X86_SSE_RCP, RS, RD) +#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) + +#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) +#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) + +#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) +#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD) +#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD) +#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD) +#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD) +#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD) +#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD) +#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD) +#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) +#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD) +#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) +#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) + +#define XORSSrr XORPSrr +#define XORSSmr XORPSrr +#define XORSDrr XORPDrr +#define XORSDmr XORPDrr + +/* No prefixes here. 
*/ +#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) + +/* No prefixes here. */ +#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) + +#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD) +#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD) +#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS) + +#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD) +#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD) +#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS) + +#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD) +#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD) +#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS) + +#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD) +#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD) +#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS) + +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) + +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) 
_SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) + +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXLrr(RS, RD) _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4) +#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) + +#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) +#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) + +#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4) +#define MOVDMLrm(RS, MD, MB, MI, 
MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS) + +#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, X86_SSE_MOV2, RS,_rX, RD,_rM) +#define MOVQ2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_MOV2, RS,_rM, RD,_rX) +#define MOVHLPSrr(RS, RD) __SSELrr( X86_SSE_MOVLP, RS,_rX, RD,_rX) +#define MOVLHPSrr(RS, RD) __SSELrr( X86_SSE_MOVHP, RS,_rX, RD,_rX) + +#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX) +#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX) +#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX) +#define MOVHPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS) +#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX) +#define MOVHPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS) + +#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX) +#define MOVLPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS) +#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX) +#define MOVLPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS) + /*** References: */ /* */ /* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */ diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h index e0950919a..922cd260d 100644 --- a/lightning/i386/core-64.h +++ b/lightning/i386/core-64.h @@ -43,6 +43,8 @@ struct jit_local_state { int long_jumps; + int nextarg_getfp; + int nextarg_putfp; int nextarg_geti; int argssize; int alloca_offset; @@ -63,13 +65,19 @@ struct jit_local_state { jit_allocai_internal 
((n), (_jitl.alloca_slack - (n)) & 15) /* 3-parameter operation */ -#define jit_qopr_(d, s1, s2, op1d, op2d) \ - ( (s2 == d) ? op1d : \ - ( ((s1 == d) ? (void)0 : (void)MOVQrr(s1, d)), op2d ) \ +#define jit_qopr_(d, s1, s2, op1d, op2d) \ + ( ((s2) == (d)) ? op1d : \ + ( (((s1) == (d)) ? (void)0 : (void)MOVQrr((s1), (d))), op2d ) \ ) -/* 3-parameter operation, with immediate */ -#define jit_qop_(d, s1, op2d) \ +/* 3-parameter operation, with immediate. TODO: fix the case where mmediate + does not fit! */ +#define jit_qop_small(d, s1, op2d) \ + (((s1) == (d)) ? op2d : (MOVQrr((s1), (d)), op2d)) +#define jit_qop_(d, s1, is, op2d, op2i) \ + (_s32P((long)(is)) \ + ? jit_qop_small ((d), (s1), (op2d)) \ + : (MOVQrr ((is), JIT_REXTMP), jit_qop_small ((d), (s1), (op2i)))) #define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc) #define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc) @@ -88,21 +96,19 @@ struct jit_local_state { #define jit_addi_l(d, rs, is) jit_opi_((d), (rs), ADDQir((is), (d)), LEAQmr((is), (rs), 0, 0, (d)) ) #define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) ) -#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), ANDQir((is), (d)) ) +#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ANDQir((is), (d)), ANDQrr(JIT_REXTMP, (d))) #define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) ) #define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) ) #define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) ) #define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) ) /* These can sometimes use byte or word versions! 
*/ -#define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) ) -#define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) ) -#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(OR, (is), (d)) ) -#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(XOR, (is), (d)) ) +#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(OR, (is), (d)), ORQrr(JIT_REXTMP, (d)) ) +#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(XOR, (is), (d)), ORQrr(JIT_REXTMP, (d)) ) -#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_ ((d), (rs), SHLQir((is), (d)) )) -#define jit_rshi_l(d, rs, is) jit_qop_ ((d), (rs), SARQir((is), (d)) ) -#define jit_rshi_ul(d, rs, is) jit_qop_ ((d), (rs), SHRQir((is), (d)) ) +#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_small ((d), (rs), SHLQir((is), (d)) )) +#define jit_rshi_l(d, rs, is) jit_qop_small ((d), (rs), SARQir((is), (d)) ) +#define jit_rshi_ul(d, rs, is) jit_qop_small ((d), (rs), SHRQir((is), (d)) ) #define jit_lshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHLQrr(_CL, (d)) )) #define jit_rshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SARQrr(_CL, (d)) )) #define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) )) @@ -112,7 +118,7 @@ struct jit_local_state { #define jit_popr_i(rs) POPQr(rs) #define jit_base_prolog() (PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_EBP), MOVQrr(_ESP, _EBP)) -#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog()) +#define jit_prolog(n) (_jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog()) /* Stack isn't used for arguments: */ #define jit_prepare_i(ni) (_jitl.argssize = 0) @@ -181,17 +187,19 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX }; #define jit_ret() ((_jitl.alloca_offset < 0 ? 
LEAVE_() : POPQr(_EBP)), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_()) #define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d)) +#define _jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d)) #define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d)) #define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d)) #define _jit_sti_l(id, rs) MOVQrm((rs), (id), 0, 0, 0) +#define _jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0) #define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0) #define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1) -#define jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0) -#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l(JIT_REXTMP))) -#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l(id, rs) : (jit_movi_l(JIT_REXTMP, id), jit_str_l (JIT_REXTMP, (rs)))) +#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l((d), JIT_REXTMP))) +#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_l (JIT_REXTMP, (rs)))) +#define jit_ldxi_l(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_l((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_l((d), (rs), JIT_REXTMP))) +#define jit_stxi_l(id, rd, rs) (_u32P((long)(id)) ? 
_jit_stxi_l((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_l (JIT_REXTMP, (rd), (rs)))) /* Memory */ diff --git a/lightning/i386/core-i386.h b/lightning/i386/core-i386.h index 2a9a8c2ab..24d12b524 100644 --- a/lightning/i386/core-i386.h +++ b/lightning/i386/core-i386.h @@ -244,13 +244,8 @@ #define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) )) /* Stack */ -#define jit_prepare_f(nf) (_jitl.argssize += (nf)) -#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) #define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX)) -#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float)) -#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double)) - /* Unary */ #define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) ) diff --git a/lightning/i386/fp-32.h b/lightning/i386/fp-32.h index 31a1d3d9a..1ee56db4c 100644 --- a/lightning/i386/fp-32.h +++ b/lightning/i386/fp-32.h @@ -346,4 +346,9 @@ union jit_double_imm { _OO(0xd9f1)) /* fyl2x */ #endif +#define jit_prepare_f(nf) (_jitl.argssize += (nf)) +#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) +#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float)) +#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double)) + #endif /* __lightning_asm_h */ diff --git a/lightning/i386/fp-64.h b/lightning/i386/fp-64.h index 19e73dcda..74cdfecce 100644 --- a/lightning/i386/fp-64.h +++ b/lightning/i386/fp-64.h @@ -33,6 +33,264 @@ #ifndef __lightning_fp_h #define __lightning_fp_h -#warning SSE math not yet supported +#include + +#define JIT_FPR_NUM 9 +#define JIT_FPRET _XMM0 +#define JIT_FPR(i) (_XMM7 + (i)) +#define JIT_FPTMP _XMM6 + +/* Either use a temporary register that is finally AND/OR/XORed with RS = RD, + or use RD as the temporary register and to the AND/OR/XOR with RS. */ +#define jit_unop_tmp(rd, rs, op) \ + ( (rs) == (rd) \ + ? 
op((rd), JIT_FPTMP, JIT_FPTMP)) \ + : op((rd), (rd), (rs))) + +#define jit_unop_f(rd, rs, op) \ + ((rs) == (rd) ? op((rd)) : (MOVSSrr ((rs), (rd)), op((rd)))) + +#define jit_unop_d(rd, rs, op) \ + ((rs) == (rd) ? op((rd)) : (MOVSDrr ((rs), (rd)), op((rd)))) + +#define jit_3opc_f(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) ? op((s1), (rd)) \ + : (MOVSSrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3opc_d(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) ? op((s1), (rd)) \ + : (MOVSDrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3op_f(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) \ + ? (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \ + : (MOVSSrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_3op_d(rd, s1, s2, op) \ + ( (s1) == (rd) ? op((s2), (rd)) \ + : ((s2) == (rd) \ + ? (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \ + : (MOVSDrr ((s1), (rd)), op((s2), (rd))))) + +#define jit_addr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr) +#define jit_subr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), SUBSSrr) +#define jit_mulr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), MULSSrr) +#define jit_divr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), DIVSSrr) + +#define jit_addr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr) +#define jit_subr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), SUBSDrr) +#define jit_mulr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), MULSDrr) +#define jit_divr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), DIVSDrr) + +#define jit_movr_f(rd,rs) MOVSSrr((rs), (rd)) +#define jit_movr_d(rd,rs) MOVSDrr((rs), (rd)) + +/* either pcmpeqd %xmm7, %xmm7 / psrld $1, %xmm7 / andps %xmm7, %RD (if RS = RD) + or pcmpeqd %RD, %RD / psrld $1, %RD / andps %RS, %RD (if RS != RD) */ +#define _jit_abs_f(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSRLDir (1, (cnst)), ANDPSrr ((rs), (rd))) +#define _jit_neg_f(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSLLDir (31, (cnst)), 
XORPSrr ((rs), (rd))) +#define jit_abs_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_f) +#define jit_neg_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_f) + +#define _jit_abs_d(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSRLQir (1, (cnst)), ANDPDrr ((rs), (rd))) +#define _jit_neg_d(rd,cnst,rs) \ + (PCMPEQDrr((cnst), (cnst)), PSLLQir (63, (cnst)), XORPDrr ((rs), (rd))) +#define jit_abs_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_d) +#define jit_neg_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_d) + +#define jit_sqrt_d(rd,rs) SQRTSDrr((rs), (rd)) +#define jit_sqrt_f(rd,rs) SQRTSSrr((rs), (rd)) + +#define _jit_ldi_f(d, is) MOVSSmr((is), 0, 0, 0, (d)) +#define _jit_ldxi_f(d, rs, is) MOVSSmr((is), (rs), 0, 0, (d)) +#define jit_ldr_f(d, rs) MOVSSmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_f(d, s1, s2) MOVSSmr(0, (s1), (s2), 1, (d)) + +#define _jit_sti_f(id, rs) MOVSSrm((rs), (id), 0, 0, 0) +#define _jit_stxi_f(id, rd, rs) MOVSSrm((rs), (id), (rd), 0, 0) +#define jit_str_f(rd, rs) MOVSSrm((rs), 0, (rd), 0, 0) +#define jit_stxr_f(d1, d2, rs) MOVSSrm((rs), 0, (d1), (d2), 1) + +#define jit_ldi_f(d, is) (_u32P((long)(is)) ? _jit_ldi_f((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP))) +#define jit_sti_f(id, rs) (_u32P((long)(id)) ? _jit_sti_f((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs)))) +#define jit_ldxi_f(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_f((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP))) +#define jit_stxi_f(id, rd, rs) (_u32P((long)(id)) ? 
_jit_stxi_f((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs)))) + +#define _jit_ldi_d(d, is) MOVSDmr((is), 0, 0, 0, (d)) +#define _jit_ldxi_d(d, rs, is) MOVSDmr((is), (rs), 0, 0, (d)) +#define jit_ldr_d(d, rs) MOVSDmr(0, (rs), 0, 0, (d)) +#define jit_ldxr_d(d, s1, s2) MOVSDmr(0, (s1), (s2), 1, (d)) + +#define _jit_sti_d(id, rs) MOVSDrm((rs), (id), 0, 0, 0) +#define _jit_stxi_d(id, rd, rs) MOVSDrm((rs), (id), (rd), 0, 0) +#define jit_str_d(rd, rs) MOVSDrm((rs), 0, (rd), 0, 0) +#define jit_stxr_d(d1, d2, rs) MOVSDrm((rs), 0, (d1), (d2), 1) + +#define jit_ldi_d(d, is) (_u32P((long)(is)) ? _jit_ldi_d((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP))) +#define jit_sti_d(id, rs) (_u32P((long)(id)) ? _jit_sti_d((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs)))) +#define jit_ldxi_d(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_d((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP))) +#define jit_stxi_d(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_d((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs)))) + + +#define jit_movi_f(rd,immf) \ + ((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \ + (_O (0x50), \ + MOVLim (0x12345678L, 0, _ESP, 0, 0), \ + *((float *) (_jit.x.uc_pc - 4)) = (float) immf, \ + jit_ldr_f((rd), _ESP), \ + ADDLir(4, _ESP))) + +union jit_double_imm { + double d; + long l; +}; + +#define jit_movi_d(rd,immd) \ + ((immd) == 0.0 ? 
XORSDrr ((rd), (rd)) : \ + (_O (0x50), \ + MOVQir (0x123456789abcdef0L, _EAX), \ + ((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \ + _O (0x50), jit_ldr_d((rd), _ESP), \ + _O (0x58), _O (0x58))) + +#define jit_extr_i_d(rd, rs) CVTSI2SDLrr((rs), (rd)) +#define jit_extr_i_f(rd, rs) CVTSI2SSLrr((rs), (rd)) +#define jit_extr_l_d(rd, rs) CVTSI2SDQrr((rs), (rd)) +#define jit_extr_l_f(rd, rs) CVTSI2SSQrr((rs), (rd)) +#define jit_roundr_d_i(rd, rs) CVTSD2SILrr((rs), (rd)) +#define jit_roundr_f_i(rd, rs) CVTSS2SILrr((rs), (rd)) +#define jit_roundr_d_l(rd, rs) CVTSD2SIQrr((rs), (rd)) +#define jit_roundr_f_l(rd, rs) CVTSS2SIQrr((rs), (rd)) + + +#define jit_ceilr_f_i(rd, rs) do { \ + jit_roundr_f_i ((rd), (rs)); \ + jit_extr_i_f (JIT_FPTMP, (rd)); \ + UCOMISSrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_ceilr_d_i(rd, rs) do { \ + jit_roundr_d_i ((rd), (rs)); \ + jit_extr_i_d (JIT_FPTMP, (rd)); \ + UCOMISDrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_truncr_f_i(rd, rs) do { \ + jit_roundr_f_i ((rd), (rs)); \ + jit_extr_i_f (JIT_FPTMP, (rd)); \ + TESTLrr ((rd), (rd)); \ + JSm (_jit.x.pc + 9); \ + UCOMISSrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + JMPSm (_jit.x.pc + 7); \ + UCOMISSrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_truncr_d_i(rd, rs) do { \ + jit_roundr_d_i ((rd), (rs)); \ + jit_extr_i_d (JIT_FPTMP, (rd)); \ + TESTLrr ((rd), (rd)); \ + JSm (_jit.x.pc + 9); \ + UCOMISDrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + JMPSm (_jit.x.pc + 7); \ + UCOMISDrr ((rs), JIT_FPTMP); \ + ADCLir (0, (rd)); \ + } while (0) + +#define jit_floorr_f_i(rd, rs) do { \ + jit_roundr_f_i ((rd), (rs)); \ + jit_extr_i_f (JIT_FPTMP, (rd)); \ + UCOMISSrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + } while (0) + +#define jit_floorr_d_i(rd, rs) do { \ + jit_roundr_d_i ((rd), (rs)); \ + jit_extr_i_d (JIT_FPTMP, (rd)); \ + UCOMISDrr (JIT_FPTMP, (rs)); \ + SBBLir (0, (rd)); \ + } while (0) + 
+#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d))) +#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d))) +#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d))) +#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a03), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */ +#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d))) +#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d))) +#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d))) +#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d))) +#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d))) +#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d))) +#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d))) +#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm ((d))) +#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d))) +#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d))) + +#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d))) +#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d))) +#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d))) +#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a03), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */ +#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d))) +#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d))) +#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d))) +#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d))) +#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d))) +#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d))) +#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d))) +#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d))) +#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), 
JNPm ((d))) +#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d))) + +#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d)))) +#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d)))) +#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d)))) +#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d)))) +#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d)))) +#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d)))) +#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d)))) +#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d)))) +#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d)))) +#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d)))) +#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d)))) +#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d)))) +#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d)))) +#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d)))) + +#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d)))) +#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d)))) +#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d)))) +#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d)))) +#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr 
(jit_reg8((d)))) +#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d)))) +#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d)))) +#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d)))) +#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d)))) +#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d)))) +#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d)))) +#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr (jit_reg8((d)))) +#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d)))) +#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d)))) + +#define jit_prepare_f(num) (_jitl.nextarg_putfp = _XMM0 + (num)) +#define jit_prepare_d(num) (_jitl.nextarg_putfp = _XMM0 + (num)) + +#define jit_arg_f() (_XMM0 + _jitl.nextarg_getfp++) +#define jit_arg_d() (_XMM0 + _jitl.nextarg_getfp++) + +#define jit_getarg_f(rd, ofs) (jit_movr_f ((rd), (ofs))) +#define jit_getarg_d(rd, ofs) (jit_movr_d ((rd), (ofs))) + +#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp, jit_movr_f (_jitl.nextarg_putfp, (rs))) +#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp, jit_movr_d (_jitl.nextarg_putfp, (rs))) #endif /* __lightning_fp_h */ diff --git a/lightning/sparc/fp.h b/lightning/sparc/fp.h index 5a34e7d3e..a11f2eb51 100644 --- a/lightning/sparc/fp.h +++ b/lightning/sparc/fp.h @@ -60,8 +60,8 @@ #define jit_abs_f(rd,rs) FABSDrr((rs), (rd)) #define jit_negr_f(rd,rs) FNEGDrr((rs), (rd)) #define jit_sqrt_f(rd,rs) FSQRTDrr((rs), (rd)) -#define jit_extr_f_d(rs, rd) FSTODrr((rs), (rd)) -#define jit_extr_d_f(rs, rd) FDTOSrr((rs), (rd)) +#define jit_extr_f_d(rd, rs) FSTODrr((rs), (rd)) +#define jit_extr_d_f(rd, rs) FDTOSrr((rs), (rd)) #define 
jit_movi_f(rd,immf) \ do { \