mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 04:10:18 +02:00
add floating-point for x86-64
git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-49
This commit is contained in:
parent
3a04a40aae
commit
58c4dcea43
11 changed files with 639 additions and 44 deletions
14
ChangeLog
14
ChangeLog
|
@ -1,3 +1,17 @@
|
||||||
|
2006-11-23 Paolo Bonzini <bonzini@gnu.org>
|
||||||
|
|
||||||
|
* lightning/core-common.h: Add casts in "*i_p" variants.
|
||||||
|
* lightning/i386/asm-32.h: Add _r1.
|
||||||
|
* lightning/i386/asm-64.h: Likewise, and add SSE instructions.
|
||||||
|
* lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole.
|
||||||
|
Use short form for 16-bit AX instructions. Remove _r1
|
||||||
|
* lightning/i386/core-64.h: Add FP ABI support in its infancy.
|
||||||
|
* lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d...
|
||||||
|
* lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d...
|
||||||
|
* lightning/i386/fp-32.h: ... here.
|
||||||
|
* lightning/i386/fp-64.h: Write the code.
|
||||||
|
* lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
|
||||||
|
|
||||||
2006-11-22 Paolo Bonzini <bonzini@gnu.org>
|
2006-11-22 Paolo Bonzini <bonzini@gnu.org>
|
||||||
|
|
||||||
* lightning/i386/asm-i386.h: Move x86-64 instructions...
|
* lightning/i386/asm-i386.h: Move x86-64 instructions...
|
||||||
|
|
5
NEWS
5
NEWS
|
@ -2,6 +2,11 @@ NEWS FROM VERSION 1.2 TO 1.3
|
||||||
|
|
||||||
o Initial support for x86-64 back-end (mostly untested).
|
o Initial support for x86-64 back-end (mostly untested).
|
||||||
|
|
||||||
|
o lightning is more strict on casts from integer to pointer.
|
||||||
|
Be sure to use the _p variants when your immediates are
|
||||||
|
of pointer type. This was done to ease 64-bit cleanliness
|
||||||
|
tests.
|
||||||
|
|
||||||
o Many bug fixes.
|
o Many bug fixes.
|
||||||
|
|
||||||
o JIT_FPRET is used as JIT_RET to move return values.
|
o JIT_FPRET is used as JIT_RET to move return values.
|
||||||
|
|
|
@ -396,30 +396,30 @@ typedef union jit_code {
|
||||||
#define jit_bmsi_ul(label, rs, is) jit_bmsi_l((label), (rs), (is))
|
#define jit_bmsi_ul(label, rs, is) jit_bmsi_l((label), (rs), (is))
|
||||||
|
|
||||||
#define jit_ltr_p(d, s1, s2) jit_ltr_ul((d), (s1), (s2))
|
#define jit_ltr_p(d, s1, s2) jit_ltr_ul((d), (s1), (s2))
|
||||||
#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (is))
|
#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (long)(is))
|
||||||
#define jit_ler_p(d, s1, s2) jit_ler_ul((d), (s1), (s2))
|
#define jit_ler_p(d, s1, s2) jit_ler_ul((d), (s1), (s2))
|
||||||
#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (is))
|
#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (long)(is))
|
||||||
#define jit_gtr_p(d, s1, s2) jit_gtr_ul((d), (s1), (s2))
|
#define jit_gtr_p(d, s1, s2) jit_gtr_ul((d), (s1), (s2))
|
||||||
#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (is))
|
#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (long)(is))
|
||||||
#define jit_ger_p(d, s1, s2) jit_ger_ul((d), (s1), (s2))
|
#define jit_ger_p(d, s1, s2) jit_ger_ul((d), (s1), (s2))
|
||||||
#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (is))
|
#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (long)(is))
|
||||||
#define jit_eqr_p(d, s1, s2) jit_eqr_ul((d), (s1), (s2))
|
#define jit_eqr_p(d, s1, s2) jit_eqr_ul((d), (s1), (s2))
|
||||||
#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (is))
|
#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (long)(is))
|
||||||
#define jit_ner_p(d, s1, s2) jit_ner_ul((d), (s1), (s2))
|
#define jit_ner_p(d, s1, s2) jit_ner_ul((d), (s1), (s2))
|
||||||
#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (is))
|
#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (long)(is))
|
||||||
|
|
||||||
#define jit_bltr_p(label, s1, s2) jit_bltr_ul((label), (s1), (s2))
|
#define jit_bltr_p(label, s1, s2) jit_bltr_ul((label), (s1), (s2))
|
||||||
#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (is))
|
#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (long)(is))
|
||||||
#define jit_bler_p(label, s1, s2) jit_bler_ul((label), (s1), (s2))
|
#define jit_bler_p(label, s1, s2) jit_bler_ul((label), (s1), (s2))
|
||||||
#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (is))
|
#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (long)(is))
|
||||||
#define jit_bgtr_p(label, s1, s2) jit_bgtr_ul((label), (s1), (s2))
|
#define jit_bgtr_p(label, s1, s2) jit_bgtr_ul((label), (s1), (s2))
|
||||||
#define jit_bgti_p(label, rs, is) jit_bgti_ul((label), (rs), (is))
|
#define jit_bgti_p(label, rs, is) jit_bgti_ul((label), (rs), (long)(is))
|
||||||
#define jit_bger_p(label, s1, s2) jit_bger_ul((label), (s1), (s2))
|
#define jit_bger_p(label, s1, s2) jit_bger_ul((label), (s1), (s2))
|
||||||
#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (is))
|
#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (long)(is))
|
||||||
#define jit_beqr_p(label, s1, s2) jit_beqr_ul((label), (s1), (s2))
|
#define jit_beqr_p(label, s1, s2) jit_beqr_ul((label), (s1), (s2))
|
||||||
#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (is))
|
#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (long)(is))
|
||||||
#define jit_bner_p(label, s1, s2) jit_bner_ul((label), (s1), (s2))
|
#define jit_bner_p(label, s1, s2) jit_bner_ul((label), (s1), (s2))
|
||||||
#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (is))
|
#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (long)(is))
|
||||||
|
|
||||||
#define jit_retval_ui(rd) jit_retval_i((rd))
|
#define jit_retval_ui(rd) jit_retval_i((rd))
|
||||||
#define jit_retval_uc(rd) jit_retval_i((rd))
|
#define jit_retval_uc(rd) jit_retval_i((rd))
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
|
|
||||||
#include "asm-i386.h"
|
#include "asm-i386.h"
|
||||||
|
|
||||||
|
#define _r1(R) ( ((R) & ~3) == _AL || ((R) & ~3) == _AH ? _rN(R) : JITFAIL( "8-bit register required"))
|
||||||
#define _rA(R) _r4(R)
|
#define _rA(R) _r4(R)
|
||||||
|
|
||||||
/* Use RIP-addressing in 64-bit mode, if possible */
|
/* Use RIP-addressing in 64-bit mode, if possible */
|
||||||
|
@ -58,14 +59,14 @@
|
||||||
#define _m64only(X) JITFAIL("invalid instruction in 32-bit mode")
|
#define _m64only(X) JITFAIL("invalid instruction in 32-bit mode")
|
||||||
#define _m64(X) ((void)0)
|
#define _m64(X) ((void)0)
|
||||||
|
|
||||||
#define CALLsr(R) CALLLsr(R)
|
|
||||||
#define JMPsr(R) JMPLsr(R)
|
|
||||||
|
|
||||||
#define _AH 0x24
|
#define _AH 0x24
|
||||||
#define _CH 0x25
|
#define _CH 0x25
|
||||||
#define _DH 0x26
|
#define _DH 0x26
|
||||||
#define _BH 0x27
|
#define _BH 0x27
|
||||||
|
|
||||||
|
#define CALLsr(R) CALLLsr(R)
|
||||||
|
#define JMPsr(R) JMPLsr(R)
|
||||||
|
|
||||||
#define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) ))
|
#define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) ))
|
||||||
#define DECLr(RD) _Or (0x48,_r4(RD) )
|
#define DECLr(RD) _Or (0x48,_r4(RD) )
|
||||||
#define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) ))
|
#define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) ))
|
||||||
|
|
|
@ -127,6 +127,8 @@
|
||||||
#define _R15 0x4F
|
#define _R15 0x4F
|
||||||
#define _RIP -2
|
#define _RIP -2
|
||||||
|
|
||||||
|
#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required"))
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
#define _r8(R) ( (_rC(R) == 0x50) ? _rN(R) : JITFAIL("64-bit register required"))
|
#define _r8(R) ( (_rC(R) == 0x50) ? _rN(R) : JITFAIL("64-bit register required"))
|
||||||
#else
|
#else
|
||||||
|
@ -335,6 +337,40 @@
|
||||||
|
|
||||||
#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
|
#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
|
||||||
|
#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
|
||||||
|
#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
|
||||||
|
#define __SSEQ1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
|
||||||
|
|
||||||
|
#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
|
||||||
|
#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
|
||||||
|
#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
|
||||||
|
#define _SSEQ1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQ1rm(OP, RS, RSA, MD, MB, MI, MS))
|
||||||
|
|
||||||
|
#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8)
|
||||||
|
#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
|
||||||
|
#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8)
|
||||||
|
#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
|
||||||
|
|
||||||
|
#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX)
|
||||||
|
#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX)
|
||||||
|
#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
|
||||||
|
#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
/* MOVQ xmm -> 64-bit general register (66 REX.W 0F 7E /r).  This is the
   store form: the XMM source is encoded in the ModRM reg field and the
   general register in r/m, so the operand pairs are handed to _SSEQrr
   swapped with respect to the 0x6e load form.  */
#define MOVDXQrr(RS, RD)		 _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX)
|
||||||
|
#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
|
||||||
|
#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
|
||||||
|
/* MOVQ mm -> 64-bit general register (REX.W 0F 7E /r).  Store form: the
   MMX source is encoded in the ModRM reg field and the general register
   in r/m, so the operand pairs are handed to __SSEQrr swapped with
   respect to the 0x6e load form.  */
#define MOVDMQrr(RS, RD)		__SSEQrr(      0x7e, RD,_r8, RS,_rM)
|
||||||
|
#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define CALLsr(R) CALLQsr(R)
|
#define CALLsr(R) CALLQsr(R)
|
||||||
#define JMPsr(R) JMPQsr(R)
|
#define JMPsr(R) JMPQsr(R)
|
||||||
|
|
||||||
|
|
|
@ -129,7 +129,6 @@ typedef _uc jit_insn;
|
||||||
#define _rM(R) _rN(R)
|
#define _rM(R) _rN(R)
|
||||||
#define _rX(R) _rN(R)
|
#define _rX(R) _rN(R)
|
||||||
#else
|
#else
|
||||||
#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required"))
|
|
||||||
#define _r2(R) ( (_rC(R) == 0x30) ? _rN(R) : JITFAIL("16-bit register required"))
|
#define _r2(R) ( (_rC(R) == 0x30) ? _rN(R) : JITFAIL("16-bit register required"))
|
||||||
#define _r4(R) ( (_rC(R) == 0x40) ? _rN(R) : JITFAIL("32-bit register required"))
|
#define _r4(R) ( (_rC(R) == 0x40) ? _rN(R) : JITFAIL("32-bit register required"))
|
||||||
#define _rM(R) ( (_rC(R) == 0x60) ? _rN(R) : JITFAIL("MMX register required"))
|
#define _rM(R) ( (_rC(R) == 0x60) ? _rN(R) : JITFAIL("MMX register required"))
|
||||||
|
@ -314,7 +313,7 @@ enum {
|
||||||
#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
|
#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
|
||||||
#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
|
#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
|
||||||
#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
|
#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
|
||||||
#define _ALUWir(OP, IM, RD) (!_s8P(IM) && (RD) == _AX ? \
|
#define _ALUWir(OP, IM, RD) ((RD) == _AX ? \
|
||||||
(_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
|
(_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
|
||||||
(_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
|
(_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
|
||||||
#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
|
#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
|
||||||
|
@ -1084,7 +1083,7 @@ enum {
|
||||||
|
|
||||||
#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
|
#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
|
||||||
#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
|
#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
|
||||||
#define TESTWir(IM, RD) (!_s8P(IM) && (RD) == _AX ? \
|
#define TESTWir(IM, RD) ((RD) == _AX ? \
|
||||||
(_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
|
(_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
|
||||||
(_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
|
(_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
|
||||||
#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
|
#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
|
||||||
|
@ -1302,6 +1301,280 @@ enum {
|
||||||
JITFAIL(".align argument too large")))
|
JITFAIL(".align argument too large")))
|
||||||
|
|
||||||
|
|
||||||
|
/* --- Media 128-bit instructions ------------------------------------------ */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
X86_SSE_MOV = 0x10,
|
||||||
|
X86_SSE_MOVLP = 0x12,
|
||||||
|
X86_SSE_MOVHP = 0x16,
|
||||||
|
X86_SSE_MOVA = 0x28,
|
||||||
|
X86_SSE_CVTIS = 0x2a,
|
||||||
|
X86_SSE_CVTSI = 0x2d,
|
||||||
|
X86_SSE_UCOMI = 0x2e,
|
||||||
|
X86_SSE_COMI = 0x2f,
|
||||||
|
X86_SSE_SQRT = 0x51,
|
||||||
|
X86_SSE_RSQRT = 0x52,
|
||||||
|
X86_SSE_RCP = 0x53,
|
||||||
|
X86_SSE_AND = 0x54,
|
||||||
|
X86_SSE_ANDN = 0x55,
|
||||||
|
X86_SSE_OR = 0x56,
|
||||||
|
X86_SSE_XOR = 0x57,
|
||||||
|
X86_SSE_ADD = 0x58,
|
||||||
|
X86_SSE_MUL = 0x59,
|
||||||
|
X86_SSE_CVTSD = 0x5a,
|
||||||
|
X86_SSE_CVTDT = 0x5b,
|
||||||
|
X86_SSE_SUB = 0x5c,
|
||||||
|
X86_SSE_MIN = 0x5d,
|
||||||
|
X86_SSE_DIV = 0x5e,
|
||||||
|
X86_SSE_MAX = 0x5f,
|
||||||
|
X86_SSE_MOV2 = 0xd6
|
||||||
|
};
|
||||||
|
|
||||||
|
/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
|
||||||
|
|
||||||
|
#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
|
||||||
|
#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
|
||||||
|
#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
|
||||||
|
#define __SSEL1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
|
||||||
|
|
||||||
|
#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
|
||||||
|
#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
|
||||||
|
#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
|
||||||
|
#define _SSEL1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEL1rm(OP, RS, RSA, MD, MB, MI, MS))
|
||||||
|
|
||||||
|
#define _SSEPSrr(OP,RS,RD) __SSELrr ( OP, RS,_rX, RD,_rX)
|
||||||
|
#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr ( OP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm ( OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __SSEL1rm( OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define _SSEPDrr(OP,RS,RD) _SSELrr (0x66, OP, RS,_rX, RD,_rX)
|
||||||
|
#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0x66, OP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0x66, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0x66, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define _SSESSrr(OP,RS,RD) _SSELrr (0xf3, OP, RS,_rX, RD,_rX)
|
||||||
|
#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf3, OP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf3, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define _SSESDrr(OP,RS,RD) _SSELrr (0xf2, OP, RS,_rX, RD,_rX)
|
||||||
|
#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf2, OP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf2, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
|
||||||
|
#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
|
||||||
|
#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
|
||||||
|
#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
|
||||||
|
#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
|
||||||
|
#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
|
||||||
|
#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
|
||||||
|
#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
|
||||||
|
#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
|
||||||
|
#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ANDNSSrr ANDNPSrr
|
||||||
|
/* Scalar-single alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ANDNSSmr	ANDNPSmr
|
||||||
|
#define ANDNSDrr ANDNPDrr
|
||||||
|
/* Scalar-double alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ANDNSDmr	ANDNPDmr
|
||||||
|
|
||||||
|
#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
|
||||||
|
#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
|
||||||
|
#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
|
||||||
|
#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ANDSSrr ANDPSrr
|
||||||
|
/* Scalar-single alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ANDSSmr		ANDPSmr
|
||||||
|
#define ANDSDrr ANDPDrr
|
||||||
|
/* Scalar-double alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ANDSDmr		ANDPDmr
|
||||||
|
|
||||||
|
#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
|
||||||
|
#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
|
||||||
|
#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
|
||||||
|
#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
|
||||||
|
#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
|
||||||
|
#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
|
||||||
|
#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
|
||||||
|
#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
|
||||||
|
#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
|
||||||
|
#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
|
||||||
|
#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
|
||||||
|
#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
|
||||||
|
#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
|
||||||
|
#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
|
||||||
|
#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
|
||||||
|
#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
|
||||||
|
#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
|
||||||
|
#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
|
||||||
|
#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
|
||||||
|
#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
|
||||||
|
#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
|
||||||
|
#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
|
||||||
|
#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
|
||||||
|
#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
|
||||||
|
#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
|
||||||
|
#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
|
||||||
|
#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
|
||||||
|
#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define ORSSrr ORPSrr
|
||||||
|
/* Scalar-single alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ORSSmr		ORPSmr
|
||||||
|
#define ORSDrr ORPDrr
|
||||||
|
/* Scalar-double alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define ORSDmr		ORPDmr
|
||||||
|
|
||||||
|
#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
|
||||||
|
#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
|
||||||
|
#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
|
||||||
|
#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
|
||||||
|
#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
|
||||||
|
#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
|
||||||
|
#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
|
||||||
|
#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
|
||||||
|
#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
|
||||||
|
#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
|
||||||
|
#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
|
||||||
|
#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
|
||||||
|
#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
|
||||||
|
#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
|
||||||
|
#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
|
||||||
|
#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
|
||||||
|
#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
|
||||||
|
#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
|
||||||
|
#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
|
||||||
|
#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
|
||||||
|
#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
|
||||||
|
#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define XORSSrr XORPSrr
|
||||||
|
/* Scalar-single alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define XORSSmr		XORPSmr
|
||||||
|
#define XORSDrr XORPDrr
|
||||||
|
/* Scalar-double alias of the packed memory-operand form; must forward
   to the five-argument (MD, MB, MI, MS, RD) macro, not the rr one.  */
#define XORSDmr		XORPDmr
|
||||||
|
|
||||||
|
/* COMISS takes no prefix; COMISD is the same opcode with the 0x66 prefix. */
|
||||||
|
#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
|
||||||
|
#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
|
||||||
|
#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
|
||||||
|
#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
/* UCOMISS takes no prefix; UCOMISD is the same opcode with the 0x66 prefix. */
|
||||||
|
#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
|
||||||
|
#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
|
||||||
|
#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
|
||||||
|
#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
|
||||||
|
|
||||||
|
#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD)
|
||||||
|
#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
|
||||||
|
#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD)
|
||||||
|
#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
|
||||||
|
#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD)
|
||||||
|
#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
|
||||||
|
#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD)
|
||||||
|
#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
|
||||||
|
#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM)
|
||||||
|
#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
|
||||||
|
#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM)
|
||||||
|
#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
|
||||||
|
|
||||||
|
#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX)
|
||||||
|
#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX)
|
||||||
|
#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX)
|
||||||
|
#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX)
|
||||||
|
#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX)
|
||||||
|
#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX)
|
||||||
|
#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4)
|
||||||
|
#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
|
||||||
|
#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4)
|
||||||
|
#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
|
||||||
|
|
||||||
|
#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX)
|
||||||
|
#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX)
|
||||||
|
#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
|
||||||
|
#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
|
||||||
|
|
||||||
|
/* MOVD xmm -> 32-bit general register (66 0F 7E /r).  Store form: the
   XMM source is encoded in the ModRM reg field and the general register
   in r/m, so the operand pairs are handed to _SSELrr swapped with
   respect to the 0x6e load form.  */
#define MOVDXLrr(RS, RD)		 _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX)
|
||||||
|
#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
|
||||||
|
#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
|
||||||
|
|
||||||
|
/* MOVD mm -> 32-bit general register (0F 7E /r).  Store form: the MMX
   source is encoded in the ModRM reg field and the general register in
   r/m, so the operand pairs are handed to __SSELrr swapped with respect
   to the 0x6e load form.  */
#define MOVDMLrr(RS, RD)		__SSELrr(      0x7e, RD,_r4, RS,_rM)
|
||||||
|
#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, X86_SSE_MOV2, RS,_rX, RD,_rM)
|
||||||
|
#define MOVQ2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_MOV2, RS,_rM, RD,_rX)
|
||||||
|
#define MOVHLPSrr(RS, RD) __SSELrr( X86_SSE_MOVLP, RS,_rX, RD,_rX)
|
||||||
|
#define MOVLHPSrr(RS, RD) __SSELrr( X86_SSE_MOVHP, RS,_rX, RD,_rX)
|
||||||
|
|
||||||
|
#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
|
||||||
|
#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
|
||||||
|
#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVHPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVHPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
|
#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVLPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
|
||||||
|
#define MOVLPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
|
||||||
|
|
||||||
/*** References: */
|
/*** References: */
|
||||||
/* */
|
/* */
|
||||||
/* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */
|
/* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */
|
||||||
|
|
|
@ -43,6 +43,8 @@
|
||||||
|
|
||||||
struct jit_local_state {
|
struct jit_local_state {
|
||||||
int long_jumps;
|
int long_jumps;
|
||||||
|
int nextarg_getfp;
|
||||||
|
int nextarg_putfp;
|
||||||
int nextarg_geti;
|
int nextarg_geti;
|
||||||
int argssize;
|
int argssize;
|
||||||
int alloca_offset;
|
int alloca_offset;
|
||||||
|
@ -63,13 +65,19 @@ struct jit_local_state {
|
||||||
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
|
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
|
||||||
|
|
||||||
/* 3-parameter operation */
|
/* 3-parameter operation */
|
||||||
#define jit_qopr_(d, s1, s2, op1d, op2d) \
|
#define jit_qopr_(d, s1, s2, op1d, op2d) \
|
||||||
( (s2 == d) ? op1d : \
|
( ((s2) == (d)) ? op1d : \
|
||||||
( ((s1 == d) ? (void)0 : (void)MOVQrr(s1, d)), op2d ) \
|
( (((s1) == (d)) ? (void)0 : (void)MOVQrr((s1), (d))), op2d ) \
|
||||||
)
|
)
|
||||||
|
|
||||||
/* 3-parameter operation, with immediate */
|
/* 3-parameter operation, with immediate. TODO: fix the case where immediate
|
||||||
#define jit_qop_(d, s1, op2d) \
|
does not fit! */
|
||||||
|
#define jit_qop_small(d, s1, op2d) \
|
||||||
|
(((s1) == (d)) ? op2d : (MOVQrr((s1), (d)), op2d))
|
||||||
|
/* 3-parameter operation whose second source is the immediate IS.
   If IS passes the _s32P test, OP2D (the form that encodes the immediate
   itself) is emitted.  Otherwise IS is first loaded into JIT_REXTMP with
   MOVQir -- it is an immediate, not a register, so MOVQrr would be wrong --
   and OP2I (the form that reads JIT_REXTMP) is emitted instead.  */
#define jit_qop_(d, s1, is, op2d, op2i) \
  (_s32P((long)(is)) \
   ? jit_qop_small ((d), (s1), (op2d)) \
   : (MOVQir ((is), JIT_REXTMP), jit_qop_small ((d), (s1), (op2i))))
|
||||||
|
|
||||||
#define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc)
|
#define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc)
|
||||||
#define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc)
|
#define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc)
|
||||||
|
@ -88,21 +96,19 @@ struct jit_local_state {
|
||||||
|
|
||||||
#define jit_addi_l(d, rs, is) jit_opi_((d), (rs), ADDQir((is), (d)), LEAQmr((is), (rs), 0, 0, (d)) )
|
#define jit_addi_l(d, rs, is) jit_opi_((d), (rs), ADDQir((is), (d)), LEAQmr((is), (rs), 0, 0, (d)) )
|
||||||
#define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) )
|
#define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) )
|
||||||
#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), ANDQir((is), (d)) )
|
#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ANDQir((is), (d)), ANDQrr(JIT_REXTMP, (d)))
|
||||||
#define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) )
|
#define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) )
|
||||||
#define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) )
|
#define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) )
|
||||||
#define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) )
|
#define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) )
|
||||||
#define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) )
|
#define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) )
|
||||||
|
|
||||||
/* These can sometimes use byte or word versions! */
|
/* These can sometimes use byte or word versions! */
|
||||||
#define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) )
|
/* d = rs | is (64-bit).  jit_qop_ takes the immediate as its third argument
   so it can choose between the immediate form and the JIT_REXTMP fallback.  */
#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(OR, (is), (d)), ORQrr(JIT_REXTMP, (d)) )
|
||||||
#define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) )
|
/* d = rs ^ is (64-bit).  The immediate is passed to jit_qop_ as its third
   argument; the JIT_REXTMP fallback must XOR (not OR) the temporary into d.  */
#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), (is), jit_reduceQ(XOR, (is), (d)), XORQrr(JIT_REXTMP, (d)) )
|
||||||
#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(OR, (is), (d)) )
|
|
||||||
#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(XOR, (is), (d)) )
|
|
||||||
|
|
||||||
#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_ ((d), (rs), SHLQir((is), (d)) ))
|
#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_small ((d), (rs), SHLQir((is), (d)) ))
|
||||||
#define jit_rshi_l(d, rs, is) jit_qop_ ((d), (rs), SARQir((is), (d)) )
|
#define jit_rshi_l(d, rs, is) jit_qop_small ((d), (rs), SARQir((is), (d)) )
|
||||||
#define jit_rshi_ul(d, rs, is) jit_qop_ ((d), (rs), SHRQir((is), (d)) )
|
#define jit_rshi_ul(d, rs, is) jit_qop_small ((d), (rs), SHRQir((is), (d)) )
|
||||||
#define jit_lshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHLQrr(_CL, (d)) ))
|
/* d = r1 << r2 (64-bit); the count is routed through _ECX by jit_replace.
   Uses the 3-argument jit_qop_small: there is no immediate for jit_qop_.  */
#define jit_lshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_small ((d), (r1), SHLQrr(_CL, (d)) ))
|
||||||
#define jit_rshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SARQrr(_CL, (d)) ))
|
/* d = r1 >> r2 using SARQ (64-bit); the count is routed through _ECX by
   jit_replace.  Uses the 3-argument jit_qop_small: no immediate is involved.  */
#define jit_rshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_small ((d), (r1), SARQrr(_CL, (d)) ))
|
||||||
#define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) ))
|
/* d = r1 >> r2 using SHRQ (64-bit); the count is routed through _ECX by
   jit_replace.  Uses the 3-argument jit_qop_small: no immediate is involved.  */
#define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_small ((d), (r1), SHRQrr(_CL, (d)) ))
|
||||||
|
@ -112,7 +118,7 @@ struct jit_local_state {
|
||||||
#define jit_popr_i(rs) POPQr(rs)
|
#define jit_popr_i(rs) POPQr(rs)
|
||||||
|
|
||||||
#define jit_base_prolog() (PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_EBP), MOVQrr(_ESP, _EBP))
|
#define jit_base_prolog() (PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_EBP), MOVQrr(_ESP, _EBP))
|
||||||
#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog())
|
#define jit_prolog(n) (_jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog())
|
||||||
|
|
||||||
/* Stack isn't used for arguments: */
|
/* Stack isn't used for arguments: */
|
||||||
#define jit_prepare_i(ni) (_jitl.argssize = 0)
|
#define jit_prepare_i(ni) (_jitl.argssize = 0)
|
||||||
|
@ -181,17 +187,19 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
|
||||||
#define jit_ret() ((_jitl.alloca_offset < 0 ? LEAVE_() : POPQr(_EBP)), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_())
|
#define jit_ret() ((_jitl.alloca_offset < 0 ? LEAVE_() : POPQr(_EBP)), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_())
|
||||||
|
|
||||||
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
|
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
|
||||||
|
#define _jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d))
|
||||||
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
|
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
|
||||||
#define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d))
|
#define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d))
|
||||||
#define jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d))
|
|
||||||
|
|
||||||
#define _jit_sti_l(id, rs) MOVQrm((rs), (id), 0, 0, 0)
|
#define _jit_sti_l(id, rs) MOVQrm((rs), (id), 0, 0, 0)
|
||||||
|
#define _jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0)
|
||||||
#define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0)
|
#define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0)
|
||||||
#define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1)
|
#define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1)
|
||||||
#define jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0)
|
|
||||||
|
|
||||||
#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l(JIT_REXTMP)))
|
#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l((d), JIT_REXTMP)))
|
||||||
#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l(id, rs) : (jit_movi_l(JIT_REXTMP, id), jit_str_l (JIT_REXTMP, (rs))))
|
#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_l (JIT_REXTMP, (rs))))
|
||||||
|
#define jit_ldxi_l(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_l((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_l((d), (rs), JIT_REXTMP)))
|
||||||
|
#define jit_stxi_l(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_l((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_l (JIT_REXTMP, (rd), (rs))))
|
||||||
|
|
||||||
/* Memory */
|
/* Memory */
|
||||||
|
|
||||||
|
|
|
@ -244,13 +244,8 @@
|
||||||
#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) ))
|
#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) ))
|
||||||
|
|
||||||
/* Stack */
|
/* Stack */
|
||||||
#define jit_prepare_f(nf) (_jitl.argssize += (nf))
|
|
||||||
#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
|
|
||||||
#define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX))
|
#define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX))
|
||||||
|
|
||||||
#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
|
|
||||||
#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
|
|
||||||
|
|
||||||
/* Unary */
|
/* Unary */
|
||||||
#define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) )
|
#define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) )
|
||||||
|
|
||||||
|
|
|
@ -346,4 +346,9 @@ union jit_double_imm {
|
||||||
_OO(0xd9f1)) /* fyl2x */
|
_OO(0xd9f1)) /* fyl2x */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define jit_prepare_f(nf) (_jitl.argssize += (nf))
|
||||||
|
#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
|
||||||
|
#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
|
||||||
|
#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
|
||||||
|
|
||||||
#endif /* __lightning_asm_h */
|
#endif /* __lightning_asm_h */
|
||||||
|
|
|
@ -33,6 +33,264 @@
|
||||||
#ifndef __lightning_fp_h
|
#ifndef __lightning_fp_h
|
||||||
#define __lightning_fp_h
|
#define __lightning_fp_h
|
||||||
|
|
||||||
#warning SSE math not yet supported
|
#include <float.h>
|
||||||
|
|
||||||
|
#define JIT_FPR_NUM 9
|
||||||
|
#define JIT_FPRET _XMM0
|
||||||
|
#define JIT_FPR(i) (_XMM7 + (i))
|
||||||
|
#define JIT_FPTMP _XMM6
|
||||||
|
|
||||||
|
/* Either use a temporary register that is finally AND/OR/XORed with RS = RD,
|
||||||
|
or use RD as the temporary register and do the AND/OR/XOR with RS. */
|
||||||
|
/* Expand OP(rd, cnst, rs) for a unary operation that needs a scratch
   register CNST.  When RS and RD are the same register, JIT_FPTMP is passed
   as both the scratch and the source operand; otherwise RD itself is passed
   as the scratch and RS as the source.  */
#define jit_unop_tmp(rd, rs, op) \
  ( (rs) == (rd) \
    ? op((rd), JIT_FPTMP, JIT_FPTMP) \
    : op((rd), (rd), (rs)))
|
||||||
|
|
||||||
|
#define jit_unop_f(rd, rs, op) \
|
||||||
|
((rs) == (rd) ? op((rd)) : (MOVSSrr ((rs), (rd)), op((rd))))
|
||||||
|
|
||||||
|
#define jit_unop_d(rd, rs, op) \
|
||||||
|
((rs) == (rd) ? op((rd)) : (MOVSDrr ((rs), (rd)), op((rd))))
|
||||||
|
|
||||||
|
#define jit_3opc_f(rd, s1, s2, op) \
|
||||||
|
( (s1) == (rd) ? op((s2), (rd)) \
|
||||||
|
: ((s2) == (rd) ? op((s1), (rd)) \
|
||||||
|
: (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
|
||||||
|
|
||||||
|
#define jit_3opc_d(rd, s1, s2, op) \
|
||||||
|
( (s1) == (rd) ? op((s2), (rd)) \
|
||||||
|
: ((s2) == (rd) ? op((s1), (rd)) \
|
||||||
|
: (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
|
||||||
|
|
||||||
|
#define jit_3op_f(rd, s1, s2, op) \
|
||||||
|
( (s1) == (rd) ? op((s2), (rd)) \
|
||||||
|
: ((s2) == (rd) \
|
||||||
|
? (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
|
||||||
|
: (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
|
||||||
|
|
||||||
|
#define jit_3op_d(rd, s1, s2, op) \
|
||||||
|
( (s1) == (rd) ? op((s2), (rd)) \
|
||||||
|
: ((s2) == (rd) \
|
||||||
|
? (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
|
||||||
|
: (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
|
||||||
|
|
||||||
|
#define jit_addr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr)
|
||||||
|
#define jit_subr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), SUBSSrr)
|
||||||
|
#define jit_mulr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), MULSSrr)
|
||||||
|
#define jit_divr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), DIVSSrr)
|
||||||
|
|
||||||
|
#define jit_addr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr)
|
||||||
|
#define jit_subr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), SUBSDrr)
|
||||||
|
#define jit_mulr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), MULSDrr)
|
||||||
|
#define jit_divr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), DIVSDrr)
|
||||||
|
|
||||||
|
#define jit_movr_f(rd,rs) MOVSSrr((rs), (rd))
|
||||||
|
#define jit_movr_d(rd,rs) MOVSDrr((rs), (rd))
|
||||||
|
|
||||||
|
/* either pcmpeqd %xmm6, %xmm6 / psrld $1, %xmm6 / andps %xmm6, %RD (if RS = RD)
|
||||||
|
or pcmpeqd %RD, %RD / psrld $1, %RD / andps %RS, %RD (if RS != RD) */
|
||||||
|
#define _jit_abs_f(rd,cnst,rs) \
|
||||||
|
(PCMPEQDrr((cnst), (cnst)), PSRLDir (1, (cnst)), ANDPSrr ((rs), (rd)))
|
||||||
|
#define _jit_neg_f(rd,cnst,rs) \
|
||||||
|
(PCMPEQDrr((cnst), (cnst)), PSLLDir (31, (cnst)), XORPSrr ((rs), (rd)))
|
||||||
|
#define jit_abs_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_f)
|
||||||
|
#define jit_neg_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_f)
|
||||||
|
|
||||||
|
#define _jit_abs_d(rd,cnst,rs) \
|
||||||
|
(PCMPEQDrr((cnst), (cnst)), PSRLQir (1, (cnst)), ANDPDrr ((rs), (rd)))
|
||||||
|
#define _jit_neg_d(rd,cnst,rs) \
|
||||||
|
(PCMPEQDrr((cnst), (cnst)), PSLLQir (63, (cnst)), XORPDrr ((rs), (rd)))
|
||||||
|
#define jit_abs_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_d)
|
||||||
|
#define jit_neg_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_d)
|
||||||
|
|
||||||
|
/* rd = sqrt(rs), double precision: uses the scalar-double (SD) opcode.  */
#define jit_sqrt_d(rd,rs) SQRTSDrr((rs), (rd))
|
||||||
|
/* rd = sqrt(rs), single precision: uses the scalar-single (SS) opcode.  */
#define jit_sqrt_f(rd,rs) SQRTSSrr((rs), (rd))
|
||||||
|
|
||||||
|
#define _jit_ldi_f(d, is) MOVSSmr((is), 0, 0, 0, (d))
|
||||||
|
#define _jit_ldxi_f(d, rs, is) MOVSSmr((is), (rs), 0, 0, (d))
|
||||||
|
#define jit_ldr_f(d, rs) MOVSSmr(0, (rs), 0, 0, (d))
|
||||||
|
#define jit_ldxr_f(d, s1, s2) MOVSSmr(0, (s1), (s2), 1, (d))
|
||||||
|
|
||||||
|
#define _jit_sti_f(id, rs) MOVSSrm((rs), (id), 0, 0, 0)
|
||||||
|
#define _jit_stxi_f(id, rd, rs) MOVSSrm((rs), (id), (rd), 0, 0)
|
||||||
|
#define jit_str_f(rd, rs) MOVSSrm((rs), 0, (rd), 0, 0)
|
||||||
|
#define jit_stxr_f(d1, d2, rs) MOVSSrm((rs), 0, (d1), (d2), 1)
|
||||||
|
|
||||||
|
#define jit_ldi_f(d, is) (_u32P((long)(is)) ? _jit_ldi_f((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP)))
|
||||||
|
#define jit_sti_f(id, rs) (_u32P((long)(id)) ? _jit_sti_f((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs))))
|
||||||
|
#define jit_ldxi_f(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_f((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP)))
|
||||||
|
#define jit_stxi_f(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_f((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs))))
|
||||||
|
|
||||||
|
#define _jit_ldi_d(d, is) MOVSDmr((is), 0, 0, 0, (d))
|
||||||
|
#define _jit_ldxi_d(d, rs, is) MOVSDmr((is), (rs), 0, 0, (d))
|
||||||
|
#define jit_ldr_d(d, rs) MOVSDmr(0, (rs), 0, 0, (d))
|
||||||
|
#define jit_ldxr_d(d, s1, s2) MOVSDmr(0, (s1), (s2), 1, (d))
|
||||||
|
|
||||||
|
#define _jit_sti_d(id, rs) MOVSDrm((rs), (id), 0, 0, 0)
|
||||||
|
#define _jit_stxi_d(id, rd, rs) MOVSDrm((rs), (id), (rd), 0, 0)
|
||||||
|
#define jit_str_d(rd, rs) MOVSDrm((rs), 0, (rd), 0, 0)
|
||||||
|
#define jit_stxr_d(d1, d2, rs) MOVSDrm((rs), 0, (d1), (d2), 1)
|
||||||
|
|
||||||
|
#define jit_ldi_d(d, is) (_u32P((long)(is)) ? _jit_ldi_d((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP)))
|
||||||
|
#define jit_sti_d(id, rs) (_u32P((long)(id)) ? _jit_sti_d((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs))))
|
||||||
|
#define jit_ldxi_d(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_d((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP)))
|
||||||
|
#define jit_stxi_d(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_d((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs))))
|
||||||
|
|
||||||
|
|
||||||
|
/* Load the float constant IMMF into RD.
   Zero is produced by XORing RD with itself.  Any other value is
   materialised through the stack: opcode 0x50 (push) reserves a slot, a
   MOVL with a placeholder immediate stores into it, and the 4 bytes just
   emitted are then overwritten with the bit pattern of IMMF.  RD is loaded
   from the slot, which is finally released.  The push is 8 bytes wide in
   64-bit mode, so the slot is released with a 64-bit add of 8; a 32-bit
   add of 4 would leave the stack unbalanced.  */
#define jit_movi_f(rd,immf) \
  ((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \
   (_O (0x50), \
    MOVLim (0x12345678L, 0, _ESP, 0, 0), \
    *((float *) (_jit.x.uc_pc - 4)) = (float) (immf), \
    jit_ldr_f((rd), _ESP), \
    ADDQir(8, _ESP)))
|
||||||
|
|
||||||
|
union jit_double_imm {
|
||||||
|
double d;
|
||||||
|
long l;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define jit_movi_d(rd,immd) \
|
||||||
|
((immd) == 0.0 ? XORSDrr ((rd), (rd)) : \
|
||||||
|
(_O (0x50), \
|
||||||
|
MOVQir (0x123456789abcdef0L, _EAX), \
|
||||||
|
((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \
|
||||||
|
_O (0x50), jit_ldr_d((rd), _ESP), \
|
||||||
|
_O (0x58), _O (0x58)))
|
||||||
|
|
||||||
|
#define jit_extr_i_d(rd, rs) CVTSI2SDLrr((rs), (rd))
|
||||||
|
#define jit_extr_i_f(rd, rs) CVTSI2SSLrr((rs), (rd))
|
||||||
|
#define jit_extr_l_d(rd, rs) CVTSI2SDQrr((rs), (rd))
|
||||||
|
#define jit_extr_l_f(rd, rs) CVTSI2SSQrr((rs), (rd))
|
||||||
|
#define jit_roundr_d_i(rd, rs) CVTSD2SILrr((rs), (rd))
|
||||||
|
#define jit_roundr_f_i(rd, rs) CVTSS2SILrr((rs), (rd))
|
||||||
|
#define jit_roundr_d_l(rd, rs) CVTSD2SIQrr((rs), (rd))
|
||||||
|
#define jit_roundr_f_l(rd, rs) CVTSS2SIQrr((rs), (rd))
|
||||||
|
|
||||||
|
|
||||||
|
#define jit_ceilr_f_i(rd, rs) do { \
|
||||||
|
jit_roundr_f_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_f (JIT_FPTMP, (rd)); \
|
||||||
|
UCOMISSrr ((rs), JIT_FPTMP); \
|
||||||
|
ADCLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_ceilr_d_i(rd, rs) do { \
|
||||||
|
jit_roundr_d_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_d (JIT_FPTMP, (rd)); \
|
||||||
|
UCOMISDrr ((rs), JIT_FPTMP); \
|
||||||
|
ADCLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_truncr_f_i(rd, rs) do { \
|
||||||
|
jit_roundr_f_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_f (JIT_FPTMP, (rd)); \
|
||||||
|
TESTLrr ((rd), (rd)); \
|
||||||
|
JSm (_jit.x.pc + 9); \
|
||||||
|
UCOMISSrr (JIT_FPTMP, (rs)); \
|
||||||
|
SBBLir (0, (rd)); \
|
||||||
|
JMPSm (_jit.x.pc + 7); \
|
||||||
|
UCOMISSrr ((rs), JIT_FPTMP); \
|
||||||
|
ADCLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_truncr_d_i(rd, rs) do { \
|
||||||
|
jit_roundr_d_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_d (JIT_FPTMP, (rd)); \
|
||||||
|
TESTLrr ((rd), (rd)); \
|
||||||
|
JSm (_jit.x.pc + 9); \
|
||||||
|
UCOMISDrr (JIT_FPTMP, (rs)); \
|
||||||
|
SBBLir (0, (rd)); \
|
||||||
|
JMPSm (_jit.x.pc + 7); \
|
||||||
|
UCOMISDrr ((rs), JIT_FPTMP); \
|
||||||
|
ADCLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_floorr_f_i(rd, rs) do { \
|
||||||
|
jit_roundr_f_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_f (JIT_FPTMP, (rd)); \
|
||||||
|
UCOMISSrr (JIT_FPTMP, (rs)); \
|
||||||
|
SBBLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_floorr_d_i(rd, rs) do { \
|
||||||
|
jit_roundr_d_i ((rd), (rs)); \
|
||||||
|
jit_extr_i_d (JIT_FPTMP, (rd)); \
|
||||||
|
UCOMISDrr (JIT_FPTMP, (rs)); \
|
||||||
|
SBBLir (0, (rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d)))
|
||||||
|
#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d)))
|
||||||
|
#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
|
||||||
|
/* Branch to D if s1 != s2 or the operands are unordered (single precision).
   After the compare: JP +2 hops over the 2-byte JZ to reach the JMP, and
   JZ +5 skips the JMP when the operands compared equal.  */
#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
|
||||||
|
#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d)))
|
||||||
|
#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d)))
|
||||||
|
#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d)))
|
||||||
|
#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d)))
|
||||||
|
#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d)))
|
||||||
|
#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d)))
|
||||||
|
#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d)))
|
||||||
|
#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm ((d)))
|
||||||
|
#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d)))
|
||||||
|
#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d)))
|
||||||
|
|
||||||
|
#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d)))
|
||||||
|
#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d)))
|
||||||
|
#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
|
||||||
|
/* Branch to D if s1 != s2 or the operands are unordered (double precision).
   After the compare: JP +2 hops over the 2-byte JZ to reach the JMP, and
   JZ +5 skips the JMP when the operands compared equal.  */
#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
|
||||||
|
#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d)))
|
||||||
|
#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d)))
|
||||||
|
#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d)))
|
||||||
|
#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d)))
|
||||||
|
#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d)))
|
||||||
|
#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d)))
|
||||||
|
#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d)))
|
||||||
|
#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d)))
|
||||||
|
#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNPm ((d)))
|
||||||
|
#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d)))
|
||||||
|
|
||||||
|
#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d))))
|
||||||
|
#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d))))
|
||||||
|
#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
|
||||||
|
#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
|
||||||
|
#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d))))
|
||||||
|
#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d))))
|
||||||
|
#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
|
||||||
|
#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d))))
|
||||||
|
#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d))))
|
||||||
|
#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d))))
|
||||||
|
#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d))))
|
||||||
|
#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
|
||||||
|
#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d))))
|
||||||
|
#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d))))
|
||||||
|
|
||||||
|
#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d))))
|
||||||
|
#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d))))
|
||||||
|
#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
|
||||||
|
#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
|
||||||
|
#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr (jit_reg8((d))))
|
||||||
|
#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d))))
|
||||||
|
#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
|
||||||
|
#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d))))
|
||||||
|
#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d))))
|
||||||
|
#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d))))
|
||||||
|
#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d))))
|
||||||
|
#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
|
||||||
|
#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d))))
|
||||||
|
#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d))))
|
||||||
|
|
||||||
|
#define jit_prepare_f(num) (_jitl.nextarg_putfp = _XMM0 + (num))
|
||||||
|
#define jit_prepare_d(num) (_jitl.nextarg_putfp = _XMM0 + (num))
|
||||||
|
|
||||||
|
#define jit_arg_f() (_XMM0 + _jitl.nextarg_getfp++)
|
||||||
|
#define jit_arg_d() (_XMM0 + _jitl.nextarg_getfp++)
|
||||||
|
|
||||||
|
#define jit_getarg_f(rd, ofs) (jit_movr_f ((rd), (ofs)))
|
||||||
|
#define jit_getarg_d(rd, ofs) (jit_movr_d ((rd), (ofs)))
|
||||||
|
|
||||||
|
#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp, jit_movr_f (_jitl.nextarg_putfp, (rs)))
|
||||||
|
#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp, jit_movr_d (_jitl.nextarg_putfp, (rs)))
|
||||||
|
|
||||||
#endif /* __lightning_fp_h */
|
#endif /* __lightning_fp_h */
|
||||||
|
|
|
@ -60,8 +60,8 @@
|
||||||
#define jit_abs_f(rd,rs) FABSDrr((rs), (rd))
|
#define jit_abs_f(rd,rs) FABSDrr((rs), (rd))
|
||||||
#define jit_negr_f(rd,rs) FNEGDrr((rs), (rd))
|
#define jit_negr_f(rd,rs) FNEGDrr((rs), (rd))
|
||||||
#define jit_sqrt_f(rd,rs) FSQRTDrr((rs), (rd))
|
#define jit_sqrt_f(rd,rs) FSQRTDrr((rs), (rd))
|
||||||
#define jit_extr_f_d(rs, rd) FSTODrr((rs), (rd))
|
#define jit_extr_f_d(rd, rs) FSTODrr((rs), (rd))
|
||||||
#define jit_extr_d_f(rs, rd) FDTOSrr((rs), (rd))
|
#define jit_extr_d_f(rd, rs) FDTOSrr((rs), (rd))
|
||||||
|
|
||||||
#define jit_movi_f(rd,immf) \
|
#define jit_movi_f(rd,immf) \
|
||||||
do { \
|
do { \
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue