1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-04 14:20:26 +02:00
guile/lib/jit_x86-sse.c
pcpa 33ee2337c7 Implement the new jit_set_data interface.
* include/lightning.h, include/lightning/jit_private.h,
	lib/lightning.c: Implement the new jit_set_data() interface,
	and the new jit_get_data() helper. Like jit_set_code(),
	jit_realize() should be called before jit_set_data().
	The most common usage should be jit_set_data(JIT_DISABLE_DATA
	| JIT_DISABLE_NOTE), to force synthesize any float/double
	constant in the stack and not generate any debug information.

	* lib/jit_note.c: Minor change to debug note generation as
	now it uses an alternate temporary data buffer during constants
	and debug generation to accommodate the possibility of the user
	setting an alternate data buffer.

	* lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c,
	lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c,
	lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data.

	* lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c,
	lib/jit_s390x-sz.c: Update for several instructions that now
	have a different maximum length due to jit_set_data.

	* lib/jit_mips-fpu.c: Implement jit_set_data, but missing
	validation on n32 and n64 abis (and/or big endian).

	* lib/jit_mips-sz.c: Update for changes in o32.

	* lib/jit_ppc-fpu.c: Implement jit_set_data, but missing
	validation on Darwin PPC.

	* lib/jit_ppc-sz.c: Update for changes in powerpc 32 and
	64 bit.

	* lib/jit_ia64-fpu.c: Implement untested jit_set_data.

	* TODO: Add note to list ports that were not tested for the
	new jit_set_data() feature, due to no longer having access
	to them.

	* check/nodata.c: New file implementing a simple test exercising
	several different conditions created by jit_set_data().

	* check/check.nodata.sh: New file implementing a wrapper
	over the existing *.tst files, that runs all tests without
	using a data buffer for constants; only meaningful (and
	enabled) on architectures that used to store float/double
	constants on a read only data buffer.

	* configure.ac, check/Makefile.am: Update for the new test
	cases.

	* check/lightning.c: Implement the new "-d" option, which
	sets an internal flag that makes the tool call jit_set_data()
	to disable constants and debug information, that is, to use
	only a pure code buffer.
2014-03-12 14:50:31 -03:00

1459 lines
44 KiB
C

/*
* Copyright (C) 2012, 2013 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* Authors:
* Paulo Cesar Pereira de Andrade
*/
#if PROTO
/*
 * Opcode byte values for the SSE instructions used in this file.  They
 * are handed to the encoders below as the `c`/`code` argument, which the
 * encoders emit right after a 0x0f byte.
 */
#define X86_SSE_MOV 0x10
#define X86_SSE_MOV1 0x11
#define X86_SSE_MOVLP 0x12
#define X86_SSE_MOVHP 0x16
#define X86_SSE_MOVA 0x28
#define X86_SSE_CVTIS 0x2a
#define X86_SSE_CVTTSI 0x2c
#define X86_SSE_CVTSI 0x2d
#define X86_SSE_UCOMI 0x2e
#define X86_SSE_COMI 0x2f
#define X86_SSE_ROUND 0x3a
#define X86_SSE_SQRT 0x51
#define X86_SSE_RSQRT 0x52
#define X86_SSE_RCP 0x53
#define X86_SSE_AND 0x54
#define X86_SSE_ANDN 0x55
#define X86_SSE_OR 0x56
#define X86_SSE_XOR 0x57
#define X86_SSE_ADD 0x58
#define X86_SSE_MUL 0x59
#define X86_SSE_CVTSD 0x5a
#define X86_SSE_CVTDT 0x5b
#define X86_SSE_SUB 0x5c
#define X86_SSE_MIN 0x5d
#define X86_SSE_DIV 0x5e
#define X86_SSE_MAX 0x5f
#define X86_SSE_X2G 0x6e
#define X86_SSE_EQB 0x74
#define X86_SSE_EQW 0x75
#define X86_SSE_EQD 0x76
#define X86_SSE_G2X 0x7e
#define X86_SSE_MOV2 0xd6
/*
 * Low-level encoders.  Each macro forwards to the matching _-prefixed
 * function, supplying the `_jit` state that is in scope at the call site.
 *   sser   - opcode c, two registers, no prefix byte.
 *   ssexr  - prefix byte p, opcode c, two registers.
 *   ssexi  - opcode c, register r0, a value m placed in the ModRM reg
 *            field, and a trailing immediate byte i.
 */
# define sser(c,r0,r1) _sser(_jit,c,r0,r1)
static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1)
static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i)
static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
/*
 * Two-operand register instructions built on the encoders above.  In the
 * scalar forms the first ssexr argument is the prefix byte: 0xf3 for the
 * single-precision ("ss") variant and 0xf2 for the double-precision ("sd")
 * variant; the packed-double forms use 0x66.
 */
# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1)
# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1)
# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1)
# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1)
# define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1)
# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1)
# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1)
# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1)
# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1)
# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1)
/*
 * Conversions.  The *_l truncations exist only when __WORDSIZE == 64 and,
 * like the 64-bit sse_extr_*, go through sselxr instead of ssexr.
 */
# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1)
# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1)
# if __WORDSIZE == 64
# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1)
# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1)
# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1)
# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1)
# else
# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1)
# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1)
# endif
# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1)
# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1)
# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)
# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1)
# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1)
# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1)
# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1)
# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1)
/*
 * Shifts by an immediate: the third ssexi argument (0x02 or 0x06) ends up
 * in the ModRM reg field and selects right or left shift; opcode 0x72 is
 * used for the "l" forms and 0x73 for the "q" forms.
 */
# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0)
# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0)
# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0)
# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0)
# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1)
/*
 * sselxr has its own encoder only on 64-bit targets; elsewhere it is
 * simply an alias for ssexr.
 */
# if __WORDSIZE == 64
# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1)
static void
_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# else
# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1)
# endif
/*
 * Register <-> memory moves.  ssexrx takes a prefix, an opcode, the memory
 * operand description (md, rb, ri, ms) and the register rd.  The "mr"
 * macros use X86_SSE_MOV and the "rm" macros use X86_SSE_MOV1.
 */
# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd)
# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd)
# define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd)
# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs)
# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs)
static void
_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t,
jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Three-operand arithmetic: r0 = r1 <op> r2 for the "r" forms; the "i"
 * forms take a pointer to a float32/float64 immediate in place of r2.
 * Suffix _f is single precision, _d is double precision.
 */
# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2)
static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0)
static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2)
static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0)
static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2)
static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0)
static void _sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2)
static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0)
static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2)
static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0)
static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2)
static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0)
static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2)
static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0)
static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2)
static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0)
static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/* Unary operations: absolute value, negation and square root. */
# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1)
static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1)
static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1)
static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1)
static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t);
# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1)
# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1)
/*
 * Shared comparison helper: the second _ssecmp argument selects single (0)
 * or double (1) precision, `code` is the condition code that is
 * materialized into r0.
 */
# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2)
# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2)
static void
_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t,
jit_int32_t, jit_int32_t, jit_int32_t);
/*
 * Single-precision (_f) interface: moves, comparisons, loads/stores and
 * branches.  The "i" variants take a pointer to the float32 immediate.
 *
 * Comparisons that map onto a single condition code are macros over
 * ssecmpf; _ssecmp compares its last two register arguments in swapped
 * order (ucomis r2, r1), which is why e.g. sse_ltr_f passes (r1, r2) with
 * X86_CC_A while sse_gtr_f passes (r2, r1) with the same code.  The
 * remaining comparisons have dedicated functions.
 */
#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1)
static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0)
static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*);
# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0)
static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2)
# define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0)
static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2)
# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0)
static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2)
static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0)
static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1)
# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0)
static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1)
# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0)
static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2)
static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0)
static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1)
# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0)
static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2)
# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0)
static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2)
static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0)
static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2)
static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0)
static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2)
# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0)
static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2)
static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0)
static void _sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1)
# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0)
static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1)
/*
 * Loads and stores.  The register-addressed forms expand directly to
 * movssmr/movssrm with a zero displacement and _SCL1 scale; the forms
 * taking a jit_word_t (i0) are functions.
 */
# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0)
# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0)
static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
# define sse_ldxr_f(r0, r1, r2) movssmr(0, r1, r2, _SCL1, r0)
# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0)
static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1)
# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0)
static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
# define sse_stxr_f(r0, r1, r2) movssrm(r2, 0, r0, r1, _SCL1)
# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1)
static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Conditional branches.  Each takes a jit_word_t i0 first (presumably the
 * branch target -- confirm against the definitions) followed by the two
 * operands, and returns a jit_word_t.
 */
# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1)
static jit_word_t
_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1)
static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1)
static jit_word_t
_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1)
static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1)
static jit_word_t
_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1)
static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1)
static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1)
static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1)
static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1)
static jit_word_t
_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1)
static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1)
static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1)
static jit_word_t
_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*);
/*
 * Double-precision (_d) interface, parallel to the single-precision one:
 * moves, comparisons, loads/stores and branches.  The "i" variants take a
 * pointer to the float64 immediate.
 *
 * Comparisons that map onto a single condition code are macros over
 * ssecmpd; _ssecmp compares its last two register arguments in swapped
 * order (ucomis r2, r1), which is why e.g. sse_ltr_d passes (r1, r2) with
 * X86_CC_A while sse_gtr_d passes (r2, r1) with the same code.
 */
#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1)
static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t);
#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0)
static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*);
# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2)
# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0)
static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2)
# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0)
static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2)
static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0)
static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1)
# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0)
static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1)
# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0)
static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2)
static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0)
static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1)
# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0)
static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2)
static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0)
static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2)
static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0)
static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2)
static void _sse_unger_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0)
static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2)
# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0)
static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2)
static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0)
static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1)
# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0)
static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1)
# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0)
static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
/*
 * Loads and stores.  The register-addressed forms expand directly to
 * movsdmr/movsdrm with a zero displacement and _SCL1 scale; the forms
 * taking a jit_word_t (i0) are functions.
 */
# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0)
# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0)
static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
# define sse_ldxr_d(r0, r1, r2) movsdmr(0, r1, r2, _SCL1, r0)
# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0)
static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
/* The prototype for _sse_bltr_d follows the store declarations below. */
# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1)
# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1)
# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0)
static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
# define sse_stxr_d(r0, r1, r2) movsdrm(r2, 0, r0, r1, _SCL1)
# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1)
static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
/*
 * Conditional branches.  Each takes a jit_word_t i0 first (presumably the
 * branch target -- confirm against the definitions) followed by the two
 * operands, and returns a jit_word_t.
 */
static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1)
static jit_word_t
_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1)
static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1)
static jit_word_t
_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1)
static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1)
static jit_word_t
_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1)
static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1)
static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1)
static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1)
static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1)
static jit_word_t
_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1)
static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
# define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1)
static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1)
static jit_word_t
_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*);
#endif
#if CODE
/*
 * Generate _sse_<name>i_<type>(): the immediate form of a three-operand
 * operation.  The immediate (passed by pointer) is loaded into a scratch
 * SSE register with sse_movi_<type>, the register form sse_<name>r_<type>
 * is applied to (r0, r1, scratch), and the scratch register is released.
 * `size` (32 or 64) selects the jit_float<size>_t immediate type.
 */
# define fpr_opi(name, type, size) \
static void \
_sse_##name##i_##type(jit_state_t *_jit, \
                      jit_int32_t r0, jit_int32_t r1, \
                      jit_float##size##_t *i0) \
{ \
    jit_int32_t scratch; \
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr); \
    assert(jit_sse_reg_p(scratch)); \
    sse_movi_##type(rn(scratch), i0); \
    sse_##name##r_##type(r0, r1, rn(scratch)); \
    jit_unget_reg(scratch); \
}
/*
 * Generate _sse_b<name>i_<type>(): the immediate form of a conditional
 * branch.  The immediate (passed by pointer) is loaded into a scratch SSE
 * register, the register form sse_b<name>r_<type> is invoked with
 * (i0, r0, scratch), the scratch register is released, and whatever the
 * register form returned is passed back to the caller.
 */
# define fpr_bopi(name, type, size) \
static jit_word_t \
_sse_b##name##i_##type(jit_state_t *_jit, \
                       jit_word_t i0, jit_int32_t r0, \
                       jit_float##size##_t *i1) \
{ \
    jit_word_t result; \
    jit_int32_t scratch; \
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr); \
    assert(jit_sse_reg_p(scratch)); \
    sse_movi_##type(rn(scratch), i1); \
    result = sse_b##name##r_##type(i0, r0, rn(scratch)); \
    jit_unget_reg(scratch); \
    return (result); \
}
/*
 * Shorthands that instantiate the immediate-form generators for single
 * precision (f, 32-bit immediate) and double precision (d, 64-bit
 * immediate); the "b" variants generate the branch forms.
 */
# define fopi(name) fpr_opi(name, f, 32)
# define fbopi(name) fpr_bopi(name, f, 32)
# define dopi(name) fpr_opi(name, d, 64)
# define dbopi(name) fpr_bopi(name, d, 64)
/*
 * Emit an unprefixed SSE instruction operating on two registers:
 * rex(), the 0x0f byte, opcode byte c, then a ModRM byte built with
 * mode 0x03, r0 in the reg field and r1 in the r/m field.
 *
 * The rex() arguments are given in the same order as in _ssexr() and
 * _sselxr(), which build the identical ModRM byte: r0 occupies the slot
 * paired with the ModRM reg field and r1 the slot paired with the r/m
 * field.  Passing them the other way round attaches the register
 * extension bits to the wrong operand whenever exactly one of the two
 * registers needs one.
 */
static void
_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1)
{
    rex(0, 0, r0, 0, r1);
    ic(0x0f);
    ic(c);
    mrm(0x03, r7(r0), r7(r1));
}
/*
 * Emit a prefixed SSE instruction operating on two registers.  Bytes are
 * written in this order: prefix p, whatever rex() emits for (r0, r1),
 * 0x0f, opcode c, and a ModRM byte built with mode 0x03, r0 in the reg
 * field and r1 in the r/m field.
 */
static void
_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
jit_int32_t r0, jit_int32_t r1)
{
ic(p);
rex(0, 0, r0, 0, r1);
ic(0x0f);
ic(c);
mrm(0x03, r7(r0), r7(r1));
}
/*
 * Emit a 0x66-prefixed SSE instruction taking one register and an
 * immediate byte.  `m` is not a register: it is placed in the ModRM reg
 * field (the shift macros pass 0x02 or 0x06), r0 goes in the r/m field,
 * and the immediate i is written as the final byte.
 */
static void
_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0,
jit_int32_t m, jit_int32_t i)
{
ic(0x66);
/* only r0 is a register here, so it is the only one handed to rex() */
rex(0, 0, 0, 0, r0);
ic(0x0f);
ic(c);
mrm(0x03, r7(m), r7(r0));
ic(i);
}
#if __WORDSIZE == 64
/*
 * 64-bit-only variant of _ssexr(): identical byte sequence except that
 * the second rex() argument is 1 instead of 0 (presumably selecting the
 * 64-bit operand size -- confirm against the rex() definition).
 */
static void
_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c,
jit_int32_t r0, jit_int32_t r1)
{
ic(p);
rex(0, 1, r0, 0, r1);
ic(0x0f);
ic(c);
mrm(0x03, r7(r0), r7(r1));
}
#endif
/*
 * Emit a prefixed SSE instruction with one register operand (rd) and one
 * memory operand.  Bytes are written in this order: prefix px, whatever
 * rex() emits for (rd, ri, rb), 0x0f, opcode `code`, then the operand
 * encoding produced by rx().  Judging by the parameter names used by the
 * movss/movsd macros, md/rb/ri/ms are the displacement, base register,
 * index register and scale of the memory operand -- confirm against the
 * rx() definition.
 */
static void
_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md,
jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd)
{
ic(px);
rex(0, 0, rd, ri, rb);
ic(0x0f);
ic(code);
rx(rd, md, rb, ri, ms);
}
/*
 * r0 = r1 + r2, single precision.  addssr(a, b) is used as "a += b", so
 * the operand that is not already in r0 is added to it; when r0 aliases
 * neither source, r1 is copied into r0 first.
 */
static void
_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t addend;

    if (r0 == r1)
        addend = r2;
    else if (r0 == r2)
        addend = r1;		/* addition commutes: r0 already holds r2 */
    else {
        sse_movr_f(r0, r1);
        addend = r2;
    }
    addssr(r0, addend);
}
/* Instantiate _sse_addi_f(), the immediate form. */
fopi(add)
/*
 * r0 = r1 + r2, double precision.  addsdr(a, b) is used as "a += b", so
 * the operand that is not already in r0 is added to it; when r0 aliases
 * neither source, r1 is copied into r0 first.
 */
static void
_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t addend;

    if (r0 == r1)
        addend = r2;
    else if (r0 == r2)
        addend = r1;		/* addition commutes: r0 already holds r2 */
    else {
        sse_movr_d(r0, r1);
        addend = r2;
    }
    addsdr(r0, addend);
}
/* Instantiate _sse_addi_d(), the immediate form. */
dopi(add)
/*
 * r0 = r1 - r2, single precision.  Subtraction does not commute, so when
 * the destination aliases only the subtrahend (r0 == r2, r0 != r1) r2 is
 * first saved in a scratch register before r0 is overwritten with r1.
 */
static void
_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t saved;

    if (r0 != r1 && r0 == r2) {
        saved = jit_get_reg(jit_class_fpr|jit_class_xpr);
        sse_movr_f(rn(saved), r0);
        sse_movr_f(r0, r1);
        subssr(r0, rn(saved));
        jit_unget_reg(saved);
        return;
    }
    if (r0 != r1)
        sse_movr_f(r0, r1);
    subssr(r0, r2);
}
/* Instantiate _sse_subi_f(), the immediate form. */
fopi(sub)
/*
 * r0 = r1 - r2, double precision.  Subtraction does not commute, so when
 * the destination aliases only the subtrahend (r0 == r2, r0 != r1) r2 is
 * first saved in a scratch register before r0 is overwritten with r1.
 */
static void
_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t saved;

    if (r0 != r1 && r0 == r2) {
        saved = jit_get_reg(jit_class_fpr|jit_class_xpr);
        sse_movr_d(rn(saved), r0);
        sse_movr_d(r0, r1);
        subsdr(r0, rn(saved));
        jit_unget_reg(saved);
        return;
    }
    if (r0 != r1)
        sse_movr_d(r0, r1);
    subsdr(r0, r2);
}
/* Instantiate _sse_subi_d(), the immediate form. */
dopi(sub)
/*
 * r0 = r1 * r2, single precision.  mulssr(a, b) is used as "a *= b", so
 * the operand that is not already in r0 is multiplied into it; when r0
 * aliases neither source, r1 is copied into r0 first.
 */
static void
_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t factor;

    if (r0 == r1)
        factor = r2;
    else if (r0 == r2)
        factor = r1;		/* multiplication commutes */
    else {
        sse_movr_f(r0, r1);
        factor = r2;
    }
    mulssr(r0, factor);
}
/* Instantiate _sse_muli_f(), the immediate form. */
fopi(mul)
/*
 * r0 = r1 * r2, double precision.  mulsdr(a, b) is used as "a *= b", so
 * the operand that is not already in r0 is multiplied into it; when r0
 * aliases neither source, r1 is copied into r0 first.
 */
static void
_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t factor;

    if (r0 == r1)
        factor = r2;
    else if (r0 == r2)
        factor = r1;		/* multiplication commutes */
    else {
        sse_movr_d(r0, r1);
        factor = r2;
    }
    mulsdr(r0, factor);
}
/* Instantiate _sse_muli_d(), the immediate form. */
dopi(mul)
/*
 * r0 = r1 / r2, single precision.  Division does not commute, so when the
 * destination aliases only the divisor (r0 == r2, r0 != r1) r2 is first
 * saved in a scratch register before r0 is overwritten with r1.
 */
static void
_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t saved;

    if (r0 != r1 && r0 == r2) {
        saved = jit_get_reg(jit_class_fpr|jit_class_xpr);
        sse_movr_f(rn(saved), r0);
        sse_movr_f(r0, r1);
        divssr(r0, rn(saved));
        jit_unget_reg(saved);
        return;
    }
    if (r0 != r1)
        sse_movr_f(r0, r1);
    divssr(r0, r2);
}
/* Instantiate _sse_divi_f(), the immediate form. */
fopi(div)
/*
 * r0 = r1 / r2, double precision.  Division does not commute, so when the
 * destination aliases only the divisor (r0 == r2, r0 != r1) r2 is first
 * saved in a scratch register before r0 is overwritten with r1.
 */
static void
_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_int32_t saved;

    if (r0 != r1 && r0 == r2) {
        saved = jit_get_reg(jit_class_fpr|jit_class_xpr);
        sse_movr_d(rn(saved), r0);
        sse_movr_d(r0, r1);
        divsdr(r0, rn(saved));
        jit_unget_reg(saved);
        return;
    }
    if (r0 != r1)
        sse_movr_d(r0, r1);
    divsdr(r0, r2);
}
/* Instantiate _sse_divi_d(), the immediate form. */
dopi(div)
/*
 * r0 = |r1|, single precision.  A mask is built by comparing a register
 * with itself (pcmpeqlr) and shifting it right by one bit (psrl), then
 * ANDed with the source.  When r0 and r1 differ the mask is built
 * directly in r0; otherwise a scratch register holds it.
 */
static void
_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t scratch;

    if (r0 != r1) {
        /* destination is free: build the mask in place */
        pcmpeqlr(r0, r0);
        psrl(r0, 1);
        andpsr(r0, r1);
        return;
    }
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr);
    pcmpeqlr(rn(scratch), rn(scratch));
    psrl(rn(scratch), 1);
    andpsr(r0, rn(scratch));
    jit_unget_reg(scratch);
}
/*
 * r0 = |r1|, double precision.  A mask is built by comparing a register
 * with itself (pcmpeqlr) and shifting it right by one bit (psrq), then
 * ANDed with the source.  When r0 and r1 differ the mask is built
 * directly in r0; otherwise a scratch register holds it.
 */
static void
_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t scratch;

    if (r0 != r1) {
        /* destination is free: build the mask in place */
        pcmpeqlr(r0, r0);
        psrq(r0, 1);
        andpdr(r0, r1);
        return;
    }
    scratch = jit_get_reg(jit_class_fpr|jit_class_xpr);
    pcmpeqlr(rn(scratch), rn(scratch));
    psrq(rn(scratch), 1);
    andpdr(r0, rn(scratch));
    jit_unget_reg(scratch);
}
/*
 * r0 = -r1, single precision.  The constant 0x80000000 is loaded into a
 * general purpose register, transferred to an SSE register with movdlxr
 * and XORed with the source.  When r0 and r1 differ the constant is placed
 * directly in r0; otherwise a scratch SSE register holds it.
 */
static void
_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t gpr, xmm;

    gpr = jit_get_reg(jit_class_gpr);
    imovi(rn(gpr), 0x80000000);
    if (r0 != r1) {
        movdlxr(r0, rn(gpr));
        xorpsr(r0, r1);
    }
    else {
        xmm = jit_get_reg(jit_class_fpr|jit_class_xpr);
        movdlxr(rn(xmm), rn(gpr));
        xorpsr(r0, rn(xmm));
        jit_unget_reg(xmm);
    }
    jit_unget_reg(gpr);
}
/*
 * Emit the double-precision negation r0 = -r1.
 *
 * The constant 0x80000000 is loaded into a scratch general-purpose
 * register, moved into an SSE register with movdlxr() and shifted left by
 * 32 bits (pslq), which puts the set bit at position 63, the sign bit of a
 * double.  The result is XORed with the source.  When r0 aliases r1 the
 * mask is built in a scratch SSE register, otherwise directly in r0.
 */
static void
_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t gtmp;
    jit_int32_t ftmp;

    gtmp = jit_get_reg(jit_class_gpr);
    imovi(rn(gtmp), 0x80000000);
    if (r0 != r1) {
        movdlxr(r0, rn(gtmp));
        pslq(r0, 32);
        xorpdr(r0, r1);
    }
    else {
        ftmp = jit_get_reg(jit_class_fpr|jit_class_xpr);
        movdlxr(rn(ftmp), rn(gtmp));
        pslq(rn(ftmp), 32);
        xorpdr(r0, rn(ftmp));
        jit_unget_reg(ftmp);
    }
    jit_unget_reg(gtmp);
}
/*
 * Emit a floating-point compare that materialises a condition in the
 * integer register r0.
 *
 *   d     non-zero selects ucomisdr(), zero selects ucomissr()
 *   code  condition code handed to cc()
 *   r0    integer destination
 *   r1/r2 floating-point operands; the compare is issued as (r2, r1)
 *
 * The destination is cleared with ixorr() before the compare and then
 * written by cc().  When r0 does not satisfy reg8_p(), _RAX_REGNO is used
 * as the working register instead: its previous contents are first copied
 * into r0 and finally swapped back with xchgr(), which also leaves the
 * result in r0.
 */
static void
_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code,
        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_bool_t byte_ok = reg8_p(r0);
    jit_int32_t dst = r0;

    if (!byte_ok) {
        dst = _RAX_REGNO;
        movr(r0, dst);
    }
    ixorr(dst, dst);
    if (!d)
        ucomissr(r2, r1);
    else
        ucomisdr(r2, r1);
    cc(code, dst);
    if (!byte_ok)
        xchgr(r0, dst);
}
/*
 * Emit a single-precision register move r0 <- r1.
 * Nothing is emitted when source and destination are the same register.
 */
static void
_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 == r1)
        return;
    ssexr(0xf3, X86_SSE_MOV, r0, r1);
}
/*
 * Emit code that loads the single-precision constant *i0 into r0.
 *
 * Three strategies are used, in order:
 *   - positive zero (value compares equal to 0.0 and the sign bit is clear)
 *     is produced with xorpsr(r0, r0);
 *   - unless _jitc->no_data is set, the value is loaded from the address i0
 *     with sse_ldi_f(); on 64-bit builds this is skipped when the address
 *     does not pass can_sign_extend_int_p(), since a register would be
 *     needed anyway;
 *   - otherwise the bit pattern is moved through a scratch general-purpose
 *     register with movi() + movdlxr().
 */
static void
_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
{
    union {
        jit_int32_t i;
        jit_float32_t f;
    } bits;
    jit_int32_t tmp;
    jit_bool_t use_load;

    bits.f = *i0;
    if (bits.f == 0.0 && !(bits.i & 0x80000000)) {
        xorpsr(r0, r0);
        return;
    }

    use_load = !_jitc->no_data;
#if __WORDSIZE == 64
    /* if will allocate a register for offset, just use immediate */
    if (use_load && !can_sign_extend_int_p((jit_word_t)i0))
        use_load = 0;
#endif
    if (use_load) {
        sse_ldi_f(r0, (jit_word_t)i0);
        return;
    }

    tmp = jit_get_reg(jit_class_gpr);
    movi(rn(tmp), bits.i);
    movdlxr(r0, rn(tmp));
    jit_unget_reg(tmp);
}
/* Expand the fopi() helper macro (defined before this point) for the "lt"
 * and "le" single-precision comparisons; presumably each expansion is the
 * immediate-operand companion of the register form -- confirm against the
 * macro definition. */
fopi(lt)
fopi(le)
/*
 * Emit r0 = (r1 == r2) for single-precision operands.
 *
 * The integer destination is cleared first, then ucomissr(r2, r1) is
 * issued.  A short jpes() with a zero displacement is emitted and patched
 * afterwards so that it jumps over the cc(X86_CC_E, ...) instruction; in
 * that case (presumably the unordered/NaN outcome) the destination keeps
 * its cleared value.
 *
 * When r0 does not satisfy reg8_p(), _RAX_REGNO is used as the working
 * register: its contents are saved in r0 beforehand and swapped back at the
 * end with xchgr().
 */
static void
_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_bool_t byte_ok = reg8_p(r0);
    jit_int32_t dst = r0;
    jit_word_t skip;

    if (!byte_ok) {
        dst = _RAX_REGNO;
        movr(r0, _RAX_REGNO);
    }
    ixorr(dst, dst);
    ucomissr(r2, r1);
    jpes(0);
    skip = _jit->pc.w;
    cc(X86_CC_E, dst);
    patch_rel_char(skip, _jit->pc.w);
    if (!byte_ok)
        xchgr(r0, dst);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
fopi(eq)
fopi(ge)
fopi(gt)
/*
 * Emit r0 = (r1 != r2) for single-precision operands.
 *
 * The integer destination is preset to 1, then ucomissr(r2, r1) is issued.
 * A short jpes() with a zero displacement is emitted and patched afterwards
 * so that it jumps over the cc(X86_CC_NE, ...) instruction; in that case
 * (presumably the unordered/NaN outcome) the destination keeps the value 1.
 *
 * When r0 does not satisfy reg8_p(), _RAX_REGNO is used as the working
 * register: its contents are saved in r0 beforehand and swapped back at the
 * end with xchgr().
 */
static void
_sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_bool_t byte_ok = reg8_p(r0);
    jit_int32_t dst = r0;
    jit_word_t skip;

    if (!byte_ok) {
        dst = _RAX_REGNO;
        movr(r0, _RAX_REGNO);
    }
    imovi(dst, 1);
    ucomissr(r2, r1);
    jpes(0);
    skip = _jit->pc.w;
    cc(X86_CC_NE, dst);
    patch_rel_char(skip, _jit->pc.w);
    if (!byte_ok)
        xchgr(r0, dst);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
fopi(ne)
fopi(unlt)
/*
 * Emit r0 = "r1 unordered or less than or equal to r2" (single precision).
 * Comparing a register with itself yields the constant 1 without emitting
 * a compare; otherwise the test is delegated to ssecmpf() with the operands
 * swapped and the X86_CC_NA condition.
 */
static void
_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpf(X86_CC_NA, r0, r2, r1);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companion (presumably the immediate-operand form). */
fopi(unle)
/*
 * Emit r0 = "r1 unordered or equal to r2" (single precision).
 * Comparing a register with itself yields the constant 1 without emitting
 * a compare; otherwise the test is delegated to ssecmpf() with the
 * X86_CC_E condition.
 */
static void
_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpf(X86_CC_E, r0, r1, r2);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companion (presumably the immediate-operand form). */
fopi(uneq)
/*
 * Emit r0 = "r1 unordered or greater than or equal to r2" (single
 * precision).  Comparing a register with itself yields the constant 1
 * without emitting a compare; otherwise the test is delegated to ssecmpf()
 * with the X86_CC_NA condition.
 */
static void
_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpf(X86_CC_NA, r0, r1, r2);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
fopi(unge)
fopi(ungt)
/*
 * Emit r0 = "r1 less than or greater than r2" (single precision).
 * Comparing a register with itself yields 0, produced by clearing r0 with
 * ixorr(); otherwise the test is delegated to ssecmpf() with the X86_CC_NE
 * condition.
 */
static void
_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpf(X86_CC_NE, r0, r1, r2);
        return;
    }
    ixorr(r0, r0);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
fopi(ltgt)
fopi(ord)
fopi(unord)
/*
 * Emit a single-precision load of the value at absolute address i0 into r0.
 * An address accepted by can_sign_extend_int_p() is encoded directly in the
 * movssmr() operand; any other address is first placed in a scratch
 * general-purpose register and loaded through sse_ldr_f().
 */
static void
_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_ldr_f(r0, rn(tmp));
        jit_unget_reg(tmp);
        return;
    }
    movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
}
/*
 * Emit a single-precision load of the value at address r1 + i0 into r0.
 * An offset accepted by can_sign_extend_int_p() is encoded as the
 * displacement of movssmr(); any other offset is first placed in a scratch
 * general-purpose register and used as the index of sse_ldxr_f().
 */
static void
_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_ldxr_f(r0, r1, rn(tmp));
        jit_unget_reg(tmp);
        return;
    }
    movssmr(i0, r1, _NOREG, _SCL1, r0);
}
/*
 * Emit a single-precision store of r0 to the absolute address i0.
 * An address accepted by can_sign_extend_int_p() is encoded directly in the
 * movssrm() operand; any other address is first placed in a scratch
 * general-purpose register and stored through sse_str_f().
 */
static void
_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_str_f(rn(tmp), r0);
        jit_unget_reg(tmp);
        return;
    }
    movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
}
/*
 * Emit a single-precision store of r1 to the address r0 + i0.
 * An offset accepted by can_sign_extend_int_p() is encoded as the
 * displacement of movssrm(); any other offset is first placed in a scratch
 * general-purpose register and used as the index of sse_stxr_f().
 */
static void
_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_stxr_f(rn(tmp), r0, r1);
        jit_unget_reg(tmp);
        return;
    }
    movssrm(r1, i0, r0, _NOREG, _SCL1);
}
/*
 * Emit "branch to i0 if r0 < r1" for single-precision operands.
 * The compare is issued with the operands swapped, ucomissr(r1, r0),
 * followed by ja(i0).  Returns the code position right after the jump.
 */
static jit_word_t
_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r1, r0);
    ja(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(lt)
/*
 * Emit "branch to i0 if r0 <= r1" for single-precision operands.
 * The compare is issued with the operands swapped, ucomissr(r1, r0),
 * followed by jae(i0).  Returns the code position right after the jump.
 */
static jit_word_t
_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r1, r0);
    jae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(le)
/*
 * Emit "branch to i0 if r0 == r1" for single-precision operands.
 * After ucomissr(r0, r1) a short jps() with a zero displacement is emitted
 * and later patched to land just past the je(i0), so the branch is skipped
 * when the parity condition holds (presumably the unordered/NaN outcome).
 * Returns the code position right after the je.
 */
static jit_word_t
_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t skip;

    ucomissr(r0, r1);
    jps(0);
    skip = _jit->pc.w;
    je(i0);
    patch_rel_char(skip, _jit->pc.w);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(eq)
/*
 * Emit "branch to i0 if r0 >= r1" for single-precision operands:
 * ucomissr(r0, r1) followed by jae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    jae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(ge)
/*
 * Emit "branch to i0 if r0 > r1" for single-precision operands:
 * ucomissr(r0, r1) followed by ja(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    ja(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(gt)
/*
 * Emit "branch to i0 if r0 != r1" for single-precision operands.
 *
 * After ucomissr(r0, r1) two short jumps with zero displacements are
 * emitted and patched once their targets are known:
 *   - jps() is patched to land on the jmpi(i0), so the branch is taken when
 *     the parity condition holds (presumably the unordered/NaN outcome);
 *   - jzs() is patched to land just past the jmpi(i0), so the branch is
 *     skipped when the zero condition holds (operands equal).
 * Falling through both reaches the jmpi(i0).
 * Returns the code position right after the jmpi.
 */
static jit_word_t
_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t to_jump;
    jit_word_t past_jump;

    ucomissr(r0, r1);
    jps(0);
    to_jump = _jit->pc.w;
    jzs(0);
    past_jump = _jit->pc.w;
    patch_rel_char(to_jump, _jit->pc.w);
    jmpi(i0);
    patch_rel_char(past_jump, _jit->pc.w);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(ne)
/*
 * Emit "branch to i0 if r0 is unordered with or less than r1" for
 * single-precision operands: ucomissr(r0, r1) followed by jnae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    jnae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(unlt)
/*
 * Emit "branch to i0 if r0 is unordered with or less than or equal to r1"
 * for single-precision operands.  A register compared with itself always
 * satisfies the test, so an unconditional jmpi(i0) is emitted; otherwise
 * ucomissr(r0, r1) is followed by jna(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomissr(r0, r1);
        jna(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(unle)
/*
 * Emit "branch to i0 if r0 is unordered with or equal to r1" for
 * single-precision operands.  A register compared with itself always
 * satisfies the test, so an unconditional jmpi(i0) is emitted; otherwise
 * ucomissr(r0, r1) is followed by je(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomissr(r0, r1);
        je(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(uneq)
/*
 * Emit "branch to i0 if r0 is unordered with or greater than or equal to
 * r1" for single-precision operands.  A register compared with itself
 * always satisfies the test, so an unconditional jmpi(i0) is emitted;
 * otherwise the compare is issued with the operands swapped,
 * ucomissr(r1, r0), followed by jna(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomissr(r1, r0);
        jna(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(unge)
/*
 * Emit "branch to i0 if r0 is unordered with or greater than r1" for
 * single-precision operands.  The compare is issued with the operands
 * swapped, ucomissr(r1, r0), followed by jnae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r1, r0);
    jnae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(ungt)
/*
 * Emit "branch to i0 if r0 is less than or greater than r1" for
 * single-precision operands: ucomissr(r0, r1) followed by jne(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    jne(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(ltgt)
/*
 * Emit "branch to i0 if r0 and r1 are ordered" for single-precision
 * operands: ucomissr(r0, r1) followed by jnp(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    jnp(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(ord)
/*
 * Emit "branch to i0 if r0 and r1 are unordered" for single-precision
 * operands: ucomissr(r0, r1) followed by jp(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomissr(r0, r1);
    jp(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
fbopi(unord)
/* Expand the dopi() helper macro (defined before this point) for the "lt"
 * and "le" double-precision comparisons; presumably each expansion is the
 * immediate-operand companion of the register form -- confirm against the
 * macro definition. */
dopi(lt)
dopi(le)
/*
 * Emit r0 = (r1 == r2) for double-precision operands.
 *
 * The integer destination is cleared first, then ucomisdr(r2, r1) is
 * issued.  A short jpes() with a zero displacement is emitted and patched
 * afterwards so that it jumps over the cc(X86_CC_E, ...) instruction; in
 * that case (presumably the unordered/NaN outcome) the destination keeps
 * its cleared value.
 *
 * When r0 does not satisfy reg8_p(), _RAX_REGNO is used as the working
 * register: its contents are saved in r0 beforehand and swapped back at the
 * end with xchgr().
 */
static void
_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_bool_t byte_ok = reg8_p(r0);
    jit_int32_t dst = r0;
    jit_word_t skip;

    if (!byte_ok) {
        dst = _RAX_REGNO;
        movr(r0, _RAX_REGNO);
    }
    ixorr(dst, dst);
    ucomisdr(r2, r1);
    jpes(0);
    skip = _jit->pc.w;
    cc(X86_CC_E, dst);
    patch_rel_char(skip, _jit->pc.w);
    if (!byte_ok)
        xchgr(r0, dst);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
dopi(eq)
dopi(ge)
dopi(gt)
/*
 * Emit r0 = (r1 != r2) for double-precision operands.
 *
 * The integer destination is preset to 1, then ucomisdr(r2, r1) is issued.
 * A short jpes() with a zero displacement is emitted and patched afterwards
 * so that it jumps over the cc(X86_CC_NE, ...) instruction; in that case
 * (presumably the unordered/NaN outcome) the destination keeps the value 1.
 *
 * When r0 does not satisfy reg8_p(), _RAX_REGNO is used as the working
 * register: its contents are saved in r0 beforehand and swapped back at the
 * end with xchgr().
 */
static void
_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    jit_bool_t byte_ok = reg8_p(r0);
    jit_int32_t dst = r0;
    jit_word_t skip;

    if (!byte_ok) {
        dst = _RAX_REGNO;
        movr(r0, _RAX_REGNO);
    }
    imovi(dst, 1);
    ucomisdr(r2, r1);
    jpes(0);
    skip = _jit->pc.w;
    cc(X86_CC_NE, dst);
    patch_rel_char(skip, _jit->pc.w);
    if (!byte_ok)
        xchgr(r0, dst);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
dopi(ne)
dopi(unlt)
/*
 * Emit r0 = "r1 unordered or less than or equal to r2" (double precision).
 * Comparing a register with itself yields the constant 1 without emitting
 * a compare; otherwise the test is delegated to ssecmpd() with the operands
 * swapped and the X86_CC_NA condition.
 */
static void
_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpd(X86_CC_NA, r0, r2, r1);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companion (presumably the immediate-operand form). */
dopi(unle)
/*
 * Emit r0 = "r1 unordered or equal to r2" (double precision).
 * Comparing a register with itself yields the constant 1 without emitting
 * a compare; otherwise the test is delegated to ssecmpd() with the
 * X86_CC_E condition.
 */
static void
_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpd(X86_CC_E, r0, r1, r2);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companion (presumably the immediate-operand form). */
dopi(uneq)
/*
 * Emit r0 = "r1 unordered or greater than or equal to r2" (double
 * precision).  Comparing a register with itself yields the constant 1
 * without emitting a compare; otherwise the test is delegated to ssecmpd()
 * with the X86_CC_NA condition.
 */
static void
_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpd(X86_CC_NA, r0, r1, r2);
        return;
    }
    movi(r0, 1);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
dopi(unge)
dopi(ungt)
/*
 * Emit r0 = "r1 less than or greater than r2" (double precision).
 * Comparing a register with itself yields 0, produced by clearing r0 with
 * ixorr(); otherwise the test is delegated to ssecmpd() with the X86_CC_NE
 * condition.
 */
static void
_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
    if (r1 != r2) {
        ssecmpd(X86_CC_NE, r0, r1, r2);
        return;
    }
    ixorr(r0, r0);
}
/* Macro-generated companions (presumably the immediate-operand forms). */
dopi(ltgt)
dopi(ord)
dopi(unord)
/*
 * Emit a double-precision register move r0 <- r1.
 * Nothing is emitted when source and destination are the same register.
 */
static void
_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 == r1)
        return;
    ssexr(0xf2, X86_SSE_MOV, r0, r1);
}
/*
 * Emit code that loads the double-precision constant *i0 into r0.
 *
 * Three strategies are used, in order:
 *   - positive zero (value compares equal to 0.0 and the top bit of the
 *     second 32-bit word is clear) is produced with xorpdr(r0, r0);
 *   - unless _jitc->no_data is set, the value is loaded from the address i0
 *     with sse_ldi_d(); on 64-bit builds this is skipped when the address
 *     does not pass can_sign_extend_int_p(), since a register would be
 *     needed anyway;
 *   - otherwise the bit pattern is synthesised through a scratch
 *     general-purpose register: in one movi() + movdqxr() on 64-bit builds,
 *     or by storing both 32-bit halves at CVT_OFFSET relative to
 *     _RBP_REGNO and reloading them with sse_ldxi_d() on 32-bit builds.
 */
static void
_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
{
    union {
        jit_int32_t ii[2];
        jit_word_t w;
        jit_float64_t d;
    } bits;
    jit_int32_t tmp;
    jit_bool_t use_load;

    bits.d = *i0;
    if (bits.d == 0.0 && !(bits.ii[1] & 0x80000000)) {
        xorpdr(r0, r0);
        return;
    }

    use_load = !_jitc->no_data;
#if __WORDSIZE == 64
    /* if will allocate a register for offset, just use immediate */
    if (use_load && !can_sign_extend_int_p((jit_word_t)i0))
        use_load = 0;
#endif
    if (use_load) {
        sse_ldi_d(r0, (jit_word_t)i0);
        return;
    }

    tmp = jit_get_reg(jit_class_gpr);
#if __WORDSIZE == 64
    movi(rn(tmp), bits.w);
    movdqxr(r0, rn(tmp));
    jit_unget_reg(tmp);
#else
    movi(rn(tmp), bits.ii[0]);
    stxi_i(CVT_OFFSET, _RBP_REGNO, rn(tmp));
    movi(rn(tmp), bits.ii[1]);
    stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(tmp));
    jit_unget_reg(tmp);
    sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
#endif
}
/*
 * Emit a double-precision load of the value at absolute address i0 into r0.
 * An address accepted by can_sign_extend_int_p() is encoded directly in the
 * movsdmr() operand; any other address is first placed in a scratch
 * general-purpose register and loaded through sse_ldr_d().
 */
static void
_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_ldr_d(r0, rn(tmp));
        jit_unget_reg(tmp);
        return;
    }
    movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
}
/*
 * Emit a double-precision load of the value at address r1 + i0 into r0.
 *
 * An offset accepted by can_sign_extend_int_p() is encoded as the
 * displacement of movsdmr().  Any other offset is first placed in a scratch
 * general-purpose register and used as the index of the register-indexed
 * load.
 */
static void
_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
    jit_int32_t reg;

    if (can_sign_extend_int_p(i0))
        movsdmr(i0, r1, _NOREG, _SCL1, r0);
    else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        /* Must be the double-precision indexed load, matching movsdmr()
         * above; the single-precision sse_ldxr_f() would read only 32 of
         * the 64 bits. */
        sse_ldxr_d(r0, r1, rn(reg));
        jit_unget_reg(reg);
    }
}
/*
 * Emit a double-precision store of r0 to the absolute address i0.
 * An address accepted by can_sign_extend_int_p() is encoded directly in the
 * movsdrm() operand; any other address is first placed in a scratch
 * general-purpose register and stored through sse_str_d().
 */
static void
_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
    jit_int32_t tmp;

    if (!can_sign_extend_int_p(i0)) {
        tmp = jit_get_reg(jit_class_gpr);
        movi(rn(tmp), i0);
        sse_str_d(rn(tmp), r0);
        jit_unget_reg(tmp);
        return;
    }
    movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
}
/*
 * Emit a double-precision store of r1 to the address r0 + i0.
 *
 * An offset accepted by can_sign_extend_int_p() is encoded as the
 * displacement of movsdrm().  Any other offset is first placed in a scratch
 * general-purpose register and used as the index of the register-indexed
 * store.
 */
static void
_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_int32_t reg;

    if (can_sign_extend_int_p(i0))
        movsdrm(r1, i0, r0, _NOREG, _SCL1);
    else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        /* Must be the double-precision indexed store, matching movsdrm()
         * above; the single-precision sse_stxr_f() would write only 32 of
         * the 64 bits. */
        sse_stxr_d(rn(reg), r0, r1);
        jit_unget_reg(reg);
    }
}
/*
 * Emit "branch to i0 if r0 < r1" for double-precision operands.
 * The compare is issued with the operands swapped, ucomisdr(r1, r0),
 * followed by ja(i0).  Returns the code position right after the jump.
 */
static jit_word_t
_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r1, r0);
    ja(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(lt)
/*
 * Emit "branch to i0 if r0 <= r1" for double-precision operands.
 * The compare is issued with the operands swapped, ucomisdr(r1, r0),
 * followed by jae(i0).  Returns the code position right after the jump.
 */
static jit_word_t
_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r1, r0);
    jae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(le)
/*
 * Emit "branch to i0 if r0 == r1" for double-precision operands.
 * After ucomisdr(r0, r1) a short jps() with a zero displacement is emitted
 * and later patched to land just past the je(i0), so the branch is skipped
 * when the parity condition holds (presumably the unordered/NaN outcome).
 * Returns the code position right after the je.
 */
static jit_word_t
_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t skip;

    ucomisdr(r0, r1);
    jps(0);
    skip = _jit->pc.w;
    je(i0);
    patch_rel_char(skip, _jit->pc.w);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(eq)
/*
 * Emit "branch to i0 if r0 >= r1" for double-precision operands:
 * ucomisdr(r0, r1) followed by jae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    jae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(ge)
/*
 * Emit "branch to i0 if r0 > r1" for double-precision operands:
 * ucomisdr(r0, r1) followed by ja(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    ja(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(gt)
/*
 * Emit "branch to i0 if r0 != r1" for double-precision operands.
 *
 * After ucomisdr(r0, r1) two short jumps with zero displacements are
 * emitted and patched once their targets are known:
 *   - jps() is patched to land on the jmpi(i0), so the branch is taken when
 *     the parity condition holds (presumably the unordered/NaN outcome);
 *   - jzs() is patched to land just past the jmpi(i0), so the branch is
 *     skipped when the zero condition holds (operands equal).
 * Falling through both reaches the jmpi(i0).
 * Returns the code position right after the jmpi.
 */
static jit_word_t
_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t to_jump;
    jit_word_t past_jump;

    ucomisdr(r0, r1);
    jps(0);
    to_jump = _jit->pc.w;
    jzs(0);
    past_jump = _jit->pc.w;
    patch_rel_char(to_jump, _jit->pc.w);
    jmpi(i0);
    patch_rel_char(past_jump, _jit->pc.w);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(ne)
/*
 * Emit "branch to i0 if r0 is unordered with or less than r1" for
 * double-precision operands: ucomisdr(r0, r1) followed by jnae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    jnae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(unlt)
/*
 * Emit "branch to i0 if r0 is unordered with or less than or equal to r1"
 * for double-precision operands.  A register compared with itself always
 * satisfies the test, so an unconditional jmpi(i0) is emitted; otherwise
 * ucomisdr(r0, r1) is followed by jna(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomisdr(r0, r1);
        jna(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(unle)
/*
 * Emit "branch to i0 if r0 is unordered with or equal to r1" for
 * double-precision operands.  A register compared with itself always
 * satisfies the test, so an unconditional jmpi(i0) is emitted; otherwise
 * ucomisdr(r0, r1) is followed by je(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomisdr(r0, r1);
        je(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(uneq)
/*
 * Emit "branch to i0 if r0 is unordered with or greater than or equal to
 * r1" for double-precision operands.  A register compared with itself
 * always satisfies the test, so an unconditional jmpi(i0) is emitted;
 * otherwise the compare is issued with the operands swapped,
 * ucomisdr(r1, r0), followed by jna(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    if (r0 != r1) {
        ucomisdr(r1, r0);
        jna(i0);
    }
    else
        jmpi(i0);
    return _jit->pc.w;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(unge)
/*
 * Emit "branch to i0 if r0 is unordered with or greater than r1" for
 * double-precision operands.  The compare is issued with the operands
 * swapped, ucomisdr(r1, r0), followed by jnae(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r1, r0);
    jnae(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(ungt)
/*
 * Emit "branch to i0 if r0 is less than or greater than r1" for
 * double-precision operands: ucomisdr(r0, r1) followed by jne(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    jne(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(ltgt)
/*
 * Emit "branch to i0 if r0 and r1 are ordered" for double-precision
 * operands: ucomisdr(r0, r1) followed by jnp(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    jnp(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(ord)
/*
 * Emit "branch to i0 if r0 and r1 are unordered" for double-precision
 * operands: ucomisdr(r0, r1) followed by jp(i0).
 * Returns the code position right after the jump.
 */
static jit_word_t
_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
    jit_word_t after;

    ucomisdr(r0, r1);
    jp(i0);
    after = _jit->pc.w;
    return after;
}
/* Macro-generated companion (presumably the immediate-operand form). */
dbopi(unord)
/* Drop the instantiation helper macros once every variant has been
 * generated.  dopi() is used throughout this file, so it is undefined here
 * together with its siblings; the "bopi" entry is kept as it was (undefining
 * a name that is not defined is harmless). */
# undef fopi
# undef fbopi
# undef dopi
# undef bopi
# undef dbopi
# undef fpr_bopi
# undef fpr_opi
#endif