diff --git a/jit.h b/jit.h index c81548c03..f52263f07 100644 --- a/jit.h +++ b/jit.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2018 Free Software Foundation, Inc. + * Copyright (C) 2012-2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -45,7 +45,23 @@ typedef void* jit_addr_t; typedef ptrdiff_t jit_off_t; typedef intptr_t jit_imm_t; typedef uintptr_t jit_uimm_t; -typedef struct jit_reloc *jit_reloc_t; + +enum jit_reloc_kind +{ + JIT_RELOC_ABSOLUTE, + JIT_RELOC_REL8, + JIT_RELOC_REL16, + JIT_RELOC_REL32, + JIT_RELOC_REL64, +}; + +typedef struct jit_reloc +{ + uint8_t kind; + uint8_t inst_start_offset; + uint16_t flags; + uint32_t offset; +} jit_reloc_t; #if defined(__GNUC__) && (__GNUC__ >= 4) # define JIT_API extern __attribute__ ((__visibility__("hidden"))) @@ -91,17 +107,32 @@ typedef struct jit_reloc *jit_reloc_t; #define jit_regno(reg) ((reg) & 0x00007fff) typedef struct jit_state jit_state_t; -enum jit_arg_kind +enum jit_arg_loc { - JIT_CALL_ARG_IMM, - JIT_CALL_ARG_GPR, - JIT_CALL_ARG_FPR, - JIT_CALL_ARG_MEM + JIT_ARG_LOC_IMM, + JIT_ARG_LOC_GPR, + JIT_ARG_LOC_FPR, + JIT_ARG_LOC_MEM }; +typedef enum jit_arg_abi +{ + JIT_ARG_ABI_UINT8, + JIT_ARG_ABI_INT8, + JIT_ARG_ABI_UINT16, + JIT_ARG_ABI_INT16, + JIT_ARG_ABI_UINT32, + JIT_ARG_ABI_INT32, + JIT_ARG_ABI_UINT64, + JIT_ARG_ABI_INT64, + JIT_ARG_ABI_POINTER, + JIT_ARG_ABI_FLOAT, + JIT_ARG_ABI_DOUBLE +} jit_arg_abi_t; + typedef struct jit_arg { - enum jit_arg_kind kind; + enum jit_arg_loc kind; union { intptr_t imm; @@ -111,28 +142,30 @@ typedef struct jit_arg } loc; } jit_arg_t; -JIT_API void init_jit(void); +JIT_API jit_bool_t init_jit(void); JIT_API jit_state_t *jit_new_state(void); JIT_API void jit_destroy_state(jit_state_t*); -JIT_API void jit_begin(jit_state_t*, jit_addr_t, size_t); +JIT_API void jit_begin(jit_state_t*, uint8_t*, size_t); +JIT_API jit_bool_t jit_has_overflow(jit_state_t*); JIT_API void jit_reset(jit_state_t*); -JIT_API jit_addr_t jit_end(jit_state_t*, size_t*); 
+JIT_API void* jit_end(jit_state_t*, size_t*); JIT_API void jit_align(jit_state_t*, unsigned); -JIT_API void jit_allocai(jit_state_t*, size_t); -JIT_API void jit_allocar(jit_state_t*, jit_gpr_t, jit_gpr_t); JIT_API jit_pointer_t jit_address(jit_state_t*); JIT_API void jit_patch_here(jit_state_t*, jit_reloc_t); JIT_API void jit_patch_there(jit_state_t*, jit_reloc_t, jit_pointer_t); JIT_API void jit_calli(jit_state_t *, jit_pointer_t f, - size_t argc, const jit_arg_t *argv); + size_t argc, const jit_arg_abi_t abi[], + const jit_arg_t args[]); JIT_API void jit_callr(jit_state_t *, jit_gpr_t f, - size_t argc, const jit_arg_t *argv); -JIT_API void jit_receive(jit_state_t*, size_t argc, jit_arg_t *argv); + size_t argc, const jit_arg_abi_t abi[], + const jit_arg_t args[]); +JIT_API void jit_receive(jit_state_t*, size_t argc, + const jit_arg_abi_t abi[], jit_arg_t args[]); #define JIT_PROTO_0(stem, ret) \ ret jit_##stem (jit_state_t* _jit) @@ -164,8 +197,8 @@ JIT_API void jit_receive(jit_state_t*, size_t argc, jit_arg_t *argv); #define JIT_PROTO__GGF_(stem) JIT_PROTO_3(stem, void, gpr, gpr, fpr) #define JIT_PROTO__GGGG(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, gpr) #define JIT_PROTO__GGG_(stem) JIT_PROTO_3(stem, void, gpr, gpr, gpr) -#define JIT_PROTO__GGGi(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm) -#define JIT_PROTO__GGGu(stem) JIT_PROTO_3(stem, void, gpr, gpr, uimm) +#define JIT_PROTO__GGGi(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, imm) +#define JIT_PROTO__GGGu(stem) JIT_PROTO_4(stem, void, gpr, gpr, gpr, uimm) #define JIT_PROTO__GG__(stem) JIT_PROTO_2(stem, void, gpr, gpr) #define JIT_PROTO__GGi_(stem) JIT_PROTO_3(stem, void, gpr, gpr, imm) #define JIT_PROTO__GGo_(stem) JIT_PROTO_3(stem, void, gpr, gpr, off) @@ -192,7 +225,7 @@ JIT_API void jit_receive(jit_state_t*, size_t argc, jit_arg_t *argv); M(_GGi_, addxi) \ M(_GGG_, subr) \ M(_FFF_, subr_f) \ - M(_FFF_, subr_f) \ + M(_FFF_, subr_d) \ M(_GGi_, subi) \ M(_GGG_, subcr) \ M(_GGi_, subci) \ @@ -388,6 +421,8 
@@ JIT_API void jit_receive(jit_state_t*, size_t argc, jit_arg_t *argv); M(RGG__, bxsubr_u) \ M(RGu__, bxsubi_u) \ \ + M(_i___, nop) \ + \ M(_G___, jmpr) \ M(_p___, jmpi) \ M(R____, jmp) \ diff --git a/jit/aarch64-cpu.c b/jit/aarch64-cpu.c index 665f2d731..98f2dabde 100644 --- a/jit/aarch64-cpu.c +++ b/jit/aarch64-cpu.c @@ -658,15 +658,15 @@ static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t); # define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t); # if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); -# define htonr_ul(r0,r1) REV(r0,r1) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_ul(r0,r1) REV(r0,r1) # else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # endif # define extr_c(r0,r1) SXTB(r0,r1) # define extr_uc(r0,r1) UXTB(r0,r1) @@ -1443,16 +1443,16 @@ _xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) #if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 48); } static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { - htonr_ul(r0, r1); + bswapr_ul(r0, r1); rshi_u(r0, r0, 32); } #endif diff --git a/jit/alpha-cpu.c b/jit/alpha-cpu.c index e3854fd25..a31640f37 100644 --- a/jit/alpha-cpu.c +++ 
b/jit/alpha-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014, 2017 Free Software Foundation, Inc. + * Copyright (C) 2014, 2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -623,16 +623,16 @@ static void _extr_i(jit_state_t*,int32_t,int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) static void _extr_ui(jit_state_t*,int32_t,int32_t); # if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,int32_t,int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void _bswapr_ul(jit_state_t*,int32_t,int32_t); # else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # endif # define jmpr(r0) JMP(_R31_REGNO,r0,0) # define jmpi(i0) _jmpi(_jit,i0) @@ -2453,7 +2453,7 @@ _extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -2465,7 +2465,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; int32_t t1; @@ -2491,7 +2491,7 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ul(jit_state_t 
*_jit, int32_t r0, int32_t r1) { int32_t t0; int32_t t1; diff --git a/jit/arm-cpu.c b/jit/arm-cpu.c index 74b653203..9d44699b9 100644 --- a/jit/arm-cpu.c +++ b/jit/arm-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1092,13 +1092,13 @@ static void _stxr_i(jit_state_t*,jit_word_t,int32_t,int32_t); # define stxi_i(r0,r1,i0) _stxi_i(_jit,r0,r1,i0) static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t); # if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); # else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr(r0,r1) movr(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) +# define bswapr(r0,r1) movr(r0,r1) # endif # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,int32_t,int32_t); @@ -3578,7 +3578,7 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) # if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; if (jit_thumb_p()) { @@ -3607,7 +3607,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) /* inline glibc htonl (without register clobber) */ static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t reg; if (jit_thumb_p()) { diff --git a/jit/hppa-cpu.c b/jit/hppa-cpu.c index 43d2f7094..68281e4a5 100644 --- a/jit/hppa-cpu.c +++ b/jit/hppa-cpu.c @@ -655,10 +655,10 @@ static jit_word_t 
_movi_p(jit_state_t*,int32_t,jit_word_t); #define extr_s(r0,r1) EXTRWR(r1,31,16,r0) #define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0) #if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) movr(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) +# define bswapr_ui(r0,r1) movr(r0,r1) #else -# error need htonr implementation +# error need bswapr implementation #endif #define addr(r0,r1,r2) ADD(r1,r2,r0) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) diff --git a/jit/ia64-cpu.c b/jit/ia64-cpu.c index 3b397747a..160f640bb 100644 --- a/jit/ia64-cpu.c +++ b/jit/ia64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2017 Free Software Foundation, Inc. + * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1308,15 +1308,15 @@ static void _movi(jit_state_t*,int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t); #if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); -# define htonr_ul(r0,r1) MUX1(r0,r1,MUX_REV) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_ul(r0,r1) MUX1(r0,r1,MUX_REV) #else -# define htonr_us(r0,r1) extr_us(r0,r1) -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) #endif #define extr_c(r0,r1) SXT1(r0,r1) #define extr_uc(r0,r1) ZXT1(r0,r1) @@ -3951,7 +3951,7 @@ _xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) #if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t 
*_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -3964,7 +3964,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; int32_t t1; diff --git a/jit/jit.c b/jit/jit.c index ca0a07a65..17115bf85 100644 --- a/jit/jit.c +++ b/jit/jit.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2018 Free Software Foundation, Inc. + * Copyright (C) 2012-2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -14,7 +14,7 @@ * License for more details. * * Authors: - * Paulo Cesar Pereira de Andrade + * Paulo Cesar Pereira de Andrade */ #if HAVE_CONFIG_H @@ -30,107 +30,118 @@ #include "../jit.h" #if defined(__GNUC__) -# define maybe_unused __attribute__ ((unused)) +# define maybe_unused __attribute__ ((unused)) #else -# define maybe_unused /**/ +# define maybe_unused /**/ #endif -#define rc(value) jit_class_##value -#define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) +#define rc(value) jit_class_##value +#define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) #if defined(__i386__) || defined(__x86_64__) -# define JIT_SP _RSP -# define JIT_RET _RAX -# if __X32 -# define JIT_FRET _ST0 +# define JIT_SP _RSP +# define JIT_RET _RAX +# if __X32 +# define JIT_FRET _ST0 +# else +# if __CYGWIN__ +# define JIT_RA0 _RCX # else -# if __CYGWIN__ -# define JIT_RA0 _RCX -# else -# define JIT_RA0 _RDI -# endif -# define JIT_FA0 _XMM0 -# define JIT_FRET _XMM0 +# define JIT_RA0 _RDI # endif +# define JIT_FA0 _XMM0 +# define JIT_FRET _XMM0 +# endif #elif defined(__mips__) -# define JIT_RA0 _A0 -# define JIT_FA0 _F12 -# define JIT_SP _SP -# define JIT_RET _V0 -# define JIT_FRET _F0 +# define JIT_RA0 _A0 +# define JIT_FA0 _F12 +# define JIT_SP _SP +# define JIT_RET _V0 +# define JIT_FRET _F0 #elif defined(__arm__) -# define JIT_RA0 _R0 -# define JIT_FA0 _D0 -# 
define JIT_SP _R13 -# define JIT_RET _R0 -# if defined(__ARM_PCS_VFP) -# define JIT_FRET _D0 -# else -# define JIT_FRET _R0 -# endif +# define JIT_RA0 _R0 +# define JIT_FA0 _D0 +# define JIT_SP _R13 +# define JIT_RET _R0 +# if defined(__ARM_PCS_VFP) +# define JIT_FRET _D0 +# else +# define JIT_FRET _R0 +# endif #elif defined(__ppc__) || defined(__powerpc__) -# define JIT_RA0 _R3 -# define JIT_FA0 _F1 -# define JIT_SP _R1 -# define JIT_RET _R3 -# define JIT_FRET _F1 +# define JIT_RA0 _R3 +# define JIT_FA0 _F1 +# define JIT_SP _R1 +# define JIT_RET _R3 +# define JIT_FRET _F1 #elif defined(__sparc__) -# define JIT_SP _SP -# define JIT_RET _I0 -# define JIT_FRET _F0 +# define JIT_SP _SP +# define JIT_RET _I0 +# define JIT_FRET _F0 #elif defined(__ia64__) -# define JIT_SP _R12 -# define JIT_RET _R8 -# define JIT_FRET _F8 +# define JIT_SP _R12 +# define JIT_RET _R8 +# define JIT_FRET _F8 #elif defined(__hppa__) -# define JIT_SP _R30 -# define JIT_RET _R28 -# define JIT_FRET _F4 +# define JIT_SP _R30 +# define JIT_RET _R28 +# define JIT_FRET _F4 #elif defined(__aarch64__) -# define JIT_RA0 _R0 -# define JIT_FA0 _V0 -# define JIT_SP _SP -# define JIT_RET _R0 -# define JIT_FRET _V0 +# define JIT_RA0 _R0 +# define JIT_FA0 _V0 +# define JIT_SP _SP +# define JIT_RET _R0 +# define JIT_FRET _V0 #elif defined(__s390__) || defined(__s390x__) -# define JIT_SP _R15 -# define JIT_RET _R2 -# define JIT_FRET _F0 +# define JIT_SP _R15 +# define JIT_RET _R2 +# define JIT_FRET _F0 #elif defined(__alpha__) -# define JIT_SP _SP -# define JIT_RET _V0 -# define JIT_FRET _F0 +# define JIT_SP _SP +# define JIT_RET _V0 +# define JIT_FRET _F0 #endif /* * Private jit_class bitmasks */ -#define jit_class_named 0x00400000 /* hit must be the named reg */ -#define jit_class_nospill 0x00800000 /* hint to fail if need spill */ -#define jit_class_sft 0x01000000 /* not a hardware register */ -#define jit_class_rg8 0x04000000 /* x86 8 bits */ -#define jit_class_xpr 0x80000000 /* float / vector */ +#define 
jit_class_named 0x00400000 /* hit must be the named reg */ +#define jit_class_nospill 0x00800000 /* hint to fail if need spill */ +#define jit_class_sft 0x01000000 /* not a hardware register */ +#define jit_class_rg8 0x04000000 /* x86 8 bits */ +#define jit_class_xpr 0x80000000 /* float / vector */ /* Used on sparc64 where %f0-%f31 can be encode for single float * but %f32 to %f62 only as double precision */ -#define jit_class_sng 0x10000000 /* Single precision float */ -#define jit_class_dbl 0x20000000 /* Only double precision float */ -#define jit_regno_patch 0x00008000 /* this is a register - * returned by a "user" call - * to jit_get_reg() */ +#define jit_class_sng 0x10000000 /* Single precision float */ +#define jit_class_dbl 0x20000000 /* Only double precision float */ +#define jit_regno_patch 0x00008000 /* this is a register + * returned by a "user" call + * to jit_get_reg() */ + +union jit_pc +{ + uint8_t *uc; + uint16_t *us; + uint32_t *ui; + uint64_t *ul; + intptr_t w; + uintptr_t uw; +}; struct jit_state { - union { - uint8_t *uc; - uint16_t *us; - uint32_t *ui; - uint64_t *ul; - intptr_t w; - uintptr_t uw; - } pc; + union jit_pc pc; uint8_t *start; uint8_t *last_instruction_start; uint8_t *limit; + uint8_t temp_gpr_saved; + uint8_t temp_fpr_saved; + uint8_t overflow; +}; + +enum jit_reloc_flags +{ + JIT_RELOC_CAN_SHORTEN = 1<<0 }; struct jit_register @@ -143,45 +154,40 @@ typedef struct jit_register jit_register_t; static const jit_register_t _rvs[]; -#define jit_regload_reload 0 /* convert to reload */ -#define jit_regload_delete 1 /* just remove node */ -#define jit_regload_isdead 2 /* delete and unset live bit */ +#define jit_regload_reload 0 /* convert to reload */ +#define jit_regload_delete 1 /* just remove node */ +#define jit_regload_isdead 2 /* delete and unset live bit */ #define ASSERT(x) do { if (!(x)) abort(); } while (0) +#if defined(__GNUC__) +# define UNLIKELY(exprn) __builtin_expect(exprn, 0) +#else +# define UNLIKELY(exprn) exprn 
+#endif -static inline uint8_t* -jit_reloc_instruction (jit_reloc_t reloc) -{ - return (uint8_t*) reloc; -} - -static void jit_get_cpu(void); -static void jit_init(jit_state_t *); -static void jit_nop(jit_state_t *, unsigned); -static void jit_patch(jit_state_t *, const uint8_t *loc, const uint8_t *addr); -static void jit_patch_last(jit_state_t *, const uint8_t *loc, const uint8_t *addr); +static jit_bool_t jit_get_cpu(void); +static jit_bool_t jit_init(jit_state_t *); static void jit_flush(void *fptr, void *tptr); +static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc); -void +jit_bool_t init_jit(void) { - jit_get_cpu(); + return jit_get_cpu (); } jit_state_t * jit_new_state(void) { - jit_state_t *_jit; + jit_state_t *_jit = malloc (sizeof (*_jit)); + if (!_jit) + abort (); - _jit = malloc (sizeof (*_jit)); - if (!_jit) - abort (); + memset(_jit, 0, sizeof (*_jit)); - memset(_jit, 0, sizeof (*_jit)); + if (!jit_init (_jit)); - jit_init (_jit); - - return _jit; + return _jit; } void @@ -193,36 +199,44 @@ jit_destroy_state(jit_state_t *_jit) jit_pointer_t jit_address(jit_state_t *_jit) { - /* TODO: FIXME */ - abort (); + return _jit->pc.uc; } void -jit_begin(jit_state_t *_jit, jit_addr_t addr, size_t length) +jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length) { ASSERT (!_jit->start); - _jit->start = addr; - _jit->limit = _jit->start + length; + _jit->start = buf; + _jit->limit = buf + length; jit_reset(_jit); } +jit_bool_t +jit_has_overflow(jit_state_t *_jit) +{ + ASSERT (_jit->start); + return _jit->overflow; +} + void jit_reset(jit_state_t *_jit) { ASSERT (_jit->start); - _jit->pc.uc = _jit->start = _jit->limit = NULL; + _jit->pc.uc = _jit->start; + _jit->overflow = 0; } -jit_addr_t +void* jit_end(jit_state_t *_jit, size_t *length) { uint8_t *code = _jit->start; uint8_t *end = _jit->pc.uc; ASSERT (code); - ASSERT (end > code); + ASSERT (code <= end); ASSERT (end <= _jit->limit); + ASSERT (!_jit->overflow); jit_flush (code, end); @@ -230,7 +244,8 
@@ jit_end(jit_state_t *_jit, size_t *length) *length = end - code; } - jit_reset (_jit); + _jit->pc.uc = _jit->start = _jit->limit = NULL; + _jit->overflow = 0; return code; } @@ -251,6 +266,79 @@ jit_align(jit_state_t *_jit, unsigned align) jit_nop(_jit, there - here); } +static inline void emit_u8(jit_state_t *_jit, uint8_t u8) { + if (UNLIKELY(_jit->pc.uc + 1 > _jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.uc++ = u8; + } +} + +static inline void emit_u16(jit_state_t *_jit, uint16_t u16) { + if (UNLIKELY(_jit->pc.us + 1 > (uint16_t*)_jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.us++ = u16; + } +} + +static inline void emit_u32(jit_state_t *_jit, uint32_t u32) { + if (UNLIKELY(_jit->pc.ui + 1 > (uint32_t*)_jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.ui++ = u32; + } +} + +static inline void emit_u64(jit_state_t *_jit, uint64_t u64) { + if (UNLIKELY(_jit->pc.ul + 1 > (uint64_t*)_jit->limit)) { + _jit->overflow = 1; + } else { + *_jit->pc.ul++ = u64; + } +} + +static inline jit_reloc_t +jit_reloc (jit_state_t *_jit, enum jit_reloc_kind kind, + uint8_t inst_start_offset, uint16_t flags, intptr_t addend) +{ + jit_reloc_t ret; + + ret.kind = kind; + ret.inst_start_offset = inst_start_offset; + ret.flags = 0; + ret.offset = _jit->pc.uc - _jit->start; + + switch (kind) + { + case JIT_RELOC_ABSOLUTE: + if (sizeof(intptr_t) == 4) + emit_u32 (_jit, addend); + else + emit_u64 (_jit, addend); + break; + case JIT_RELOC_REL8: + ASSERT (INT8_MIN <= addend && addend <= INT8_MAX); + emit_u8 (_jit, addend - 1); + break; + case JIT_RELOC_REL16: + ASSERT (INT16_MIN <= addend && addend <= INT16_MAX); + emit_u16 (_jit, addend - 2); + break; + case JIT_RELOC_REL32: + ASSERT (INT32_MIN <= addend && addend <= INT32_MAX); + emit_u32 (_jit, addend - 4); + break; + case JIT_RELOC_REL64: + emit_u64 (_jit, addend - 8); + break; + default: + abort (); + } + + return ret; +} + void jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc) { @@ -260,82 
+348,145 @@ jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc) void jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr) { - const uint8_t *loc = jit_reloc_instruction (reloc); + if (_jit->overflow) + return; + union jit_pc loc; + loc.uc = _jit->start + reloc.offset; + ptrdiff_t diff = addr - ((void*) 0); - if (loc == _jit->last_instruction_start) - jit_patch_last (_jit, loc, addr); - else - jit_patch (_jit, loc, addr); + switch (reloc.kind) + { + case JIT_RELOC_ABSOLUTE: + if (sizeof(diff) == 4) + *loc.ui = diff + (int32_t)*loc.ui; + else + *loc.ul = diff + (int64_t)*loc.ul; + if (loc.uc + sizeof(diff) == _jit->pc.uc && + (reloc.flags & JIT_RELOC_CAN_SHORTEN)) + jit_try_shorten (_jit, reloc); + break; + case JIT_RELOC_REL8: + diff += (int8_t)*loc.uc; + ASSERT (INT8_MIN <= diff && diff <= INT8_MAX); + *loc.uc = diff; + break; + case JIT_RELOC_REL16: + diff += (int16_t)*loc.us; + ASSERT (INT16_MIN <= diff && diff <= INT16_MAX); + *loc.us = diff; + if ((loc.uc + 1) == _jit->pc.uc && (reloc.flags & JIT_RELOC_CAN_SHORTEN)) + jit_try_shorten (_jit, reloc); + break; + case JIT_RELOC_REL32: + diff += (int32_t)*loc.ui; + ASSERT (INT32_MIN <= diff && diff <= INT32_MAX); + *loc.ui = diff; + if ((loc.ui + 1) == _jit->pc.ui && (reloc.flags & JIT_RELOC_CAN_SHORTEN)) + jit_try_shorten (_jit, reloc); + break; + case JIT_RELOC_REL64: + *loc.ul = diff + (int64_t)*loc.ul; + if ((loc.ul + 1) == _jit->pc.ul && (reloc.flags & JIT_RELOC_CAN_SHORTEN)) + jit_try_shorten (_jit, reloc); + break; + default: + abort (); + } } #if defined(__i386__) || defined(__x86_64__) -# include "x86.c" +# include "x86.c" #elif defined(__mips__) -# include "mips.c" +# include "mips.c" #elif defined(__arm__) -# include "arm.c" +# include "arm.c" #elif defined(__ppc__) || defined(__powerpc__) -# include "ppc.c" +# include "ppc.c" #elif defined(__sparc__) -# include "sparc.c" +# include "sparc.c" #elif defined(__ia64__) -# include "ia64.c" +# include "ia64.c" #elif defined(__hppa__) -# 
include "hppa.c" +# include "hppa.c" #elif defined(__aarch64__) -# include "aarch64.c" +# include "aarch64.c" #elif defined(__s390__) || defined(__s390x__) -# include "s390.c" +# include "s390.c" #elif defined(__alpha__) -# include "alpha.c" +# include "alpha.c" #endif -#define JIT_CALL_0(stem) _jit_##stem (_jit) -#define JIT_CALL_1(stem) _jit_##stem (_jit, a) -#define JIT_CALL_2(stem) _jit_##stem (_jit, a, b) -#define JIT_CALL_3(stem) _jit_##stem (_jit, a, b, c) -#define JIT_CALL_4(stem) _jit_##stem (_jit, a, b, c, d) - -#define JIT_TAIL_CALL_RFF__(stem) return JIT_CALL_2(stem) -#define JIT_TAIL_CALL_RGG__(stem) return JIT_CALL_2(stem) -#define JIT_TAIL_CALL_RG___(stem) return JIT_CALL_1(stem) -#define JIT_TAIL_CALL_RGi__(stem) return JIT_CALL_2(stem) -#define JIT_TAIL_CALL_RGu__(stem) return JIT_CALL_2(stem) -#define JIT_TAIL_CALL_R____(stem) return JIT_CALL_0(stem) -#define JIT_TAIL_CALL__FFF_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__FF__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__FGG_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__FG__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__FGo_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__F___(stem) JIT_CALL_1(stem) -#define JIT_TAIL_CALL__Fd__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__Ff__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__Fp__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__GF__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__GGF_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GGGG(stem) JIT_CALL_4(stem) -#define JIT_TAIL_CALL__GGG_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GGGi(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GGGu(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GG__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__GGi_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GGo_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__GGu_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__G___(stem) JIT_CALL_1(stem) -#define JIT_TAIL_CALL__Gi__(stem) JIT_CALL_2(stem) -#define 
JIT_TAIL_CALL__Gp__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL______(stem) JIT_CALL_0(stem) -#define JIT_TAIL_CALL__i___(stem) JIT_CALL_1(stem) -#define JIT_TAIL_CALL__oGF_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__oGG_(stem) JIT_CALL_3(stem) -#define JIT_TAIL_CALL__pF__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__pG__(stem) JIT_CALL_2(stem) -#define JIT_TAIL_CALL__p___(stem) JIT_CALL_1(stem) - -#define DEFINE_INSTRUCTION(kind, stem) \ - JIT_PROTO_##kind(stem) \ - { \ - JIT_TAIL_CALL_##kind(stem); \ +#define JIT_IMPL_0(stem, ret) \ + ret jit_##stem (jit_state_t* _jit) \ + { \ + return stem(_jit); \ } -FOR_EACH_INSTRUCTION(DEFINE_INSTRUCTION) -#undef DEFINE_INSTRUCTION +#define JIT_IMPL_1(stem, ret, ta) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \ + { \ + return stem(_jit, unwrap_##ta(a)); \ + } +#define JIT_IMPL_2(stem, ret, ta, tb) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b)); \ + } +#define JIT_IMPL_3(stem, ret, ta, tb, tc) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \ + } +#define JIT_IMPL_4(stem, ret, ta, tb, tc, td) \ + ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \ + { \ + return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \ + } + +#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr) +#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr) +#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr) +#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm) +#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm) +#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t) +#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr) +#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, 
fpr, fpr) +#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr) +#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr) +#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off) +#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr) +#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64) +#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32) +#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer) +#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr) +#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr) +#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr) +#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr) +#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm) +#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm) +#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr) +#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm) +#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off) +#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm) +#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr) +#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm) +#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer) +#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void) +#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm) +#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr) +#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr) +#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr) +#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr) +#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer) + +#define unwrap_gpr(r) rn(r) +#define unwrap_fpr(r) rn(r) +#define unwrap_imm(i) i +#define unwrap_uimm(u) u +#define unwrap_off(o) o +#define unwrap_pointer(p) ((uintptr_t) p) +#define unwrap_float32(f) f 
+#define unwrap_float64(d) d + +#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem) +FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION) +#undef IMPL_INSTRUCTION diff --git a/jit/mips-cpu.c b/jit/mips-cpu.c index c8c6ce3ab..7ab58b873 100644 --- a/jit/mips-cpu.c +++ b/jit/mips-cpu.c @@ -581,21 +581,21 @@ static void _stxr_l(jit_state_t*,int32_t,int32_t,int32_t); static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t); # endif # if __BYTE_ORDER == __LITTLE_ENDIAN -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); # if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,int32_t,int32_t); +# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void _bswapr_ul(jit_state_t*,int32_t,int32_t); # endif # else -# define htonr_us(r0,r1) extr_us(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) # if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) movr(r0,r1) # else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # endif # endif # define extr_c(r0,r1) _extr_c(_jit,r0,r1) @@ -1676,7 +1676,7 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) # if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -1689,7 +1689,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, 
int32_t r0, int32_t r1) { int32_t t0; int32_t t1; @@ -1716,13 +1716,13 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t reg; reg = jit_get_reg(jit_class_gpr); rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); + bswapr_ui(r0, r1); + bswapr_ui(rn(reg), rn(reg)); lshi(r0, r0, 32); orr(r0, r0, rn(reg)); jit_unget_reg(reg); diff --git a/jit/ppc-cpu.c b/jit/ppc-cpu.c index e8c4ce335..6f911dd1f 100644 --- a/jit/ppc-cpu.c +++ b/jit/ppc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -510,21 +510,21 @@ static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t); # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif # if __BYTE_ORDER == __BIG_ENDIAN -# define htonr_us(r0,r1) extr_us(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) # if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) movr(r0,r1) # else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # endif # else -# define htonr_us(r0,r1) _htonr_us(_jit,r0,r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0,r1) _htonr_ui(_jit,r0,r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) +static void _bswapr_us(jit_state_t*,int32_t,int32_t); +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) +static void _bswapr_ui(jit_state_t*,int32_t,int32_t); # if __WORDSIZE == 64 -# define htonr_ul(r0,r1) _htonr_ul(_jit,r0,r1) -static void _htonr_ul(jit_state_t*,int32_t,int32_t); +# define bswapr_ul(r0,r1) _bswapr_ul(_jit,r0,r1) +static void _bswapr_ul(jit_state_t*,int32_t,int32_t); # endif # endif # define 
addr(r0,r1,r2) ADD(r0,r1,r2) @@ -1121,7 +1121,7 @@ _movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0) # if __BYTE_ORDER == __LITTLE_ENDIAN static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t t0; t0 = jit_get_reg(jit_class_gpr); @@ -1134,7 +1134,7 @@ _htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) } static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t reg; reg = jit_get_reg(jit_class_gpr); @@ -1147,13 +1147,13 @@ _htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) # if __WORDSIZE == 64 static void -_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +_bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) { int32_t reg; reg = jit_get_reg(jit_class_gpr); rshi_u(rn(reg), r1, 32); - htonr_ui(r0, r1); - htonr_ui(rn(reg), rn(reg)); + bswapr_ui(r0, r1); + bswapr_ui(rn(reg), rn(reg)); lshi(r0, r0, 32); orr(r0, r0, rn(reg)); jit_unget_reg(reg); diff --git a/jit/s390-cpu.c b/jit/s390-cpu.c index b8b9df6af..02f2675ec 100644 --- a/jit/s390-cpu.c +++ b/jit/s390-cpu.c @@ -1079,12 +1079,12 @@ static void _ori(jit_state_t*,int32_t,int32_t,jit_word_t); static void _xorr(jit_state_t*,int32_t,int32_t,int32_t); # define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) static void _xori(jit_state_t*,int32_t,int32_t,jit_word_t); -# define htonr_us(r0,r1) extr_us(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) # if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) movr(r0,r1) # else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # endif # define extr_c(r0,r1) LGBR(r0,r1) # define extr_uc(r0,r1) LLGCR(r0,r1) diff --git a/jit/sparc-cpu.c b/jit/sparc-cpu.c index 21e78ada2..81f92ce23 100644 --- a/jit/sparc-cpu.c +++ b/jit/sparc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2017 Free Software 
Foundation, Inc. + * Copyright (C) 2013-2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -669,7 +669,7 @@ static void _xori(jit_state_t*, int32_t, int32_t, jit_word_t); # define rshr_u(r0, r1, r2) SRLX(r1, r2, r0) # define rshi_u(r0, r1, i0) SRLXI(r1, i0, r0) # endif -# define htonr_us(r0,r1) extr_us(r0,r1) +# define bswapr_us(r0,r1) extr_us(r0,r1) # define extr_c(r0,r1) _extr_c(_jit,r0,r1) static void _extr_c(jit_state_t*,int32_t,int32_t); # define extr_uc(r0,r1) andi(r0, r1, 0xff) @@ -678,10 +678,10 @@ static void _extr_s(jit_state_t*,int32_t,int32_t); # define extr_us(r0,r1) _extr_us(_jit,r0,r1) static void _extr_us(jit_state_t*,int32_t,int32_t); # if __WORDSIZE == 32 -# define htonr_ui(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) movr(r0,r1) # else -# define htonr_ui(r0,r1) extr_ui(r0,r1) -# define htonr_ul(r0,r1) movr(r0,r1) +# define bswapr_ui(r0,r1) extr_ui(r0,r1) +# define bswapr_ul(r0,r1) movr(r0,r1) # define extr_i(r0,r1) _extr_i(_jit,r0,r1) static void _extr_i(jit_state_t*,int32_t,int32_t); # define extr_ui(r0,r1) _extr_ui(_jit,r0,r1) diff --git a/jit/x86-cpu.c b/jit/x86-cpu.c index 86e7686e0..0bc73b297 100644 --- a/jit/x86-cpu.c +++ b/jit/x86-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * Copyright (C) 2012-2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -14,3493 +14,2739 @@ * License for more details. 
* * Authors: - * Paulo Cesar Pereira de Andrade + * Paulo Cesar Pereira de Andrade */ /* avoid using it due to partial stalls */ -#define USE_INC_DEC 0 +#define USE_INC_DEC 0 -#if PROTO -# if __X32 || __X64_32 -# define WIDE 0 -# define ldi(u, v) ldi_i(u, v) -# define ldr(u, v) ldr_i(u, v) -# define ldxr(u, v, w) ldxr_i(u, v, w) -# define ldxi(u, v, w) ldxi_i(u, v, w) -# define sti(u, v) sti_i(u, v) -# define stxi(u, v, w) stxi_i(u, v, w) -# define can_sign_extend_int_p(im) 1 -# define can_zero_extend_int_p(im) 1 -# define fits_uint32_p(im) 1 -# else -# define WIDE 1 -# define ldi(u, v) ldi_l(u, v) -# define ldr(u, v) ldr_l(u, v) -# define ldxr(u, v, w) ldxr_l(u, v, w) -# define ldxi(u, v, w) ldxi_l(u, v, w) -# define sti(u, v) sti_l(u, v) -# define stxi(u, v, w) stxi_l(u, v, w) -# define can_sign_extend_int_p(im) \ - (((im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \ - ((im) < 0 && (long long)(im) > -0x80000000LL)) -# define can_zero_extend_int_p(im) \ - ((im) >= 0 && (im) < 0x80000000LL) -# define fits_uint32_p(im) (((im) & 0xffffffff00000000LL) == 0) -# endif -# if __X32 || __CYGWIN__ || __X64_32 -# define reg8_p(rn) \ - ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO) -# else -# define reg8_p(rn) 1 -# endif -# define _RAX_REGNO 0 -# define _RCX_REGNO 1 -# define _RDX_REGNO 2 -# define _RBX_REGNO 3 -# define _RSP_REGNO 4 -# define _RBP_REGNO 5 -# define _RSI_REGNO 6 -# define _RDI_REGNO 7 -# define _R8_REGNO 8 -# define _R9_REGNO 9 -# define _R10_REGNO 10 -# define _R11_REGNO 11 -# define _R12_REGNO 12 -# define _R13_REGNO 13 -# define _R14_REGNO 14 -# define _R15_REGNO 15 -# define r7(reg) ((reg) & 7) -# define r8(reg) ((reg) & 15) -# define _SCL1 0x00 -# define _SCL2 0x01 -# define _SCL4 0x02 -# define _SCL8 0x03 -# define X86_ADD 0 -# define X86_OR 1 << 3 -# define X86_ADC 2 << 3 -# define X86_SBB 3 << 3 -# define X86_AND 4 << 3 -# define X86_SUB 5 << 3 -# define X86_XOR 6 << 3 -# define X86_CMP 7 << 3 -# define X86_ROL 0 -# define X86_ROR 1 -# define X86_RCL 2 
-# define X86_RCR 3 -# define X86_SHL 4 -# define X86_SHR 5 -# define X86_SAR 7 -# define X86_NOT 2 -# define X86_NEG 3 -# define X86_MUL 4 -# define X86_IMUL 5 -# define X86_DIV 6 -# define X86_IDIV 7 -# define X86_CC_O 0x0 -# define X86_CC_NO 0x1 -# define X86_CC_NAE 0x2 -# define X86_CC_B 0x2 -# define X86_CC_C 0x2 -# define X86_CC_AE 0x3 -# define X86_CC_NB 0x3 -# define X86_CC_NC 0x3 -# define X86_CC_E 0x4 -# define X86_CC_Z 0x4 -# define X86_CC_NE 0x5 -# define X86_CC_NZ 0x5 -# define X86_CC_BE 0x6 -# define X86_CC_NA 0x6 -# define X86_CC_A 0x7 -# define X86_CC_NBE 0x7 -# define X86_CC_S 0x8 -# define X86_CC_NS 0x9 -# define X86_CC_P 0xa -# define X86_CC_PE 0xa -# define X86_CC_NP 0xb -# define X86_CC_PO 0xb -# define X86_CC_L 0xc -# define X86_CC_NGE 0xc -# define X86_CC_GE 0xd -# define X86_CC_NL 0xd -# define X86_CC_LE 0xe -# define X86_CC_NG 0xe -# define X86_CC_G 0xf -# define X86_CC_NLE 0xf -# define mrm(md, r, m) *_jit->pc.uc++ = (md<<6) | (r<<3) | m -# define sib(sc, i, b) *_jit->pc.uc++ = (sc<<6) | (i<<3) | b -# define ic(c) *_jit->pc.uc++ = c -# define is(s) *_jit->pc.us++ = s -# define ii(i) *_jit->pc.ui++ = i -# if __X64 && !__X64_32 -# define il(l) *_jit->pc.ul++ = l -# else -# define il(l) ii(l) -# endif -# define patch_abs(instr, label) \ - *(jit_word_t *)(instr - sizeof(jit_word_t)) = label -# define patch_rel(instr, label) \ - *(int32_t *)(instr - 4) = label - instr -# define patch_rel_char(instr, label) \ - *(int8_t *)(instr - 1) = label - instr -# define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b) -static void -_rex(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t); -# define rx(rd, md, rb, ri, ms) _rx(_jit, rd, md, rb, ri, ms) -static void -_rx(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t); -# define nop(n) _nop(_jit, n) -static void _nop(jit_state_t*, int32_t); -# define emms() is(0x770f) -# define lea(md, rb, ri, ms, rd) _lea(_jit, md, rb, ri, ms, rd) -static void -_lea(jit_state_t*,int32_t,int32_t,int32_t,int32_t,int32_t); 
-# define pushr(r0) _pushr(_jit, r0) -static void _pushr(jit_state_t*, int32_t) maybe_unused; -# define popr(r0) _popr(_jit, r0) -static void _popr(jit_state_t*, int32_t) maybe_unused; -# define xchgr(r0, r1) _xchgr(_jit, r0, r1) -static void _xchgr(jit_state_t*, int32_t, int32_t); -# define testr(r0, r1) _testr(_jit, r0, r1) -static void _testr(jit_state_t*, int32_t, int32_t); -# define testi(r0, i0) _testi(_jit, r0, i0) -static void _testi(jit_state_t*, int32_t, jit_word_t); -# define cc(code, r0) _cc(_jit, code, r0) -static void _cc(jit_state_t*, int32_t, int32_t); -# define icmpr(r0, r1) alur(X86_CMP, r0, r1) -# define alur(code, r0, r1) _alur(_jit, code, r0, r1) -static void _alur(jit_state_t*, int32_t, int32_t, int32_t); -# define icmpi(r0, i0) alui(X86_CMP, r0, i0) -# define alui(code, r0, i0) _alui(_jit, code, r0, i0) -static void _alui(jit_state_t*, int32_t, int32_t, jit_word_t); -# define iaddr(r0, r1) alur(X86_ADD, r0, r1) -# define save(r0) _save(_jit, r0) -static void _save(jit_state_t*, int32_t); -# define load(r0) _load(_jit, r0) -static void _load(jit_state_t*, int32_t); -# define addr(r0, r1, r2) _addr(_jit, r0, r1, r2) -static void _addr(jit_state_t*, int32_t, int32_t, int32_t); -# define iaddi(r0, i0) alui(X86_ADD, r0, i0) -# define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) -static void _addi(jit_state_t*, int32_t, int32_t, jit_word_t); -#define addcr(r0, r1, r2) _addcr(_jit, r0, r1, r2) -static void _addcr(jit_state_t*, int32_t, int32_t, int32_t); -#define addci(r0, r1, i0) _addci(_jit, r0, r1, i0) -static void _addci(jit_state_t*, int32_t, int32_t, jit_word_t); -# define iaddxr(r0, r1) alur(X86_ADC, r0, r1) -# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2) -static void _addxr(jit_state_t*, int32_t, int32_t, int32_t); -# define iaddxi(r0, i0) alui(X86_ADC, r0, i0) -# define addxi(r0, r1, i0) _addxi(_jit, r0, r1, i0) -static void _addxi(jit_state_t*, int32_t, int32_t, jit_word_t); -# define isubr(r0, r1) alur(X86_SUB, r0, r1) -# define subr(r0, 
r1, r2) _subr(_jit, r0, r1, r2) -static void _subr(jit_state_t*, int32_t, int32_t, int32_t); -# define isubi(r0, i0) alui(X86_SUB, r0, i0) -# define subi(r0, r1, i0) _subi(_jit, r0, r1, i0) -static void _subi(jit_state_t*, int32_t, int32_t, jit_word_t); -# define subcr(r0, r1, r2) _subcr(_jit, r0, r1, r2) -static void _subcr(jit_state_t*,int32_t,int32_t,int32_t); -# define subci(r0, r1, i0) _subci(_jit, r0, r1, i0) -static void _subci(jit_state_t*,int32_t,int32_t,jit_word_t); -# define isubxr(r0, r1) alur(X86_SBB, r0, r1) -# define subxr(r0, r1, r2) _subxr(_jit, r0, r1, r2) -static void _subxr(jit_state_t*,int32_t,int32_t,int32_t); -# define isubxi(r0, i0) alui(X86_SBB, r0, i0) -# define subxi(r0, r1, i0) _subxi(_jit, r0, r1, i0) -static void _subxi(jit_state_t*,int32_t,int32_t,jit_word_t); -# define rsbi(r0, r1, i0) _rsbi(_jit, r0, r1, i0) -static void _rsbi(jit_state_t*,int32_t,int32_t,jit_word_t); -# define imulr(r0, r1) _imulr(_jit, r0, r1) -static void _imulr(jit_state_t*, int32_t, int32_t); -# define imuli(r0, r1, i0) _imuli(_jit, r0, r1, i0) -static void _imuli(jit_state_t*, int32_t, int32_t, jit_word_t); -# define mulr(r0, r1, r2) _mulr(_jit, r0, r1, r2) -static void _mulr(jit_state_t*, int32_t, int32_t, int32_t); -# define muli(r0, r1, i0) _muli(_jit, r0, r1, i0) -static void _muli(jit_state_t*, int32_t, int32_t, jit_word_t); -# define umulr(r0) unr(X86_IMUL, r0) -# define umulr_u(r0) unr(X86_MUL, r0) -# define qmulr(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 1) -# define qmulr_u(r0, r1, r2, r3) _iqmulr(_jit, r0, r1, r2, r3, 0) -# define iqmulr(r0, r1, r2, r3, sign) _iqmulr(_jit, r0, r1, r2, r3, sign) -static void _iqmulr(jit_state_t*, int32_t, int32_t, - int32_t,int32_t, jit_bool_t); -# define qmuli(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 1) -# define qmuli_u(r0, r1, r2, i0) _iqmuli(_jit, r0, r1, r2, i0, 0) -# define iqmuli(r0, r1, r2, i0, sign) _iqmuli(_jit, r0, r1, r2, i0, sign) -static void _iqmuli(jit_state_t*, int32_t, int32_t, - 
int32_t,jit_word_t, jit_bool_t); -# define sign_extend_rdx_rax() _sign_extend_rdx_rax(_jit) -static void _sign_extend_rdx_rax(jit_state_t*); -# define idivr(r0) unr(X86_IDIV, r0) -# define idivr_u(r0) unr(X86_DIV, r0) -# define divremr(r0, r1, r2, i0, i1) _divremr(_jit, r0, r1, r2, i0, i1) -static void -_divremr(jit_state_t*,int32_t,int32_t,int32_t, - jit_bool_t,jit_bool_t); -# define divremi(r0, r1, i0, i1, i2) _divremi(_jit, r0, r1, i0, i1, i2) -static void -_divremi(jit_state_t*,int32_t,int32_t,jit_word_t,jit_bool_t,jit_bool_t); -# define divr(r0, r1, r2) divremr(r0, r1, r2, 1, 1) -# define divi(r0, r1, i0) divremi(r0, r1, i0, 1, 1) -# define divr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 1) -# define divi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 1) -# define qdivr(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 1) -# define qdivr_u(r0, r1, r2, r3) _iqdivr(_jit, r0, r1, r2, r3, 0) -# define iqdivr(r0, r1, r2, r3, sign) _iqdivr(_jit, r0, r1, r2, r3, sign) -static void _iqdivr(jit_state_t*, int32_t, int32_t, - int32_t,int32_t, jit_bool_t); -# define qdivi(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 1) -# define qdivi_u(r0, r1, r2, i0) _iqdivi(_jit, r0, r1, r2, i0, 0) -# define iqdivi(r0, r1, r2, i0, sign) _iqdivi(_jit, r0, r1, r2, i0, sign) -static void _iqdivi(jit_state_t*, int32_t, int32_t, - int32_t,jit_word_t, jit_bool_t); -# define remr(r0, r1, r2) divremr(r0, r1, r2, 1, 0) -# define remi(r0, r1, i0) divremi(r0, r1, i0, 1, 0) -# define remr_u(r0, r1, r2) divremr(r0, r1, r2, 0, 0) -# define remi_u(r0, r1, i0) divremi(r0, r1, i0, 0, 0) -# define iandr(r0, r1) alur(X86_AND, r0, r1) -# define andr(r0, r1, r2) _andr(_jit, r0, r1, r2) -static void _andr(jit_state_t*,int32_t,int32_t,int32_t); -# define iandi(r0, i0) alui(X86_AND, r0, i0) -# define andi(r0, r1, i0) _andi(_jit, r0, r1, i0) -static void _andi(jit_state_t*, int32_t,int32_t,jit_word_t); -# define iorr(r0, r1) alur(X86_OR, r0, r1) -# define orr(r0, r1, r2) _orr(_jit, r0, r1, r2) -static void _orr(jit_state_t*, 
int32_t,int32_t,int32_t); -# define iori(r0, i0) alui(X86_OR, r0, i0) -# define ori(r0, r1, i0) _ori(_jit, r0, r1, i0) -static void _ori(jit_state_t*, int32_t,int32_t,jit_word_t); -# define ixorr(r0, r1) alur(X86_XOR, r0, r1) -# define xorr(r0, r1, r2) _xorr(_jit, r0, r1, r2) -static void _xorr(jit_state_t*, int32_t,int32_t,int32_t); -# define ixori(r0, i0) alui(X86_XOR, r0, i0) -# define xori(r0, r1, i0) _xori(_jit, r0, r1, i0) -static void _xori(jit_state_t*, int32_t,int32_t,jit_word_t); -# define irotshr(code, r0) _irotshr(_jit, code, r0) -static void _irotshr(jit_state_t*, int32_t, int32_t); -# define rotshr(code, r0, r1, r2) _rotshr(_jit, code, r0, r1, r2) -static void -_rotshr(jit_state_t*,int32_t,int32_t,int32_t,int32_t); -# define irotshi(code, r0, i0) _irotshi(_jit, code, r0, i0) -static void _irotshi(jit_state_t*, int32_t, int32_t, jit_word_t); -# define rotshi(code, r0, r1, i0) _rotshi(_jit, code, r0, r1, i0) -static void -_rotshi(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t); -# define lshr(r0, r1, r2) rotshr(X86_SHL, r0, r1, r2) -# define lshi(r0, r1, i0) _lshi(_jit, r0, r1, i0) -static void _lshi(jit_state_t*, int32_t, int32_t, jit_word_t); -# define rshr(r0, r1, r2) rotshr(X86_SAR, r0, r1, r2) -# define rshi(r0, r1, i0) rotshi(X86_SAR, r0, r1, i0) -# define rshr_u(r0, r1, r2) rotshr(X86_SHR, r0, r1, r2) -# define rshi_u(r0, r1, i0) rotshi(X86_SHR, r0, r1, i0) -# define unr(code, r0) _unr(_jit, code, r0) -static void _unr(jit_state_t*, int32_t, int32_t); -# define inegr(r0) unr(X86_NEG, r0) -# define negr(r0, r1) _negr(_jit, r0, r1) -static void _negr(jit_state_t*, int32_t, int32_t); -# define icomr(r0) unr(X86_NOT, r0) -# define comr(r0, r1) _comr(_jit, r0, r1) -static void _comr(jit_state_t*, int32_t, int32_t); -# if USE_INC_DEC -# define incr(r0, r1) _incr(_jit, r0, r1) -static void _incr(jit_state_t*, int32_t, int32_t); -# define decr(r0, r1) _decr(_jit, r0, r1) -static void _decr(jit_state_t*, int32_t, int32_t); -# endif -# define cr(code, r0, 
r1, r2) _cr(_jit, code, r0, r1, r2) -static void -_cr(jit_state_t*, int32_t, int32_t, int32_t, int32_t); -# define ci(code, r0, r1, i0) _ci(_jit, code, r0, r1, i0) -static void -_ci(jit_state_t *_jit, int32_t, int32_t, int32_t, jit_word_t); -# define ci0(code, r0, r1) _ci0(_jit, code, r0, r1) -static void _ci0(jit_state_t*, int32_t, int32_t, int32_t); -# define ltr(r0, r1, r2) _ltr(_jit, r0, r1, r2) -static void _ltr(jit_state_t*, int32_t, int32_t, int32_t); -# define lti(r0, r1, i0) _lti(_jit, r0, r1, i0) -static void _lti(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ltr_u(r0, r1, r2) _ltr_u(_jit, r0, r1, r2) -static void _ltr_u(jit_state_t*, int32_t, int32_t, int32_t); -# define lti_u(r0, r1, i0) ci(X86_CC_B, r0, r1, i0) -# define ler(r0, r1, r2) _ler(_jit, r0, r1, r2) -static void _ler(jit_state_t*, int32_t, int32_t, int32_t); -# define lei(r0, r1, i0) ci(X86_CC_LE, r0, r1, i0) -# define ler_u(r0, r1, r2) _ler_u(_jit, r0, r1, r2) -static void _ler_u(jit_state_t*, int32_t, int32_t, int32_t); -# define lei_u(r0, r1, i0) _lei_u(_jit, r0, r1, i0) -static void _lei_u(jit_state_t*, int32_t, int32_t, jit_word_t); -# define eqr(r0, r1, r2) _eqr(_jit, r0, r1, r2) -static void _eqr(jit_state_t*, int32_t, int32_t, int32_t); -# define eqi(r0, r1, i0) _eqi(_jit, r0, r1, i0) -static void _eqi(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ger(r0, r1, r2) _ger(_jit, r0, r1, r2) -static void _ger(jit_state_t*, int32_t, int32_t, int32_t); -# define gei(r0, r1, i0) _gei(_jit, r0, r1, i0) -static void _gei(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ger_u(r0, r1, r2) _ger_u(_jit, r0, r1, r2) -static void _ger_u(jit_state_t*, int32_t, int32_t, int32_t); -# define gei_u(r0, r1, i0) _gei_u(_jit, r0, r1, i0) -static void _gei_u(jit_state_t*, int32_t, int32_t, jit_word_t); -# define gtr(r0, r1, r2) _gtr(_jit, r0, r1, r2) -static void _gtr(jit_state_t*, int32_t, int32_t, int32_t); -# define gti(r0, r1, i0) _ci(_jit, X86_CC_G, r0, r1, i0) -# define gtr_u(r0, 
r1, r2) _gtr_u(_jit, r0, r1, r2) -static void _gtr_u(jit_state_t*, int32_t, int32_t, int32_t); -# define gti_u(r0, r1, i0) _gti_u(_jit, r0, r1, i0) -static void _gti_u(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ner(r0, r1, r2) _ner(_jit, r0, r1, r2) -static void _ner(jit_state_t*, int32_t, int32_t, int32_t); -# define nei(r0, r1, i0) _nei(_jit, r0, r1, i0) -static void _nei(jit_state_t*, int32_t, int32_t, jit_word_t); -# define movr(r0, r1) _movr(_jit, r0, r1) -static void _movr(jit_state_t*, int32_t, int32_t); -# define imovi(r0, i0) _imovi(_jit, r0, i0) -static void _imovi(jit_state_t*, int32_t, jit_word_t); -# define movi(r0, i0) _movi(_jit, r0, i0) -static void _movi(jit_state_t*, int32_t, jit_word_t); -# define movi_p(r0, i0) _movi_p(_jit, r0, i0) -static jit_word_t _movi_p(jit_state_t*, int32_t, jit_word_t); -# define movcr(r0, r1) _movcr(_jit, r0, r1) -static void _movcr(jit_state_t*,int32_t,int32_t); -# define movcr_u(r0, r1) _movcr_u(_jit, r0, r1) -static void _movcr_u(jit_state_t*,int32_t,int32_t); -# define movsr(r0, r1) _movsr(_jit, r0, r1) -static void _movsr(jit_state_t*,int32_t,int32_t); -# define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) -static void _movsr_u(jit_state_t*,int32_t,int32_t); -# if __X64 && !__X64_32 -# define movir(r0, r1) _movir(_jit, r0, r1) -static void _movir(jit_state_t*,int32_t,int32_t); -# define movir_u(r0, r1) _movir_u(_jit, r0, r1) -static void _movir_u(jit_state_t*,int32_t,int32_t); -# endif -# define htonr_us(r0, r1) _htonr_us(_jit, r0, r1) -static void _htonr_us(jit_state_t*,int32_t,int32_t); -# define htonr_ui(r0, r1) _htonr_ui(_jit, r0, r1) -static void _htonr_ui(jit_state_t*,int32_t,int32_t); -# if __X64 && !__X64_32 -#define htonr_ul(r0, r1) _htonr_ul(_jit, r0, r1) -static void _htonr_ul(jit_state_t*,int32_t,int32_t); -#endif -# define extr_c(r0, r1) _extr_c(_jit, r0, r1) -static void _extr_c(jit_state_t*,int32_t,int32_t); -# define extr_uc(r0, r1) _extr_uc(_jit, r0, r1) -static void 
_extr_uc(jit_state_t*,int32_t,int32_t); -# define extr_s(r0, r1) movsr(r0, r1) -# define extr_us(r0, r1) movsr_u(r0, r1) -# if __X64 && !__X64_32 -# define extr_i(r0, r1) movir(r0, r1) -# define extr_ui(r0, r1) movir_u(r0, r1) -# endif -# define ldr_c(r0, r1) _ldr_c(_jit, r0, r1) -static void _ldr_c(jit_state_t*, int32_t, int32_t); -# define ldi_c(r0, i0) _ldi_c(_jit, r0, i0) -static void _ldi_c(jit_state_t*, int32_t, jit_word_t); -# define ldr_uc(r0, r1) _ldr_uc(_jit, r0, r1) -static void _ldr_uc(jit_state_t*, int32_t, int32_t); -# define ldi_uc(r0, i0) _ldi_uc(_jit, r0, i0) -static void _ldi_uc(jit_state_t*, int32_t, jit_word_t); -# define ldr_s(r0, r1) _ldr_s(_jit, r0, r1) -static void _ldr_s(jit_state_t*, int32_t, int32_t); -# define ldi_s(r0, i0) _ldi_s(_jit, r0, i0) -static void _ldi_s(jit_state_t*, int32_t, jit_word_t); -# define ldr_us(r0, r1) _ldr_us(_jit, r0, r1) -static void _ldr_us(jit_state_t*, int32_t, int32_t); -# define ldi_us(r0, i0) _ldi_us(_jit, r0, i0) -static void _ldi_us(jit_state_t*, int32_t, jit_word_t); -# if __X32 || !__X64_32 -# define ldr_i(r0, r1) _ldr_i(_jit, r0, r1) -static void _ldr_i(jit_state_t*, int32_t, int32_t); -# define ldi_i(r0, i0) _ldi_i(_jit, r0, i0) -static void _ldi_i(jit_state_t*, int32_t, jit_word_t); -# endif -# if __X64 -# if __X64_32 -# define ldr_i(r0, r1) _ldr_ui(_jit, r0, r1) -# define ldi_i(r0, i0) _ldi_ui(_jit, r0, i0) -# else -# define ldr_ui(r0, r1) _ldr_ui(_jit, r0, r1) -# define ldi_ui(r0, i0) _ldi_ui(_jit, r0, i0) -# endif -static void _ldr_ui(jit_state_t*, int32_t, int32_t); -static void _ldi_ui(jit_state_t*, int32_t, jit_word_t); -# if !__X64_32 -# define ldr_l(r0, r1) _ldr_l(_jit, r0, r1) -static void _ldr_l(jit_state_t*, int32_t, int32_t); -# define ldi_l(r0, i0) _ldi_l(_jit, r0, i0) -static void _ldi_l(jit_state_t*, int32_t, jit_word_t); -# endif -# endif -# define ldxr_c(r0, r1, r2) _ldxr_c(_jit, r0, r1, r2) -static void _ldxr_c(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_c(r0, r1, i0) 
_ldxi_c(_jit, r0, r1, i0) -static void _ldxi_c(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ldxr_uc(r0, r1, r2) _ldxr_uc(_jit, r0, r1, r2) -static void _ldxr_uc(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_uc(r0, r1, i0) _ldxi_uc(_jit, r0, r1, i0) -static void _ldxi_uc(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ldxr_s(r0, r1, r2) _ldxr_s(_jit, r0, r1, r2) -static void _ldxr_s(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_s(r0, r1, i0) _ldxi_s(_jit, r0, r1, i0) -static void _ldxi_s(jit_state_t*, int32_t, int32_t, jit_word_t); -# define ldxr_us(r0, r1, r2) _ldxr_us(_jit, r0, r1, r2) -static void _ldxr_us(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_us(r0, r1, i0) _ldxi_us(_jit, r0, r1, i0) -static void _ldxi_us(jit_state_t*, int32_t, int32_t, jit_word_t); -# if __X32 || !__X64_32 -# define ldxr_i(r0, r1, r2) _ldxr_i(_jit, r0, r1, r2) -static void _ldxr_i(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_i(r0, r1, i0) _ldxi_i(_jit, r0, r1, i0) -static void _ldxi_i(jit_state_t*, int32_t, int32_t, jit_word_t); -# endif -# if __X64 -# if __X64_32 -# define ldxr_i(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2) -# define ldxi_i(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0) -# else -# define ldxr_ui(r0, r1, r2) _ldxr_ui(_jit, r0, r1, r2) -# define ldxi_ui(r0, r1, i0) _ldxi_ui(_jit, r0, r1, i0) -# endif -static void _ldxr_ui(jit_state_t*, int32_t, int32_t, int32_t); -static void _ldxi_ui(jit_state_t*, int32_t, int32_t, jit_word_t); -# if !__X64_32 -# define ldxr_l(r0, r1, r2) _ldxr_l(_jit, r0, r1, r2) -static void _ldxr_l(jit_state_t*, int32_t, int32_t, int32_t); -# define ldxi_l(r0, r1, i0) _ldxi_l(_jit, r0, r1, i0) -static void _ldxi_l(jit_state_t*, int32_t, int32_t, jit_word_t); -# endif -# endif -# define str_c(r0, r1) _str_c(_jit, r0, r1) -static void _str_c(jit_state_t*, int32_t, int32_t); -# define sti_c(i0, r0) _sti_c(_jit, i0, r0) -static void _sti_c(jit_state_t*, jit_word_t, int32_t); -# define str_s(r0, r1) 
_str_s(_jit, r0, r1) -static void _str_s(jit_state_t*, int32_t, int32_t); -# define sti_s(i0, r0) _sti_s(_jit, i0, r0) -static void _sti_s(jit_state_t*, jit_word_t, int32_t); -# define str_i(r0, r1) _str_i(_jit, r0, r1) -static void _str_i(jit_state_t*, int32_t, int32_t); -# define sti_i(i0, r0) _sti_i(_jit, i0, r0) -static void _sti_i(jit_state_t*, jit_word_t, int32_t); -# if __X64 && !__X64_32 -# define str_l(r0, r1) _str_l(_jit, r0, r1) -static void _str_l(jit_state_t*, int32_t, int32_t); -# define sti_l(i0, r0) _sti_l(_jit, i0, r0) -static void _sti_l(jit_state_t*, jit_word_t, int32_t); -# endif -# define stxr_c(r0, r1, r2) _stxr_c(_jit, r0, r1, r2) -static void _stxr_c(jit_state_t*, int32_t, int32_t, int32_t); -# define stxi_c(i0, r0, r1) _stxi_c(_jit, i0, r0, r1) -static void _stxi_c(jit_state_t*, jit_word_t, int32_t, int32_t); -# define stxr_s(r0, r1, r2) _stxr_s(_jit, r0, r1, r2) -static void _stxr_s(jit_state_t*, int32_t, int32_t, int32_t); -# define stxi_s(i0, r0, r1) _stxi_s(_jit, i0, r0, r1) -static void _stxi_s(jit_state_t*, jit_word_t, int32_t, int32_t); -# define stxr_i(r0, r1, r2) _stxr_i(_jit, r0, r1, r2) -static void _stxr_i(jit_state_t*, int32_t, int32_t, int32_t); -# define stxi_i(i0, r0, r1) _stxi_i(_jit, i0, r0, r1) -static void _stxi_i(jit_state_t*, jit_word_t, int32_t, int32_t); -# if __X64 && !__X64_32 -# define stxr_l(r0, r1, r2) _stxr_l(_jit, r0, r1, r2) -static void _stxr_l(jit_state_t*, int32_t, int32_t, int32_t); -# define stxi_l(i0, r0, r1) _stxi_l(_jit, i0, r0, r1) -static void _stxi_l(jit_state_t*, jit_word_t, int32_t, int32_t); -# endif -# define jcc(code, i0) _jcc(_jit, code, i0) -# define jo(i0) jcc(X86_CC_O, i0) -# define jno(i0) jcc(X86_CC_NO, i0) -# define jnae(i0) jcc(X86_CC_NAE, i0) -# define jb(i0) jcc(X86_CC_B, i0) -# define jc(i0) jcc(X86_CC_C, i0) -# define jae(i0) jcc(X86_CC_AE, i0) -# define jnb(i0) jcc(X86_CC_NB, i0) -# define jnc(i0) jcc(X86_CC_NC, i0) -# define je(i0) jcc(X86_CC_E, i0) -# define jz(i0) jcc(X86_CC_Z, 
i0) -# define jne(i0) jcc(X86_CC_NE, i0) -# define jnz(i0) jcc(X86_CC_NZ, i0) -# define jbe(i0) jcc(X86_CC_BE, i0) -# define jna(i0) jcc(X86_CC_NA, i0) -# define ja(i0) jcc(X86_CC_A, i0) -# define jnbe(i0) jcc(X86_CC_NBE, i0) -# define js(i0) jcc(X86_CC_S, i0) -# define jns(i0) jcc(X86_CC_NS, i0) -# define jp(i0) jcc(X86_CC_P, i0) -# define jpe(i0) jcc(X86_CC_PE, i0) -# define jnp(i0) jcc(X86_CC_NP, i0) -# define jpo(i0) jcc(X86_CC_PO, i0) -# define jl(i0) jcc(X86_CC_L, i0) -# define jnge(i0) jcc(X86_CC_NGE, i0) -# define jge(i0) jcc(X86_CC_GE, i0) -# define jnl(i0) jcc(X86_CC_NL, i0) -# define jle(i0) jcc(X86_CC_LE, i0) -# define jng(i0) jcc(X86_CC_NG, i0) -# define jg(i0) jcc(X86_CC_G, i0) -# define jnle(i0) jcc(X86_CC_NLE, i0) -static void _jcc(jit_state_t*, int32_t, jit_word_t); -# define jccs(code, i0) _jccs(_jit, code, i0) -# define jos(i0) jccs(X86_CC_O, i0) -# define jnos(i0) jccs(X86_CC_NO, i0) -# define jnaes(i0) jccs(X86_CC_NAE, i0) -# define jbs(i0) jccs(X86_CC_B, i0) -# define jcs(i0) jccs(X86_CC_C, i0) -# define jaes(i0) jccs(X86_CC_AE, i0) -# define jnbs(i0) jccs(X86_CC_NB, i0) -# define jncs(i0) jccs(X86_CC_NC, i0) -# define jes(i0) jccs(X86_CC_E, i0) -# define jzs(i0) jccs(X86_CC_Z, i0) -# define jnes(i0) jccs(X86_CC_NE, i0) -# define jnzs(i0) jccs(X86_CC_NZ, i0) -# define jbes(i0) jccs(X86_CC_BE, i0) -# define jnas(i0) jccs(X86_CC_NA, i0) -# define jas(i0) jccs(X86_CC_A, i0) -# define jnbes(i0) jccs(X86_CC_NBE, i0) -# define jss(i0) jccs(X86_CC_S, i0) -# define jnss(i0) jccs(X86_CC_NS, i0) -# define jps(i0) jccs(X86_CC_P, i0) -# define jpes(i0) jccs(X86_CC_PE, i0) -# define jnps(i0) jccs(X86_CC_NP, i0) -# define jpos(i0) jccs(X86_CC_PO, i0) -# define jls(i0) jccs(X86_CC_L, i0) -# define jnges(i0) jccs(X86_CC_NGE, i0) -# define jges(i0) jccs(X86_CC_GE, i0) -# define jnls(i0) jccs(X86_CC_NL, i0) -# define jles(i0) jccs(X86_CC_LE, i0) -# define jngs(i0) jccs(X86_CC_NG, i0) -# define jgs(i0) jccs(X86_CC_G, i0) -# define jnles(i0) jccs(X86_CC_NLE, i0) 
-static void _jccs(jit_state_t*, int32_t, jit_word_t); -# define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1) -static void _jcr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t); -# define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1) -static void _jci(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t); -# define jci0(code, i0, r0) _jci0(_jit, code, i0, r0) -static void _jci0(jit_state_t*, int32_t, jit_word_t, int32_t); -# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1) -static jit_word_t _bltr(jit_state_t*, jit_word_t, int32_t, int32_t); -# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1) -static jit_word_t _blti(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bltr_u(i0, r0, r1) _bltr_u(_jit, i0, r0, r1) -static jit_word_t _bltr_u(jit_state_t*, jit_word_t, int32_t, int32_t); -# define blti_u(i0, r0, i1) _blti_u(_jit, i0, r0, i1) -static jit_word_t _blti_u(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bler(i0, r0, r1) _bler(_jit, i0, r0, r1) -static jit_word_t _bler(jit_state_t*, jit_word_t, int32_t, int32_t); -# define blei(i0, r0, i1) _blei(_jit, i0, r0, i1) -static jit_word_t _blei(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bler_u(i0, r0, r1) _bler_u(_jit, i0, r0, r1) -static jit_word_t _bler_u(jit_state_t*, jit_word_t, int32_t, int32_t); -# define blei_u(i0, r0, i1) _blei_u(_jit, i0, r0, i1) -static jit_word_t _blei_u(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define beqr(i0, r0, r1) _beqr(_jit, i0, r0, r1) -static jit_word_t _beqr(jit_state_t*, jit_word_t, int32_t, int32_t); -# define beqi(i0, r0, i1) _beqi(_jit, i0, r0, i1) -static jit_word_t _beqi(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bger(i0, r0, r1) _bger(_jit, i0, r0, r1) -static jit_word_t _bger(jit_state_t*, jit_word_t, int32_t, int32_t); -# define bgei(i0, r0, i1) _bgei(_jit, i0, r0, i1) -static jit_word_t _bgei(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bger_u(i0, r0, r1) _bger_u(_jit, i0, r0, r1) -static 
jit_word_t _bger_u(jit_state_t*, jit_word_t, int32_t, int32_t); -# define bgei_u(i0, r0, i1) _bgei_u(_jit, i0, r0, i1) -static jit_word_t _bgei_u(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bgtr(i0, r0, r1) _bgtr(_jit, i0, r0, r1) -static jit_word_t _bgtr(jit_state_t*, jit_word_t, int32_t, int32_t); -# define bgti(i0, r0, i1) _bgti(_jit, i0, r0, i1) -static jit_word_t _bgti(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bgtr_u(i0, r0, r1) _bgtr_u(_jit, i0, r0, r1) -static jit_word_t _bgtr_u(jit_state_t*, jit_word_t, int32_t, int32_t); -# define bgti_u(i0, r0, i1) _bgti_u(_jit, i0, r0, i1) -static jit_word_t _bgti_u(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bner(i0, r0, r1) _bner(_jit, i0, r0, r1) -static jit_word_t _bner(jit_state_t*, jit_word_t, int32_t, int32_t); -# define bnei(i0, r0, i1) _bnei(_jit, i0, r0, i1) -static jit_word_t _bnei(jit_state_t*, jit_word_t, int32_t, jit_word_t); -# define bmsr(i0, r0, r1) _bmsr(_jit, i0, r0, r1) -static jit_word_t _bmsr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bmsi(i0, r0, i1) _bmsi(_jit, i0, r0, i1) -static jit_word_t _bmsi(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bmcr(i0, r0, r1) _bmcr(_jit, i0, r0, r1) -static jit_word_t _bmcr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bmci(i0, r0, i1) _bmci(_jit, i0, r0, i1) -static jit_word_t _bmci(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1) -static jit_word_t _boaddr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define boaddi(i0, r0, i1) _boaddi(_jit, i0, r0, i1) -static jit_word_t _boaddi(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define boaddr_u(i0, r0, r1) _boaddr_u(_jit, i0, r0, r1) -static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,int32_t,int32_t); -# define boaddi_u(i0, r0, i1) _boaddi_u(_jit, i0, r0, i1) -static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bxaddr(i0, r0, r1) _bxaddr(_jit, i0, r0, r1) -static 
jit_word_t _bxaddr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bxaddi(i0, r0, i1) _bxaddi(_jit, i0, r0, i1) -static jit_word_t _bxaddi(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bxaddr_u(i0, r0, r1) _bxaddr_u(_jit, i0, r0, r1) -static jit_word_t _bxaddr_u(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bxaddi_u(i0, r0, i1) _bxaddi_u(_jit, i0, r0, i1) -static jit_word_t _bxaddi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bosubr(i0, r0, r1) _bosubr(_jit, i0, r0, r1) -static jit_word_t _bosubr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bosubi(i0, r0, i1) _bosubi(_jit, i0, r0, i1) -static jit_word_t _bosubi(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bosubr_u(i0, r0, r1) _bosubr_u(_jit, i0, r0, r1) -static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bosubi_u(i0, r0, i1) _bosubi_u(_jit, i0, r0, i1) -static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bxsubr(i0, r0, r1) _bxsubr(_jit, i0, r0, r1) -static jit_word_t _bxsubr(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bxsubi(i0, r0, i1) _bxsubi(_jit, i0, r0, i1) -static jit_word_t _bxsubi(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define bxsubr_u(i0, r0, r1) _bxsubr_u(_jit, i0, r0, r1) -static jit_word_t _bxsubr_u(jit_state_t*,jit_word_t,int32_t,int32_t); -# define bxsubi_u(i0, r0, i1) _bxsubi_u(_jit, i0, r0, i1) -static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,int32_t,jit_word_t); -# define callr(r0) _callr(_jit, r0) -static void _callr(jit_state_t*, int32_t); -# define calli(i0) _calli(_jit, i0) -static jit_word_t _calli(jit_state_t*, jit_word_t); -# define jmpr(r0) _jmpr(_jit, r0) -static void _jmpr(jit_state_t*, int32_t); -# define jmpi(i0) _jmpi(_jit, i0) -static jit_word_t _jmpi(jit_state_t*, jit_word_t); -# define jmpsi(i0) _jmpsi(_jit, i0) -static void _jmpsi(jit_state_t*, uint8_t); -# if !defined(HAVE_FFSL) -# if __X32 -# define ffsl(i) ffs(i) -# else -static int ffsl(long); -# endif 
-# endif +#if __X32 || __X64_32 +# define WIDE 0 +# define IF_WIDE(wide, narrow) narrow +#else +# define WIDE 1 +# define IF_WIDE(wide, narrow) wide #endif -#if CODE +#define _RAX_REGNO 0 +#define _RCX_REGNO 1 +#define _RDX_REGNO 2 +#define _RBX_REGNO 3 +#define _RSP_REGNO 4 +#define _RBP_REGNO 5 +#define _RSI_REGNO 6 +#define _RDI_REGNO 7 +#define _R8_REGNO 8 +#define _R9_REGNO 9 +#define _R10_REGNO 10 +#define _R11_REGNO 11 +#define _R12_REGNO 12 +#define _R13_REGNO 13 +#define _R14_REGNO 14 +#define _R15_REGNO 15 +#define r7(reg) ((reg) & 7) +#define r8(reg) ((reg) & 15) +#if __X32 || __CYGWIN__ || __X64_32 +# define reg8_p(rn) ((rn) >= _RAX_REGNO && (rn) <= _RBX_REGNO) +#else +# define reg8_p(rn) 1 +#endif + +#define can_sign_extend_int_p(im) \ + IF_WIDE((((im) >= 0 && (long long)(im) <= 0x7fffffffLL) || \ + ((im) < 0 && (long long)(im) > -0x80000000LL)), \ + 1) +#define can_zero_extend_int_p(im) \ + IF_WIDE(((im) >= 0 && (im) < 0x80000000LL), \ + 1) +#define fits_uint32_p(im) \ + IF_WIDE((((im) & 0xffffffff00000000LL) == 0), \ + 1) + +#define _SCL1 0x00 +#define _SCL2 0x01 +#define _SCL4 0x02 +#define _SCL8 0x03 + +#define X86_ADD 0 +#define X86_OR 1 << 3 +#define X86_ADC 2 << 3 +#define X86_SBB 3 << 3 +#define X86_AND 4 << 3 +#define X86_SUB 5 << 3 +#define X86_XOR 6 << 3 +#define X86_CMP 7 << 3 +#define X86_ROL 0 +#define X86_ROR 1 +#define X86_RCL 2 +#define X86_RCR 3 +#define X86_SHL 4 +#define X86_SHR 5 +#define X86_SAR 7 +#define X86_NOT 2 +#define X86_NEG 3 +#define X86_MUL 4 +#define X86_IMUL 5 +#define X86_DIV 6 +#define X86_IDIV 7 + +#define FOR_EACH_CC(M) \ + M(o, O, 0x0) \ + M(no, NO, 0x1) \ + M(nae, NAE, 0x2) \ + M(b, B, 0x2) \ + M(c, C, 0x2) \ + M(ae, AE, 0x3) \ + M(nb, NB, 0x3) \ + M(nc, NC, 0x3) \ + M(e, E, 0x4) \ + M(z, Z, 0x4) \ + M(ne, NE, 0x5) \ + M(nz, NZ, 0x5) \ + M(be, BE, 0x6) \ + M(na, NA, 0x6) \ + M(a, A, 0x7) \ + M(nbe, NBE, 0x7) \ + M(s, S, 0x8) \ + M(ns, NS, 0x9) \ + M(p, P, 0xa) \ + M(pe, PE, 0xa) \ + M(np, NP, 0xb) \ + M(po, PO, 
0xb) \ + M(l, L, 0xc) \ + M(nge, NGE, 0xc) \ + M(ge, GE, 0xd) \ + M(nl_, NL, 0xd) \ + M(le, LE, 0xe) \ + M(ng, NG, 0xe) \ + M(g, G, 0xf) \ + M(nle, NLE, 0xf) \ + /* EOL */ + +enum x86_cc +{ +#define DEFINE_ENUM(cc, CC, code) X86_CC_##CC = code, + FOR_EACH_CC(DEFINE_ENUM) +#undef DEFINE_ENUM +}; + +static inline void +mrm(jit_state_t *_jit, uint8_t md, uint8_t r, uint8_t m) +{ + emit_u8(_jit, (md<<6) | (r<<3) | m); +} + +static inline void +sib(jit_state_t *_jit, uint8_t sc, uint8_t i, uint8_t b) +{ + emit_u8(_jit, (sc<<6) | (i<<3) | b); +} + +static inline void +ic(jit_state_t *_jit, uint8_t c) +{ + emit_u8(_jit, c); +} + +static inline void +is(jit_state_t *_jit, uint16_t s) +{ + emit_u16(_jit, s); +} + +static inline void +ii(jit_state_t *_jit, uint32_t i) +{ + emit_u32(_jit, i); +} + +static inline void +il(jit_state_t *_jit, unsigned long l) +{ +#if __X64 && !__X64_32 + emit_u64(_jit, l); +#else + ii(_jit, l); +#endif +} + static void -_rex(jit_state_t *_jit, int32_t l, int32_t w, - int32_t r, int32_t x, int32_t b) +rex(jit_state_t *_jit, int32_t l, int32_t w, + int32_t r, int32_t x, int32_t b) { #if __X64 - int32_t v = 0x40 | (w << 3); + int32_t v = 0x40 | (w << 3); - if (r != _NOREG) - v |= (r & 8) >> 1; - if (x != _NOREG) - v |= (x & 8) >> 2; - if (b != _NOREG) - v |= (b & 8) >> 3; - if (l || v != 0x40) - ic(v); + if (r != _NOREG) + v |= (r & 8) >> 1; + if (x != _NOREG) + v |= (x & 8) >> 2; + if (b != _NOREG) + v |= (b & 8) >> 3; + if (l || v != 0x40) + ic(_jit, v); #endif } static void -_rx(jit_state_t *_jit, int32_t rd, int32_t md, - int32_t rb, int32_t ri, int32_t ms) +rx(jit_state_t *_jit, int32_t rd, int32_t md, + int32_t rb, int32_t ri, int32_t ms) { - if (ri == _NOREG) { - if (rb == _NOREG) { + if (ri == _NOREG) { + if (rb == _NOREG) { #if __X32 - mrm(0x00, r7(rd), 0x05); + mrm(_jit, 0x00, r7(rd), 0x05); #else - mrm(0x00, r7(rd), 0x04); - sib(_SCL1, 0x04, 0x05); + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, _SCL1, 0x04, 0x05); #endif - ii(md); - } - 
else if (r7(rb) == _RSP_REGNO) { - if (md == 0) { - mrm(0x00, r7(rd), 0x04); - sib(ms, 0x04, 0x04); - } - else if ((int8_t)md == md) { - mrm(0x01, r7(rd), 0x04); - sib(ms, 0x04, 0x04); - ic(md); - } - else { - mrm(0x02, r7(rd), 0x04); - sib(ms, 0x04, 0x04); - ii(md); - } - } - else { - if (md == 0 && r7(rb) != _RBP_REGNO) - mrm(0x00, r7(rd), r7(rb)); - else if ((int8_t)md == md) { - mrm(0x01, r7(rd), r7(rb)); - ic(md); - } - else { - mrm(0x02, r7(rd), r7(rb)); - ii(md); - } - } + ii(_jit, md); + } else if (r7(rb) == _RSP_REGNO) { + if (md == 0) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + } + else if ((int8_t)md == md) { + mrm(_jit, 0x01, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + ic(_jit, md); + } else { + mrm(_jit, 0x02, r7(rd), 0x04); + sib(_jit, ms, 0x04, 0x04); + ii(_jit, md); + } + } else { + if (md == 0 && r7(rb) != _RBP_REGNO) + mrm(_jit, 0x00, r7(rd), r7(rb)); + else if ((int8_t)md == md) { + mrm(_jit, 0x01, r7(rd), r7(rb)); + ic(_jit, md); + } else { + mrm(_jit, 0x02, r7(rd), r7(rb)); + ii(_jit, md); + } } - else if (rb == _NOREG) { - mrm(0x00, r7(rd), 0x04); - sib(ms, r7(ri), 0x05); - ii(md); - } - else if (r8(ri) != _RSP_REGNO) { - if (md == 0 && r7(rb) != _RBP_REGNO) { - mrm(0x00, r7(rd), 0x04); - sib(ms, r7(ri), r7(rb)); - } - else if ((int8_t)md == md) { - mrm(0x01, r7(rd), 0x04); - sib(ms, r7(ri), r7(rb)); - ic(md); - } - else { - mrm(0x02, r7(rd), 0x04); - sib(ms, r7(ri), r7(rb)); - ic(md); - } - } - else { - fprintf(stderr, "illegal index register"); - abort(); + } + else if (rb == _NOREG) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, r7(ri), 0x05); + ii(_jit, md); + } + else if (r8(ri) != _RSP_REGNO) { + if (md == 0 && r7(rb) != _RBP_REGNO) { + mrm(_jit, 0x00, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + } else if ((int8_t)md == md) { + mrm(_jit, 0x01, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + ic(_jit, md); + } else { + mrm(_jit, 0x02, r7(rd), 0x04); + sib(_jit, ms, r7(ri), r7(rb)); + ic(_jit, md); } + } 
else { + fprintf(stderr, "illegal index register"); + abort(); + } } static void -_nop(jit_state_t *_jit, int32_t count) +pushr(jit_state_t *_jit, int32_t r0) { - switch (count) { - case 0: - break; - case 1: /* NOP */ - ic(0x90); break; - case 2: /* 66 NOP */ - ic(0x66); ic(0x90); - break; - case 3: /* NOP DWORD ptr [EAX] */ - ic(0x0f); ic(0x1f); ic(0x00); - break; - case 4: /* NOP DWORD ptr [EAX + 00H] */ - ic(0x0f); ic(0x1f); ic(0x40); ic(0x00); - break; - case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x0f); ic(0x1f); ic(0x44); ic(0x00); - ic(0x00); - break; - case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x44); - ic(0x00); ic(0x00); - break; - case 7: /* NOP DWORD ptr [EAX + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x80); ii(0x0000); - break; - case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x0f); ic(0x1f); ic(0x84); ic(0x00); - ii(0x0000); - break; - case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ - ic(0x66); ic(0x0f); ic(0x1f); ic(0x84); - ic(0x00); ii(0x0000); - break; - default: - abort(); - } + rex(_jit, 0, WIDE, 0, 0, r0); + ic(_jit, 0x50 | r7(r0)); } static void -_lea(jit_state_t *_jit, int32_t md, int32_t rb, +popr(jit_state_t *_jit, int32_t r0) +{ + rex(_jit, 0, WIDE, 0, 0, r0); + ic(_jit, 0x58 | r7(r0)); +} + +static int32_t +get_temp_gpr(jit_state_t *_jit) +{ + ASSERT(!_jit->temp_gpr_saved); + _jit->temp_gpr_saved = 1; +#if __X32 + pushr(_jit, _RBP_REGNO); + return _RBP_REGNO; +#else + return _R8_REGNO; +#endif +} + +static void +unget_temp_gpr(jit_state_t *_jit) +{ + ASSERT(_jit->temp_gpr_saved); + _jit->temp_gpr_saved = 0; +#if __X32 + popr(_jit, _RBP_REGNO); +#endif +} + +static void +nop(jit_state_t *_jit, int32_t count) +{ + switch (count) { + case 0: + break; + case 1: /* NOP */ + ic(_jit, 0x90); + break; + case 2: /* 66 NOP */ + ic(_jit, 0x66); ic(_jit, 0x90); + break; + case 3: /* NOP DWORD ptr [EAX] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x00); + break; + case 4: /* NOP 
DWORD ptr [EAX + 00H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x40); ic(_jit, 0x00); + break; + case 5: /* NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); ic(_jit, 0x00); + ic(_jit, 0x00); + break; + case 6: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00H] */ + ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x44); + ic(_jit, 0x00); ic(_jit, 0x00); + break; + case 7: /* NOP DWORD ptr [EAX + 00000000H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x80); ii(_jit, 0x0000); + break; + case 8: /* NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); ic(_jit, 0x00); + ii(_jit, 0x0000); + break; + case 9: /* 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] */ + ic(_jit, 0x66); ic(_jit, 0x0f); ic(_jit, 0x1f); ic(_jit, 0x84); + ic(_jit, 0x00); ii(_jit, 0x0000); + break; + default: + abort(); + } +} + +static void +movr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 != r1) { + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + ic(_jit, 0xc0 | (r1 << 3) | r7(r0)); + } +} + +static void +movcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +#if __X64 +static void +movir(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x63); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +movir_u(jit_state_t 
*_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + ic(_jit, 0xc0 | (r1 << 3) | r7(r0)); +} +#endif + +static jit_reloc_t +mov_addr(jit_state_t *_jit, int32_t r0) +{ + uint8_t *pc_start = _jit->pc.uc; + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + ptrdiff_t inst_start = _jit->pc.uc - pc_start; + return jit_reloc(_jit, JIT_RELOC_ABSOLUTE, inst_start, 0, 0); +} + +static void +imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ +#if __X64 +# if !__X64_32 + if (fits_uint32_p(i0)) { +# endif + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + ii(_jit, i0); +# if !__X64_32 + } else { + rex(_jit, 0, 1, _NOREG, _NOREG, r0); + ic(_jit, 0xb8 | r7(r0)); + il(_jit, i0); + } +# endif +#else + ic(_jit, 0xb8 | r7(r0)); + ii(_jit, i0); +#endif +} + +static void +alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, code | 0x01); + mrm(_jit, 0x03, r7(r1), r7(r0)); +} + +static inline void +icmpr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_CMP, r0, r1); +} +static inline void +iaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_ADD, r0, r1); +} +static inline void +iaddxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_ADC, r0, r1); +} +static inline void +isubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_SUB, r0, r1); +} +static inline void +isubxr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_SBB, r0, r1); +} +static inline void +iandr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_AND, r0, r1); +} +static inline void +iorr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_OR, r0, r1); +} +static inline void +ixorr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return alur(_jit, X86_XOR, r0, r1); +} + +static void +movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (i0) + 
imovi(_jit, r0, i0); + else + ixorr(_jit, r0, r0); +} + +static void +alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if ((int8_t)i0 == i0) { + ic(_jit, 0x83); + ic(_jit, 0xc0 | code | r7(r0)); + ic(_jit, i0); + } else { + if (r0 == _RAX_REGNO) { + ic(_jit, code | 0x05); + } else { + ic(_jit, 0x81); + ic(_jit, 0xc0 | code | r7(r0)); + } + ii(_jit, i0); + } + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + alur(_jit, code, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static inline void +icmpi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_CMP, r0, i0); +} +static inline void +iaddi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_ADD, r0, i0); +} +static inline void +iaddxi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_ADC, r0, i0); +} +static inline void +isubi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_SUB, r0, i0); +} +static inline void +isubxi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_SBB, r0, i0); +} +static inline void +iandi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_AND, r0, i0); +} +static inline void +iori(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_OR, r0, i0); +} +static inline void +ixori(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + return alui(_jit, X86_XOR, r0, i0); +} + +static void +unr(jit_state_t *_jit, int32_t code, int32_t r0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xf7); + mrm(_jit, 0x03, code, r7(r0)); +} + +static inline void +umulr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_IMUL, r0); +} +static inline void +umulr_u(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_MUL, r0); +} +static inline void +idivr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_IDIV, r0); +} 
+static inline void +idivr_u(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_DIV, r0); +} +static inline void +inegr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_NEG, r0); +} +static inline void +icomr(jit_state_t *_jit, int32_t r0) +{ + return unr(_jit, X86_NOT, r0); +} + +#if USE_INC_DEC +static void +incr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); +# if __X64 + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + ic(_jit, 0xc0 | r7(r0)); +# else + ic(_jit, 0x40 | r7(r0)); +# endif +} + +static void +decr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); +# if __X64 + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + ic(_jit, 0xc8 | r7(r0)); +# else + ic(_jit, 0x48 | r7(r0)); +# endif +} +#endif + +static void +lea(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) { - rex(0, WIDE, rd, ri, rb); - ic(0x8d); - rx(rd, md, rb, ri, ms); + rex(_jit, 0, WIDE, rd, ri, rb); + ic(_jit, 0x8d); + rx(_jit, rd, md, rb, ri, ms); } static void -_pushr(jit_state_t *_jit, int32_t r0) +xchgr(jit_state_t *_jit, int32_t r0, int32_t r1) { - rex(0, WIDE, 0, 0, r0); - ic(0x50 | r7(r0)); + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, 0x87); + mrm(_jit, 0x03, r7(r1), r7(r0)); } static void -_popr(jit_state_t *_jit, int32_t r0) +testr(jit_state_t *_jit, int32_t r0, int32_t r1) { - rex(0, WIDE, 0, 0, r0); - ic(0x58 | r7(r0)); + rex(_jit, 0, WIDE, r1, _NOREG, r0); + ic(_jit, 0x85); + mrm(_jit, 0x03, r7(r1), r7(r0)); } static void -_xchgr(jit_state_t *_jit, int32_t r0, int32_t r1) +testi(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - rex(0, WIDE, r1, _NOREG, r0); - ic(0x87); - mrm(0x03, r7(r1), r7(r0)); + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if (r0 == _RAX_REGNO) { + ic(_jit, 0xa9); + } else { + ic(_jit, 0xf7); + mrm(_jit, 0x03, 0x00, r7(r0)); + } + ii(_jit, i0); } static void -_testr(jit_state_t *_jit, int32_t r0, int32_t r1) +cc(jit_state_t *_jit, int32_t code, int32_t r0) 
{ - rex(0, WIDE, r1, _NOREG, r0); - ic(0x85); - mrm(0x03, r7(r1), r7(r0)); + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0x0f); + ic(_jit, 0x90 | code); + mrm(_jit, 0x03, 0x00, r7(r0)); } static void -_testi(jit_state_t *_jit, int32_t r0, jit_word_t i0) +negr(jit_state_t *_jit, int32_t r0, int32_t r1) { - rex(0, WIDE, _NOREG, _NOREG, r0); - if (r0 == _RAX_REGNO) - ic(0xa9); - else { - ic(0xf7); - mrm(0x03, 0x00, r7(r0)); - } - ii(i0); + if (r0 == r1) { + inegr(_jit, r0); + } else { + ixorr(_jit, r0, r0); + isubr(_jit, r0, r1); + } } static void -_cc(jit_state_t *_jit, int32_t code, int32_t r0) +addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - rex(0, 0, _NOREG, _NOREG, r0); - ic(0x0f); - ic(0x90 | code); - mrm(0x03, 0x00, r7(r0)); + if (r0 == r1) + iaddr(_jit, r0, r2); + else if (r0 == r2) + iaddr(_jit, r0, r1); + else + lea(_jit, 0, r1, r2, _SCL1, r0); } static void -_alur(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r1, _NOREG, r0); - ic(code | 0x01); - mrm(0x03, r7(r1), r7(r0)); -} - -static void -_alui(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, _NOREG, _NOREG, r0); - if ((int8_t)i0 == i0) { - ic(0x83); - ic(0xc0 | code | r7(r0)); - ic(i0); - } - else { - if (r0 == _RAX_REGNO) - ic(code | 0x05); - else { - ic(0x81); - ic(0xc0 | code | r7(r0)); - } - ii(i0); - } - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - alur(code, r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_save(jit_state_t *_jit, int32_t r0) -{ - if (!_jitc->function->regoff[r0]) { - _jitc->function->regoff[r0] = jit_allocai(sizeof(jit_word_t)); - _jitc->again = 1; - } - assert(!jit_regset_tstbit(&_jitc->regsav, r0)); - jit_regset_setbit(&_jitc->regsav, r0); - stxi(_jitc->function->regoff[r0], _RBP_REGNO, r0); -} - -static void -_load(jit_state_t *_jit, int32_t r0) -{ - assert(_jitc->function->regoff[r0]); - 
assert(jit_regset_tstbit(&_jitc->regsav, r0)); - jit_regset_clrbit(&_jitc->regsav, r0); - ldxi(r0, _RBP_REGNO, _jitc->function->regoff[r0]); -} - -static void -_addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { + if (i0 == 0) + movr(_jit, r0, r1); +#if USE_INC_DEC + else if (i0 == 1) + incr(_jit, r0, r1); + else if (i0 == -1) + decr(_jit, r0, r1); +#endif + else if (can_sign_extend_int_p(i0)) { if (r0 == r1) - iaddr(r0, r2); - else if (r0 == r2) - iaddr(r0, r1); + iaddi(_jit, r0, i0); else - lea(0, r1, r2, _SCL1, r0); + lea(_jit, i0, r1, _NOREG, _SCL1, r0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + iaddr(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iaddr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t reg; - if (i0 == 0) - movr(r0, r1); + if (r0 == r2) { + iaddr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iaddr(_jit, r0, r2); + } +} + +static void +addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iaddi(_jit, r0, i0); + } + else if (r0 == r1) { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iaddr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } else { + movi(_jit, r0, i0); + iaddr(_jit, r0, r1); + } +} + +static void +addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2) { + iaddxr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iaddxr(_jit, r0, r2); + } +} + +static void +addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iaddxi(_jit, r0, i0); + } + else if (r0 == r1) { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iaddxr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } 
else { + movi(_jit, r0, i0); + iaddxr(_jit, r0, r1); + } +} + +static void +subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) + ixorr(_jit, r0, r0); + else if (r0 == r2) { + isubr(_jit, r0, r1); + inegr(_jit, r0); + } else { + movr(_jit, r0, r1); + isubr(_jit, r0, r2); + } +} + +static void +subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); #if USE_INC_DEC - else if (i0 == 1) - incr(r0, r1); - else if (i0 == -1) - decr(r0, r1); + else if (i0 == 1) + decr(_jit, r0, r1); + else if (i0 == -1) + incr(_jit, r0, r1); #endif - else if (can_sign_extend_int_p(i0)) { - if (r0 == r1) - iaddi(r0, i0); - else - lea(i0, r1, _NOREG, _SCL1, r0); - } - else if (r0 != r1) { - movi(r0, i0); - iaddr(r0, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - iaddr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r0 == r2) - iaddr(r0, r1); - else { - movr(r0, r1); - iaddr(r0, r2); - } -} - -static void -_addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - movr(r0, r1); - iaddi(r0, i0); - } - else if (r0 == r1) { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - iaddr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - movi(r0, i0); - iaddr(r0, r1); - } -} - -static void -_addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r0 == r2) - iaddxr(r0, r1); - else { - movr(r0, r1); - iaddxr(r0, r2); - } -} - -static void -_addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - movr(r0, r1); - iaddxi(r0, i0); - } - else if (r0 == r1) { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - iaddxr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - movi(r0, i0); - iaddxr(r0, r1); - } -} - -static void -_subr(jit_state_t *_jit, int32_t r0, int32_t r1, 
int32_t r2) -{ - if (r1 == r2) - ixorr(r0, r0); - else if (r0 == r2) { - isubr(r0, r1); - inegr(r0); - } - else { - movr(r0, r1); - isubr(r0, r2); - } -} - -static void -_subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - if (i0 == 0) - movr(r0, r1); -#if USE_INC_DEC - else if (i0 == 1) - decr(r0, r1); - else if (i0 == -1) - incr(r0, r1); -#endif - else if (can_sign_extend_int_p(i0)) { - if (r0 == r1) - isubi(r0, i0); - else - lea(-i0, r1, _NOREG, _SCL1, r0); - } - else if (r0 != r1) { - movi(r0, -i0); - iaddr(r0, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - isubr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r2 && r0 != r1) { - reg = jit_get_reg(jit_class_gpr); - movr(rn(reg), r0); - movr(r0, r1); - isubr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - movr(r0, r1); - isubr(r0, r2); - } -} - -static void -_subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - movr(r0, r1); - if (can_sign_extend_int_p(i0)) - isubi(r0, i0); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - isubr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r2 && r0 != r1) { - reg = jit_get_reg(jit_class_gpr); - movr(rn(reg), r0); - movr(r0, r1); - isubxr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - movr(r0, r1); - isubxr(r0, r2); - } -} - -static void -_subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - movr(r0, r1); - if (can_sign_extend_int_p(i0)) - isubxi(r0, i0); - else { - reg = jit_get_reg(jit_class_gpr); - imovi(rn(reg), i0); - isubxr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - subi(r0, r1, i0); - negr(r0, r0); -} - -static void -_imulr(jit_state_t 
*_jit, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xaf); - mrm(0x03, r7(r0), r7(r1)); -} - -static void -_imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, r1); - if ((int8_t)i0 == i0) { - ic(0x6b); - mrm(0x03, r7(r0), r7(r1)); - ic(i0); - } - else { - ic(0x69); - mrm(0x03, r7(r0), r7(r1)); - ii(i0); - } - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - imulr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ + else if (can_sign_extend_int_p(i0)) { if (r0 == r1) - imulr(r0, r2); - else if (r0 == r2) - imulr(r0, r1); - else { - movr(r0, r1); - imulr(r0, r2); - } + isubi(_jit, r0, i0); + else + lea(_jit, -i0, r1, _NOREG, _SCL1, r0); + } + else if (r0 != r1) { + movi(_jit, r0, -i0); + iaddr(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + isubr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - switch (i0) { - case 0: - ixorr(r0, r0); - break; - case 1: - movr(r0, r1); - break; - case -1: - negr(r0, r1); - break; - case 2: - lea(0, _NOREG, r1, _SCL2, r0); - break; - case 4: - lea(0, _NOREG, r1, _SCL4, r0); - break; - case 8: - lea(0, _NOREG, r1, _SCL8, r0); - break; - default: - if (i0 > 0 && !(i0 & (i0 - 1))) - lshi(r0, r1, ffsl(i0) - 1); - else if (can_sign_extend_int_p(i0)) - imuli(r0, r1, i0); - else if (r0 != r1) { - movi(r0, i0); - imulr(r0, r1); - } - else - imuli(r0, r0, i0); - break; - } + if (r0 == r2 && r0 != r1) { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + movr(_jit, r0, r1); + isubr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } else { + movr(_jit, r0, r1); + isubr(_jit, r0, r2); + } } -#define savset(rn) \ - if (r0 != rn) { \ - sav |= 
1 << rn; \ - if (r1 != rn && r2 != rn) \ - set |= 1 << rn; \ - } -#define isavset(rn) \ - if (r0 != rn) { \ - sav |= 1 << rn; \ - if (r1 != rn) \ - set |= 1 << rn; \ - } -#define qsavset(rn) \ - if (r0 != rn && r1 != rn) { \ - sav |= 1 << rn; \ - if (r2 != rn && r3 != rn) \ - set |= 1 << rn; \ - } -#define allocr(rn, rv) \ - if (set & (1 << rn)) \ - (void)jit_get_reg(rv|jit_class_gpr|jit_class_named); \ - if (sav & (1 << rn)) { \ - if ( jit_regset_tstbit(&_jitc->regsav, rv) || \ - !jit_regset_tstbit(&_jitc->reglive, rv)) \ - sav &= ~(1 << rn); \ - else \ - save(rv); \ - } -#define clear(rn, rv) \ - if (set & (1 << rn)) \ - jit_unget_reg(rv); \ - if (sav & (1 << rn)) \ - load(rv); static void -_iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1, - int32_t r2, int32_t r3, jit_bool_t sign) +subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t mul; - int32_t sav; - int32_t set; + movr(_jit, r0, r1); + if (can_sign_extend_int_p(i0)) { + isubi(_jit, r0, i0); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + isubr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} - sav = set = 0; - qsavset(_RDX_REGNO); - qsavset(_RAX_REGNO); - allocr(_RDX_REGNO, _RDX); - allocr(_RAX_REGNO, _RAX); +static void +subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r2 && r0 != r1) { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + movr(_jit, r0, r1); + isubxr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } else { + movr(_jit, r0, r1); + isubxr(_jit, r0, r2); + } +} - if (r3 == _RAX_REGNO) - mul = r2; - else { - mul = r3; - movr(_RAX_REGNO, r2); +static void +subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(_jit, r0, r1); + if (can_sign_extend_int_p(i0)) { + isubxi(_jit, r0, i0); + } else { + int32_t reg = get_temp_gpr(_jit); + imovi(_jit, rn(reg), i0); + isubxr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +irotshr(jit_state_t *_jit, int32_t code, int32_t r0) +{ 
+ rex(_jit, 0, WIDE, _RCX_REGNO, _NOREG, r0); + ic(_jit, 0xd3); + mrm(_jit, 0x03, code, r7(r0)); +} + +static void +rotshr(jit_state_t *_jit, int32_t code, + int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == _RCX_REGNO) { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + if (r2 != _RCX_REGNO) + movr(_jit, _RCX_REGNO, r2); + irotshr(_jit, code, rn(reg)); + movr(_jit, _RCX_REGNO, rn(reg)); + unget_temp_gpr(_jit); + } else if (r2 != _RCX_REGNO) { + /* Already know that R0 isn't RCX. */ + pushr(_jit, _RCX_REGNO); + if (r1 == _RCX_REGNO) { + if (r0 == r2) + xchgr(_jit, r0, _RCX_REGNO); + else { + movr(_jit, r0, r1); + movr(_jit, _RCX_REGNO, r2); + } + } else { + movr(_jit, _RCX_REGNO, r2); + movr(_jit, r0, r1); } + irotshr(_jit, code, r0); + popr(_jit, _RCX_REGNO); + } else { + movr(_jit, r0, r1); + irotshr(_jit, code, r0); + } +} + +static void +irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + if (i0 == 1) { + ic(_jit, 0xd1); + mrm(_jit, 0x03, code, r7(r0)); + } else { + ic(_jit, 0xc1); + mrm(_jit, 0x03, code, r7(r0)); + ic(_jit, i0); + } +} + +static void +rotshi(jit_state_t *_jit, int32_t code, + int32_t r0, int32_t r1, jit_word_t i0) +{ + movr(_jit, r0, r1); + if (i0) + irotshi(_jit, code, r0, i0); +} + +static void +lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 <= 3) + lea(_jit, 0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? 
_SCL4 : _SCL8, r0); + else + rotshi(_jit, X86_SHL, r0, r1, i0); +} + +static void +lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SHL, r0, r1, r2); +} + +static void +rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SAR, r0, r1, r2); +} + +static void +rshi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return rotshi(_jit, X86_SAR, r0, r1, i0); +} + +static void +rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return rotshr(_jit, X86_SHR, r0, r1, r2); +} + +static void +rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) +{ + return rotshi(_jit, X86_SHR, r0, r1, i0); +} + +static void +imulr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xaf); + mrm(_jit, 0x03, r7(r0), r7(r1)); +} + +static void +imuli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + if ((int8_t)i0 == i0) { + ic(_jit, 0x6b); + mrm(_jit, 0x03, r7(r0), r7(r1)); + ic(_jit, i0); + } else { + ic(_jit, 0x69); + mrm(_jit, 0x03, r7(r0), r7(r1)); + ii(_jit, i0); + } + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + imulr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + imulr(_jit, r0, r2); + else if (r0 == r2) { + imulr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + imulr(_jit, r0, r2); + } +} + +static int +ffsw(jit_word_t i) +{ + if (sizeof(int) == sizeof(i)) + return ffs(i); + int bit = ffs((int)i); + if (bit == 0) { + bit = ffs((int)((unsigned long)i >> 32)); + if (bit) + bit += 32; + } + return bit; +} + +static void +muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + switch (i0) { + case 0: + ixorr(_jit, r0, r0); + break; + case 1: + movr(_jit, r0, r1); + break; + case -1: + negr(_jit, 
r0, r1); + break; + case 2: + lea(_jit, 0, _NOREG, r1, _SCL2, r0); + break; + case 4: + lea(_jit, 0, _NOREG, r1, _SCL4, r0); + break; + case 8: + lea(_jit, 0, _NOREG, r1, _SCL8, r0); + break; + default: + if (i0 > 0 && !(i0 & (i0 - 1))) + lshi(_jit, r0, r1, ffsw(i0) - 1); + else if (can_sign_extend_int_p(i0)) + imuli(_jit, r0, r1, i0); + else if (r0 != r1) { + movi(_jit, r0, i0); + imulr(_jit, r0, r1); + } + else + imuli(_jit, r0, r0, i0); + break; + } +} + +static void +iqmulr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int32_t mul; + if (r3 == _RAX_REGNO) { + mul = r2; + } else { + mul = r3; + movr(_jit, _RAX_REGNO, r2); + } + if (sign) + umulr(_jit, mul); + else + umulr_u(_jit, mul); + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) { + xchgr(_jit, _RAX_REGNO, _RDX_REGNO); + } else { + if (r0 != _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + movr(_jit, r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + } + + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0, r1, r2, r3, 1); +} + +static void +qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqmulr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + if (i0 == 0) { + ixorr(_jit, r0, r0); + ixorr(_jit, r1, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); if (sign) - umulr(mul); + qmulr(_jit, r0, r1, r2, rn(reg)); else - umulr_u(mul); + qmulr_u(_jit, r0, r1, r2, rn(reg)); + unget_temp_gpr(_jit); + } +} - if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) - 
xchgr(_RAX_REGNO, _RDX_REGNO); - else { - if (r0 != _RDX_REGNO) - movr(r0, _RAX_REGNO); - movr(r1, _RDX_REGNO); - if (r0 == _RDX_REGNO) - movr(r0, _RAX_REGNO); +static void +qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqmuli(_jit, r0, r1, r2, i0, 1); +} + +static void +qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqmuli(_jit, r0, r1, r2, i0, 0); +} + +static void +sign_extend_rdx_rax(jit_state_t *_jit) +{ + rex(_jit, 0, WIDE, 0, 0, 0); + ic(_jit, 0x99); +} + +static void +divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, + jit_bool_t sign, jit_bool_t divide) +{ + if (r0 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int tmp_divisor = 0; + if (r2 == _RAX_REGNO || r2 == _RDX_REGNO) { + int32_t tmp = get_temp_gpr(_jit); + movr(_jit, tmp, r2); + r2 = tmp; + tmp_divisor = 1; + } + + movr(_jit, _RAX_REGNO, r1); + + if (sign) { + sign_extend_rdx_rax(_jit); + idivr(_jit, r2); + } else { + ixorr(_jit, _RDX_REGNO, _RDX_REGNO); + idivr_u(_jit, r2); + } + + if (divide) + movr(_jit, r0, _RAX_REGNO); + else + movr(_jit, r0, _RDX_REGNO); + + if (tmp_divisor) + unget_temp_gpr(_jit); + + if (r0 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0, + jit_bool_t sign, jit_bool_t divide) +{ + int32_t r2 = get_temp_gpr(_jit); + movi(_jit, r2, i0); + + divremr(_jit, r0, r1, r2, sign, divide); +} + +static void +divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 1, 1); +} + +static void +divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 1, 1); +} + +static void +divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 0, 1); +} + +static void +divi_u(jit_state_t *_jit, int32_t r0, 
int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 0, 1); +} + + +static void +remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 1, 0); +} + +static void +remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 1, 0); +} + +static void +remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + return divremr(_jit, r0, r1, r2, 0, 0); +} + +static void +remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + return divremi(_jit, r0, r1, i0, 0, 0); +} + +static void +iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, int32_t r3, jit_bool_t sign) +{ + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + pushr(_jit, _RAX_REGNO); + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + pushr(_jit, _RDX_REGNO); + + int tmp_divisor = 0; + if (r3 == _RAX_REGNO || r3 == _RDX_REGNO) { + int32_t tmp = get_temp_gpr(_jit); + movr(_jit, tmp, r3); + r3 = tmp; + tmp_divisor = 1; + } + + movr(_jit, _RAX_REGNO, r2); + + if (sign) { + sign_extend_rdx_rax(_jit); + idivr(_jit, r3); + } else { + ixorr(_jit, _RDX_REGNO, _RDX_REGNO); + idivr_u(_jit, r3); + } + + if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) { + xchgr(_jit, _RAX_REGNO, _RDX_REGNO); + } else { + if (r0 != _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + movr(_jit, r1, _RDX_REGNO); + if (r0 == _RDX_REGNO) + movr(_jit, r0, _RAX_REGNO); + } + + if (tmp_divisor) + unget_temp_gpr(_jit); + + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + popr(_jit, _RDX_REGNO); + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + popr(_jit, _RAX_REGNO); +} + +static void +qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0, r1, r2, r3, 1); +} + +static void +qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3) +{ + return iqdivr(_jit, r0, r1, r2, r3, 0); +} + +static void +iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1, + int32_t r2, jit_word_t i0, jit_bool_t sign) +{ + 
int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + if (sign) + qdivr(_jit, r0, r1, r2, rn(reg)); + else + qdivr_u(_jit, r0, r1, r2, rn(reg)); + unget_temp_gpr(_jit); +} + +static void +qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqdivi(_jit, r0, r1, r2, i0, 1); +} + +static void +qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0) +{ + return iqdivi(_jit, r0, r1, r2, i0, 0); +} + +static void +comr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); + icomr(_jit, r0); +} + +static void +andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r1 == r2) + movr(_jit, r0, r1); + else if (r0 == r1) + iandr(_jit, r0, r2); + else if (r0 == r2) { + iandr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iandr(_jit, r0, r2); + } +} + +static void +andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + + if (i0 == 0) + ixorr(_jit, r0, r0); + else if (i0 == -1) + movr(_jit, r0, r1); + else if (r0 == r1) { + if (can_sign_extend_int_p(i0)) { + iandi(_jit, r0, i0); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iandr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); } - - clear(_RDX_REGNO, _RDX); - clear(_RAX_REGNO, _RAX); + } else { + movi(_jit, r0, i0); + iandr(_jit, r0, r1); + } } static void -_iqmuli(jit_state_t *_jit, int32_t r0, int32_t r1, - int32_t r2, jit_word_t i0, jit_bool_t sign) +orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t reg; - - if (i0 == 0) { - ixorr(r0, r0); - ixorr(r1, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - if (sign) - qmulr(r0, r1, r2, rn(reg)); - else - qmulr_u(r0, r1, r2, rn(reg)); - jit_unget_reg(reg); - } + if (r1 == r2) + movr(_jit, r0, r1); + else if (r0 == r1) + iorr(_jit, r0, r2); + else if (r0 == r2) { + iorr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + iorr(_jit, r0, r2); + } } static void -_sign_extend_rdx_rax(jit_state_t *_jit) 
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - rex(0, WIDE, 0, 0, 0); - ic(0x99); + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 == -1) + movi(_jit, r0, -1); + else if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + iori(_jit, r0, i0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + iorr(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + iorr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_divremr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, - jit_bool_t sign, jit_bool_t divide) +xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t div; - int32_t reg; - int32_t set; - int32_t sav; - int32_t use; - - sav = set = use = 0; - savset(_RDX_REGNO); - savset(_RAX_REGNO); - allocr(_RDX_REGNO, _RDX); - allocr(_RAX_REGNO, _RAX); - - if (r2 == _RAX_REGNO) { - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { - if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | - jit_class_gpr|jit_class_named); - use = 1; - div = rn(reg); - movr(div, _RAX_REGNO); - if (r1 != _RAX_REGNO) - movr(_RAX_REGNO, r1); - } - else { - if (r0 == r1) - xchgr(r0, _RAX_REGNO); - else { - if (r0 != _RAX_REGNO) - movr(r0, _RAX_REGNO); - if (r1 != _RAX_REGNO) - movr(_RAX_REGNO, r1); - } - div = r0; - } - } - else if (r2 == _RDX_REGNO) { - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { - if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg((r1 == _RCX_REGNO ? 
_RBX : _RCX) | - jit_class_gpr|jit_class_named); - use = 1; - div = rn(reg); - movr(div, _RDX_REGNO); - if (r1 != _RAX_REGNO) - movr(_RAX_REGNO, r1); - } - else { - if (r1 != _RAX_REGNO) - movr(_RAX_REGNO, r1); - movr(r0, _RDX_REGNO); - div = r0; - } - } - else { - if (r1 != _RAX_REGNO) - movr(_RAX_REGNO, r1); - div = r2; - } - - if (sign) { - sign_extend_rdx_rax(); - idivr(div); - } - else { - ixorr(_RDX_REGNO, _RDX_REGNO); - idivr_u(div); - } - - if (use) - jit_unget_reg(reg); - - if (divide) - movr(r0, _RAX_REGNO); - else - movr(r0, _RDX_REGNO); - - clear(_RDX_REGNO, _RDX); - clear(_RAX_REGNO, _RAX); + if (r1 == r2) + ixorr(_jit, r0, r0); + else if (r0 == r1) + ixorr(_jit, r0, r2); + else if (r0 == r2) { + ixorr(_jit, r0, r1); + } else { + movr(_jit, r0, r1); + ixorr(_jit, r0, r2); + } } static void -_divremi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0, - jit_bool_t sign, jit_bool_t divide) +xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - int32_t div; - int32_t sav; - int32_t set; - int32_t use; - - if (divide) { - switch (i0) { - case 1: - movr(r0, r1); - return; - case -1: - if (sign) { - negr(r0, r1); - return; - } - break; - default: - if (i0 > 0 && !(i0 & (i0 - 1))) { - movr(r0, r1); - if (sign) - rshi(r0, r0, ffsl(i0) - 1); - else - rshi_u(r0, r0, ffsl(i0) - 1); - return; - } - break; - } - } - else if (i0 == 1 || (sign && i0 == -1)) { - ixorr(r0, r0); - return; - } - else if (!sign && i0 > 0 && !(i0 & (i0 - 1))) { - if (can_sign_extend_int_p(i0)) { - movr(r0, r1); - iandi(r0, i0 - 1); - } - else if (r0 != r1) { - movi(r0, i0 - 1); - iandr(r0, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0 - 1); - iandr(r0, rn(reg)); - jit_unget_reg(reg); - } - return; - } - - sav = set = use = 0; - isavset(_RDX_REGNO); - isavset(_RAX_REGNO); - allocr(_RDX_REGNO, _RDX); - allocr(_RAX_REGNO, _RAX); - - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) { - if ((reg = 
jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | - jit_class_gpr|jit_class_named); - use = 1; - div = rn(reg); - } - else - div = r0; - - movi(div, i0); - movr(_RAX_REGNO, r1); - - if (sign) { - sign_extend_rdx_rax(); - idivr(div); - } - else { - ixorr(_RDX_REGNO, _RDX_REGNO); - idivr_u(div); - } - - if (use) - jit_unget_reg(reg); - - if (divide) - movr(r0, _RAX_REGNO); - else - movr(r0, _RDX_REGNO); - - clear(_RDX_REGNO, _RDX); - clear(_RAX_REGNO, _RAX); + if (i0 == 0) + movr(_jit, r0, r1); + else if (i0 == -1) + comr(_jit, r0, r1); + else if (can_sign_extend_int_p(i0)) { + movr(_jit, r0, r1); + ixori(_jit, r0, i0); + } + else if (r0 != r1) { + movi(_jit, r0, i0); + ixorr(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ixorr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_iqdivr(jit_state_t *_jit, int32_t r0, int32_t r1, - int32_t r2, int32_t r3, jit_bool_t sign) +cr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1, int32_t r2) { - int32_t div; - int32_t reg; - int32_t sav; - int32_t set; - int32_t use; - - sav = set = use = 0; - qsavset(_RDX_REGNO); - qsavset(_RAX_REGNO); - allocr(_RDX_REGNO, _RDX); - allocr(_RAX_REGNO, _RAX); - if (r3 == _RAX_REGNO) { - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { - if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | - jit_class_gpr|jit_class_named); - use = 1; - div = rn(reg); - movr(div, _RAX_REGNO); - if (r2 != _RAX_REGNO) - movr(_RAX_REGNO, r2); - } - else { - if (r0 == r2) - xchgr(r0, _RAX_REGNO); - else { - if (r0 != _RAX_REGNO) - movr(r0, _RAX_REGNO); - if (r2 != _RAX_REGNO) - movr(_RAX_REGNO, r2); - } - div = r0; - } - } - else if (r3 == _RDX_REGNO) { - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { - if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg((r1 == _RCX_REGNO ? 
_RBX : _RCX) | - jit_class_gpr|jit_class_named); - use = 1; - div = rn(reg); - movr(div, _RDX_REGNO); - if (r2 != _RAX_REGNO) - movr(_RAX_REGNO, r2); - } - else { - if (r2 != _RAX_REGNO) - movr(_RAX_REGNO, r2); - movr(r0, _RDX_REGNO); - div = r0; - } - } - else { - if (r2 != _RAX_REGNO) - movr(_RAX_REGNO, r2); - div = r3; - } - if (sign) { - sign_extend_rdx_rax(); - idivr(div); - } - else { - ixorr(_RDX_REGNO, _RDX_REGNO); - idivr_u(div); - } - if (use) - jit_unget_reg(reg); - - if (r0 == _RDX_REGNO && r1 == _RAX_REGNO) - xchgr(_RAX_REGNO, _RDX_REGNO); - else { - if (r0 != _RDX_REGNO) - movr(r0, _RAX_REGNO); - movr(r1, _RDX_REGNO); - if (r0 == _RDX_REGNO) - movr(r0, _RAX_REGNO); - } - - clear(_RDX_REGNO, _RDX); - clear(_RAX_REGNO, _RAX); + if (reg8_p(r0)) { + jit_bool_t same = r0 == r1 || r0 == r2; + if (!same) + ixorr(_jit, r0, r0); + icmpr(_jit, r1, r2); + if (same) + imovi(_jit, r0, 0); + cc(_jit, code, r0); + } else { + int32_t reg = get_temp_gpr(_jit); + ixorr(_jit, rn(reg), rn(reg)); + icmpr(_jit, r1, r2); + cc(_jit, code, rn(reg)); + movr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_iqdivi(jit_state_t *_jit, int32_t r0, int32_t r1, - int32_t r2, jit_word_t i0, jit_bool_t sign) +ci(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - if (sign) - qdivr(r0, r1, r2, rn(reg)); - else - qdivr_u(r0, r1, r2, rn(reg)); - jit_unget_reg(reg); -} -#undef clear -#undef allocr -#undef savset - -static void -_andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movr(r0, r1); - else if (r0 == r1) - iandr(r0, r2); - else if (r0 == r2) - iandr(r0, r1); - else { - movr(r0, r1); - iandr(r0, r2); - } + if (reg8_p(r0)) { + jit_bool_t same = r0 == r1; + if (!same) + ixorr(_jit, r0, r0); + icmpi(_jit, r1, i0); + if (same) + imovi(_jit, r0, 0); + cc(_jit, code, r0); + } else { + int32_t reg = get_temp_gpr(_jit); + ixorr(_jit, rn(reg), 
rn(reg)); + icmpi(_jit, r1, i0); + cc(_jit, code, rn(reg)); + movr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ci0(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) { - int32_t reg; - - if (i0 == 0) - ixorr(r0, r0); - else if (i0 == -1) - movr(r0, r1); - else if (r0 == r1) { - if (can_sign_extend_int_p(i0)) - iandi(r0, i0); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - iandr(r0, rn(reg)); - jit_unget_reg(reg); - } - } - else { - movi(r0, i0); - iandr(r0, r1); - } + if (reg8_p(r0)) { + jit_bool_t same = r0 == r1; + if (!same) + ixorr(_jit, r0, r0); + testr(_jit, r1, r1); + if (same) + imovi(_jit, r0, 0); + cc(_jit, code, r0); + } else { + int32_t reg = get_temp_gpr(_jit); + ixorr(_jit, rn(reg), rn(reg)); + testr(_jit, r1, r1); + cc(_jit, code, rn(reg)); + movr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r1 == r2) - movr(r0, r1); - else if (r0 == r1) - iorr(r0, r2); - else if (r0 == r2) - iorr(r0, r1); - else { - movr(r0, r1); - iorr(r0, r2); - } + if (reg8_p(r1)) { + movcr(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + movcr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (i0 == 0) - movr(r0, r1); - else if (i0 == -1) - movi(r0, -1); - else if (can_sign_extend_int_p(i0)) { - movr(r0, r1); - iori(r0, i0); - } - else if (r0 != r1) { - movi(r0, i0); - iorr(r0, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - iorr(r0, rn(reg)); - jit_unget_reg(reg); - } + if (reg8_p(r1)) { + movcr_u(_jit, r0, r1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + movcr_u(_jit, r0, rn(reg)); + 
unget_temp_gpr(_jit); + } } static void -_xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +extr_s(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r1 == r2) - ixorr(r0, r0); - else if (r0 == r1) - ixorr(r0, r2); - else if (r0 == r2) - ixorr(r0, r1); - else { - movr(r0, r1); - ixorr(r0, r2); - } + return movsr(_jit, r0, r1); } static void -_xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +extr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (i0 == 0) - movr(r0, r1); - else if (i0 == -1) - comr(r0, r1); - else if (can_sign_extend_int_p(i0)) { - movr(r0, r1); - ixori(r0, i0); - } - else if (r0 != r1) { - movi(r0, i0); - ixorr(r0, r1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ixorr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_irotshr(jit_state_t *_jit, int32_t code, int32_t r0) -{ - rex(0, WIDE, _RCX_REGNO, _NOREG, r0); - ic(0xd3); - mrm(0x03, code, r7(r0)); -} - -static void -_rotshr(jit_state_t *_jit, int32_t code, - int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - int32_t use; - - if (r0 == _RCX_REGNO) { - reg = jit_get_reg(jit_class_gpr); - movr(rn(reg), r1); - if (r2 != _RCX_REGNO) - movr(_RCX_REGNO, r2); - irotshr(code, rn(reg)); - movr(_RCX_REGNO, rn(reg)); - jit_unget_reg(reg); - } - else if (r2 != _RCX_REGNO) { - use = !jit_reg_free_p(_RCX); - if (use) { - reg = jit_get_reg(jit_class_gpr); - movr(rn(reg), _RCX_REGNO); - } - else - reg = 0; - if (r1 == _RCX_REGNO) { - if (r0 == r2) - xchgr(r0, _RCX_REGNO); - else { - movr(r0, r1); - movr(_RCX_REGNO, r2); - } - } - else { - movr(_RCX_REGNO, r2); - movr(r0, r1); - } - irotshr(code, r0); - if (use) { - movr(_RCX_REGNO, rn(reg)); - jit_unget_reg(reg); - } - } - else { - movr(r0, r1); - irotshr(code, r0); - } -} - -static void -_irotshi(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) -{ - rex(0, WIDE, _NOREG, _NOREG, r0); - if (i0 == 1) { - ic(0xd1); - mrm(0x03, code, r7(r0)); - } - else { - ic(0xc1); - 
mrm(0x03, code, r7(r0)); - ic(i0); - } -} - -static void -_rotshi(jit_state_t *_jit, int32_t code, - int32_t r0, int32_t r1, jit_word_t i0) -{ - movr(r0, r1); - if (i0) - irotshi(code, r0, i0); -} - -static void -_lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0 == 0) - movr(r0, r1); - else if (i0 <= 3) - lea(0, _NOREG, r1, i0 == 1 ? _SCL2 : i0 == 2 ? _SCL4 : _SCL8, r0); - else - rotshi(X86_SHL, r0, r1, i0); -} - -static void -_unr(jit_state_t *_jit, int32_t code, int32_t r0) -{ - rex(0, WIDE, _NOREG, _NOREG, r0); - ic(0xf7); - mrm(0x03, code, r7(r0)); -} - -static void -_negr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - if (r0 == r1) - inegr(r0); - else { - ixorr(r0, r0); - isubr(r0, r1); - } -} - -static void -_comr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - movr(r0, r1); - icomr(r0); -} - -#if USE_INC_DEC -static void -_incr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - movr(r0, r1); -# if __X64 - rex(0, WIDE, _NOREG, _NOREG, r0); - ic(0xff); - ic(0xc0 | r7(r0)); -# else - ic(0x40 | r7(r0)); -# endif -} - -static void -_decr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - movr(r0, r1); -# if __X64 - rex(0, WIDE, _NOREG, _NOREG, r0); - ic(0xff); - ic(0xc8 | r7(r0)); -# else - ic(0x48 | r7(r0)); -# endif -} -#endif - -static void -_cr(jit_state_t *_jit, - int32_t code, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - jit_bool_t same; - if (reg8_p(r0)) { - same = r0 == r1 || r0 == r2; - if (!same) - ixorr(r0, r0); - icmpr(r1, r2); - if (same) - imovi(r0, 0); - cc(code, r0); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - ixorr(rn(reg), rn(reg)); - icmpr(r1, r2); - cc(code, rn(reg)); - movr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_ci(jit_state_t *_jit, - int32_t code, int32_t r0, int32_t r1, jit_word_t i0) -{ - int32_t reg; - jit_bool_t same; - if (reg8_p(r0)) { - same = r0 == r1; - if (!same) - ixorr(r0, r0); - icmpi(r1, i0); - if (same) - imovi(r0, 0); - cc(code, r0); - } - else { - reg 
= jit_get_reg(jit_class_gpr|jit_class_rg8); - ixorr(rn(reg), rn(reg)); - icmpi(r1, i0); - cc(code, rn(reg)); - movr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_ci0(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) -{ - int32_t reg; - jit_bool_t same; - if (reg8_p(r0)) { - same = r0 == r1; - if (!same) - ixorr(r0, r0); - testr(r1, r1); - if (same) - imovi(r0, 0); - cc(code, r0); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - ixorr(rn(reg), rn(reg)); - testr(r1, r1); - cc(code, rn(reg)); - movr(r0, rn(reg)); - jit_unget_reg(reg); - } -} - -static void -_ltr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 0); - else - cr(X86_CC_L, r0, r1, r2); -} - -static void -_lti(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_L, r0, r1, i0); - else - ci0(X86_CC_S, r0, r1); -} - -static void -_ltr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 0); - else - cr(X86_CC_B, r0, r1, r2); -} - -static void -_ler(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - cr(X86_CC_LE, r0, r1, r2); -} - -static void -_ler_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - cr(X86_CC_BE, r0, r1, r2); -} - -static void -_lei_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_BE, r0, r1, i0); - else - ci0(X86_CC_E, r0, r1); -} - -static void -_eqr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - cr(X86_CC_E, r0, r1, r2); -} - -static void -_eqi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_E, r0, r1, i0); - else - ci0(X86_CC_E, r0, r1); -} - -static void -_ger(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - cr(X86_CC_GE, r0, r1, r2); -} - -static void -_gei(jit_state_t *_jit, int32_t r0, 
int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_GE, r0, r1, i0); - else - ci0(X86_CC_NS, r0, r1); -} - -static void -_ger_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - cr(X86_CC_AE, r0, r1, r2); -} - -static void -_gei_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_AE, r0, r1, i0); - else - ci0(X86_CC_NB, r0, r1); -} - -static void -_gtr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 0); - else - cr(X86_CC_G, r0, r1, r2); -} - -static void -_gtr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 0); - else - cr(X86_CC_A, r0, r1, r2); -} - -static void -_gti_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_A, r0, r1, i0); - else - ci0(X86_CC_NE, r0, r1); -} - -static void -_ner(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 0); - else - cr(X86_CC_NE, r0, r1, r2); -} - -static void -_nei(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) -{ - if (i0) - ci(X86_CC_NE, r0, r1, i0); - else - ci0(X86_CC_NE, r0, r1); -} - -static void -_movr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - if (r0 != r1) { - rex(0, 1, r1, _NOREG, r0); - ic(0x89); - ic(0xc0 | (r1 << 3) | r7(r0)); - } -} - -static void -_imovi(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ -#if __X64 -# if !__X64_32 - if (fits_uint32_p(i0)) { -# endif - rex(0, 0, _NOREG, _NOREG, r0); - ic(0xb8 | r7(r0)); - ii(i0); -# if !__X64_32 - } - else { - rex(0, 1, _NOREG, _NOREG, r0); - ic(0xb8 | r7(r0)); - il(i0); - } -# endif -#else - ic(0xb8 | r7(r0)); - ii(i0); -#endif -} - -static void -_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - if (i0) - imovi(r0, i0); - else - ixorr(r0, r0); -} - -static jit_word_t -_movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - rex(0, WIDE, _NOREG, _NOREG, r0); - ic(0xb8 | r7(r0)); - il(i0); - return (_jit->pc.w); -} - 
-static void -_movcr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbe); - mrm(0x03, r7(r0), r7(r1)); -} - -static void -_movcr_u(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb6); - mrm(0x03, r7(r0), r7(r1)); -} - -static void -_movsr(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbf); - mrm(0x03, r7(r0), r7(r1)); -} - -static void -_movsr_u(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb7); - mrm(0x03, r7(r0), r7(r1)); -} - -#if __X64 -static void -_movir(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, 1, r0, _NOREG, r1); - ic(0x63); - mrm(0x03, r7(r0), r7(r1)); -} - -static void -_movir_u(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - rex(0, 0, r1, _NOREG, r0); - ic(0x89); - ic(0xc0 | (r1 << 3) | r7(r0)); -} -#endif - -static void -_htonr_us(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - extr_us(r0, r1); - ic(0x66); - rex(0, 0, _NOREG, _NOREG, r0); - ic(0xc1); - mrm(0x03, X86_ROR, r7(r0)); - ic(8); -} - -static void -_htonr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - movr(r0, r1); - rex(0, 0, _NOREG, _NOREG, r0); - ic(0x0f); - ic(0xc8 | r7(r0)); + return movsr_u(_jit, r0, r1); } #if __X64 && !__X64_32 static void -_htonr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +extr_i(jit_state_t *_jit, int32_t r0, int32_t r1) { - movr(r0, r1); - rex(0, 1, _NOREG, _NOREG, r0); - ic(0x0f); - ic(0xc8 | r7(r0)); + return movir(_jit, r0, r1); +} +static void +extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return movir_u(_jit, r0, r1); } #endif static void -_extr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (reg8_p(r1)) - movcr(r0, r1); - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r1); - movcr(r0, rn(reg)); - jit_unget_reg(reg); - } + extr_us(_jit, 
r0, r1); + ic(_jit, 0x66); + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xc1); + mrm(_jit, 0x03, X86_ROR, r7(r0)); + ic(_jit, 8); } static void -_extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) +bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (reg8_p(r1)) - movcr_u(r0, r1); - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r1); - movcr_u(r0, rn(reg)); - jit_unget_reg(reg); - } + movr(_jit, r0, r1); + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0x0f); + ic(_jit, 0xc8 | r7(r0)); +} + +#if __X64 && !__X64_32 +static void +bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movr(_jit, r0, r1); + rex(_jit, 0, 1, _NOREG, _NOREG, r0); + ic(_jit, 0x0f); + ic(_jit, 0xc8 | r7(r0)); +} +#endif + +static void +ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1) +ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbe); - rx(r0, 0, r1, _NOREG, _SCL1); + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_c(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, _NOREG); - ic(0x0f); - ic(0xbe); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_c(r0, rn(reg)); - jit_unget_reg(reg); - } + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1) 
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb6); - rx(r0, 0, r1, _NOREG, _SCL1); + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_uc(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, _NOREG); - ic(0x0f); - ic(0xb6); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_uc(r0, rn(reg)); - jit_unget_reg(reg); - } + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1) +ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbf); - rx(r0, 0, r1, _NOREG, _SCL1); + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_s(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, _NOREG); - ic(0x0f); - ic(0xbf); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_s(r0, rn(reg)); - jit_unget_reg(reg); - } + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1) 
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb7); - rx(r0, 0, r1, _NOREG, _SCL1); -} - -static void -_ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, _NOREG); - ic(0x0f); - ic(0xb7); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_us(r0, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_us(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } #if __X32 || !__X64_32 static void -_ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) +ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1) { #if __X64 - rex(0, WIDE, r0, _NOREG, r1); - ic(0x63); + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x63); #else - ic(0x8b); + ic(_jit, 0x8b); #endif - rx(r0, 0, r1, _NOREG, _SCL1); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { + if (can_sign_extend_int_p(i0)) { #if __X64 - rex(0, WIDE, r0, _NOREG, _NOREG); - ic(0x63); + rex(_jit, 0, WIDE, r0, _NOREG, _NOREG); + ic(_jit, 0x63); #else - ic(0x8b); + ic(_jit, 0x8b); #endif - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_i(r0, rn(reg)); - jit_unget_reg(reg); - } + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_i(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } #endif #if __X64 static void -_ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) +ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1) { - rex(0, 0, r0, _NOREG, r1); - 
ic(0x63); - rx(r0, 0, r1, _NOREG, _SCL1); + rex(_jit, 0, 0, r0, _NOREG, r1); + ic(_jit, 0x63); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 0, r0, _NOREG, _NOREG); - ic(0x63); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_ui(r0, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x63); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_ui(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } # if !__X64_32 static void -_ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1) +ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1) { - rex(0, 1, r0, _NOREG, r1); - ic(0x8b); - rx(r0, 0, r1, _NOREG, _SCL1); + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, 0, r1, _NOREG, _SCL1); } static void -_ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) +ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 1, r0, _NOREG, _NOREG); - ic(0x8b); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldr_l(r0, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, _NOREG); + ic(_jit, 0x8b); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_l(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } # endif #endif static void -_ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - addr(r0, r1, r2); - ldr_c(r0, r0); + addr(_jit, r0, r1, r2); + ldr_c(r0, r0); #else - rex(0, WIDE, r0, r1, r2); - 
ic(0x0f); - ic(0xbe); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, 0, r2, r1, _SCL1); #endif } static void -_ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbe); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_c(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbe); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_c(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - addr(r0, r1, r2); - ldr_uc(r0, r0); + addr(_jit, r0, r1, r2); + ldr_uc(_jit, r0, r0); #else - rex(0, WIDE, r0, r1, r2); - ic(0x0f); - ic(0xb6); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, 0, r2, r1, _SCL1); #endif } static void -_ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb6); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_uc(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb6); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_uc(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } 
static void -_ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - addr(r0, r1, r2); - ldr_s(r0, r0); + addr(_jit, r0, r1, r2); + ldr_s(_jit, r0, r0); #else - rex(0, WIDE, r0, r1, r2); - ic(0x0f); - ic(0xbf); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, 0, r2, r1, _SCL1); #endif } static void -_ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xbf); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_s(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xbf); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_s(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - addr(r0, r1, r2); - ldr_us(r0, r0); + addr(_jit, r0, r1, r2); + ldr_us(_jit, r0, r0); #else - rex(0, WIDE, r0, r1, r2); - ic(0x0f); - ic(0xb7); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, 0, r2, r1, _SCL1); #endif } static void -_ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, WIDE, r0, _NOREG, r1); - ic(0x0f); - ic(0xb7); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_us(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if 
(can_sign_extend_int_p(i0)) { + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x0f); + ic(_jit, 0xb7); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_us(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } #if __X64 || !__X64_32 static void -_ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64 - rex(0, WIDE, r0, r1, r2); - ic(0x63); + rex(_jit, 0, WIDE, r0, r1, r2); + ic(_jit, 0x63); #else - ic(0x8b); + ic(_jit, 0x8b); #endif - rx(r0, 0, r2, r1, _SCL1); + rx(_jit, r0, 0, r2, r1, _SCL1); } static void -_ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { + if (can_sign_extend_int_p(i0)) { #if __X64 - rex(0, WIDE, r0, _NOREG, r1); - ic(0x63); + rex(_jit, 0, WIDE, r0, _NOREG, r1); + ic(_jit, 0x63); #else - ic(0x8b); + ic(_jit, 0x8b); #endif - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_i(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_i(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } #endif #if __X64 static void -_ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - addr(r0, r1, r2); - /* to avoid confusion with macro renames */ - _ldr_ui(_jit, r0, r0); + addr(_jit, r0, r1, r2); + /* to avoid confusion with macro renames */ + _ldr_ui(_jit, r0, r0); #else - rex(0, 0, r0, r1, r2); - ic(0x8b); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, 0, r0, r1, r2); + ic(_jit, 0x8b); + rx(_jit, r0, 0, r2, r1, _SCL1); #endif } static void -_ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_ui(jit_state_t 
*_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 0, r0, _NOREG, r1); - ic(0x8b); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_ui(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_ui(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } # if !__X64_32 static void -_ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - rex(0, 1, r0, r1, r2); - ic(0x8b); - rx(r0, 0, r2, r1, _SCL1); + rex(_jit, 0, 1, r0, r1, r2); + ic(_jit, 0x8b); + rx(_jit, r0, 0, r2, r1, _SCL1); } static void -_ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 1, r0, _NOREG, r1); - ic(0x8b); - rx(r0, i0, r1, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - ldxr_l(r0, r1, rn(reg)); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, r1); + ic(_jit, 0x8b); + rx(_jit, r0, i0, r1, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldxr_l(_jit, r0, r1, rn(reg)); + unget_temp_gpr(_jit); + } } # endif #endif static void -_str_c(jit_state_t *_jit, int32_t r0, int32_t r1) +str_c(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t reg; + if (reg8_p(r1)) { + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + rex(_jit, 0, 0, rn(reg), _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, rn(reg), 0, r0, _NOREG, _SCL1); + unget_temp_gpr(_jit); + } +} + +static 
void +sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + if (reg8_p(r0)) { + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x88); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r0); + rex(_jit, 0, 0, rn(reg), _NOREG, _NOREG); + ic(_jit, 0x88); + rx(_jit, rn(reg), i0, _NOREG, _NOREG, _SCL1); + unget_temp_gpr(_jit); + } + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_c(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ic(_jit, 0x66); + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + ic(_jit, 0x66); + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_s(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +static void +str_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_i(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} + +#if __X64 && !__X64_32 +static void +str_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, 0, r0, _NOREG, _SCL1); +} + +static void +sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r0, _NOREG, _NOREG); + ic(_jit, 0x89); + rx(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + } else 
{ + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_l(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } +} +#endif + +static void +stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ +#if __X64_32 + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r0, r1); + str_c(_jit, rn(reg), r2); + unget_temp_gpr(_jit); +#else + if (reg8_p(r2)) { + rex(_jit, 0, 0, r2, r1, r0); + ic(_jit, 0x88); + rx(_jit, r2, 0, r0, r1, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r2); + rex(_jit, 0, 0, rn(reg), r1, r0); + ic(_jit, 0x88); + rx(_jit, rn(reg), 0, r0, r1, _SCL1); + unget_temp_gpr(_jit); + } +#endif +} + +static void +stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) { if (reg8_p(r1)) { - rex(0, 0, r1, _NOREG, r0); - ic(0x88); - rx(r1, 0, r0, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r1); - rex(0, 0, rn(reg), _NOREG, r0); - ic(0x88); - rx(rn(reg), 0, r0, _NOREG, _SCL1); - jit_unget_reg(reg); + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movr(_jit, rn(reg), r1); + rex(_jit, 0, 0, rn(reg), _NOREG, r0); + ic(_jit, 0x88); + rx(_jit, rn(reg), i0, r0, _NOREG, _SCL1); + unget_temp_gpr(_jit); } + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_c(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } } static void -_sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0) +stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - if (reg8_p(r0)) { - rex(0, 0, r0, _NOREG, _NOREG); - ic(0x88); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r0); - rex(0, 0, rn(reg), _NOREG, _NOREG); - ic(0x88); - rx(rn(reg), i0, _NOREG, _NOREG, _SCL1); - jit_unget_reg(reg); - } - } - else { - reg = 
jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_c(rn(reg), r0); - jit_unget_reg(reg); - } +#if __X64_32 + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r0, r1); + str_s(_jit, rn(reg), r2); + unget_temp_gpr(_jit); +#else + ic(_jit, 0x66); + rex(_jit, 0, 0, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); +#endif } static void -_str_s(jit_state_t *_jit, int32_t r0, int32_t r1) +stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - ic(0x66); - rex(0, 0, r1, _NOREG, r0); - ic(0x89); - rx(r1, 0, r0, _NOREG, _SCL1); + if (can_sign_extend_int_p(i0)) { + ic(_jit, 0x66); + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_s(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } } static void -_sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0) +stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - ic(0x66); - rex(0, 0, r0, _NOREG, _NOREG); - ic(0x89); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_s(rn(reg), r0); - jit_unget_reg(reg); - } +#if __X64_32 + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r0, r1); + str_i(rn(reg), r2); + unget_temp_gpr(_jit); +#else + rex(_jit, 0, 0, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); +#endif } static void -_str_i(jit_state_t *_jit, int32_t r0, int32_t r1) +stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - rex(0, 0, r1, _NOREG, r0); - ic(0x89); - rx(r1, 0, r0, _NOREG, _SCL1); -} - -static void -_sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 0, r0, _NOREG, _NOREG); - ic(0x89); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_i(rn(reg), r0); - jit_unget_reg(reg); - } + if 
(can_sign_extend_int_p(i0)) { + rex(_jit, 0, 0, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_i(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } } #if __X64 && !__X64_32 static void -_str_l(jit_state_t *_jit, int32_t r0, int32_t r1) +stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { - rex(0, 1, r1, _NOREG, r0); - ic(0x89); - rx(r1, 0, r0, _NOREG, _SCL1); + rex(_jit, 0, 1, r2, r1, r0); + ic(_jit, 0x89); + rx(_jit, r2, 0, r0, r1, _SCL1); } static void -_sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0) +stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 1, r0, _NOREG, _NOREG); - ic(0x89); - rx(r0, i0, _NOREG, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_l(rn(reg), r0); - jit_unget_reg(reg); - } + if (can_sign_extend_int_p(i0)) { + rex(_jit, 0, 1, r1, _NOREG, r0); + ic(_jit, 0x89); + rx(_jit, r1, i0, r0, _NOREG, _SCL1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + stxr_l(_jit, rn(reg), r0, r1); + unget_temp_gpr(_jit); + } } #endif -static void -_stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +static jit_reloc_t +jccs(jit_state_t *_jit, int32_t code) { - int32_t reg; -#if __X64_32 - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - str_c(rn(reg), r2); - jit_unget_reg(reg); + ic(_jit, 0x70 | code); + return jit_reloc(_jit, JIT_RELOC_REL8, 1, 0, -_jit->pc.w); +} + +static jit_reloc_t +jcc(jit_state_t *_jit, int32_t code) +{ + ic(_jit, 0x0f); + ic(_jit, 0x80 | code); + return jit_reloc(_jit, JIT_RELOC_REL32, 2, 0, -_jit->pc.w); +} + +#define DEFINE_JUMPS(cc, CC, code) \ + static inline jit_reloc_t j##cc(jit_state_t *_jit) \ + { \ + return jcc(_jit, X86_CC_##CC); \ + } \ + static inline jit_reloc_t j##cc##s(jit_state_t *_jit) \ + { \ + return jccs(_jit, X86_CC_##CC); \ + 
} +FOR_EACH_CC(DEFINE_JUMPS) +#undef DEFINE_JUMPS + +static jit_reloc_t +jcr(jit_state_t *_jit, int32_t code, int32_t r0, int32_t r1) +{ + alur(_jit, X86_CMP, r0, r1); + return jcc(_jit, code); +} + +static jit_reloc_t +jci(jit_state_t *_jit, int32_t code, int32_t r0, jit_word_t i0) +{ + alui(_jit, X86_CMP, r0, i0); + return jcc(_jit, code); +} + +static jit_reloc_t +jci0(jit_state_t *_jit, int32_t code, int32_t r0) +{ + testr(_jit, r0, r0); + return jcc(_jit, code); +} + +static jit_reloc_t +bltr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_L, r0, r1); +} + +static jit_reloc_t +blti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_L, r0, i1); + else return jci0(_jit, X86_CC_S, r0); +} + +static jit_reloc_t +bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_B, r0, r1); +} + +static jit_reloc_t +blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_B, r0, i1); + else return jci0(_jit, X86_CC_B, r0); +} + +static jit_reloc_t +bler(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_LE, r0, r1); +} + +static jit_reloc_t +blei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_LE, r0, i1); + else return jci0(_jit, X86_CC_LE, r0); +} + +static jit_reloc_t +bler_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_BE, r0, r1); +} + +static jit_reloc_t +blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_BE, r0, i1); + else return jci0(_jit, X86_CC_BE, r0); +} + +static jit_reloc_t +beqr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_E, r0, r1); +} + +static jit_reloc_t +beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_E, r0, i1); + else return jci0(_jit, X86_CC_E, r0); +} + +static jit_reloc_t +bger(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, 
X86_CC_GE, r0, r1); +} + +static jit_reloc_t +bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_GE, r0, i1); + else return jci0(_jit, X86_CC_NS, r0); +} + +static jit_reloc_t +bger_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr (_jit, X86_CC_AE, r0, r1); +} + +static jit_reloc_t +bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return jci (_jit, X86_CC_AE, r0, i1); +} + +static jit_reloc_t +bgtr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_G, r0, r1); +} + +static jit_reloc_t +bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + return jci(_jit, X86_CC_G, r0, i1); +} + +static jit_reloc_t +bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_A, r0, r1); +} + +static jit_reloc_t +bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_A, r0, i1); + else return jci0(_jit, X86_CC_NE, r0); +} + +static jit_reloc_t +bner(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + return jcr(_jit, X86_CC_NE, r0, r1); +} + +static jit_reloc_t +bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (i1) return jci (_jit, X86_CC_NE, r0, i1); + else return jci0(_jit, X86_CC_NE, r0); +} + +static jit_reloc_t +bmsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + testr(_jit, r0, r1); + return jnz(_jit); +} + +static jit_reloc_t +bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_int_p(i1)) { + testi(_jit, r0, i1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + testr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } + return jnz(_jit); +} + +static jit_reloc_t +bmcr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + testr(_jit, r0, r1); + return jz(_jit); +} + +static jit_reloc_t +bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_zero_extend_int_p(i1)) { + testi(_jit, r0, i1); + } else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + testr(_jit, r0, 
rn(reg)); + unget_temp_gpr(_jit); + } + return jz(_jit); +} + +static jit_reloc_t +boaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jo(_jit); +} + +static jit_reloc_t +boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jo(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return boaddr(_jit, r0, rn(reg)); +} + +static jit_reloc_t +boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jc(_jit); +} + +static jit_reloc_t +boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jc(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return boaddr_u(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jno(_jit); +} + +static jit_reloc_t +bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jno(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bxaddr(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + iaddr(_jit, r0, r1); + return jnc(_jit); +} + +static jit_reloc_t +bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + iaddi(_jit, r0, i1); + return jnc(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bxaddr_u(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bosubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jo(_jit); +} + +static jit_reloc_t +bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jo(_jit); + } + int32_t reg 
= get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bosubr(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jc(_jit); +} + +static jit_reloc_t +bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jc(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bosubr_u(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jno(_jit); +} + +static jit_reloc_t +bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jno(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bxsubr(_jit, r0, rn(reg)); +} + +static jit_reloc_t +bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + isubr(_jit, r0, r1); + return jnc(_jit); +} + +static jit_reloc_t +bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1) +{ + if (can_sign_extend_int_p(i1)) { + isubi(_jit, r0, i1); + return jnc(_jit); + } + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i1); + unget_temp_gpr(_jit); + return bxsubr_u(_jit, r0, rn(reg)); +} + +static void +callr(jit_state_t *_jit, int32_t r0) +{ + rex(_jit, 0, 0, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + mrm(_jit, 0x03, 0x02, r7(r0)); +} + +static void +calli(jit_state_t *_jit, jit_word_t i0) +{ + if (__X64) + { + int32_t reg = get_temp_gpr(_jit); + jit_patch_there(_jit, mov_addr(_jit, rn(reg)), (void*)i0); + callr(_jit, rn(reg)); + unget_temp_gpr(_jit); + } + else + { + ic(_jit, 0xe8); + ii(_jit, i0 - (_jit->pc.w + 4)); + } +} + +static void +jmpr(jit_state_t *_jit, int32_t r0) +{ + rex(_jit, 0, WIDE, _NOREG, _NOREG, r0); + ic(_jit, 0xff); + mrm(_jit, 0x03, 0x04, r7(r0)); +} + +static void +jmpi(jit_state_t *_jit, jit_word_t i0) 
+{ + jit_word_t w; + ic(_jit, 0xe9); + w = i0 - (_jit->pc.w + 4); + ii(_jit, w); +} + +static jit_reloc_t +jmp(jit_state_t *_jit) +{ + ic(_jit, 0xe9); + return jit_reloc(_jit, JIT_RELOC_REL32, 1, 0, -_jit->pc.w); +} + +static jit_reloc_t +jmpsi(jit_state_t *_jit) +{ + ic(_jit, 0xeb); + return jit_reloc(_jit, JIT_RELOC_REL8, 1, 0, -_jit->pc.w); +} + +static void +ret(jit_state_t *_jit) +{ + ic(_jit, 0xc3); +} + +static void +retr(jit_state_t *_jit, int32_t r0) +{ + movr(_jit, _RAX_REGNO, r0); + ret(_jit); +} + +static void +reti(jit_state_t *_jit, jit_word_t i0) +{ + movi(_jit, _RAX_REGNO, i0); + ret(_jit); +} + +static void +retval_c(jit_state_t *_jit, int32_t r0) +{ + extr_c(_jit, r0, rn(JIT_RET)); +} + +static void +retval_uc(jit_state_t *_jit, int32_t r0) +{ + extr_uc(_jit, r0, rn(JIT_RET)); +} + +static void +retval_s(jit_state_t *_jit, int32_t r0) +{ + extr_s(_jit, r0, rn(JIT_RET)); +} + +static void +retval_us(jit_state_t *_jit, int32_t r0) +{ + extr_us(_jit, r0, rn(JIT_RET)); +} + +static void +retval_i(jit_state_t *_jit, int32_t r0) +{ +#if __X32 || __X64_32 + movr(_jit, r0, rn(JIT_RET)); #else - if (reg8_p(r2)) { - rex(0, 0, r2, r1, r0); - ic(0x88); - rx(r2, 0, r0, r1, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r2); - rex(0, 0, rn(reg), r1, r0); - ic(0x88); - rx(rn(reg), 0, r0, r1, _SCL1); - jit_unget_reg(reg); - } + extr_i(_jit, r0, rn(JIT_RET)); #endif } -static void -_stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - if (reg8_p(r1)) { - rex(0, 0, r1, _NOREG, r0); - ic(0x88); - rx(r1, i0, r0, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_rg8); - movr(rn(reg), r1); - rex(0, 0, rn(reg), _NOREG, r0); - ic(0x88); - rx(rn(reg), i0, r0, _NOREG, _SCL1); - jit_unget_reg(reg); - } - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - stxr_c(rn(reg), r0, r1); - jit_unget_reg(reg); - } -} - -static void 
-_stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ -#if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - str_s(rn(reg), r2); - jit_unget_reg(reg); -#else - ic(0x66); - rex(0, 0, r2, r1, r0); - ic(0x89); - rx(r2, 0, r0, r1, _SCL1); -#endif -} - -static void -_stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - ic(0x66); - rex(0, 0, r1, _NOREG, r0); - ic(0x89); - rx(r1, i0, r0, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - stxr_s(rn(reg), r0, r1); - jit_unget_reg(reg); - } -} - -static void -_stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ -#if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - str_i(rn(reg), r2); - jit_unget_reg(reg); -#else - rex(0, 0, r2, r1, r0); - ic(0x89); - rx(r2, 0, r0, r1, _SCL1); -#endif -} - -static void -_stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 0, r1, _NOREG, r0); - ic(0x89); - rx(r1, i0, r0, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - stxr_i(rn(reg), r0, r1); - jit_unget_reg(reg); - } -} - #if __X64 && !__X64_32 static void -_stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +retval_ui(jit_state_t *_jit, int32_t r0) { - rex(0, 1, r2, r1, r0); - ic(0x89); - rx(r2, 0, r0, r1, _SCL1); + extr_ui(_jit, r0, rn(JIT_RET)); } static void -_stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +retval_l(jit_state_t *_jit, int32_t r0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 1, r1, _NOREG, r0); - ic(0x89); - rx(r1, i0, r0, _NOREG, _SCL1); - } - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - stxr_l(rn(reg), r0, r1); - jit_unget_reg(reg); - } + movr(_jit, r0, rn(JIT_RET)); } #endif - -static void -_jccs(jit_state_t *_jit, int32_t code, jit_word_t 
i0) -{ - jit_word_t w; - ic(0x70 | code); - w = i0 - (_jit->pc.w + 1); - ic(w); -} - -static void -_jcc(jit_state_t *_jit, int32_t code, jit_word_t i0) -{ - jit_word_t w; - ic(0x0f); - ic(0x80 | code); - w = i0 - (_jit->pc.w + 4); - ii(w); -} - -static void -_jcr(jit_state_t *_jit, - int32_t code, jit_word_t i0, int32_t r0, int32_t r1) -{ - alur(X86_CMP, r0, r1); - jcc(code, i0); -} - -static void -_jci(jit_state_t *_jit, - int32_t code, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - alui(X86_CMP, r0, i1); - jcc(code, i0); -} - -static void -_jci0(jit_state_t *_jit, int32_t code, jit_word_t i0, int32_t r0) -{ - testr(r0, r0); - jcc(code, i0); -} - -static jit_word_t -_bltr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jcr(X86_CC_L, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_blti(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_L, i0, r0, i1); - else jci0(X86_CC_S, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bltr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jcr(X86_CC_B, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_blti_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_B, i0, r0, i1); - else jci0(X86_CC_B, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bler(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_LE, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_blei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_LE, i0, r0, i1); - else jci0(X86_CC_LE, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bler_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_BE, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_blei_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_BE, i0, r0, i1); - 
else jci0(X86_CC_BE, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_beqr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_E, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_beqi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_E, i0, r0, i1); - else jci0(X86_CC_E, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bger(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_GE, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_bgei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_GE, i0, r0, i1); - else jci0(X86_CC_NS, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bger_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_AE, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_bgei_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_AE, i0, r0, i1); - else jmpi(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bgtr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jcr(X86_CC_G, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_bgti(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - jci(X86_CC_G, i0, r0, i1); - return (_jit->pc.w); -} - -static jit_word_t -_bgtr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jcr(X86_CC_A, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_bgti_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - if (i1) jci (X86_CC_A, i0, r0, i1); - else jci0(X86_CC_NE, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bner(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jcr(X86_CC_NE, i0, r0, r1); - return (_jit->pc.w); -} - -static jit_word_t -_bnei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t 
i1) -{ - if (i1) jci (X86_CC_NE, i0, r0, i1); - else jci0(X86_CC_NE, i0, r0); - return (_jit->pc.w); -} - -static jit_word_t -_bmsr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - testr(r0, r1); - jnz(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bmsi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_zero_extend_int_p(i1)) - testi(r0, i1); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i1); - testr(r0, rn(reg)); - jit_unget_reg(reg); - } - jnz(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bmcr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - testr(r0, r1); - jz(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bmci(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_zero_extend_int_p(i1)) - testi(r0, i1); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i1); - testr(r0, rn(reg)); - jit_unget_reg(reg); - } - jz(i0); - return (_jit->pc.w); -} - -static jit_word_t -_boaddr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - iaddr(r0, r1); - jo(i0); - return (_jit->pc.w); -} - -static jit_word_t -_boaddi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - iaddi(r0, i1); - jo(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (boaddr(i0, r0, rn(reg))); -} - -static jit_word_t -_boaddr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - iaddr(r0, r1); - jc(i0); - return (_jit->pc.w); -} - -static jit_word_t -_boaddi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - iaddi(r0, i1); - jc(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (boaddr_u(i0, r0, rn(reg))); -} - -static jit_word_t 
-_bxaddr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - iaddr(r0, r1); - jno(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bxaddi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - iaddi(r0, i1); - jno(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bxaddr(i0, r0, rn(reg))); -} - -static jit_word_t -_bxaddr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - iaddr(r0, r1); - jnc(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bxaddi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - iaddi(r0, i1); - jnc(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bxaddr_u(i0, r0, rn(reg))); -} - -static jit_word_t -_bosubr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - isubr(r0, r1); - jo(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bosubi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - isubi(r0, i1); - jo(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bosubr(i0, r0, rn(reg))); -} - -static jit_word_t -_bosubr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - isubr(r0, r1); - jc(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bosubi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - isubi(r0, i1); - jc(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bosubr_u(i0, r0, rn(reg))); -} - -static jit_word_t -_bxsubr(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - 
isubr(r0, r1); - jno(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bxsubi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - isubi(r0, i1); - jno(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bxsubr(i0, r0, rn(reg))); -} - -static jit_word_t -_bxsubr_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - isubr(r0, r1); - jnc(i0); - return (_jit->pc.w); -} - -static jit_word_t -_bxsubi_u(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1) -{ - int32_t reg; - if (can_sign_extend_int_p(i1)) { - isubi(r0, i1); - jnc(i0); - return (_jit->pc.w); - } - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - jit_unget_reg(reg); - return (bxsubr_u(i0, r0, rn(reg))); -} - -static void -_callr(jit_state_t *_jit, int32_t r0) -{ - rex(0, 0, _NOREG, _NOREG, r0); - ic(0xff); - mrm(0x03, 0x02, r7(r0)); -} - -static jit_word_t -_calli(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t word; -#if __X64 - int32_t reg; - - reg = jit_get_reg(jit_class_gpr); - word = movi_p(rn(reg), i0); - callr(rn(reg)); - jit_unget_reg(reg); -#else - jit_word_t w; - ic(0xe8); - w = i0 - (_jit->pc.w + 4); - ii(w); - word = _jit->pc.w; -#endif - return (word); -} - -static void -_jmpr(jit_state_t *_jit, int32_t r0) -{ - rex(0, WIDE, _NOREG, _NOREG, r0); - ic(0xff); - mrm(0x03, 0x04, r7(r0)); -} - -static jit_word_t -_jmpi(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t w; - ic(0xe9); - w = i0 - (_jit->pc.w + 4); - ii(w); - return (_jit->pc.w); -} - -static void -_jmpsi(jit_state_t *_jit, uint8_t i0) -{ - ic(0xeb); - ic(i0); -} - -static void -_vastart(jit_state_t *_jit, int32_t r0) -{ -#if __X32 || __CYGWIN__ - assert(_jitc->function->self.call & jit_call_varargs); - addi(r0, _RBP_REGNO, _jitc->function->self.size); -#else - int32_t reg; - - assert(_jitc->function->self.call & jit_call_varargs); - - /* 
Return jit_va_list_t in the register argument */ - addi(r0, _RBP_REGNO, _jitc->function->vaoff); - reg = jit_get_reg(jit_class_gpr); - - /* Initialize gp offset in the save area. */ - movi(rn(reg), _jitc->function->vagp); - stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg)); - - /* Initialize fp offset in the save area. */ - movi(rn(reg), _jitc->function->vafp); - stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); - - /* Initialize overflow pointer to the first stack argument. */ - addi(rn(reg), _RBP_REGNO, _jitc->function->self.size); - stxi(offsetof(jit_va_list_t, over), r0, rn(reg)); - - /* Initialize register save area pointer. */ - addi(rn(reg), r0, first_gp_offset); - stxi(offsetof(jit_va_list_t, save), r0, rn(reg)); - - jit_unget_reg(reg); -#endif -} - -#define UNIMPLEMENTED() abort() - -static void -_patch_at(jit_state_t *_jit, jit_node_t *node, - jit_word_t instr, jit_word_t label) -{ - UNIMPLEMENTED(); -} - -# if __X64 && !defined(HAVE_FFSL) -static int -ffsl(long i) -{ - int bit; -# if __CYGWIN__ - /* Bug workaround */ - if ((int)i == (int)0x80000000) - bit = 32; - else -# endif - if ((bit = ffs((int)i)) == 0) { - if ((bit = ffs((int)((unsigned long)i >> 32)))) - bit += 32; - } - return (bit); -} -# endif -#endif diff --git a/jit/x86-sse.c b/jit/x86-sse.c index db5f63ecd..21730517d 100644 --- a/jit/x86-sse.c +++ b/jit/x86-sse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2017 Free Software Foundation, Inc. + * Copyright (C) 2012-2017, 2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -14,1556 +14,1023 @@ * License for more details. 
* * Authors: - * Paulo Cesar Pereira de Andrade + * Paulo Cesar Pereira de Andrade */ -#if PROTO -# if __X32 -# define sse_address_p(i0) 1 -# else -# if __X64_32 -# define sse_address_p(i0) ((jit_word_t)(i0) >= 0) -# else -# define sse_address_p(i0) can_sign_extend_int_p(i0) -# endif -# endif -# define _XMM6_REGNO 6 -# define _XMM7_REGNO 7 -# define _XMM8_REGNO 8 -# define _XMM9_REGNO 9 -# define _XMM10_REGNO 10 -# define _XMM11_REGNO 11 -# define _XMM12_REGNO 12 -# define _XMM13_REGNO 13 -# define _XMM14_REGNO 14 -# define _XMM15_REGNO 15 -#define X86_SSE_MOV 0x10 -#define X86_SSE_MOV1 0x11 -#define X86_SSE_MOVLP 0x12 -#define X86_SSE_MOVHP 0x16 -#define X86_SSE_MOVA 0x28 -#define X86_SSE_CVTIS 0x2a -#define X86_SSE_CVTTSI 0x2c -#define X86_SSE_CVTSI 0x2d -#define X86_SSE_UCOMI 0x2e -#define X86_SSE_COMI 0x2f -#define X86_SSE_ROUND 0x3a -#define X86_SSE_SQRT 0x51 -#define X86_SSE_RSQRT 0x52 -#define X86_SSE_RCP 0x53 -#define X86_SSE_AND 0x54 -#define X86_SSE_ANDN 0x55 -#define X86_SSE_OR 0x56 -#define X86_SSE_XOR 0x57 -#define X86_SSE_ADD 0x58 -#define X86_SSE_MUL 0x59 -#define X86_SSE_CVTSD 0x5a -#define X86_SSE_CVTDT 0x5b -#define X86_SSE_SUB 0x5c -#define X86_SSE_MIN 0x5d -#define X86_SSE_DIV 0x5e -#define X86_SSE_MAX 0x5f -#define X86_SSE_X2G 0x6e -#define X86_SSE_EQB 0x74 -#define X86_SSE_EQW 0x75 -#define X86_SSE_EQD 0x76 -#define X86_SSE_G2X 0x7e -#define X86_SSE_MOV2 0xd6 -# define sser(c,r0,r1) _sser(_jit,c,r0,r1) -static void _sser(jit_state_t*,int32_t,int32_t,int32_t); -# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1) -static void _ssexr(jit_state_t*,int32_t,int32_t,int32_t,int32_t); -# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i) -static void _ssexi(jit_state_t*,int32_t,int32_t,int32_t,int32_t); -# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1) -# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1) -# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1) -# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1) -# define mulssr(r0, r1) 
ssexr(0xf3, X86_SSE_MUL, r0, r1) -# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1) -# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1) -# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1) -# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1) -# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1) -# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1) -# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1) -# if __X64 -# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1) -# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1) -# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1) -# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1) -# else -# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1) -# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1) -# endif -# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1) -# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1) -# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1) -# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1) -# define xorpsr(r0,r1) sser(X86_SSE_XOR,r0,r1) -# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1) -# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1) -# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1) -# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0) -# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0) -# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0) -# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0) -# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1) -# if __X64 && !__X64_32 -# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1) -static void -_sselxr(jit_state_t*, int32_t, int32_t, int32_t, int32_t); -# else -# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1) -# endif -# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd) -# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd) -# define movsdmr(md,rb,ri,ms,rd) 
ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd) -# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs) -# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs) -static void -_ssexrx(jit_state_t*, int32_t, int32_t, int32_t, - int32_t, int32_t, int32_t, int32_t); -# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2) -static void _sse_addr_f(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0) -static void _sse_addi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2) -static void _sse_addr_d(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0) -static void _sse_addi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2) -static void _sse_subr_f(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0) -static void _sse_subi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2) -static void _sse_subr_d(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0) -static void _sse_subi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1) -# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0) -static void _sse_rsbi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1) -# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0) -static void _sse_rsbi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2) -static void _sse_mulr_f(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0) -static void _sse_muli_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define 
sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2) -static void _sse_mulr_d(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0) -static void _sse_muli_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2) -static void _sse_divr_f(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0) -static void _sse_divi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2) -static void _sse_divr_d(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0) -static void _sse_divi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1) -static void _sse_absr_f(jit_state_t*,int32_t,int32_t); -# define sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1) -static void _sse_absr_d(jit_state_t*,int32_t,int32_t); -# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1) -static void _sse_negr_f(jit_state_t*,int32_t,int32_t); -# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1) -static void _sse_negr_d(jit_state_t*,int32_t,int32_t); -# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1) -# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1) -# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2) -# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2) -static void -_ssecmp(jit_state_t*, jit_bool_t, int32_t, - int32_t, int32_t, int32_t); -#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1) -static void _sse_movr_f(jit_state_t*, int32_t, int32_t); -#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0) -static void _sse_movi_f(jit_state_t*, int32_t, jit_float32_t*); -# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0) -static void _sse_lti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2) -# 
define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0) -static void _sse_lei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2) -# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0) -static void _sse_eqi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2) -static void _sse_eqr_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0) -static void _sse_gei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1) -# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0) -static void _sse_gti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1) -# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0) -static void _sse_nei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2) -static void _sse_ner_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0) -static void _sse_unlti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1) -# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0) -static void _sse_unlei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2) -# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0) -static void _sse_uneqi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -static void _sse_unler_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2) -static void _sse_uneqr_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0) -static void _sse_ungei_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_unger_f(r0, 
r1, r2) _sse_unger_f(_jit, r0, r1, r2) -static void _sse_unger_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0) -static void _sse_ungti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2) -# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0) -static void _sse_ltgti_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2) -static void _sse_ltgtr_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0) -static void _sse_ordi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1) -# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0) -static void _sse_unordi_f(jit_state_t*,int32_t,int32_t,jit_float32_t*); -# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1) -# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0) -# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0) -static void _sse_ldi_f(jit_state_t*, int32_t, jit_word_t); -# define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2) -static void _sse_ldxr_f(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0) -static void _sse_ldxi_f(jit_state_t*, int32_t, int32_t, jit_word_t); -# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1) -# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0) -static void _sse_sti_f(jit_state_t*, jit_word_t,int32_t); -# define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2) -static void _sse_stxr_f(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1) -static void _sse_stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define 
sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1) -static jit_word_t -_sse_blti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1) -static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1) -static jit_word_t -_sse_blei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1) -static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1) -static jit_word_t -_sse_beqi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1) -static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bgei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bgti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1) -static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bnei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bunlti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1) -static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define 
sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bunlei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1) -static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1) -static jit_word_t -_sse_buneqi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bunger_f(i0, r0, r1) _sse_bunger_f(_jit, i0, r0, r1) -static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bungei_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bungti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bltgti_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bordi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1) -static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1) -static jit_word_t -_sse_bunordi_f(jit_state_t*, jit_word_t, int32_t, jit_float32_t*); -#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1) -static void 
_sse_movr_d(jit_state_t*, int32_t, int32_t); -#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0) -static void _sse_movi_d(jit_state_t*, int32_t, jit_float64_t*); -# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2) -# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0) -static void _sse_lti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, r2) -# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0) -static void _sse_lei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2) -static void _sse_eqr_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0) -static void _sse_eqi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1) -# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0) -static void _sse_gei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1) -# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0) -static void _sse_gti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2) -static void _sse_ner_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0) -static void _sse_nei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1) -# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0) -static void _sse_unlti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2) -static void _sse_unler_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0) -static void _sse_unlei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2) -static 
void _sse_uneqr_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0) -static void _sse_uneqi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2) -static void _sse_unger_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0) -static void _sse_ungei_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2) -# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0) -static void _sse_ungti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2) -static void _sse_ltgtr_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0) -static void _sse_ltgti_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1) -# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0) -static void _sse_ordi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1) -# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0) -static void _sse_unordi_d(jit_state_t*,int32_t,int32_t,jit_float64_t*); -# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0) -# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0) -static void _sse_ldi_d(jit_state_t*, int32_t, jit_word_t); -# define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2) -static void _sse_ldxr_d(jit_state_t*, int32_t, int32_t, int32_t); -# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0) -static void _sse_ldxi_d(jit_state_t*, int32_t, int32_t, jit_word_t); -# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1) -# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1) -# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0) -static void _sse_sti_d(jit_state_t*, 
jit_word_t,int32_t); -# define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2) -static void _sse_stxr_d(jit_state_t*,int32_t,int32_t,int32_t); -# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, i0, r0, r1) -static void _sse_stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t); -static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1) -static jit_word_t -_sse_blti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1) -static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1) -static jit_word_t -_sse_blei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1) -static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1) -static jit_word_t -_sse_beqi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1) -static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bgei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1) -static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bgti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1) -static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bnei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1) -static jit_word_t 
_sse_bunltr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bunlti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1) -static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bunlei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1) -static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1) -static jit_word_t -_sse_buneqi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1) -static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bungei_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1) -static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bungti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1) -static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bltgti_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1) -static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bordi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -# define 
sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, r0, r1) -static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,int32_t,int32_t); -# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1) -static jit_word_t -_sse_bunordi_d(jit_state_t*, jit_word_t, int32_t, jit_float64_t*); -#endif +#define _XMM0_REGNO 0 +#define _XMM1_REGNO 1 +#define _XMM2_REGNO 2 +#define _XMM3_REGNO 3 +#define _XMM4_REGNO 4 +#define _XMM5_REGNO 5 +#define _XMM6_REGNO 6 +#define _XMM7_REGNO 7 +#define _XMM8_REGNO 8 +#define _XMM9_REGNO 9 +#define _XMM10_REGNO 10 +#define _XMM11_REGNO 11 +#define _XMM12_REGNO 12 +#define _XMM13_REGNO 13 +#define _XMM14_REGNO 14 +#define _XMM15_REGNO 15 +#define X86_SSE_MOV 0x10 +#define X86_SSE_MOV1 0x11 +#define X86_SSE_MOVLP 0x12 +#define X86_SSE_MOVHP 0x16 +#define X86_SSE_MOVA 0x28 +#define X86_SSE_CVTIS 0x2a +#define X86_SSE_CVTTSI 0x2c +#define X86_SSE_CVTSI 0x2d +#define X86_SSE_UCOMI 0x2e +#define X86_SSE_COMI 0x2f +#define X86_SSE_ROUND 0x3a +#define X86_SSE_SQRT 0x51 +#define X86_SSE_RSQRT 0x52 +#define X86_SSE_RCP 0x53 +#define X86_SSE_AND 0x54 +#define X86_SSE_ANDN 0x55 +#define X86_SSE_OR 0x56 +#define X86_SSE_XOR 0x57 +#define X86_SSE_ADD 0x58 +#define X86_SSE_MUL 0x59 +#define X86_SSE_CVTSD 0x5a +#define X86_SSE_CVTDT 0x5b +#define X86_SSE_SUB 0x5c +#define X86_SSE_MIN 0x5d +#define X86_SSE_DIV 0x5e +#define X86_SSE_MAX 0x5f +#define X86_SSE_X2G 0x6e +#define X86_SSE_EQB 0x74 +#define X86_SSE_EQW 0x75 +#define X86_SSE_EQD 0x76 +#define X86_SSE_G2X 0x7e +#define X86_SSE_MOV2 0xd6 -#if CODE -# define fpr_opi(name, type, size) \ -static void \ -_sse_##name##i_##type(jit_state_t *_jit, \ - int32_t r0, int32_t r1, \ - jit_float##size##_t *i0) \ -{ \ - int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \ - assert(jit_sse_reg_p(reg)); \ - sse_movi_##type(rn(reg), i0); \ - sse_##name##r_##type(r0, r1, rn(reg)); \ - jit_unget_reg(reg); \ -} -# define fpr_bopi(name, type, size) \ -static jit_word_t \ -_sse_b##name##i_##type(jit_state_t 
*_jit, \ - jit_word_t i0, int32_t r0, \ - jit_float##size##_t *i1) \ -{ \ - jit_word_t word; \ - int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \ - jit_class_nospill); \ - assert(jit_sse_reg_p(reg)); \ - sse_movi_##type(rn(reg), i1); \ - word = sse_b##name##r_##type(i0, r0, rn(reg)); \ - jit_unget_reg(reg); \ - return (word); \ -} -# define fopi(name) fpr_opi(name, f, 32) -# define fbopi(name) fpr_bopi(name, f, 32) -# define dopi(name) fpr_opi(name, d, 64) -# define dbopi(name) fpr_bopi(name, d, 64) static void -_sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1) +sser(jit_state_t *_jit, int32_t c, int32_t r0, int32_t r1) { - rex(0, 0, r0, 0, r1); - ic(0x0f); - ic(c); - mrm(0x03, r7(r0), r7(r1)); + rex(_jit, 0, 0, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); } static void -_ssexr(jit_state_t *_jit, int32_t p, int32_t c, - int32_t r0, int32_t r1) +ssexr(jit_state_t *_jit, int32_t p, int32_t c, + int32_t r0, int32_t r1) { - ic(p); - rex(0, 0, r0, 0, r1); - ic(0x0f); - ic(c); - mrm(0x03, r7(r0), r7(r1)); + ic(_jit, p); + rex(_jit, 0, 0, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); } static void -_ssexi(jit_state_t *_jit, int32_t c, int32_t r0, - int32_t m, int32_t i) +ssexi(jit_state_t *_jit, int32_t c, int32_t r0, + int32_t m, int32_t i) { - ic(0x66); - rex(0, 0, 0, 0, r0); - ic(0x0f); - ic(c); - mrm(0x03, r7(m), r7(r0)); - ic(i); + ic(_jit, 0x66); + rex(_jit, 0, 0, 0, 0, r0); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(m), r7(r0)); + ic(_jit, i); } -#if __X64 static void -_sselxr(jit_state_t *_jit, int32_t p, int32_t c, - int32_t r0, int32_t r1) +sselxr(jit_state_t *_jit, int32_t p, int32_t c, int32_t r0, int32_t r1) { - ic(p); - rex(0, 1, r0, 0, r1); - ic(0x0f); - ic(c); - mrm(0x03, r7(r0), r7(r1)); + if (__X64 && !__X64_32) { + ic(_jit, p); + rex(_jit, 0, 1, r0, 0, r1); + ic(_jit, 0x0f); + ic(_jit, c); + mrm(_jit, 0x03, r7(r0), r7(r1)); + } else { + ssexr(_jit, p, c, 
r0, r1); + } } -#endif static void -_ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md, +ssexrx(jit_state_t *_jit, int32_t px, int32_t code, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) { - ic(px); - rex(0, 0, rd, ri, rb); - ic(0x0f); - ic(code); - rx(rd, md, rb, ri, ms); + ic(_jit, px); + rex(_jit, 0, 0, rd, ri, rb); + ic(_jit, 0x0f); + ic(_jit, code); + rx(_jit, rd, md, rb, ri, ms); } static void -_sse_addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +movdlxr(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 == r1) - addssr(r0, r2); - else if (r0 == r2) - addssr(r0, r1); - else { - sse_movr_f(r0, r1); - addssr(r0, r2); - } + ssexr(_jit, 0x66, X86_SSE_X2G, r0, r1); } - -fopi(add) - static void -_sse_addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +movdqxr(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 == r1) - addsdr(r0, r2); - else if (r0 == r2) - addsdr(r0, r1); - else { - sse_movr_d(r0, r1); - addsdr(r0, r2); - } -} - -dopi(add) - -static void -_sse_subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r1) - subssr(r0, r2); - else if (r0 == r2) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - sse_movr_f(rn(reg), r0); - sse_movr_f(r0, r1); - subssr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - sse_movr_f(r0, r1); - subssr(r0, r2); - } -} - -fopi(sub) - -static void -_sse_subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r1) - subsdr(r0, r2); - else if (r0 == r2) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - sse_movr_d(rn(reg), r0); - sse_movr_d(r0, r1); - subsdr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - sse_movr_d(r0, r1); - subsdr(r0, r2); - } -} - -dopi(sub) - -fopi(rsb) - -dopi(rsb) - -static void -_sse_mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r0 == r1) - mulssr(r0, r2); - else if (r0 == r2) - mulssr(r0, r1); - else { - sse_movr_f(r0, r1); - mulssr(r0, r2); - 
} -} - -fopi(mul) - -static void -_sse_mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r0 == r1) - mulsdr(r0, r2); - else if (r0 == r2) - mulsdr(r0, r1); - else { - sse_movr_d(r0, r1); - mulsdr(r0, r2); - } -} - -dopi(mul) - -static void -_sse_divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r1) - divssr(r0, r2); - else if (r0 == r2) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - sse_movr_f(rn(reg), r0); - sse_movr_f(r0, r1); - divssr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - sse_movr_f(r0, r1); - divssr(r0, r2); - } -} - -fopi(div) - -static void -_sse_divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - int32_t reg; - if (r0 == r1) - divsdr(r0, r2); - else if (r0 == r2) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - sse_movr_d(rn(reg), r0); - sse_movr_d(r0, r1); - divsdr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - sse_movr_d(r0, r1); - divsdr(r0, r2); - } -} - -dopi(div) - -static void -_sse_absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - int32_t reg; - if (r0 == r1) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - pcmpeqlr(rn(reg), rn(reg)); - psrl(rn(reg), 1); - andpsr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - pcmpeqlr(r0, r0); - psrl(r0, 1); - andpsr(r0, r1); - } + sselxr(_jit, 0x66, X86_SSE_X2G, r0, r1); } static void -_sse_absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +movssmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) { - int32_t reg; - if (r0 == r1) { - reg = jit_get_reg(jit_class_fpr|jit_class_xpr); - pcmpeqlr(rn(reg), rn(reg)); - psrq(rn(reg), 1); - andpdr(r0, rn(reg)); - jit_unget_reg(reg); - } - else { - pcmpeqlr(r0, r0); - psrq(r0, 1); - andpdr(r0, r1); - } + ssexrx(_jit, 0xf3, X86_SSE_MOV, md, rb, ri, ms, rd); +} +static void +movsdmr(jit_state_t *_jit, int32_t md, int32_t rb, int32_t ri, int32_t ms, int32_t rd) +{ + ssexrx(_jit, 0xf2, X86_SSE_MOV, md, rb, ri, ms, rd); +} +static 
void +movssrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms) +{ + ssexrx(_jit, 0xf3, X86_SSE_MOV1, md, mb, mi, ms, rs); +} +static void +movsdrm(jit_state_t *_jit, int32_t rs, int32_t md, int32_t mb, int32_t mi, int32_t ms) +{ + ssexrx(_jit, 0xf2, X86_SSE_MOV1, md, mb, mi, ms, rs); } static void -_sse_negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t freg, ireg; - ireg = jit_get_reg(jit_class_gpr); - imovi(rn(ireg), 0x80000000); - if (r0 == r1) { - freg = jit_get_reg(jit_class_fpr|jit_class_xpr); - movdlxr(rn(freg), rn(ireg)); - xorpsr(r0, rn(freg)); - jit_unget_reg(freg); - } - else { - movdlxr(r0, rn(ireg)); - xorpsr(r0, r1); - } - jit_unget_reg(ireg); + if (r0 != r1) + ssexr(_jit, 0xf3, X86_SSE_MOV, r0, r1); } static void -_sse_negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { - int32_t freg, ireg; - ireg = jit_get_reg(jit_class_gpr); - imovi(rn(ireg), 0x80000000); - if (r0 == r1) { - freg = jit_get_reg(jit_class_fpr|jit_class_xpr); - movdlxr(rn(freg), rn(ireg)); - pslq(rn(freg), 32); - xorpdr(r0, rn(freg)); - jit_unget_reg(freg); - } - else { - movdlxr(r0, rn(ireg)); - pslq(r0, 32); - xorpdr(r0, r1); - } - jit_unget_reg(ireg); + if (r0 != r1) + ssexr(_jit, 0xf2, X86_SSE_MOV, r0, r1); } static void -_ssecmp(jit_state_t *_jit, jit_bool_t d, int32_t code, - int32_t r0, int32_t r1, int32_t r2) +pushr_d(jit_state_t *_jit, int32_t r0) { - jit_bool_t rc; - int32_t reg; - if ((rc = reg8_p(r0))) - reg = r0; - else { - reg = _RAX_REGNO; - movr(r0, reg); - } - ixorr(reg, reg); - if (d) - ucomisdr(r2, r1); - else - ucomissr(r2, r1); - cc(code, reg); - if (!rc) - xchgr(r0, reg); + int32_t tmp = get_temp_gpr(_jit); + movdqxr(_jit, rn(tmp), r0); + pushr(_jit, rn(tmp)); + unget_temp_gpr(_jit); } static void -_sse_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +popr_d(jit_state_t *_jit, int32_t r0) { - if (r0 != r1) - ssexr(0xf3, 
X86_SSE_MOV, r0, r1); + int32_t tmp = get_temp_gpr(_jit); + popr(_jit, rn(tmp)); + ssexr(_jit, 0x66, X86_SSE_G2X, r0, rn(tmp)); + unget_temp_gpr(_jit); +} + +static int32_t +get_temp_xpr(jit_state_t *_jit) +{ + /* Reserve XMM7 for the JIT. */ + ASSERT(!_jit->temp_fpr_saved); + _jit->temp_fpr_saved = 1; + return _XMM7_REGNO; } static void -_sse_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t *i0) +unget_temp_xpr(jit_state_t *_jit) { - union { - int32_t i; - jit_float32_t f; - } data; - int32_t reg; - jit_bool_t ldi; + ASSERT(_jit->temp_fpr_saved); + _jit->temp_fpr_saved = 0; +} - data.f = *i0; - if (data.f == 0.0 && !(data.i & 0x80000000)) - xorpsr(r0, r0); - else { - ldi = !_jitc->no_data; +static void +addssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_ADD, r0, r1); +} +static void +addsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_ADD, r0, r1); +} +static void +subssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_SUB, r0, r1); +} +static void +subsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_SUB, r0, r1); +} +static void +mulssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_MUL, r0, r1); +} +static void +mulsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_MUL, r0, r1); +} +static void +divssr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_DIV, r0, r1); +} +static void +divsdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_DIV, r0, r1); +} +static void +andpsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sser(_jit, X86_SSE_AND, r0, r1); +} +static void +andpdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_AND, r0, r1); +} +static void +truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1); +} +static void +truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t 
r1) +{ + ssexr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1); +} #if __X64 - /* if will allocate a register for offset, just use immediate */ - if (ldi && !sse_address_p(i0)) - ldi = 0; +static void +truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf3, X86_SSE_CVTTSI, r0, r1); +} +static void +truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sselxr(_jit, 0xf2, X86_SSE_CVTTSI, r0, r1); +} #endif - if (ldi) - sse_ldi_f(r0, (jit_word_t)i0); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), data.i); - movdlxr(r0, rn(reg)); - jit_unget_reg(reg); - } - } -} - -fopi(lt) -fopi(le) - static void -_sse_eqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +extr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - jit_bool_t rc; - int32_t reg; - jit_word_t jp_code; - if ((rc = reg8_p(r0))) - reg = r0; - else { - reg = _RAX_REGNO; - movr(r0, _RAX_REGNO); - } - ixorr(reg, reg); - ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; - cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); - if (!rc) - xchgr(r0, reg); + sselxr(_jit, 0xf3, X86_SSE_CVTIS, r0, r1); } - -fopi(eq) -fopi(ge) -fopi(gt) - static void -_sse_ner_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +extr_d(jit_state_t *_jit, int32_t r0, int32_t r1) { - jit_bool_t rc; - int32_t reg; - jit_word_t jp_code; - if ((rc = reg8_p(r0))) - reg = r0; - else { - reg = _RAX_REGNO; - movr(r0, _RAX_REGNO); - } - imovi(reg, 1); - ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; - cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); - if (!rc) - xchgr(r0, reg); -} - -fopi(ne) -fopi(unlt) - -static void -_sse_unler_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpf(X86_CC_NA, r0, r2, r1); -} - -fopi(unle) - -static void -_sse_uneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpf(X86_CC_E, r0, r1, r2); -} - -fopi(uneq) - -static void 
-_sse_unger_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpf(X86_CC_NA, r0, r1, r2); -} - -fopi(unge) -fopi(ungt) - -static void -_sse_ltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - ixorr(r0, r0); - else - ssecmpf(X86_CC_NE, r0, r1, r2); -} - -fopi(ltgt) -fopi(ord) -fopi(unord) - -static void -_sse_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) -{ - int32_t reg; - if (sse_address_p(i0)) - movssmr(i0, _NOREG, _NOREG, _SCL1, r0); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - sse_ldr_f(r0, rn(reg)); - jit_unget_reg(reg); - } + sselxr(_jit, 0xf2, X86_SSE_CVTIS, r0, r1); } static void -_sse_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1) { -#if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - sse_ldr_f(r0, rn(reg)); - jit_unget_reg(reg); -#else - movssmr(0, r1, r2, _SCL1, r0); -#endif + ssexr(_jit, 0xf3, X86_SSE_CVTSD, r0, r1); +} +static void +extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_CVTSD, r0, r1); +} +static void +ucomissr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sser(_jit, X86_SSE_UCOMI, r0, r1); +} +static void +ucomisdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_UCOMI, r0, r1); +} +static void +xorpsr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + sser(_jit, X86_SSE_XOR, r0, r1); +} +static void +xorpdr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_XOR, r0, r1); +} +static void +pcmpeqlr(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0x66, X86_SSE_EQD, r0, r1); +} +static void +psrl(jit_state_t *_jit, int32_t r0, int32_t i0) +{ + ssexi(_jit, 0x72, r0, 0x02, i0); +} +static void +psrq(jit_state_t *_jit, int32_t r0, int32_t i0) +{ + ssexi(_jit, 0x73, r0, 0x02, i0); +} +static void +pslq(jit_state_t *_jit, int32_t r0, 
int32_t i0) +{ + ssexi(_jit, 0x73, r0, 0x06, i0); +} +static void +sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf3, X86_SSE_SQRT, r0, r1); +} +static void +sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ssexr(_jit, 0xf2, X86_SSE_SQRT, r0, r1); +} +static void +ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movssmr(_jit, 0, r1, _NOREG, _SCL1, r0); +} +static void +str_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movssrm(_jit, r1, 0, r0, _NOREG, _SCL1); +} +static void +ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movsdmr(_jit, 0, r1, _NOREG, _SCL1, r0); +} +static void +str_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + movsdrm(_jit, r1, 0, r0, _NOREG, _SCL1); } static void -_sse_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) - movssmr(i0, r1, _NOREG, _SCL1, r0); - else { - reg = jit_get_reg(jit_class_gpr); -#if __X64_32 - addi(rn(reg), r1, i0); - sse_ldr_f(r0, rn(reg)); -#else - movi(rn(reg), i0); - sse_ldxr_f(r0, r1, rn(reg)); -#endif - jit_unget_reg(reg); - } + union { + int32_t i; + jit_float32_t f; + } data; + + data.f = i0; + if (data.f == 0.0 && !(data.i & 0x80000000)) + xorpsr(_jit, r0, r0); + else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), data.i); + movdlxr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } } static void -_sse_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) +movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0) { - int32_t reg; - if (sse_address_p(i0)) - movssrm(r0, i0, _NOREG, _NOREG, _SCL1); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - sse_str_f(rn(reg), r0); - jit_unget_reg(reg); - } -} + union { + int32_t ii[2]; + jit_word_t w; + jit_float64_t d; + } data; -static void -_sse_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ -#if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - 
addr(rn(reg), r0, r1); - sse_str_f(rn(reg), r2); - jit_unget_reg(reg); -#else - movssrm(r2, 0, r0, r1, _SCL1); -#endif -} - -static void -_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - int32_t reg; - if (can_sign_extend_int_p(i0)) - movssrm(r1, i0, r0, _NOREG, _SCL1); - else { - reg = jit_get_reg(jit_class_gpr); -#if __X64_32 - addi(rn(reg), r0, i0); - sse_str_f(rn(reg), r1); -#else - movi(rn(reg), i0); - sse_stxr_f(rn(reg), r0, r1); -#endif - jit_unget_reg(reg); - } -} - -static jit_word_t -_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r1, r0); - ja(i0); - return (_jit->pc.w); -} -fbopi(lt) - -static jit_word_t -_sse_bler_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r1, r0); - jae(i0); - return (_jit->pc.w); -} -fbopi(le) - -static jit_word_t -_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jit_word_t jp_code; - ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); -} -fbopi(eq) - -static jit_word_t -_sse_bger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - jae(i0); - return (_jit->pc.w); -} -fbopi(ge) - -static jit_word_t -_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - ja(i0); - return (_jit->pc.w); -} -fbopi(gt) - -static jit_word_t -_sse_bner_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - jit_word_t jp_code; - jit_word_t jz_code; - ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); -} -fbopi(ne) - -static jit_word_t -_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - jnae(i0); - return (_jit->pc.w); -} -fbopi(unlt) - -static jit_word_t -_sse_bunler_f(jit_state_t *_jit, jit_word_t 
i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) - jmpi(i0); - else { - ucomissr(r0, r1); - jna(i0); - } - return (_jit->pc.w); -} -fbopi(unle) - -static jit_word_t -_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) - jmpi(i0); - else { - ucomissr(r0, r1); - je(i0); - } - return (_jit->pc.w); -} -fbopi(uneq) - -static jit_word_t -_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - if (r0 == r1) - jmpi(i0); - else { - ucomissr(r1, r0); - jna(i0); - } - return (_jit->pc.w); -} -fbopi(unge) - -static jit_word_t -_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r1, r0); - jnae(i0); - return (_jit->pc.w); -} -fbopi(ungt) - -static jit_word_t -_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - jne(i0); - return (_jit->pc.w); -} -fbopi(ltgt) - -static jit_word_t -_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - jnp(i0); - return (_jit->pc.w); -} -fbopi(ord) - -static jit_word_t -_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) -{ - ucomissr(r0, r1); - jp(i0); - return (_jit->pc.w); -} -fbopi(unord) - -dopi(lt) -dopi(le) - -static void -_sse_eqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - jit_bool_t rc; - int32_t reg; - jit_word_t jp_code; - if ((rc = reg8_p(r0))) - reg = r0; - else { - reg = _RAX_REGNO; - movr(r0, _RAX_REGNO); - } - ixorr(reg, reg); - ucomisdr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; - cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); - if (!rc) - xchgr(r0, reg); -} - -dopi(eq) -dopi(ge) -dopi(gt) - -static void -_sse_ner_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - jit_bool_t rc; - int32_t reg; - jit_word_t jp_code; - if ((rc = reg8_p(r0))) - reg = r0; - else { - reg = _RAX_REGNO; - movr(r0, _RAX_REGNO); - } - imovi(reg, 1); - ucomisdr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; - cc(X86_CC_NE, 
reg); - patch_rel_char(jp_code, _jit->pc.w); - if (!rc) - xchgr(r0, reg); -} - -dopi(ne) -dopi(unlt) - -static void -_sse_unler_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpd(X86_CC_NA, r0, r2, r1); -} - -dopi(unle) - -static void -_sse_uneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpd(X86_CC_E, r0, r1, r2); -} - -dopi(uneq) - -static void -_sse_unger_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - movi(r0, 1); - else - ssecmpd(X86_CC_NA, r0, r1, r2); -} - -dopi(unge) -dopi(ungt) - -static void -_sse_ltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) -{ - if (r1 == r2) - ixorr(r0, r0); - else - ssecmpd(X86_CC_NE, r0, r1, r2); -} - -dopi(ltgt) -dopi(ord) -dopi(unord) - -static void -_sse_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - if (r0 != r1) - ssexr(0xf2, X86_SSE_MOV, r0, r1); -} - -static void -_sse_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t *i0) -{ - union { - int32_t ii[2]; - jit_word_t w; - jit_float64_t d; - } data; - int32_t reg; - jit_bool_t ldi; - - data.d = *i0; - if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) - xorpdr(r0, r0); - else { - ldi = !_jitc->no_data; -#if __X64 - /* if will allocate a register for offset, just use immediate */ - if (ldi && !sse_address_p(i0)) - ldi = 0; -#endif - if (ldi) - sse_ldi_d(r0, (jit_word_t)i0); - else { - reg = jit_get_reg(jit_class_gpr); + data.d = i0; + if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) + xorpdr(_jit, r0, r0); + else { + int32_t reg = get_temp_gpr(_jit); #if __X64 && !__X64_32 - movi(rn(reg), data.w); - movdqxr(r0, rn(reg)); - jit_unget_reg(reg); + movi(_jit, rn(reg), data.w); + movdqxr(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); #else - movi(rn(reg), data.ii[0]); - stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); - movi(rn(reg), data.ii[1]); - stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); - jit_unget_reg(reg); - 
sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); + movi(_jit, rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(_jit, rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); + unget_temp_gpr(_jit); + ldxi_d(_jit, r0, _RBP_REGNO, CVT_OFFSET); #endif - } - } + } } static void -_sse_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +retval_f(jit_state_t *_jit, int32_t r0) { - int32_t reg; - if (sse_address_p(i0)) - movsdmr(i0, _NOREG, _NOREG, _SCL1, r0); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - sse_ldr_d(r0, rn(reg)); - jit_unget_reg(reg); - } +#if __X64 + movr_f(_jit, r0, rn(JIT_FRET)); +#endif } static void -_sse_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +retval_d(jit_state_t *_jit, int32_t r0) +{ +#if __X64 + movr_d(_jit, r0, rn(JIT_FRET)); +#endif +} + +static void +retr_f(jit_state_t *_jit, int32_t u) +{ + movr_f(_jit, rn(JIT_FRET), u); + ret(_jit); +} + +static void +retr_d(jit_state_t *_jit, int32_t u) +{ + movr_d(_jit, rn(JIT_FRET), u); + ret(_jit); +} + +static void +addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + addssr(_jit, r0, r2); + else if (r0 == r2) + addssr(_jit, r0, r1); + else { + movr_f(_jit, r0, r1); + addssr(_jit, r0, r2); + } +} + +static void +addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + addsdr(_jit, r0, r2); + else if (r0 == r2) + addsdr(_jit, r0, r1); + else { + movr_d(_jit, r0, r1); + addsdr(_jit, r0, r2); + } +} + +static void +subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + subssr(_jit, r0, r2); + else if (r0 == r2) { + int32_t reg = get_temp_xpr(_jit); + movr_f(_jit, rn(reg), r0); + movr_f(_jit, r0, r1); + subssr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + movr_f(_jit, r0, r1); + subssr(_jit, r0, r2); + } +} + +static void +subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + subsdr(_jit, r0, r2); + else if (r0 == 
r2) { + int32_t reg = get_temp_xpr(_jit); + movr_d(_jit, rn(reg), r0); + movr_d(_jit, r0, r1); + subsdr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + movr_d(_jit, r0, r1); + subsdr(_jit, r0, r2); + } +} + +static void +mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + mulssr(_jit, r0, r2); + else if (r0 == r2) + mulssr(_jit, r0, r1); + else { + movr_f(_jit, r0, r1); + mulssr(_jit, r0, r2); + } +} + +static void +mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + mulsdr(_jit, r0, r2); + else if (r0 == r2) + mulsdr(_jit, r0, r1); + else { + movr_d(_jit, r0, r1); + mulsdr(_jit, r0, r2); + } +} + +static void +divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + divssr(_jit, r0, r2); + else if (r0 == r2) { + int32_t reg = get_temp_xpr(_jit); + movr_f(_jit, rn(reg), r0); + movr_f(_jit, r0, r1); + divssr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + movr_f(_jit, r0, r1); + divssr(_jit, r0, r2); + } +} + +static void +divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ + if (r0 == r1) + divsdr(_jit, r0, r2); + else if (r0 == r2) { + int32_t reg = get_temp_xpr(_jit); + movr_d(_jit, rn(reg), r0); + movr_d(_jit, r0, r1); + divsdr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + movr_d(_jit, r0, r1); + divsdr(_jit, r0, r2); + } +} + +static void +absr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 == r1) { + int32_t reg = get_temp_xpr(_jit); + pcmpeqlr(_jit, rn(reg), rn(reg)); + psrl(_jit, rn(reg), 1); + andpsr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + pcmpeqlr(_jit, r0, r0); + psrl(_jit, r0, 1); + andpsr(_jit, r0, r1); + } +} + +static void +absr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + if (r0 == r1) { + int32_t reg = get_temp_xpr(_jit); + pcmpeqlr(_jit, rn(reg), rn(reg)); + psrq(_jit, rn(reg), 1); + andpdr(_jit, r0, rn(reg)); + unget_temp_xpr(_jit); + } + else { + pcmpeqlr(_jit, r0, r0); + 
psrq(_jit, r0, 1); + andpdr(_jit, r0, r1); + } +} + +static void +negr_f(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t ireg = get_temp_gpr(_jit); + imovi(_jit, rn(ireg), 0x80000000); + if (r0 == r1) { + int32_t freg = get_temp_xpr(_jit); + movdlxr(_jit, rn(freg), rn(ireg)); + xorpsr(_jit, r0, rn(freg)); + unget_temp_xpr(_jit); + } else { + movdlxr(_jit, r0, rn(ireg)); + xorpsr(_jit, r0, r1); + } + unget_temp_gpr(_jit); +} + +static void +negr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + int32_t ireg = get_temp_gpr(_jit); + imovi(_jit, rn(ireg), 0x80000000); + if (r0 == r1) { + int32_t freg = get_temp_xpr(_jit); + movdlxr(_jit, rn(freg), rn(ireg)); + pslq(_jit, rn(freg), 32); + xorpdr(_jit, r0, rn(freg)); + unget_temp_xpr(_jit); + } else { + movdlxr(_jit, r0, rn(ireg)); + pslq(_jit, r0, 32); + xorpdr(_jit, r0, r1); + } + unget_temp_gpr(_jit); +} + +#if __X32 +# define sse_address_p(i0) 1 +#elif __X64_32 +# define sse_address_p(i0) ((jit_word_t)(i0) >= 0) +# else +# define sse_address_p(i0) can_sign_extend_int_p(i0) +#endif + +static void +ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (sse_address_p(i0)) + movssmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0); + else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_f(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - sse_ldr_d(r0, rn(reg)); - jit_unget_reg(reg); + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r1, r2); + ldr_f(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); #else - movsdmr(0, r1, r2, _SCL1, r0); + movssmr(_jit, 0, r1, r2, _SCL1, r0); #endif } static void -_sse_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) { - int32_t reg; - if (can_sign_extend_int_p(i0)) - movsdmr(i0, r1, _NOREG, _SCL1, r0); - else { - 
reg = jit_get_reg(jit_class_gpr); + if (can_sign_extend_int_p(i0)) + movssmr(_jit, i0, r1, _NOREG, _SCL1, r0); + else { + int32_t reg = get_temp_gpr(_jit); #if __X64_32 - addi(rn(reg), r1, i0); - sse_ldr_d(r0, rn(reg)); + addi(rn(reg), r1, i0); + ldr_f(_jit, r0, rn(reg)); #else - movi(rn(reg), i0); - sse_ldxr_d(r0, r1, rn(reg)); + movi(_jit, rn(reg), i0); + ldxr_f(_jit, r0, r1, rn(reg)); #endif - jit_unget_reg(reg); - } + unget_temp_gpr(_jit); + } } static void -_sse_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0) { - int32_t reg; - if (sse_address_p(i0)) - movsdrm(r0, i0, _NOREG, _NOREG, _SCL1); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - sse_str_d(rn(reg), r0); - jit_unget_reg(reg); - } + if (sse_address_p(i0)) + movssrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_f(_jit, rn(reg), r0); + unget_temp_gpr(_jit); + } } static void -_sse_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) { #if __X64_32 - int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r0, r1); - sse_str_d(rn(reg), r2); - jit_unget_reg(reg); + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r0, r1); + str_f(_jit, rn(reg), r2); + unget_temp_gpr(_jit); #else - movsdrm(r2, 0, r0, r1, _SCL1); + movssrm(_jit, r2, 0, r0, r1, _SCL1); #endif } static void -_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) { - int32_t reg; - if (can_sign_extend_int_p(i0)) - movsdrm(r1, i0, r0, _NOREG, _SCL1); - else { - reg = jit_get_reg(jit_class_gpr); + if (can_sign_extend_int_p(i0)) + movssrm(_jit, r1, i0, r0, _NOREG, _SCL1); + else { + int32_t reg = get_temp_gpr(_jit); #if __X64_32 - addi(rn(reg), r0, i0); - sse_str_d(rn(reg), r1); + addi(rn(reg), r0, i0); + str_f(_jit, rn(reg), r1); #else - 
movi(rn(reg), i0); - sse_stxr_f(rn(reg), r0, r1); + movi(_jit, rn(reg), i0); + stxr_f(_jit, rn(reg), r0, r1); #endif - jit_unget_reg(reg); - } + unget_temp_gpr(_jit); + } } -static jit_word_t -_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r1, r0); - ja(i0); - return (_jit->pc.w); + ucomissr(_jit, r1, r0); + return ja(_jit); } -dbopi(lt) -static jit_word_t -_sse_bler_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bler_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r1, r0); - jae(i0); - return (_jit->pc.w); + ucomissr(_jit, r1, r0); + return jae(_jit); } -dbopi(le) -static jit_word_t -_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - jit_word_t jp_code; - ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t ret = je(_jit); + jit_patch_here(_jit, pos); + return ret; } -dbopi(eq) -static jit_word_t -_sse_bger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bger_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - jae(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jae(_jit); } -dbopi(ge) -static jit_word_t -_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - ja(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return ja(_jit); } -dbopi(gt) -static jit_word_t -_sse_bner_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bner_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - jit_word_t jp_code; - jit_word_t jz_code; - ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - 
jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t zero = jzs(_jit); + jit_patch_here(_jit, pos); + jit_reloc_t ret = jmp(_jit); + jit_patch_here(_jit, zero); + return ret; } -dbopi(ne) -static jit_word_t -_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - jnae(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jnae(_jit); } -dbopi(unlt) -static jit_word_t -_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 == r1) - jmpi(i0); - else { - ucomisdr(r0, r1); - jna(i0); - } - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jna(_jit); } -dbopi(unle) -static jit_word_t -_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 == r1) - jmpi(i0); - else { - ucomisdr(r0, r1); - je(i0); - } - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return je(_jit); } -dbopi(uneq) -static jit_word_t -_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - if (r0 == r1) - jmpi(i0); - else { - ucomisdr(r1, r0); - jna(i0); - } - return (_jit->pc.w); + ucomissr(_jit, r1, r0); + return jna(_jit); } -dbopi(unge) -static jit_word_t -_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r1, r0); - jnae(i0); - return (_jit->pc.w); + ucomissr(_jit, r1, r0); + return jnae(_jit); } -dbopi(ungt) -static jit_word_t -_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t 
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - jne(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jne(_jit); } -dbopi(ltgt) -static jit_word_t -_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - jnp(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jnp(_jit); } -dbopi(ord) -static jit_word_t -_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +static jit_reloc_t +bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1) { - ucomisdr(r0, r1); - jp(i0); - return (_jit->pc.w); + ucomissr(_jit, r0, r1); + return jp(_jit); } -dbopi(unord) -# undef fopi -# undef fbopi -# undef bopi -# undef dbopi -# undef fpr_bopi -# undef fpr_opi + +static void +ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0) +{ + if (sse_address_p(i0)) + movsdmr(_jit, i0, _NOREG, _NOREG, _SCL1, r0); + else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + ldr_d(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); + } +} + +static void +ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ +#if __X64_32 + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r1, r2); + ldr_d(_jit, r0, rn(reg)); + unget_temp_gpr(_jit); +#else + movsdmr(_jit, 0, r1, r2, _SCL1, r0); #endif +} + +static void +ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0) +{ + if (can_sign_extend_int_p(i0)) + movsdmr(_jit, i0, r1, _NOREG, _SCL1, r0); + else { + int32_t reg = get_temp_gpr(_jit); +#if __X64_32 + addi(rn(reg), r1, i0); + ldr_d(_jit, r0, rn(reg)); +#else + movi(_jit, rn(reg), i0); + ldxr_d(_jit, r0, r1, rn(reg)); +#endif + unget_temp_gpr(_jit); + } +} + +static void +sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0) +{ + if (sse_address_p(i0)) + movsdrm(_jit, r0, i0, _NOREG, _NOREG, _SCL1); + else { + int32_t reg = get_temp_gpr(_jit); + movi(_jit, rn(reg), i0); + str_d(_jit, rn(reg), r0); + 
unget_temp_gpr(_jit); + } +} + +static void +stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2) +{ +#if __X64_32 + int32_t reg = get_temp_gpr(_jit); + addr(_jit, rn(reg), r0, r1); + str_d(_jit, rn(reg), r2); + unget_temp_gpr(_jit); +#else + movsdrm(_jit, r2, 0, r0, r1, _SCL1); +#endif +} + +static void +stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1) +{ + if (can_sign_extend_int_p(i0)) + movsdrm(_jit, r1, i0, r0, _NOREG, _SCL1); + else { + int32_t reg = get_temp_gpr(_jit); +#if __X64_32 + addi(rn(reg), r0, i0); + str_d(_jit, rn(reg), r1); +#else + movi(_jit, rn(reg), i0); + stxr_f(_jit, rn(reg), r0, r1); +#endif + unget_temp_gpr(_jit); + } +} + +static jit_reloc_t +bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return ja(_jit); +} + +static jit_reloc_t +bler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jae(_jit); +} + +static jit_reloc_t +beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t ret = je(_jit); + jit_patch_here(_jit, pos); + return ret; +} + +static jit_reloc_t +bger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jae(_jit); +} + +static jit_reloc_t +bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return ja(_jit); +} + +static jit_reloc_t +bner_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + jit_reloc_t pos = jps(_jit); + jit_reloc_t zero = jzs(_jit); + jit_patch_here(_jit, pos); + jit_reloc_t ret = jmp(_jit); + jit_patch_here(_jit, zero); + return ret; +} + +static jit_reloc_t +bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jnae(_jit); +} + +static jit_reloc_t +bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jna(_jit); +} + +static jit_reloc_t +buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + 
ucomisdr(_jit, r0, r1); + return je(_jit); +} + +static jit_reloc_t +bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jna(_jit); +} + +static jit_reloc_t +bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r1, r0); + return jnae(_jit); +} + +static jit_reloc_t +bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jne(_jit); +} + +static jit_reloc_t +bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jnp(_jit); +} + +static jit_reloc_t +bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1) +{ + ucomisdr(_jit, r0, r1); + return jp(_jit); +} diff --git a/jit/x86.c b/jit/x86.c index 1e34b23e9..1b617372a 100644 --- a/jit/x86.c +++ b/jit/x86.c @@ -14,46 +14,46 @@ * License for more details. * * Authors: - * Paulo Cesar Pereira de Andrade + * Paulo Cesar Pereira de Andrade */ #if __X32 -# define jit_arg_reg_p(i) 0 -# define jit_arg_f_reg_p(i) 0 -# define stack_framesize 20 -# define stack_adjust 12 -# define CVT_OFFSET -12 -# define REAL_WORDSIZE 4 -# define va_gp_increment 4 -# define va_fp_increment 8 +# define jit_arg_reg_p(i) 0 +# define jit_arg_f_reg_p(i) 0 +# define stack_framesize 20 +# define stack_adjust 12 +# define CVT_OFFSET -12 +# define REAL_WORDSIZE 4 +# define va_gp_increment 4 +# define va_fp_increment 8 #else -# if __CYGWIN__ -# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) -# define jit_arg_f_reg_p(i) jit_arg_reg_p(i) -# define stack_framesize 152 -# define va_fp_increment 8 -# else -# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) -# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) -# define stack_framesize 56 -# define first_gp_argument rdi -# define first_gp_offset offsetof(jit_va_list_t, rdi) -# define first_gp_from_offset(gp) ((gp) / 8) -# define last_gp_argument r9 -# define va_gp_max_offset \ - (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8) -# define first_fp_argument xmm0 -# define 
first_fp_offset offsetof(jit_va_list_t, xmm0) -# define last_fp_argument xmm7 -# define va_fp_max_offset \ - (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16) -# define va_fp_increment 16 -# define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16) -# endif -# define va_gp_increment 8 -# define stack_adjust 8 -# define CVT_OFFSET -8 -# define REAL_WORDSIZE 8 +# if __CYGWIN__ +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) +# define jit_arg_f_reg_p(i) jit_arg_reg_p(i) +# define stack_framesize 152 +# define va_fp_increment 8 +# else +# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) +# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +# define stack_framesize 56 +# define first_gp_argument rdi +# define first_gp_offset offsetof(jit_va_list_t, rdi) +# define first_gp_from_offset(gp) ((gp) / 8) +# define last_gp_argument r9 +# define va_gp_max_offset \ + (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8) +# define first_fp_argument xmm0 +# define first_fp_offset offsetof(jit_va_list_t, xmm0) +# define last_fp_argument xmm7 +# define va_fp_max_offset \ + (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16) +# define va_fp_increment 16 +# define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16) +# endif +# define va_gp_increment 8 +# define stack_adjust 8 +# define CVT_OFFSET -8 +# define REAL_WORDSIZE 8 #endif /* @@ -63,2197 +63,381 @@ typedef jit_pointer_t jit_va_list_t; #else typedef struct jit_va_list { - int32_t gpoff; - int32_t fpoff; - jit_pointer_t over; - jit_pointer_t save; - /* Declared explicitly as int64 for the x32 abi */ - int64_t rdi; - int64_t rsi; - int64_t rdx; - int64_t rcx; - int64_t r8; - int64_t r9; - jit_float64_t xmm0; - jit_float64_t _up0; - jit_float64_t xmm1; - jit_float64_t _up1; - jit_float64_t xmm2; - jit_float64_t _up2; - jit_float64_t xmm3; - jit_float64_t _up3; - jit_float64_t xmm4; - jit_float64_t _up4; - jit_float64_t xmm5; - jit_float64_t _up5; - jit_float64_t xmm6; - 
jit_float64_t _up6; - jit_float64_t xmm7; - jit_float64_t _up7; + int32_t gpoff; + int32_t fpoff; + jit_pointer_t over; + jit_pointer_t save; + /* Declared explicitly as int64 for the x32 abi */ + int64_t rdi; + int64_t rsi; + int64_t rdx; + int64_t rcx; + int64_t r8; + int64_t r9; + jit_float64_t xmm0; + jit_float64_t _up0; + jit_float64_t xmm1; + jit_float64_t _up1; + jit_float64_t xmm2; + jit_float64_t _up2; + jit_float64_t xmm3; + jit_float64_t _up3; + jit_float64_t xmm4; + jit_float64_t _up4; + jit_float64_t xmm5; + jit_float64_t _up5; + jit_float64_t xmm6; + jit_float64_t _up6; + jit_float64_t xmm7; + jit_float64_t _up7; } jit_va_list_t; #endif -/* - * Prototypes - */ -#define sse_from_x87_f(r0, r1) _sse_from_x87_f(_jit, r0, r1) -static void _sse_from_x87_f(jit_state_t*,int32_t,int32_t); -#define sse_from_x87_d(r0, r1) _sse_from_x87_d(_jit, r0, r1) -static void _sse_from_x87_d(jit_state_t*,int32_t,int32_t); -#define x87_from_sse_f(r0, r1) _x87_from_sse_f(_jit, r0, r1) -static void _x87_from_sse_f(jit_state_t*,int32_t,int32_t); -#define x87_from_sse_d(r0, r1) _x87_from_sse_d(_jit, r0, r1) -static void _x87_from_sse_d(jit_state_t*,int32_t,int32_t); - -#define PROTO 1 -# include "x86-cpu.c" -# include "x86-sse.c" -# include "x86-x87.c" -#undef PROTO - -/* - * Initialization - */ -jit_cpu_t jit_cpu; +jit_cpu_t jit_cpu; static const jit_register_t _rvs[] = { #if __X32 - { rc(gpr) | rc(rg8) | 0, "%eax" }, - { rc(gpr) | rc(rg8) | 1, "%ecx" }, - { rc(gpr) | rc(rg8) | 2, "%edx" }, - { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" }, - { rc(sav) | rc(gpr) | 6, "%esi" }, - { rc(sav) | rc(gpr) | 7, "%edi" }, - { rc(sav) | 4, "%esp" }, - { rc(sav) | 5, "%ebp" }, - { rc(xpr) | rc(fpr) | 0, "%xmm0" }, - { rc(xpr) | rc(fpr) | 1, "%xmm1" }, - { rc(xpr) | rc(fpr) | 2, "%xmm2" }, - { rc(xpr) | rc(fpr) | 3, "%xmm3" }, - { rc(xpr) | rc(fpr) | 4, "%xmm4" }, - { rc(xpr) | rc(fpr) | 5, "%xmm5" }, - { rc(xpr) | rc(fpr) | 6, "%xmm6" }, - { rc(xpr) | rc(fpr) | 7, "%xmm7" }, - { rc(fpr) | 0, 
"st(0)" }, - { rc(fpr) | 1, "st(1)" }, - { rc(fpr) | 2, "st(2)" }, - { rc(fpr) | 3, "st(3)" }, - { rc(fpr) | 4, "st(4)" }, - { rc(fpr) | 5, "st(5)" }, - { rc(fpr) | 6, "st(6)" }, - { rc(fpr) | 7, "st(7)" }, + { rc(gpr) | rc(rg8) | 0, "%eax" }, + { rc(gpr) | rc(rg8) | 1, "%ecx" }, + { rc(gpr) | rc(rg8) | 2, "%edx" }, + { rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" }, + { rc(sav) | rc(gpr) | 6, "%esi" }, + { rc(sav) | rc(gpr) | 7, "%edi" }, + { rc(sav) | 4, "%esp" }, + { rc(sav) | 5, "%ebp" }, + { rc(xpr) | rc(fpr) | 0, "%xmm0" }, + { rc(xpr) | rc(fpr) | 1, "%xmm1" }, + { rc(xpr) | rc(fpr) | 2, "%xmm2" }, + { rc(xpr) | rc(fpr) | 3, "%xmm3" }, + { rc(xpr) | rc(fpr) | 4, "%xmm4" }, + { rc(xpr) | rc(fpr) | 5, "%xmm5" }, + { rc(xpr) | rc(fpr) | 6, "%xmm6" }, + { rc(xpr) | rc(fpr) | 7, "%xmm7" }, +#elif __CYGWIN__ + { rc(gpr) | rc(rg8) | 0, "%rax" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" }, + { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" }, + { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, + { rc(sav) | rc(gpr) | 7, "%rdi" }, + { rc(sav) | rc(gpr) | 6, "%rsi" }, + { rc(sav) | rc(gpr) | 12, "%r12" }, + { rc(sav) | rc(gpr) | 13, "%r13" }, + { rc(sav) | rc(gpr) | 14, "%r14" }, + { rc(sav) | rc(gpr) | 15, "%r15" }, + { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, + { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, + { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, + { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, + { rc(sav) | 4, "%rsp" }, + { rc(sav) | 5, "%rbp" }, + { rc(xpr) | rc(fpr) | 4, "%xmm4" }, + { rc(xpr) | rc(fpr) | 5, "%xmm5" }, + { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" }, + { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" }, + { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" }, + { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" }, + { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" }, + { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" }, + { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" }, + { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" }, + { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" }, + { rc(sav) | 
rc(xpr) | rc(fpr) | 15, "%xmm15" }, + { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, + { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, + { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, + { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, #else -# if __CYGWIN__ - { rc(gpr) | rc(rg8) | 0, "%rax" }, - { rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" }, - { rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" }, - { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, - { rc(sav) | rc(gpr) | 7, "%rdi" }, - { rc(sav) | rc(gpr) | 6, "%rsi" }, - { rc(sav) | rc(gpr) | 12, "%r12" }, - { rc(sav) | rc(gpr) | 13, "%r13" }, - { rc(sav) | rc(gpr) | 14, "%r14" }, - { rc(sav) | rc(gpr) | 15, "%r15" }, - { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, - { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, - { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, - { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, - { rc(sav) | 4, "%rsp" }, - { rc(sav) | 5, "%rbp" }, - { rc(xpr) | rc(fpr) | 4, "%xmm4" }, - { rc(xpr) | rc(fpr) | 5, "%xmm5" }, - { rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" }, - { rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" }, - { rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" }, - { rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" }, - { rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" }, - { rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" }, - { rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" }, - { rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" }, - { rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" }, - { rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" }, - { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, - { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, - { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, - { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, -#else - /* %rax is a pseudo flag argument for varargs functions */ - { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" }, - { rc(gpr) | rc(rg8) | 10, "%r10" }, - { rc(gpr) | rc(rg8) | 11, "%r11" }, - { rc(gpr) | rc(rg8) | 12, "%r12" }, - { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, - { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" }, - { rc(sav) | 
rc(rg8) | rc(gpr) | 14, "%r14" }, - { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" }, - { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, - { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, - { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, - { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, - { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" }, - { rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" }, - { rc(sav) | 4, "%rsp" }, - { rc(sav) | 5, "%rbp" }, - { rc(xpr) | rc(fpr) | 8, "%xmm8" }, - { rc(xpr) | rc(fpr) | 9, "%xmm9" }, - { rc(xpr) | rc(fpr) | 10, "%xmm10" }, - { rc(xpr) | rc(fpr) | 11, "%xmm11" }, - { rc(xpr) | rc(fpr) | 12, "%xmm12" }, - { rc(xpr) | rc(fpr) | 13, "%xmm13" }, - { rc(xpr) | rc(fpr) | 14, "%xmm14" }, - { rc(xpr) | rc(fpr) | 15, "%xmm15" }, - { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" }, - { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" }, - { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" }, - { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" }, - { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, - { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, - { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, - { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, -# endif - { rc(fpr) | 0, "st(0)" }, - { rc(fpr) | 1, "st(1)" }, - { rc(fpr) | 2, "st(2)" }, - { rc(fpr) | 3, "st(3)" }, - { rc(fpr) | 4, "st(4)" }, - { rc(fpr) | 5, "st(5)" }, - { rc(fpr) | 6, "st(6)" }, - { rc(fpr) | 7, "st(7)" }, + /* %rax is a pseudo flag argument for varargs functions */ + { rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" }, + { rc(gpr) | rc(rg8) | 10, "%r10" }, + { rc(gpr) | rc(rg8) | 11, "%r11" }, + { rc(gpr) | rc(rg8) | 12, "%r12" }, + { rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" }, + { rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" }, + { rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" }, + { rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" }, + { rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" }, + { rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" }, + { rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" }, + { rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" }, + { rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" }, + { 
rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" }, + { rc(sav) | 4, "%rsp" }, + { rc(sav) | 5, "%rbp" }, + { rc(xpr) | rc(fpr) | 8, "%xmm8" }, + { rc(xpr) | rc(fpr) | 9, "%xmm9" }, + { rc(xpr) | rc(fpr) | 10, "%xmm10" }, + { rc(xpr) | rc(fpr) | 11, "%xmm11" }, + { rc(xpr) | rc(fpr) | 12, "%xmm12" }, + { rc(xpr) | rc(fpr) | 13, "%xmm13" }, + { rc(xpr) | rc(fpr) | 14, "%xmm14" }, + { rc(xpr) | rc(fpr) | 15, "%xmm15" }, + { rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" }, + { rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" }, + { rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" }, + { rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" }, + { rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" }, + { rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" }, + { rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" }, + { rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" }, #endif - { _NOREG, "" }, + { _NOREG, "" }, }; -/* - * Implementation - */ -void +#include "x86-cpu.c" +#include "x86-sse.c" + +jit_bool_t jit_get_cpu(void) { - union { - struct { - uint32_t sse3 : 1; - uint32_t pclmulqdq : 1; - uint32_t dtes64 : 1; /* amd reserved */ - uint32_t monitor : 1; - uint32_t ds_cpl : 1; /* amd reserved */ - uint32_t vmx : 1; /* amd reserved */ - uint32_t smx : 1; /* amd reserved */ - uint32_t est : 1; /* amd reserved */ - uint32_t tm2 : 1; /* amd reserved */ - uint32_t ssse3 : 1; - uint32_t cntx_id : 1; /* amd reserved */ - uint32_t __reserved0 : 1; - uint32_t fma : 1; - uint32_t cmpxchg16b : 1; - uint32_t xtpr : 1; /* amd reserved */ - uint32_t pdcm : 1; /* amd reserved */ - uint32_t __reserved1 : 1; - uint32_t pcid : 1; /* amd reserved */ - uint32_t dca : 1; /* amd reserved */ - uint32_t sse4_1 : 1; - uint32_t sse4_2 : 1; - uint32_t x2apic : 1; /* amd reserved */ - uint32_t movbe : 1; /* amd reserved */ - uint32_t popcnt : 1; - uint32_t tsc : 1; /* amd reserved */ - uint32_t aes : 1; - uint32_t xsave : 1; - uint32_t osxsave : 1; - uint32_t avx : 1; - uint32_t __reserved2 : 1; /* amd F16C */ - uint32_t __reserved3 : 1; - uint32_t __alwayszero : 1; /* amd 
RAZ */ - } bits; - jit_uword_t cpuid; - } ecx; - union { - struct { - uint32_t fpu : 1; - uint32_t vme : 1; - uint32_t de : 1; - uint32_t pse : 1; - uint32_t tsc : 1; - uint32_t msr : 1; - uint32_t pae : 1; - uint32_t mce : 1; - uint32_t cmpxchg8b : 1; - uint32_t apic : 1; - uint32_t __reserved0 : 1; - uint32_t sep : 1; - uint32_t mtrr : 1; - uint32_t pge : 1; - uint32_t mca : 1; - uint32_t cmov : 1; - uint32_t pat : 1; - uint32_t pse36 : 1; - uint32_t psn : 1; /* amd reserved */ - uint32_t clfsh : 1; - uint32_t __reserved1 : 1; - uint32_t ds : 1; /* amd reserved */ - uint32_t acpi : 1; /* amd reserved */ - uint32_t mmx : 1; - uint32_t fxsr : 1; - uint32_t sse : 1; - uint32_t sse2 : 1; - uint32_t ss : 1; /* amd reserved */ - uint32_t htt : 1; - uint32_t tm : 1; /* amd reserved */ - uint32_t __reserved2 : 1; - uint32_t pbe : 1; /* amd reserved */ - } bits; - jit_uword_t cpuid; - } edx; + union { + struct { + uint32_t sse3 : 1; + uint32_t pclmulqdq : 1; + uint32_t dtes64 : 1; /* amd reserved */ + uint32_t monitor : 1; + uint32_t ds_cpl : 1; /* amd reserved */ + uint32_t vmx : 1; /* amd reserved */ + uint32_t smx : 1; /* amd reserved */ + uint32_t est : 1; /* amd reserved */ + uint32_t tm2 : 1; /* amd reserved */ + uint32_t ssse3 : 1; + uint32_t cntx_id : 1; /* amd reserved */ + uint32_t __reserved0 : 1; + uint32_t fma : 1; + uint32_t cmpxchg16b : 1; + uint32_t xtpr : 1; /* amd reserved */ + uint32_t pdcm : 1; /* amd reserved */ + uint32_t __reserved1 : 1; + uint32_t pcid : 1; /* amd reserved */ + uint32_t dca : 1; /* amd reserved */ + uint32_t sse4_1 : 1; + uint32_t sse4_2 : 1; + uint32_t x2apic : 1; /* amd reserved */ + uint32_t movbe : 1; /* amd reserved */ + uint32_t popcnt : 1; + uint32_t tsc : 1; /* amd reserved */ + uint32_t aes : 1; + uint32_t xsave : 1; + uint32_t osxsave : 1; + uint32_t avx : 1; + uint32_t __reserved2 : 1; /* amd F16C */ + uint32_t __reserved3 : 1; + uint32_t __alwayszero : 1; /* amd RAZ */ + } bits; + jit_uword_t cpuid; + } ecx; + union { + 
struct { + uint32_t fpu : 1; + uint32_t vme : 1; + uint32_t de : 1; + uint32_t pse : 1; + uint32_t tsc : 1; + uint32_t msr : 1; + uint32_t pae : 1; + uint32_t mce : 1; + uint32_t cmpxchg8b : 1; + uint32_t apic : 1; + uint32_t __reserved0 : 1; + uint32_t sep : 1; + uint32_t mtrr : 1; + uint32_t pge : 1; + uint32_t mca : 1; + uint32_t cmov : 1; + uint32_t pat : 1; + uint32_t pse36 : 1; + uint32_t psn : 1; /* amd reserved */ + uint32_t clfsh : 1; + uint32_t __reserved1 : 1; + uint32_t ds : 1; /* amd reserved */ + uint32_t acpi : 1; /* amd reserved */ + uint32_t mmx : 1; + uint32_t fxsr : 1; + uint32_t sse : 1; + uint32_t sse2 : 1; + uint32_t ss : 1; /* amd reserved */ + uint32_t htt : 1; + uint32_t tm : 1; /* amd reserved */ + uint32_t __reserved2 : 1; + uint32_t pbe : 1; /* amd reserved */ + } bits; + jit_uword_t cpuid; + } edx; #if __X32 - int ac, flags; + int ac, flags; #endif - jit_uword_t eax, ebx; + jit_uword_t eax, ebx; #if __X32 - /* adapted from glibc __sysconf */ - __asm__ volatile ("pushfl;\n\t" - "popl %0;\n\t" - "movl $0x240000, %1;\n\t" - "xorl %0, %1;\n\t" - "pushl %1;\n\t" - "popfl;\n\t" - "pushfl;\n\t" - "popl %1;\n\t" - "xorl %0, %1;\n\t" - "pushl %0;\n\t" - "popfl" - : "=r" (flags), "=r" (ac)); + /* adapted from glibc __sysconf */ + __asm__ volatile ("pushfl;\n\t" + "popl %0;\n\t" + "movl $0x240000, %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %1;\n\t" + "popfl;\n\t" + "pushfl;\n\t" + "popl %1;\n\t" + "xorl %0, %1;\n\t" + "pushl %0;\n\t" + "popfl" + : "=r" (flags), "=r" (ac)); - /* i386 or i486 without cpuid */ - if ((ac & (1 << 21)) == 0) - /* probably without x87 as well */ - return; + /* i386 or i486 without cpuid */ + if ((ac & (1 << 21)) == 0) + /* probably without x87 as well */ + return false; #endif /* query %eax = 1 function */ + __asm__ volatile ( #if __X32 || __X64_32 - __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" #else - __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" + 
"xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" #endif - : "=a" (eax), "=r" (ebx), - "=c" (ecx.cpuid), "=d" (edx.cpuid) - : "0" (1)); + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (1)); - jit_cpu.fpu = edx.bits.fpu; - jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b; - jit_cpu.cmov = edx.bits.cmov; - jit_cpu.mmx = edx.bits.mmx; - jit_cpu.sse = edx.bits.sse; - jit_cpu.sse2 = edx.bits.sse2; - jit_cpu.sse3 = ecx.bits.sse3; - jit_cpu.pclmulqdq = ecx.bits.pclmulqdq; - jit_cpu.ssse3 = ecx.bits.ssse3; - jit_cpu.fma = ecx.bits.fma; - jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b; - jit_cpu.sse4_1 = ecx.bits.sse4_1; - jit_cpu.sse4_2 = ecx.bits.sse4_2; - jit_cpu.movbe = ecx.bits.movbe; - jit_cpu.popcnt = ecx.bits.popcnt; - jit_cpu.aes = ecx.bits.aes; - jit_cpu.avx = ecx.bits.avx; + jit_cpu.fpu = edx.bits.fpu; + jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b; + jit_cpu.cmov = edx.bits.cmov; + jit_cpu.mmx = edx.bits.mmx; + jit_cpu.sse = edx.bits.sse; + jit_cpu.sse2 = edx.bits.sse2; + jit_cpu.sse3 = ecx.bits.sse3; + jit_cpu.pclmulqdq = ecx.bits.pclmulqdq; + jit_cpu.ssse3 = ecx.bits.ssse3; + jit_cpu.fma = ecx.bits.fma; + jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b; + jit_cpu.sse4_1 = ecx.bits.sse4_1; + jit_cpu.sse4_2 = ecx.bits.sse4_2; + jit_cpu.movbe = ecx.bits.movbe; + jit_cpu.popcnt = ecx.bits.popcnt; + jit_cpu.aes = ecx.bits.aes; + jit_cpu.avx = ecx.bits.avx; /* query %eax = 0x80000001 function */ #if __X64 + __asm__ volatile ( /* opened inside the __X64 guard: its operands and closing paren are only compiled there, so non-x64 builds must not see the opener */ # if __X64_32 - __asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" + "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" # else - __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" + "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" # endif - : "=a" (eax), "=r" (ebx), - "=c" (ecx.cpuid), "=d" (edx.cpuid) - : "0" (0x80000001)); - jit_cpu.lahf = ecx.cpuid & 1; -#endif -} - -void -_jit_init(jit_state_t *_jit) -{ -#if __X32 - int32_t regno; - static jit_bool_t first = 1; + : "=a" (eax), "=r" (ebx), + "=c" (ecx.cpuid), "=d" (edx.cpuid) + : "0" (0x80000001)); + 
jit_cpu.lahf = ecx.cpuid & 1; #endif - _jitc->reglen = jit_size(_rvs) - 1; -#if __X32 - if (first) { - if (!jit_cpu.sse2) { - for (regno = _jitc->reglen; regno >= 0; regno--) { - if (_rvs[regno].spec & jit_class_xpr) - _rvs[regno].spec = 0; - } - } - first = 0; - } -#endif -} - -void -_jit_prolog(jit_state_t *_jit) -{ - int32_t offset; - - if (_jitc->function) - jit_epilog(); - assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); - jit_regset_set_ui(&_jitc->regsav, 0); - offset = _jitc->functions.offset; - if (offset >= _jitc->functions.length) { - jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, - _jitc->functions.length * sizeof(jit_function_t), - (_jitc->functions.length + 16) * sizeof(jit_function_t)); - _jitc->functions.length += 16; - } - _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; - _jitc->function->self.size = stack_framesize; - _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.aoff = _jitc->function->self.alen = 0; - /* sse/x87 conversion */ - _jitc->function->self.aoff = CVT_OFFSET; - _jitc->function->self.call = jit_call_default; - jit_alloc((jit_pointer_t *)&_jitc->function->regoff, - _jitc->reglen * sizeof(int32_t)); - - /* _no_link here does not mean the jit_link() call can be removed - * by rewriting as: - * _jitc->function->prolog = jit_new_node(jit_code_prolog); - */ - _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); - jit_link(_jitc->function->prolog); - _jitc->function->prolog->w.w = offset; - _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); - /* u: label value - * v: offset in blocks vector - * w: offset in functions vector - */ - _jitc->function->epilog->w.w = offset; - - jit_regset_new(&_jitc->function->regset); -} - -int32_t -_jit_allocai(jit_state_t *_jit, int32_t length) -{ - assert(_jitc->function); - switch (length) { - case 0: case 1: break; - case 2: _jitc->function->self.aoff &= -2; break; - case 3: case 4: _jitc->function->self.aoff &= -4; break; 
- default: _jitc->function->self.aoff &= -8; break; - } - _jitc->function->self.aoff -= length; - - /* jit_allocai() may be called from jit_x86-cpu.c, and force a function - * generation restart on some conditions: div/rem and qmul/qdiv, due - * to registers constraints. - * The check is to prevent an assertion of a jit_xyz() being called - * during code generation, and attempting to add a node to the tail - * of the current IR generation. */ - if (!_jitc->realize) { - jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length); - jit_dec_synth(); - } - - return (_jitc->function->self.aoff); -} - -void -_jit_allocar(jit_state_t *_jit, int32_t u, int32_t v) -{ - int32_t reg; - assert(_jitc->function); - jit_inc_synth_ww(allocar, u, v); - if (!_jitc->function->allocar) { - _jitc->function->aoffoff = jit_allocai(sizeof(int32_t)); - _jitc->function->allocar = 1; - } - reg = jit_get_reg(jit_class_gpr); - jit_negr(reg, v); - jit_andi(reg, reg, -16); - jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); - jit_addr(u, u, reg); - jit_addr(JIT_SP, JIT_SP, reg); - jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); - jit_unget_reg(reg); - jit_dec_synth(); -} - -void -_jit_ret(jit_state_t *_jit) -{ - jit_node_t *instr; - assert(_jitc->function); - jit_inc_synth(ret); - /* jump to epilog */ - instr = jit_jmpi(); - jit_patch_at(instr, _jitc->function->epilog); - jit_dec_synth(); -} - -void -_jit_retr(jit_state_t *_jit, int32_t u) -{ - jit_inc_synth_w(retr, u); - /* movr(%ret, %ret) would be optimized out */ - if (JIT_RET != u) - jit_movr(JIT_RET, u); - /* explicitly tell it is live */ - jit_live(JIT_RET); - jit_ret(); - jit_dec_synth(); -} - -void -_jit_reti(jit_state_t *_jit, jit_word_t u) -{ - jit_inc_synth_w(reti, u); - jit_movi(JIT_RET, u); - jit_ret(); - jit_dec_synth(); -} - -void -_jit_retr_f(jit_state_t *_jit, int32_t u) -{ - jit_inc_synth_w(retr_f, u); - if (JIT_FRET != u) - jit_movr_f(JIT_FRET, u); - else - jit_live(JIT_FRET); - jit_ret(); - jit_dec_synth(); -} - -void 
-_jit_reti_f(jit_state_t *_jit, jit_float32_t u) -{ - jit_inc_synth_f(reti_f, u); - jit_movi_f(JIT_FRET, u); - jit_ret(); - jit_dec_synth(); -} - -void -_jit_retr_d(jit_state_t *_jit, int32_t u) -{ - jit_inc_synth_w(retr_d, u); - if (JIT_FRET != u) - jit_movr_d(JIT_FRET, u); - else - jit_live(JIT_FRET); - jit_ret(); - jit_dec_synth(); -} - -void -_jit_reti_d(jit_state_t *_jit, jit_float64_t u) -{ - jit_inc_synth_d(reti_d, u); - jit_movi_d(JIT_FRET, u); - jit_ret(); - jit_dec_synth(); -} - -void -_jit_epilog(jit_state_t *_jit) -{ - assert(_jitc->function); - assert(_jitc->function->epilog->next == NULL); - jit_link(_jitc->function->epilog); - _jitc->function = NULL; + return jit_cpu.sse2; } jit_bool_t -_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) +jit_init(jit_state_t *_jit) { - if (u->code == jit_code_arg) - return (jit_arg_reg_p(u->u.w)); - assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); - return (jit_arg_f_reg_p(u->u.w)); + return jit_cpu.sse2; } void -_jit_ellipsis(jit_state_t *_jit) +jit_epilog(jit_state_t *_jit) { - jit_inc_synth(ellipsis); - if (_jitc->prepare) { - jit_link_prepare(); - /* Remember that a varargs function call is being constructed. */ - assert(!(_jitc->function->call.call & jit_call_varargs)); - _jitc->function->call.call |= jit_call_varargs; - } - else { - jit_link_prolog(); - /* Remember the current function is varargs. */ - assert(!(_jitc->function->self.call & jit_call_varargs)); - _jitc->function->self.call |= jit_call_varargs; - -#if __X64 && !__CYGWIN__ - /* Allocate va_list like object in the stack. - * If applicable, with enough space to save all argument - * registers, and use fixed offsets for them. */ - _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t)); - - /* Initialize gp offset in save area. */ - if (jit_arg_reg_p(_jitc->function->self.argi)) - _jitc->function->vagp = _jitc->function->self.argi * 8; - else - _jitc->function->vagp = va_gp_max_offset; - - /* Initialize fp offset in save area. 
*/ - if (jit_arg_f_reg_p(_jitc->function->self.argf)) - _jitc->function->vafp = _jitc->function->self.argf * 16 + - va_gp_max_offset; - else - _jitc->function->vafp = va_fp_max_offset; -#endif - } - jit_dec_synth(); + /* TODO: Restore registers. */ } void -_jit_va_push(jit_state_t *_jit, int32_t u) +jit_calli(jit_state_t *_jit, jit_pointer_t f, + size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[]) { - jit_inc_synth_w(va_push, u); - jit_pushargr(u); - jit_dec_synth(); -} - -jit_node_t * -_jit_arg(jit_state_t *_jit) -{ - jit_node_t *node; - int32_t offset; - assert(_jitc->function); - assert(!(_jitc->function->self.call & jit_call_varargs)); -#if __X64 - if (jit_arg_reg_p(_jitc->function->self.argi)) { - offset = _jitc->function->self.argi++; -# if __CYGWIN__ - _jitc->function->self.size += sizeof(jit_word_t); -# endif - } - else -#endif - { - offset = _jitc->function->self.size; - _jitc->function->self.size += REAL_WORDSIZE; - } - node = jit_new_node_ww(jit_code_arg, offset, - ++_jitc->function->self.argn); - jit_link_prolog(); - return (node); -} - -jit_node_t * -_jit_arg_f(jit_state_t *_jit) -{ - jit_node_t *node; - int32_t offset; - assert(_jitc->function); - assert(!(_jitc->function->self.call & jit_call_varargs)); -#if __X64 -# if __CYGWIN__ - if (jit_arg_reg_p(_jitc->function->self.argi)) { - offset = _jitc->function->self.argi++; - _jitc->function->self.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->self.argf)) - offset = _jitc->function->self.argf++; -# endif - else -#endif - { - offset = _jitc->function->self.size; - _jitc->function->self.size += REAL_WORDSIZE; - } - node = jit_new_node_ww(jit_code_arg_f, offset, - ++_jitc->function->self.argn); - jit_link_prolog(); - return (node); -} - -jit_node_t * -_jit_arg_d(jit_state_t *_jit) -{ - jit_node_t *node; - int32_t offset; - assert(_jitc->function); - assert(!(_jitc->function->self.call & jit_call_varargs)); -#if __X64 -# if __CYGWIN__ - if 
(jit_arg_reg_p(_jitc->function->self.argi)) { - offset = _jitc->function->self.argi++; - _jitc->function->self.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->self.argf)) - offset = _jitc->function->self.argf++; -# endif - else -#endif - { - offset = _jitc->function->self.size; - _jitc->function->self.size += sizeof(jit_float64_t); - } - node = jit_new_node_ww(jit_code_arg_d, offset, - ++_jitc->function->self.argn); - jit_link_prolog(); - return (node); + /* TODO: Do the call! */ + calli(_jit, (jit_word_t)f); } void -_jit_getarg_c(jit_state_t *_jit, int32_t u, jit_node_t *v) +jit_callr(jit_state_t *_jit, jit_gpr_t f, + size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[]) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_c, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_extr_c(u, JIT_RA0 - v->u.w); - else -#endif - jit_ldxi_c(u, _RBP, v->u.w); - jit_dec_synth(); + /* TODO: Do the call! */ + callr(_jit, f); } void -_jit_getarg_uc(jit_state_t *_jit, int32_t u, jit_node_t *v) +jit_receive(jit_state_t *_jit, + size_t argc, const jit_arg_abi_t abi[], jit_arg_t args[]) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_uc, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_extr_uc(u, JIT_RA0 - v->u.w); - else -#endif - jit_ldxi_uc(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_getarg_s(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_s, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_extr_s(u, JIT_RA0 - v->u.w); - else -#endif - jit_ldxi_s(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_getarg_us(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_us, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_extr_us(u, JIT_RA0 - v->u.w); - else -#endif - jit_ldxi_us(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_getarg_i(jit_state_t *_jit, int32_t u, jit_node_t 
*v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_i, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) { -# if __X64_32 - jit_movr(u, JIT_RA0 - v->u.w); -# else - jit_extr_i(u, JIT_RA0 - v->u.w); -# endif - } - else -#endif - jit_ldxi_i(u, _RBP, v->u.w); - jit_dec_synth(); -} - -#if __X64 && !__X64_32 -void -_jit_getarg_ui(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_ui, u, v); - if (jit_arg_reg_p(v->u.w)) - jit_extr_ui(u, JIT_RA0 - v->u.w); - else - jit_ldxi_ui(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_getarg_l(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(getarg_l, u, v); - if (jit_arg_reg_p(v->u.w)) - jit_movr(u, JIT_RA0 - v->u.w); - else - jit_ldxi_l(u, _RBP, v->u.w); - jit_dec_synth(); -} -#endif - -void -_jit_putargr(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_movr(JIT_RA0 - v->u.w, u); - else -#endif - jit_stxi(v->u.w, _RBP, u); - jit_dec_synth(); -} - -void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) -{ - int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_movi(JIT_RA0 - v->u.w, u); - else -#endif - { - regno = jit_get_reg(jit_class_gpr); - jit_movi(regno, u); - jit_stxi(v->u.w, _RBP, regno); - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -void -_jit_getarg_f(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg_f); - jit_inc_synth_wp(getarg_f, u, v); -#if __X64 - if (jit_arg_f_reg_p(v->u.w)) - jit_movr_f(u, _XMM0 - v->u.w); - else -#endif - jit_ldxi_f(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_putargr_f(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg_f); - jit_inc_synth_wp(putargr_f, u, v); -#if __X64 - if 
(jit_arg_reg_p(v->u.w)) - jit_movr_f(_XMM0 - v->u.w, u); - else -#endif - jit_stxi_f(v->u.w, _RBP, u); - jit_dec_synth(); -} - -void -_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) -{ - int32_t regno; - assert(v->code == jit_code_arg_f); - jit_inc_synth_fp(putargi_f, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_movi_f(_XMM0 - v->u.w, u); - else -#endif - { - regno = jit_get_reg(jit_class_gpr); - jit_movi_f(regno, u); - jit_stxi_f(v->u.w, _RBP, regno); - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -void -_jit_getarg_d(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg_d); - jit_inc_synth_wp(getarg_d, u, v); -#if __X64 - if (jit_arg_f_reg_p(v->u.w)) - jit_movr_d(u, _XMM0 - v->u.w); - else -#endif - jit_ldxi_d(u, _RBP, v->u.w); - jit_dec_synth(); -} - -void -_jit_putargr_d(jit_state_t *_jit, int32_t u, jit_node_t *v) -{ - assert(v->code == jit_code_arg_d); - jit_inc_synth_wp(putargr_d, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_movr_d(_XMM0 - v->u.w, u); - else -#endif - jit_stxi_d(v->u.w, _RBP, u); - jit_dec_synth(); -} - -void -_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) -{ - int32_t regno; - assert(v->code == jit_code_arg_d); - jit_inc_synth_dp(putargi_d, u, v); -#if __X64 - if (jit_arg_reg_p(v->u.w)) - jit_movi_d(_XMM0 - v->u.w, u); - else -#endif - { - regno = jit_get_reg(jit_class_gpr); - jit_movi_d(regno, u); - jit_stxi_d(v->u.w, _RBP, regno); - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -void -_jit_pushargr(jit_state_t *_jit, int32_t u) -{ - assert(_jitc->function); - jit_inc_synth_w(pushargr, u); - jit_link_prepare(); -#if __X64 - if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movr(JIT_RA0 - _jitc->function->call.argi, u); - ++_jitc->function->call.argi; -# if __CYGWIN__ - if (_jitc->function->call.call & jit_call_varargs) - jit_stxi(_jitc->function->call.size, _RSP, u); - _jitc->function->call.size += sizeof(jit_word_t); -# endif - } - else -#endif - 
{ - jit_stxi(_jitc->function->call.size, _RSP, u); - _jitc->function->call.size += REAL_WORDSIZE; - } - jit_dec_synth(); -} - -void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) -{ - int32_t regno; - assert(_jitc->function); - jit_inc_synth_w(pushargi, u); - jit_link_prepare(); -#if __X64 - if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movi(JIT_RA0 - _jitc->function->call.argi, u); -# if __CYGWIN__ - if (_jitc->function->call.call & jit_call_varargs) - jit_stxi(_jitc->function->call.size, _RSP, - JIT_RA0 - _jitc->function->call.argi); - _jitc->function->call.size += sizeof(jit_word_t); -# endif - ++_jitc->function->call.argi; - } - else -#endif - { - regno = jit_get_reg(jit_class_gpr); - jit_movi(regno, u); - jit_stxi(_jitc->function->call.size, _RSP, regno); - _jitc->function->call.size += REAL_WORDSIZE; - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -void -_jit_pushargr_f(jit_state_t *_jit, int32_t u) -{ - assert(_jitc->function); - jit_inc_synth_w(pushargr_f, u); - jit_link_prepare(); -#if __X64 -# if __CYGWIN__ - if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movr_f(_XMM0 - _jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { - jit_stxi_f(_jitc->function->call.size, _RSP, - _XMM0 - _jitc->function->call.argi); - jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP, - _jitc->function->call.size); - } - ++_jitc->function->call.argi; - _jitc->function->call.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->self.argf)) { - jit_movr_f(_XMM0 - _jitc->function->call.argf, u); - ++_jitc->function->call.argf; - } -# endif - else -#endif - { - jit_stxi_f(_jitc->function->call.size, _RSP, u); - _jitc->function->call.size += REAL_WORDSIZE; - } - jit_dec_synth(); -} - -void -_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) -{ - int32_t regno; - assert(_jitc->function); - jit_inc_synth_f(pushargi_f, u); - jit_link_prepare(); -#if __X64 -# if __CYGWIN__ - if 
(jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movi_f(_XMM0 - _jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { - jit_stxi_f(_jitc->function->call.size, _RSP, - _XMM0 - _jitc->function->call.argi); - jit_ldxi_i(JIT_RA0 - _jitc->function->call.argi, _RSP, - _jitc->function->call.size); - } - ++_jitc->function->call.argi; - _jitc->function->call.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->call.argf)) { - jit_movi_f(_XMM0 - _jitc->function->call.argf, u); - ++_jitc->function->call.argf; - } -# endif - else -#endif - { - regno = jit_get_reg(jit_class_fpr); - jit_movi_f(regno, u); - jit_stxi_f(_jitc->function->call.size, _RSP, regno); - _jitc->function->call.size += REAL_WORDSIZE; - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -void -_jit_pushargr_d(jit_state_t *_jit, int32_t u) -{ - assert(_jitc->function); - jit_inc_synth_w(pushargr_d, u); - jit_link_prepare(); -#if __X64 -# if __CYGWIN__ - if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movr_d(_XMM0 - _jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { - jit_stxi_d(_jitc->function->call.size, _RSP, - _XMM0 - _jitc->function->call.argi); - jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP, - _jitc->function->call.size); - } - ++_jitc->function->call.argi; - _jitc->function->call.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->call.argf)) { - jit_movr_d(_XMM0 - _jitc->function->call.argf, u); - ++_jitc->function->call.argf; - } -# endif - else -#endif - { - jit_stxi_d(_jitc->function->call.size, _RSP, u); - _jitc->function->call.size += sizeof(jit_float64_t); - } - jit_dec_synth(); -} - -void -_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) -{ - int32_t regno; - assert(_jitc->function); - jit_inc_synth_d(pushargi_d, u); - jit_link_prepare(); -#if __X64 -# if __CYGWIN__ - if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movi_d(_XMM0 - 
_jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { - jit_stxi_d(_jitc->function->call.size, _RSP, - _XMM0 - _jitc->function->call.argi); - jit_ldxi_l(JIT_RA0 - _jitc->function->call.argi, _RSP, - _jitc->function->call.size); - } - ++_jitc->function->call.argi; - _jitc->function->call.size += sizeof(jit_word_t); - } -# else - if (jit_arg_f_reg_p(_jitc->function->call.argf)) { - jit_movi_d(_XMM0 - _jitc->function->call.argf, u); - ++_jitc->function->call.argf; - } -# endif - else -#endif - { - regno = jit_get_reg(jit_class_fpr); - jit_movi_d(regno, u); - jit_stxi_d(_jitc->function->call.size, _RSP, regno); - _jitc->function->call.size += sizeof(jit_float64_t); - jit_unget_reg(regno); - } - jit_dec_synth(); -} - -jit_bool_t -_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, int32_t regno) -{ -#if __X64 - int32_t spec; - - spec = jit_class(_rvs[regno].spec); - if (spec & jit_class_arg) { - if (spec & jit_class_gpr) { - regno = JIT_RA0 - regno; - if (regno >= 0 && regno < node->v.w) - return (1); - } - else if (spec & jit_class_fpr) { - regno = _XMM0 - regno; - if (regno >= 0 && regno < node->w.w) - return (1); - } - } -#endif - return (0); -} - -void -_jit_finishr(jit_state_t *_jit, int32_t r0) -{ - int32_t reg; - jit_node_t *call; - assert(_jitc->function); - reg = r0; - jit_inc_synth_w(finishr, r0); - if (_jitc->function->self.alen < _jitc->function->call.size) - _jitc->function->self.alen = _jitc->function->call.size; -#if __X64 -# if !__CYGWIN__ - if (_jitc->function->call.call & jit_call_varargs) { - if (jit_regno(reg) == _RAX) { - reg = jit_get_reg(jit_class_gpr); - jit_movr(reg, _RAX); - } - if (_jitc->function->call.argf) - jit_movi(_RAX, _jitc->function->call.argf); - else - jit_movi(_RAX, 0); - if (reg != r0) - jit_unget_reg(reg); - } -# endif -#endif - call = jit_callr(reg); - call->v.w = _jitc->function->call.argi; - call->w.w = _jitc->function->call.argf; - _jitc->function->call.argi = _jitc->function->call.argf = - 
_jitc->function->call.size = 0; - _jitc->prepare = 0; - jit_dec_synth(); -} - -jit_node_t * -_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) -{ -#if __X64 - int32_t reg; -#endif - jit_node_t *node; - assert(_jitc->function); - jit_inc_synth_w(finishi, (jit_word_t)i0); - if (_jitc->function->self.alen < _jitc->function->call.size) - _jitc->function->self.alen = _jitc->function->call.size; -#if __X64 - /* FIXME preventing %rax allocation is good enough, but for consistency - * it should automatically detect %rax is dead, in case it has run out - * registers, and not save/restore it, what would be wrong if using the - * the return value, otherwise, just a needless noop */ - /* >> prevent %rax from being allocated as the function pointer */ - jit_regset_setbit(&_jitc->regarg, _RAX); - reg = jit_get_reg(jit_class_gpr); - node = jit_movi(reg, (jit_word_t)i0); - jit_finishr(reg); - jit_unget_reg(reg); - /* << prevent %rax from being allocated as the function pointer */ - jit_regset_clrbit(&_jitc->regarg, _RAX); -#else - node = jit_calli(i0); - node->v.w = _jitc->function->call.argi; - node->w.w = _jitc->function->call.argf; -#endif - _jitc->function->call.argi = _jitc->function->call.argf = - _jitc->function->call.size = 0; - _jitc->prepare = 0; - jit_dec_synth(); - return (node); -} - -void -_jit_retval_c(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_c, r0); - jit_extr_c(r0, JIT_RET); - jit_dec_synth(); -} - -void -_jit_retval_uc(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_uc, r0); - jit_extr_uc(r0, JIT_RET); - jit_dec_synth(); -} - -void -_jit_retval_s(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_s, r0); - jit_extr_s(r0, JIT_RET); - jit_dec_synth(); -} - -void -_jit_retval_us(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_us, r0); - jit_extr_us(r0, JIT_RET); - jit_dec_synth(); -} - -void -_jit_retval_i(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_i, r0); -#if __X32 || __X64_32 - if (r0 != JIT_RET) - 
jit_movr(r0, JIT_RET); -#else - jit_extr_i(r0, JIT_RET); -#endif - jit_dec_synth(); -} - -#if __X64 && !__X64_32 -void -_jit_retval_ui(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_ui, r0); - jit_extr_ui(r0, JIT_RET); - jit_dec_synth(); -} - -void -_jit_retval_l(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_l, r0); - if (r0 != JIT_RET) - jit_movr(r0, JIT_RET); - jit_dec_synth(); -} -#endif - -void -_jit_retval_f(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_f, r0); -#if __X64 - if (r0 != JIT_FRET) - jit_movr_f(r0, JIT_FRET); -#endif - jit_dec_synth(); -} - -void -_jit_retval_d(jit_state_t *_jit, int32_t r0) -{ - jit_inc_synth_w(retval_d, r0); -#if __X64 - if (r0 != JIT_FRET) - jit_movr_d(r0, JIT_FRET); -#endif - jit_dec_synth(); -} - -jit_pointer_t -_emit_code(jit_state_t *_jit) -{ - jit_node_t *node; - jit_node_t *temp; - jit_word_t word; - int32_t value; - int32_t offset; - struct { - jit_node_t *node; - jit_word_t word; -#if DEVEL_DISASSEMBLER - jit_word_t prevw; -#endif - int32_t patch_offset; - } undo; -#if DEVEL_DISASSEMBLER - jit_word_t prevw; -#endif - - _jitc->function = NULL; - - jit_reglive_setup(); - - undo.word = 0; - undo.node = NULL; - undo.patch_offset = 0; -#define case_rr(name, type) \ - case jit_code_##name##r##type: \ - name##r##type(rn(node->u.w), rn(node->v.w)); \ - break -#define case_rw(name, type) \ - case jit_code_##name##i##type: \ - name##i##type(rn(node->u.w), node->v.w); \ - break -#define case_rf(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->v.w)) \ - x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - break -#define case_fr(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->u.w)) \ - x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - break -#define case_fw(name, type) \ - case jit_code_##name##i##type: \ - if 
(jit_x87_reg_p(node->u.w)) \ - x87_##name##i##type(rn(node->u.w), node->v.w); \ - else \ - sse_##name##i##type(rn(node->u.w), node->v.w); \ - break -#define case_wr(name, type) \ - case jit_code_##name##i##type: \ - name##i##type(node->u.w, rn(node->v.w)); \ - break -#define case_wf(name, type) \ - case jit_code_##name##i##type: \ - if (jit_x87_reg_p(node->v.w)) \ - x87_##name##i##type(node->u.w, rn(node->v.w)); \ - else \ - sse_##name##i##type(node->u.w, rn(node->v.w)); \ - break -#define case_ff(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->u.w) && \ - jit_x87_reg_p(node->v.w)) \ - x87_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), rn(node->v.w)); \ - break; -#define case_rrr(name, type) \ - case jit_code_##name##r##type: \ - name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_rrrr(name, type) \ - case jit_code_##name##r##type: \ - name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_frr(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->u.w)) \ - x87_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_rrf(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->w.w)) \ - x87_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_rrw(name, type) \ - case jit_code_##name##i##type: \ - name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ - break -#define case_rrrw(name, type) \ - case jit_code_##name##i##type: \ - name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ - rn(node->v.w), node->w.w); \ - break -#define case_frw(name, type) \ - case jit_code_##name##i##type: \ - if (jit_x87_reg_p(node->u.w)) \ - 
x87_##name##i##type(rn(node->u.w), \ - rn(node->v.w), node->w.w); \ - else \ - sse_##name##i##type(rn(node->u.w), \ - rn(node->v.w), node->w.w); \ - break -#define case_wrr(name, type) \ - case jit_code_##name##i##type: \ - name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ - break -#define case_wrf(name, type) \ - case jit_code_##name##i##type: \ - if (jit_x87_reg_p(node->w.w)) \ - x87_##name##i##type(node->u.w, \ - rn(node->v.w), rn(node->w.w)); \ - else \ - sse_##name##i##type(node->u.w, \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_brr(name, type) \ - case jit_code_##name##r##type: \ - temp = node->u.n; \ - assert(temp->code == jit_code_label || \ - temp->code == jit_code_epilog); \ - if (temp->flag & jit_flag_patch) \ - name##r##type(temp->u.w, rn(node->v.w), \ - rn(node->w.w)); \ - else { \ - word = name##r##type(_jit->pc.w, \ - rn(node->v.w), rn(node->w.w)); \ - patch(word, node); \ - } \ - break -#define case_brw(name, type) \ - case jit_code_##name##i##type: \ - temp = node->u.n; \ - assert(temp->code == jit_code_label || \ - temp->code == jit_code_epilog); \ - if (temp->flag & jit_flag_patch) \ - name##i##type(temp->u.w, \ - rn(node->v.w), node->w.w); \ - else { \ - word = name##i##type(_jit->pc.w, \ - rn(node->v.w), node->w.w); \ - patch(word, node); \ - } \ - break -#define case_rff(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->v.w) && \ - jit_x87_reg_p(node->w.w)) \ - x87_##name##r##type(rn(node->u.w), rn(node->v.w), \ - rn(node->w.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), rn(node->v.w), \ - rn(node->w.w)); \ - break; -#define case_rfw(name, type, size) \ - case jit_code_##name##i##type: \ - assert(node->flag & jit_flag_data); \ - if (jit_x87_reg_p(node->v.w)) \ - x87_##name##i##type(rn(node->u.w), rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - else \ - sse_##name##i##type(rn(node->u.w), rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - break -#define 
case_fff(name, type) \ - case jit_code_##name##r##type: \ - if (jit_x87_reg_p(node->u.w) && \ - jit_x87_reg_p(node->v.w) && \ - jit_x87_reg_p(node->w.w)) \ - x87_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - else \ - sse_##name##r##type(rn(node->u.w), \ - rn(node->v.w), rn(node->w.w)); \ - break -#define case_ffw(name, type, size) \ - case jit_code_##name##i##type: \ - assert(node->flag & jit_flag_data); \ - if (jit_x87_reg_p(node->u.w) && \ - jit_x87_reg_p(node->v.w)) \ - x87_##name##i##type(rn(node->u.w), rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - else \ - sse_##name##i##type(rn(node->u.w), rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - break -#define case_bff(name, type) \ - case jit_code_b##name##r##type: \ - temp = node->u.n; \ - assert(temp->code == jit_code_label || \ - temp->code == jit_code_epilog); \ - if (temp->flag & jit_flag_patch) { \ - if (jit_x87_reg_p(node->v.w) && \ - jit_x87_reg_p(node->w.w)) \ - x87_b##name##r##type(temp->u.w, \ - rn(node->v.w), rn(node->w.w)); \ - else \ - sse_b##name##r##type(temp->u.w, \ - rn(node->v.w), rn(node->w.w)); \ - } \ - else { \ - if (jit_x87_reg_p(node->v.w) && \ - jit_x87_reg_p(node->w.w)) \ - word = x87_b##name##r##type(_jit->pc.w, \ - rn(node->v.w), rn(node->w.w)); \ - else \ - word = sse_b##name##r##type(_jit->pc.w, \ - rn(node->v.w), rn(node->w.w)); \ - patch(word, node); \ - } \ - break -#define case_bfw(name, type, size) \ - case jit_code_b##name##i##type: \ - temp = node->u.n; \ - assert(temp->code == jit_code_label || \ - temp->code == jit_code_epilog); \ - if (temp->flag & jit_flag_patch) { \ - if (jit_x87_reg_p(node->v.w)) \ - x87_b##name##i##type(temp->u.w, \ - rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - else \ - sse_b##name##i##type(temp->u.w, \ - rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - } \ - else { \ - if (jit_x87_reg_p(node->v.w)) \ - word = x87_b##name##i##type(_jit->pc.w, \ - rn(node->v.w), \ - 
(jit_float##size##_t *)node->w.n->u.w); \ - else \ - word = sse_b##name##i##type(_jit->pc.w, \ - rn(node->v.w), \ - (jit_float##size##_t *)node->w.n->u.w); \ - patch(word, node); \ - } \ - break -#if DEVEL_DISASSEMBLER - prevw = _jit->pc.w; -#endif - for (node = _jitc->head; node; node = node->next) { - if (_jit->pc.uc >= _jitc->code.end) - return (NULL); - -#if DEVEL_DISASSEMBLER - node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw; - prevw = _jit->pc.w; -#endif - value = jit_classify(node->code); - jit_regarg_set(node, value); - switch (node->code) { - case jit_code_align: - assert(!(node->u.w & (node->u.w - 1)) && - node->u.w <= sizeof(jit_word_t)); - if ((word = _jit->pc.w & (node->u.w - 1))) - nop(node->u.w - word); - break; - case jit_code_note: case jit_code_name: - node->u.w = _jit->pc.w; - break; - case jit_code_label: - if ((node->link || (node->flag & jit_flag_use)) && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); - /* remember label is defined */ - node->flag |= jit_flag_patch; - node->u.w = _jit->pc.w; - break; - case_rrr(add,); - case_rrw(add,); - case_rrr(addx,); - case_rrw(addx,); - case_rrr(addc,); - case_rrw(addc,); - case_rrr(sub,); - case_rrw(sub,); - case_rrr(subx,); - case_rrw(subx,); - case_rrr(subc,); - case_rrw(subc,); - case_rrw(rsb,); - case_rrr(mul,); - case_rrw(mul,); - case_rrrr(qmul,); - case_rrrw(qmul,); - case_rrrr(qmul, _u); - case_rrrw(qmul, _u); - case_rrr(div,); - case_rrw(div,); - case_rrr(div, _u); - case_rrw(div, _u); - case_rrrr(qdiv,); - case_rrrw(qdiv,); - case_rrrr(qdiv, _u); - case_rrrw(qdiv, _u); - case_rrr(rem,); - case_rrw(rem,); - case_rrr(rem, _u); - case_rrw(rem, _u); - case_rrr(and,); - case_rrw(and,); - case_rrr(or,); - case_rrw(or,); - case_rrr(xor,); - case_rrw(xor,); - case_rrr(lsh,); - case_rrw(lsh,); - case_rrr(rsh,); - case_rrw(rsh,); - case_rrr(rsh, _u); - case_rrw(rsh, _u); - case_rr(neg,); - case_rr(com,); - case_rrr(lt,); - case_rrw(lt,); - case_rrr(lt, 
_u); - case_rrw(lt, _u); - case_rrr(le,); - case_rrw(le,); - case_rrr(le, _u); - case_rrw(le, _u); - case_rrr(eq,); - case_rrw(eq,); - case_rrr(ge,); - case_rrw(ge,); - case_rrr(ge, _u); - case_rrw(ge, _u); - case_rrr(gt,); - case_rrw(gt,); - case_rrr(gt, _u); - case_rrw(gt, _u); - case_rrr(ne,); - case_rrw(ne,); - case_rr(mov,); - case jit_code_movi: - if (node->flag & jit_flag_node) { - temp = node->v.n; - if (temp->code == jit_code_data || - (temp->code == jit_code_label && - (temp->flag & jit_flag_patch))) - movi(rn(node->u.w), temp->u.w); - else { - assert(temp->code == jit_code_label || - temp->code == jit_code_epilog); - word = movi_p(rn(node->u.w), node->v.w); - patch(word, node); - } - } - else - movi(rn(node->u.w), node->v.w); - break; - case_rr(hton, _us); - case_rr(hton, _ui); -#if __X64 && !__X64_32 - case_rr(hton, _ul); -#endif - case_rr(ext, _c); - case_rr(ext, _uc); - case_rr(ext, _s); - case_rr(ext, _us); -#if __X64 && !__X64_32 - case_rr(ext, _i); - case_rr(ext, _ui); -#endif - case_rf(trunc, _f_i); - case_rf(trunc, _d_i); -#if __X64 - case_rf(trunc, _f_l); - case_rf(trunc, _d_l); -#endif - case_rr(ld, _c); - case_rw(ld, _c); - case_rr(ld, _uc); - case_rw(ld, _uc); - case_rr(ld, _s); - case_rw(ld, _s); - case_rr(ld, _us); - case_rw(ld, _us); - case_rr(ld, _i); - case_rw(ld, _i); -#if __X64 && !__X64_32 - case_rr(ld, _ui); - case_rw(ld, _ui); - case_rr(ld, _l); - case_rw(ld, _l); -#endif - case_rrr(ldx, _c); - case_rrw(ldx, _c); - case_rrr(ldx, _uc); - case_rrw(ldx, _uc); - case_rrr(ldx, _s); - case_rrw(ldx, _s); - case_rrr(ldx, _us); - case_rrw(ldx, _us); - case_rrr(ldx, _i); - case_rrw(ldx, _i); -#if __X64 && !__X64_32 - case_rrr(ldx, _ui); - case_rrw(ldx, _ui); - case_rrr(ldx, _l); - case_rrw(ldx, _l); -#endif - case_rr(st, _c); - case_wr(st, _c); - case_rr(st, _s); - case_wr(st, _s); - case_rr(st, _i); - case_wr(st, _i); -#if __X64 && !__X64_32 - case_rr(st, _l); - case_wr(st, _l); -#endif - case_rrr(stx, _c); - case_wrr(stx, _c); - 
case_rrr(stx, _s); - case_wrr(stx, _s); - case_rrr(stx, _i); - case_wrr(stx, _i); -#if __X64 && !__X64_32 - case_rrr(stx, _l); - case_wrr(stx, _l); -#endif - case_brr(blt,); - case_brw(blt,); - case_brr(blt, _u); - case_brw(blt, _u); - case_brr(ble,); - case_brw(ble,); - case_brr(ble, _u); - case_brw(ble, _u); - case_brr(beq,); - case_brw(beq,); - case_brr(bge,); - case_brw(bge,); - case_brr(bge, _u); - case_brw(bge, _u); - case_brr(bgt,); - case_brw(bgt,); - case_brr(bgt, _u); - case_brw(bgt, _u); - case_brr(bne,); - case_brw(bne,); - case_brr(bms,); - case_brw(bms,); - case_brr(bmc,); - case_brw(bmc,); - case_brr(boadd,); - case_brw(boadd,); - case_brr(boadd, _u); - case_brw(boadd, _u); - case_brr(bxadd,); - case_brw(bxadd,); - case_brr(bxadd, _u); - case_brw(bxadd, _u); - case_brr(bosub,); - case_brw(bosub,); - case_brr(bosub, _u); - case_brw(bosub, _u); - case_brr(bxsub,); - case_brw(bxsub,); - case_brr(bxsub, _u); - case_brw(bxsub, _u); - case_fff(add, _f); - case_ffw(add, _f, 32); - case_fff(sub, _f); - case_ffw(sub, _f, 32); - case_ffw(rsb, _f, 32); - case_fff(mul, _f); - case_ffw(mul, _f, 32); - case_fff(div, _f); - case_ffw(div, _f, 32); - case_ff(abs, _f); - case_ff(neg, _f); - case_ff(sqrt, _f); - case_fr(ext, _f); - case_fr(ext, _d_f); - case_rff(lt, _f); - case_rfw(lt, _f, 32); - case_rff(le, _f); - case_rfw(le, _f, 32); - case_rff(eq, _f); - case_rfw(eq, _f, 32); - case_rff(ge, _f); - case_rfw(ge, _f, 32); - case_rff(gt, _f); - case_rfw(gt, _f, 32); - case_rff(ne, _f); - case_rfw(ne, _f, 32); - case_rff(unlt, _f); - case_rfw(unlt, _f, 32); - case_rff(unle, _f); - case_rfw(unle, _f, 32); - case_rff(uneq, _f); - case_rfw(uneq, _f, 32); - case_rff(unge, _f); - case_rfw(unge, _f, 32); - case_rff(ungt, _f); - case_rfw(ungt, _f, 32); - case_rff(ltgt, _f); - case_rfw(ltgt, _f, 32); - case_rff(ord, _f); - case_rfw(ord, _f, 32); - case_rff(unord, _f); - case_rfw(unord, _f, 32); - case jit_code_movr_f: - if (jit_x87_reg_p(node->u.w)) { - if 
(jit_x87_reg_p(node->v.w)) - x87_movr_f(rn(node->u.w), rn(node->v.w)); - else - x87_from_sse_f(rn(node->u.w), rn(node->v.w)); - } - else { - if (jit_sse_reg_p(node->v.w)) - sse_movr_f(rn(node->u.w), rn(node->v.w)); - else - sse_from_x87_f(rn(node->u.w), rn(node->v.w)); - } - break; - case jit_code_movi_f: - assert(node->flag & jit_flag_data); - if (jit_x87_reg_p(node->u.w)) - x87_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w); - else - sse_movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w); - break; - case_fr(ld, _f); - case_fw(ld, _f); - case_frr(ldx, _f); - case_frw(ldx, _f); - case_rf(st, _f); - case_wf(st, _f); - case_rrf(stx, _f); - case_wrf(stx, _f); - case_bff(lt, _f); - case_bfw(lt, _f, 32); - case_bff(le, _f); - case_bfw(le, _f, 32); - case_bff(eq, _f); - case_bfw(eq, _f, 32); - case_bff(ge, _f); - case_bfw(ge, _f, 32); - case_bff(gt, _f); - case_bfw(gt, _f, 32); - case_bff(ne, _f); - case_bfw(ne, _f, 32); - case_bff(unlt, _f); - case_bfw(unlt, _f, 32); - case_bff(unle, _f); - case_bfw(unle, _f, 32); - case_bff(uneq, _f); - case_bfw(uneq, _f, 32); - case_bff(unge, _f); - case_bfw(unge, _f, 32); - case_bff(ungt, _f); - case_bfw(ungt, _f, 32); - case_bff(ltgt, _f); - case_bfw(ltgt, _f, 32); - case_bff(ord, _f); - case_bfw(ord, _f, 32); - case_bff(unord, _f); - case_bfw(unord, _f, 32); - case_fff(add, _d); - case_ffw(add, _d, 64); - case_fff(sub, _d); - case_ffw(sub, _d, 64); - case_ffw(rsb, _d, 64); - case_fff(mul, _d); - case_ffw(mul, _d, 64); - case_fff(div, _d); - case_ffw(div, _d, 64); - case_ff(abs, _d); - case_ff(neg, _d); - case_ff(sqrt, _d); - case_fr(ext, _d); - case_fr(ext, _f_d); - case_rff(lt, _d); - case_rfw(lt, _d, 64); - case_rff(le, _d); - case_rfw(le, _d, 64); - case_rff(eq, _d); - case_rfw(eq, _d, 64); - case_rff(ge, _d); - case_rfw(ge, _d, 64); - case_rff(gt, _d); - case_rfw(gt, _d, 64); - case_rff(ne, _d); - case_rfw(ne, _d, 64); - case_rff(unlt, _d); - case_rfw(unlt, _d, 64); - case_rff(unle, _d); - case_rfw(unle, _d, 64); - 
case_rff(uneq, _d); - case_rfw(uneq, _d, 64); - case_rff(unge, _d); - case_rfw(unge, _d, 64); - case_rff(ungt, _d); - case_rfw(ungt, _d, 64); - case_rff(ltgt, _d); - case_rfw(ltgt, _d, 64); - case_rff(ord, _d); - case_rfw(ord, _d, 64); - case_rff(unord, _d); - case_rfw(unord, _d, 64); - case jit_code_movr_d: - if (jit_x87_reg_p(node->u.w)) { - if (jit_x87_reg_p(node->v.w)) - x87_movr_d(rn(node->u.w), rn(node->v.w)); - else - x87_from_sse_d(rn(node->u.w), rn(node->v.w)); - } - else { - if (jit_sse_reg_p(node->v.w)) - sse_movr_d(rn(node->u.w), rn(node->v.w)); - else - sse_from_x87_d(rn(node->u.w), rn(node->v.w)); - } - break; - case jit_code_movi_d: - assert(node->flag & jit_flag_data); - if (jit_x87_reg_p(node->u.w)) - x87_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w); - else - sse_movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w); - break; - case_fr(ld, _d); - case_fw(ld, _d); - case_frr(ldx, _d); - case_frw(ldx, _d); - case_rf(st, _d); - case_wf(st, _d); - case_rrf(stx, _d); - case_wrf(stx, _d); - case_bff(lt, _d); - case_bfw(lt, _d, 64); - case_bff(le, _d); - case_bfw(le, _d, 64); - case_bff(eq, _d); - case_bfw(eq, _d, 64); - case_bff(ge, _d); - case_bfw(ge, _d, 64); - case_bff(gt, _d); - case_bfw(gt, _d, 64); - case_bff(ne, _d); - case_bfw(ne, _d, 64); - case_bff(unlt, _d); - case_bfw(unlt, _d, 64); - case_bff(unle, _d); - case_bfw(unle, _d, 64); - case_bff(uneq, _d); - case_bfw(uneq, _d, 64); - case_bff(unge, _d); - case_bfw(unge, _d, 64); - case_bff(ungt, _d); - case_bfw(ungt, _d, 64); - case_bff(ltgt, _d); - case_bfw(ltgt, _d, 64); - case_bff(ord, _d); - case_bfw(ord, _d, 64); - case_bff(unord, _d); - case_bfw(unord, _d, 64); - case jit_code_jmpr: - jmpr(rn(node->u.w)); - break; - case jit_code_jmpi: - if (node->flag & jit_flag_node) { - temp = node->u.n; - assert(temp->code == jit_code_label || - temp->code == jit_code_epilog); - if (temp->flag & jit_flag_patch) - jmpi(temp->u.w); - else { - word = jmpi(_jit->pc.w); - patch(word, node); - } - } 
- else - jmpi(node->u.w); - break; - case jit_code_callr: - callr(rn(node->u.w)); - break; - case jit_code_calli: - if (node->flag & jit_flag_node) { - temp = node->u.n; - assert(temp->code == jit_code_label || - temp->code == jit_code_epilog); - word = calli(temp->u.w); - if (!(temp->flag & jit_flag_patch)) - patch(word, node); - } - else - calli(node->u.w); - break; - case jit_code_prolog: - _jitc->function = _jitc->functions.ptr + node->w.w; - undo.node = node; - undo.word = _jit->pc.w; -#if DEVEL_DISASSEMBLER - undo.prevw = prevw; -#endif - undo.patch_offset = _jitc->patches.offset; - restart_function: - _jitc->again = 0; - prolog(node); - break; - case jit_code_epilog: - assert(_jitc->function == _jitc->functions.ptr + node->w.w); - if (_jitc->again) { - for (temp = undo.node->next; - temp != node; temp = temp->next) { - if (temp->code == jit_code_label || - temp->code == jit_code_epilog) - temp->flag &= ~jit_flag_patch; - } - temp->flag &= ~jit_flag_patch; - node = undo.node; - _jit->pc.w = undo.word; -#if DEVEL_DISASSEMBLER - prevw = undo.prevw; -#endif - _jitc->patches.offset = undo.patch_offset; - goto restart_function; - } - if (node->link && - (word = _jit->pc.w & (sizeof(jit_word_t) - 1))) - nop(sizeof(jit_word_t) - word); - /* remember label is defined */ - node->flag |= jit_flag_patch; - node->u.w = _jit->pc.w; - epilog(node); - _jitc->function = NULL; - break; - case jit_code_va_start: - vastart(rn(node->u.w)); - break; - case jit_code_va_arg: - vaarg(rn(node->u.w), rn(node->v.w)); - break; - case jit_code_va_arg_d: - vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w)); - break; - case jit_code_live: case jit_code_ellipsis: - case jit_code_va_push: - case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: - case jit_code_arg_f: case jit_code_arg_d: - case jit_code_va_end: - case jit_code_ret: - case jit_code_retr: case jit_code_reti: - case jit_code_retr_f: case jit_code_reti_f: - case jit_code_retr_d: case jit_code_reti_d: - 
case jit_code_getarg_c: case jit_code_getarg_uc: - case jit_code_getarg_s: case jit_code_getarg_us: - case jit_code_getarg_i: -#if __X64 && !__X64_32 - case jit_code_getarg_ui: case jit_code_getarg_l: -#endif - case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: - case jit_code_putargr_f: case jit_code_putargi_f: - case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: - case jit_code_pushargr_f: case jit_code_pushargi_f: - case jit_code_pushargr_d: case jit_code_pushargi_d: - case jit_code_retval_c: case jit_code_retval_uc: - case jit_code_retval_s: case jit_code_retval_us: - case jit_code_retval_i: -#if __X64 && !__X32 - case jit_code_retval_ui: case jit_code_retval_l: -#endif - case jit_code_prepare: - case jit_code_finishr: case jit_code_finishi: - break; - case jit_code_retval_f: + const jit_reg_t gpr_args[] = { #if __X32 - if (jit_sse_reg_p(node->u.w)) { - fstpr(_ST1_REGNO); - sse_from_x87_f(rn(node->u.w), _ST0_REGNO); - } - else - fstpr(rn(node->u.w) + 1); + /* No GPRs in args. */ +#elif __CYGWIN__ + _RCX, _RDX, _R8, _R9 +#else + _RDI, _RSI, _RDX, _RCX, _R8, _R9 #endif - break; - case jit_code_retval_d: + }; + const jit_reg_t fpr_args[] = { #if __X32 - if (jit_sse_reg_p(node->u.w)) { - fstpr(_ST1_REGNO); - sse_from_x87_d(rn(node->u.w), _ST0_REGNO); - } - else - fstpr(rn(node->u.w) + 1); + /* No FPRs in args. 
*/ +#elif __CYGWIN__ + _XMM0, _XMM1, _XMM2, _XMM3 +#else + _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7 #endif - break; - default: - abort(); - } - jit_regarg_clr(node, value); - assert(_jitc->regarg == 0 && _jitc->synth == 0); - /* update register live state */ - jit_reglive(node); + }; + size_t gpr_arg_idx = 0; + size_t fpr_arg_idx = 0; + /* size_t stack_offset = 0; */ + size_t gpr_arg_count = sizeof(gpr_args) / sizeof(jit_reg_t); + size_t fpr_arg_count = sizeof(fpr_args) / sizeof(jit_reg_t); + +#if __CYGWIN__ +#define NEXT_GPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0) +#define NEXT_FPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0) +#else +#define NEXT_GPR() do { gpr_arg_idx++; } while (0) +#define NEXT_FPR() do { fpr_arg_idx++; } while (0) +#endif + + for (size_t i = 0; i < argc; i++) { + switch (abi[i]) { + case JIT_ARG_ABI_UINT8: + case JIT_ARG_ABI_INT8: + case JIT_ARG_ABI_UINT16: + case JIT_ARG_ABI_INT16: + case JIT_ARG_ABI_UINT32: + case JIT_ARG_ABI_INT32: + case JIT_ARG_ABI_UINT64: + case JIT_ARG_ABI_INT64: + case JIT_ARG_ABI_POINTER: + if (gpr_arg_idx < gpr_arg_count) { + args[i].kind = JIT_ARG_LOC_GPR; + args[i].loc.gpr = gpr_args[gpr_arg_idx]; + NEXT_GPR(); + } else { + abort(); + } + break; + case JIT_ARG_ABI_FLOAT: + case JIT_ARG_ABI_DOUBLE: + if (fpr_arg_idx < fpr_arg_count) { + args[i].kind = JIT_ARG_LOC_FPR; + args[i].loc.fpr = fpr_args[fpr_arg_idx]; + NEXT_FPR(); + } else { + abort(); + } + break; } -#undef case_bfw -#undef case_bff -#undef case_ffw -#undef case_rfw -#undef case_rff -#undef case_brw -#undef case_brr -#undef case_wrf -#undef case_wrr -#undef case_frw -#undef case_rrf -#undef case_rrw -#undef case_frr -#undef case_rrr -#undef case_wf -#undef case_fw -#undef case_fr -#undef case_rr - - for (offset = 0; offset < _jitc->patches.offset; offset++) { - node = _jitc->patches.ptr[offset].node; - word = node->code == jit_code_movi ? 
node->v.n->u.w : node->u.n->u.w; - patch_at(node, _jitc->patches.ptr[offset].inst, word); - } - - jit_flush(_jit->code.ptr, _jit->pc.uc); - - return (_jit->code.ptr); + } } -#define CODE 1 -# include "x86-cpu.c" -# include "x86-sse.c" -# include "x86-x87.c" -#undef CODE - void jit_flush(void *fptr, void *tptr) { } -void -_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0) -{ - ldxi(rn(r0), rn(r1), i0); -} - -void -_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1) -{ - stxi(i0, rn(r0), rn(r1)); -} - -void -_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0) -{ - if (jit_x87_reg_p(r0)) - x87_ldxi_d(rn(r0), rn(r1), i0); - else - sse_ldxi_d(rn(r0), rn(r1), i0); -} - -void -_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1) -{ - if (jit_x87_reg_p(r1)) - x87_stxi_d(i0, rn(r0), rn(r1)); - else - sse_stxi_d(i0, rn(r0), rn(r1)); -} - static void -_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc) { - int32_t flag; - - assert(node->flag & jit_flag_node); - if (node->code == jit_code_movi) - flag = node->v.n->flag; - else - flag = node->u.n->flag; - assert(!(flag & jit_flag_patch)); - if (_jitc->patches.offset >= _jitc->patches.length) { - jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, - _jitc->patches.length * sizeof(jit_patch_t), - (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); - _jitc->patches.length += 1024; - } - _jitc->patches.ptr[_jitc->patches.offset].inst = instr; - _jitc->patches.ptr[_jitc->patches.offset].node = node; - ++_jitc->patches.offset; -} - -static void -_sse_from_x87_f(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1); - sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); -} - -static void -_sse_from_x87_d(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1); - sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); -} - -static void 
-_x87_from_sse_f(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1); - x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); -} - -static void -_x87_from_sse_d(jit_state_t *_jit, int32_t r0, int32_t r1) -{ - sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1); - x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); } diff --git a/jit/x86.h b/jit/x86.h index 7e37f95f0..89e341da4 100644 --- a/jit/x86.h +++ b/jit/x86.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2018 Free Software Foundation, Inc. + * Copyright (C) 2012-2019 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -14,186 +14,202 @@ * License for more details. * * Authors: - * Paulo Cesar Pereira de Andrade + * Paulo Cesar Pereira de Andrade */ #ifndef _jit_x86_h #define _jit_x86_h -#define JIT_HASH_CONSTS 1 -#define JIT_NUM_OPERANDS 2 +#define JIT_HASH_CONSTS 1 +#define JIT_NUM_OPERANDS 2 /* * Types */ -#define jit_sse2_p() jit_cpu.sse2 -#define jit_x87_reg_p(reg) ((reg) >= _ST0 && (reg) <= _ST6) +#define jit_sse2_p() jit_cpu.sse2 +#define jit_x87_reg_p(reg) ((reg) >= _ST0 && (reg) <= _ST6) /* NOTE(review): this patch drops _ST0.._ST6 from jit_reg_t, so this macro (and the __X32 jit_f(i) below) names enumerators that no longer exist -- confirm they are unused or remove them */ #if __WORDSIZE == 32 -# if defined(__x86_64__) -# define __X64_32 1 -# define __X64 1 -# else -# define __X32 1 -# endif +# if defined(__x86_64__) +# define __X64 1 +# define __X64_32 1 +# define __X32 0 +# else +# define __X64 0 +# define __X64_32 0 +# define __X32 1 +# endif #else -# define __X64 1 +# define __X64 1 +# define __X64_32 0 +# define __X32 0 #endif -#define JIT_FP _RBP +#define JIT_FP _RBP typedef enum { #if __X32 -# define jit_r(i) (_RAX + (i)) -# define jit_r_num() 3 -# define jit_v(i) (_RBX + (i)) -# define jit_v_num() 3 -# define jit_f(i) (jit_cpu.sse2 ? _XMM0 + (i) : _ST0 + (i)) -# define jit_f_num() (jit_cpu.sse2 ? 8 : 6) -# define JIT_R0 _RAX -# define JIT_R1 _RCX -# define JIT_R2 _RDX - _RAX, _RCX, _RDX, -# define JIT_V0 _RBX -# define JIT_V1 _RSI -# define JIT_V2 _RDI - _RBX, _RSI, _RDI, - _RSP, _RBP, -# define JIT_F0 (jit_sse2_p() ? 
_XMM0 : _ST0) -# define JIT_F1 (jit_sse2_p() ? _XMM1 : _ST1) -# define JIT_F2 (jit_sse2_p() ? _XMM2 : _ST2) -# define JIT_F3 (jit_sse2_p() ? _XMM3 : _ST3) -# define JIT_F4 (jit_sse2_p() ? _XMM4 : _ST4) -# define JIT_F5 (jit_sse2_p() ? _XMM5 : _ST5) -# define JIT_F6 (jit_sse2_p() ? _XMM6 : _ST6) - _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7, -# define jit_sse_reg_p(reg) ((reg) >= _XMM0 && (reg) <= _XMM7) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 3 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 3 +# define jit_f(i) (jit_cpu.sse2 ? _XMM0 + (i) : _ST0 + (i)) +# define jit_f_num() (jit_cpu.sse2 ? 8 : 6) +# define JIT_R0 _RAX +# define JIT_R1 _RCX +# define JIT_R2 _RDX + _RAX, _RCX, _RDX, +# define JIT_V0 _RBX +# define JIT_V1 _RSI +# define JIT_V2 _RDI + _RBX, _RSI, _RDI, + _RSP, _RBP, +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_F5 _XMM5 +# define JIT_F6 _XMM6 + _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7, +# define jit_sse_reg_p(reg) ((reg) >= _XMM0 && (reg) <= _XMM7) #else # if __CYGWIN__ -# define jit_r(i) (_RAX + (i)) -# define jit_r_num() 3 -# define jit_v(i) (_RBX + (i)) -# define jit_v_num() 7 -# define jit_f(index) (_XMM4 + (index)) -# define jit_f_num() 12 -# define JIT_R0 _RAX -# define JIT_R1 _R10 -# define JIT_R2 _R11 -# define JIT_V0 _RBX -# define JIT_V1 _RDI -# define JIT_V2 _RSI -# define JIT_V3 _R12 -# define JIT_V4 _R13 -# define JIT_V5 _R14 -# define JIT_V6 _R15 - /* Volatile - Return value register */ - _RAX, - /* Volatile */ - _R10, _R11, - /* Nonvolatile */ - _RBX, _RDI, _RSI, - _R12, _R13, _R14, _R15, - /* Volatile - Integer arguments (4 to 1) */ - _R9, _R8, _RDX, _RCX, - /* Nonvolatile */ - _RSP, _RBP, -# define JIT_F0 _XMM4 -# define JIT_F1 _XMM5 -# define JIT_F2 _XMM6 -# define JIT_F3 _XMM7 -# define JIT_F4 _XMM8 -# define JIT_F5 _XMM9 -# define JIT_F6 _XMM10 -# define JIT_F7 _XMM11 -# define JIT_F8 _XMM12 -# define 
JIT_F9 _XMM13 -# define JIT_F10 _XMM14 -# define JIT_F11 _XMM15 - /* Volatile */ - _XMM4, _XMM5, - /* Nonvolatile */ - _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, - _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, - /* Volatile - FP arguments (4 to 1) */ - _XMM3, _XMM2, _XMM1, _XMM0, -# define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 3 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 7 +# define jit_f(index) (_XMM4 + (index)) +# define jit_f_num() 12 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_V0 _RBX +# define JIT_V1 _RDI +# define JIT_V2 _RSI +# define JIT_V3 _R12 +# define JIT_V4 _R13 +# define JIT_V5 _R14 +# define JIT_V6 _R15 + /* Volatile - Return value register */ + _RAX, + /* Volatile */ + _R10, _R11, + /* Nonvolatile */ + _RBX, _RDI, _RSI, + _R12, _R13, _R14, _R15, + /* Volatile - Integer arguments (4 to 1) */ + _R9, _R8, _RDX, _RCX, + /* Nonvolatile */ + _RSP, _RBP, +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_F5 _XMM5 +# define JIT_F6 _XMM6 +# define JIT_F7 _XMM7 +# define JIT_F8 _XMM8 +# define JIT_F9 _XMM9 +# define JIT_F10 _XMM10 +# define JIT_F11 _XMM11 +# define JIT_F12 _XMM12 +# define JIT_F13 _XMM13 +# define JIT_F14 _XMM14 +# define JIT_F15 _XMM15 + /* Volatile */ + _XMM4, _XMM5, + /* Nonvolatile */ + _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, + _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, + /* Volatile - FP arguments (4 to 1) */ + _XMM3, _XMM2, _XMM1, _XMM0, +# define jit_sse_reg_p(reg) ((reg) >= _XMM4 && (reg) <= _XMM0) # else -# define jit_r(i) (_RAX + (i)) -# define jit_r_num() 4 -# define jit_v(i) (_RBX + (i)) -# define jit_v_num() 4 -# define jit_f(index) (_XMM8 + (index)) -# define jit_f_num() 8 -# define JIT_R0 _RAX -# define JIT_R1 _R10 -# define JIT_R2 _R11 -# define JIT_R3 _R12 - _RAX, _R10, _R11, _R12, -# define JIT_V0 _RBX -# define JIT_V1 _R13 -# define JIT_V2 _R14 -# 
define JIT_V3 _R15 - _RBX, _R13, _R14, _R15, - _R9, _R8, _RCX, _RDX, _RSI, _RDI, - _RSP, _RBP, -# define JIT_F0 _XMM8 -# define JIT_F1 _XMM9 -# define JIT_F2 _XMM10 -# define JIT_F3 _XMM11 -# define JIT_F4 _XMM12 -# define JIT_F5 _XMM13 -# define JIT_F6 _XMM14 -# define JIT_F7 _XMM15 - _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, - _XMM7, _XMM6, _XMM5, _XMM4, _XMM3, _XMM2, _XMM1, _XMM0, -# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) +# define jit_r(i) (_RAX + (i)) +# define jit_r_num() 4 +# define jit_v(i) (_RBX + (i)) +# define jit_v_num() 4 +# define jit_f(index) (_XMM8 + (index)) +# define jit_f_num() 8 +# define JIT_R0 _RAX +# define JIT_R1 _R10 +# define JIT_R2 _R11 +# define JIT_R3 _R12 + _RAX, _R10, _R11, _R12, +# define JIT_V0 _RBX +# define JIT_V1 _R13 +# define JIT_V2 _R14 +# define JIT_V3 _R15 + _RBX, _R13, _R14, _R15, + _R9, _R8, _RCX, _RDX, _RSI, _RDI, + _RSP, _RBP, +# define JIT_F0 _XMM0 +# define JIT_F1 _XMM1 +# define JIT_F2 _XMM2 +# define JIT_F3 _XMM3 +# define JIT_F4 _XMM4 +# define JIT_F5 _XMM5 +# define JIT_F6 _XMM6 +# define JIT_F7 _XMM7 +# define JIT_F8 _XMM8 +# define JIT_F9 _XMM9 +# define JIT_F10 _XMM10 +# define JIT_F11 _XMM11 +# define JIT_F12 _XMM12 +# define JIT_F13 _XMM13 +# define JIT_F14 _XMM14 +# define JIT_F15 _XMM15 + _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, + _XMM7, _XMM6, _XMM5, _XMM4, _XMM3, _XMM2, _XMM1, _XMM0, +# define jit_sse_reg_p(reg) ((reg) >= _XMM8 && (reg) <= _XMM0) # endif #endif - _ST0, _ST1, _ST2, _ST3, _ST4, _ST5, _ST6, -# define JIT_NOREG _NOREG - _NOREG, +# define JIT_NOREG _NOREG + _NOREG, } jit_reg_t; typedef struct { - /* x87 present */ - uint32_t fpu : 1; - /* cmpxchg8b instruction */ - uint32_t cmpxchg8b : 1; - /* cmov and fcmov branchless conditional mov */ - uint32_t cmov : 1; - /* mmx registers/instructions available */ - uint32_t mmx : 1; - /* sse registers/instructions available */ - uint32_t sse : 1; - /* sse2 registers/instructions available */ - 
uint32_t sse2 : 1; - /* sse3 instructions available */ - uint32_t sse3 : 1; - /* pcmulqdq instruction */ - uint32_t pclmulqdq : 1; - /* ssse3 suplemental sse3 instructions available */ - uint32_t ssse3 : 1; - /* fused multiply/add using ymm state */ - uint32_t fma : 1; - /* cmpxchg16b instruction */ - uint32_t cmpxchg16b : 1; - /* sse4.1 instructions available */ - uint32_t sse4_1 : 1; - /* sse4.2 instructions available */ - uint32_t sse4_2 : 1; - /* movbe instruction available */ - uint32_t movbe : 1; - /* popcnt instruction available */ - uint32_t popcnt : 1; - /* aes instructions available */ - uint32_t aes : 1; - /* avx instructions available */ - uint32_t avx : 1; - /* lahf/sahf available in 64 bits mode */ - uint32_t lahf : 1; + /* x87 present */ + uint32_t fpu : 1; + /* cmpxchg8b instruction */ + uint32_t cmpxchg8b : 1; + /* cmov and fcmov branchless conditional mov */ + uint32_t cmov : 1; + /* mmx registers/instructions available */ + uint32_t mmx : 1; + /* sse registers/instructions available */ + uint32_t sse : 1; + /* sse2 registers/instructions available */ + uint32_t sse2 : 1; + /* sse3 instructions available */ + uint32_t sse3 : 1; + /* pclmulqdq instruction */ + uint32_t pclmulqdq : 1; + /* ssse3 supplemental sse3 instructions available */ + uint32_t ssse3 : 1; + /* fused multiply/add using ymm state */ + uint32_t fma : 1; + /* cmpxchg16b instruction */ + uint32_t cmpxchg16b : 1; + /* sse4.1 instructions available */ + uint32_t sse4_1 : 1; + /* sse4.2 instructions available */ + uint32_t sse4_2 : 1; + /* movbe instruction available */ + uint32_t movbe : 1; + /* popcnt instruction available */ + uint32_t popcnt : 1; + /* aes instructions available */ + uint32_t aes : 1; + /* avx instructions available */ + uint32_t avx : 1; + /* lahf/sahf available in 64 bits mode */ + uint32_t lahf : 1; } jit_cpu_t; /* * Initialization */ -JIT_API jit_cpu_t jit_cpu; +JIT_API jit_cpu_t jit_cpu; #endif /* _jit_x86_h */ diff --git a/tests/Makefile b/tests/Makefile new
file mode 100644
index 000000000..ee41e5e8e
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,16 @@
+TESTS = addr
+
+CC = gcc
+CFLAGS = -Wall -O0 -g
+
+all: $(addprefix test-,$(TESTS))
+
+jit.o: ../jit.h ../jit/*.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -flto -I.. -o jit.o -c ../jit/jit.c
+
+test-%: test-%.c jit.o test.h
+	$(CC) $(CFLAGS) $(CPPFLAGS) -flto -I.. -o $@ jit.o $<
+
+clean:
+	rm -f $(addprefix test-,$(TESTS))
+	rm -f jit.o
diff --git a/tests/test-addr.c b/tests/test-addr.c
new file mode 100644
index 000000000..8ead832dd
--- /dev/null
+++ b/tests/test-addr.c
@@ -0,0 +1,29 @@
+#include "test.h"
+
+/* Emit a two-argument function that returns the sum of its int
+   arguments into the arena, then call it and check the result.  */
+static void
+run_test(jit_state_t *j, uint8_t *arena_base, size_t arena_size)
+{
+  jit_begin(j, arena_base, arena_size);
+
+  jit_arg_abi_t abi[] = { JIT_ARG_ABI_INT32, JIT_ARG_ABI_INT32 };
+  jit_arg_t args[2];
+  jit_receive(j, 2, abi, args);
+  ASSERT(args[0].kind == JIT_ARG_LOC_GPR);
+  ASSERT(args[1].kind == JIT_ARG_LOC_GPR);
+  jit_addr(j, JIT_R0, args[0].loc.gpr, args[1].loc.gpr);
+  jit_retr(j, JIT_R0);
+
+  size_t size = 0;
+  void* ret = jit_end(j, &size);
+
+  int (*f)(int, int) = ret;
+  ASSERT(f(42, 69) == 111);
+}
+
+int
+main (int argc, char *argv[])
+{
+  return main_helper(argc, argv, run_test);
+}
diff --git a/tests/test.h b/tests/test.h
new file mode 100644
index 000000000..bc30507ca
--- /dev/null
+++ b/tests/test.h
@@ -0,0 +1,47 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <jit.h>
+
+/* Print the failing expression with its source location and abort if
+   X is false.  Unlike assert(), this does not depend on NDEBUG.  */
+#define ASSERT(x) \
+  do { \
+    if (!(x)) { \
+      fprintf(stderr, "%s:%d: assertion failed: " #x "\n", \
+              __FILE__, __LINE__); \
+      abort(); \
+    } \
+  } while (0)
+
+/* Initialize the JIT, map a 4096-byte read/write/execute arena, hand
+   it to RUN_TEST, then destroy the state and unmap the arena.
+   Returns 0 on success, or 1 if the arena could not be mapped.  */
+static inline int
+main_helper (int argc, char *argv[],
+             void (*run_test)(jit_state_t*, uint8_t*, size_t))
+{
+  ASSERT(init_jit());
+  jit_state_t *j = jit_new_state();
+  ASSERT(j);
+
+  const size_t arena_size = 4096;
+  char *arena_base = mmap (NULL, arena_size,
+                           PROT_EXEC | PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+  if (arena_base == MAP_FAILED)
+    {
+      perror ("allocating JIT code buffer failed");
+      return 1;
+    }
+
+  run_test(j, (uint8_t*)arena_base, arena_size);
+
+  jit_destroy_state(j);
+
+  munmap(arena_base, arena_size);
+
+  return 0;
+}