diff --git a/ChangeLog b/ChangeLog index a9ee7f494..8731f1e20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +2013-07-16 Paulo Andrade + + * include/lightning/jit_aarch64.h, lib/jit_aarch64-cpu.c, + lib/jit_aarch64-fpu.c, lib/jit_aarch64.c: New files + implementing the new aarch64 port, as a new architecture, + not as an expansion of the existing armv[4-7] port. + + * check/lightning.c: Add aarch64 support and a small + change to recognize character constants as immediate + values. + + * check/float.tst: Add aarch64 preprocessor conditionals + to select proper expected value when converting [+-]Inf + and NaN to integer. + + * include/lightning/jit_arm.h, lib/jit_arm.c: Minor changes + to better match the new aarch64 files. + + * configure.ac, include/lightning.h, + include/lightning/Makefile.am, include/lightning/jit_private.h, + lib/Makefile.am, lib/lightning.c: Minor adjustments + for the aarch64 port. + 2013-07-08 Paulo Andrade * NEWS, THANKS, configure.ac, doc/version.texi: Update for diff --git a/check/float.tst b/check/float.tst index f905dbeeb..0aa8605f8 100644 --- a/check/float.tst +++ b/check/float.tst @@ -16,12 +16,12 @@ ok: #if __mips__ || __sparc__ || __hppa__ # define wnan x7f -#elif __arm__ +#elif __arm__ || __aarch64__ # define wnan 0 #else # define wnan x80 #endif -#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ +#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ # define wpinf x7f #else # define wpinf x80 diff --git a/check/lightning.c b/check/lightning.c index 4c18d27d2..83dfd8f9d 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -874,6 +874,10 @@ get_imm(void) ungetch(ch); value = get_int(skip_none); break; + case '\'': + character(); + value = parser.value.i; + break; case '$': switch (expression()) { case tok_int: @@ -1329,6 +1333,10 @@ movi(void) ungetch(ch); value = (void *)(long)get_uint(skip_none); break; + case '\'': + character(); + value = (void *)parser.value.i; + break; case '$': 
switch (expression()) { case tok_int: @@ -4042,6 +4050,11 @@ main(int argc, char *argv[]) opt_short += snprintf(cmdline + opt_short, sizeof(cmdline) - opt_short, " -D__sgi__=1"); +#endif +#if defined(__aarch64__) + opt_short += snprintf(cmdline + opt_short, + sizeof(cmdline) - opt_short, + " -D__aarch64__=1"); #endif if ((parser.fp = popen(cmdline, "r")) == NULL) error("cannot execute %s", cmdline); diff --git a/configure.ac b/configure.ac index 061d8de1d..92128c3a5 100644 --- a/configure.ac +++ b/configure.ac @@ -99,15 +99,17 @@ case "$target_cpu" in *sparc*) cpu=sparc ;; ia64) cpu=ia64 ;; hppa*) cpu=hppa ;; + aarch64) cpu=aarch64 ;; *) ;; esac -AM_CONDITIONAL(cpu_arm, [test cpu-$cpu = cpu-arm]) -AM_CONDITIONAL(cpu_mips, [test cpu-$cpu = cpu-mips]) -AM_CONDITIONAL(cpu_ppc, [test cpu-$cpu = cpu-ppc]) -AM_CONDITIONAL(cpu_sparc, [test cpu-$cpu = cpu-sparc]) -AM_CONDITIONAL(cpu_x86, [test cpu-$cpu = cpu-x86]) -AM_CONDITIONAL(cpu_ia64, [test cpu-$cpu = cpu-ia64]) -AM_CONDITIONAL(cpu_hppa, [test cpu-$cpu = cpu-hppa]) +AM_CONDITIONAL(cpu_arm, [test cpu-$cpu = cpu-arm]) +AM_CONDITIONAL(cpu_mips, [test cpu-$cpu = cpu-mips]) +AM_CONDITIONAL(cpu_ppc, [test cpu-$cpu = cpu-ppc]) +AM_CONDITIONAL(cpu_sparc, [test cpu-$cpu = cpu-sparc]) +AM_CONDITIONAL(cpu_x86, [test cpu-$cpu = cpu-x86]) +AM_CONDITIONAL(cpu_ia64, [test cpu-$cpu = cpu-ia64]) +AM_CONDITIONAL(cpu_hppa, [test cpu-$cpu = cpu-hppa]) +AM_CONDITIONAL(cpu_aarch64, [test cpu-$cpu = cpu-aarch64]) # Test x87 if both, x87 and sse2 available ac_cv_test_x86_x87= diff --git a/include/lightning.h b/include/lightning.h index 8e8e4859c..fe9c86fce 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -124,6 +124,8 @@ typedef jit_int32_t jit_fpr_t; # include #elif defined(__hppa__) # include +#elif defined(__aarch64__) +# include #endif #define jit_flag_node 0x00000001 /* patch node not absolute */ diff --git a/include/lightning/Makefile.am b/include/lightning/Makefile.am index 8cb47695f..c9abb777d 100644 --- 
a/include/lightning/Makefile.am +++ b/include/lightning/Makefile.am @@ -45,3 +45,7 @@ if cpu_hppa lightning_include_HEADERS = \ jit_hppa.h endif +if cpu_aarch64 +lightning_include_HEADERS = \ + jit_aarch64.h +endif diff --git a/include/lightning/jit_aarch64.h b/include/lightning/jit_aarch64.h new file mode 100644 index 000000000..b18583780 --- /dev/null +++ b/include/lightning/jit_aarch64.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2013 Free Software Foundation, Inc. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#ifndef _jit_aarch64_h +#define _jit_aarch64_h + +#define JIT_HASH_CONSTS 0 +#define JIT_NUM_OPERANDS 3 + +/* + * Types + */ +#define JIT_FP _R29 +typedef enum { /* Register identifiers. The JIT_R, JIT_V, JIT_F, JIT_RA0 and JIT_FA0 alias #defines are interleaved with the enumerators they name. jit_r(i), jit_v(i) and jit_f(i) add i to _R9, _R19 and _V8, so the runs _R9.._R15, _R19.._R28 and _V8.._V15 must stay consecutive and ascending, and jit_r_num(), jit_v_num() and jit_f_num() must match their lengths. */ +#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) +#define jit_r(i) (_R9 + (i)) +#define jit_r_num() 7 +#define jit_v(i) (_R19 + (i)) +#define jit_v_num() 10 +#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +#define jit_f(i) (_V8 + (i)) +#define jit_f_num() 8 +#define JIT_R0 _R9 +#define JIT_R1 _R10 +#define JIT_R2 _R11 + _R8, /* indirect result */ + _R18, /* platform register */ + _R17, /* IP1 */ + _R16, /* IP0 */ + _R9, _R10, _R11, _R12, /* temporaries */ + _R13, _R14, _R15, +#define JIT_V0 _R19 +#define JIT_V1 _R20 +#define JIT_V2 _R21 + _R19, _R20, _R21, _R22, /* callee save */ + _R23, _R24, _R25, _R26, + _R27, _R28, + _SP, /* stack pointer */ + _R30, /* link register */ + _R29, /* frame pointer */ +#define JIT_RA0 _R0 + _R7, _R6, _R5, _R4, + _R3, _R2, _R1, _R0, +#define JIT_F0 _V8
+#define JIT_F1 _V9 +#define JIT_F2 _V10 +#define JIT_F3 _V11 +#define JIT_F4 _V12 +#define JIT_F5 _V13 + _V31, _V30, _V29, _V28, /* temporaries */ + _V27, _V26, _V25, _V24, + _V23, _V22, _V21, _V20, + _V19, _V18, _V17, _V16, + /* callee save */ + _V8, _V9, _V10, _V11, + _V12, _V13, _V14, _V15, +#define JIT_FA0 _V0 + _V7, _V6, _V5, _V4, /* arguments */ + _V3, _V2, _V1, _V0, + _NOREG, +#define JIT_NOREG _NOREG +} jit_reg_t; + +typedef jit_int64_t jit_regset_t; /* NOTE(review): presumably a bitmask with one bit per jit_reg_t value (64 registers are enumerated before _NOREG) -- confirm against the users of jit_regset_t */ + +#endif /* _jit_aarch64_h */ diff --git a/include/lightning/jit_arm.h b/include/lightning/jit_arm.h index adcb99fba..16889c74a 100644 --- a/include/lightning/jit_arm.h +++ b/include/lightning/jit_arm.h @@ -56,9 +56,6 @@ typedef enum { _R14, /* lr - link register */ _R15, /* pc - program counter */ #define JIT_RA0 _R0 -#define JIT_RA1 _R1 -#define JIT_RA2 _R2 -#define JIT_RA3 _R3 _R3, /* r3 - argument/result */ _R2, /* r2 - argument/result */ _R1, /* r1 - argument/result */ @@ -69,8 +66,6 @@ typedef enum { #define JIT_F3 (jit_hardfp_p() ? _D11 : _D3) #define JIT_F4 (jit_hardfp_p() ? _D12 : _D4) #define JIT_F5 (jit_hardfp_p() ? _D13 : _D5) -#define JIT_F6 (jit_hardfp_p() ? _D14 : _D6) -#define JIT_F7 (jit_hardfp_p() ?
_D15 : _D7) _S16, _D8 = _S16, _Q4 = _D8, _S17, _S18, _D9 = _S18, @@ -88,13 +83,6 @@ typedef enum { _S30, _D15 = _S30, _S31, #define JIT_FA0 _D0 -#define JIT_FA1 _D1 -#define JIT_FA2 _D2 -#define JIT_FA3 _D3 -#define JIT_FA4 _D4 -#define JIT_FA5 _D5 -#define JIT_FA6 _D6 -#define JIT_FA7 _D7 _S15, _S14, _D7 = _S14, _S13, diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index f8d6cc2db..6941caf11 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -81,6 +81,10 @@ # define JIT_SP _R30 # define JIT_RET _R28 # define JIT_FRET _F4 +#elif defined(__aarch64__) +# define JIT_SP _SP +# define JIT_RET _R0 +# define JIT_FRET _V0 #endif #define jit_size(vector) (sizeof(vector) / sizeof((vector)[0])) diff --git a/lib/Makefile.am b/lib/Makefile.am index acd46ade3..185263931 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -24,6 +24,9 @@ liblightning_la_SOURCES = \ lightning.c EXTRA_DIST = \ + jit_aarch64.c \ + jit_aarch64-cpu.c \ + jit_aarch64-fpu.c \ jit_arm.c \ jit_arm-cpu.c \ jit_arm-swf.c \ diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c new file mode 100644 index 000000000..2c7c8035e --- /dev/null +++ b/lib/jit_aarch64-cpu.c @@ -0,0 +1,2272 @@ +/* + * Copyright (C) 2013 Free Software Foundation, Inc. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * Authors:
+ *	Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+typedef union {	/* one 32-bit A64 instruction word (w) overlaid with one bit-field view per encoding field; in every view `_' is padding and `b' is the field */
+/* aarch64-opc.c */
+# define ui jit_uint32_t
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+    /* cond2: condition in truly conditional-executed inst. */
+    struct { ui b: 4; } cond2;
+    /* nzcv: flag bit specifier, encoded in the "nzcv" field. */
+    struct { ui b: 4; } nzcv;
+    /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate. */
+    struct { ui _: 5; ui b: 5; } defgh;
+    /* abc: a:b:c bits in AdvSIMD modified immediate. */
+    struct { ui _: 16; ui b: 3; } abc;
+    /* imm19: e.g. in CBZ. */
+    struct { ui _: 5; ui b: 19; } imm19;
+    /* immhi: e.g. in ADRP. */
+    struct { ui _: 5; ui b: 19; } immhi;
+    /* immlo: e.g. in ADRP. */
+    struct { ui _: 29; ui b: 2; } immlo;
+    /* size: in most AdvSIMD and floating-point instructions. */
+    struct { ui _: 22; ui b: 2; } size;
+    /* vldst_size: size field in the AdvSIMD load/store inst. */
+    struct { ui _: 10; ui b: 2; } vldst_size;
+    /* op: in AdvSIMD modified immediate instructions. */
+    struct { ui _: 29; ui b: 1; } op;
+    /* Q: in most AdvSIMD instructions. */
+    struct { ui _: 30; ui b: 1; } Q;
+    /* Rt: in load/store instructions. */
+    struct { ui b: 5; } Rt;
+    /* Rd: in many integer instructions. */
+    struct { ui b: 5; } Rd;
+    /* Rn: in many integer instructions. */
+    struct { ui _: 5; ui b: 5; } Rn;
+    /* Rt2: in load/store pair instructions. */
+    struct { ui _: 10; ui b: 5; } Rt2;
+    /* Ra: in fp instructions. */
+    struct { ui _: 10; ui b: 5; } Ra;
+    /* op2: in the system instructions. */
+    struct { ui _: 5; ui b: 3; } op2;
+    /* CRm: in the system instructions. */
+    struct { ui _: 8; ui b: 4; } CRm;
+    /* CRn: in the system instructions. */
+    struct { ui _: 12; ui b: 4; } CRn;
+    /* op1: in the system instructions. */
+    struct { ui _: 16; ui b: 3; } op1;
+    /* op0: in the system instructions. */
+    struct { ui _: 19; ui b: 2; } op0;
+    /* imm3: in add/sub extended reg instructions. */
+    struct { ui _: 10; ui b: 3; } imm3;
+    /* cond: condition flags as a source operand. */
+    struct { ui _: 12; ui b: 4; } cond;
+    /* opcode: in advsimd load/store instructions. */
+    struct { ui _: 12; ui b: 4; } opcode;
+    /* cmode: in advsimd modified immediate instructions. */
+    struct { ui _: 12; ui b: 4; } cmode;
+    /* asisdlso_opcode: opcode in advsimd ld/st single element. */
+    struct { ui _: 13; ui b: 3; } asisdlso_opcode;
+    /* len: in advsimd tbl/tbx instructions. */
+    struct { ui _: 13; ui b: 2; } len;
+    /* Rm: in ld/st reg offset and some integer inst. */
+    struct { ui _: 16; ui b: 5; } Rm;
+    /* Rs: in load/store exclusive instructions. */
+    struct { ui _: 16; ui b: 5; } Rs;
+    /* option: in ld/st reg offset + add/sub extended reg inst. */
+    struct { ui _: 13; ui b: 3; } option;
+    /* S: in load/store reg offset instructions. */
+    struct { ui _: 12; ui b: 1; } S;
+    /* hw: in move wide constant instructions. */
+    struct { ui _: 21; ui b: 2; } hw;
+    /* opc: in load/store reg offset instructions. */
+    struct { ui _: 22; ui b: 2; } opc;
+    /* opc1: in load/store reg offset instructions. */
+    struct { ui _: 23; ui b: 1; } opc1;
+    /* shift: in add/sub reg/imm shifted instructions. */
+    struct { ui _: 22; ui b: 2; } shift;
+    /* type: floating point type field in fp data inst. */
+    struct { ui _: 22; ui b: 2; } type;
+    /* ldst_size: size field in ld/st reg offset inst. */
+    struct { ui _: 30; ui b: 2; } ldst_size;
+    /* imm6: in add/sub reg shifted instructions. */
+    struct { ui _: 10; ui b: 6; } imm6;
+    /* imm4: in advsimd ext and advsimd ins instructions. */
+    struct { ui _: 11; ui b: 4; } imm4;
+    /* imm5: in conditional compare (immediate) instructions. */
+    struct { ui _: 16; ui b: 5; } imm5;
+    /* imm7: in load/store pair pre/post index instructions. */
+    struct { ui _: 15; ui b: 7; } imm7;
+    /* imm8: in floating-point scalar move immediate inst. */
+    struct { ui _: 13; ui b: 8; } imm8;
+    /* imm9: in load/store pre/post index instructions. */
+    struct { ui _: 12; ui b: 9; } imm9;
+    /* imm12: in ld/st unsigned imm or add/sub shifted inst. */
+    struct { ui _: 10; ui b: 12; } imm12;
+    /* imm14: in test bit and branch instructions. */
+    struct { ui _: 5; ui b: 14; } imm14;
+    /* imm16: in exception instructions. */
+    struct { ui _: 5; ui b: 16; } imm16;
+    /* imm26: in unconditional branch instructions. */
+    struct { ui b: 26; } imm26;
+    /* imms: in bitfield and logical immediate instructions. */
+    struct { ui _: 10; ui b: 6; } imms;
+    /* immr: in bitfield and logical immediate instructions. */
+    struct { ui _: 16; ui b: 6; } immr;
+    /* immb: in advsimd shift by immediate instructions. */
+    struct { ui _: 16; ui b: 3; } immb;
+    /* immh: in advsimd shift by immediate instructions. */
+    struct { ui _: 19; ui b: 4; } immh;
+    /* N: in logical (immediate) instructions. */
+    struct { ui _: 22; ui b: 1; } N;
+    /* index: in ld/st inst deciding the pre/post-index. */
+    struct { ui _: 11; ui b: 1; } index;
+    /* index2: in ld/st pair inst deciding the pre/post-index. */
+    struct { ui _: 24; ui b: 1; } index2;
+    /* sf: in integer data processing instructions. */
+    struct { ui _: 31; ui b: 1; } sf;
+    /* H: in advsimd scalar x indexed element instructions. */
+    struct { ui _: 11; ui b: 1; } H;
+    /* L: in advsimd scalar x indexed element instructions. */
+    struct { ui _: 21; ui b: 1; } L;
+    /* M: in advsimd scalar x indexed element instructions. */
+    struct { ui _: 20; ui b: 1; } M;
+    /* b5: in the test bit and branch instructions. */
+    struct { ui _: 31; ui b: 1; } b5;
+    /* b40: in the test bit and branch instructions. */
+    struct { ui _: 19; ui b: 5; } b40;
+    /* scale: in the fixed-point scalar to fp converting inst. */
+    struct { ui _: 10; ui b: 6; } scale;
+# else	/* big-endian mirror of the table above: same field names and widths, with pad = 32 - lsb - width (assumes bit-fields are allocated from the most significant bit on such targets) */
+    struct { ui _: 28; ui b: 4; } cond2;
+    struct { ui _: 28; ui b: 4; } nzcv;
+    struct { ui _: 22; ui b: 5; } defgh;
+    struct { ui _: 13; ui b: 3; } abc;
+    struct { ui _: 8; ui b: 19; } imm19;
+    struct { ui _: 8; ui b: 19; } immhi;
+    struct { ui _: 1; ui b: 2; } immlo;	/* 2 bits wide (bits 29-30), as in the little-endian view */
+    struct { ui _: 8; ui b: 2; } size;
+    struct { ui _: 20; ui b: 2; } vldst_size;
+    struct { ui _: 2; ui b: 1; } op;
+    struct { ui _: 1; ui b: 1; } Q;
+    struct { ui _: 27; ui b: 5; } Rt;	/* 5-bit register number (bits 0-4) */
+    struct { ui _: 27; ui b: 5; } Rd;	/* 5-bit register number (bits 0-4) */
+    struct { ui _: 22; ui b: 5; } Rn;
+    struct { ui _: 17; ui b: 5; } Rt2;
+    struct { ui _: 17; ui b: 5; } Ra;
+    struct { ui _: 24; ui b: 3; } op2;
+    struct { ui _: 20; ui b: 4; } CRm;
+    struct { ui _: 16; ui b: 4; } CRn;
+    struct { ui _: 13; ui b: 3; } op1;
+    struct { ui _: 11; ui b: 2; } op0;
+    struct { ui _: 19; ui b: 3; } imm3;
+    struct { ui _: 16; ui b: 4; } cond;
+    struct { ui _: 16; ui b: 4; } opcode;
+    struct { ui _: 16; ui b: 4; } cmode;
+    struct { ui _: 16; ui b: 3; } asisdlso_opcode;
+    struct { ui _: 17; ui b: 2; } len;
+    struct { ui _: 11; ui b: 5; } Rm;
+    struct { ui _: 11; ui b: 5; } Rs;
+    struct { ui _: 16; ui b: 3; } option;
+    struct { ui _: 19; ui b: 1; } S;
+    struct { ui _: 9; ui b: 2; } hw;
+    struct { ui _: 8; ui b: 2; } opc;
+    struct { ui _: 8; ui b: 1; } opc1;
+    struct { ui _: 8; ui b: 2; } shift;
+    struct { ui _: 8; ui b: 2; } type;
+    struct { ui b: 2; } ldst_size;
+    struct { ui _: 16; ui b: 6; } imm6;
+    struct { ui _: 17; ui b: 4; } imm4;
+    struct { ui _: 11; ui b: 5; } imm5;
+    struct { ui _: 10; ui b: 7; } imm7;
+    struct { ui _: 11; ui b: 8; } imm8;
+    struct { ui _: 11; ui b: 9; } imm9;
+    struct { ui _: 10; ui b: 12; } imm12;
+    struct { ui _: 13; ui b: 14; } imm14;
+    struct { ui _: 11; ui b: 16; } imm16;
+    struct { ui _: 6; ui b: 26; } imm26;
+    struct { ui _: 16; ui b: 6; } imms;
+    struct { ui _: 10; ui b: 6; } immr;
+    struct { ui _: 13; ui b: 3; } immb;
+    struct { ui _: 9; ui b: 4; } immh;
+    struct { ui _: 9; ui b: 1; } N;
+    struct { ui _: 20; ui b: 1; } index;
+    struct { ui _: 7; ui b: 1; } index2;
+    struct { ui b: 1; } sf;
+    struct { ui _: 20; ui b: 1; } H;
+    struct { ui _: 10; ui b: 1; } L;
+    struct { ui _: 11; ui b: 1; } M;
+    struct { ui b: 1; } b5;
+    struct { ui _: 8; ui b: 5; } b40;
+    struct { ui _: 16; ui b: 6; } scale;
+# endif
+    jit_int32_t w;
+# undef ui
+} instr_t;
+# define stack_framesize 160
+# define ii(i) *_jit->pc.ui++ = i
+# define ldxi(r0,r1,i0) ldxi_l(r0,r1,i0)
+# define stxi(i0,r0,r1) stxi_l(i0,r0,r1)
+# define FP_REGNO 0x1d
+# define LR_REGNO 0x1e
+# define SP_REGNO 0x1f
+# define XZR_REGNO 0x1f
+# define WZR_REGNO XZR_REGNO
+# define LSL_12 0x00400000
+# define MOVI_LSL_16 0x00200000
+# define MOVI_LSL_32 0x00400000
+# define MOVI_LSL_48 0x00600000
+# define XS 0x80000000 /* Wn -> Xn */
+# define DS 0x00400000 /* Sn -> Dn */
+# define CC_NE 0x0
+# define CC_EQ 0x1
+# define CC_CC 0x2
+# define CC_LO CC_CC
+# define CC_CS 0x3
+# define CC_HS CC_CS
+# define CC_PL 0x4
+# define CC_MI 0x5
+# define CC_VC 0x6
+# define CC_VS 0x7
+# define CC_LS 0x8
+# define CC_HI 0x9
+# define CC_LT 0xa
+# define CC_GE 0xb
+# define CC_LE 0xc
+# define CC_GT 0xd
+# define CC_NV 0xe
+# define CC_AL 0xf
+/* Branches need inverted condition */
+# define BCC_EQ 0x0
+# define BCC_NE 0x1
+# define BCC_CS 0x2
+# define BCC_HS BCC_CS
+# define BCC_CC 0x3
+# define BCC_LO BCC_CC
+# define BCC_MI 0x4
+# define BCC_PL 0x5
+# define BCC_VS 0x6
+# define BCC_VC 0x7
+# define BCC_HI 0x8
+# define BCC_LS 0x9
+# define BCC_GE 0xa
+# define BCC_LT 0xb
+# define BCC_GT 0xc
+# define BCC_LE 0xd
+# define BCC_AL 0xe
+# define BCC_NV 0xf
+/* adapted and cut down to only tested and required by lightning,
+ * from data in binutils/aarch64-tbl.h */
+# define A64_ADCS 0x3a000000
+# define A64_SBCS 0x7a000000
+# define A64_ADDI 0x11000000
+# define A64_ADDSI 0xb1000000
+# define A64_SUBI 0x51000000
+# define A64_SUBSI 0x71000000
+# define A64_ADD 0x0b000000
+# define A64_ADDS 0x2b000000
+#
define A64_SUB 0x4b000000 +# define A64_NEG 0x4b0003e0 +# define A64_SUBS 0x6b000000 +# define A64_CMP 0x6b00001f +# define A64_SBFM 0x93400000 +# define A64_UBFM 0x53400000 +# define A64_UBFX 0x53000000 +# define A64_B 0x14000000 +# define A64_BL 0x94000000 +# define A64_BR 0xd61f0000 +# define A64_BLR 0xd63f0000 +# define A64_RET 0xd65f0000 +# define A64_CBZ 0x34000000 +# define A64_CBNZ 0x35000000 +# define A64_B_C 0x54000000 +# define A64_CSINC 0x1a800400 +# define A64_REV 0xdac00c00 +# define A64_UDIV 0x1ac00800 +# define A64_SDIV 0x1ac00c00 +# define A64_LSL 0x1ac02000 +# define A64_LSR 0x1ac02400 +# define A64_ASR 0x1ac02800 +# define A64_MUL 0x1b007c00 +# define A64_SMULL 0x9b207c00 +# define A64_SMULH 0x9b407c00 +# define A64_UMULL 0x9ba07c00 +# define A64_UMULH 0x9bc07c00 +# define A64_STRBI 0x39000000 +# define A64_LDRBI 0x39400000 +# define A64_LDRSBI 0x39800000 +# define A64_STRI 0xf9000000 +# define A64_LDRI 0xf9400000 +# define A64_STRHI 0x79000000 +# define A64_LDRHI 0x79400000 +# define A64_LDRSHI 0x79800000 +# define A64_STRWI 0xb9000000 +# define A64_LDRWI 0xb9400000 +# define A64_LDRSWI 0xb9800000 +# define A64_STRB 0x38206800 +# define A64_LDRB 0x38606800 +# define A64_LDRSB 0x38e06800 +# define A64_STR 0xf8206800 +# define A64_LDR 0xf8606800 +# define A64_STRH 0x78206800 +# define A64_LDRH 0x78606800 +# define A64_LDRSH 0x78a06800 +# define A64_STRW 0xb8206800 +# define A64_LDRW 0xb8606800 +# define A64_LDRSW 0xb8a06800 +# define A64_STURB 0x38000000 +# define A64_LDURB 0x38400000 +# define A64_LDURSB 0x38800000 +# define A64_STUR 0xf8000000 +# define A64_LDUR 0xf8400000 +# define A64_STURH 0x78000000 +# define A64_LDURH 0x78400000 +# define A64_LDURSH 0x78800000 +# define A64_STURW 0xb8000000 +# define A64_LDURW 0xb8400000 +# define A64_LDURSW 0xb8800000 +# define A64_STP 0x29000000 +# define A64_LDP 0x29400000 +# define A64_STP_POS 0x29800000 +# define A64_LDP_PRE 0x28c00000 +# define A64_ANDI 0x12400000 +# define A64_ORRI 0x32400000 +# 
define A64_EORI 0x52400000 +# define A64_ANDSI 0x72000000 +# define A64_AND 0x0a000000 +# define A64_ORR 0x2a000000 +# define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */ +# define A64_MVN 0x2a2003e0 +# define A64_UXTW 0x2a0003e0 /* AKA MOV */ +# define A64_EOR 0x4a000000 +# define A64_ANDS 0x6a000000 +# define A64_MOVN 0x12800000 +# define A64_MOVZ 0x52800000 +# define A64_MOVK 0x72800000 +# define SBFM(Rd,Rn,ImmR,ImmS) oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS) +# define UBFM(Rd,Rn,ImmR,ImmS) oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS) +# define UBFX(Rd,Rn,ImmR,ImmS) oxxrs(A64_UBFX,Rd,Rn,ImmR,ImmS) +# define CMP(Rn,Rm) oxx_(A64_CMP|XS,Rn,Rm) +# define CMPI(Rn,Imm12) oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12) +# define CMPI_12(Rn,Imm12) oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12) +# define CMNI(Rn,Imm12) oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12) +# define CMNI_12(Rn,Imm12) oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12) +# define CSINC(Rd,Rn,Rm,Cc) oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc) +# define TST(Rn,Rm) oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm) +/* actually should use oxxrs but logical_immediate returns proper encoding */ +# define TSTI(Rn,Imm12) oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12) +# define MOV(Rd,Rm) ox_x(A64_MOV|XS,Rd,Rm) +# define MVN(Rd,Rm) ox_x(A64_MVN|XS,Rd,Rm) +# define NEG(Rd,Rm) ox_x(A64_NEG|XS,Rd,Rm) +# define MOVN(Rd,Imm16) ox_h(A64_MOVN|XS,Rd,Imm16) +# define MOVN_16(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16) +# define MOVN_32(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16) +# define MOVN_48(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16) +# define MOVZ(Rd,Imm16) ox_h(A64_MOVZ|XS,Rd,Imm16) +# define MOVZ_16(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16) +# define MOVZ_32(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16) +# define MOVZ_48(Rd,Imm16) ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16) +# define MOVK(Rd,Imm16) ox_h(A64_MOVK|XS,Rd,Imm16) +# define MOVK_16(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16) +# define MOVK_32(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16) +# define 
MOVK_48(Rd,Imm16) ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16) +# define ADD(Rd,Rn,Rm) oxxx(A64_ADD|XS,Rd,Rn,Rm) +# define ADDI(Rd,Rn,Imm12) oxxi(A64_ADDI|XS,Rd,Rn,Imm12) +# define ADDI_12(Rd,Rn,Imm12) oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12) +# define MOV_XSP(Rd,Rn) ADDI(Rd,Rn,0) +# define ADDS(Rd,Rn,Rm) oxxx(A64_ADDS|XS,Rd,Rn,Rm) +# define ADDSI(Rd,Rn,Imm12) oxxi(A64_ADDSI|XS,Rd,Rn,Imm12) +# define ADDSI_12(Rd,Rn,Imm12) oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12) +# define ADCS(Rd,Rn,Rm) oxxx(A64_ADCS|XS,Rd,Rn,Rm) +# define SUB(Rd,Rn,Rm) oxxx(A64_SUB|XS,Rd,Rn,Rm) +# define SUBI(Rd,Rn,Imm12) oxxi(A64_SUBI|XS,Rd,Rn,Imm12) +# define SUBI_12(Rd,Rn,Imm12) oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12) +# define SUBS(Rd,Rn,Rm) oxxx(A64_SUBS|XS,Rd,Rn,Rm) +# define SUBSI(Rd,Rn,Imm12) oxxi(A64_SUBSI|XS,Rd,Rn,Imm12) +# define SUBSI_12(Rd,Rn,Imm12) oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12) +# define SBCS(Rd,Rn,Rm) oxxx(A64_SBCS|XS,Rd,Rn,Rm) +# define MUL(Rd,Rn,Rm) oxxx(A64_MUL|XS,Rd,Rn,Rm) +# define SMULL(Rd,Rn,Rm) oxxx(A64_SMULL,Rd,Rn,Rm) +# define SMULH(Rd,Rn,Rm) oxxx(A64_SMULH,Rd,Rn,Rm) +# define UMULL(Rd,Rn,Rm) oxxx(A64_UMULL,Rd,Rn,Rm) +# define UMULH(Rd,Rn,Rm) oxxx(A64_UMULH,Rd,Rn,Rm) +# define SDIV(Rd,Rn,Rm) oxxx(A64_SDIV|XS,Rd,Rn,Rm) +# define UDIV(Rd,Rn,Rm) oxxx(A64_UDIV|XS,Rd,Rn,Rm) +# define LSL(Rd,Rn,Rm) oxxx(A64_LSL|XS,Rd,Rn,Rm) +# define LSLI(r0,r1,i0) UBFM(r0,r1,(64-i0)&63,63-i0) +# define ASR(Rd,Rn,Rm) oxxx(A64_ASR|XS,Rd,Rn,Rm) +# define ASRI(r0,r1,i0) SBFM(r0,r1,i0,63) +# define LSR(Rd,Rn,Rm) oxxx(A64_LSR|XS,Rd,Rn,Rm) +# define LSRI(r0,r1,i0) UBFM(r0,r1,i0,63) +# define AND(Rd,Rn,Rm) oxxx(A64_AND|XS,Rd,Rn,Rm) +/* actually should use oxxrs but logical_immediate returns proper encoding */ +# define ANDI(Rd,Rn,Imm12) oxxi(A64_ANDI|XS,Rd,Rn,Imm12) +# define ORR(Rd,Rn,Rm) oxxx(A64_ORR|XS,Rd,Rn,Rm) +/* actually should use oxxrs but logical_immediate returns proper encoding */ +# define ORRI(Rd,Rn,Imm12) oxxi(A64_ORRI|XS,Rd,Rn,Imm12) +# define EOR(Rd,Rn,Rm) oxxx(A64_EOR|XS,Rd,Rn,Rm) +/* 
actually should use oxxrs but logical_immediate returns proper encoding */ +# define EORI(Rd,Rn,Imm12) oxxi(A64_EORI|XS,Rd,Rn,Imm12) +# define SXTB(Rd,Rn) SBFM(Rd,Rn,0,7) +# define SXTH(Rd,Rn) SBFM(Rd,Rn,0,15) +# define SXTW(Rd,Rn) SBFM(Rd,Rn,0,31) +# define UXTB(Rd,Rn) UBFX(Rd,Rn,0,7) +# define UXTH(Rd,Rn) UBFX(Rd,Rn,0,15) +# define UXTW(Rd,Rm) ox_x(A64_UXTW,Rd,Rm) +# define REV(Rd,Rn) o_xx(A64_REV,Rd,Rn) +# define LDRSB(Rt,Rn,Rm) oxxx(A64_LDRSB,Rt,Rn,Rm) +# define LDRSBI(Rt,Rn,Imm12) oxxi(A64_LDRSBI,Rt,Rn,Imm12) +# define LDURSB(Rt,Rn,Imm9) oxx9(A64_LDURSB,Rt,Rn,Imm9) +# define LDRB(Rt,Rn,Rm) oxxx(A64_LDRB,Rt,Rn,Rm) +# define LDRBI(Rt,Rn,Imm12) oxxi(A64_LDRBI,Rt,Rn,Imm12) +# define LDURB(Rt,Rn,Imm9) oxx9(A64_LDURB,Rt,Rn,Imm9) +# define LDRSH(Rt,Rn,Rm) oxxx(A64_LDRSH,Rt,Rn,Rm) +# define LDRSHI(Rt,Rn,Imm12) oxxi(A64_LDRSHI,Rt,Rn,Imm12) +# define LDURSH(Rt,Rn,Imm9) oxx9(A64_LDURSH,Rt,Rn,Imm9) +# define LDRH(Rt,Rn,Rm) oxxx(A64_LDRH,Rt,Rn,Rm) +# define LDRHI(Rt,Rn,Imm12) oxxi(A64_LDRHI,Rt,Rn,Imm12) +# define LDURH(Rt,Rn,Imm9) oxx9(A64_LDURH,Rt,Rn,Imm9) +# define LDRSW(Rt,Rn,Rm) oxxx(A64_LDRSW,Rt,Rn,Rm) +# define LDRSWI(Rt,Rn,Imm12) oxxi(A64_LDRSWI,Rt,Rn,Imm12) +# define LDURSW(Rt,Rn,Imm9) oxx9(A64_LDURSW,Rt,Rn,Imm9) +# define LDRW(Rt,Rn,Rm) oxxx(A64_LDRW,Rt,Rn,Rm) +# define LDRWI(Rt,Rn,Imm12) oxxi(A64_LDRWI,Rt,Rn,Imm12) +# define LDURW(Rt,Rn,Imm9) oxx9(A64_LDURW,Rt,Rn,Imm9) +# define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm) +# define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12) +# define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9) +# define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm) +# define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12) +# define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9) +# define STRH(Rt,Rn,Rm) oxxx(A64_STRH,Rt,Rn,Rm) +# define STRHI(Rt,Rn,Imm12) oxxi(A64_STRHI,Rt,Rn,Imm12) +# define STURH(Rt,Rn,Imm9) oxx9(A64_STURH,Rt,Rn,Imm9) +# define STRW(Rt,Rn,Rm) oxxx(A64_STRW,Rt,Rn,Rm) +# define STRWI(Rt,Rn,Imm12) oxxi(A64_STRWI,Rt,Rn,Imm12) +# define 
STURW(Rt,Rn,Imm9) oxx9(A64_STURW,Rt,Rn,Imm9)
+# define STR(Rt,Rn,Rm) oxxx(A64_STR,Rt,Rn,Rm)
+# define STRI(Rt,Rn,Imm12) oxxi(A64_STRI,Rt,Rn,Imm12)
+# define STUR(Rt,Rn,Imm9) oxx9(A64_STUR,Rt,Rn,Imm9)
+# define LDPI(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
+# define STPI(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
+# define LDPI_PRE(Rt,Rt2,Rn,Simm7) oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
+# define STPI_POS(Rt,Rt2,Rn,Simm7) oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
+# define CSET(Rd,Cc) CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
+# define B(Simm26) o26(A64_B,Simm26)
+# define BL(Simm26) o26(A64_BL,Simm26)
+# define BR(Rn) o_x_(A64_BR,Rn)
+# define BLR(Rn) o_x_(A64_BLR,Rn)
+# define RET() o_x_(A64_RET,LR_REGNO)
+# define B_C(Cc,Simm19) oc19(A64_B_C,Cc,Simm19)
+# define CBZ(Rd,Simm19) ox19(A64_CBZ|XS,Rd,Simm19)
+# define CBNZ(Rd,Simm19) ox19(A64_CBNZ|XS,Rd,Simm19)
+static jit_int32_t logical_immediate(jit_word_t);
+# define oxxx(Op,Rd,Rn,Rm) _oxxx(_jit,Op,Rd,Rn,Rm)
+static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oxxi(Op,Rd,Rn,Imm12) _oxxi(_jit,Op,Rd,Rn,Imm12)
+static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oxx9(Op,Rd,Rn,Imm9) _oxx9(_jit,Op,Rd,Rn,Imm9)
+static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define ox19(Op,Rd,Simm19) _ox19(_jit,Op,Rd,Simm19)
+static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oc19(Op,Cc,Simm19) _oc19(_jit,Op,Cc,Simm19)
+static void _oc19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define o26(Op,Simm26) _o26(_jit,Op,Simm26)
+static void _o26(jit_state_t*,jit_int32_t,jit_int32_t);	/* must carry the name that o26() expands to; used by B() and BL() */
+# define ox_x(Op,Rd,Rn) _ox_x(_jit,Op,Rd,Rn)
+static void _ox_x(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define o_xx(Op,Rd,Rn) _o_xx(_jit,Op,Rd,Rn)
+static void _o_xx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define oxx_(Op,Rn,Rm) _oxx_(_jit,Op,Rn,Rm)
+static void
_oxx_(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define o_x_(Op,Rn) _o_x_(_jit,Op,Rn) +static void _o_x_(jit_state_t*,jit_int32_t,jit_int32_t); +# define ox_h(Op,Rd,Imm16) _ox_h(_jit,Op,Rd,Imm16) +static void _ox_h(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define oxxrs(Op,Rd,Rn,R,S) _oxxrs(_jit,Op,Rd,Rn,R,S) +static void _oxxrs(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define oxxxc(Op,Rd,Rn,Rm,Cc) _oxxxc(_jit,Op,Rd,Rn,Rm,Cc) +static void _oxxxc(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define oxxx7(Op,Rt,Rt2,Rn,Simm7) _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7) +static void _oxxx7(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define addr(r0,r1,r2) ADD(r0,r1,r2) +# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) +static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define addcr(r0,r1,r2) ADDS(r0,r1,r2) +# define addci(r0,r1,i0) _addci(_jit,r0,r1,i0) +static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define addxr(r0,r1,r2) ADCS(r0,r1,r2) +# define addxi(r0,r1,i0) _addxi(_jit,r0,r1,i0) +static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define subr(r0,r1,r2) SUB(r0,r1,r2) +# define subi(r0,r1,i0) _subi(_jit,r0,r1,i0) +static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define subcr(r0,r1,r2) SUBS(r0,r1,r2) +# define subci(r0,r1,i0) _subci(_jit,r0,r1,i0) +static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define subxr(r0,r1,r2) SBCS(r0,r1,r2) +# define subxi(r0,r1,i0) _subxi(_jit,r0,r1,i0) +static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define mulr(r0,r1,r2) MUL(r0,r1,r2) +# define muli(r0,r1,i0) _muli(_jit,r0,r1,i0) +static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qmulr(r0,r1,r2,r3) _qmulr(_jit,r0,r1,r2,r3) +static void _qmulr(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# 
define qmuli(r0,r1,r2,i0) _qmuli(_jit,r0,r1,r2,i0) +static void _qmuli(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define qmulr_u(r0,r1,r2,r3) _qmulr_u(_jit,r0,r1,r2,r3) +static void _qmulr_u(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define qmuli_u(r0,r1,r2,i0) _qmuli_u(_jit,r0,r1,r2,i0) +static void _qmuli_u(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define divr(r0,r1,r2) SDIV(r0,r1,r2) +# define divi(r0,r1,i0) _divi(_jit,r0,r1,i0) +static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define divr_u(r0,r1,r2) UDIV(r0,r1,r2) +# define divi_u(r0,r1,i0) _divi_u(_jit,r0,r1,i0) +static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define qdivr(r0,r1,r2,r3) _iqdivr(_jit,1,r0,r1,r2,r3) +# define qdivr_u(r0,r1,r2,r3) _iqdivr(_jit,0,r0,r1,r2,r3) +static void _iqdivr(jit_state_t*,jit_bool_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define qdivi(r0,r1,r2,i0) _qdivi(_jit,r0,r1,r2,i0) +static void _qdivi(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define qdivi_u(r0,r1,r2,i0) _qdivi_u(_jit,r0,r1,r2,i0) +static void _qdivi_u(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +# define remr(r0,r1,r2) _remr(_jit,r0,r1,r2) +static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define remi(r0,r1,i0) _remi(_jit,r0,r1,i0) +static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define remr_u(r0,r1,r2) _remr_u(_jit,r0,r1,r2) +static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define remi_u(r0,r1,i0) _remi_u(_jit,r0,r1,i0) +static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define lshr(r0,r1,r2) LSL(r0,r1,r2) +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) +static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rshr(r0,r1,r2) ASR(r0,r1,r2) +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +static void 
_rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define rshr_u(r0,r1,r2) LSR(r0,r1,r2) +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define negr(r0,r1) NEG(r0,r1) +# define comr(r0,r1) MVN(r0,r1) +# define andr(r0,r1,r2) AND(r0,r1,r2) +# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) +static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define orr(r0,r1,r2) ORR(r0,r1,r2) +# define ori(r0,r1,i0) _ori(_jit,r0,r1,i0) +static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define xorr(r0,r1,r2) EOR(r0,r1,r2) +# define xori(r0,r1,i0) _xori(_jit,r0,r1,i0) +static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldr_c(r0,r1) LDRSBI(r0,r1,0) +# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) +static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_uc(r0,r1) _ldr_uc(_jit,r0,r1) +static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_uc(r0,i0) _ldi_uc(_jit,r0,i0) +static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_s(r0,r1) LDRSHI(r0,r1,0) +# define ldi_s(r0,i0) _ldi_s(_jit,r0,i0) +static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_us(r0,r1) _ldr_us(_jit,r0,r1) +static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_us(r0,i0) _ldi_us(_jit,r0,i0) +static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_i(r0,r1) LDRSWI(r0,r1,0) +# define ldi_i(r0,i0) _ldi_i(_jit,r0,i0) +static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_ui(r0,r1) _ldr_ui(_jit,r0,r1) +static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_ui(r0,i0) _ldi_ui(_jit,r0,i0) +static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t); +# define ldr_l(r0,r1) LDRI(r0,r1,0) +static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_l(r0,i0) _ldi_l(_jit,r0,i0) +static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t); +# define 
ldxr_c(r0,r1,r2) _ldxr_c(_jit,r0,r1,r2) +static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_c(r0,r1,i0) _ldxi_c(_jit,r0,r1,i0) +static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_uc(r0,r1,r2) _ldxr_uc(_jit,r0,r1,r2) +static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_uc(r0,r1,i0) _ldxi_uc(_jit,r0,r1,i0) +static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_s(r0,r1,r2) LDRSH(r0,r1,r2) +# define ldxi_s(r0,r1,i0) _ldxi_s(_jit,r0,r1,i0) +static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_us(r0,r1,r2) _ldxr_us(_jit,r0,r1,r2) +static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_us(r0,r1,i0) _ldxi_us(_jit,r0,r1,i0) +static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_i(r0,r1,r2) LDRSW(r0,r1,r2) +# define ldxi_i(r0,r1,i0) _ldxi_i(_jit,r0,r1,i0) +static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_ui(r0,r1,r2) _ldxr_ui(_jit,r0,r1,r2) +static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_ui(r0,r1,i0) _ldxi_ui(_jit,r0,r1,i0) +static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define ldxr_l(r0,r1,r2) LDR(r0,r1,r2) +# define ldxi_l(r0,r1,i0) _ldxi_l(_jit,r0,r1,i0) +static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define str_c(r0,r1) STRBI(r1,r0,0) +# define sti_c(i0,r0) _sti_c(_jit,i0,r0) +static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t); +# define str_s(r0,r1) STRHI(r1,r0,0) +# define sti_s(i0,r0) _sti_s(_jit,i0,r0) +static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t); +# define str_i(r0,r1) STRWI(r1,r0,0) +# define sti_i(i0,r0) _sti_i(_jit,i0,r0) +static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t); +# define str_l(r0,r1) STRI(r1,r0,0) +# define sti_l(i0,r0) _sti_l(_jit,i0,r0) +static void 
_sti_l(jit_state_t*,jit_word_t,jit_int32_t); +# define stxr_c(r0,r1,r2) STRB(r2,r1,r0) +# define stxi_c(i0,r0,r1) _stxi_c(_jit,i0,r0,r1) +static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxr_s(r0,r1,r2) STRH(r2,r1,r0) +# define stxi_s(i0,r0,r1) _stxi_s(_jit,i0,r0,r1) +static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxr_i(r0,r1,r2) STRW(r2,r1,r0) +# define stxi_i(i0,r0,r1) _stxi_i(_jit,i0,r0,r1) +static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define stxr_l(r0,r1,r2) STR(r2,r1,r0) +# define stxi_l(i0,r0,r1) _stxi_l(_jit,i0,r0,r1) +static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define htonr(r0,r1) REV(r0,r1) +# else +# define htonr(r0,r1) movr(r0,r1) +# endif +# define extr_c(r0,r1) SXTB(r0,r1) +# define extr_uc(r0,r1) UXTB(r0,r1) +# define extr_s(r0,r1) SXTH(r0,r1) +# define extr_us(r0,r1) UXTH(r0,r1) +# define extr_i(r0,r1) SXTW(r0,r1) +# define extr_ui(r0,r1) UXTW(r0,r1) +# define movr(r0,r1) _movr(_jit,r0,r1) +static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); +# define movi(r0,i0) _movi(_jit,r0,i0) +static void _movi(jit_state_t*,jit_int32_t,jit_word_t); +# define movi_p(r0,i0) _movi_p(_jit,r0,i0) +static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define ccr(cc,r0,r1,r2) _ccr(_jit,cc,r0,r1,r2) +static void _ccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define cci(cc,r0,r1,i0) _cci(_jit,cc,r0,r1,i0) +static void _cci(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t); +# define ltr(r0,r1,r2) ccr(CC_LT,r0,r1,r2) +# define lti(r0,r1,i0) cci(CC_LT,r0,r1,i0) +# define ltr_u(r0,r1,r2) ccr(CC_CC,r0,r1,r2) +# define lti_u(r0,r1,i0) cci(CC_CC,r0,r1,i0) +# define ler(r0,r1,r2) ccr(CC_LE,r0,r1,r2) +# define lei(r0,r1,i0) cci(CC_LE,r0,r1,i0) +# define ler_u(r0,r1,r2) ccr(CC_LS,r0,r1,r2) +# define lei_u(r0,r1,i0) cci(CC_LS,r0,r1,i0) +# define eqr(r0,r1,r2) 
ccr(CC_EQ,r0,r1,r2) +# define eqi(r0,r1,i0) cci(CC_EQ,r0,r1,i0) +# define ger(r0,r1,r2) ccr(CC_GE,r0,r1,r2) +# define gei(r0,r1,i0) cci(CC_GE,r0,r1,i0) +# define ger_u(r0,r1,r2) ccr(CC_CS,r0,r1,r2) +# define gei_u(r0,r1,i0) cci(CC_CS,r0,r1,i0) +# define gtr(r0,r1,r2) ccr(CC_GT,r0,r1,r2) +# define gti(r0,r1,i0) cci(CC_GT,r0,r1,i0) +# define gtr_u(r0,r1,r2) ccr(CC_HI,r0,r1,r2) +# define gti_u(r0,r1,i0) cci(CC_HI,r0,r1,i0) +# define ner(r0,r1,r2) ccr(CC_NE,r0,r1,r2) +# define nei(r0,r1,i0) cci(CC_NE,r0,r1,i0) +# define bccr(cc,i0,r0,r1) _bccr(_jit,cc,i0,r0,r1) +static jit_word_t +_bccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +# define bcci(cc,i0,r0,i1) _bcci(_jit,cc,i0,r0,i1) +static jit_word_t +_bcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +# define bltr(i0,r0,r1) bccr(BCC_LT,i0,r0,r1) +# define blti(i0,r0,i1) bcci(BCC_LT,i0,r0,i1) +# define bltr_u(i0,r0,r1) bccr(BCC_CC,i0,r0,r1) +# define blti_u(i0,r0,i1) bcci(BCC_CC,i0,r0,i1) +# define bler(i0,r0,r1) bccr(BCC_LE,i0,r0,r1) +# define blei(i0,r0,i1) bcci(BCC_LE,i0,r0,i1) +# define bler_u(i0,r0,r1) bccr(BCC_LS,i0,r0,r1) +# define blei_u(i0,r0,i1) bcci(BCC_LS,i0,r0,i1) +# define beqr(i0,r0,r1) bccr(BCC_EQ,i0,r0,r1) +# define beqi(i0,r0,i1) _beqi(_jit,i0,r0,i1) +static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +# define bger(i0,r0,r1) bccr(BCC_GE,i0,r0,r1) +# define bgei(i0,r0,i1) bcci(BCC_GE,i0,r0,i1) +# define bger_u(i0,r0,r1) bccr(BCC_CS,i0,r0,r1) +# define bgei_u(i0,r0,i1) bcci(BCC_CS,i0,r0,i1) +# define bgtr(i0,r0,r1) bccr(BCC_GT,i0,r0,r1) +# define bgti(i0,r0,i1) bcci(BCC_GT,i0,r0,i1) +# define bgtr_u(i0,r0,r1) bccr(BCC_HI,i0,r0,r1) +# define bgti_u(i0,r0,i1) bcci(BCC_HI,i0,r0,i1) +# define bner(i0,r0,r1) bccr(BCC_NE,i0,r0,r1) +# define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1) +static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +# define baddr(cc,i0,r0,r1) _baddr(_jit,cc,i0,r0,r1) +static jit_word_t 
+_baddr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +# define baddi(cc,i0,r0,i1) _baddi(_jit,cc,i0,r0,i1) +static jit_word_t +_baddi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +# define boaddr(i0,r0,r1) baddr(BCC_VS,i0,r0,r1) +# define boaddi(i0,r0,i1) baddi(BCC_VS,i0,r0,i1) +# define boaddr_u(i0,r0,r1) baddr(BCC_HS,i0,r0,r1) +# define boaddi_u(i0,r0,i1) baddi(BCC_HS,i0,r0,i1) +# define bxaddr(i0,r0,r1) baddr(BCC_VC,i0,r0,r1) +# define bxaddi(i0,r0,i1) baddi(BCC_VC,i0,r0,i1) +# define bxaddr_u(i0,r0,r1) baddr(BCC_LO,i0,r0,r1) +# define bxaddi_u(i0,r0,i1) baddi(BCC_LO,i0,r0,i1) +# define bsubr(cc,i0,r0,r1) _bsubr(_jit,cc,i0,r0,r1) +static jit_word_t +_bsubr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +# define bsubi(cc,i0,r0,i1) _bsubi(_jit,cc,i0,r0,i1) +static jit_word_t +_bsubi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +# define bosubr(i0,r0,r1) bsubr(BCC_VS,i0,r0,r1) +# define bosubi(i0,r0,i1) bsubi(BCC_VS,i0,r0,i1) +# define bosubr_u(i0,r0,r1) bsubr(BCC_LO,i0,r0,r1) +# define bosubi_u(i0,r0,i1) bsubi(BCC_LO,i0,r0,i1) +# define bxsubr(i0,r0,r1) bsubr(BCC_VC,i0,r0,r1) +# define bxsubi(i0,r0,i1) bsubi(BCC_VC,i0,r0,i1) +# define bxsubr_u(i0,r0,r1) bsubr(BCC_HS,i0,r0,r1) +# define bxsubi_u(i0,r0,i1) bsubi(BCC_HS,i0,r0,i1) +# define bmxr(cc,i0,r0,r1) _bmxr(_jit,cc,i0,r0,r1) +static jit_word_t +_bmxr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +# define bmxi(cc,i0,r0,r1) _bmxi(_jit,cc,i0,r0,r1) +static jit_word_t +_bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +# define bmsr(i0,r0,r1) bmxr(BCC_NE,i0,r0,r1) +# define bmsi(i0,r0,i1) bmxi(BCC_NE,i0,r0,i1) +# define bmcr(i0,r0,r1) bmxr(BCC_EQ,i0,r0,r1) +# define bmci(i0,r0,i1) bmxi(BCC_EQ,i0,r0,i1) +# define jmpr(r0) BR(r0) +# define jmpi(i0) _jmpi(_jit,i0) +static void _jmpi(jit_state_t*,jit_word_t); +# define jmpi_p(i0) _jmpi_p(_jit,i0) +static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); +# define callr(r0) 
BLR(r0)
+#  define calli(i0)                    _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define prolog(i0)                   _prolog(_jit,i0)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define epilog(i0)                   _epilog(_jit,i0)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define patch_at(jump,label)         _patch_at(_jit,jump,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/*
+ * Translate a constant into the pre-encoded immediate field accepted by
+ * the ANDI/ORRI/EORI/TSTI emitters used further down in this file.
+ * Returns -1 when the constant is not one of the recognized values, in
+ * which case callers fall back to loading it into a register.
+ *
+ * There are 5334 possible immediate values, but to avoid the
+ * need of either too complex code or large lookup tables,
+ * only (simply) encodable common/small values are recognized.
+ */
+static jit_int32_t
+logical_immediate(jit_word_t imm)
+{
+    static const struct {
+        jit_word_t      value;
+        jit_int32_t     code;
+    } known[] = {
+        { -16, 0xf3b }, { -15, 0xf3c }, { -13, 0xf3d }, {  -9, 0xf3e },
+        {  -8, 0xf7c }, {  -7, 0xf7d }, {  -5, 0xf7e }, {  -4, 0xfbd },
+        {  -3, 0xfbe }, {  -2, 0xffe }, {   1, 0x000 }, {   2, 0xfc0 },
+        {   3, 0x001 }, {   4, 0xf80 }, {   6, 0xfc1 }, {   7, 0x002 },
+        {   8, 0xf40 }, {  12, 0xf81 }, {  14, 0xfc2 }, {  15, 0x003 },
+        {  16, 0xf00 },
+    };
+    unsigned int        slot;
+
+    for (slot = 0; slot < sizeof(known) / sizeof(known[0]); slot++) {
+        if (known[slot].value == imm)
+            return (known[slot].code);
+    }
+    return (-1);
+}
+
+/*
+ * Emit one instruction word built from Op with the Rd, Rn and Rm
+ * fields filled in.  The asserts reject register numbers wider than
+ * five bits and opcode bits outside the 0xffe0fc00 mask.
+ */
+static void
+_oxxx(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t             insn;
+
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((Rm & ~0x1f) == 0);
+    assert((Op & ~0xffe0fc00) == 0);
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.Rm.b = Rm;
+    ii(insn.w);
+}
+
+/*
+ * Emit one instruction word built from Op with the Rd and Rn fields
+ * and an unsigned 12-bit immediate (imm12 field) filled in.
+ */
+static void
+_oxxi(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm12)
+{
+    instr_t             insn;
+
+    assert((Rd & ~0x1f) == 0);
+    assert((Rn & ~0x1f) == 0);
+    assert((Imm12 & ~0xfff) == 0);
+    assert((Op & ~0xffe00000) == 0);
+    insn.w = Op;
+    insn.Rd.b = Rd;
+    insn.Rn.b = Rn;
+    insn.imm12.b = Imm12;
+    ii(insn.w);
+}
+
+static void
+_oxx9(jit_state_t
*_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
+{
+    /* Emit Op with the Rd and Rn fields and a 9-bit immediate; callers
+     * pass the immediate already masked to nine bits. */
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Imm9 &    ~0x1ff));
+    assert(!(Op & ~0xffe00000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    i.imm9.b = Imm9;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with the Rd field and a signed immediate stored in the imm19
+ * field.  The assert bounds are the two's complement range of a
+ * 19-bit value, -2^18 .. 2^18-1 (the lower bound used to be written as
+ * -262148, which let four unencodable values through).
+ */
+static void
+_ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
+{
+    instr_t             i;
+    assert(!(Rd &         ~0x1f));
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op &   ~0xff000000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with a 4-bit condition code (cond2 field) and a signed
+ * immediate stored in the imm19 field; same range check as _ox19.
+ */
+static void
+_oc19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Cc, jit_int32_t Simm19)
+{
+    instr_t             i;
+    assert(!(Cc &          ~0xf));
+    assert(Simm19 >= -262144 && Simm19 <= 262143);
+    assert(!(Op &   ~0xff000000));
+    i.w = Op;
+    i.cond2.b = Cc;
+    i.imm19.b = Simm19;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with a signed immediate stored in the imm26 field
+ * (-2^25 .. 2^25-1).
+ */
+static void
+_o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
+{
+    instr_t             i;
+    assert(Simm26 >= -33554432 && Simm26 <= 33554431);
+    assert(!(Op &   ~0xfc000000));
+    i.w = Op;
+    i.imm26.b = Simm26;
+    ii(i.w);
+}
+
+/* Emit Op with only the Rd and Rm register fields filled in. */
+static void
+_ox_x(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rm)
+{
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Op & ~0xffe0ffe0));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rm.b = Rm;
+    ii(i.w);
+}
+
+/* Emit Op with only the Rd and Rn register fields filled in. */
+static void
+_o_xx(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Rn)
+{
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Op & ~0xfffffc00));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    ii(i.w);
+}
+
+/* Emit Op with only the Rn and Rm register fields filled in. */
+static void
+_oxx_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn, jit_int32_t Rm)
+{
+    instr_t             i;
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Op & ~0xffc0fc1f));
+    i.w = Op;
+    i.Rn.b = Rn;
+    i.Rm.b = Rm;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with only the Rn register field filled in.  Op must leave
+ * bits 5-9 (mask 0x3e0) clear.
+ */
+static void
+_o_x_(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rn)
+{
+    instr_t             i;
+    assert(!(Rn & ~0x1f));
+    assert(!(Op & 0x3e0));
+    i.w = Op;
+    i.Rn.b = Rn;
+    ii(i.w);
+}
+
+static void
+_ox_h(jit_state_t
*_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Imm16)
+{
+    /* Emit Op with the Rd field and an unsigned 16-bit immediate
+     * (imm16 field). */
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Imm16 &  ~0xffff));
+    assert(!(Op & ~0xffe00000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.imm16.b = Imm16;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with the Rd and Rn fields plus the two 6-bit immr/imms
+ * immediates R and S.
+ */
+static void
+_oxxrs(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t R, jit_int32_t S)
+{
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(R  &       ~0x3f));
+    assert(!(S  &       ~0x3f));
+    assert(!(Op & ~0xffc00000));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    i.immr.b = R;
+    i.imms.b = S;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with the Rd, Rn and Rm fields plus a 4-bit condition code
+ * (cond field).
+ */
+static void
+_oxxxc(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm, jit_int32_t Cc)
+{
+    instr_t             i;
+    assert(!(Rd &       ~0x1f));
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rm &       ~0x1f));
+    assert(!(Cc  &       ~0xf));
+    assert(!(Op & ~0xffc00c00));
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    i.Rm.b = Rm;
+    i.cond.b = Cc;
+    ii(i.w);
+}
+
+/*
+ * Emit Op with the Rt, Rt2 and Rn fields plus a signed immediate stored
+ * in the imm7 field.
+ *
+ * The range check is the two's complement range of a 7-bit value,
+ * -64 .. 63; it used to accept -128 .. 127, i.e. values that do not
+ * fit and would be truncated on store.
+ * NOTE(review): this assumes imm7 is declared as a 7-bit field, as its
+ * name says -- confirm against the instr_t definition.
+ */
+static void
+_oxxx7(jit_state_t *_jit, jit_int32_t Op,
+       jit_int32_t Rt, jit_int32_t Rt2, jit_int32_t Rn, jit_int32_t Simm7)
+{
+    instr_t             i;
+    assert(!(Rt  &      ~0x1f));
+    assert(!(Rt2 &      ~0x1f));
+    assert(!(Rn  &      ~0x1f));
+    assert(Simm7 >= -64 && Simm7 <= 63);
+    assert(!(Op & ~0xffc003e0));
+    i.w = Op;
+    i.Rt.b = Rt;
+    i.Rt2.b = Rt2;
+    i.Rn.b = Rn;
+    i.imm7.b = Simm7;
+    ii(i.w);
+}
+
+/*
+ * Emit r0 = r1 + i0, picking the shortest form available:
+ *   - ADDI      when i0 fits in 12 unsigned bits;
+ *   - ADDI_12   when i0 is a 12-bit value shifted left by 12;
+ *   - SUBI      when -i0 fits in 12 unsigned bits;
+ *   - SUBI_12   when -i0 is a 12-bit value shifted left by 12;
+ *   - otherwise i0 is loaded into a scratch register and added.
+ *
+ * The SUBI_12 test used to compare (iS << 12) with `is' rather than
+ * `in', so it could never match.  Range checks are done before the
+ * shifts so that a negative value is never shifted left.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          is =  i0 >> 12;        /* i0 in units of 4096  */
+    jit_word_t          in = -i0;              /* negated immediate    */
+    jit_word_t          iS =  in >> 12;        /* -i0 in units of 4096 */
+    if (      i0 >= 0 && i0 <= 0xfff)
+        ADDI   (r0, r1, i0);
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+        ADDI_12(r0, r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+        SUBI   (r0, r1, in);
+    else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in)
+        SUBI_12(r0, r1, iS);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        addr(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Flag-setting variant of the add-immediate emitter: r0 = r1 + i0
+ * using the ADDS/SUBS immediate forms, or addcr through a scratch
+ * register.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t
reg;
+    jit_word_t          is =  i0 >> 12;        /* i0 in units of 4096  */
+    jit_word_t          in = -i0;              /* negated immediate    */
+    jit_word_t          iS =  in >> 12;        /* -i0 in units of 4096 */
+    /* Same selection as the plain add-immediate emitter.  The last
+     * immediate test must compare against `in' (it used to compare
+     * against `is', which made the SUBSI_12 form unreachable); range
+     * checks come first so a negative value is never shifted left. */
+    if (      i0 >= 0 && i0 <= 0xfff)
+        ADDSI   (r0, r1, i0);
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+        ADDSI_12(r0, r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+        SUBSI   (r0, r1, in);
+    else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in)
+        SUBSI_12(r0, r1, iS);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        addcr(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Add-with-carry of an immediate: i0 is always loaded into a scratch
+ * register and combined with addxr (ADCS).
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    addxr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/*
+ * Emit r0 = r1 - i0: SUBI for a 12-bit unsigned immediate, SUBI_12 for
+ * a 12-bit value shifted left by 12, otherwise through a scratch
+ * register.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          is = i0 >> 12;
+    if (      i0 >= 0 && i0 <= 0xfff)
+        SUBI   (r0, r1, i0);
+    /* range first: never left-shift a negative `is' */
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+        SUBI_12(r0, r1, is);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        subr(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Flag-setting variant of _subi, using SUBSI/SUBSI_12 or subcr through
+ * a scratch register.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          is = i0 >> 12;
+    if (      i0 >= 0 && i0 <= 0xfff)
+        SUBSI   (r0, r1, i0);
+    /* range first: never left-shift a negative `is' */
+    else if (is >= 0 && is <= 0xfff && (is << 12) == i0)
+        SUBSI_12(r0, r1, is);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        subcr(r0, r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Subtract-with-carry of an immediate: i0 is always loaded into a
+ * scratch register and combined with subxr (SBCS).
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    subxr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+/* Emit r0 = r1 * i0 by loading i0 into a scratch register. */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    mulr(r0, r1, rn(reg));
+    jit_unget_reg(reg);
+}
+
+static void
+_qmulr(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t
r2, jit_int32_t r3)
+{
+    /* r0 receives MUL(r2, r3) and r1 receives SMULH(r2, r3). */
+    jit_int32_t         scratch;
+
+    if (r0 != r2 && r0 != r3) {
+        mulr(r0, r2, r3);
+        SMULH(r1, r2, r3);
+    }
+    else {
+        /* r0 aliases an operand that SMULH still has to read: build the
+         * MUL result in a scratch register and copy it over last. */
+        scratch = jit_get_reg(jit_class_gpr);
+        mulr(rn(scratch), r2, r3);
+        SMULH(r1, r2, r3);
+        movr(r0, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+}
+
+/* Immediate form of qmulr: i0 is loaded into a scratch register first. */
+static void
+_qmuli(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    qmulr(r0, r1, r2, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Unsigned counterpart of _qmulr: r0 = MUL(r2, r3), r1 = UMULH(r2, r3). */
+static void
+_qmulr_u(jit_state_t *_jit, jit_int32_t r0,
+         jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t         scratch;
+
+    if (r0 != r2 && r0 != r3) {
+        mulr(r0, r2, r3);
+        UMULH(r1, r2, r3);
+    }
+    else {
+        /* Same aliasing precaution as in the signed version. */
+        scratch = jit_get_reg(jit_class_gpr);
+        mulr(rn(scratch), r2, r3);
+        UMULH(r1, r2, r3);
+        movr(r0, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+}
+
+/* Immediate form of qmulr_u: i0 is loaded into a scratch register first. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_int32_t r0,
+         jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    qmulr_u(r0, r1, r2, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Emit r0 = r1 / i0 (signed) through a scratch register holding i0. */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    divr(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Emit r0 = r1 / i0 (unsigned) through a scratch register holding i0. */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    divr_u(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/*
+ * Combined divide: r0 = r2 / r3 (signed when `sign' is non-zero,
+ * unsigned otherwise) and r1 = r2 - r3 * quotient.  Whenever an output
+ * register aliases one of the inputs, the value is first produced in a
+ * scratch register.
+ */
+static void
+_iqdivr(jit_state_t *_jit, jit_bool_t sign,
+        jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3)
+{
+    jit_int32_t         sv0, rg0;
+    jit_int32_t         sv1, rg1;
+    if (r0 == r2 || r0 == r3) {
+        sv0 = jit_get_reg(jit_class_gpr);
+        rg0 = rn(sv0);
+    }
+    else
+        rg0 = r0;
+    if (r1 == r2 || r1 == r3) {
+        sv1 = jit_get_reg(jit_class_gpr);
+        rg1 = rn(sv1);
+    }
+    else
+        rg1 = r1;
+    if (sign)
+        divr(rg0, r2, r3);
+    else
+        divr_u(rg0, r2, r3);
+    
mulr(rg1, r3, rg0);
+    subr(rg1, r2, rg1);
+    /* Copy results out of the scratch registers, if any were needed. */
+    if (rg0 != r0) {
+        movr(r0, rg0);
+        jit_unget_reg(sv0);
+    }
+    if (rg1 != r1) {
+        movr(r1, rg1);
+        jit_unget_reg(sv1);
+    }
+}
+
+/* Immediate form of qdivr: i0 is loaded into a scratch register first. */
+static void
+_qdivi(jit_state_t *_jit, jit_int32_t r0,
+       jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    qdivr(r0, r1, r2, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Immediate form of qdivr_u: i0 is loaded into a scratch register first. */
+static void
+_qdivi_u(jit_state_t *_jit, jit_int32_t r0,
+         jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    qdivr_u(r0, r1, r2, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/*
+ * Signed remainder: r0 = r1 - r2 * (r1 / r2).  The quotient and the
+ * product are built in r0 itself unless r0 aliases an operand, in
+ * which case a scratch register is used.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         scratch;
+
+    if (r0 != r1 && r0 != r2) {
+        divr(r0, r1, r2);
+        mulr(r0, r2, r0);
+        subr(r0, r1, r0);
+        return;
+    }
+    scratch = jit_get_reg(jit_class_gpr);
+    divr(rn(scratch), r1, r2);
+    mulr(rn(scratch), r2, rn(scratch));
+    subr(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Immediate form of remr: i0 is loaded into a scratch register first. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    remr(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Unsigned counterpart of _remr, built on divr_u. */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t         scratch;
+
+    if (r0 != r1 && r0 != r2) {
+        divr_u(r0, r1, r2);
+        mulr(r0, r2, r0);
+        subr(r0, r1, r0);
+        return;
+    }
+    scratch = jit_get_reg(jit_class_gpr);
+    divr_u(rn(scratch), r1, r2);
+    mulr(rn(scratch), r2, rn(scratch));
+    subr(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/* Immediate form of remr_u: i0 is loaded into a scratch register first. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch = jit_get_reg(jit_class_gpr);
+
+    movi(rn(scratch), i0);
+    remr_u(r0, r1, rn(scratch));
+    jit_unget_reg(scratch);
+}
+
+/*
+ * Left shift by an immediate: a zero count degenerates to a register
+ * move, any other count must be in 1..63.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 == 0)
+        movr(r0, r1);
+    else {
+        assert(i0 > 0 && i0 < 64);
+        LSLI(r0, r1, i0);
+    
} +} + +static void +_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else { + assert(i0 > 0 && i0 < 64); + ASRI(r0, r1, i0); + } +} + +static void +_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + if (i0 == 0) + movr(r0, r1); + else { + assert(i0 > 0 && i0 < 64); + LSRI(r0, r1, i0); + } +} + +static void +_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + jit_int32_t imm; + if (i0 == 0) + movi(r0, 0); + else if (i0 == -1) + movr(r0, r1); + else { + imm = logical_immediate(i0); + if (imm != -1) + ANDI(r0, r1, imm); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + andr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + } +} + +static void +_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + jit_int32_t imm; + if (i0 == 0) + movr(r0, r1); + else if (i0 == -1) + movi(r0, -1); + else { + imm = logical_immediate(i0); + if (imm != -1) + ORRI(r0, r1, imm); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + orr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + } +} + +static void +_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + jit_int32_t imm; + if (i0 == 0) + movr(r0, r1); + else if (i0 == -1) + comr(r0, r1); + else { + imm = logical_immediate(i0); + if (imm != -1) + EORI(r0, r1, imm); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + xorr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + } +} + +static void +_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_c(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LDRBI(r0, r1, 0); +#if 0 + extr_uc(r0, r0); +#endif +} + +static void +_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t 
reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_uc(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_s(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LDRHI(r0, r1, 0); +#if 0 + extr_us(r0, r0); +#endif +} + +static void +_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_us(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_i(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LDRWI(r0, r1, 0); +#if 0 + extr_ui(r0, r0); +#endif +} + +static void +_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_ui(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_l(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + LDRSB(r0, r1, r2); + extr_c(r0, r0); +} + +static void +_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (i0 >= 0 && i0 <= 4095) + LDRSBI(r0, r1, i0); + else if (i0 > -256 && i0 < 0) + LDURSB(r0, r1, i0 & 0x1ff); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + LDRSB(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + extr_c(r0, r0); +} + +static void +_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + LDRB(r0, r1, r2); +#if 
0
+    extr_uc(r0, r0);
+#endif
+}
+
+/*
+ * Load an unsigned byte from [r1 + i0] into r0.  Offsets -255..-1 use
+ * LDURB with the offset masked to nine bits, offsets 0..4095 use LDRBI,
+ * and anything else computes the address in a scratch register.
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch;
+
+    if (i0 > -256 && i0 < 0)
+        LDURB(r0, r1, i0 & 0x1ff);
+    else if (i0 >= 0 && i0 <= 4095)
+        LDRBI(r0, r1, i0);
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        addi(rn(scratch), r1, i0);
+        ldr_uc(r0, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+#if 0
+    extr_uc(r0, r0);
+#endif
+}
+
+/*
+ * Load a signed halfword from [r1 + i0] into r0.  The offset must be
+ * even.  Offsets -255..-1 use LDURSH, offsets 0..8191 use LDRSHI with
+ * the offset divided by two, and anything else loads the offset into a
+ * scratch register and uses the register-offset LDRSH.
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch;
+
+    assert((i0 & 1) == 0);
+    if (i0 > -256 && i0 < 0)
+        LDURSH(r0, r1, i0 & 0x1ff);
+    else if (i0 >= 0 && i0 <= 8191)
+        LDRSHI(r0, r1, i0 >> 1);
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        movi(rn(scratch), i0);
+        LDRSH(r0, r1, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+}
+
+/*
+ * Load an unsigned halfword from [r1 + r2] into r0 (LDRH).  The
+ * explicit extr_us that follows is compiled out.
+ */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRH(r0, r1, r2);
+#if 0
+    extr_us(r0, r0);
+#endif
+}
+
+/*
+ * Load an unsigned halfword from [r1 + i0] into r0.  The offset must
+ * be even.  Offsets -255..-1 use LDURH, offsets 0..8191 use LDRHI with
+ * the offset divided by two, and anything else loads the offset into a
+ * scratch register and uses the register-offset LDRH.
+ */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch;
+
+    assert((i0 & 1) == 0);
+    if (i0 > -256 && i0 < 0)
+        LDURH(r0, r1, i0 & 0x1ff);
+    else if (i0 >= 0 && i0 <= 8191)
+        LDRHI(r0, r1, i0 >> 1);
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        movi(rn(scratch), i0);
+        LDRH(r0, r1, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+#if 0
+    extr_us(r0, r0);
+#endif
+}
+
+/*
+ * Load a signed 32-bit word from [r1 + i0] into r0.  The offset must
+ * be a multiple of four.  Offsets -255..-1 use LDURSW, offsets
+ * 0..16383 use LDRSWI with the offset divided by four, and anything
+ * else computes the address in a scratch register.
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         scratch;
+
+    assert((i0 & 3) == 0);
+    if (i0 > -256 && i0 < 0)
+        LDURSW(r0, r1, i0 & 0x1ff);
+    else if (i0 >= 0 && i0 <= 16383)
+        LDRSWI(r0, r1, i0 >> 2);
+    else {
+        scratch = jit_get_reg(jit_class_gpr);
+        addi(rn(scratch), r1, i0);
+        ldr_i(r0, rn(scratch));
+        jit_unget_reg(scratch);
+    }
+}
+
+/*
+ * Load an unsigned 32-bit word from [r1 + r2] into r0 (LDRW).  The
+ * explicit extr_ui that follows is compiled out.
+ */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    LDRW(r0, r1, r2);
+#if 0
+    extr_ui(r0, r0);
+#endif
+}
+
+/*
+ * Load an unsigned 32-bit word from [r1 + i0] into r0.  The offset
+ * must be a multiple of four.
+ */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    assert(!(i0 & 3));
+ if (i0 >= 0 && i0 <= 16383) + LDRWI(r0, r1, i0 >> 2); + else if (i0 > -256 && i0 < 0) + LDURW(r0, r1, i0 & 0x1ff); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + LDRW(r0, r1, rn(reg)); + jit_unget_reg(reg); + } +#if 0 + extr_ui(r0, r0); +#endif +} + +static void +_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + assert(!(i0 & 7)); + if (i0 >= 0 && i0 <= 32767) + LDRI(r0, r1, i0 >> 3); + else if (i0 > -256 && i0 < 0) + LDUR(r0, r1, i0 & 0x1ff); + else { + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + ldr_l(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_c(rn(reg), r0); + jit_unget_reg(reg); +} + +static void +_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_s(rn(reg), r0); + jit_unget_reg(reg); +} + +static void +_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_i(rn(reg), r0); + jit_unget_reg(reg); +} + +static void +_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_l(rn(reg), r0); + jit_unget_reg(reg); +} + +static void +_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (i0 >= 0 && i0 <= 4095) + STRBI(r1, r0, i0); + else if (i0 > -256 && i0 < 0) + STURB(r1, r0, i0 & 0x1ff); + else { + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + str_c(rn(reg), r1); + jit_unget_reg(reg); + } +} + +static void +_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + assert(!(i0 & 1)); + if (i0 >= 0 && i0 <= 8191) + STRHI(r1, r0, i0 >> 1); + else if (i0 > -256 && i0 < 0) 
+        STURH(r1, r0, i0 & 0x1ff);
+    else {
+        /* The address must be built from the base register r0; it used
+         * to be built from r1, the value being stored. */
+        reg = jit_get_reg(jit_class_gpr);
+        addi(rn(reg), r0, i0);
+        str_s(rn(reg), r1);
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the low 32-bit word of r1 at [r0 + i0]: r0 is the base register
+ * and r1 the value.  The offset must be a multiple of four.  Offsets
+ * 0..16383 use STRWI with the offset divided by four, offsets -255..-1
+ * use STURW, and anything else computes the address in a scratch
+ * register (from r0; the original code wrongly used r1 there).
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         reg;
+    assert(!(i0 & 3));
+    if (i0 >= 0 && i0 <= 16383)
+        STRWI(r1, r0, i0 >> 2);
+    else if (i0 > -256 && i0 < 0)
+        STURW(r1, r0, i0 & 0x1ff);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        addi(rn(reg), r0, i0);         /* address = base + offset */
+        str_i(rn(reg), r1);
+        jit_unget_reg(reg);
+    }
+}
+
+/*
+ * Store the 64-bit value of r1 at [r0 + i0]: r0 is the base register
+ * and r1 the value.  The offset must be a multiple of eight.  Offsets
+ * 0..32767 use STRI with the offset divided by eight, offsets -255..-1
+ * use STUR, and anything else computes the address in a scratch
+ * register (from r0; the original code wrongly used r1 there).
+ */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t         reg;
+    assert(!(i0 & 7));
+    if (i0 >= 0 && i0 <= 32767)
+        STRI(r1, r0, i0 >> 3);
+    else if (i0 > -256 && i0 < 0)
+        STUR(r1, r0, i0 & 0x1ff);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        addi(rn(reg), r0, i0);         /* address = base + offset */
+        str_l(rn(reg), r1);
+        jit_unget_reg(reg);
+    }
+}
+
+/* Register move; nothing is emitted when source and target coincide. */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (r0 != r1)
+        MOV(r0, r1);
+}
+
+/*
+ * Load the constant i0 into r0.  ibit/nbit record which of the four
+ * 16-bit chunks of i0 and of its complement n0 are non-zero (bit 0 is
+ * the lowest chunk, bit 3 the highest); the switch then picks a
+ * MOVZ/MOVK or MOVN sequence that only touches the chunks it needs.
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          n0, ibit, nbit;
+    n0 = ~i0;
+    ibit = nbit = 0;
+    if (i0 & 0x000000000000ffffL)      ibit |= 1;
+    if (i0 & 0x00000000ffff0000L)      ibit |= 2;
+    if (i0 & 0x0000ffff00000000L)      ibit |= 4;
+    if (i0 & 0xffff000000000000L)      ibit |= 8;
+    if (n0 & 0x000000000000ffffL)      nbit |= 1;
+    if (n0 & 0x00000000ffff0000L)      nbit |= 2;
+    if (n0 & 0x0000ffff00000000L)      nbit |= 4;
+    if (n0 & 0xffff000000000000L)      nbit |= 8;
+    switch (ibit) {
+        case 0:
+            MOVZ   (r0,  0);
+            break;
+        case 1:
+            MOVZ   (r0,  i0        & 0xffff);
+            break;
+        case 2:
+            MOVZ_16(r0, (i0 >> 16) & 0xffff);
+            break;
+        case 3:
+            MOVZ   (r0,  i0        & 0xffff);
+            MOVK_16(r0, (i0 >> 16) & 0xffff);
+            break;
+        case 4:
+            MOVZ_32(r0, (i0 >> 32) & 0xffff);
+            break;
+        case 5:
+            MOVZ   (r0,  i0        & 0xffff);
+            MOVK_32(r0, (i0 >> 32) & 0xffff);
+            break;
+        case 6:
+            MOVZ_16(r0, (i0 >> 16) & 0xffff);
+            MOVK_32(r0, (i0 >> 32) & 0xffff);
+            break;
+        case 7:
+            if (nbit == 8)
+                MOVN_48(r0, (n0 >> 48) &
0xffff);
+            else {
+                MOVZ   (r0,  i0        & 0xffff);
+                MOVK_16(r0, (i0 >> 16) & 0xffff);
+                MOVK_32(r0, (i0 >> 32) & 0xffff);
+            }
+            break;
+        case 8:                /* only bits 48-63 of i0 are non-zero */
+            MOVZ_48(r0, (i0 >> 48) & 0xffff);
+            break;
+        case 9:                /* bits 0-15 and 48-63 */
+            MOVZ   (r0,  i0        & 0xffff);
+            MOVK_48(r0, (i0 >> 48) & 0xffff);
+            break;
+        case 10:               /* bits 16-31 and 48-63 */
+            MOVZ_16(r0, (i0 >> 16) & 0xffff);
+            MOVK_48(r0, (i0 >> 48) & 0xffff);
+            break;
+        case 11:               /* every chunk but bits 32-47 */
+            if (nbit == 4)     /* complement is non-zero only in bits 32-47 */
+                MOVN_32(r0, (n0 >> 32) & 0xffff);
+            else {
+                MOVZ   (r0,  i0        & 0xffff);
+                MOVK_16(r0, (i0 >> 16) & 0xffff);
+                MOVK_48(r0, (i0 >> 48) & 0xffff);
+            }
+            break;
+        case 12:               /* bits 32-47 and 48-63 */
+            MOVZ_32(r0, (i0 >> 32) & 0xffff);
+            MOVK_48(r0, (i0 >> 48) & 0xffff);
+            break;
+        case 13:               /* every chunk but bits 16-31 */
+            if (nbit == 2)     /* complement is non-zero only in bits 16-31 */
+                MOVN_16(r0, (n0 >> 16) & 0xffff);
+            else {
+                MOVZ   (r0,  i0        & 0xffff);
+                MOVK_32(r0, (i0 >> 32) & 0xffff);
+                MOVK_48(r0, (i0 >> 48) & 0xffff);
+            }
+            break;
+        case 14:               /* every chunk but bits 0-15 */
+            if (nbit == 1)     /* complement is non-zero only in bits 0-15 */
+                MOVN   (r0, (n0)       & 0xffff);
+            else {
+                MOVZ_16(r0, (i0 >> 16) & 0xffff);
+                MOVK_32(r0, (i0 >> 32) & 0xffff);
+                MOVK_48(r0, (i0 >> 48) & 0xffff);
+            }
+            break;
+        case 15:               /* all four chunks of i0 are non-zero */
+            if (nbit == 0)     /* n0 == 0, i.e. i0 has every bit set */
+                MOVN   (r0, 0);
+            else if (nbit == 1)
+                MOVN   (r0,  n0        & 0xffff);
+            else if (nbit == 8)
+                MOVN_48(r0, (n0 >> 48) & 0xffff);
+            else {             /* general case: one instruction per chunk */
+                MOVZ   (r0,  i0        & 0xffff);
+                MOVK_16(r0, (i0 >> 16) & 0xffff);
+                MOVK_32(r0, (i0 >> 32) & 0xffff);
+                MOVK_48(r0, (i0 >> 48) & 0xffff);
+            }
+            break;
+        default:               /* ibit only ever holds bits 0-3 */
+            abort();
+    }
+}
+/*
+ * Load i0 into r0 with a fixed MOVZ + three MOVK sequence, regardless
+ * of the value, and return the code address (_jit->pc.w) at which the
+ * sequence starts.
+ * NOTE(review): the fixed length is presumably what lets the constant
+ * be rewritten later through patch_at -- confirm against its
+ * definition.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          w;
+    w = _jit->pc.w;            /* address of the first instruction */
+    MOVZ   (r0,  i0        & 0xffff);
+    MOVK_16(r0, (i0 >> 16) & 0xffff);
+    MOVK_32(r0, (i0 >> 32) & 0xffff);
+    MOVK_48(r0, (i0 >> 48) & 0xffff);
+    return (w);
+}
+/*
+ * Compare r1 with r2 (CMP) and then CSET r0 according to the
+ * condition code cc.
+ */
+static void
+_ccr(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    CMP(r1, r2);
+    CSET(r0, cc);
+}
+/*
+ * Immediate form of the compare-and-set emitter: compare r1 with the
+ * constant i0, using a compare-immediate form when the constant
+ * allows it, and then CSET r0 according to cc.
+ */
+static void
+_cci(jit_state_t *_jit, jit_int32_t cc,
+     jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t         reg;
+    jit_word_t          is =  i0 >> 12;        /* i0 in units of 4096  */
+    jit_word_t          in = -i0;              /* negated immediate    */
+    jit_word_t          iS =  in >> 12;        /* -i0 in units of 4096 */
+    if (      i0 >= 0 && i0 <= 0xfff)
+        CMPI   (r1,
i0);
+    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
+        CMPI_12(r1, is);
+    else if ( in >= 0 && in <= 0xfff)
+        CMNI (r1, in);
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)    /* -i0 is a 12-bit value shifted left by 12 */
+        CMNI_12(r1, iS);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        CMP(r1, rn(reg));
+        jit_unget_reg(reg);
+    }
+    CSET(r0, cc);
+}
+
+static jit_word_t
+_bccr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w, d;
+    CMP(r0, r1);
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    B_C(cc, d);
+    return (w);
+}
+
+static jit_word_t
+_bcci(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{   /* Compare r0 with immediate i1, then emit a conditional branch to i0; returns the branch's address. */
+    jit_int32_t reg;
+    jit_word_t w, d;
+    jit_word_t is = i1 >> 12;           /* i1 in units of 4096 */
+    jit_word_t in = -i1;                /* negated immediate, for the CMN forms */
+    jit_word_t iS = in >> 12;           /* negated immediate in units of 4096 */
+    if ( i1 >= 0 && i1 <= 0xfff)
+        CMPI (r0, i1);
+    else if ((is << 12) == i1 && is >= 0 && is <= 0xfff)    /* test the immediate i1, not the branch target i0 */
+        CMPI_12(r0, is);
+    else if ( in >= 0 && in <= 0xfff)
+        CMNI (r0, in);
+    else if ((iS << 12) == in && iS >= 0 && iS <= 0xfff)    /* -i1 is a 12-bit value shifted left by 12 */
+        CMNI_12(r0, iS);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i1);
+        CMP(r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    w = _jit->pc.w;                     /* address of the branch instruction emitted next */
+    d = (i0 - w) >> 2;                  /* displacement in 4-byte instruction units */
+    B_C(cc, d);
+    return (w);
+}
+
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        CBZ(r0, (i0 - w) >> 2);
+    }
+    else
+        w = bcci(BCC_EQ, i0, r0, i1);
+    return (w);
+}
+
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    if (i1 == 0) {
+        w = _jit->pc.w;
+        CBNZ(r0, (i0 - w) >> 2);
+    }
+    else
+        w = bcci(BCC_NE, i0, r0, i1);
+    return (w);
+}
+
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    addcr(r0, r0, r1);
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0,
jit_word_t i1)
+{
+    jit_word_t w;
+    addci(r0, r0, i1);
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    subcr(r0, r0, r1);
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_int32_t cc,
+       jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t w;
+    subci(r0, r0, i1);
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static jit_word_t
+_bmxr(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t w;
+    TST(r0, r1);
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static jit_word_t
+_bmxi(jit_state_t *_jit, jit_int32_t cc,
+      jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{   /* Test r0 against the mask i1, then emit a conditional branch to i0; returns the branch's address. */
+    jit_word_t w;
+    jit_int32_t reg;
+    jit_int32_t imm;
+    imm = logical_immediate(i1);        /* -1 is treated as "i1 cannot be encoded as an immediate" */
+    if (imm != -1)
+        TSTI(r0, imm);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i1);              /* load the mask i1; i0 is the branch target */
+        TST(r0, rn(reg));
+        jit_unget_reg(reg);
+    }
+    w = _jit->pc.w;
+    B_C(cc, (i0 - w) >> 2);
+    return (w);
+}
+
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    w = (i0 - _jit->pc.w) >> 2;
+    if (w >= -33554432 && w <= 33554431)
+        B(w);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        jmpr(rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    reg = jit_get_reg(jit_class_gpr);
+    w = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+}
+
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t w;
+    jit_int32_t reg;
+    w = (i0 - _jit->pc.w) >> 2;
+    if (w >= -33554432 && w <= 33554431)
+        BL(w);
+    else {
+        reg = jit_get_reg(jit_class_gpr);
+        movi(rn(reg), i0);
+        callr(rn(reg));
+        jit_unget_reg(reg);
+    }
+}
+
+static jit_word_t
+_calli_p(jit_state_t *_jit,
jit_word_t i0) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + w = movi_p(rn(reg), i0); + callr(rn(reg)); + jit_unget_reg(reg); + return (w); +} + +/* + * prolog and epilog not as "optimized" as one would like, but the + * problem of overallocating stack space to save callee save registers + * exists on all ports, and is still a todo to use a variable + * stack_framesize + * value, what would cause needing to patch some calls, most likely + * the offset of jit_arg* of stack arguments. + */ +static void +_prolog(jit_state_t *_jit, jit_node_t *node) +{ + _jitc->function->stack = ((_jitc->function->self.alen - + /* align stack at 16 bytes */ + _jitc->function->self.aoff) + 15) & -16; + STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3)); + MOV_XSP(FP_REGNO, SP_REGNO); +#define SPILL(L, R, O) \ + do { \ + if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \ + if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ + STPI(L, R, SP_REGNO, O); \ + else \ + STRI(L, SP_REGNO, O); \ + } \ + else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ + STRI(R, SP_REGNO, O + 1); \ + } while (0) + SPILL(19, 20, 2); + SPILL(21, 22, 4); + SPILL(23, 24, 6); + SPILL(25, 26, 8); + SPILL(27, 28, 10); +#undef SPILL +#define SPILL(R, O) \ + do { \ + if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \ + stxi_d(O, SP_REGNO, R); \ + } while (0) + SPILL( 8, 96); + SPILL( 9, 104); + SPILL(10, 112); + SPILL(11, 120); + SPILL(12, 128); + SPILL(13, 136); + SPILL(14, 144); + SPILL(15, 152); +#undef SPILL + if (_jitc->function->stack) + subi(SP_REGNO, SP_REGNO, _jitc->function->stack); +} + +static void +_epilog(jit_state_t *_jit, jit_node_t *node) +{ + if (_jitc->function->stack) + MOV_XSP(SP_REGNO, FP_REGNO); +#define LOAD(L, R, O) \ + do { \ + if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \ + if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ + LDPI(L, R, SP_REGNO, O); \ + else \ + LDRI(L, SP_REGNO, O); \ + } \ + 
else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \
+            LDRI(R, SP_REGNO, O + 1); \
+    } while (0)
+    LOAD(19, 20, 2);
+    LOAD(21, 22, 4);
+    LOAD(23, 24, 6);
+    LOAD(25, 26, 8);
+    LOAD(27, 28, 10);
+#undef LOAD
+#define LOAD(R, O) \
+    do { \
+        if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \
+            ldxi_d(R, SP_REGNO, O); \
+    } while (0)
+    LOAD( 8, 96);
+    LOAD( 9, 104);
+    LOAD(10, 112);
+    LOAD(11, 120);
+    LOAD(12, 128);
+    LOAD(13, 136);
+    LOAD(14, 144);
+    LOAD(15, 152);
+#undef LOAD
+    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+    RET();
+}
+
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{   /* Rewrite the instruction(s) at address instr so they refer to address label. */
+    instr_t i;
+    jit_word_t d;
+    jit_int32_t fc, ff, ffc;
+    union {
+        jit_int32_t *i;
+        jit_word_t w;
+    } u;
+    u.w = instr;                        /* view the address as a pointer to 32-bit instruction words */
+    i.w = u.i[0];
+    fc = i.w & 0xfc000000;              /* opcode masks of decreasing width, one per instruction family */
+    ff = i.w & 0xff000000;
+    ffc = i.w & 0xffc00000;
+    if (fc == A64_B || fc == A64_BL) {
+        d = (label - instr) >> 2;       /* displacement in 4-byte instruction units */
+        assert(d >= -33554432 && d <= 33554431);
+        i.imm26.b = d;
+        u.i[0] = i.w;
+    }
+    else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
+        d = (label - instr) >> 2;
+        assert(d >= -262144 && d <= 262143);    /* signed 19-bit field: -2^18 .. 2^18 - 1 */
+        i.imm19.b = d;
+        u.i[0] = i.w;
+    }
+    else if (ffc == (A64_MOVZ|XS)) {    /* four-instruction MOVZ/MOVK sequence, 16 bits of label each */
+        i.imm16.b = label;
+        u.i[0] = i.w;
+        i.w = u.i[1];
+        assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
+        i.imm16.b = label >> 16;
+        u.i[1] = i.w;
+        i.w = u.i[2];
+        assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
+        i.imm16.b = label >> 32;
+        u.i[2] = i.w;
+        i.w = u.i[3];
+        assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
+        i.imm16.b = label >> 48;
+        u.i[3] = i.w;
+    }
+    else
+        abort();                        /* not an instruction form this function knows how to patch */
+}
+#endif
diff --git a/lib/jit_aarch64-fpu.c b/lib/jit_aarch64-fpu.c
new file mode 100644
index 000000000..fc94fd30b
--- /dev/null
+++ b/lib/jit_aarch64-fpu.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (C) 2013 Free Software Foundation, Inc.
+ * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# define A64_SCVTF 0x1e220000 +# define A64_FMOVWV 0x1e260000 +# define A64_FMOVVW 0x1e270000 +# define A64_FMOVXV 0x9e260000 +# define A64_FMOVVX 0x9e270000 +# define A64_FCVTZS 0x1e380000 +# define A64_FCMPE 0x1e202010 +# define A64_FMOV 0x1e204000 +# define A64_FABS 0x1e20c000 +# define A64_FNEG 0x1e214000 +# define A64_FSQRT 0x1e21c000 +# define A64_FCVTS 0x1e224000 +# define A64_FCVTD 0x1e22c000 +# define A64_FMUL 0x1e200800 +# define A64_FDIV 0x1e201800 +# define A64_FADD 0x1e202800 +# define A64_FSUB 0x1e203800 +# define FCMPES(Rn,Rm) os_vv(A64_FCMPE,0,Rn,Rm) +# define FCMPED(Rn,Rm) os_vv(A64_FCMPE,1,Rn,Rm) +# define FMOVS(Rd,Rn) osvv_(A64_FMOV,0,Rd,Rn) +# define FMOVD(Rd,Rn) osvv_(A64_FMOV,1,Rd,Rn) +# define FMOVWS(Rd,Rn) osvv_(A64_FMOVWV,0,Rd,Rn) +# define FMOVSW(Rd,Rn) osvv_(A64_FMOVVW,0,Rd,Rn) +# define FMOVXD(Rd,Rn) osvv_(A64_FMOVXV,1,Rd,Rn) +# define FMOVDX(Rd,Rn) osvv_(A64_FMOVVX,1,Rd,Rn) +# define FCVT_SD(Rd,Rn) osvv_(A64_FCVTS,1,Rd,Rn) +# define FCVT_DS(Rd,Rn) osvv_(A64_FCVTD,0,Rd,Rn) +# define SCVTFS(Rd,Rn) osvv_(A64_SCVTF|XS,0,Rd,Rn) +# define SCVTFD(Rd,Rn) osvv_(A64_SCVTF|XS,1,Rd,Rn) +# define FCVTSZ_WS(Rd,Rn) osvv_(A64_FCVTZS,0,Rd,Rn) +# define FCVTSZ_WD(Rd,Rn) osvv_(A64_FCVTZS,1,Rd,Rn) +# define FCVTSZ_XS(Rd,Rn) osvv_(A64_FCVTZS|XS,0,Rd,Rn) +# define FCVTSZ_XD(Rd,Rn) osvv_(A64_FCVTZS|XS,1,Rd,Rn) +# define FABSS(Rd,Rn) osvv_(A64_FABS,0,Rd,Rn) +# define FABSD(Rd,Rn) 
osvv_(A64_FABS,1,Rd,Rn) +# define FNEGS(Rd,Rn) osvv_(A64_FNEG,0,Rd,Rn) +# define FNEGD(Rd,Rn) osvv_(A64_FNEG,1,Rd,Rn) +# define FSQRTS(Rd,Rn) osvv_(A64_FSQRT,0,Rd,Rn) +# define FSQRTD(Rd,Rn) osvv_(A64_FSQRT,1,Rd,Rn) +# define FADDS(Rd,Rn,Rm) osvvv(A64_FADD,0,Rd,Rn,Rm) +# define FADDD(Rd,Rn,Rm) osvvv(A64_FADD,1,Rd,Rn,Rm) +# define FSUBS(Rd,Rn,Rm) osvvv(A64_FSUB,0,Rd,Rn,Rm) +# define FSUBD(Rd,Rn,Rm) osvvv(A64_FSUB,1,Rd,Rn,Rm) +# define FMULS(Rd,Rn,Rm) osvvv(A64_FMUL,0,Rd,Rn,Rm) +# define FMULD(Rd,Rn,Rm) osvvv(A64_FMUL,1,Rd,Rn,Rm) +# define FDIVS(Rd,Rn,Rm) osvvv(A64_FDIV,0,Rd,Rn,Rm) +# define FDIVD(Rd,Rn,Rm) osvvv(A64_FDIV,1,Rd,Rn,Rm) +# define osvvv(Op,Sz,Rd,Rn,Rm) _osvvv(_jit,Op,Sz,Rd,Rn,Rm) +static void _osvvv(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define osvv_(Op,Sz,Rd,Rn) _osvv_(_jit,Op,Sz,Rd,Rn) +static void _osvv_(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define os_vv(Op,Sz,Rn,Rm) _os_vv(_jit,Op,Sz,Rn,Rm) +static void _os_vv(jit_state_t*,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); +# define truncr_f_i(r0,r1) _truncr_f_i(_jit,r0,r1) +static void _truncr_f_i(jit_state_t*,jit_int32_t,jit_int32_t); +# define truncr_f_l(r0,r1) FCVTSZ_XS(r0,r1) +# define truncr_d_i(r0,r1) _truncr_d_i(_jit,r0,r1) +static void _truncr_d_i(jit_state_t*,jit_int32_t,jit_int32_t); +# define truncr_d_l(r0,r1) FCVTSZ_XD(r0,r1) +# define addr_f(r0,r1,r2) FADDS(r0,r1,r2) +# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) +static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define subr_f(r0,r1,r2) FSUBS(r0,r1,r2) +# define subi_f(r0,r1,i0) _subi_f(_jit,r0,r1,i0) +static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define mulr_f(r0,r1,r2) FMULS(r0,r1,r2) +# define muli_f(r0,r1,i0) _muli_f(_jit,r0,r1,i0) +static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define divr_f(r0,r1,r2) FDIVS(r0,r1,r2) +# define divi_f(r0,r1,i0) _divi_f(_jit,r0,r1,i0) 
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define absr_f(r0,r1) FABSS(r0,r1) +# define negr_f(r0,r1) FNEGS(r0,r1) +# define sqrtr_f(r0,r1) FSQRTS(r0,r1) +# define extr_f(r0,r1) SCVTFS(r0,r1) +# define ldr_f(r0,r1) _ldr_f(_jit,r0,r1) +static void _ldr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_f(r0,i0) _ldi_f(_jit,r0,i0) +static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t); +# define ldxr_f(r0,r1,r2) _ldxr_f(_jit,r0,r1,r2) +static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_f(r0,r1,i0) _ldxi_f(_jit,r0,r1,i0) +static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define str_f(r0,r1) _str_f(_jit,r0,r1) +static void _str_f(jit_state_t*,jit_int32_t,jit_int32_t); +# define sti_f(i0,r0) _sti_f(_jit,i0,r0) +static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); +# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2) +static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1) +static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define movr_f(r0,r1) _movr_f(_jit,r0,r1) +static void _movr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t); +# define extr_d_f(r0,r1) FCVT_SD(r0,r1) +# define fccr(cc,r0,r1,r2) _fccr(_jit,cc,r0,r1,r2) +static void _fccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define fcci(cc,r0,r1,i0) _fcci(_jit,cc,r0,r1,i0) +static void _fcci(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_float32_t); +# define ltr_f(r0,r1,r2) fccr(CC_MI,r0,r1,r2) +# define lti_f(r0,r1,i0) fcci(CC_MI,r0,r1,i0) +# define ler_f(r0,r1,r2) fccr(CC_LS,r0,r1,r2) +# define lei_f(r0,r1,i0) fcci(CC_LS,r0,r1,i0) +# define eqr_f(r0,r1,r2) fccr(CC_EQ,r0,r1,r2) +# define eqi_f(r0,r1,i0) fcci(CC_EQ,r0,r1,i0) +# define ger_f(r0,r1,r2) fccr(CC_GE,r0,r1,r2) +# define gei_f(r0,r1,i0) 
fcci(CC_GE,r0,r1,i0) +# define gtr_f(r0,r1,r2) fccr(CC_GT,r0,r1,r2) +# define gti_f(r0,r1,i0) fcci(CC_GT,r0,r1,i0) +# define ner_f(r0,r1,r2) fccr(CC_NE,r0,r1,r2) +# define nei_f(r0,r1,i0) fcci(CC_NE,r0,r1,i0) +# define unltr_f(r0,r1,r2) fccr(CC_LT,r0,r1,r2) +# define unlti_f(r0,r1,i0) fcci(CC_LT,r0,r1,i0) +# define unler_f(r0,r1,r2) fccr(CC_LE,r0,r1,r2) +# define unlei_f(r0,r1,i0) fcci(CC_LE,r0,r1,i0) +# define uneqr_f(r0,r1,r2) _uneqr_f(_jit,r0,r1,r2) +static void _uneqr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define uneqi_f(r0,r1,i0) _uneqi_f(_jit,r0,r1,i0) +static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define unger_f(r0,r1,r2) fccr(CC_PL,r0,r1,r2) +# define ungei_f(r0,r1,i0) fcci(CC_PL,r0,r1,i0) +# define ungtr_f(r0,r1,r2) fccr(CC_HI,r0,r1,r2) +# define ungti_f(r0,r1,i0) fcci(CC_HI,r0,r1,i0) +# define ltgtr_f(r0,r1,r2) _ltgtr_f(_jit,r0,r1,r2) +static void _ltgtr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ltgti_f(r0,r1,i0) _ltgti_f(_jit,r0,r1,i0) +static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t); +# define ordr_f(r0,r1,r2) fccr(CC_VC,r0,r1,r2) +# define ordi_f(r0,r1,i0) fcci(CC_VC,r0,r1,i0) +# define unordr_f(r0,r1,r2) fccr(CC_VS,r0,r1,r2) +# define unordi_f(r0,r1,i0) fcci(CC_VS,r0,r1,i0) +#define fbccr(cc,i0,r0,r1) _fbccr(_jit,cc,i0,r0,r1) +static jit_word_t +_fbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +#define fbcci(cc,i0,r0,i1) _fbcci(_jit,cc,i0,r0,i1) +static jit_word_t +_fbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float32_t); +# define bltr_f(i0,r0,r1) fbccr(BCC_MI,i0,r0,r1) +# define blti_f(i0,r0,i1) fbcci(BCC_MI,i0,r0,i1) +# define bler_f(i0,r0,r1) fbccr(BCC_LS,i0,r0,r1) +# define blei_f(i0,r0,i1) fbcci(BCC_LS,i0,r0,i1) +# define beqr_f(i0,r0,r1) fbccr(BCC_EQ,i0,r0,r1) +# define beqi_f(i0,r0,i1) fbcci(BCC_EQ,i0,r0,i1) +# define bger_f(i0,r0,r1) fbccr(BCC_GE,i0,r0,r1) +# define bgei_f(i0,r0,i1) fbcci(BCC_GE,i0,r0,i1) +# define 
bgtr_f(i0,r0,r1) fbccr(BCC_GT,i0,r0,r1) +# define bgti_f(i0,r0,i1) fbcci(BCC_GT,i0,r0,i1) +# define bner_f(i0,r0,r1) fbccr(BCC_NE,i0,r0,r1) +# define bnei_f(i0,r0,i1) fbcci(BCC_NE,i0,r0,i1) +# define bunltr_f(i0,r0,r1) fbccr(BCC_LT,i0,r0,r1) +# define bunlti_f(i0,r0,i1) fbcci(BCC_LT,i0,r0,i1) +# define bunler_f(i0,r0,r1) fbccr(BCC_LE,i0,r0,r1) +# define bunlei_f(i0,r0,i1) fbcci(BCC_LE,i0,r0,i1) +# define buneqr_f(i0,r0,r1) _buneqr_f(_jit,i0,r0,r1) +static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define buneqi_f(i0,r0,i1) _buneqi_f(_jit,i0,r0,i1) +static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t); +# define bunger_f(i0,r0,r1) fbccr(BCC_PL,i0,r0,r1) +# define bungei_f(i0,r0,i1) fbcci(BCC_PL,i0,r0,i1) +# define bungtr_f(i0,r0,r1) fbccr(BCC_HI,i0,r0,r1) +# define bungti_f(i0,r0,i1) fbcci(BCC_HI,i0,r0,i1) +# define bltgtr_f(i0,r0,r1) _bltgtr_f(_jit,i0,r0,r1) +static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define bltgti_f(i0,r0,i1) _bltgti_f(_jit,i0,r0,i1) +static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t); +# define bordr_f(i0,r0,r1) fbccr(BCC_VC,i0,r0,r1) +# define bordi_f(i0,r0,i1) fbcci(BCC_VC,i0,r0,i1) +# define bunordr_f(i0,r0,r1) fbccr(BCC_VS,i0,r0,r1) +# define bunordi_f(i0,r0,i1) fbcci(BCC_VS,i0,r0,i1) +# define addr_d(r0,r1,r2) FADDD(r0,r1,r2) +# define addi_d(r0,r1,i0) _addi_d(_jit,r0,r1,i0) +static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define subr_d(r0,r1,r2) FSUBD(r0,r1,r2) +# define subi_d(r0,r1,i0) _subi_d(_jit,r0,r1,i0) +static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define mulr_d(r0,r1,r2) FMULD(r0,r1,r2) +# define muli_d(r0,r1,i0) _muli_d(_jit,r0,r1,i0) +static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define divr_d(r0,r1,r2) FDIVD(r0,r1,r2) +# define divi_d(r0,r1,i0) _divi_d(_jit,r0,r1,i0) +static void 
_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define absr_d(r0,r1) FABSD(r0,r1) +# define negr_d(r0,r1) FNEGD(r0,r1) +# define sqrtr_d(r0,r1) FSQRTD(r0,r1) +# define extr_d(r0,r1) SCVTFD(r0,r1) +# define ldr_d(r0,r1) _ldr_d(_jit,r0,r1) +static void _ldr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define ldi_d(r0,i0) _ldi_d(_jit,r0,i0) +static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t); +# define ldxr_d(r0,r1,r2) _ldxr_d(_jit,r0,r1,r2) +static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ldxi_d(r0,r1,i0) _ldxi_d(_jit,r0,r1,i0) +static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +# define str_d(r0,r1) _str_d(_jit,r0,r1) +static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define sti_d(i0,r0) _sti_d(_jit,i0,r0) +static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); +# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2) +static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) +static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define movr_d(r0,r1) _movr_d(_jit,r0,r1) +static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t); +# define extr_f_d(r0,r1) FCVT_DS(r0,r1) +# define dccr(cc,r0,r1,r2) _dccr(_jit,cc,r0,r1,r2) +static void _dccr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define dcci(cc,r0,r1,i0) _dcci(_jit,cc,r0,r1,i0) +static void _dcci(jit_state_t*, + jit_int32_t,jit_int32_t,jit_int32_t,jit_float64_t); +# define ltr_d(r0,r1,r2) dccr(CC_MI,r0,r1,r2) +# define lti_d(r0,r1,i0) dcci(CC_MI,r0,r1,i0) +# define ler_d(r0,r1,r2) dccr(CC_LS,r0,r1,r2) +# define lei_d(r0,r1,i0) dcci(CC_LS,r0,r1,i0) +# define eqr_d(r0,r1,r2) dccr(CC_EQ,r0,r1,r2) +# define eqi_d(r0,r1,i0) dcci(CC_EQ,r0,r1,i0) +# define ger_d(r0,r1,r2) dccr(CC_GE,r0,r1,r2) +# define gei_d(r0,r1,i0) dcci(CC_GE,r0,r1,i0) 
+# define gtr_d(r0,r1,r2) dccr(CC_GT,r0,r1,r2) +# define gti_d(r0,r1,i0) dcci(CC_GT,r0,r1,i0) +# define ner_d(r0,r1,r2) dccr(CC_NE,r0,r1,r2) +# define nei_d(r0,r1,i0) dcci(CC_NE,r0,r1,i0) +# define unltr_d(r0,r1,r2) dccr(CC_LT,r0,r1,r2) +# define unlti_d(r0,r1,i0) dcci(CC_LT,r0,r1,i0) +# define unler_d(r0,r1,r2) dccr(CC_LE,r0,r1,r2) +# define unlei_d(r0,r1,i0) dcci(CC_LE,r0,r1,i0) +# define uneqr_d(r0,r1,r2) _uneqr_d(_jit,r0,r1,r2) +static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define uneqi_d(r0,r1,i0) _uneqi_d(_jit,r0,r1,i0) +static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define unger_d(r0,r1,r2) dccr(CC_PL,r0,r1,r2) +# define ungei_d(r0,r1,i0) dcci(CC_PL,r0,r1,i0) +# define ungtr_d(r0,r1,r2) dccr(CC_HI,r0,r1,r2) +# define ungti_d(r0,r1,i0) dcci(CC_HI,r0,r1,i0) +# define ltgtr_d(r0,r1,r2) _ltgtr_d(_jit,r0,r1,r2) +static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ltgti_d(r0,r1,i0) _ltgti_d(_jit,r0,r1,i0) +static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t); +# define ordr_d(r0,r1,r2) dccr(CC_VC,r0,r1,r2) +# define ordi_d(r0,r1,i0) dcci(CC_VC,r0,r1,i0) +# define unordr_d(r0,r1,r2) dccr(CC_VS,r0,r1,r2) +# define unordi_d(r0,r1,i0) dcci(CC_VS,r0,r1,i0) +#define dbccr(cc,i0,r0,r1) _dbccr(_jit,cc,i0,r0,r1) +static jit_word_t +_dbccr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +#define dbcci(cc,i0,r0,i1) _dbcci(_jit,cc,i0,r0,i1) +static jit_word_t +_dbcci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_float64_t); +# define bltr_d(i0,r0,r1) dbccr(BCC_MI,i0,r0,r1) +# define blti_d(i0,r0,i1) dbcci(BCC_MI,i0,r0,i1) +# define bler_d(i0,r0,r1) dbccr(BCC_LS,i0,r0,r1) +# define blei_d(i0,r0,i1) dbcci(BCC_LS,i0,r0,i1) +# define beqr_d(i0,r0,r1) dbccr(BCC_EQ,i0,r0,r1) +# define beqi_d(i0,r0,i1) dbcci(BCC_EQ,i0,r0,i1) +# define bger_d(i0,r0,r1) dbccr(BCC_GE,i0,r0,r1) +# define bgei_d(i0,r0,i1) dbcci(BCC_GE,i0,r0,i1) +# define bgtr_d(i0,r0,r1) 
dbccr(BCC_GT,i0,r0,r1) +# define bgti_d(i0,r0,i1) dbcci(BCC_GT,i0,r0,i1) +# define bner_d(i0,r0,r1) dbccr(BCC_NE,i0,r0,r1) +# define bnei_d(i0,r0,i1) dbcci(BCC_NE,i0,r0,i1) +# define bunltr_d(i0,r0,r1) dbccr(BCC_LT,i0,r0,r1) +# define bunlti_d(i0,r0,i1) dbcci(BCC_LT,i0,r0,i1) +# define bunler_d(i0,r0,r1) dbccr(BCC_LE,i0,r0,r1) +# define bunlei_d(i0,r0,i1) dbcci(BCC_LE,i0,r0,i1) +# define buneqr_d(i0,r0,r1) _buneqr_d(_jit,i0,r0,r1) +static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define buneqi_d(i0,r0,i1) _buneqi_d(_jit,i0,r0,i1) +static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t); +# define bunger_d(i0,r0,r1) dbccr(BCC_PL,i0,r0,r1) +# define bungei_d(i0,r0,i1) dbcci(BCC_PL,i0,r0,i1) +# define bungtr_d(i0,r0,r1) dbccr(BCC_HI,i0,r0,r1) +# define bungti_d(i0,r0,i1) dbcci(BCC_HI,i0,r0,i1) +# define bltgtr_d(i0,r0,r1) _bltgtr_d(_jit,i0,r0,r1) +static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define bltgti_d(i0,r0,i1) _bltgti_d(_jit,i0,r0,i1) +static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t); +# define bordr_d(i0,r0,r1) dbccr(BCC_VC,i0,r0,r1) +# define bordi_d(i0,r0,i1) dbcci(BCC_VC,i0,r0,i1) +# define bunordr_d(i0,r0,r1) dbccr(BCC_VS,i0,r0,r1) +# define bunordi_d(i0,r0,i1) dbcci(BCC_VS,i0,r0,i1) +#endif + +#if CODE +static void +_osvvv(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Sz, + jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Rm) +{ + instr_t i; + assert(!(Rd & ~0x1f)); + assert(!(Rn & ~0x1f)); + assert(!(Rm & ~0x1f)); + assert(!(Sz & ~0x3)); + assert(!(Op & ~0xffe0fc00)); + i.w = Op; + i.size.b = Sz; + i.Rd.b = Rd; + i.Rn.b = Rn; + i.Rm.b = Rm; + ii(i.w); +} + +static void +_osvv_(jit_state_t *_jit, jit_int32_t Op, + jit_int32_t Sz, jit_int32_t Rd, jit_int32_t Rn) +{ + instr_t i; + assert(!(Rd & ~0x1f)); + assert(!(Rn & ~0x1f)); + assert(!(Sz & ~0x3)); + assert(!(Op & ~0xfffffc00)); + i.w = Op; + i.size.b = Sz; + i.Rd.b = Rd; + i.Rn.b = Rn; + 
ii(i.w); +} + +static void +_os_vv(jit_state_t *_jit, jit_int32_t Op, + jit_int32_t Sz, jit_int32_t Rn, jit_int32_t Rm) +{ + instr_t i; + assert(!(Rn & ~0x1f)); + assert(!(Rm & ~0x1f)); + assert(!(Sz & ~0x3)); + assert(!(Op & ~0xff20fc1f)); + i.w = Op; + i.size.b = Sz; + i.Rn.b = Rn; + i.Rm.b = Rm; + ii(i.w); +} + +#define fopi(name) \ +static void \ +_##name##i_f(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) \ +{ \ + jit_int32_t reg = jit_get_reg(jit_class_fpr); \ + movi_f(rn(reg), i0); \ + name##r_f(r0, r1, rn(reg)); \ + jit_unget_reg(reg); \ +} +#define dopi(name) \ +static void \ +_##name##i_d(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) \ +{ \ + jit_int32_t reg = jit_get_reg(jit_class_fpr); \ + movi_d(rn(reg), i0); \ + name##r_d(r0, r1, rn(reg)); \ + jit_unget_reg(reg); \ +} +#define fbopi(name) \ +static jit_word_t \ +_b##name##i_f(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, jit_float32_t i1) \ +{ \ + jit_word_t word; \ + jit_int32_t reg = jit_get_reg(jit_class_fpr); \ + movi_f(rn(reg), i1); \ + word = b##name##r_f(i0, r0, rn(reg)); \ + jit_unget_reg(reg); \ + return (word); \ +} +#define dbopi(name) \ +static jit_word_t \ +_b##name##i_d(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, jit_float64_t i1) \ +{ \ + jit_word_t word; \ + jit_int32_t reg = jit_get_reg(jit_class_fpr); \ + movi_d(rn(reg), i1); \ + word = b##name##r_d(i0, r0, rn(reg)); \ + jit_unget_reg(reg); \ + return (word); \ +} + +static void +_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + FCVTSZ_WS(r0, r1); + extr_i(r0, r0); +} + +static void +_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + FCVTSZ_WD(r0, r1); + extr_i(r0, r0); +} + +fopi(add) +fopi(sub) +fopi(mul) +fopi(div) + +static void +_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldr_i(rn(reg), r1); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +} + +static 
void +_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldi_i(rn(reg), i0); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldxr_i(rn(reg), r1, r2); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldxi_i(rn(reg), r1, i0); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r1); + str_i(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r0); + sti_i(i0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r2); + stxr_i(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVWS(rn(reg), r1); + stxi_i(i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + FMOVS(r0, r1); +} + +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } u; + jit_int32_t reg; + u.f = i0; + if (u.i == 0) + FMOVSW(r0, WZR_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + /* prevent generating unused top 32 bits */ + movi(rn(reg), ((jit_word_t)u.i) & 0xffffffff); + FMOVSW(r0, rn(reg)); + jit_unget_reg(reg); + } +} 
+ +static void +_fccr(jit_state_t *_jit, jit_int32_t cc, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMPES(r1, r2); + CSET(r0, cc); +} + +static void +_fcci(jit_state_t *_jit, jit_int32_t cc, + jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i0); + fccr(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMPES(r1, r2); + CSET(r0, CC_VS); + w = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered satisfies condition */ + CSET(r0, CC_EQ); /* equal satisfies condition */ + patch_at(w, _jit->pc.w); +} +fopi(uneq) + +static void +_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMPES(r1, r2); + CSET(r0, CC_VC); /* set to 1 if ordered */ + w = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered does not satisfy condition */ + CSET(r0, CC_NE); /* set to 1 if not equal */ + patch_at(w, _jit->pc.w); +} +fopi(ltgt) + +static jit_word_t +_fbccr(jit_state_t *_jit, jit_int32_t cc, + jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w, d; + FCMPES(r0, r1); + w = _jit->pc.w; + d = (i0 - w) >> 2; + B_C(cc, d); + return (w); +} + +static jit_word_t +_fbcci(jit_state_t *_jit, jit_int32_t cc, + jit_word_t i0, jit_int32_t r0, jit_float32_t i1) +{ + jit_word_t w; + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_f(rn(reg), i1); + w = fbccr(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t u, v, w; + FCMPES(r0, r1); + u = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered satisfies condition */ + v = _jit->pc.w; + B_C(BCC_NE, 1); /* not equal (or unordered) does not satisfy */ + patch_at(u, _jit->pc.w); + w = _jit->pc.w; + B((i0 - w) >> 2); + patch_at(v, _jit->pc.w); + return (w); +} +fbopi(uneq) + +static jit_word_t 
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t u, v, w; + FCMPES(r0, r1); + u = _jit->pc.w; + B_C(BCC_VS, 2); /* jump over if unordered */ + v = _jit->pc.w; + B_C(BCC_EQ, 1); /* jump over if equal */ + w = _jit->pc.w; + B((i0 - w) >> 2); + patch_at(u, _jit->pc.w); + patch_at(v, _jit->pc.w); + return (w); +} +fbopi(ltgt) + +dopi(add) +dopi(sub) +dopi(mul) +dopi(div) + +static void +_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldr_l(rn(reg), r1); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldi_l(rn(reg), i0); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldxr_l(rn(reg), r1, r2); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + ldxi_l(rn(reg), r1, i0); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r1); + str_l(r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r0); + sti_l(i0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r2); + stxr_l(r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t 
reg; + reg = jit_get_reg(jit_class_gpr); + FMOVXD(rn(reg), r1); + stxi_l(i0, r0, rn(reg)); + jit_unget_reg(reg); +} + +static void +_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + FMOVD(r0, r1); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) +{ + union { + jit_int64_t l; + jit_float64_t d; + } u; + jit_int32_t reg; + u.d = i0; + if (u.l == 0) + FMOVDX(r0, XZR_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), u.l); + FMOVDX(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +static void +_dccr(jit_state_t *_jit, jit_int32_t cc, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + FCMPED(r1, r2); + CSET(r0, cc); +} + +static void +_dcci(jit_state_t *_jit, jit_int32_t cc, + jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i0); + dccr(cc, r0, r1, rn(reg)); + jit_unget_reg(reg); +} + +static void +_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMPED(r1, r2); + CSET(r0, CC_VS); + w = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered satisfies condition */ + CSET(r0, CC_EQ); /* equal satisfies condition */ + patch_at(w, _jit->pc.w); +} +dopi(uneq) + +static void +_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_word_t w; + FCMPED(r1, r2); + CSET(r0, CC_VC); /* set to 1 if ordered */ + w = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered does not satisfy condition */ + CSET(r0, CC_NE); /* set to 1 if not equal */ + patch_at(w, _jit->pc.w); +} +dopi(ltgt) + +static jit_word_t +_dbccr(jit_state_t *_jit, jit_int32_t cc, + jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w, d; + FCMPED(r0, r1); + w = _jit->pc.w; + d = (i0 - w) >> 2; + B_C(cc, d); + return (w); +} + +static jit_word_t +_dbcci(jit_state_t *_jit, jit_int32_t cc, + jit_word_t i0, jit_int32_t r0, jit_float64_t i1) +{ + jit_word_t w; + jit_int32_t reg; + reg = 
jit_get_reg(jit_class_fpr); + movi_d(rn(reg), i1); + w = dbccr(cc, i0, r0, rn(reg)); + jit_unget_reg(reg); + return (w); +} + +static jit_word_t +_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t u, v, w; + FCMPED(r0, r1); + u = _jit->pc.w; + B_C(BCC_VS, 1); /* unordered satisfies condition */ + v = _jit->pc.w; + B_C(BCC_NE, 1); /* not equal (or unordered) does not satisfy */ + patch_at(u, _jit->pc.w); + w = _jit->pc.w; + B((i0 - w) >> 2); + patch_at(v, _jit->pc.w); + return (w); +} +dbopi(uneq) + +static jit_word_t +_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t u, v, w; + FCMPED(r0, r1); + u = _jit->pc.w; + B_C(BCC_VS, 2); /* jump over if unordered */ + v = _jit->pc.w; + B_C(BCC_EQ, 1); /* jump over if equal */ + w = _jit->pc.w; + B((i0 - w) >> 2); + patch_at(u, _jit->pc.w); + patch_at(v, _jit->pc.w); + return (w); +} +dbopi(ltgt) +#endif diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c new file mode 100644 index 000000000..a76d6a6af --- /dev/null +++ b/lib/jit_aarch64.c @@ -0,0 +1,1237 @@ +/* + * Copyright (C) 2013 Free Software Foundation, Inc. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#define rc(value) jit_class_##value +#define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) + +/* + * Prototypes + */ +#define patch(instr, node) _patch(_jit, instr, node) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*); + +/* libgcc */ +extern void __clear_cache(void *, void *); + +#define PROTO 1 +# include "jit_aarch64-cpu.c" +# include "jit_aarch64-fpu.c" +#undef PROTO + +/* + * Initialization + */ +jit_register_t _rvs[] = { + { rc(gpr) | 0x08, "x8" }, + { rc(gpr) | 0x12, "x18" }, + { rc(gpr) | 0x11, "x17" }, + { rc(gpr) | 0x10, "x16" }, + { rc(gpr) | 0x09, "x9" }, + { rc(gpr) | 0x0a, "x10" }, + { rc(gpr) | 0x0b, "x11" }, + { rc(gpr) | 0x0c, "x12" }, + { rc(gpr) | 0x0d, "x13" }, + { rc(gpr) | 0x0e, "x14" }, + { rc(gpr) | 0x0f, "x15" }, + { rc(sav) | rc(gpr) | 0x13, "x19" }, + { rc(sav) | rc(gpr) | 0x14, "x20" }, + { rc(sav) | rc(gpr) | 0x15, "x21" }, + { rc(sav) | rc(gpr) | 0x16, "x22" }, + { rc(sav) | rc(gpr) | 0x17, "x23" }, + { rc(sav) | rc(gpr) | 0x18, "x24" }, + { rc(sav) | rc(gpr) | 0x19, "x25" }, + { rc(sav) | rc(gpr) | 0x1a, "x26" }, + { rc(sav) | rc(gpr) | 0x1b, "x27" }, + { rc(sav) | rc(gpr) | 0x1c, "x28" }, + { 0x1f, "sp" }, + { 0x1e, "lr" }, + { 0x1d, "fp" }, + { rc(arg) | rc(gpr) | 0x07, "x7" }, + { rc(arg) | rc(gpr) | 0x06, "x6" }, + { rc(arg) | rc(gpr) | 0x05, "x5" }, + { rc(arg) | rc(gpr) | 0x04, "x4" }, + { rc(arg) | rc(gpr) | 0x03, "x3" }, + { rc(arg) | rc(gpr) | 0x02, "x2" }, + { rc(arg) | rc(gpr) | 0x01, "x1" }, + { rc(arg) | rc(gpr) | 0x00, "x0" }, + { rc(fpr) | 0x1f, "v31" }, + { rc(fpr) | 0x1e, "v30" }, + { rc(fpr) | 0x1d, "v29" }, + { rc(fpr) | 0x1c, "v28" }, + { rc(fpr) | 0x1b, "v27" }, + { rc(fpr) | 0x1a, "v26" }, + { rc(fpr) | 0x19, "v25" }, + { rc(fpr) | 0x18, "v24" }, + { rc(fpr) | 0x17, "v23" }, + { rc(fpr) | 0x16, "v22" }, + { rc(fpr) | 0x15, "v21" }, + { rc(fpr) | 0x14, "v20" }, + { rc(fpr) | 0x13, "v19" }, + { rc(fpr) | 0x12, "v18" }, + { rc(fpr) | 0x11, "v17" 
}, + { rc(fpr) | 0x10, "v16" }, + { rc(sav) | rc(fpr) | 0x08, "v8" }, + { rc(sav) | rc(fpr) | 0x09, "v9" }, + { rc(sav) | rc(fpr) | 0x0a, "v10" }, + { rc(sav) | rc(fpr) | 0x0b, "v11" }, + { rc(sav) | rc(fpr) | 0x0c, "v12" }, + { rc(sav) | rc(fpr) | 0x0d, "v13" }, + { rc(sav) | rc(fpr) | 0x0e, "v14" }, + { rc(sav) | rc(fpr) | 0x0f, "v15" }, + { rc(arg) | rc(fpr) | 0x07, "v7" }, + { rc(arg) | rc(fpr) | 0x06, "v6" }, + { rc(arg) | rc(fpr) | 0x05, "v5" }, + { rc(arg) | rc(fpr) | 0x04, "v4" }, + { rc(arg) | rc(fpr) | 0x03, "v3" }, + { rc(arg) | rc(fpr) | 0x02, "v2" }, + { rc(arg) | rc(fpr) | 0x01, "v1" }, + { rc(arg) | rc(fpr) | 0x00, "v0" }, + { _NOREG, "" }, +}; + +/* + * Implementation + */ +void +jit_get_cpu(void) +{ +} + +void +_jit_init(jit_state_t *_jit) +{ + _jitc->reglen = jit_size(_rvs) - 1; +} + +void +_jit_prolog(jit_state_t *_jit) +{ + jit_int32_t offset; + + if (_jitc->function) + jit_epilog(); + assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0); + jit_regset_set_ui(&_jitc->regsav, 0); + offset = _jitc->functions.offset; + if (offset >= _jitc->functions.length) { + jit_realloc((jit_pointer_t *)&_jitc->functions.ptr, + _jitc->functions.length * sizeof(jit_function_t), + (_jitc->functions.length + 16) * sizeof(jit_function_t)); + _jitc->functions.length += 16; + } + _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; + _jitc->function->self.size = stack_framesize; + _jitc->function->self.argi = _jitc->function->self.argf = + _jitc->function->self.alen = 0; + _jitc->function->self.aoff = 0; + _jitc->function->self.call = jit_call_default; + jit_alloc((jit_pointer_t *)&_jitc->function->regoff, + _jitc->reglen * sizeof(jit_int32_t)); + + _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog); + jit_link(_jitc->function->prolog); + _jitc->function->prolog->w.w = offset; + _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog); + /* u: label value + * v: offset in blocks vector + * w: offset in functions vector + */ + 
_jitc->function->epilog->w.w = offset; + + jit_regset_new(&_jitc->function->regset); +} + +jit_int32_t +_jit_allocai(jit_state_t *_jit, jit_int32_t length) +{ + assert(_jitc->function); + switch (length) { + case 0: case 1: break; + case 2: _jitc->function->self.aoff &= -2; break; + case 3: case 4: _jitc->function->self.aoff &= -4; break; + default: _jitc->function->self.aoff &= -8; break; + } + _jitc->function->self.aoff -= length; + return (_jitc->function->self.aoff); +} + +void +_jit_ret(jit_state_t *_jit) +{ + jit_node_t *instr; + + assert(_jitc->function); + + /* jump to epilog */ + instr = jit_jmpi(); + jit_patch_at(instr, _jitc->function->epilog); +} + +void +_jit_retr(jit_state_t *_jit, jit_int32_t u) +{ + if (JIT_RET != u) + jit_movr(JIT_RET, u); + else + jit_live(JIT_RET); + jit_ret(); +} + +void +_jit_reti(jit_state_t *_jit, jit_word_t u) +{ + jit_movi(JIT_RET, u); + jit_ret(); +} + +void +_jit_retr_f(jit_state_t *_jit, jit_int32_t u) +{ + if (u != JIT_FRET) + jit_movr_f(JIT_FRET, u); + else + jit_live(JIT_FRET); + jit_ret(); +} + +void +_jit_reti_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_movi_f(JIT_FRET, u); + jit_ret(); +} + +void +_jit_retr_d(jit_state_t *_jit, jit_int32_t u) +{ + if (u != JIT_FRET) + jit_movr_d(JIT_FRET, u); + else + jit_live(JIT_FRET); + jit_ret(); +} + +void +_jit_reti_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_movi_d(JIT_FRET, u); + jit_ret(); +} + +void +_jit_epilog(jit_state_t *_jit) +{ + assert(_jitc->function); + assert(_jitc->function->epilog->next == NULL); + jit_link(_jitc->function->epilog); + _jitc->function = NULL; +} + +jit_node_t * +_jit_arg(jit_state_t *_jit) +{ + jit_int32_t offset; + assert(_jitc->function); + if (_jitc->function->self.argi < 8) + offset = _jitc->function->self.argi++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + return (jit_new_node_w(jit_code_arg, offset)); +} + +jit_bool_t +_jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset) 
+{ + return (offset >= 0 && offset < 8); +} + +jit_node_t * +_jit_arg_f(jit_state_t *_jit) +{ + jit_int32_t offset; + assert(_jitc->function); + if (_jitc->function->self.argf < 8) + offset = _jitc->function->self.argf++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + return (jit_new_node_w(jit_code_arg_f, offset)); +} + +jit_bool_t +_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset) +{ + return (jit_arg_reg_p(offset)); +} + +jit_node_t * +_jit_arg_d(jit_state_t *_jit) +{ + jit_int32_t offset; + assert(_jitc->function); + if (_jitc->function->self.argf < 8) + offset = _jitc->function->self.argf++; + else { + offset = _jitc->function->self.size; + _jitc->function->self.size += sizeof(jit_word_t); + } + return (jit_new_node_w(jit_code_arg_d, offset)); +} + +jit_bool_t +_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset) +{ + return (jit_arg_reg_p(offset)); +} + +void +_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_c(u, JIT_RA0 - v->u.w); + else + jit_ldxi_c(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_uc(u, JIT_RA0 - v->u.w); + else + jit_ldxi_uc(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_s(u, JIT_RA0 - v->u.w); + else + jit_ldxi_s(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_us(u, JIT_RA0 - v->u.w); + else + jit_ldxi_us(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_i(u, JIT_RA0 - v->u.w); + else + jit_ldxi_i(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_extr_ui(u, JIT_RA0 - v->u.w); + else + jit_ldxi_ui(u, JIT_FP, v->u.w); +} + +void 
+_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_movr(u, JIT_RA0 - v->u.w); + else + jit_ldxi_l(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_movr_f(u, JIT_FA0 - v->u.w); + else + jit_ldxi_f(u, JIT_FP, v->u.w); +} + +void +_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +{ + if (v->u.w < 8) + jit_movr_d(u, JIT_FA0 - v->u.w); + else + jit_ldxi_d(u, JIT_FP, v->u.w); +} + +void +_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +{ + assert(_jitc->function); + if (_jitc->function->call.argi < 8) { + jit_movr(JIT_RA0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +void +_jit_pushargi(jit_state_t *_jit, jit_word_t u) +{ + jit_int32_t regno; + assert(_jitc->function); + if (_jitc->function->call.argi < 8) { + jit_movi(JIT_RA0 - _jitc->function->call.argi, u); + ++_jitc->function->call.argi; + } + else { + regno = jit_get_reg(jit_class_gpr); + jit_movi(regno, u); + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +void +_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) +{ + assert(_jitc->function); + if (_jitc->function->call.argf < 8) { + jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +void +_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) +{ + jit_int32_t regno; + assert(_jitc->function); + if (_jitc->function->call.argf < 8) { + jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_f(regno, u); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + 
jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +void +_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) +{ + assert(_jitc->function); + if (_jitc->function->call.argf < 8) { + jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + jit_stxi_d(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +void +_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) +{ + jit_int32_t regno; + assert(_jitc->function); + if (_jitc->function->call.argf < 8) { + jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u); + ++_jitc->function->call.argf; + } + else { + regno = jit_get_reg(jit_class_fpr); + jit_movi_d(regno, u); + jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); + jit_unget_reg(regno); + _jitc->function->call.size += sizeof(jit_word_t); + } +} + +jit_bool_t +_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) +{ + jit_int32_t spec, argno; + spec = jit_class(_rvs[regno].spec); + if (spec & jit_class_arg) { + argno = JIT_RA0 - regno; /* integer argument slot; node->v.w is the call's integer register count */ + if (argno >= 0 && argno < node->v.w) + return (1); + if (spec & jit_class_fpr) { + argno = JIT_FA0 - regno; /* float argument slot, computed from the unmodified regno */ + if (argno >= 0 && argno < node->w.w) + return (1); + } + } + + return (0); +} + +void +_jit_finishr(jit_state_t *_jit, jit_int32_t r0) +{ + jit_node_t *node; + assert(_jitc->function); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; /* keep the largest outgoing stack argument area seen so far */ + node = jit_callr(r0); + node->v.w = _jitc->function->call.argi; /* registers loaded for THIS call (as in _jit_finishi), not the enclosing function's self.argi */ + node->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; /* reset per-call counters for the next call sequence */ + _jitc->prepare = 0; +} + +jit_node_t * +_jit_finishi(jit_state_t *_jit, jit_pointer_t i0) +{ + jit_node_t *node; + assert(_jitc->function); + if (_jitc->function->self.alen < _jitc->function->call.size) + _jitc->function->self.alen = _jitc->function->call.size; + node = jit_calli(i0); + node->v.w 
= _jitc->function->call.argi; + node->w.w = _jitc->function->call.argf; + _jitc->function->call.argi = _jitc->function->call.argf = + _jitc->function->call.size = 0; + _jitc->prepare = 0; + return (node); +} + +void +_jit_retval_c(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_c(r0, JIT_RET); +} + +void +_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_uc(r0, JIT_RET); +} + +void +_jit_retval_s(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_s(r0, JIT_RET); +} + +void +_jit_retval_us(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_us(r0, JIT_RET); +} + +void +_jit_retval_i(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_i(r0, JIT_RET); +} + +void +_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) +{ + jit_extr_ui(r0, JIT_RET); +} + +void +_jit_retval_l(jit_state_t *_jit, jit_int32_t r0) +{ + if (r0 != JIT_RET) + jit_movr(r0, JIT_RET); +} + +void +_jit_retval_f(jit_state_t *_jit, jit_int32_t r0) +{ + if (r0 != JIT_FRET) + jit_movr_f(r0, JIT_FRET); +} + +void +_jit_retval_d(jit_state_t *_jit, jit_int32_t r0) +{ + if (r0 != JIT_FRET) + jit_movr_d(r0, JIT_FRET); +} + +jit_pointer_t +_emit_code(jit_state_t *_jit) +{ + jit_node_t *node; + jit_node_t *temp; + jit_word_t word; + jit_word_t value; + jit_int32_t offset; + struct { + jit_node_t *node; + jit_uint8_t *data; + jit_word_t word; + jit_int32_t const_offset; + jit_int32_t patch_offset; + } undo; + + _jitc->function = NULL; + + jit_reglive_setup(); + + undo.word = 0; + undo.node = NULL; + undo.const_offset = undo.patch_offset = 0; +# define assert_data(node) /**/ +#define case_rr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), rn(node->v.w)); \ + break +#define case_rw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_vv(name, type) \ + case jit_code_##name##r##type: \ + if (jit_swf_p()) \ + swf_##name##r##type(rn(node->u.w), rn(node->v.w)); \ + else \ + vfp_##name##r##type(rn(node->u.w), 
rn(node->v.w)); \ + break +#define case_vw(name, type) \ + case jit_code_##name##i##type: \ + if (jit_swf_p()) \ + swf_##name##i##type(rn(node->u.w), node->v.w); \ + else \ + vfp_##name##i##type(rn(node->u.w), node->v.w); \ + break +#define case_wr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_wv(name, type) \ + case jit_code_##name##i##type: \ + if (jit_swf_p()) \ + swf_##name##i##type(node->u.w, rn(node->v.w)); \ + else \ + vfp_##name##i##type(node->u.w, rn(node->v.w)); \ + break +#define case_rrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrrr(name, type) \ + case jit_code_##name##r##type: \ + name##r##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_vvv(name, type) \ + case jit_code_##name##r##type: \ + if (jit_swf_p()) \ + swf_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + else \ + vfp_##name##r##type(rn(node->u.w), \ + rn(node->v.w), rn(node->w.w)); \ + break +#define case_rrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \ + break +#define case_rrrw(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(rn(node->u.q.l), rn(node->u.q.h), \ + rn(node->v.w), node->w.w); \ + break +#define case_rrf(name) \ + case jit_code_##name##i_f: \ + assert_data(node); \ + name##i_f(rn(node->u.w), rn(node->v.w), node->w.f); \ + break +#define case_rrd(name) \ + case jit_code_##name##i_d: \ + assert_data(node); \ + name##i_d(rn(node->u.w), rn(node->v.w), node->w.d); \ + break +#define case_wrr(name, type) \ + case jit_code_##name##i##type: \ + name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \ + break +#define case_brr(name, type) \ + case jit_code_##name##r##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); 
\ + if (temp->flag & jit_flag_patch) \ + name##r##type(temp->u.w, rn(node->v.w), \ + rn(node->w.w)); \ + else { \ + word = name##r##type(_jit->pc.w, \ + rn(node->v.w), rn(node->w.w)); \ + patch(word, node); \ + } \ + break +#define case_brw(name, type) \ + case jit_code_##name##i##type: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i##type(temp->u.w, \ + rn(node->v.w), node->w.w); \ + else { \ + word = name##i##type(_jit->pc.w, \ + rn(node->v.w), node->w.w); \ + patch(word, node); \ + } \ + break; +#define case_brf(name) \ + case jit_code_##name##i_f: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_f(temp->u.w, rn(node->v.w), node->w.f); \ + else { \ + word = name##i_f(_jit->pc.w, rn(node->v.w), \ + node->w.f); \ + patch(word, node); \ + } \ + break +#define case_brd(name) \ + case jit_code_##name##i_d: \ + temp = node->u.n; \ + assert(temp->code == jit_code_label || \ + temp->code == jit_code_epilog); \ + if (temp->flag & jit_flag_patch) \ + name##i_d(temp->u.w, rn(node->v.w), node->w.d); \ + else { \ + word = name##i_d(_jit->pc.w, rn(node->v.w), \ + node->w.d); \ + patch(word, node); \ + } \ + break + for (node = _jitc->head; node; node = node->next) { + if (_jit->pc.uc >= _jitc->code.end && !jit_remap()) + return (NULL); + + value = jit_classify(node->code); + jit_regarg_set(node, value); + switch (node->code) { + case jit_code_note: case jit_code_name: + node->u.w = _jit->pc.w; + break; + case jit_code_label: + /* remember label is defined */ + node->flag |= jit_flag_patch; + node->u.w = _jit->pc.w; + break; + case_rrr(add,); + case_rrw(add,); + case_rrr(addc,); + case_rrw(addc,); + case_rrr(addx,); + case_rrw(addx,); + case_rrr(sub,); + case_rrw(sub,); + case_rrr(subc,); + case_rrw(subc,); + case_rrr(subx,); + case_rrw(subx,); + case_rrr(mul,); + 
case_rrw(mul,); + case_rrrr(qmul,); + case_rrrw(qmul,); + case_rrrr(qmul, _u); + case_rrrw(qmul, _u); + case_rrr(div,); + case_rrw(div,); + case_rrr(div, _u); + case_rrw(div, _u); + case_rrrr(qdiv,); + case_rrrw(qdiv,); + case_rrrr(qdiv, _u); + case_rrrw(qdiv, _u); + case_rrr(rem,); + case_rrw(rem,); + case_rrr(rem, _u); + case_rrw(rem, _u); + case_rrr(lsh,); + case_rrw(lsh,); + case_rrr(rsh,); + case_rrw(rsh,); + case_rrr(rsh, _u); + case_rrw(rsh, _u); + case_rr(neg,); + case_rr(com,); + case_rrr(and,); + case_rrw(and,); + case_rrr(or,); + case_rrw(or,); + case_rrr(xor,); + case_rrw(xor,); + case_rr(trunc, _f_i); + case_rr(trunc, _d_i); + case_rr(trunc, _f_l); + case_rr(trunc, _d_l); + case_rr(ld, _c); + case_rw(ld, _c); + case_rr(ld, _uc); + case_rw(ld, _uc); + case_rr(ld, _s); + case_rw(ld, _s); + case_rr(ld, _us); + case_rw(ld, _us); + case_rr(ld, _i); + case_rw(ld, _i); + case_rr(ld, _ui); + case_rw(ld, _ui); + case_rr(ld, _l); + case_rw(ld, _l); + case_rrr(ldx, _c); + case_rrw(ldx, _c); + case_rrr(ldx, _uc); + case_rrw(ldx, _uc); + case_rrr(ldx, _s); + case_rrw(ldx, _s); + case_rrr(ldx, _us); + case_rrw(ldx, _us); + case_rrr(ldx, _i); + case_rrw(ldx, _i); + case_rrr(ldx, _ui); + case_rrw(ldx, _ui); + case_rrr(ldx, _l); + case_rrw(ldx, _l); + case_rr(st, _c); + case_wr(st, _c); + case_rr(st, _s); + case_wr(st, _s); + case_rr(st, _i); + case_wr(st, _i); + case_rr(st, _l); + case_wr(st, _l); + case_rrr(stx, _c); + case_wrr(stx, _c); + case_rrr(stx, _s); + case_wrr(stx, _s); + case_rrr(stx, _i); + case_wrr(stx, _i); + case_rrr(stx, _l); + case_wrr(stx, _l); + case_rr(hton,); + case_rr(ext, _c); + case_rr(ext, _uc); + case_rr(ext, _s); + case_rr(ext, _us); + case_rr(ext, _i); + case_rr(ext, _ui); + case_rr(mov,); + case jit_code_movi: + if (node->flag & jit_flag_node) { + temp = node->v.n; + if (temp->code == jit_code_data || + (temp->code == jit_code_label && + (temp->flag & jit_flag_patch))) + movi(rn(node->u.w), temp->u.w); + else { + assert(temp->code == 
jit_code_label || + temp->code == jit_code_epilog); + word = movi_p(rn(node->u.w), temp->u.w); + patch(word, node); + } + } + else + movi(rn(node->u.w), node->v.w); + break; + case_rrr(lt,); + case_rrw(lt,); + case_rrr(lt, _u); + case_rrw(lt, _u); + case_rrr(le,); + case_rrw(le,); + case_rrr(le, _u); + case_rrw(le, _u); + case_rrr(eq,); + case_rrw(eq,); + case_rrr(ge,); + case_rrw(ge,); + case_rrr(ge, _u); + case_rrw(ge, _u); + case_rrr(gt,); + case_rrw(gt,); + case_rrr(gt, _u); + case_rrw(gt, _u); + case_rrr(ne,); + case_rrw(ne,); + case_brr(blt,); + case_brw(blt,); + case_brr(blt, _u); + case_brw(blt, _u); + case_brr(ble,); + case_brw(ble,); + case_brr(ble, _u); + case_brw(ble, _u); + case_brr(beq,); + case_brw(beq,); + case_brr(bge,); + case_brw(bge,); + case_brr(bge, _u); + case_brw(bge, _u); + case_brr(bgt,); + case_brw(bgt,); + case_brr(bgt, _u); + case_brw(bgt, _u); + case_brr(bne,); + case_brw(bne,); + case_brr(boadd,); + case_brw(boadd,); + case_brr(boadd, _u); + case_brw(boadd, _u); + case_brr(bxadd,); + case_brw(bxadd,); + case_brr(bxadd, _u); + case_brw(bxadd, _u); + case_brr(bosub,); + case_brw(bosub,); + case_brr(bosub, _u); + case_brw(bosub, _u); + case_brr(bxsub,); + case_brw(bxsub,); + case_brr(bxsub, _u); + case_brw(bxsub, _u); + case_brr(bms,); + case_brw(bms,); + case_brr(bmc,); + case_brw(bmc,); + case_rrr(add, _f); + case_rrf(add); + case_rrr(sub, _f); + case_rrf(sub); + case_rrr(mul, _f); + case_rrf(mul); + case_rrr(div, _f); + case_rrf(div); + case_rr(abs, _f); + case_rr(neg, _f); + case_rr(sqrt, _f); + case_rr(ext, _f); + case_rr(ld, _f); + case_rw(ld, _f); + case_rrr(ldx, _f); + case_rrw(ldx, _f); + case_rr(st, _f); + case_wr(st, _f); + case_rrr(stx, _f); + case_wrr(stx, _f); + case_rr(mov, _f); + case jit_code_movi_f: + assert_data(node); + movi_f(rn(node->u.w), node->v.f); + break; + case_rr(ext, _d_f); + case_rrr(lt, _f); + case_rrf(lt); + case_rrr(le, _f); + case_rrf(le); + case_rrr(eq, _f); + case_rrf(eq); + case_rrr(ge, _f); + 
case_rrf(ge); + case_rrr(gt, _f); + case_rrf(gt); + case_rrr(ne, _f); + case_rrf(ne); + case_rrr(unlt, _f); + case_rrf(unlt); + case_rrr(unle, _f); + case_rrf(unle); + case_rrr(uneq, _f); + case_rrf(uneq); + case_rrr(unge, _f); + case_rrf(unge); + case_rrr(ungt, _f); + case_rrf(ungt); + case_rrr(ltgt, _f); + case_rrf(ltgt); + case_rrr(ord, _f); + case_rrf(ord); + case_rrr(unord, _f); + case_rrf(unord); + case_brr(blt, _f); + case_brf(blt); + case_brr(ble, _f); + case_brf(ble); + case_brr(beq, _f); + case_brf(beq); + case_brr(bge, _f); + case_brf(bge); + case_brr(bgt, _f); + case_brf(bgt); + case_brr(bne, _f); + case_brf(bne); + case_brr(bunlt, _f); + case_brf(bunlt); + case_brr(bunle, _f); + case_brf(bunle); + case_brr(buneq, _f); + case_brf(buneq); + case_brr(bunge, _f); + case_brf(bunge); + case_brr(bungt, _f); + case_brf(bungt); + case_brr(bltgt, _f); + case_brf(bltgt); + case_brr(bord, _f); + case_brf(bord); + case_brr(bunord, _f); + case_brf(bunord); + case_rrr(add, _d); + case_rrd(add); + case_rrr(sub, _d); + case_rrd(sub); + case_rrr(mul, _d); + case_rrd(mul); + case_rrr(div, _d); + case_rrd(div); + case_rr(abs, _d); + case_rr(neg, _d); + case_rr(sqrt, _d); + case_rr(ext, _d); + case_rr(ld, _d); + case_rw(ld, _d); + case_rrr(ldx, _d); + case_rrw(ldx, _d); + case_rr(st, _d); + case_wr(st, _d); + case_rrr(stx, _d); + case_wrr(stx, _d); + case_rr(mov, _d); + case jit_code_movi_d: + assert_data(node); + movi_d(rn(node->u.w), node->v.d); + break; + case_rr(ext, _f_d); + case_rrr(lt, _d); + case_rrd(lt); + case_rrr(le, _d); + case_rrd(le); + case_rrr(eq, _d); + case_rrd(eq); + case_rrr(ge, _d); + case_rrd(ge); + case_rrr(gt, _d); + case_rrd(gt); + case_rrr(ne, _d); + case_rrd(ne); + case_rrr(unlt, _d); + case_rrd(unlt); + case_rrr(unle, _d); + case_rrd(unle); + case_rrr(uneq, _d); + case_rrd(uneq); + case_rrr(unge, _d); + case_rrd(unge); + case_rrr(ungt, _d); + case_rrd(ungt); + case_rrr(ltgt, _d); + case_rrd(ltgt); + case_rrr(ord, _d); + case_rrd(ord); + 
case_rrr(unord, _d); + case_rrd(unord); + case_brr(blt, _d); + case_brd(blt); + case_brr(ble, _d); + case_brd(ble); + case_brr(beq, _d); + case_brd(beq); + case_brr(bge, _d); + case_brd(bge); + case_brr(bgt, _d); + case_brd(bgt); + case_brr(bne, _d); + case_brd(bne); + case_brr(bunlt, _d); + case_brd(bunlt); + case_brr(bunle, _d); + case_brd(bunle); + case_brr(buneq, _d); + case_brd(buneq); + case_brr(bunge, _d); + case_brd(bunge); + case_brr(bungt, _d); + case_brd(bungt); + case_brr(bltgt, _d); + case_brd(bltgt); + case_brr(bord, _d); + case_brd(bord); + case_brr(bunord, _d); + case_brd(bunord); + case jit_code_jmpr: + jmpr(rn(node->u.w)); + break; + case jit_code_jmpi: + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + jmpi(temp->u.w); + else { + word = jmpi_p(_jit->pc.w); + patch(word, node); + } + break; + case jit_code_callr: + callr(rn(node->u.w)); + break; + case jit_code_calli: + if (node->flag & jit_flag_node) { + temp = node->u.n; + assert(temp->code == jit_code_label || + temp->code == jit_code_epilog); + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = calli_p(_jit->pc.w); + patch(word, node); + } + } + else + calli(node->u.w); + break; + case jit_code_prolog: + _jitc->function = _jitc->functions.ptr + node->w.w; + undo.node = node; + undo.word = _jit->pc.w; + undo.patch_offset = _jitc->patches.offset; + restart_function: + _jitc->again = 0; + prolog(node); + break; + case jit_code_epilog: + assert(_jitc->function == _jitc->functions.ptr + node->w.w); + if (_jitc->again) { + for (temp = undo.node->next; + temp != node; temp = temp->next) { + if (temp->code == jit_code_label || + temp->code == jit_code_epilog) + temp->flag &= ~jit_flag_patch; + } + temp->flag &= ~jit_flag_patch; + node = undo.node; + _jit->pc.w = undo.word; + _jitc->patches.offset = undo.patch_offset; + goto restart_function; + } + /* remember label is defined */ + node->flag |= 
jit_flag_patch; + node->u.w = _jit->pc.w; + epilog(node); + _jitc->function = NULL; + break; + case jit_code_live: + case jit_code_arg: + case jit_code_arg_f: case jit_code_arg_d: + break; + default: + abort(); + } + jit_regarg_clr(node, value); + /* update register live state */ + jit_reglive(node); + } +#undef case_brw +#undef case_brr +#undef case_wrr +#undef case_rrw +#undef case_rrr +#undef case_wr +#undef case_rw +#undef case_rr + + for (offset = 0; offset < _jitc->patches.offset; offset++) { + node = _jitc->patches.ptr[offset].node; + word = _jitc->patches.ptr[offset].inst; + value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w; + patch_at(word, value); + } + + word = sysconf(_SC_PAGE_SIZE); + __clear_cache(_jit->code.ptr, (void *)((_jit->pc.w + word) & -word)); + + return (_jit->code.ptr); +} + +#define CODE 1 +# include "jit_aarch64-cpu.c" +# include "jit_aarch64-fpu.c" +#undef CODE + +void +_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi(rn(r0), rn(r1), i0); +} + +void +_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi(i0, rn(r0), rn(r1)); +} + +void +_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + ldxi_d(rn(r0), rn(r1), i0); +} + +void +_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + stxi_d(i0, rn(r0), rn(r1)); +} + +/* Record that the instruction at address instr refers to a node whose address is not known yet; _emit_code resolves the recorded entries with patch_at() once all code is emitted. */ +static void +_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +{ + jit_int32_t flag; + + assert(node->flag & jit_flag_node); + /* the referenced node is in v.n for movi and in u.n otherwise, matching the patch loop in _emit_code */ + if (node->code == jit_code_movi) + flag = node->v.n->flag; + else + flag = node->u.n->flag; + assert(!(flag & jit_flag_patch)); /* a target that is already emitted must not be queued */ + if (_jitc->patches.offset >= _jitc->patches.length) { + jit_realloc((jit_pointer_t *)&_jitc->patches.ptr, + _jitc->patches.length * sizeof(jit_patch_t), + (_jitc->patches.length + 1024) * sizeof(jit_patch_t)); + _jitc->patches.length += 1024; + } + _jitc->patches.ptr[_jitc->patches.offset].inst = instr; + _jitc->patches.ptr[_jitc->patches.offset].node = node; + ++_jitc->patches.offset; +} diff --git 
a/lib/jit_arm.c b/lib/jit_arm.c index 07f98f4a0..8d478d0ce 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -347,7 +347,6 @@ _jit_reti_d(jit_state_t *_jit, jit_float64_t u) jit_ret(); } -/* must be called internally only */ void _jit_epilog(jit_state_t *_jit) { @@ -361,7 +360,6 @@ jit_node_t * _jit_arg(jit_state_t *_jit) { jit_int32_t offset; - assert(_jitc->function); if (_jitc->function->self.argi < 4) offset = _jitc->function->self.argi++; @@ -382,7 +380,6 @@ jit_node_t * _jit_arg_f(jit_state_t *_jit) { jit_int32_t offset; - assert(_jitc->function); if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) { if (_jitc->function->self.argf < 16) { @@ -412,7 +409,6 @@ jit_node_t * _jit_arg_d(jit_state_t *_jit) { jit_int32_t offset; - assert(_jitc->function); if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) { if (_jitc->function->self.argf < 15) { @@ -557,7 +553,6 @@ void _jit_pushargi(jit_state_t *_jit, jit_word_t u) { jit_int32_t regno; - assert(_jitc->function); if (_jitc->function->call.argi < 4) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -598,7 +593,6 @@ void _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) { jit_int32_t regno; - assert(_jitc->function); if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) { if (_jitc->function->call.argf < 16) { @@ -662,7 +656,6 @@ void _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) { jit_int32_t regno; - assert(_jitc->function); if (jit_cpu.abi && !(_jitc->function->call.call & jit_call_varargs)) { if (_jitc->function->call.argf < 15) { @@ -697,7 +690,6 @@ jit_bool_t _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) { jit_int32_t spec; - spec = jit_class(_rvs[regno].spec); if (spec & jit_class_arg) { regno = JIT_RA0 - regno; @@ -717,7 +709,6 @@ void _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *node; - assert(_jitc->function); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = 
_jitc->function->call.size; @@ -733,7 +724,6 @@ jit_node_t * _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *node; - assert(_jitc->function); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; diff --git a/lib/lightning.c b/lib/lightning.c index 8d844e7e7..4223616f6 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -2911,4 +2911,6 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, # include "jit_ia64.c" #elif defined(__hppa__) # include "jit_hppa.c" +#elif defined(__aarch64__) +# include "jit_aarch64.c" #endif