diff --git a/ChangeLog b/ChangeLog index 6718b7697..71a1907e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,63 @@ +2012-12-09 Paulo Andrade + + * check/alu.inc, check/alu_add.ok, check/alu_add.tst, + check/alu_and.ok, check/alu_and.tst, check/alu_com.ok, + check/alu_com.tst, check/alu_div.ok, check/alu_div.tst, + check/alu_lsh.ok, check/alu_lsh.tst, check/alu_mul.ok, + check/alu_mul.tst, check/alu_neg.ok, check/alu_neg.tst, + check/alu_or.ok, check/alu_or.tst, check/alu_rem.ok, + check/alu_rem.tst, check/alu_rsh.ok, check/alu_rsh.tst, + check/alu_sub.ok, check/alu_sub.tst, check/alu_xor.ok, + check/alu_xor.tst, check/alux_add.ok, check/alux_add.tst, + check/alux_sub.ok, check/alux_sub.tst, check/branch.ok, + check/branch.tst: New test cases for arithmetic and branch + tests. + + * check/Makefile.am: Update for new test cases. + + * include/lightning/jit_private.h: Make the jit_reg_free_p + macro shared by all backends. Previously was added for the + arm backend, but is useful in the x86_64 backend when checking + state of "special purpose register". + Also add the new jit_class_named register class, that must be + or'ed with the register value if calling jit_get_reg expecting + a specific value, because the specific register value may be + zero, that previously was treated as no register requested. + + * lib/jit_arm-cpu.c: Correct argument order for T2_MVN. + + * lib/jit_arm-swf.c: Call the proper function for double + divide. The "software float" implementation just calls + libgcc functions. + + * lib/jit_arm.c: Return float/double values in the float + register if using the hard float ABI. + + * lib/jit_x86-cpu.c: Change the can_sign_extend_int_p macro + to not include -0x80000000L, because there is code that + "abuses" it and thinks it can negate the immediate value + after calling that macro. + Correct implementation of jit_subi that had a wrong code + patch logic doing subtraction with reversed arguments. 
+ Correct REX prefix calculation in the jit_muli implementation. + Correct logic to get/unget %*ax and %*dx registers in divremr + and divremi. + Correct divremi that was using the symbolic, unique %*ax + value in one place (not using the _REGNO name suffix). + Correct cut&paste error causing it to use "xor" instead of + "or" in one code path of the jit_ori implementation. + Correct several flaws when clobbering registers and/or when + one of the arguments was %*cx in the rotshr wrapper function + implementing most shift operations. + + * lib/lightning.c: No longer expect that the backend be smart + enough to know what to do when asking for a named register + if that register is already an argument or is live. It fails + if it is an argument, or if register is live, fails if cannot + spill. + No longer incorrectly assume that eqr_{f,d} and ltgr_{f,d} are + safe to inverse value tests in jump thread optimization. + 2012-12-05 Paulo Andrade * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new diff --git a/check/Makefile.am b/check/Makefile.am index ae44deabf..4ecc2432d 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -41,6 +41,22 @@ EXTRA_DIST = \ ldstxr-c.tst ldstxr-c.ok \ ldstxi-c.tst ldstxi-c.ok \ cvt.tst cvt.ok \ + branch.tst branch.ok \ + alu.inc \ + alu_add.tst alu_add.ok \ + alux_add.tst alux_add.ok \ + alu_sub.tst alu_sub.ok \ + alux_sub.tst alux_sub.ok \ + alu_mul.tst alu_mul.ok \ + alu_div.tst alu_div.ok \ + alu_rem.tst alu_rem.ok \ + alu_and.tst alu_and.ok \ + alu_or.tst alu_or.ok \ + alu_xor.tst alu_xor.ok \ + alu_lsh.tst alu_lsh.ok \ + alu_rsh.tst alu_rsh.ok \ + alu_com.tst alu_com.ok \ + alu_neg.tst alu_neg.ok \ check.sh run-test \ all.tst @@ -49,7 +65,13 @@ TESTS = 3to2 add allocai \ ldstr ldsti \ ldstxr ldstxi \ ldstr-c ldstxr-c ldstxi-c \ - cvt + cvt branch \ + alu_add alux_add \ + alu_sub alux_sub \ + alu_mul alu_div alu_rem \ + alu_and alu_or alu_xor \ + alu_lsh alu_rsh \ + alu_com alu_neg CLEANFILES = $(TESTS) diff --git 
a/check/alu.inc b/check/alu.inc new file mode 100644 index 000000000..7edf6b29e --- /dev/null +++ b/check/alu.inc @@ -0,0 +1,283 @@ +.data 8 +ok: +.c "ok\n" + +/* 3 operand */ + +/* reg0 = reg1 op reg2 */ +#define ALUR(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R1 I0 \ + movi %R2 I1 \ + OP##r##T %R0 %R1 %R2 \ + beqi OP##T##N##r_##R0##R1##R2 %R0 V \ + calli @abort \ +OP##T##N##r_##R0##R1##R2: + +/* reg0 = reg1 op im */ +#define ALUI(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R1 I0 \ + movi %R2 V \ + OP##i##T %R0 %R1 I1 \ + beqr OP##T##N##i_##R0##R1##R2 %R0 %R2 \ + calli @abort \ +OP##T##N##i_##R0##R1##R2: + +/* reg0 = reg0 op reg1 */ +#define ALUR0(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R1 I1 \ + movi %R2 V \ + OP##r##T %R0 %R0 %R1 \ + beqr OP##T##N##r_0##R0##R1##R2 %R0 %R2 \ + calli @abort \ +OP##T##N##r_0##R0##R1##R2: + +/* reg0 = reg1 op reg0 */ +#define ALUR1(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I1 \ + movi %R1 I0 \ + movi %R2 V \ + OP##r##T %R0 %R1 %R0 \ + beqr OP##T##N##r_1##R0##R1##R2 %R0 %R2 \ + calli @abort \ +OP##T##N##r_1##R0##R1##R2: + +/* reg0 = reg0 op im */ +#define ALUI0(N, T, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R1 V \ + OP##i##T %R0 %R0 I1 \ + beqr OP##T##N##i_0##R0##R1##R2 %R0 %R1 \ + calli @abort \ +OP##T##N##i_0##R0##R1##R2: + +#define ALU3(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALUR(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALUI(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALUR0(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALUR1(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALUI0(N, T, OP, I0, I1, V, R0, R1, R2) + +#define ALU2(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALU3(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALU3(N, T, OP, I0, I1, V, R0, R2, R1) + +#define ALU1(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALU2(N, T, OP, I0, I1, V, R0, R1, R2) \ + ALU2(N, T, OP, I0, I1, V, R1, R0, R2) \ + ALU2(N, T, OP, I0, I1, V, R2, R1, R0) + +#define ALU(N, T, OP, I0, I1, V) \ + ALU1(N, T, OP, I0, I1, V, v0, v1, v2) \ + ALU1(N, T, OP, I0, I1, V, v0, v1, r0) 
\ + ALU1(N, T, OP, I0, I1, V, v0, v1, r1) \ + ALU1(N, T, OP, I0, I1, V, v0, v1, r2) \ + ALU1(N, T, OP, I0, I1, V, v1, v2, r1) \ + ALU1(N, T, OP, I0, I1, V, v1, v2, r2) \ + ALU1(N, T, OP, I0, I1, V, v2, r0, r1) \ + ALU1(N, T, OP, I0, I1, V, v2, r0, r2) \ + ALU1(N, T, OP, I0, I1, V, r0, r1, r2) + +/* 3 carry set/propagate */ + +/* + * r0 = i0 + * r1 = i1 + * r2 = 0 + * r0 = r0 opc r1 + * r2 = r2 opx r2 + */ +#define ALUXII(N, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R2 0 \ + OP##ci %R0 %R0 I1 \ + OP##xi %R2 %R2 0 \ + beqi OP##N##ii##R0##R1##R2 %R2 V \ + calli @abort \ +OP##N##ii##R0##R1##R2: + +#define ALUXIR(N, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R2 0 \ + OP##ci %R0 %R0 I1 \ + OP##xr %R2 %R2 %R2 \ + beqi OP##N##ir##R0##R1##R2 %R2 V \ + calli @abort \ +OP##N##ir##R0##R1##R2: + +#define ALUXRI(N, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R1 I1 \ + movi %R2 0 \ + OP##cr %R0 %R0 %R1 \ + OP##xi %R2 %R2 0 \ + beqi OP##N##ri##R0##R1##R2 %R2 V \ + calli @abort \ +OP##N##ri##R0##R1##R2: + +#define ALUXRR(N, OP, I0, I1, V, R0, R1, R2) \ + movi %R0 I0 \ + movi %R1 I1 \ + movi %R2 0 \ + OP##cr %R0 %R0 %R1 \ + OP##xr %R2 %R2 %R2 \ + beqi OP##N##rr##R0##R1##R2 %R2 V \ + calli @abort \ +OP##N##rr##R0##R1##R2: + +#define ALUX2(N, OP, I0, I1, V, R0, R1, R2) \ + ALUXII(N, OP, I0, I1, V, R0, R1, R2) \ + ALUXIR(N, OP, I0, I1, V, R0, R1, R2) \ + ALUXRI(N, OP, I0, I1, V, R0, R1, R2) \ + ALUXRR(N, OP, I0, I1, V, R0, R1, R2) + +#define ALUX1(N, OP, I0, I1, V, R0, R1, R2) \ + ALUX2(N, OP, I0, I1, V, R0, R1, R2) \ + ALUX2(N, OP, I0, I1, V, R0, R2, R1) + +#define ALUX0(N, OP, I0, I1, V, R0, R1, R2) \ + ALUX1(N, OP, I0, I1, V, R0, R1, R2) \ + ALUX1(N, OP, I0, I1, V, R1, R0, R2) \ + ALUX1(N, OP, I0, I1, V, R2, R1, R0) + +#define ALUX(N, OP, I0, I1, V) \ + ALUX0(N, OP, I0, I1, V, v0, v1, v2) \ + ALUX0(N, OP, I0, I1, V, v0, v1, r0) \ + ALUX0(N, OP, I0, I1, V, v0, v1, r1) \ + ALUX0(N, OP, I0, I1, V, v0, v1, r2) \ + ALUX0(N, OP, I0, I1, V, v1, v2, r0) \ + 
ALUX0(N, OP, I0, I1, V, v1, v2, r1) \ + ALUX0(N, OP, I0, I1, V, v1, v2, r2) \ + ALUX0(N, OP, I0, I1, V, v2, r0, r1) \ + ALUX0(N, OP, I0, I1, V, v2, r0, r2) \ + ALUX0(N, OP, I0, I1, V, r0, r1, r2) + +/* unary int */ + +#define UNR(N, OP, I, V, R0, R1) \ + movi %R1 I \ + OP##r %R0 %R1 \ + beqi OP##N##R0##R1 %R0 V \ + calli @abort \ +OP##N##R0##R1: + +#define UNRC(N, OP, I, V, R0, R1) \ + movi %R0 I \ + OP##r %R0 %R0 \ + beqi OP##N##c##R0##R1 %R0 V \ + calli @abort \ +OP##N##c##R0##R1: + +#define UN2(N, OP, I, V, R0, R1) \ + UNR(N, OP, I, V, R0, R1) \ + UNRC(N, OP, I, V, R0, R1) + +#define UN1(N, OP, I, V, R0, R1) \ + UN2(N, OP, I, V, R0, R1) \ + UN2(N, OP, I, V, R1, R0) + +#define UN(N, OP, I, V) \ + UN1(N, OP, I, V, v0, v1) \ + UN1(N, OP, I, V, v0, v2) \ + UN1(N, OP, I, V, v0, r0) \ + UN1(N, OP, I, V, v0, r1) \ + UN1(N, OP, I, V, v0, r2) \ + UN1(N, OP, I, V, v1, v2) \ + UN1(N, OP, I, V, v1, r0) \ + UN1(N, OP, I, V, v1, r1) \ + UN1(N, OP, I, V, v1, r2) \ + UN1(N, OP, I, V, v2, r0) \ + UN1(N, OP, I, V, v2, r1) \ + UN1(N, OP, I, V, v2, r2) \ + UN1(N, OP, I, V, r0, r1) \ + UN1(N, OP, I, V, r0, r2) \ + UN1(N, OP, I, V, r1, r2) + +/* reg0 = reg1 op reg2 */ +#define FOPR(N, T, OP, I0, I1, V, F0, F1, F2) \ + movi##T %F1 I0 \ + movi##T %F2 I1 \ + OP##r##T %F0 %F1 %F2 \ + beqi##T OP##T##N##F0##F1##F2 %F0 V \ + calli @abort \ +OP##T##N##F0##F1##F2: + +/* reg0 = reg0 op reg1 */ +#define FOPR0(N, T, OP, I0, I1, V, F0, F1, F2) \ + movi##T %F0 I0 \ + movi##T %F1 I1 \ + OP##r##T %F0 %F0 %F1 \ + beqi##T OP##T##N##0##F0##F1##F2 %F0 V \ + calli @abort \ +OP##T##N##0##F0##F1##F2: + +/* reg1 = reg0 op reg1 */ +#define FOPR1(N, T, OP, I0, I1, V, F0, F1, F2) \ + movi##T %F0 I0 \ + movi##T %F1 I1 \ + OP##r##T %F1 %F0 %F1 \ + beqi##T OP##T##N##1##F0##F1##F2 %F1 V \ + calli @abort \ +OP##T##N##1##F0##F1##F2: + +/* reg0 = reg1 op im */ +#define FOPI(N, T, OP, I0, I1, V, F0, F1, F2) \ + movi##T %F1 I0 \ + movi##T %F2 V \ + OP##i##T %F0 %F1 I1 \ + beqr##T OP##T##N##i##F0##F1##F2 %F0 %F2 \ + 
calli @abort \ +OP##T##N##i##F0##F1##F2: + +/* reg0 = reg0 op im */ +#define FOPI0(N, T, OP, I0, I1, V, F0, F1, F2) \ + movi##T %F0 I0 \ + movi##T %F2 V \ + OP##i##T %F0 %F0 I1 \ + beqr##T OP##T##N##i0##F0##F1##F2 %F0 %F2 \ + calli @abort \ +OP##T##N##i0##F0##F1##F2: + +#define FOP1(N, T, OP, I0, I1, V, F0, F1, F2) \ + FOPR(N, T, OP, I0, I1, V, F0, F1, F2) \ + FOPR0(N, T, OP, I0, I1, V, F0, F1, F2) \ + FOPR1(N, T, OP, I0, I1, V, F0, F1, F2) \ + FOPI(N, T, OP, I0, I1, V, F0, F1, F2) \ + FOPI0(N, T, OP, I0, I1, V, F0, F1, F2) + +#define FOP(N, T, OP, I0, I1, V) \ + FOP1(N, T, OP, I0, I1, V, f0, f1, f2) \ + FOP1(N, T, OP, I0, I1, V, f0, f2, f3) \ + FOP1(N, T, OP, I0, I1, V, f0, f3, f4) \ + FOP1(N, T, OP, I0, I1, V, f0, f5, f1) + +/* unary float */ + +#define FUNR(N, T, OP, I, V, R0, R1) \ + movi##T %R1 I \ + OP##r##T %R0 %R1 \ + beqi##T OP##N##T##R0##R1 %R0 V \ + calli @abort \ +OP##N##T##R0##R1: + +#define FUNRC(N, T, OP, I, V, R0, R1) \ + movi##T %R0 I \ + OP##r##T %R0 %R0 \ + beqi##T OP##N##T##c##R0##R1 %R0 V \ + calli @abort \ +OP##N##T##c##R0##R1: + +#define FUN2(N, T, OP, I, V, R0, R1) \ + FUNR(N, T, OP, I, V, R0, R1) \ + FUNRC(N, T, OP, I, V, R0, R1) + +#define FUN1(N, T, OP, I, V, R0, R1) \ + FUN2(N, T, OP, I, V, R0, R1) \ + FUN2(N, T, OP, I, V, R1, R0) + +#define FUN(N, T, OP, I, V) \ + FUN1(N, T, OP, I, V, f0, f1) \ + FUN1(N, T, OP, I, V, f0, f2) \ + FUN1(N, T, OP, I, V, f0, f3) \ + FUN1(N, T, OP, I, V, f0, f4) \ + FUN1(N, T, OP, I, V, f0, f5) diff --git a/check/alu_add.ok b/check/alu_add.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_add.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_add.tst b/check/alu_add.tst new file mode 100644 index 000000000..221b6acfb --- /dev/null +++ b/check/alu_add.tst @@ -0,0 +1,46 @@ +#include "alu.inc" + +.code + prolog + +#define ADD(N, I0, I1, V) ALU(N, , add, I0, I1, V) + + ADD(0, 0x7fffffff, 1, 0x80000000) + ADD(1, 1, 0x7fffffff, 0x80000000) + ADD(2, 0x80000000, 1, 0x80000001) + ADD(3, 1, 
0x80000000, 0x80000001) + ADD(4, 0x7fffffff, 0x80000000, 0xffffffff) + ADD(5, 0x80000000, 0x7fffffff, 0xffffffff) + ADD(6, 0x7fffffff, 0, 0x7fffffff) + ADD(7, 0, 0x7fffffff, 0x7fffffff) +#if __WORDSIZE == 32 + ADD(8, 0x7fffffff, 0xffffffff, 0x7ffffffe) + ADD(9, 0xffffffff, 0x7fffffff, 0x7ffffffe) + ADD(10, 0xffffffff, 0xffffffff, 0xfffffffe) +#else + ADD(8, 0x7fffffff, 0xffffffff, 0x17ffffffe) + ADD(9, 0xffffffff, 0x7fffffff, 0x17ffffffe) + ADD(10, 0xffffffff, 0xffffffff, 0x1fffffffe) + ADD(11, 0x7fffffffffffffff, 1, 0x8000000000000000) + ADD(12, 1, 0x7fffffffffffffff, 0x8000000000000000) + ADD(13, 0x8000000000000000, 1, 0x8000000000000001) + ADD(14, 1, 0x8000000000000000, 0x8000000000000001) + ADD(15, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) + ADD(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + ADD(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7ffffffffffffffe) + ADD(18, 0x7fffffffffffffff, 0x7fffffffffffffff, 0xfffffffffffffffe) + ADD(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffffe) +#endif + +#undef ADD +#define ADD(N, T, I0, I1, V) FOP(N, T, add, I0, I1, V) + ADD(0, _f, -0.5, 0.5, 0.0) + ADD(1, _f, 0.25, 0.75, 1.0) + ADD(0, _d, -0.5, 0.5, 0.0) + ADD(1, _d, 0.25, 0.75, 1.0) + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_and.ok b/check/alu_and.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_and.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_and.tst b/check/alu_and.tst new file mode 100644 index 000000000..2f0da9aa8 --- /dev/null +++ b/check/alu_and.tst @@ -0,0 +1,35 @@ +#include "alu.inc" + +.code + prolog + +#define AND(N, I0, I1, V) ALU(N, , and, I0, I1, V) + + AND(0, 0x7fffffff, 1, 1) + AND(1, 1, 0x7fffffff, 1) + AND(2, 0x80000000, 1, 0) + AND(3, 1, 0x80000000, 0) + AND(4, 0x7fffffff, 0x80000000, 0) + AND(5, 0x80000000, 0x7fffffff, 0) + AND(6, 0x7fffffff, 0xffffffff, 0x7fffffff) + AND(7, 0xffffffff, 0x7fffffff, 0x7fffffff) + AND(8, 0xffffffff, 
0xffffffff, 0xffffffff) + AND(9, 0x7fffffff, 0, 0) + AND(10, 0, 0x7fffffff, 0) +#if __WORDSIZE == 64 + AND(11, 0x7fffffffffffffff, 1, 1) + AND(12, 1, 0x7fffffffffffffff, 1) + AND(13, 0x8000000000000000, 1, 0) + AND(14, 1, 0x8000000000000000, 0) + AND(15, 0x7fffffffffffffff, 0x8000000000000000, 0) + AND(16, 0x8000000000000000, 0x7fffffffffffffff, 0) + AND(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff) + AND(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff) + AND(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_com.ok b/check/alu_com.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_com.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_com.tst b/check/alu_com.tst new file mode 100644 index 000000000..47228130f --- /dev/null +++ b/check/alu_com.tst @@ -0,0 +1,32 @@ +#include "alu.inc" + +.code + prolog + +#define COM(N, I0, V) UN(N, com, I0, V) + +#if __WORDSIZE == 32 + COM(0, 0, 0xffffffff) + COM(1, 1, 0xfffffffe) + COM(2, 0xffffffff, 0) + COM(3, 0x80000000, 0x7fffffff) + COM(4, 0x7fffffff, 0x80000000) + COM(5, 0x80000001, 0x7ffffffe) +#else + COM(0, 0, 0xffffffffffffffff) + COM(1, 1, 0xfffffffffffffffe) + COM(2, 0xffffffff, 0xffffffff00000000) + COM(3, 0x80000000, 0xffffffff7fffffff) + COM(4, 0x7fffffff, 0xffffffff80000000) + COM(5, 0x80000001, 0xffffffff7ffffffe) + COM(6, 0xffffffffffffffff, 0) + COM(7, 0x8000000000000000, 0x7fffffffffffffff) + COM(8, 0x7fffffffffffffff, 0x8000000000000000) + COM(9, 0x8000000000000001, 0x7ffffffffffffffe) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_div.ok b/check/alu_div.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_div.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_div.tst b/check/alu_div.tst new file mode 100644 index 000000000..37c1a2b49 --- /dev/null +++ b/check/alu_div.tst @@ -0,0 +1,82 
@@ +#include "alu.inc" + +.code + prolog + +#define DIV(N, I0, I1, V) ALU(N, , div, I0, I1, V) +#define UDIV(N, I0, I1, V) ALU(N, _u, div, I0, I1, V) + + DIV(0, 0x7fffffff, 1, 0x7fffffff) + DIV(1, 1, 0x7fffffff, 0) + DIV(2, 0x80000000, 1, 0x80000000) + DIV(3, 1, 0x80000000, 0) + DIV(4, 0x7fffffff, 2, 0x3fffffff) + DIV(5, 2, 0x7fffffff, 0) + DIV(6, 2, 0x80000000, 0) + DIV(7, 0x7fffffff, 0x80000000, 0) + DIV(8, 0, 0x7fffffff, 0) + DIV(9, 0xffffffff, 0xffffffff, 1) + UDIV(0, 0x7fffffff, 1, 0x7fffffff) + UDIV(1, 1, 0x7fffffff, 0) + UDIV(2, 0x80000000, 1, 0x80000000) + UDIV(3, 1, 0x80000000, 0) + UDIV(4, 0x7fffffff, 2, 0x3fffffff) + UDIV(5, 2, 0x7fffffff, 0) + UDIV(6, 0x80000000, 2, 0x40000000) + UDIV(7, 2, 0x80000000, 0) + UDIV(8, 0x7fffffff, 0x80000000, 0) + UDIV(9, 0x80000000, 0x7fffffff, 1) + UDIV(10,0, 0x7fffffff, 0) + UDIV(11,0x7fffffff, 0xffffffff, 0) + UDIV(12,0xffffffff, 0x7fffffff, 2) + UDIV(13,0xffffffff, 0xffffffff, 1) +#if __WORDSIZE == 32 + DIV(10, 0x80000000, 2, 0xc0000000) + DIV(11, 0x80000000, 0x7fffffff, 0xffffffff) + DIV(12, 0x7fffffff, 0xffffffff, 0x80000001) + DIV(13, 0xffffffff, 0x7fffffff, 0) +#else + DIV(10, 0x80000000, 2, 0x40000000) + DIV(11, 0x80000000, 0x7fffffff, 1) + DIV(12, 0x7fffffff, 0xffffffff, 0) + DIV(13, 0xffffffff, 0x7fffffff, 2) + DIV(14, 0x7fffffffffffffff, 1, 0x7fffffffffffffff) + DIV(15, 1, 0x7fffffffffffffff, 0) + DIV(16, 0x8000000000000000, 1, 0x8000000000000000) + DIV(17, 1, 0x8000000000000000, 0) + DIV(18, 0x7fffffffffffffff, 2, 0x3fffffffffffffff) + DIV(19, 2, 0x7fffffffffffffff, 0) + DIV(20, 0x8000000000000000, 2, 0xc000000000000000) + DIV(21, 2, 0x8000000000000000, 0) + DIV(22, 0x7fffffffffffffff, 0x8000000000000000, 0) + DIV(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + DIV(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001) + DIV(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0) + DIV(26, 0xffffffffffffffff, 0xffffffffffffffff, 1) + UDIV(14,0x7fffffffffffffff, 1, 0x7fffffffffffffff) + 
UDIV(15,1, 0x7fffffffffffffff, 0) + UDIV(16,0x8000000000000000, 1, 0x8000000000000000) + UDIV(17,1, 0x8000000000000000, 0) + UDIV(18,0x7fffffffffffffff, 2, 0x3fffffffffffffff) + UDIV(19,2, 0x7fffffffffffffff, 0) + UDIV(20,0x8000000000000000, 2, 0x4000000000000000) + UDIV(21,2, 0x8000000000000000, 0) + UDIV(22,0x7fffffffffffffff, 0x8000000000000000, 0) + UDIV(23,0x8000000000000000, 0x7fffffffffffffff, 1) + UDIV(24,0x7fffffffffffffff, 0xffffffffffffffff, 0) + UDIV(25,0xffffffffffffffff, 0x7fffffffffffffff, 2) + UDIV(26,0xffffffffffffffff, 0xffffffffffffffff, 1) +#endif + +#undef DIV +#define DIV(N, T, I0, I1, V) FOP(N, T, div, I0, I1, V) + DIV(0, _f, -0.5, 0.5, -1.0) + DIV(1, _f, 1.25, 0.5, 2.5) + DIV(0, _d, -0.5, 0.5, -1.0) + DIV(1, _d, 1.25, 0.5, 2.5) + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_lsh.ok b/check/alu_lsh.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_lsh.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_lsh.tst b/check/alu_lsh.tst new file mode 100644 index 000000000..1b3118f9b --- /dev/null +++ b/check/alu_lsh.tst @@ -0,0 +1,56 @@ +#include "alu.inc" + +.code + prolog + +#define LSH(N, I0, I1, V) ALU(N, , lsh, I0, I1, V) + + LSH(0, 0x7f, 1, 0xfe) + LSH(1, 0x7fff, 2, 0x1fffc) + LSH(2, 0x81, 16, 0x810000) + LSH(3, 0xff, 15, 0x7f8000) + LSH(4, 0x7fffffff, 0, 0x7fffffff) +#if __WORDSIZE == 32 + LSH(5, 0xffffffff, 8, 0xffffff00) + LSH(6, 0x7fffffff, 3, 0xfffffff8) + LSH(7, -0x7f, 31, 0x80000000) + LSH(8, -0x7fff, 30, 0x40000000) + LSH(9, -0x7fffffff, 29, 0x20000000) + LSH(10, 0x80000001, 28, 0x10000000) + LSH(11, 0x8001, 17, 0x20000) + LSH(12, 0x80000001, 18, 0x40000) + LSH(13, -0xffff, 24, 0x1000000) +#else + LSH(5, 0xffffffff, 8, 0xffffffff00) + LSH(6, 0x7fffffff, 3, 0x3fffffff8) + LSH(7, -0x7f, 31, 0xffffffc080000000) + LSH(8, -0x7fff, 30, 0xffffe00040000000) + LSH(9, -0x7fffffff, 29, 0xf000000020000000) + LSH(10, 0x80000001, 28, 0x800000010000000) + LSH(11, 0x8001, 17, 0x100020000) + 
LSH(12, 0x80000001, 18, 0x2000000040000) + LSH(13, -0xffff, 24, 0xffffff0001000000) + LSH(14, 0x7f, 33, 0xfe00000000) + LSH(15, 0x7ffff, 34, 0x1ffffc00000000) + LSH(16, 0x7fffffff, 35, 0xfffffff800000000) + LSH(17, -0x7f, 63, 0x8000000000000000) + LSH(18, -0x7fff, 62, 0x4000000000000000) + LSH(19, -0x7fffffff, 61, 0x2000000000000000) + LSH(20, 0x80000001, 60, 0x1000000000000000) + LSH(21, 0x81, 48, 0x81000000000000) + LSH(22, 0x8001, 49, 0x2000000000000) + LSH(23, 0x80000001, 40, 0x10000000000) + LSH(24, 0xff, 47, 0x7f800000000000) + LSH(25, 0xffff0001, 56, 0x100000000000000) + LSH(26, 0xffffffff, 40, 0xffffff0000000000) + LSH(27, 0x7fffffffff, 33, 0xfffffffe00000000) + LSH(28, -0x7fffffffff, 63, 0x8000000000000000) + LSH(29, 0x8000000001, 48, 0x1000000000000) + LSH(30, 0xffffffffff, 47, 0xffff800000000000) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_mul.ok b/check/alu_mul.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_mul.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_mul.tst b/check/alu_mul.tst new file mode 100644 index 000000000..edf9777cb --- /dev/null +++ b/check/alu_mul.tst @@ -0,0 +1,58 @@ +#include "alu.inc" + +.code + prolog + +#define MUL(N, I0, I1, V) ALU(N, , mul, I0, I1, V) + + MUL(0, 0x7fffffff, 1, 0x7fffffff) + MUL(1, 1, 0x7fffffff, 0x7fffffff) + MUL(2, 0x80000000, 1, 0x80000000) + MUL(3, 1, 0x80000000, 0x80000000) + MUL(4, 0x7fffffff, 2, 0xfffffffe) + MUL(5, 2, 0x7fffffff, 0xfffffffe) + MUL(6, 0x7fffffff, 0, 0) + MUL(7, 0, 0x7fffffff, 0) +#if __WORDSIZE == 32 + MUL(8, 0x80000000, 2, 0) + MUL(9, 2, 0x80000000, 0) + MUL(10, 0x7fffffff, 0x80000000, 0x80000000) + MUL(11, 0x80000000, 0x7fffffff, 0x80000000) + MUL(12, 0x7fffffff, 0xffffffff, 0x80000001) + MUL(13, 0xffffffff, 0x7fffffff, 0x80000001) + MUL(14, 0xffffffff, 0xffffffff, 1) +#else + MUL(8, 0x80000000, 2, 0x100000000) + MUL(9, 2, 0x80000000, 0x100000000) + MUL(10, 0x7fffffff, 0x80000000, 0x3fffffff80000000) + MUL(11, 
0x80000000, 0x7fffffff, 0x3fffffff80000000) + MUL(12, 0x7fffffff, 0xffffffff, 0x7ffffffe80000001) + MUL(13, 0xffffffff, 0x7fffffff, 0x7ffffffe80000001) + MUL(14, 0xffffffff, 0xffffffff, 0xfffffffe00000001) + MUL(15, 0x7fffffffffffffff, 1, 0x7fffffffffffffff) + MUL(16, 1, 0x7fffffffffffffff, 0x7fffffffffffffff) + MUL(17, 0x8000000000000000, 1, 0x8000000000000000) + MUL(18, 1, 0x8000000000000000, 0x8000000000000000) + MUL(19, 0x7fffffffffffffff, 2, 0xfffffffffffffffe) + MUL(20, 2, 0x7fffffffffffffff, 0xfffffffffffffffe) + MUL(21, 0x8000000000000000, 2, 0) + MUL(22, 2, 0x8000000000000000, 0) + MUL(23, 0x7fffffffffffffff, 0x8000000000000000, 0x8000000000000000) + MUL(24, 0x8000000000000000, 0x7fffffffffffffff, 0x8000000000000000) + MUL(25, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000001) + MUL(26, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000001) + MUL(27, 0xffffffffffffffff, 0xffffffffffffffff, 1) +#endif + +#undef MUL +#define MUL(N, T, I0, I1, V) FOP(N, T, mul, I0, I1, V) + MUL(0, _f, -0.5, 0.5, -0.25) + MUL(1, _f, 0.25, 0.75, 0.1875) + MUL(0, _d, -0.5, 0.5, -0.25) + MUL(1, _d, 0.25, 0.75, 0.1875) + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_neg.ok b/check/alu_neg.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_neg.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_neg.tst b/check/alu_neg.tst new file mode 100644 index 000000000..73b1b30bc --- /dev/null +++ b/check/alu_neg.tst @@ -0,0 +1,41 @@ +#include "alu.inc" + +.code + prolog + +#define NEG(N, I, V) UN(N, neg, I, V) + + NEG(0, 0, 0) +#if __WORDSIZE == 32 + NEG(1, 1, 0xffffffff) + NEG(2, 0xffffffff, 1) + NEG(3, 0x80000000, 0x80000000) + NEG(4, 0x7fffffff, 0x80000001) + NEG(5, 0x80000001, 0x7fffffff) +#else + NEG(1, 1, 0xffffffffffffffff) + NEG(2, 0xffffffff, 0xffffffff00000001) + NEG(3, 0x80000000, 0xffffffff80000000) + NEG(4, 0x7fffffff, 0xffffffff80000001) + NEG(5, 0x80000001, 0xffffffff7fffffff) + NEG(6, 0xffffffffffffffff, 
1) + NEG(7, 0x8000000000000000, 0x8000000000000000) + NEG(8, 0x7fffffffffffffff, 0x8000000000000001) +#endif + +#undef NEG +#define NEG(N, T, I, V) FUN(N, T, neg, I, V) + NEG(0, _f, 0.0, -0.0) + NEG(1, _f, 0.5, -0.5) + NEG(2, _f, $(1 / 0.0), $(-1.0 / 0)) + NEG(3, _f, -1.25, 1.25) + NEG(0, _d, 0.0, -0.0) + NEG(1, _d, 0.5, -0.5) + NEG(2, _d, $(1.0 / 0), $(-1 / 0.0)) + NEG(3, _d, -1.25, 1.25) + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_or.ok b/check/alu_or.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_or.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_or.tst b/check/alu_or.tst new file mode 100644 index 000000000..31aa0c6f6 --- /dev/null +++ b/check/alu_or.tst @@ -0,0 +1,35 @@ +#include "alu.inc" + +.code + prolog + +#define OR(N, I0, I1, V) ALU(N, , or, I0, I1, V) + + OR(0, 0x7fffffff, 1, 0x7fffffff) + OR(1, 1, 0x7fffffff, 0x7fffffff) + OR(2, 0x80000000, 1, 0x80000001) + OR(3, 1, 0x80000000, 0x80000001) + OR(4, 0x7fffffff, 0x80000000, 0xffffffff) + OR(5, 0x80000000, 0x7fffffff, 0xffffffff) + OR(6, 0x7fffffff, 0xffffffff, 0xffffffff) + OR(7, 0xffffffff, 0x7fffffff, 0xffffffff) + OR(8, 0xffffffff, 0xffffffff, 0xffffffff) + OR(9, 0x7fffffff, 0, 0x7fffffff) + OR(10, 0, 0x7fffffff, 0x7fffffff) +#if __WORDSIZE == 64 + OR(11, 0x7fffffffffffffff, 1, 0x7fffffffffffffff) + OR(12, 1, 0x7fffffffffffffff, 0x7fffffffffffffff) + OR(13, 0x8000000000000000, 1, 0x8000000000000001) + OR(14, 1, 0x8000000000000000, 0x8000000000000001) + OR(15, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) + OR(16, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + OR(17, 0x7fffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff) + OR(18, 0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff) + OR(19, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_rem.ok b/check/alu_rem.ok new file mode 100644 index 
000000000..9766475a4 --- /dev/null +++ b/check/alu_rem.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_rem.tst b/check/alu_rem.tst new file mode 100644 index 000000000..f6c607455 --- /dev/null +++ b/check/alu_rem.tst @@ -0,0 +1,75 @@ +#include "alu.inc" + +.code + prolog + +#define REM(N, I0, I1, V) ALU(N, , rem, I0, I1, V) +#define UREM(N, I0, I1, V) ALU(N, _u, rem, I0, I1, V) + + REM(0, 0x7fffffff, 1, 0) + REM(1, 1, 0x7fffffff, 1) + REM(2, 0x80000000, 1, 0) + REM(3, 1, 0x80000000, 1) + REM(4, 0x7fffffff, 2, 1) + REM(5, 2, 0x7fffffff, 2) + REM(6, 0x80000000, 2, 0) + REM(7, 2, 0x80000000, 2) + REM(8, 0x7fffffff, 0x80000000, 0x7fffffff) + REM(9, 0, 0x7fffffff, 0) + REM(10, 0xffffffff, 0xffffffff, 0) + UREM(0, 0x7fffffff, 1, 0) + UREM(1, 1, 0x7fffffff, 1) + UREM(2, 0x80000000, 1, 0) + UREM(3, 1, 0x80000000, 1) + UREM(4, 0x7fffffff, 2, 1) + UREM(5, 2, 0x7fffffff, 2) + UREM(6, 0x80000000, 2, 0) + UREM(7, 2, 0x80000000, 2) + UREM(8, 0x7fffffff, 0x80000000, 0x7fffffff) + UREM(9, 0x80000000, 0x7fffffff, 1) + UREM(10,0, 0x7fffffff, 0) + UREM(11,0x7fffffff, 0xffffffff, 0x7fffffff) + UREM(12,0xffffffff, 0x7fffffff, 1) + UREM(13,0xffffffff, 0xffffffff, 0) + +#if __WORDSIZE == 32 + REM(11, 0x80000000, 0x7fffffff, 0xffffffff) + REM(12, 0x7fffffff, 0xffffffff, 0) + REM(13, 0xffffffff, 0x7fffffff, 0xffffffff) +#else + REM(11, 0x80000000, 0x7fffffff, 1) + REM(12, 0x7fffffff, 0xffffffff, 0x7fffffff) + REM(13, 0xffffffff, 0x7fffffff, 1) + REM(14, 0x7fffffffffffffff, 1, 0) + REM(15, 1, 0x7fffffffffffffff, 1) + REM(16, 0x8000000000000000, 1, 0) + REM(17, 1, 0x8000000000000000, 1) + REM(18, 0x7fffffffffffffff, 2, 1) + REM(19, 2, 0x7fffffffffffffff, 2) + REM(20, 0x8000000000000000, 2, 0) + REM(21, 2, 0x8000000000000000, 2) + REM(22, 0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff) + REM(23, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + REM(24, 0x7fffffffffffffff, 0xffffffffffffffff, 0) + REM(25, 0xffffffffffffffff, 0x7fffffffffffffff, 0xffffffffffffffff) + REM(26, 
0xffffffffffffffff, 0xffffffffffffffff, 0) + UREM(14,0x7fffffffffffffff, 1, 0) + UREM(15,1, 0x7fffffffffffffff, 1) + UREM(16,0x8000000000000000, 1, 0) + UREM(17,1, 0x8000000000000000, 1) + UREM(18,0x7fffffffffffffff, 2, 1) + UREM(19,2, 0x7fffffffffffffff, 2) + UREM(20,0x8000000000000000, 2, 0) + UREM(21,2, 0x8000000000000000, 2) + UREM(22,0x7fffffffffffffff, 0x8000000000000000, 0x7fffffffffffffff) + UREM(23,0x8000000000000000, 0x7fffffffffffffff, 1) + UREM(24,0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffffffffffff) + UREM(25,0xffffffffffffffff, 0x7fffffffffffffff, 1) + UREM(26,0xffffffffffffffff, 0xffffffffffffffff, 0) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_rsh.ok b/check/alu_rsh.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_rsh.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_rsh.tst b/check/alu_rsh.tst new file mode 100644 index 000000000..f32c3c92a --- /dev/null +++ b/check/alu_rsh.tst @@ -0,0 +1,84 @@ +#include "alu.inc" + +.code + prolog + +#define RSH(N, I0, I1, V) ALU(N, , rsh, I0, I1, V) +#define URSH(N, I0, I1, V) ALU(N, _u, rsh, I0, I1, V) + + RSH(0, 0xfe, 1, 0x7f) + RSH(1, 0x1fffc, 2, 0x7fff) + RSH(2, 0x40000000, 30, 1) + RSH(3, 0x20000000, 29, 1) + RSH(4, 0x10000000, 28, 1) + RSH(5, 0x810000, 16, 0x81) + RSH(6, 0x20000, 17, 1) + RSH(7, 0x40000, 18, 1) + RSH(8, 0x7f8000, 15, 0xff) + RSH(9, 0x1000000, 24, 1) + RSH(10, 0x7fffffff, 0, 0x7fffffff) + URSH(0, 0xfe, 1, 0x7f) + URSH(1, 0x1fffc, 2, 0x7fff) + URSH(2, 0x80000000, 31, 1) + URSH(3, 0x40000000, 30, 1) + URSH(4, 0x20000000, 29, 1) + URSH(5, 0x10000000, 28, 1) + URSH(6, 0x810000, 16, 0x81) + URSH(7, 0x20000, 17, 1) + URSH(8, 0x40000, 18, 1) + URSH(9,0x7f8000, 15, 0xff) + URSH(10,0x1000000, 24, 1) + URSH(11,0xffffff00, 8, 0xffffff) + URSH(12,0x7fffffff, 0, 0x7fffffff) +#if __WORDSIZE == 32 + RSH(11, 0xfffffff8, 3, 0xffffffff) + RSH(12, 0x80000000, 31, 0xffffffff) + RSH(13, 0xffffff00, 8, 0xffffffff) + 
URSH(13,0xfffffff8, 3, 0x1fffffff) +#else + RSH(11, 0x3fffffff8, 3, 0x7fffffff) + RSH(12, 0xffffffc080000000, 31, 0xffffffffffffff81) + RSH(13, 0xffffff00, 8, 0xffffff) + RSH(14, 0xfe00000000, 33, 0x7f) + RSH(15, 0x1ffffc00000000, 34, 0x7ffff) + RSH(16, 0xfffffff800000000, 29, 0xffffffffffffffc0) + RSH(17, 0x8000000000000000, 63, 0xffffffffffffffff) + RSH(18, 0x4000000000000000, 62, 1) + RSH(19, 0x2000000000000000, 61, 1) + RSH(20, 0x1000000000000000, 60, 1) + RSH(21, 0x81000000000000, 48, 0x81) + RSH(22, 0x2000000000000, 49, 1) + RSH(23, 0x10000000000, 40, 1) + RSH(24, 0x7f800000000000, 47, 0xff) + RSH(25, 0x100000000000000, 56, 1) + RSH(26, 0xffffff0000000000, 40, 0xffffffffffffffff) + RSH(27, 0xfffffffe00000000, 33, 0xffffffffffffffff) + RSH(28, 0x8000000000000001, 63, 0xffffffffffffffff) + RSH(29, 0x1000000000000, 48, 1) + RSH(30, 0xffff800000000000, 47, 0xffffffffffffffff) + URSH(13,0x3fffffff8, 3, 0x7fffffff) + URSH(14,0xffffffc080000000, 31, 0x1ffffff81) + URSH(15,0xfe00000000, 33, 0x7f) + URSH(16,0x1ffffc00000000, 34, 0x7ffff) + URSH(17,0xfffffff800000000, 29, 0x7ffffffc0) + URSH(18,0x8000000000000000, 63, 1) + URSH(19,0x4000000000000000, 62, 1) + URSH(20,0x2000000000000000, 61, 1) + URSH(21,0x1000000000000000, 60, 1) + URSH(22,0x81000000000000, 48, 0x81) + URSH(23,0x2000000000000, 49, 1) + URSH(24,0x10000000000, 40, 1) + URSH(25,0x7f800000000000, 47, 0xff) + URSH(26,0x100000000000000, 56, 1) + URSH(27,0xffffff0000000000, 40, 0xffffff) + URSH(28,0xfffffffe00000000, 33, 0x7fffffff) + URSH(29,0x8000000000000001, 63, 1) + URSH(30,0x1000000000000, 48, 1) + URSH(31,0xffff800000000000, 47, 0x1ffff) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_sub.ok b/check/alu_sub.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_sub.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_sub.tst b/check/alu_sub.tst new file mode 100644 index 000000000..4e8fd3c5c --- /dev/null +++ b/check/alu_sub.tst @@ -0,0 +1,48 
@@ +#include "alu.inc" + +.code + prolog + +#define SUB(N, I0, I1, V) ALU(N, , sub, I0, I1, V) + + SUB(0, 0x7fffffff, 1, 0x7ffffffe) + SUB(2, 0x80000000, 1, 0x7fffffff) + SUB(3, 0x80000000, 0x7fffffff, 1) + SUB(4, 0xffffffff, 0xffffffff, 0) + SUB(5, 0xffffffff, 0x7fffffff, 0x80000000) + SUB(6, 0x7fffffff, 0, 0x7fffffff) +#if __WORDSIZE == 32 + SUB(7, 1, 0x7fffffff, 0x80000002) + SUB(8, 1, 0x80000000, 0x80000001) + SUB(9, 0x7fffffff, 0x80000000, 0xffffffff) + SUB(10, 0x7fffffff, 0xffffffff, 0x80000000) + SUB(11, 0, 0x7fffffff, 0x80000001) +#else + SUB(7, 1, 0x7fffffff, 0xffffffff80000002) + SUB(8, 1, 0xffffffff80000000, 0x80000001) + SUB(9, 0x7fffffff, 0xffffffff80000000, 0xffffffff) + SUB(10, 0xffffffff7fffffff, 0xffffffffffffffff, 0xffffffff80000000) + SUB(11, 0, 0x7fffffff, 0xffffffff80000001) + SUB(12, 0x7fffffffffffffff, 1, 0x7ffffffffffffffe) + SUB(13, 1, 0x7fffffffffffffff, 0x8000000000000002) + SUB(14, 0x8000000000000000, 1, 0x7fffffffffffffff) + SUB(15, 1, 0x8000000000000000, 0x8000000000000001) + SUB(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) + SUB(17, 0x8000000000000000, 0x7fffffffffffffff, 1) + SUB(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000) + SUB(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000) + SUB(20, 0xffffffffffffffff, 0xffffffffffffffff, 0) +#endif + +#undef SUB +#define SUB(N, T, I0, I1, V) FOP(N, T, sub, I0, I1, V) + SUB(0, _f, -0.5, 0.5, -1.0) + SUB(1, _f, 0.25, 0.75, -0.5) + SUB(0, _d, -0.5, 0.5, -1.0) + SUB(1, _d, 0.25, 0.75, -0.5) + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alu_xor.ok b/check/alu_xor.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alu_xor.ok @@ -0,0 +1 @@ +ok diff --git a/check/alu_xor.tst b/check/alu_xor.tst new file mode 100644 index 000000000..5c98e27fa --- /dev/null +++ b/check/alu_xor.tst @@ -0,0 +1,35 @@ +#include "alu.inc" + +.code + prolog + +#define XOR(N, I0, I1, V) ALU(N, , xor, I0, I1, V) + + 
XOR(0, 0x7fffffff, 1, 0x7ffffffe) + XOR(1, 1, 0x7fffffff, 0x7ffffffe) + XOR(2, 0x80000000, 1, 0x80000001) + XOR(3, 1, 0x80000000, 0x80000001) + XOR(4, 0x7fffffff, 0x80000000, 0xffffffff) + XOR(5, 0x80000000, 0x7fffffff, 0xffffffff) + XOR(6, 0x7fffffff, 0xffffffff, 0x80000000) + XOR(7, 0xffffffff, 0x7fffffff, 0x80000000) + XOR(9, 0xffffffff, 0xffffffff, 0) + XOR(10, 0x7fffffff, 0, 0x7fffffff) + XOR(11, 0, 0x7fffffff, 0x7fffffff) +#if __WORDSIZE == 64 + XOR(12, 0x7fffffffffffffff, 1, 0x7ffffffffffffffe) + XOR(13, 1, 0x7fffffffffffffff, 0x7ffffffffffffffe) + XOR(14, 0x8000000000000000, 1, 0x8000000000000001) + XOR(15, 1, 0x8000000000000000, 0x8000000000000001) + XOR(16, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) + XOR(17, 0x8000000000000000, 0x7fffffffffffffff, 0xffffffffffffffff) + XOR(18, 0x7fffffffffffffff, 0xffffffffffffffff, 0x8000000000000000) + XOR(19, 0xffffffffffffffff, 0x7fffffffffffffff, 0x8000000000000000) + XOR(20, 0xffffffffffffffff, 0xffffffffffffffff, 0) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alux_add.ok b/check/alux_add.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alux_add.ok @@ -0,0 +1 @@ +ok diff --git a/check/alux_add.tst b/check/alux_add.tst new file mode 100644 index 000000000..68cf8e50b --- /dev/null +++ b/check/alux_add.tst @@ -0,0 +1,48 @@ +#include "alu.inc" + +.code + prolog + +#define ADDX(N, I0, I1, V) ALUX(N, add, I0, I1, V) + + /* nothing */ + ADDX(0, 0, 0, 0) +#if __WORDSIZE == 32 + /* carry */ + ADDX(1, 0xffffffff, 0xffffffff, 1) + /* overflow */ + ADDX(2, 0x7fffffff, 1, 0) + /* overflow */ + ADDX(3, 0x7fffffff, 0x7fffffff, 0) + /* nothing */ + ADDX(4, 0x7fffffff, 0x80000000, 0) + /* carry+overflow */ + ADDX(5, 0x80000000, 0x80000000, 1) +#else + /* nothing */ + ADDX(1, 0xffffffff, 0xffffffff, 0) + /* nothing */ + ADDX(2, 0x7fffffff, 1, 0) + /* nothing */ + ADDX(3, 0x7fffffff, 0x7fffffff, 0) + /* nothing */ + ADDX(4, 0x7fffffff, 0x80000000, 
0) + /* nothing */ + ADDX(5, 0x80000000, 0x80000000, 0) + /* carry */ + ADDX(6, 0xffffffffffffffff, 0xffffffffffffffff, 1) + /* overflow */ + ADDX(7, 0x7fffffffffffffff, 1, 0) + /* overflow */ + ADDX(8, 0x7fffffffffffffff, 0x7fffffffffffffff, 0) + /* nothing */ + ADDX(9, 0x7fffffffffffffff, 0x8000000000000000, 0) + /* carry+overflow */ + ADDX(10,0x8000000000000000, 0x8000000000000000, 1) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/alux_sub.ok b/check/alux_sub.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/alux_sub.ok @@ -0,0 +1 @@ +ok diff --git a/check/alux_sub.tst b/check/alux_sub.tst new file mode 100644 index 000000000..edef15f6e --- /dev/null +++ b/check/alux_sub.tst @@ -0,0 +1,48 @@ +#include "alu.inc" + +.code + prolog + +#define SUBX(N, I0, I1, V) ALUX(N, sub, I0, I1, V) + + /* nothing */ + SUBX(0, 0, 0, 0) +#if __WORDSIZE == 32 + /* carry+overflow */ + SUBX(1, 0x7fffffff, 0xffffffff, 0xffffffff) + /* overflow */ + SUBX(2, 0x80000000, 1, 0) + /* carry+overflow */ + SUBX(3, 0x7fffffff, 0x80000000, 0xffffffff) + /* overflow */ + SUBX(4, 0x80000000, 0x7fffffff, 0) + /* carry+overflow */ + SUBX(5, 1, 0x80000000, 0xffffffff) +#else + /* carry */ + SUBX(1, 0x7fffffff, 0xffffffff, -1) + /* nothing */ + SUBX(2, 0x80000000, 1, 0) + /* carry */ + SUBX(3, 0x7fffffff, 0x80000000, -1) + /* nothing */ + SUBX(4, 0x80000000, 0x7fffffff, 0) + /* carry */ + SUBX(5, 1, 0x80000000, -1) + /* carry+overflow */ + SUBX(6, 0x7fffffffffffffff, 0xffffffffffffffff, -1) + /* overflow */ + SUBX(7, 0x8000000000000000, 1, 0) + /* carry+overflow */ + SUBX(8, 0x7fffffffffffffff, 0x8000000000000000, -1) + /* overflow */ + SUBX(9, 0x8000000000000000, 0x7fffffffffffffff, 0) + /* carry+overflow */ + SUBX(10,1, 0x8000000000000000, -1) +#endif + + prepare 1 + pushargi ok + finishi @printf + ret + epilog diff --git a/check/branch.ok b/check/branch.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/branch.ok @@ -0,0 +1 @@ +ok diff 
--git a/check/branch.tst b/check/branch.tst new file mode 100644 index 000000000..5fd54d5a2 --- /dev/null +++ b/check/branch.tst @@ -0,0 +1,562 @@ +#if __WORDSIZE == 64 +# define I7f 0x7fffffffffffffff +# define I80 0x8000000000000000 +# define I81 0x8000000000000001 +# define Iff 0xffffffffffffffff +#else +# define I7f 0x7fffffff +# define I80 0x80000000 +# define I81 0x80000001 +# define Iff 0xffffffff +#endif + +.data 12 +ok: +.c "ok\n" +. $($NaN = 0.0 / 0.0) + +#define BOP(N, Ls, Rs, Lu, Ru, R0, R1) \ + movi %R0 Ls \ + movi %R1 Rs \ + b##N##r N##r_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_##R0##_##R1: \ + b##N##i N##i_##R0##_##R1 %R0 Rs \ + calli @abort \ +N##i_##R0##_##R1: \ + movi %R0 Lu \ + movi %R1 Ru \ + b##N##r_u N##r_u_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_u_##R0##_##R1: \ + b##N##i_u N##i_u_##R0##_##R1 %R0 Ru \ + calli @abort \ +N##i_u_##R0##_##R1: \ + movi %R0 Ls \ + movi %R1 Rs \ + N##r %R0 %R0 %R1 \ + beqi _##N##r_##R0##_##R1 %R0 1 \ + calli @abort \ +_##N##r_##R0##_##R1: \ + movi %R0 Ls \ + N##i %R1 %R0 Rs \ + beqi _##N##i_##R0##_##R1 %R1 1 \ + calli @abort \ +_##N##i_##R0##_##R1: \ + movi %R0 Lu \ + movi %R1 Ru \ + N##r_u %R0 %R0 %R1 \ + beqi _##N##r_u_##R0##_##R1 %R0 1 \ + calli @abort \ +_##N##r_u_##R0##_##R1: \ + movi %R0 Lu \ + N##i_u %R1 %R0 Ru \ + beqi _##N##i_u_##R0##_##R1 %R1 1 \ + calli @abort \ +_##N##i_u_##R0##_##R1: + +#define EB(N, L, R, R0, R1) \ + movi %R0 L \ + movi %R1 R \ + b##N##r N##r_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_##R0##_##R1: \ + b##N##i N##i_##R0##_##R1 %R0 R \ + calli @abort \ +N##i_##R0##_##R1: \ + movi %R0 L \ + movi %R1 R \ + N##r %R0 %R0 %R1 \ + beqi _##N##r_##R0##_##R1 %R0 1 \ + calli @abort \ +_##N##r_##R0##_##R1: \ + movi %R0 L \ + N##i %R1 %R0 R \ + beqi _##N##i_##R0##_##R1 %R1 1 \ + calli @abort \ +_##N##i_##R0##_##R1: + +#define XEB(N, L, R, R0, R1) \ + movi %R0 L \ + movi %R1 R \ + b##N##r N##r_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_##R0##_##R1: \ + b##N##i N##i_##R0##_##R1 %R0 R \ + 
calli @abort \ +N##i_##R0##_##R1: + +#define XBOP(N, Ls, Rs, Lu, Ru, R0, R1) \ + movi %R0 Ls \ + movi %R1 Rs \ + b##N##r N##r_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_##R0##_##R1: \ + movi %R0 Ls \ + b##N##i N##i_##R0##_##R1 %R0 Rs \ + calli @abort \ +N##i_##R0##_##R1: \ + movi %R0 Lu \ + movi %R1 Ru \ + b##N##r_u N##r_u_##R0##_##R1 %R0 %R1 \ + calli @abort \ +N##r_u_##R0##_##R1: \ + movi %R0 Lu \ + b##N##i_u N##i_u_##R0##_##R1 %R0 Ru \ + calli @abort \ +N##i_u_##R0##_##R1: + +#define BOPI(N, Ls, Rs, Lu, Ru) \ + BOP(N, Ls, Rs, Lu, Ru, v0, v1) \ + BOP(N, Ls, Rs, Lu, Ru, v0, v2) \ + BOP(N, Ls, Rs, Lu, Ru, v0, r0) \ + BOP(N, Ls, Rs, Lu, Ru, v0, r1) \ + BOP(N, Ls, Rs, Lu, Ru, v0, r2) \ + BOP(N, Ls, Rs, Lu, Ru, v1, v0) \ + BOP(N, Ls, Rs, Lu, Ru, v1, v2) \ + BOP(N, Ls, Rs, Lu, Ru, v1, r0) \ + BOP(N, Ls, Rs, Lu, Ru, v1, r1) \ + BOP(N, Ls, Rs, Lu, Ru, v1, r2) \ + BOP(N, Ls, Rs, Lu, Ru, v2, v0) \ + BOP(N, Ls, Rs, Lu, Ru, v2, v1) \ + BOP(N, Ls, Rs, Lu, Ru, v2, r0) \ + BOP(N, Ls, Rs, Lu, Ru, v2, r1) \ + BOP(N, Ls, Rs, Lu, Ru, v2, r2) \ + BOP(N, Ls, Rs, Lu, Ru, r0, v0) \ + BOP(N, Ls, Rs, Lu, Ru, r0, v1) \ + BOP(N, Ls, Rs, Lu, Ru, r0, v2) \ + BOP(N, Ls, Rs, Lu, Ru, r0, r1) \ + BOP(N, Ls, Rs, Lu, Ru, r0, r2) \ + BOP(N, Ls, Rs, Lu, Ru, r1, v0) \ + BOP(N, Ls, Rs, Lu, Ru, r1, v1) \ + BOP(N, Ls, Rs, Lu, Ru, r1, v2) \ + BOP(N, Ls, Rs, Lu, Ru, r1, r0) \ + BOP(N, Ls, Rs, Lu, Ru, r1, r2) \ + BOP(N, Ls, Rs, Lu, Ru, r2, v0) \ + BOP(N, Ls, Rs, Lu, Ru, r2, v1) \ + BOP(N, Ls, Rs, Lu, Ru, r2, v2) \ + BOP(N, Ls, Rs, Lu, Ru, r2, r0) \ + BOP(N, Ls, Rs, Lu, Ru, r2, r1) + +#define EBI(N, L, R) \ + EB(N, L, R, v0, v1) \ + EB(N, L, R, v0, v2) \ + EB(N, L, R, v0, r0) \ + EB(N, L, R, v0, r1) \ + EB(N, L, R, v0, r2) \ + EB(N, L, R, v1, v0) \ + EB(N, L, R, v1, v2) \ + EB(N, L, R, v1, r0) \ + EB(N, L, R, v1, r1) \ + EB(N, L, R, v1, r2) \ + EB(N, L, R, v2, v0) \ + EB(N, L, R, v2, v1) \ + EB(N, L, R, v2, r0) \ + EB(N, L, R, v2, r1) \ + EB(N, L, R, v2, r2) \ + EB(N, L, R, r0, v0) \ + EB(N, L, R, r0, v1) 
\ + EB(N, L, R, r0, v2) \ + EB(N, L, R, r0, r1) \ + EB(N, L, R, r0, r2) \ + EB(N, L, R, r1, v0) \ + EB(N, L, R, r1, v1) \ + EB(N, L, R, r1, v2) \ + EB(N, L, R, r1, r0) \ + EB(N, L, R, r1, r2) \ + EB(N, L, R, r2, v0) \ + EB(N, L, R, r2, v1) \ + EB(N, L, R, r2, v2) \ + EB(N, L, R, r2, r0) \ + EB(N, L, R, r2, r1) + + +#define XEBI(N, L, R) \ + XEB(N, L, R, v0, v1) \ + XEB(N, L, R, v0, v2) \ + XEB(N, L, R, v0, r0) \ + XEB(N, L, R, v0, r1) \ + XEB(N, L, R, v0, r2) \ + XEB(N, L, R, v1, v0) \ + XEB(N, L, R, v1, v2) \ + XEB(N, L, R, v1, r0) \ + XEB(N, L, R, v1, r1) \ + XEB(N, L, R, v1, r2) \ + XEB(N, L, R, v2, v0) \ + XEB(N, L, R, v2, v1) \ + XEB(N, L, R, v2, r0) \ + XEB(N, L, R, v2, r1) \ + XEB(N, L, R, v2, r2) \ + XEB(N, L, R, r0, v0) \ + XEB(N, L, R, r0, v1) \ + XEB(N, L, R, r0, v2) \ + XEB(N, L, R, r0, r1) \ + XEB(N, L, R, r0, r2) \ + XEB(N, L, R, r1, v0) \ + XEB(N, L, R, r1, v1) \ + XEB(N, L, R, r1, v2) \ + XEB(N, L, R, r1, r0) \ + XEB(N, L, R, r1, r2) \ + XEB(N, L, R, r2, v0) \ + XEB(N, L, R, r2, v1) \ + XEB(N, L, R, r2, v2) \ + XEB(N, L, R, r2, r0) \ + XEB(N, L, R, r2, r1) + +#define XBOPI(N, Ls, Rs, Lu, Ru) \ + XBOP(N, Ls, Rs, Lu, Ru, v0, v1) \ + XBOP(N, Ls, Rs, Lu, Ru, v0, v2) \ + XBOP(N, Ls, Rs, Lu, Ru, v0, r0) \ + XBOP(N, Ls, Rs, Lu, Ru, v0, r1) \ + XBOP(N, Ls, Rs, Lu, Ru, v0, r2) \ + XBOP(N, Ls, Rs, Lu, Ru, v1, v0) \ + XBOP(N, Ls, Rs, Lu, Ru, v1, v2) \ + XBOP(N, Ls, Rs, Lu, Ru, v1, r0) \ + XBOP(N, Ls, Rs, Lu, Ru, v1, r1) \ + XBOP(N, Ls, Rs, Lu, Ru, v1, r2) \ + XBOP(N, Ls, Rs, Lu, Ru, v2, v0) \ + XBOP(N, Ls, Rs, Lu, Ru, v2, v1) \ + XBOP(N, Ls, Rs, Lu, Ru, v2, r0) \ + XBOP(N, Ls, Rs, Lu, Ru, v2, r1) \ + XBOP(N, Ls, Rs, Lu, Ru, v2, r2) \ + XBOP(N, Ls, Rs, Lu, Ru, r0, v0) \ + XBOP(N, Ls, Rs, Lu, Ru, r0, v1) \ + XBOP(N, Ls, Rs, Lu, Ru, r0, v2) \ + XBOP(N, Ls, Rs, Lu, Ru, r0, r1) \ + XBOP(N, Ls, Rs, Lu, Ru, r0, r2) \ + XBOP(N, Ls, Rs, Lu, Ru, r1, v0) \ + XBOP(N, Ls, Rs, Lu, Ru, r1, v1) \ + XBOP(N, Ls, Rs, Lu, Ru, r1, v2) \ + XBOP(N, Ls, Rs, Lu, Ru, r1, r0) \ + 
XBOP(N, Ls, Rs, Lu, Ru, r1, r2) \ + XBOP(N, Ls, Rs, Lu, Ru, r2, v0) \ + XBOP(N, Ls, Rs, Lu, Ru, r2, v1) \ + XBOP(N, Ls, Rs, Lu, Ru, r2, v2) \ + XBOP(N, Ls, Rs, Lu, Ru, r2, r0) \ + XBOP(N, Ls, Rs, Lu, Ru, r2, r1) + +#define TBOPF(N, T, L, R) \ + movi_##T %f0 L \ + movi_##T %f1 R \ + b##N##r##_##T N##r_##T %f0 %f1 \ + calli @abort \ +N##r_##T: \ + b##N##i##_##T N##i_##T %f0 R \ + calli @abort \ +N##i_##T: \ + movi_##T %f1 $NaN \ + b##N##r##_##T N##r_##T##_##u %f0 %f1 \ + jmpi N##r_##T##_##u0 \ +N##r_##T##_##u: \ + calli @abort \ +N##r##_##T##_##u0: \ + b##N##i##_##T N##i_##T##_##u %f0 $NaN \ + jmpi N##i_##T##_##u0 \ +N##i##_##T##_##u: \ + calli @abort \ +N##i##_##T##_##u0: +#define BOPF(N, L, R) \ + TBOPF(N, f, L, R) \ + TBOPF(N, d, L, R) + +#define TUBOPF(N, T, L, R) \ + movi_##T %f0 L \ + movi_##T %f1 R \ + b##N##r##_##T N##r_##T %f0 %f1 \ + calli @abort \ +N##r_##T: \ + b##N##i##_##T N##i_##T %f0 R \ + calli @abort \ +N##i_##T: \ + movi_##T %f1 $NaN \ + b##N##r##_##T N##r_##T##_##u %f0 %f1 \ + calli @abort \ +N##r_##T##_##u: \ + b##N##i##_##T N##i_##T##_##u %f0 $NaN \ + calli @abort \ +N##i##_##T##_##u: + +#define UBOPF(N, L, R) \ + TUBOPF(N, f, L, R) \ + TUBOPF(N, d, L, R) + +.code + prolog + + movi %r0 -1 + movi %r1 1 + bltr xltr_r0_r1 %r0 %r1 + calli @abort +xltr_r0_r1: + blti xlti_r0_r1 %r0 1 + calli @abort +xlti_r0_r1: + movi %r0 1 + movi %r1 -1 + bltr_u xltru_r0_r1 %r0 %r1 + calli @abort +xltru_r0_r1: + blti_u xltiu_r0_r1 %r0 -1 + calli @abort +xltiu_r0_r1: + movi %r0 -1 + movi %r1 -1 + bler xler_r0_r1 %r0 %r1 + calli @abort +xler_r0_r1: + blti xlei_r0_r1 %r0 1 + calli @abort +xlei_r0_r1: + movi %r0 1 + movi %r1 -1 + bltr_u xlteu_r0_r1 %r0 %r1 + calli @abort +xlteu_r0_r1: + blei_u xleiu_r0_r1 %r0 -1 + calli @abort +xleiu_r0_r1: + movi %r0 32 + movi %r1 32 + beqr xeqr_r0_r1 %r0 %r1 + calli @abort +xeqr_r0_r1: + beqi xeqi_r0_r1 %r0 32 + calli @abort +xeqi_r0_r1: + movi %r0 -2 + movi %r1 -2 + bger xger_r0_r1 %r0 %r1 + calli @abort +xger_r0_r1: + bgei xgei_r0_r1 
%r0 -2 + calli @abort +xgei_r0_r1: + movi %r0 2 + movi %r1 2 + bger_u xgeru_r0_r1 %r0 %r1 + calli @abort +xgeru_r0_r1: + bgei_u xgeiu_r0_r1 %r0 2 + calli @abort +xgeiu_r0_r1: + movi %r0 2 + movi %r1 -2 + bgtr xgtr_r0_r1 %r0 %r1 + calli @abort +xgtr_r0_r1: + bgti xgti_r0_r1 %r0 -2 + calli @abort +xgti_r0_r1: + movi %r0 -2 + movi %r1 2 + bgtr_u xgtru_r0_r1 %r0 %r1 + calli @abort +xgtru_r0_r1: + bgti_u xgtiu_r0_r1 %r0 2 + calli @abort +xgtiu_r0_r1: + movi %r0 -3 + movi %r1 3 + bner xner_r0_r1 %r0 %r1 + calli @abort +xner_r0_r1: + bnei xnei_r0_r1 %r0 3 + calli @abort +xnei_r0_r1: + movi %r0 1 + movi %r1 3 + bmsr xmsr_r0_r1 %r0 %r1 + calli @abort +xmsr_r0_r1: + bmsi xmsi_r0_r1 %r0 3 + calli @abort +xmsi_r0_r1: + movi %r0 1 + movi %r1 2 + bmcr xmcr_r0_r1 %r0 %r1 + calli @abort +xmcr_r0_r1: + bmci xmci_r0_r1 %r0 2 + calli @abort +xmci_r0_r1: + movi %r0 I7f + movi %r1 1 + boaddr xoaddr_r0_r1 %r0 %r1 + calli @abort +xoaddr_r0_r1: + movi %r0 Iff + movi %r1 1 + boaddr_u xoaddr_u_r0_r1 %r0 %r1 + calli @abort +xoaddr_u_r0_r1: + movi %r0 I7f + boaddi xoaddi_r0_r1 %r0 1 + calli @abort +xoaddi_r0_r1: + movi %r0 Iff + boaddi_u xoaddi_u_r0_r1 %r0 1 + calli @abort +xoaddi_u_r0_r1: + movi %r0 I80 + movi %r1 1 + bxaddr xxaddr_r0_r1 %r0 %r1 + calli @abort +xxaddr_r0_r1: + movi %r0 I80 + bxaddi xxaddi_r0_r1 %r0 1 + calli @abort +xxaddi_r0_r1: + movi %r0 I7f + movi %r1 1 + bxaddr_u xxaddr_u_r0_r1 %r0 %r1 + calli @abort +xxaddr_u_r0_r1: + movi %r0 I7f + bxaddi_u xxaddi_u_r0_r1 %r0 1 + calli @abort +xxaddi_u_r0_r1: + movi %r0 I80 + movi %r1 1 + bosubr xosubr_r0_r1 %r0 %r1 + calli @abort +xosubr_r0_r1: + movi %r0 0 + movi %r1 1 + bosubr_u xosubr_u_r0_r1 %r0 %r1 + calli @abort +xosubr_u_r0_r1: + movi %r0 I80 + bosubi xosubi_r0_r1 %r0 1 + calli @abort +xosubi_r0_r1: + movi %r0 0 + bosubi_u xosubi_u_r0_r1 %r0 1 + calli @abort +xosubi_u_r0_r1: + movi %r0 I81 + movi %r1 1 + bxsubr xxsubr_r0_r1 %r0 %r1 + calli @abort +xxsubr_r0_r1: + movi %r0 I81 + bxsubi xxsubi_r0_r1 %r0 1 + calli @abort 
+xxsubi_r0_r1: + movi %r0 I80 + movi %r1 1 + bxsubr_u xxsubr_u_r0_r1 %r0 %r1 + calli @abort +xxsubr_u_r0_r1: + movi %r0 I80 + bxsubi_u xxsubi_u_r0_r1 %r0 1 + calli @abort +xxsubi_u_r0_r1: + movi_f %f0 1 + movi_f %f1 2 + bltr_f xltr_f_f0_f1 %f0 %f1 + calli @abort +xltr_f_f0_f1: + blti_f xlti_f_f0_f1 %f0 2 + calli @abort +xlti_f_f0_f1: + movi_f %f0 -1 + movi_f %f1 -1 + bler_f xler_f_f0_f1 %f0 %f1 + calli @abort +xler_f_f0_f1: + blei_f xlei_f_f0_f1 %f0 -1 + calli @abort +xlei_f_f0_f1: + movi_f %f0 -2 + movi_f %f1 -2 + beqr_f xeqr_f_f0_f1 %f0 %f1 + calli @abort +xeqr_f_f0_f1: + beqi_f xeqi_f_f0_f1 %f0 -2 + calli @abort +xeqi_f_f0_f1: + movi_f %f0 -3 + movi_f %f1 -3 + bger_f xger_f_f0_f1 %f0 %f1 + calli @abort +xger_f_f0_f1: + bgei_f xgei_f_f0_f1 %f0 -3 + calli @abort +xgei_f_f0_f1: + movi_f %f0 2 + movi_f %f1 1 + bgtr_f xgtr_f_f0_f1 %f0 %f1 + calli @abort +xgtr_f_f0_f1: + bgti_f xgti_f_f0_f1 %f0 1 + calli @abort +xgti_f_f0_f1: + movi_f %f0 0 + movi_f %f1 2 + bner_f xner_f_f0_f1 %f0 %f1 + calli @abort +xner_f_f0_f1: + bnei_f xnei_f_f0_f1 %f0 2 + calli @abort +xnei_f_f0_f1: + + BOPI(lt, -1, 1, 1, -1) + BOPI(le, -1, -1, 1, 1) + EBI(eq, 32, 32) + BOPI(ge, -2, -2, 2, 2) + BOPI(gt, 2, -2, -2, 2) + EBI(ne, 3, -3) + XEBI(ms, 1, 3) + XEBI(mc, 1, 2) + XBOPI(oadd, I7f, 1, Iff, 1) + XBOPI(xadd, I80, 1, I7f, 1) + XBOPI(osub, I80, 1, 0, 1) + XBOPI(xsub, I81, 1, I80, 1) + BOPF(lt, 1, 2) + BOPF(le, 2, 2) + BOPF(eq, 3, 3) + BOPF(ge, 3, 3) + BOPF(gt, 4, 3) + BOPF(ne, 4, 3) + UBOPF(unlt, 1, 2) + UBOPF(unle, 2, 2) + UBOPF(uneq, 3, 3) + UBOPF(unge, 3, 3) + UBOPF(ungt, 4, 3) + BOPF(ltgt, 4, 3) + movi_f %f0 5 + movi_f %f1 5 + bordr_f ordr_f %f0 %f1 + calli @abort +ordr_f: + bordi_f ordi_f %f0 1 + calli @abort +ordi_f: + bordi_f ordi_f_u %f0 $NaN + jmpi ordi_f_u0 +ordi_f_u: + calli @abort +ordi_f_u0: + movi_f %f0 5 + movi_f %f1 5 + bunordr_f unordr_f %f0 %f1 + jmpi unordr_f_0 +unordr_f: + calli @abort +unordr_f_0: + bunordi_f unordi_f %f0 1 + jmpi unordi_f_0 +unordi_f: + calli @abort 
+unordi_f_0: + bunordi_f unordi_f_1 %f0 $NaN + calli @abort +unordi_f_1: + + // just to confirm it did not crash or abort + prepare 1 + pushargi ok + finishi @printf + + ret + epilog diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index afbfd10ca..d2db6c236 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -44,9 +44,15 @@ #define jit_size(vector) (sizeof(vector) / sizeof((vector)[0])) +#define jit_reg_free_p(regno) \ + (!jit_regset_tstbit(_jit->reglive, regno) && \ + !jit_regset_tstbit(_jit->regarg, regno) && \ + !jit_regset_tstbit(_jit->regsav, regno)) + /* * Private jit_class bitmasks */ +#define jit_class_named 0x00400000 /* hint: result must be the named reg */ #define jit_class_nospill 0x00800000 /* hint to fail if need spill */ #define jit_class_sft 0x01000000 /* not a hardware register */ #define jit_class_rg8 0x04000000 /* x86 8 bits */ diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 948f26094..1922a6cbb 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -409,7 +409,7 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_MVN(cc,rd,rm) corrr(cc,ARM_MVN,0,rd,rm) # define MVN(rd,rm) CC_MVN(ARM_CC_AL,rd,rm) # define T1_MVN(rd,rm) is(THUMB_MVN|(_u3(rm)<<3)|_u3(rd)) -# define T2_MVN(rd,rm) torrr(THUMB2_MVN,rd,_R15_REGNO,rm) +# define T2_MVN(rd,rm) torrr(THUMB2_MVN,_R15_REGNO,rd,rm) # define CC_MVNI(cc,rd,im) corri(cc,ARM_MVN|ARM_I,0,rd,im) # define MVNI(rd,im) CC_MVNI(ARM_CC_AL,rd,im) # define T2_MVNI(rd,im) torri(THUMB2_MVNI,_R15_REGNO,rd,im) @@ -816,10 +816,10 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define T2_POP(im) tpp(THUMB2_POP,im) # define jit_get_reg_args() \ do { \ - (void)jit_get_reg(_R0|jit_class_gpr); \ - (void)jit_get_reg(_R1|jit_class_gpr); \ - (void)jit_get_reg(_R2|jit_class_gpr); \ - (void)jit_get_reg(_R3|jit_class_gpr); \ + (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr); \ + (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr); \ + 
(void)jit_get_reg(_R2|jit_class_named|jit_class_gpr); \ + (void)jit_get_reg(_R3|jit_class_named|jit_class_gpr); \ } while (0) # define jit_unget_reg_args() \ do { \ diff --git a/lib/jit_arm-swf.c b/lib/jit_arm-swf.c index cc311dfb4..ad0d12d84 100644 --- a/lib/jit_arm-swf.c +++ b/lib/jit_arm-swf.c @@ -156,8 +156,8 @@ static void _swf_negr_d(jit_state_t*,jit_int32_t,jit_int32_t); # define swf_muli_d(r0,r1,i0) swf_ddd_(__aeabi_dmul,r0,r1,i0) # define swf_divr_f(r0,r1,r2) swf_fff(__aeabi_fdiv,r0,r1,r2) # define swf_divi_f(r0,r1,i0) swf_fff_(__aeabi_fdiv,r0,r1,i0) -# define swf_divr_d(r0,r1,r2) swf_ddd(__aeabi_dsub,r0,r1,r2) -# define swf_divi_d(r0,r1,i0) swf_ddd_(__aeabi_dsub,r0,r1,i0) +# define swf_divr_d(r0,r1,r2) swf_ddd(__aeabi_ddiv,r0,r1,r2) +# define swf_divi_d(r0,r1,i0) swf_ddd_(__aeabi_ddiv,r0,r1,i0) # define swf_ltr_f(r0,r1,r2) swf_iff(__aeabi_fcmplt,r0,r1,r2) # define swf_lti_f(r0,r1,i0) swf_iff_(__aeabi_fcmplt,r0,r1,i0) # define swf_ltr_d(r0,r1,r2) swf_idd(__aeabi_dcmplt,r0,r1,r2) diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 319703e72..b70644040 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -37,10 +37,6 @@ #define jit_exchange_p() 1 /* FIXME is it really required to not touch _R10? 
*/ -#define jit_reg_free_p(regno) \ - (!jit_regset_tstbit(_jit->reglive, regno) && \ - !jit_regset_tstbit(_jit->regarg, regno) && \ - !jit_regset_tstbit(_jit->regsav, regno)) /* * Types @@ -290,28 +286,28 @@ _jit_reti(jit_state_t *_jit, jit_word_t u) void _jit_retr_f(jit_state_t *_jit, jit_int32_t u) { - jit_movr_f(JIT_RET, u); + jit_movr_f(JIT_FRET, u); jit_ret(); } void _jit_reti_f(jit_state_t *_jit, jit_float32_t u) { - jit_movi_f(JIT_RET, u); + jit_movi_f(JIT_FRET, u); jit_ret(); } void _jit_retr_d(jit_state_t *_jit, jit_int32_t u) { - jit_movr_d(JIT_RET, u); + jit_movr_d(JIT_FRET, u); jit_ret(); } void _jit_reti_d(jit_state_t *_jit, jit_float64_t u) { - jit_movi_d(JIT_RET, u); + jit_movi_d(JIT_FRET, u); jit_ret(); } diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index bef8d4c95..ed02008aa 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -40,7 +40,7 @@ # define stxi(u, v, w) stxi_l(u, v, w) # define can_sign_extend_int_p(im) \ (((im) >= 0 && (long)(im) <= 0x7fffffffL) || \ - ((im) < 0 && (long)(im) >= -0x80000000L)) + ((im) < 0 && (long)(im) > -0x80000000L)) # define can_zero_extend_int_p(im) \ ((im) >= 0 && (im) < 0x80000000L) # define fits_uint32_p(im) ((im & 0xffffffff00000000L) == 0) @@ -975,8 +975,8 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) lea(-i0, r1, _NOREG, _SCL1, r0); } else if (r0 != r1) { - movi(r0, i0); - isubr(r0, r1); + movi(r0, -i0); + iaddr(r0, r1); } else { reg = jit_get_reg(jit_class_gpr); @@ -1064,7 +1064,7 @@ _imuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_int_p(i0)) { - rex(0, 1, r1, _NOREG, r0); + rex(0, 1, r0, _NOREG, r1); if ((jit_int8_t)i0 == i0) { ic(0x6b); mrm(0x03, r7(r0), r7(r1)); @@ -1147,16 +1147,25 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, { jit_int32_t div; jit_int32_t reg; + jit_int32_t set; + jit_int32_t use; - if (r0 != _RDX_REGNO) - (void)jit_get_reg(_RDX|jit_class_gpr); - if (r0 
!= _RAX_REGNO) - (void)jit_get_reg(_RAX|jit_class_gpr); + set = use = 0; + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO && r2 != _RDX_REGNO) + set |= 1 << _RDX_REGNO; + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO && r2 != _RAX_REGNO) + set |= 1 << _RAX_REGNO; + if (set & (1 <<_RDX_REGNO)) + (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named); + if (set & (1 << _RAX_REGNO)) + (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named); if (r2 == _RAX_REGNO) { if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg(r1 == _RCX_REGNO ? _RBX : _RCX); + reg = jit_get_reg((r1 == _RCX_REGNO ? _RBX : _RCX) | + jit_class_gpr|jit_class_named); + use = 1; div = rn(reg); movr(div, _RAX_REGNO); if (r1 != _RAX_REGNO) @@ -1172,13 +1181,14 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, movr(_RAX_REGNO, r1); } div = r0; - reg = 0; } } else if (r2 == _RDX_REGNO) { if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg(r1 == _RCX_REGNO ? _RBX : _RCX); + reg = jit_get_reg((r1 == _RCX_REGNO ? 
_RBX : _RCX) | + jit_class_gpr|jit_class_named); + use = 1; div = rn(reg); movr(div, _RDX_REGNO); if (r1 != _RAX_REGNO) @@ -1189,14 +1199,12 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, movr(_RAX_REGNO, r1); movr(r0, _RDX_REGNO); div = r0; - reg = 0; } } else { if (r1 != _RAX_REGNO) movr(_RAX_REGNO, r1); div = r2; - reg = 0; } if (sign) { @@ -1208,19 +1216,21 @@ _divremr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, idivr_u(div); } - if (reg) + if (use) jit_unget_reg(reg); if (r0 != _RAX_REGNO) { if (divide) movr(r0, _RAX_REGNO); - jit_unget_reg(_RAX); } if (r0 != _RDX_REGNO) { if (!divide) movr(r0, _RDX_REGNO); - jit_unget_reg(_RDX); } + if (set & (1 <<_RDX_REGNO)) + jit_unget_reg(_RDX); + if (set & (1 << _RAX_REGNO)) + jit_unget_reg(_RAX); } static void @@ -1229,6 +1239,8 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, { jit_int32_t reg; jit_int32_t div; + jit_int32_t set; + jit_int32_t use; if (divide) { switch (i0) { @@ -1275,23 +1287,28 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, return; } - if (r0 != _RDX_REGNO) - (void)jit_get_reg(_RDX|jit_class_gpr); - if (r0 != _RAX_REGNO) - (void)jit_get_reg(_RAX|jit_class_gpr); + set = use = 0; + if (r0 != _RDX_REGNO && r1 != _RDX_REGNO) + set |= 1 << _RDX_REGNO; + if (r0 != _RAX_REGNO && r1 != _RAX_REGNO) + set |= 1 << _RAX_REGNO; + if (set & (1 <<_RDX_REGNO)) + (void)jit_get_reg(_RDX|jit_class_gpr|jit_class_named); + if (set & (1 << _RAX_REGNO)) + (void)jit_get_reg(_RAX|jit_class_gpr|jit_class_named); - if (r0 == _RAX_REGNO || r0 == _RDX_REGNO) { + if (r0 == _RAX_REGNO || r0 == _RDX_REGNO || r0 == r1) { if ((reg = jit_get_reg(jit_class_gpr|jit_class_chk)) == JIT_NOREG) - reg = jit_get_reg(_RCX); + reg = jit_get_reg((r1 == _RCX_REGNO ? 
_RBX : _RCX) | + jit_class_gpr|jit_class_named); + use = 1; div = rn(reg); } - else { - reg = 0; + else div = r0; - } movi(div, i0); - movr(_RAX, r1); + movr(_RAX_REGNO, r1); if (sign) { sign_extend_rdx_rax(); @@ -1302,19 +1319,21 @@ _divremi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0, idivr_u(div); } - if (reg) + if (use) jit_unget_reg(reg); if (r0 != _RAX_REGNO) { if (divide) movr(r0, _RAX_REGNO); - jit_unget_reg(_RAX); } if (r0 != _RDX_REGNO) { if (!divide) movr(r0, _RDX_REGNO); - jit_unget_reg(_RDX); } + if (set & (1 <<_RDX_REGNO)) + jit_unget_reg(_RDX); + if (set & (1 << _RAX_REGNO)) + jit_unget_reg(_RAX); } static void @@ -1386,7 +1405,7 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } else if (r0 != r1) { movi(r0, i0); - ixorr(r0, r1); + iorr(r0, r1); } else { reg = jit_get_reg(jit_class_gpr); @@ -1448,6 +1467,7 @@ _rotshr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t reg; + jit_int32_t use; if (r0 == _RCX_REGNO) { reg = jit_get_reg(jit_class_gpr); @@ -1459,13 +1479,30 @@ _rotshr(jit_state_t *_jit, jit_int32_t code, jit_unget_reg(reg); } else if (r2 != _RCX_REGNO) { - reg = jit_get_reg(jit_class_gpr); - movr(rn(reg), _RCX_REGNO); - movr(_RCX_REGNO, r2); - movr(r0, r1); + use = !jit_reg_free_p(_RCX); + if (use) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), _RCX_REGNO); + } + else + reg = 0; + if (r1 == _RCX_REGNO) { + if (r0 == r2) + xchgr(r0, _RCX_REGNO); + else { + movr(r0, r1); + movr(_RCX_REGNO, r2); + } + } + else { + movr(_RCX_REGNO, r2); + movr(r0, r1); + } irotshr(code, r0); - movr(_RCX_REGNO, rn(reg)); - jit_unget_reg(reg); + if (use) { + movr(_RCX_REGNO, rn(reg)); + jit_unget_reg(reg); + } } else { movr(r0, r1); diff --git a/lib/lightning.c b/lib/lightning.c index ac06ffe99..69abeeb78 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -148,17 +148,18 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec) jit_int32_t spec; jit_int32_t 
regno; - /* if asking for an explicit register value, assume it will - * properly handle the case of the register also being an - * argument for the instruction, or the register value - * being live */ spec = regspec & ~(jit_class_chk|jit_class_nospill); - if ((regno = jit_regno(spec))) { + if (spec & jit_class_named) { + regno = jit_regno(spec); if (jit_regset_tstbit(_jit->regsav, regno)) /* fail if register is spilled */ goto fail; - if (jit_regset_tstbit(_jit->regarg, regno)) { + if (jit_regset_tstbit(_jit->regarg, regno)) + /* fail if register is an argument to current instruction */ + goto fail; + if (jit_regset_tstbit(_jit->reglive, regno)) { if (regspec & jit_class_nospill) + /* fail if register is live and should not spill/reload */ goto fail; goto spill; } @@ -1453,6 +1454,12 @@ _thread_jumps(jit_state_t *_jit) case jit_code_callr: case jit_code_calli: /* non optimizable jump like code */ break; + case jit_code_beqr_f: case jit_code_beqi_f: + case jit_code_beqr_d: case jit_code_beqi_d: + case jit_code_bltgtr_f: case jit_code_bltgti_f: + case jit_code_bltgtr_d: case jit_code_bltgti_d: + /* non optimizable jump code */ + break; default: mask = jit_classify(node->code); if (mask & jit_cc_a0_jmp) { @@ -1633,8 +1640,10 @@ reverse_jump_code(jit_code_t code) case jit_code_blti_f: return (jit_code_bungei_f); case jit_code_bler_f: return (jit_code_bungtr_f); case jit_code_blei_f: return (jit_code_bungti_f); +#if 0 case jit_code_beqr_f: return (jit_code_bltgtr_f); case jit_code_beqi_f: return (jit_code_bltgti_f); +#endif case jit_code_bger_f: return (jit_code_bunltr_f); case jit_code_bgei_f: return (jit_code_bunlti_f); case jit_code_bgtr_f: return (jit_code_bunler_f); @@ -1651,8 +1660,10 @@ reverse_jump_code(jit_code_t code) case jit_code_bungei_f: return (jit_code_blti_f); case jit_code_bungtr_f: return (jit_code_bler_f); case jit_code_bungti_f: return (jit_code_blei_f); +#if 0 case jit_code_bltgtr_f: return (jit_code_beqr_f); case jit_code_bltgti_f: return 
(jit_code_beqi_f); +#endif case jit_code_bordr_f: return (jit_code_bunordr_f); case jit_code_bordi_f: return (jit_code_bunordi_f); case jit_code_bunordr_f:return (jit_code_bordr_f);