From d1c0bc8e983620693bb71c58b7bfd64bd3690d94 Mon Sep 17 00:00:00 2001 From: pcpa Date: Thu, 6 Dec 2012 15:16:23 -0200 Subject: [PATCH] Add new test case for int/float to/from conversion. * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new "cvt" test case to test conversion from/to int/float types. * check/lightning.c: Only define truncr_{f,d}_l in 64 bit mode. * include/lightning.h: Correct typo that caused it to define jit_truncr_{f,d}_l in 32 bit mode. * lib/jit_arm-cpu.c: Avoid assertion failure in the signed/unsigned extend opcodes generation as it shares an interface for 3 argument opcode generation. * lib/jit_x86-cpu.c: Correct wrong argument passed to jit_unget_reg in the andi implementation and wrong byte unsigned extend code generation. * lib/jit_x86-sse.c: Correct conversion from "word" to float or double as it is dependent on wordsize. --- ChangeLog | 21 +++ check/Makefile.am | 4 +- check/cvt.ok | 1 + check/cvt.tst | 379 ++++++++++++++++++++++++++++++++++++++++++++ check/lightning.c | 37 ++++- include/lightning.h | 6 +- lib/jit_arm-cpu.c | 8 +- lib/jit_x86-cpu.c | 4 +- lib/jit_x86-sse.c | 7 +- 9 files changed, 449 insertions(+), 18 deletions(-) create mode 100644 check/cvt.ok create mode 100644 check/cvt.tst diff --git a/ChangeLog b/ChangeLog index 18c41062c..6718b7697 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2012-12-05 Paulo Andrade + + * check/Makefile.am, check/cvt.ok, check/cvt.tst: Add new + "cvt" test case to test conversion from/to int/float types. + + * check/lightning.c: Only define truncr_{f,d}_l in 64 bit mode. + + * include/lightning.h: Correct typo that caused it to define + jit_truncr_{f,d}_l in 32 bit mode. + + * lib/jit_arm-cpu.c: Avoid assertion failure in the signed/unsigned + extend opcodes generation as it shares an interface for 3 argument + opcode generation. + + * lib/jit_x86-cpu.c: Correct wrong argument passed to + jit_unget_reg in the andi implementation and wrong byte + unsigned extend code generation. 
+ + * lib/jit_x86-sse.c: Correct conversion from "word" to float or + double as is dependent on wordsize. + 2012-12-05 Paulo Andrade * check/ldstr-c.ok, check/ldstr-c.tst, check/ldstxi-c.ok, diff --git a/check/Makefile.am b/check/Makefile.am index 0292d4d7c..ae44deabf 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -40,6 +40,7 @@ EXTRA_DIST = \ ldstr-c.tst ldstr-c.ok \ ldstxr-c.tst ldstxr-c.ok \ ldstxi-c.tst ldstxi-c.ok \ + cvt.tst cvt.ok \ check.sh run-test \ all.tst @@ -47,7 +48,8 @@ TESTS = 3to2 add allocai \ bp divi fib rpn \ ldstr ldsti \ ldstxr ldstxi \ - ldstr-c ldstxr-c ldstxi-c + ldstr-c ldstxr-c ldstxi-c \ + cvt CLEANFILES = $(TESTS) diff --git a/check/cvt.ok b/check/cvt.ok new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/check/cvt.ok @@ -0,0 +1 @@ +ok diff --git a/check/cvt.tst b/check/cvt.tst new file mode 100644 index 000000000..e645cc253 --- /dev/null +++ b/check/cvt.tst @@ -0,0 +1,379 @@ +.data 12 +ok: +.c "ok\n" + +#if __WORDSIZE == 32 +# define w7f 0x7fffffff +# define w80 0x80000000 +# define w81 0x80000001 +# define wff 0xffffffff +# define LEXTII2(N, R0, R1) +# define LEXTIC2(N, R0) +#else +# define w7f 0x7fffffffffffffff +# define w80 0x8000000000000000 +# define w81 0x8000000000000001 +# define wff 0xffffffffffffffff +# define i7f wff +# define ui7f 0xffffffff +# define i80 0 +# define ui80 0 +# define i81 1 +# define ui81 1 +# define iff wff +# define uiff 0xffffffff +# define LEXTII2(N, R0, R1) \ + EXTII2(N, i, R0, R1) \ + EXTII2(N, ui, R0, R1) +# define LEXTIC2(N, R0) \ + EXTIC2(N, i, R0) \ + EXTIC2(N, ui, R0) +#endif +#define c7f wff +#define uc7f 0xff +#define s7f wff +#define us7f 0xffff +#define c80 0 +#define uc80 0 +#define s80 0 +#define us80 0 +#define c81 1 +#define uc81 1 +#define s81 1 +#define us81 1 +#define cff wff +#define ucff 0xff +#define sff wff +#define usff 0xffff + +#define EXTII2(N, T, R0, R1) \ + movi %R0 w##N \ + extr_##T %R1 %R0 \ + beqi T##_##R0##_##R1##_##N %R1 T##N \ + calli @abort \ 
+T##_##R0##_##R1##_##N: +#define EXTII1(N, R0, R1) \ + EXTII2(N, c, R0, R1) \ + EXTII2(N, uc, R0, R1) \ + EXTII2(N, s, R0, R1) \ + EXTII2(N, us, R0, R1) \ + LEXTII2(N, R0, R1) +#define EXTII0(R0, R1) \ + EXTII1(7f, R0, R1) \ + EXTII1(80, R0, R1) \ + EXTII1(81, R0, R1) \ + EXTII1(ff, R0, R1) + +#define EXTIC2(N, T, R0) \ + movi %R0 w##N \ + extr_##T %R0 %R0 \ + beqi T##_##R0##_##N %R0 T##N \ + calli @abort \ +T##_##R0##_##N: +#define EXTIC1(N, R0) \ + EXTIC2(N, c, R0) \ + EXTIC2(N, uc, R0) \ + EXTIC2(N, s, R0) \ + EXTIC2(N, us, R0) \ + LEXTIC2(N, R0) +#define EXTIC0(R0) \ + EXTIC1(7f, R0) \ + EXTIC1(80, R0) \ + EXTIC1(81, R0) \ + EXTIC1(ff, R0) + +#define EXTII(V0, V1, V2, R0, R1, R2) \ + EXTII0(V0, V1) \ + EXTII0(V0, V2) \ + EXTII0(V0, R0) \ + EXTII0(V0, R1) \ + EXTII0(V0, R2) \ + EXTII0(V1, V0) \ + EXTII0(V1, V2) \ + EXTII0(V1, R0) \ + EXTII0(V1, R1) \ + EXTII0(V1, R2) \ + EXTII0(V2, V0) \ + EXTII0(V2, V1) \ + EXTII0(V2, R0) \ + EXTII0(V2, R1) \ + EXTII0(V2, R2) \ + EXTII0(R0, V0) \ + EXTII0(R0, V1) \ + EXTII0(R0, V2) \ + EXTII0(R0, R1) \ + EXTII0(R0, R2) \ + EXTII0(R1, V0) \ + EXTII0(R1, V1) \ + EXTII0(R1, V2) \ + EXTII0(R1, R0) \ + EXTII0(R1, R2) \ + EXTII0(R2, V0) \ + EXTII0(R2, V1) \ + EXTII0(R2, V2) \ + EXTII0(R2, R0) \ + EXTII0(R2, R1) \ + EXTIC0(V0) \ + EXTIC0(V1) \ + EXTIC0(V2) \ + EXTIC0(R0) \ + EXTIC0(R1) \ + EXTIC0(R2) + +#define EXIF1(N, V, R0, R1) \ + movi %R0 V \ + extr_f %R1 %R0 \ + beqi_f wf##_##R0##_##R1##_##N %R1 V \ +wf##_##R0##_##R1##_##N: +#define EXIF0(R0, R1) \ + EXIF1(0, -1, R0, R1) \ + EXIF1(1, 64, R0, R1) +#define EXIF(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + EXIF0(V0, F0) \ + EXIF0(V1, F1) \ + EXIF0(V2, F2) \ + EXIF0(R0, F3) \ + EXIF0(R1, F4) \ + EXIF0(R2, F5) +#define EXID1(N, V, R0, R1) \ + movi %R0 V \ + extr_d %R1 %R0 \ + beqi_d wd##_##R0##_##R1##_##N %R1 V \ +wd##_##R0##_##R1##_##N: +#define EXID0(R0, R1) \ + EXID1(0, -1, R0, R1) \ + EXID1(1, 64, R0, R1) +#define EXID(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + 
EXID0(V0, F0) \ + EXID0(V1, F1) \ + EXID0(V2, F2) \ + EXID0(R0, F3) \ + EXID0(R1, F4) \ + EXID0(R2, F5) + +#define EXFI1(N, V, R0, R1) \ + movi_f %R1 V \ + truncr_f %R0 %R1 \ + beqi fi##_##R0##_##R1##_##N %R0 V \ + calli @abort \ +fi##_##R0##_##R1##_##N: +#define EXFI0(R0, R1) \ + EXFI1(0, 42, R0, R1) \ + EXFI1(1, -128, R0, R1) +#define EXFI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + EXFI0(V0, F5) \ + EXFI0(V1, F4) \ + EXFI0(V2, F3) \ + EXFI0(R0, F2) \ + EXFI0(R1, F1) \ + EXFI0(R2, F0) +#define EXDI1(N, V, R0, R1) \ + movi_d %R1 V \ + truncr_d %R0 %R1 \ + beqi di##_##R0##_##R1##_##N %R0 V \ + calli @abort \ +di##_##R0##_##R1##_##N: +#define EXDI0(R0, R1) \ + EXDI1(0, 42, R0, R1) \ + EXDI1(1, -128, R0, R1) +#define EXDI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + EXDI0(V0, F5) \ + EXDI0(V1, F4) \ + EXDI0(V2, F3) \ + EXDI0(R0, F2) \ + EXDI0(R1, F1) \ + EXDI0(R2, F0) + +#define LEXFI1(N, V, R0, R1) \ + movi_f %R1 V \ + truncr_f_i %R0 %R1 \ + andi %R0 %R0 0xffffffff \ + beqi lfi##_##R0##_##R1##_##N %R0 $(V & 0xffffffff) \ + calli @abort \ +lfi##_##R0##_##R1##_##N: +#define LEXFI0(R0, R1) \ + LEXFI1(0, 42, R0, R1) \ + LEXFI1(1, -128, R0, R1) +#define LEXFI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + LEXFI0(V0, F5) \ + LEXFI0(V1, F4) \ + LEXFI0(V2, F3) \ + LEXFI0(R0, F2) \ + LEXFI0(R1, F1) \ + LEXFI0(R2, F0) +#define LEXDI1(N, V, R0, R1) \ + movi_d %R1 V \ + truncr_d_i %R0 %R1 \ + andi %R0 %R0 0xffffffff \ + beqi ldi##_##R0##_##R1##_##N %R0 $(V & 0xffffffff) \ + calli @abort \ +ldi##_##R0##_##R1##_##N: +#define LEXDI0(R0, R1) \ + LEXDI1(0, 42, R0, R1) \ + LEXDI1(1, -128, R0, R1) +#define LEXDI(V0, V1, V2, R0, R1, R2, F0, F1, F2, F3, F4, F5) \ + LEXDI0(V0, F5) \ + LEXDI0(V1, F4) \ + LEXDI0(V2, F3) \ + LEXDI0(R0, F2) \ + LEXDI0(R1, F1) \ + LEXDI0(R2, F0) + +#define EXTFD2(V, R0, R1) \ + movi_f %R0 V \ + extr_f_d %R1 %R0 \ + beqi_d fd##_##R0##_##R1 %R1 V \ + calli @abort \ +fd##_##R0##_##R1: +#define EXTFD1(R0, R1) \ + EXTFD2(1.25, R0, R1) +#define 
EXTFDC2(V, R0) \ + movi_f %R0 V \ + extr_f_d %R0 %R0 \ + beqi_d fdc##_##R0 %R0 V \ + calli @abort \ +fdc##_##R0: +#define EXTFDC1(R0) \ + EXTFDC2(-0.75, R0) +#define EXTFD(R0, R1, R2, R3, R4, R5) \ + EXTFD1(R0, R1) \ + EXTFD1(R0, R2) \ + EXTFD1(R0, R3) \ + EXTFD1(R0, R4) \ + EXTFD1(R0, R5) \ + EXTFDC1(R0) \ + EXTFDC1(R1) \ + EXTFDC1(R2) \ + EXTFDC1(R3) \ + EXTFDC1(R4) \ + EXTFDC1(R5) + +#define EXTDF2(V, R0, R1) \ + movi_d %R0 V \ + extr_d_f %R1 %R0 \ + beqi_f df##_##R0##_##R1 %R1 V \ + calli @abort \ +df##_##R0##_##R1: +#define EXTDF1(R0, R1) \ + EXTDF2(1.25, R0, R1) +#define EXTDFC2(V, R0) \ + movi_d %R0 V \ + extr_d_f %R0 %R0 \ + beqi_f dfc##_##R0 %R0 V \ + calli @abort \ +dfc##_##R0: +#define EXTDFC1(R0) \ + EXTDFC2(-0.75, R0) +#define EXTDF(R0, R1, R2, R3, R4, R5) \ + EXTDF1(R0, R1) \ + EXTDF1(R0, R2) \ + EXTDF1(R0, R3) \ + EXTDF1(R0, R4) \ + EXTDF1(R0, R5) \ + EXTDFC1(R0) \ + EXTDFC1(R1) \ + EXTDFC1(R2) \ + EXTDFC1(R3) \ + EXTDFC1(R4) \ + EXTDFC1(R5) + +.code + prolog + + /* simple sequence for easier disassembly reading and encoding check */ + movi %r0 w7f + extr_c %r1 %r0 + beqi xc %r1 c7f + calli @abort +xc: + movi %r0 w7f + extr_uc %r1 %r0 + beqi xuc %r1 uc7f + calli @abort +xuc: + movi %r0 w7f + extr_s %r1 %r0 + beqi xs %r1 s7f + calli @abort +xs: + movi %r0 w7f + extr_us %r1 %r0 + beqi xus %r1 us7f + calli @abort +xus: +#if __WORDSIZE == 64 + movi %r0 w7f + extr_i %r1 %r0 + beqi xi %r1 i7f + calli @abort +xi: + movi %r0 w7f + extr_ui %r1 %r0 + beqi xui %r1 ui7f + calli @abort +xui: +#endif + movi %r0 -2 + extr_f %f0 %r0 + beqi_f xif %f0 -2 + calli @abort +xif: + movi %r0 32 + extr_d %f0 %r0 + beqi_d xid %f0 32 + calli @abort +xid: + movi_f %f0 -128 + truncr_f %r0 %f0 + beqi xfi %r0 -128 + calli @abort +xfi: + movi_d %f0 -128 + truncr_d %r0 %f0 + beqi xdi %r0 -128 + calli @abort +xdi: +#if __WORDSIZE == 64 + movi_f %f0 -128 + truncr_f_i %r0 %f0 + andi %r0 %r0 0xffffffff + beqi yfi %r0 $(-128 & 0xffffffff) + calli @abort +yfi: + movi_d %f0 -128 + 
truncr_d_i %r0 %f0 + andi %r0 %r0 0xffffffff + beqi ydi %r0 $(-128 & 0xffffffff) + calli @abort +ydi: +#endif + movi_f %f0 0.5 + extr_f_d %f1 %f0 + beqi_d xfd %f1 0.5 + calli @abort +xfd: + movi_d %f0 0.5 + extr_d_f %f1 %f0 + beqi_f xdf %f1 0.5 + calli @abort +xdf: + + EXTII(v0, v1, v2, r0, r1, r2) + EXIF(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) + EXID(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) + EXFI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) +#if __WORDSIZE == 64 + LEXFI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) + LEXDI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) +#endif + EXDI(v0, v1, v2, r0, r1, r2, f0, f1, f2, f3, f4, f5) + EXTFD(f0, f1, f2, f3, f4, f5) + EXTDF(f0, f1, f2, f3, f4, f5) + + // just to know did not abort + prepare 1 + pushargi ok + finishi @printf + + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index 88d07fd5c..cf5dff1e4 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -378,7 +378,11 @@ static void ungtr_f(void); static void ungti_f(void); static void ltgtr_f(void); static void ltgti_f(void); static void ordr_f(void); static void ordi_f(void); static void unordr_f(void); static void unordi_f(void); -static void truncr_f_i(void); static void truncr_f_l(void); +static void truncr_f_i(void); +#if __WORDSIZE == 64 +static void truncr_f_l(void); +#endif +static void truncr_f(void); static void extr_f(void); static void extr_d_f(void); static void movr_f(void); static void movi_f(void); static void ldr_f(void); static void ldi_f(void); @@ -424,7 +428,11 @@ static void ungtr_d(void); static void ungti_d(void); static void ltgtr_d(void); static void ltgti_d(void); static void ordr_d(void); static void ordi_d(void); static void unordr_d(void); static void unordi_d(void); -static void truncr_d_i(void); static void truncr_d_l(void); +static void truncr_d_i(void); +#if __WORDSIZE == 64 +static void truncr_d_l(void); +#endif +static void truncr_d(void); static void extr_d(void); static void 
extr_f_d(void); static void movr_d(void); static void movi_d(void); static void ldr_d(void); static void ldi_d(void); @@ -661,7 +669,11 @@ static instr_t instr_vector[] = { entry(ltgtr_f), entry(ltgti_f), entry(ordr_f), entry(ordi_f), entry(unordr_f), entry(unordi_f), - entry(truncr_f_i), entry(truncr_f_l), + entry(truncr_f_i), +#if __WORDSIZE == 64 + entry(truncr_f_l), +#endif + entry(truncr_f), entry(extr_f), entry(extr_d_f), entry(movr_f), entry(movi_f), entry(ldr_f), entry(ldi_f), @@ -707,7 +719,11 @@ static instr_t instr_vector[] = { entry(ltgtr_d), entry(ltgti_d), entry(ordr_d), entry(ordi_d), entry(unordr_d), entry(unordi_d), - entry(truncr_d_i), entry(truncr_d_l), + entry(truncr_d_i), +#if __WORDSIZE == 64 + entry(truncr_d_l), +#endif + entry(truncr_d), entry(extr_d), entry(extr_f_d), entry(movr_d), entry(movi_d), entry(ldr_d), entry(ldi_d), @@ -1251,6 +1267,7 @@ movi(void) jit_gpr_t r0 = get_ireg(); ch = skipws(); switch (ch) { + case '+': case '-': case '0' ... '9': ungetch(ch); value = (void *)(long)get_uint(skip_none); @@ -1378,7 +1395,11 @@ entry_ir_fr_fr(ungtr_f) entry_ir_fr_fm(ungti_f) entry_ir_fr_fr(ltgtr_f) entry_ir_fr_fm(ltgti_f) entry_ir_fr_fr(ordr_f) entry_ir_fr_fm(ordi_f) entry_ir_fr_fr(unordr_f) entry_ir_fr_fm(unordi_f) -entry_ir_fr(truncr_f_i) entry_ir_fr(truncr_f_l) +entry_ir_fr(truncr_f_i) +#if __WORDSIZE == 64 +entry_ir_fr(truncr_f_l) +#endif +entry_ir_fr(truncr_f) entry_fr_ir(extr_f) entry_fr_fr(extr_d_f) entry_fr_fr(movr_f) entry_fr_fm(movi_f) entry_fr_ir(ldr_f) entry_fr_pm(ldi_f) @@ -1424,7 +1445,11 @@ entry_ir_fr_fr(ungtr_d) entry_ir_fr_fm(ungti_d) entry_ir_fr_fr(ltgtr_d) entry_ir_fr_fm(ltgti_d) entry_ir_fr_fr(ordr_d) entry_ir_fr_fm(ordi_d) entry_ir_fr_fr(unordr_d) entry_ir_fr_fm(unordi_d) -entry_ir_fr(truncr_d_i) entry_ir_fr(truncr_d_l) +entry_ir_fr(truncr_d_i) +#if __WORDSIZE == 64 +entry_ir_fr(truncr_d_l) +#endif +entry_ir_fr(truncr_d) entry_fr_ir(extr_d) entry_fr_fr(extr_f_d) entry_fr_fr(movr_d) entry_fr_fm(movi_d) 
entry_fr_ir(ldr_d) entry_fr_pm(ldi_d) diff --git a/include/lightning.h b/include/lightning.h index 8af86e188..997a936eb 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -497,7 +497,7 @@ typedef enum { #define jit_truncr_f_i(u,v) jit_new_node_ww(jit_code_truncr_f_i,u,v) jit_code_truncr_f_i, -#if __WODSIZE == 32 +#if __WORDSIZE == 32 # define jit_truncr_f(u,v) jit_truncr_f_i(u,v) #else # define jit_truncr_f(u,v) jit_truncr_f_l(u,v) @@ -642,14 +642,14 @@ typedef enum { #define jit_truncr_d_i(u,v) jit_new_node_ww(jit_code_truncr_d_i,u,v) jit_code_truncr_d_i, -#if __WODSIZE == 32 +#if __WORDSIZE == 32 # define jit_truncr_d(u,v) jit_truncr_d_i(u,v) #else # define jit_truncr_d(u,v) jit_truncr_d_l(u,v) # define jit_truncr_d_l(u,v) jit_new_node_ww(jit_code_truncr_d_l,u,v) #endif jit_code_truncr_d_l, -#define jit_extr_d(u,v) jit_new_node_ww(jit_code_extr_f,u,v) +#define jit_extr_d(u,v) jit_new_node_ww(jit_code_extr_d,u,v) #define jit_extr_f_d(u,v) jit_new_node_ww(jit_code_extr_f_d,u,v) jit_code_extr_d, jit_code_extr_f_d, #define jit_movr_d(u,v) jit_new_node_ww(jit_code_movr_d,u,v) diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index ff2b31944..948f26094 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -159,16 +159,16 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB2_REV16 0xfa90f090 # define ARM_SXTB 0x06af0070 # define THUMB_SXTB 0xb240 -# define THUMB2_SXTB 0xfa4ff080 +# define THUMB2_SXTB 0xfa40f080 # define ARM_UXTB 0x06ef0070 # define THUMB_UXTB 0xb2c0 -# define THUMB2_UXTB 0xfa5ff080 +# define THUMB2_UXTB 0xfa50f080 # define ARM_SXTH 0x06bf0070 # define THUMB_SXTH 0xb200 -# define THUMB2_SXTH 0xfa0f0080 +# define THUMB2_SXTH 0xfa00f080 # define ARM_UXTH 0x06ff0070 # define THUMB_UXTH 0xb280 -# define THUMB2_UXTH 0xfa1ff080 +# define THUMB2_UXTH 0xfa10f080 # define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */ # define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */ # define ARM_XTR24 0x00000c00 /* ?xt? 
rotate 24 bits */ diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 60b57df74..bef8d4c95 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -1348,7 +1348,7 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); iandr(r0, rn(reg)); - jit_unget_reg(r0); + jit_unget_reg(reg); } } else { @@ -1856,7 +1856,7 @@ _movcr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { rex(0, 1, r0, _NOREG, r1); ic(0x0f); - ic(0xbe); + ic(0xb6); mrm(0x03, r7(r0), r7(r1)); } diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index 89425fb7b..d67ab9dc8 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -69,9 +69,12 @@ static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) # if __WORDSIZE == 64 # define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1) # define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1) +# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1) +# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1) +# else +# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1) +# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1) # endif -# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1) -# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1) # define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1) # define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1) # define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1)