diff --git a/ChangeLog b/ChangeLog index 14772894f..06bab4642 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,60 @@ +2014-12-03 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/lightning.c: Implement the new jit_set_data() interface, + and the new jit_get_data() helper. Like jit_set_code(), + jit_realize() should be called before jit_set_data(). + The most common usage should be jit_set_data(JIT_DISABLE_DATA + | JIT_DISABLE_NOTE), to force synthesize any float/double + constant in the stack and not generate any debug information. + + * lib/jit_note.c: Minor change to debug note generation as + now it uses an alternate temporary data buffer during constants + and debug generation to accommodate the possibility of the user + setting an alternate data buffer. + + * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c, + lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c, + lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data. + + * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c, + lib/jit_s390x-sz.c: Update for several instructions that now + have a different maximum length due to jit_set_data. + + * lib/jit_mips-fpu.c: Implement jit_set_data, but missing + validation on n32 and n64 abis (and/or big endian). + + * lib/jit_mips-sz.c: Update for changes in o32. + + * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing + validation on Darwin PPC. + + * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and + 64 bit. + + * lib/jit_ia64-fpu.c: Implement untested jit_set_data. + + * TODO: Add note to list ports that were not tested for the + new jit_set_data() feature, due to no longer having access + to them. + + * check/nodata.c: New file implementing a simple test exercising + several different conditions created by jit_set_data(). 
+ + * check/check.nodata.sh: New file implementing a wrapper + over the existing *.tst files, that runs all tests without + using a data buffer for constants; only meaningful (and + enabled) on architectures that used to store float/double + constants on a read only data buffer. + + * configure.ac, check/Makefile.am: Update for the new test + cases. + + * check/lightning.c: Implement the new "-d" option that + sets an internal flag to call jit_set_data() to disable + constants and debug, that is, using only a pure code + buffer. + 2014-11-03 Paulo Andrade * include/lightning.h, include/lightning/jit_private.h, diff --git a/TODO b/TODO index 59ac79d4a..676af0293 100644 --- a/TODO +++ b/TODO @@ -6,3 +6,23 @@ * Write a simple higher level language implementation generating jit with lightning, that could be some lisp or C like language. + + * rerun ./configure --enable-devel-get-jit-size and regenerate + the related jit_$arch-sz.c for the ports where nodata is + meaningful: + hppa (done) + i586 (done) + ia64 + mips o32 (done) + mips n32 + mips n64 + powerpc 32 (done) + powerpc 64 (done) + ppc + s390x (done) + sparc (done) + x86_64 (done) + Missing ones are due to no longer having (remote) access to such hosts + and may be broken with jit_set_data(..., JIT_DISABLE_DATA). + (ia64 hp-ux or linux), (irix mips for 32 or 64 abi), and + (darwin ppc). 
diff --git a/check/Makefile.am b/check/Makefile.am index 22a2035bf..c900fbd8a 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -16,7 +16,7 @@ AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE -check_PROGRAMS = lightning ccall self setcode +check_PROGRAMS = lightning ccall self setcode nodata lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c @@ -30,6 +30,9 @@ self_SOURCES = self.c setcode_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) setcode_SOURCES = setcode.c +nodata_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +nodata_SOURCES = nodata.c + $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la @@ -81,6 +84,8 @@ EXTRA_DIST = \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ + check.nodata.sh \ + check.x87.nodata.sh \ run-test all.tst base_TESTS = \ @@ -130,6 +135,28 @@ x87_TESTS = \ $(x87_TESTS): check.x87.sh $(LN_S) $(srcdir)/check.x87.sh $@ TESTS += $(x87_TESTS) + +#x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS)) +x87_nodata_TESTS = \ + 3to2.x87.nodata add.x87.nodata allocai.x87.nodata \ + bp.x87.nodata divi.x87.nodata fib.x87.nodata rpn.x87.nodata \ + ldstr.x87.nodata ldsti.x87.nodata \ + ldstxr.x87.nodata ldstxi.x87.nodata \ + ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata \ + cvt.x87.nodata branch.x87.nodata \ + alu_add.x87.nodata alux_add.x87.nodata \ + alu_sub.x87.nodata alux_sub.x87.nodata \ + alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \ + alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ + alu_lsh.x87.nodata alu_rsh.x87.nodata \ + alu_com.x87.nodata alu_neg.x87.nodata \ + fop_abs.x87.nodata fop_sqrt.x87.nodata \ + varargs.x87.nodata stack.x87.nodata \ + clobber.x87.nodata carry.x87.nodata call.x87.nodata \ + float.x87.nodata +$(x87_nodata_TESTS): check.x87.nodata.sh + $(LN_S) $(srcdir)/check.x87.nodata.sh $@ +TESTS += $(x87_nodata_TESTS) endif if test_arm_arm @@ -180,7 +207,31 @@ 
$(swf_TESTS): check.swf.sh TESTS += $(swf_TESTS) endif -TESTS += ccall self setcode +if test_nodata +#nodata_TESTS = $(addsuffix .nodata, $(base_TESTS)) +nodata_TESTS = \ + 3to2.nodata add.nodata allocai.nodata \ + bp.nodata divi.nodata fib.nodata rpn.nodata \ + ldstr.nodata ldsti.nodata \ + ldstxr.nodata ldstxi.nodata \ + ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ + cvt.nodata branch.nodata \ + alu_add.nodata alux_add.nodata \ + alu_sub.nodata alux_sub.nodata \ + alu_mul.nodata alu_div.nodata alu_rem.nodata \ + alu_and.nodata alu_or.nodata alu_xor.nodata \ + alu_lsh.nodata alu_rsh.nodata \ + alu_com.nodata alu_neg.nodata \ + fop_abs.nodata fop_sqrt.nodata \ + varargs.nodata stack.nodata \ + clobber.nodata carry.nodata call.nodata \ + float.nodata +$(nodata_TESTS): check.nodata.sh + $(LN_S) $(srcdir)/check.nodata.sh $@ +TESTS += $(nodata_TESTS) +endif + +TESTS += ccall self setcode nodata CLEANFILES = $(TESTS) #TESTS_ENVIRONMENT=$(srcdir)/run-test; diff --git a/check/check.nodata.sh b/check/check.nodata.sh new file mode 100755 index 000000000..0fbc4e90f --- /dev/null +++ b/check/check.nodata.sh @@ -0,0 +1,15 @@ +#!/bin/sh +test=`basename $0 | sed -e 's|\.nodata$||'` +./lightning -d $srcdir/$test.tst | tr -d \\r > $test.out +if test $? != 0; then + exit $? +fi + +cmp -s $srcdir/$test.ok $test.out +result=$? 
+if test $result != 0; then + diff $srcdir/$test.ok $test.out + rm $test.out + exit 1 +fi +rm $test.out diff --git a/check/lightning.c b/check/lightning.c index 2cf38edc8..f0fd19be6 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -548,6 +548,7 @@ static void rehash(hash_t *hash); */ static jit_state_t *_jit; static int flag_verbose; +static int flag_data; static int flag_disasm; static char *progname; static parser_t parser; @@ -3664,6 +3665,11 @@ execute(int argc, char *argv[]) patch = next; } + if (flag_data == 0) { + jit_realize(); + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + } + function = jit_emit(); if (flag_verbose > 1 || flag_disasm) { jit_print(); @@ -3876,6 +3882,7 @@ Usage: %s [jit assembler options] file [jit program options]\n\ Jit assembler options:\n\ -help Display this information\n\ -v[0-3] Verbose output level\n\ + -d Do not use a data buffer\n\ -D[=] Preprocessor options\n" # if defined(__i386__) && __WORDSIZE == 32 " -mx87=1 Force using x87 when sse2 available\n" @@ -3906,9 +3913,10 @@ int main(int argc, char *argv[]) { #if HAVE_GETOPT_LONG_ONLY - static const char *short_options = "v::"; + static const char *short_options = "dv::"; static struct option long_options[] = { { "help", 0, 0, 'h' }, + { "data", 2, 0, 'd' }, # if defined(__i386__) && __WORDSIZE == 32 { "mx87", 2, 0, '7' }, # endif @@ -3939,6 +3947,7 @@ main(int argc, char *argv[]) DL_HANDLE = dlopen(NULL, RTLD_LAZY); #endif + flag_data = 1; #if HAVE_GETOPT_LONG_ONLY for (;;) { if ((opt_short = getopt_long_only(argc, argv, short_options, @@ -3958,6 +3967,9 @@ main(int argc, char *argv[]) else flag_verbose = 1; break; + case 'd': + flag_data = 0; + break; #if defined(__i386__) && __WORDSIZE == 32 case '7': if (optarg) { @@ -4022,9 +4034,11 @@ main(int argc, char *argv[]) } } #else - while ((opt_short = getopt(argc, argv, "hv")) >= 0) { + while ((opt_short = getopt(argc, argv, "hvd")) >= 0) { if (opt_short == 'v') ++flag_verbose; + else if (opt_short == 'd') 
+ flag_data = 0; else usage(); } diff --git a/check/nodata.c b/check/nodata.c new file mode 100644 index 000000000..0e594c327 --- /dev/null +++ b/check/nodata.c @@ -0,0 +1,106 @@ +/* + * Simple test of using an alternate buffer for the data. + */ + +#include <lightning.h> +#include <stdio.h> +#include <assert.h> +#include <sys/mman.h> +#if defined(__sgi) +# include <fcntl.h> +#endif + +#ifndef MAP_ANON +# define MAP_ANON MAP_ANONYMOUS +# ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS 0 +# endif +#endif + +#if !defined(__sgi) +#define mmap_fd -1 +#endif + +jit_uint8_t *data; +jit_state_t *_jit; +jit_word_t data_length; +jit_word_t note_length; +#if defined(__sgi) +int mmap_fd; +#endif +void (*function)(void); + +void +gencode(jit_word_t flags) +{ + jit_word_t offset; + jit_word_t length; + + _jit = jit_new_state(); + + jit_name("main"); + jit_prolog(); + jit_prepare(); + jit_pushargi((jit_word_t)"%f\n"); + jit_ellipsis(); + jit_pushargi_d(1.5); + jit_finishi(printf); + jit_note("nodata.c", __LINE__); + + /* call to jit_realize() is only required when using an alternate + * code buffer. Note that not using mmap'ed memory may not work + * on several ports and/or operating system versions */ + jit_realize(); + + if (jit_get_data(&data_length, &note_length) != NULL) + abort(); + + length = 0; + if (!(flags & JIT_DISABLE_DATA)) + length += data_length; + if (!(flags & JIT_DISABLE_NOTE)) + length += note_length; + + /* check that a too small buffer fails */ + if (flags) + jit_set_data(length ? 
data : NULL, length, flags); + + /* and calling again with enough space works */ + offset = (length + 7) & -8; + function = jit_emit(); + if (function == NULL) + abort(); + + jit_clear_state(); + (*function)(); + jit_destroy_state(); +} + +int +main(int argc, char *argv[]) +{ +#if defined(__sgi) + mmap_fd = open("/dev/zero", O_RDWR); +#endif + + data = mmap(NULL, 4096, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + assert(data != MAP_FAILED); +#if defined(__sgi) + close(mmap_fd); +#endif + + init_jit(argv[0]); + + gencode(0); + gencode(JIT_DISABLE_DATA); + gencode(JIT_DISABLE_NOTE); + gencode(JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + finish_jit(); + + munmap(data, 4096); + + return (0); +} diff --git a/configure.ac b/configure.ac index 498d1c1cd..3901d7dd5 100644 --- a/configure.ac +++ b/configure.ac @@ -221,7 +221,9 @@ AM_CONDITIONAL(test_x86_x87, [test x$ac_cv_test_x86_x87 = xyes]) AM_CONDITIONAL(test_arm_arm, [test x$ac_cv_test_arm_arm = xyes]) AM_CONDITIONAL(test_arm_swf, [test x$ac_cv_test_arm_swf = xyes]) -if test $cpu=arm; then +AM_CONDITIONAL(test_nodata, [test cpu-$cpu = cpu-mips -o cpu-$cpu = cpu-ppc -o cpu-$cpu = cpu-sparc -o cpu-$cpu = cpu-x86 -o cpu-$cpu = cpu-ia64 -o cpu-$cpu = cpu-hppa -o cpu-$cpu = cpu-s390x]) + +if test $cpu = arm; then AC_CHECK_LIB(m, sqrtf, , [AC_MSG_ERROR([sqrtf required but not available])]) fi diff --git a/include/lightning.h b/include/lightning.h index 84521c39a..2f6d43f94 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -152,6 +152,9 @@ typedef jit_int32_t jit_fpr_t; #define JIT_V_NUM jit_v_num() #define JIT_F_NUM jit_f_num() +#define JIT_DISABLE_DATA 1 /* force synthesize of constants */ +#define JIT_DISABLE_NOTE 2 /* disable debug info generation */ + #define jit_class_chk 0x02000000 /* just checking */ #define jit_class_arg 0x08000000 /* argument register */ #define jit_class_sav 0x10000000 /* callee save */ @@ -893,6 +896,10 @@ extern void _jit_realize(jit_state_t*); extern jit_pointer_t 
_jit_get_code(jit_state_t*, jit_word_t*); #define jit_set_code(u,v) _jit_set_code(_jit,u,v) extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t); +#define jit_get_data(u,v) _jit_get_data(_jit,u,v) +extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*); +#define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w) +extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 1759bbe69..ed543bc07 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -337,6 +337,7 @@ struct jit_compiler { jit_node_t *head; jit_node_t *tail; jit_uint32_t realize : 1; /* jit_realize() called? */ + jit_uint32_t dataset : 1; /* jit_set_data() called? */ jit_uint32_t done : 1; /* emit state finished */ jit_uint32_t emit : 1; /* emit state entered */ jit_uint32_t again : 1; /* start over emiting function */ @@ -344,6 +345,8 @@ struct jit_compiler { #if DEBUG jit_uint32_t getreg : 1; #endif + jit_uint32_t no_data : 1; + jit_uint32_t no_note : 1; jit_int32_t reglen; /* number of registers */ jit_regset_t regarg; /* cannot allocate */ jit_regset_t regsav; /* automatic spill only once */ @@ -357,6 +360,7 @@ struct jit_compiler { jit_uint8_t *end; } code; struct { + jit_uint8_t *ptr; jit_node_t **table; /* very simple hash table */ jit_word_t size; /* number of vectors in table */ jit_word_t count; /* number of hash table entries */ @@ -467,6 +471,7 @@ struct jit_state { jit_compiler_t *comp; /* Flags to know if user did set the code and data buffers */ jit_uint32_t user_code : 1; + jit_uint32_t user_data : 1; }; struct jit_register { diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index 0421de1df..c1861b1fa 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -680,13 +680,47 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) 
static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { - ldi_f(r0, (jit_word_t)i0); + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + stxi_i(alloca_offset - 8, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_f(r0, (jit_word_t)i0); } static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { - ldi_d(r0, (jit_word_t)i0); + union { + jit_int32_t ii[2]; + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + data.d = *i0; + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.ii[0]); + stxi_i(alloca_offset - 8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(alloca_offset - 4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_d(r0, (jit_word_t)i0); } #define fpr_opi(name, type, size) \ diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 392674e64..ab8c77d31 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -256,50 +256,50 @@ 28, 0, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, 4, 4, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 12, 0, 12, 4, 4, - 12, + 20, 4, 12, 4, @@ -309,33 +309,33 @@ 8, 4, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 0, 0, 0, diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index f902df13f..71e325a40 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -431,8 +431,10 @@ static void F16_(jit_state_t*,jit_word_t, #define 
movr_f(r0,r1) movr_d(r0,r1) #define movr_d(r0,r1) MOVF(r0,r1) -#define movi_f(r0,i0) ldi_f(r0,(jit_word_t)i0) -#define movi_d(r0,i0) ldi_d(r0,(jit_word_t)i0) +#define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*); +#define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*); #define movr_f_w(r0,r1) _movr_f_w(_jit,r0,r1) static void _movr_f_w(jit_state_t*,jit_int32_t,jit_int32_t); #define movr_d_w(r0,r1) _movr_d_w(_jit,r0,r1) @@ -999,6 +1001,46 @@ F16_(jit_state_t* _jit, jit_word_t _p, inst((((im>>20)&1L)<<36)|(y<<27)|(1L<<26)|((im&0xffffL)<<6)|_p, INST_F); } +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + SETF_S(r0, rn(reg)); + jit_unget_reg(reg); + } + else + ldi_f(r0, (jit_word_t)i0); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.w); + SETF_D(r0, rn(reg)); + jit_unget_reg(reg); + } + else + ldi_d(r0, (jit_word_t)i0); +} + static void _movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c index 7154359f0..0a56d5f4b 100644 --- a/lib/jit_mips-fpu.c +++ b/lib/jit_mips-fpu.c @@ -726,10 +726,19 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; data.f = *i0; - if (data.i) - ldi_f(r0, (jit_word_t)i0); + if (data.i) { + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + MTC1(rn(reg), r0); + jit_unget_reg(reg); + } + else + ldi_f(r0, (jit_word_t)i0); + } else MTC1(_ZERO_REGNO, r0); } @@ 
-970,22 +979,62 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_int64_t l; jit_float64_t d; } data; + jit_int32_t reg; data.d = *i0; # if __WORDSIZE == 64 || NEW_ABI - if (data.l) - ldi_d(r0, (jit_word_t)i0); + if (data.l) { + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 64 + movi(rn(reg), data.l); + DMTC1(rn(reg), r0); +# else + if (data.i[0]) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0); + } + else + MTC1(_ZERO_REGNO, r0); + if (data.i[1]) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + 1); + } + else + MTC1(_ZERO_REGNO, r0 + 1); +# endif + jit_unget_reg(reg); + } + else + ldi_d(r0, (jit_word_t)i0); + } else DMTC1(_ZERO_REGNO, r0); # else - if (data.i[0]) - ldi_f(r0, (jit_word_t)i0); + if (_jitc->no_data) + reg = jit_get_reg(jit_class_gpr); + if (data.i[0]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0); + } + else + ldi_f(r0, (jit_word_t)i0); + } else MTC1(_ZERO_REGNO, r0); - if (data.i[1]) - ldi_f(r0 + 1, ((jit_word_t)i0) + 4); + if (data.i[1]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + 1); + } + else + ldi_f(r0 + 1, ((jit_word_t)i0) + 4); + } else MTC1(_ZERO_REGNO, r0 + 1); + if (_jitc->no_data) + jit_unget_reg(reg); # endif } diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 48c061079..69f6dcf26 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -700,7 +700,7 @@ 0, 0, 0, -#endif /* NEW_ABI */ +#endif /* NEW_ABI */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 diff --git a/lib/jit_note.c b/lib/jit_note.c index 474a8db4a..412eba3de 100644 --- a/lib/jit_note.c +++ b/lib/jit_note.c @@ -90,7 +90,7 @@ _jit_note(jit_state_t *_jit, char *name, int line) (name == NULL && _jitc->note.note != NULL) || (name != NULL && _jitc->note.note == NULL) || (name != NULL && _jitc->note.note != NULL && - strcmp(name, (char *)_jit->data.ptr + _jitc->note.note->v.n->u.w))) + strcmp(name, (char *)_jitc->data.ptr + _jitc->note.note->v.n->u.w))) 
_jitc->note.size += sizeof(jit_line_t); _jitc->note.size += sizeof(jit_int32_t) * 2; return (_jitc->note.note = node); diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index ace978753..2bef7e31a 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -439,13 +439,51 @@ _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { - ldi_f(r0, (jit_word_t)i0); + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(alloca_offset - 4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, alloca_offset - 4); + } + else + ldi_f(r0, (jit_word_t)i0); } static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { - ldi_d(r0, (jit_word_t)i0); + union { + jit_int32_t i[2]; + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 32 + movi(rn(reg), data.i[0]); + stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.i[1]); + stxi(alloca_offset - 4, _FP_REGNO, rn(reg)); +# else + movi(rn(reg), data.w); + stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); +# endif + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_d(r0, (jit_word_t)i0); } /* should only work on newer ppc (fcfid is a ppc64 instruction) */ diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index f1652372e..e0c53b9e1 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -527,50 +527,50 @@ 72, 0, 4, - 12, + 16, 4, - 12, + 16, 4, - 12, + 16, 4, - 12, + 16, 4, 4, 4, 12, - 20, - 16, 24, + 16, + 28, 12, - 20, - 16, 24, + 16, + 28, 12, - 20, - 16, 24, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, + 16, + 28, 12, - 20, + 24, 12, 0, 20, 4, 4, - 8, + 12, 4, 8, 4, @@ -580,79 
+580,79 @@ 4, 4, 8, - 16, - 12, - 20, - 8, - 16, - 12, - 20, - 8, - 16, - 8, - 16, - 12, - 20, - 8, - 16, - 12, - 20, - 8, - 16, - 12, 20, 12, + 24, + 8, + 20, + 12, + 24, + 8, 20, 8, - 16, + 20, + 12, + 24, 8, - 16, + 20, + 12, + 24, + 8, + 20, + 12, + 24, + 12, + 24, + 8, + 20, + 8, + 20, 0, 4, - 12, + 24, 4, - 12, + 24, 4, - 12, + 24, 4, - 12, + 24, 4, 4, 4, 12, - 20, + 32, 16, - 24, + 36, 12, - 20, + 32, 16, - 24, + 36, 12, - 20, + 32, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 12, - 20, + 32, 12, 0, 20, 4, 4, - 8, + 24, 4, 8, 4, @@ -662,33 +662,33 @@ 4, 4, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 32, 12, - 20, + 32, 8, - 16, + 28, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 28, 12, - 20, + 32, 12, - 20, + 32, 8, - 16, + 28, 8, - 16, + 28, 0, 0, 0, @@ -972,39 +972,39 @@ 4, 4, 12, - 36, - 16, 40, + 16, + 44, 12, - 36, - 16, 40, + 16, + 44, 12, - 36, - 16, 40, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, + 16, + 44, 12, - 36, + 40, 12, 12, 12, 4, 4, - 24, + 32, 4, 24, 4, @@ -1018,13 +1018,13 @@ 12, 36, 8, - 32, + 40, 12, + 40, + 8, 36, 8, - 32, - 8, - 32, + 36, 12, 36, 8, @@ -1032,13 +1032,13 @@ 12, 36, 8, - 32, - 12, 36, 12, - 36, + 40, + 12, + 40, 8, - 32, + 36, 8, 32, 0, diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index 343a39a27..fdceaee65 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ -26,6 +26,7 @@ # define _R1_REGNO 1 # define _R7_REGNO 7 # define _R13_REGNO 13 +# define _FP_REGNO _R13_REGNO # define _R14_REGNO 14 # define _R15_REGNO 15 # define u12_p(i0) ((i0) >= 0 && (i0) <= 4095) diff --git a/lib/jit_s390x-fpu.c b/lib/jit_s390x-fpu.c index e263f78ba..d5b681daa 100644 --- a/lib/jit_s390x-fpu.c +++ b/lib/jit_s390x-fpu.c @@ -852,8 +852,22 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_f(jit_state_t *_jit, jit_int32_t 
r0, jit_float32_t *i0) { + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + if (*(jit_int32_t *)i0 == 0) LZER(r0); + else if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg_but_zero(); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(-4, _FP_REGNO, rn(reg)); + jit_unget_reg_but_zero(reg); + ldxi_f(r0, _FP_REGNO, -4); + } else ldi_f(r0, (jit_word_t)i0); } @@ -868,8 +882,22 @@ _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { + union { + jit_int64_t l; + jit_float64_t d; + } data; + jit_int32_t reg; + if (*(jit_int64_t *)i0 == 0) LZDR(r0); + else if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg_but_zero(); + movi(rn(reg), data.l); + stxi_l(-8, _FP_REGNO, rn(reg)); + jit_unget_reg_but_zero(reg); + ldxi_d(r0, _FP_REGNO, -8); + } else ldi_d(r0, (jit_word_t)i0); } diff --git a/lib/jit_s390x-sz.c b/lib/jit_s390x-sz.c index c12d6aff1..e7b8ea290 100644 --- a/lib/jit_s390x-sz.c +++ b/lib/jit_s390x-sz.c @@ -174,50 +174,50 @@ 36, 0, 6, - 16, + 26, 8, - 16, + 26, 6, - 16, + 26, 8, - 16, + 26, 4, 4, 4, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 4, 4, 4, 4, 2, - 10, + 20, 4, 16, 12, @@ -227,79 +227,79 @@ 12, 4, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 0, 6, - 16, + 26, 8, - 16, + 26, 6, - 16, + 26, 8, - 16, + 26, 4, 4, 4, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 4, 4, 4, 4, 2, - 10, + 24, 4, 16, 12, @@ -309,33 +309,33 @@ 
12, 4, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 34, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 0, 0, 0, diff --git a/lib/jit_s390x.c b/lib/jit_s390x.c index 26f286704..fb6d4e483 100644 --- a/lib/jit_s390x.c +++ b/lib/jit_s390x.c @@ -116,8 +116,10 @@ _jit_prolog(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; _jitc->function->self.size = stack_framesize; _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.aoff = _jitc->function->self.alen = - _jitc->function->self.aoff = 0; + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + /* preallocate 8 bytes if not using a constant data buffer */ + if (_jitc->no_data) + _jitc->function->self.aoff = -8; _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -592,18 +594,6 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), node->v.w); \ break -#define case_rf(name) \ - case jit_code_##name##i_f: \ - assert_data(node); \ - name##_f(rn(node->u.w), \ - (jit_float32_t *)node->v.n->u.w); \ - break -#define case_rd(name) \ - case jit_code_##name##i_d: \ - assert_data(node); \ - name##_d(rn(node->u.w), \ - (jit_float64_t *)node->v.n->u.w); \ - break #define case_wr(name, type) \ case jit_code_##name##i##type: \ name##i##type(node->u.w, rn(node->v.w)); \ diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index 94fed634c..52d760d9f 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -150,7 +150,8 @@ static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1) static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t); # define extr_d_f(r0, r1) FDTOS(r1, r0) -# define movi_f(r0, i0) ldi_f(r0, (jit_word_t)i0) +# define 
movi_f(r0, i0) _movi_f(_jit, r0, i0) +static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); # define movr_f(r0, r1) FMOVS(r1, r0) # define negr_f(r0, r1) FNEGS(r1, r0) # define absr_f(r0, r1) FABSS(r1, r0) @@ -162,7 +163,8 @@ static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_d_i(r0, r1) _truncr_d_i(_jit, r0, r1) static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t); # define extr_f_d(r0, r1) FSTOD(r1, r0) -# define movi_d(r0, i0) ldi_d(r0, (jit_word_t)i0) +# define movi_d(r0, i0) _movi_d(_jit, r0, i0) +static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*); # define movr_d(r0, r1) _movr_d(_jit, r0, r1) static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define negr_d(r0, r1) _negr_d(_jit, r0, r1) @@ -381,6 +383,50 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, ii(v.v); } +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(-8, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, -8); + } + else + ldi_f(r0, (jit_word_t)i0); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_int32_t i[2]; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i[0]); + stxi_i(-8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.i[1]); + stxi_i(-4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, -8); + } + else + ldi_d(r0, (jit_word_t)i0); +} + static void _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 9e004c0ab..6056af221 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 28 +#define JIT_INSTR_MAX 40 
0, 0, 0, @@ -185,39 +185,39 @@ 4, 4, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 12, 0, 12, 4, 4, - 12, + 16, 4, 12, 4, @@ -237,69 +237,69 @@ 12, 24, 12, - 24, + 28, + 12, + 28, + 12, + 28, + 12, + 28, + 12, + 28, + 12, + 28, 12, 24, 12, 24, 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, + 28, 0, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 8, 8, 4, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 12, 0, 16, 4, 8, - 12, + 24, 4, 12, 4, @@ -309,33 +309,33 @@ 4, 4, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 36, 0, 0, 0, diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index c22076aa1..f1f4bffc5 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -624,10 +624,17 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(node->u.w, rn(node->v.w)); \ break -#define case_rf(name, type) \ +#define case_rf(name) \ case jit_code_##name##i##type: \ assert(node->flag & jit_flag_data); \ - name##i##type(rn(node->u.w), node->v.n->u.w); \ + name##_f(rn(node->u.w), \ + (jit_float32_t *)node->v.n->u.w); \ + break +#define case_rd(name) \ + case jit_code_##name##i_d: \ + assert(node->flag & jit_flag_data); \ + name##_d(rn(node->u.w), \ + (jit_float64_t *)node->v.n->u.w); \ break #define case_rrr(name, type) \ case jit_code_##name##r##type: \ @@ -930,7 +937,10 @@ _emit_code(jit_state_t *_jit) case_rrr(stx, _f); case_wrr(stx, _f); case_rr(mov, _f); - case_rf(mov, _f); + case jit_code_movi_f: + 
assert(node->flag & jit_flag_data); + movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w); + break; case_brr(blt, _f); case_brf(blt, _f, 32); case_brr(ble, _f); @@ -1009,7 +1019,10 @@ _emit_code(jit_state_t *_jit) case_rrr(stx, _d); case_wrr(stx, _d); case_rr(mov, _d); - case_rf(mov, _d); + case jit_code_movi_d: + assert(node->flag & jit_flag_data); + movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w); + break; case_brr(blt, _d); case_brf(blt, _d, 64); case_brr(ble, _d); diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index 5082165a4..ca5f8a4f6 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -765,24 +765,27 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; + jit_bool_t ldi; data.f = *i0; if (data.f == 0.0 && !(data.i & 0x80000000)) xorpsr(r0, r0); else { + ldi = !_jitc->no_data; #if __WORDSIZE == 64 - if (can_sign_extend_int_p((jit_word_t)i0)) + /* if will allocate a register for offset, just use immediate */ + if (ldi && !can_sign_extend_int_p((jit_word_t)i0)) + ldi = 0; +#endif + if (ldi) sse_ldi_f(r0, (jit_word_t)i0); else { - /* if will allocate a register for offset, just use immediate */ - jit_int32_t reg = jit_get_reg(jit_class_gpr); + reg = jit_get_reg(jit_class_gpr); movi(rn(reg), data.i); movdlxr(r0, rn(reg)); jit_unget_reg(reg); } -#else - sse_ldi_f(r0, (jit_word_t)i0); -#endif } } @@ -1209,24 +1212,36 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_word_t w; jit_float64_t d; } data; + jit_int32_t reg; + jit_bool_t ldi; data.d = *i0; if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) xorpdr(r0, r0); else { + ldi = !_jitc->no_data; #if __WORDSIZE == 64 - if (can_sign_extend_int_p((jit_word_t)i0)) + /* if will allocate a register for offset, just use immediate */ + if (ldi && !can_sign_extend_int_p((jit_word_t)i0)) + ldi = 0; +#endif + if (ldi) sse_ldi_d(r0, (jit_word_t)i0); else { - /* if will allocate a register for offset, just use 
immediate */ - jit_int32_t reg = jit_get_reg(jit_class_gpr); + reg = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 64 movi(rn(reg), data.w); movdqxr(r0, rn(reg)); jit_unget_reg(reg); - } #else - sse_ldi_d(r0, (jit_word_t)i0); + movi(rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); #endif + } } } diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index ea766361c..2e76e0cf2 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 26 +#define JIT_INSTR_MAX 42 0, 0, 0, @@ -169,141 +169,59 @@ 9, 0, 5, - 2, - 5, + 4, + 7, 24, 0, 8, - 16, - 12, - 16, - 8, - 16, - 12, - 16, - 12, - 12, - 6, - 13, - 21, - 13, - 21, - 15, - 22, - 13, - 21, - 13, - 21, - 18, - 25, - 13, - 21, - 13, - 21, - 13, - 20, - 13, - 21, - 13, - 21, - 13, - 20, - 13, - 20, - 13, - 20, - 7, - 0, - 7, - 4, - 10, - 8, - 4, - 8, - 5, - 5, - 6, - 10, - 7, - 7, - 10, - 18, - 10, - 18, - 12, - 20, - 10, - 18, - 10, - 18, - 13, - 21, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, 17, - 10, - 18, - 0, - 8, - 16, 12, - 16, + 17, 8, - 16, + 17, + 12, + 17, + 12, 12, - 16, - 18, - 13, 6, 13, - 21, + 27, 13, - 21, + 27, 15, - 23, + 29, 13, - 21, + 27, 13, - 21, + 27, 18, - 26, + 32, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, - 7, + 27, + 8, 0, - 7, + 8, 4, 10, - 8, + 15, 4, 8, 5, - 8, + 5, 6, 10, 7, @@ -337,6 +255,87 @@ 10, 18, 0, + 8, + 26, + 12, + 26, + 8, + 26, + 12, + 26, + 18, + 13, + 6, + 13, + 37, + 13, + 37, + 15, + 39, + 13, + 37, + 13, + 37, + 18, + 42, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 8, + 0, + 8, + 4, + 10, + 24, + 4, + 8, + 5, + 8, + 6, + 10, + 7, + 7, + 10, + 28, + 10, + 28, + 12, + 30, + 10, + 28, + 10, + 28, + 13, + 31, 
+ 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, 0, 0, 0, @@ -345,7 +344,8 @@ 0, 0, 0, - 14, + 0, + 10, 10, #endif /* __WORDSIZE */ diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c index 253f6af8d..5f8e014c7 100644 --- a/lib/jit_x86-x87.c +++ b/lib/jit_x86-x87.c @@ -810,6 +810,7 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; data.f = *i0; if (data.f == 0.0 && !(data.i & 0x80000000)) @@ -827,7 +828,15 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) else if (data.f == 0.6931471805599453094172323683399f) fldln2(); else { - x87_ldi_f(r0, (jit_word_t)i0); + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); + } + else + x87_ldi_f(r0, (jit_word_t)i0); return; } fstpr(r0 + 1); @@ -897,7 +906,7 @@ _x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_str_f(rn(reg), r0); jit_unget_reg(reg); } @@ -928,7 +937,7 @@ _x87_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_stxr_f(rn(reg), r0, r1); jit_unget_reg(reg); } @@ -966,6 +975,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_word_t w; jit_float64_t d; } data; + jit_int32_t reg; data.d = *i0; if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) @@ -983,7 +993,22 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) else if (data.d == 0.6931471805599453094172323683399) fldln2(); else { - x87_ldi_d(r0, (jit_word_t)i0); + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 32 + 
movi(rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); +#else + movi(rn(reg), data.w); + stxi_l(CVT_OFFSET, _RBP_REGNO, rn(reg)); +#endif + jit_unget_reg(reg); + x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); + } + else + x87_ldi_d(r0, (jit_word_t)i0); return; } fstpr(r0 + 1); @@ -1138,7 +1163,7 @@ _x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_str_d(rn(reg), r0); jit_unget_reg(reg); } @@ -1169,7 +1194,7 @@ _x87_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_stxr_d(rn(reg), r0, r1); jit_unget_reg(reg); } diff --git a/lib/lightning.c b/lib/lightning.c index ee21b1212..fa7743036 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -80,6 +80,10 @@ static void _bmp_clr(jit_state_t*, jit_word_t) maybe_unused; #define bmp_tst(bit) _bmp_tst(_jit, bit) static jit_bool_t _bmp_tst(jit_state_t*, jit_word_t); +#define jit_dataset() _jit_dataset(_jit) +static void +_jit_dataset(jit_state_t *_jit); + #define jit_setup(block) _jit_setup(_jit, block) static void _jit_setup(jit_state_t *_jit, jit_block_t *block); @@ -570,10 +574,10 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, size = (_jit->data.length + length + 4096) & - 4095; assert(size >= _jit->data.length); - if (_jit->data.ptr == NULL) - jit_alloc((jit_pointer_t *)&_jit->data.ptr, size); + if (_jitc->data.ptr == NULL) + jit_alloc((jit_pointer_t *)&_jitc->data.ptr, size); else - jit_realloc((jit_pointer_t *)&_jit->data.ptr, + jit_realloc((jit_pointer_t *)&_jitc->data.ptr, _jit->data.length, size); _jit->data.length = size; } @@ -585,7 +589,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, node = _jitc->data.table[key]; for (; 
node; node = node->next) { if (node->v.w == length && - memcmp(_jit->data.ptr + node->u.w, data, length) == 0) + memcmp(_jitc->data.ptr + node->u.w, data, length) == 0) break; } @@ -608,7 +612,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, } node->u.w = _jitc->data.offset; node->v.w = length; - memcpy(_jit->data.ptr + _jitc->data.offset, data, length); + memcpy(_jitc->data.ptr + _jitc->data.offset, data, length); _jitc->data.offset += length; node->next = _jitc->data.table[key]; @@ -630,7 +634,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, temp = _jitc->data.table[i]; for (; temp; temp = next) { next = temp->next; - key = hash_data(_jit->data.ptr + temp->u.w, temp->v.w) & + key = hash_data(_jitc->data.ptr + temp->u.w, temp->v.w) & ((_jitc->data.size << 1) - 1); temp->next = hash[key]; hash[key] = temp; @@ -889,7 +893,8 @@ _jit_destroy_state(jit_state_t *_jit) { if (!_jit->user_code) munmap(_jit->code.ptr, _jit->code.length); - munmap(_jit->data.ptr, _jit->data.length); + if (!_jit->user_data) + munmap(_jit->data.ptr, _jit->data.length); jit_free((jit_pointer_t *)&_jit); } @@ -1417,15 +1422,11 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) void _jit_optimize(jit_state_t *_jit) { - jit_uint8_t *ptr; jit_bool_t jump; jit_int32_t mask; jit_node_t *node; jit_block_t *block; jit_word_t offset; -#if defined(__sgi) - int mmap_fd; -#endif _jitc->function = NULL; @@ -1534,38 +1535,6 @@ _jit_optimize(jit_state_t *_jit) break; } } - - /* ensure it is aligned */ - _jitc->data.offset = (_jitc->data.offset + 7) & -8; - - /* create read only data buffer */ - _jit->data.length = (_jitc->data.offset + - /* reserve space for annotations */ - _jitc->note.size + 4095) & -4096; -#if defined(__sgi) - mmap_fd = open("/dev/zero", O_RDWR); -#endif - ptr = mmap(NULL, _jit->data.length, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); - assert(ptr != MAP_FAILED); -#if defined(__sgi) - close(mmap_fd); -#endif - memcpy(ptr, 
_jit->data.ptr, _jitc->data.offset); - jit_free((jit_pointer_t *)&_jit->data.ptr); - _jit->data.ptr = ptr; - - /* to be filled with note contents once offsets are known */ - _jitc->note.base = ptr + _jitc->data.offset; - memset(_jitc->note.base, 0, _jit->data.length - _jitc->data.offset); - - for (offset = 0; offset < _jitc->data.size; offset++) { - for (node = _jitc->data.table[offset]; node; node = node->next) { - node->flag |= jit_flag_patch; - node->u.w = (jit_word_t)(_jit->data.ptr + node->u.w); - } - } } void @@ -1713,6 +1682,9 @@ _jit_realize(jit_state_t *_jit) jit_optimize(); _jitc->realize = 1; + /* ensure it is aligned */ + _jitc->data.offset = (_jitc->data.offset + 7) & -8; + #if GET_JIT_SIZE /* Heuristic to guess code buffer size */ _jitc->mult = 4; @@ -1722,6 +1694,68 @@ _jit_realize(jit_state_t *_jit) #endif } +void +_jit_dataset(jit_state_t *_jit) +{ + jit_uint8_t *ptr; + jit_node_t *node; + jit_word_t offset; +#if defined(__sgi) + int mmap_fd; +#endif + + assert(!_jitc->dataset); + if (!_jit->user_data) { + + /* create read only data buffer */ + _jit->data.length = (_jitc->data.offset + + /* reserve space for annotations */ + _jitc->note.size + 4095) & -4096; +#if defined(__sgi) + mmap_fd = open("/dev/zero", O_RDWR); +#endif + _jit->data.ptr = mmap(NULL, _jit->data.length, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + assert(_jit->data.ptr != MAP_FAILED); +#if defined(__sgi) + close(mmap_fd); +#endif + } + + if (!_jitc->no_data) + memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset); + + if (_jitc->no_note) { + _jit->note.length = 0; + _jitc->note.size = 0; + } + else { + _jitc->note.base = _jit->data.ptr; + if (!_jitc->no_data) + _jitc->note.base += _jitc->data.offset; + memset(_jitc->note.base, 0, _jitc->note.size); + } + + if (_jit->user_data) + /* Need the temporary hashed data until jit_emit is finished */ + ptr = _jitc->no_data ? 
_jitc->data.ptr : _jit->data.ptr; + else { + ptr = _jit->data.ptr; + /* Temporary hashed data no longer required */ + jit_free((jit_pointer_t *)&_jitc->data.ptr); + } + + for (offset = 0; offset < _jitc->data.size; offset++) { + for (node = _jitc->data.table[offset]; node; node = node->next) { + node->flag |= jit_flag_patch; + node->u.w = (jit_word_t)(ptr + node->u.w); + } + } + + _jitc->dataset = 1; +} + jit_pointer_t _jit_get_code(jit_state_t *_jit, jit_word_t *length) { @@ -1747,6 +1781,39 @@ _jit_set_code(jit_state_t *_jit, jit_pointer_t ptr, jit_word_t length) _jit->user_code = 1; } +jit_pointer_t +_jit_get_data(jit_state_t *_jit, jit_word_t *data_size, jit_word_t *note_size) +{ + assert(_jitc->realize); + if (data_size) + *data_size = _jitc->data.offset; + if (note_size) + *note_size = _jitc->note.size; + return (_jit->data.ptr); +} + +void +_jit_set_data(jit_state_t *_jit, jit_pointer_t ptr, + jit_word_t length, jit_word_t flags) +{ + assert(_jitc->realize); + if (flags & JIT_DISABLE_DATA) + _jitc->no_data = 1; + else + assert(length >= _jitc->data.offset); + if (flags & JIT_DISABLE_NOTE) + _jitc->no_note = 1; + else { + if (flags & JIT_DISABLE_DATA) + assert(length >= _jitc->note.size); + else + assert(length >= _jitc->data.offset + _jitc->note.size); + } + _jit->data.ptr = ptr; + _jit->data.length = length; + _jit->user_data = 1; +} + jit_pointer_t _jit_emit(jit_state_t *_jit) { @@ -1761,6 +1828,9 @@ _jit_emit(jit_state_t *_jit) if (!_jitc->realize) jit_realize(); + if (!_jitc->dataset) + jit_dataset(); + _jitc->emit = 1; if (!_jit->user_code) { @@ -1829,10 +1899,15 @@ _jit_emit(jit_state_t *_jit) #endif _jitc->done = 1; - jit_annotate(); + if (!_jitc->no_note) + jit_annotate(); - result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); - assert(result == 0); + if (_jit->user_data) + jit_free((jit_pointer_t *)&_jitc->data.ptr); + else { + result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); + assert(result == 0); + } if 
(!_jit->user_code) { result = mprotect(_jit->code.ptr, _jit->code.length, PROT_READ | PROT_EXEC);