From 33ee2337c7cad06b40fa1179f97e0c6ef2bd3dc9 Mon Sep 17 00:00:00 2001 From: pcpa Date: Wed, 12 Mar 2014 14:50:31 -0300 Subject: [PATCH] Implement the new jit_set_data interface. * include/lightning.h, include/lightning/jit_private.h, lib/lightning.c: Implement the new jit_set_data() interface, and the new jit_get_data() helper. Like jit_set_code(), jit_realize() should be called before jit_set_data(). The most common usage should be jit_set_data(JIT_DISABLE_DATA | JIT_DISABLE_NOTE), to force synthesize any float/double constant in the stack and not generate any debug information. * lib/jit_note.c: Minor change to debug note generation as now it uses an alternate temporary data buffer during constants and debug generation to accommodate the possibility of the user setting an alternate data buffer. * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data. * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c, lib/jit_s390x-sz.c: Update for several instructions that now have a different maximum length due to jit_set_data. * lib/jit_mips-fpu.c: Implement jit_set_data, but missing validation on n32 and n64 abis (and/or big endian). * lib/jit_mips-sz.c: Update for changes in o32. * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing validation on Darwin PPC. * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and 64 bit. * lib/jit_ia64-fpu.c: Implement untested jit_set_data. * TODO: Add note to list ports that were not tested for the new jit_set_data() feature, due to no longer having access to them. * check/nodata.c: New file implementing a simple test exercising several different conditions created by jit_set_data(). 
* check/check.nodata.sh: New file implementing a wrapper over the existing *.tst files, that runs all tests without using a data buffer for constants; only meaningful (and enabled) on architectures that used to store float/double constants on a read only data buffer. * configure.ac, check/Makefile.am: Update for the new test cases. * check/lightning.c: Implement the new "-d" option that sets an internal flag to call jit_set_data() disable constants and debug, that is, using only a pure code buffer. --- ChangeLog | 57 +++++++++ TODO | 20 +++ check/Makefile.am | 55 +++++++- check/check.nodata.sh | 15 +++ check/lightning.c | 18 ++- check/nodata.c | 106 ++++++++++++++++ configure.ac | 4 +- include/lightning.h | 7 + include/lightning/jit_private.h | 5 + lib/jit_hppa-fpu.c | 38 +++++- lib/jit_hppa-sz.c | 66 +++++----- lib/jit_ia64-fpu.c | 46 ++++++- lib/jit_mips-fpu.c | 65 ++++++++-- lib/jit_mips-sz.c | 2 +- lib/jit_note.c | 2 +- lib/jit_ppc-fpu.c | 42 +++++- lib/jit_ppc-sz.c | 194 ++++++++++++++-------------- lib/jit_s390x-cpu.c | 1 + lib/jit_s390x-fpu.c | 28 ++++ lib/jit_s390x-sz.c | 132 +++++++++---------- lib/jit_s390x.c | 18 +-- lib/jit_sparc-fpu.c | 50 +++++++- lib/jit_sparc-sz.c | 122 +++++++++--------- lib/jit_sparc.c | 21 ++- lib/jit_x86-sse.c | 37 ++++-- lib/jit_x86-sz.c | 218 ++++++++++++++++---------------- lib/jit_x86-x87.c | 37 +++++- lib/lightning.c | 167 +++++++++++++++++------- 28 files changed, 1103 insertions(+), 470 deletions(-) create mode 100755 check/check.nodata.sh create mode 100644 check/nodata.c diff --git a/ChangeLog b/ChangeLog index 14772894f..06bab4642 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,60 @@ +2014-12-03 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/lightning.c: Implement the new jit_set_data() interface, + and the new jit_get_data() helper. Like jit_set_code(), + jit_realize() should be called before jit_set_data(). 
+ The most common usage should be jit_set_data(JIT_DISABLE_DATA + | JIT_DISABLE_NOTE), to force synthesize any float/double + constant in the stack and not generate any debug information. + + * lib/jit_note.c: Minor change to debug note generation as + now it uses an alternate temporary data buffer during constants + and debug generation to accommodate the possibility of the user + setting an alternate data buffer. + + * lib/jit_hppa-fpu.c, lib/jit_s390x.c, lib/jit_s390x-cpu.c, + lib/jit_s390x-fpu.c, lib/jit_sparc.c, lib/jit_sparc-fpu.c, + lib/jit_x86-sse.c, lib/jit_x86-x87.c: Implement jit_set_data. + + * lib/jit_hppa-sz.c, lib/jit_sparc-sz.c, lib/jit_x86-sz.c, + lib/jit_s390x-sz.c: Update for several instructions that now + have a different maximum length due to jit_set_data. + + * lib/jit_mips-fpu.c: Implement jit_set_data, but missing + validation on n32 and n64 abis (and/or big endian). + + * lib/jit_mips-sz.c: Update for changes in o32. + + * lib/jit_ppc-fpu.c: Implement jit_set_data, but missing + validation on Darwin PPC. + + * lib/jit_ppc-sz.c: Update for changes in powerpc 32 and + 64 bit. + + * lib/jit_ia64-fpu.c: Implement untested jit_set_data. + + * TODO: Add note to list ports that were not tested for the + new jit_set_data() feature, due to no longer having access + to them. + + * check/nodata.c: New file implementing a simple test exercising + several different conditions created by jit_set_data(). + + * check/check.nodata.sh: New file implementing a wrapper + over the existing *.tst files, that runs all tests without + using a data buffer for constants; only meaningful (and + enabled) on architectures that used to store float/double + constants on a read only data buffer. + + * configure.ac, check/Makefile.am: Update for the new test + cases. + + * check/lightning.c: Implement the new "-d" option that + sets an internal flag to call jit_set_data() disable + constants and debug, that is, using only a pure code + buffer. 
+ 2014-11-03 Paulo Andrade * include/lightning.h, include/lightning/jit_private.h, diff --git a/TODO b/TODO index 59ac79d4a..676af0293 100644 --- a/TODO +++ b/TODO @@ -6,3 +6,23 @@ * Write a simple higher level language implementation generating jit with lightning, that could be some lisp or C like language. + + * rerun ./configure --enable-devel-get-jit-size and regenerate + the related jit_$arch-sz.c for the ports where nodata is + meaningful: + hppa (done) + i586 (done) + ia64 + mips o32 (done) + mips n32 + mips n64 + powerpc 32 (done) + powerpc 64 (done) + ppc + s390x (done) + sparc (done) + x86_64 (done) + Missing ones are due to no longer having (remote) access to such hosts + and may be broken with jit_set_data(..., JIT_DISABLE_DATA). + (ia64 hp-ux or linux), (irix mips for 32 or 64 abi), and + (darwin ppc). diff --git a/check/Makefile.am b/check/Makefile.am index 22a2035bf..c900fbd8a 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -16,7 +16,7 @@ AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE -check_PROGRAMS = lightning ccall self setcode +check_PROGRAMS = lightning ccall self setcode nodata lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c @@ -30,6 +30,9 @@ self_SOURCES = self.c setcode_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) setcode_SOURCES = setcode.c +nodata_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +nodata_SOURCES = nodata.c + $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la @@ -81,6 +84,8 @@ EXTRA_DIST = \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ + check.nodata.sh \ + check.x87.nodata.sh \ run-test all.tst base_TESTS = \ @@ -130,6 +135,28 @@ x87_TESTS = \ $(x87_TESTS): check.x87.sh $(LN_S) $(srcdir)/check.x87.sh $@ TESTS += $(x87_TESTS) + +#x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS)) +x87_nodata_TESTS = \ + 3to2.x87.nodata add.x87.nodata allocai.x87.nodata \ + bp.x87.nodata divi.x87.nodata 
fib.x87.nodata rpn.x87.nodata \ + ldstr.x87.nodata ldsti.x87.nodata \ + ldstxr.x87.nodata ldstxi.x87.nodata \ + ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata \ + cvt.x87.nodata branch.x87.nodata \ + alu_add.x87.nodata alux_add.x87.nodata \ + alu_sub.x87.nodata alux_sub.x87.nodata \ + alu_mul.x87.nodata alu_div.x87.nodata alu_rem.x87.nodata \ + alu_and.x87.nodata alu_or.x87.nodata alu_xor.x87.nodata \ + alu_lsh.x87.nodata alu_rsh.x87.nodata \ + alu_com.x87.nodata alu_neg.x87.nodata \ + fop_abs.x87.nodata fop_sqrt.x87.nodata \ + varargs.x87.nodata stack.x87.nodata \ + clobber.x87.nodata carry.x87.nodata call.x87.nodata \ + float.x87.nodata +$(x87_nodata_TESTS): check.x87.nodata.sh + $(LN_S) $(srcdir)/check.x87.nodata.sh $@ +TESTS += $(x87_nodata_TESTS) endif if test_arm_arm @@ -180,7 +207,31 @@ $(swf_TESTS): check.swf.sh TESTS += $(swf_TESTS) endif -TESTS += ccall self setcode +if test_nodata +#nodata_TESTS = $(addsuffix .nodata, $(base_TESTS)) +nodata_TESTS = \ + 3to2.nodata add.nodata allocai.nodata \ + bp.nodata divi.nodata fib.nodata rpn.nodata \ + ldstr.nodata ldsti.nodata \ + ldstxr.nodata ldstxi.nodata \ + ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ + cvt.nodata branch.nodata \ + alu_add.nodata alux_add.nodata \ + alu_sub.nodata alux_sub.nodata \ + alu_mul.nodata alu_div.nodata alu_rem.nodata \ + alu_and.nodata alu_or.nodata alu_xor.nodata \ + alu_lsh.nodata alu_rsh.nodata \ + alu_com.nodata alu_neg.nodata \ + fop_abs.nodata fop_sqrt.nodata \ + varargs.nodata stack.nodata \ + clobber.nodata carry.nodata call.nodata \ + float.nodata +$(nodata_TESTS): check.nodata.sh + $(LN_S) $(srcdir)/check.nodata.sh $@ +TESTS += $(nodata_TESTS) +endif + +TESTS += ccall self setcode nodata CLEANFILES = $(TESTS) #TESTS_ENVIRONMENT=$(srcdir)/run-test; diff --git a/check/check.nodata.sh b/check/check.nodata.sh new file mode 100755 index 000000000..0fbc4e90f --- /dev/null +++ b/check/check.nodata.sh @@ -0,0 +1,15 @@ +#!/bin/sh +test=`basename $0 | sed -e 
's|\.nodata$||'` +./lightning -d $srcdir/$test.tst | tr -d \\r > $test.out +if test $? != 0; then + exit $? +fi + +cmp -s $srcdir/$test.ok $test.out +result=$? +if test $result != 0; then + diff $srcdir/$test.ok $test.out + rm $test.out + exit 1 +fi +rm $test.out diff --git a/check/lightning.c b/check/lightning.c index 2cf38edc8..f0fd19be6 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -548,6 +548,7 @@ static void rehash(hash_t *hash); */ static jit_state_t *_jit; static int flag_verbose; +static int flag_data; static int flag_disasm; static char *progname; static parser_t parser; @@ -3664,6 +3665,11 @@ execute(int argc, char *argv[]) patch = next; } + if (flag_data == 0) { + jit_realize(); + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + } + function = jit_emit(); if (flag_verbose > 1 || flag_disasm) { jit_print(); @@ -3876,6 +3882,7 @@ Usage: %s [jit assembler options] file [jit program options]\n\ Jit assembler options:\n\ -help Display this information\n\ -v[0-3] Verbose output level\n\ + -d Do not use a data buffer\n\ -D[=] Preprocessor options\n" # if defined(__i386__) && __WORDSIZE == 32 " -mx87=1 Force using x87 when sse2 available\n" @@ -3906,9 +3913,10 @@ int main(int argc, char *argv[]) { #if HAVE_GETOPT_LONG_ONLY - static const char *short_options = "v::"; + static const char *short_options = "dv::"; static struct option long_options[] = { { "help", 0, 0, 'h' }, + { "data", 2, 0, 'd' }, # if defined(__i386__) && __WORDSIZE == 32 { "mx87", 2, 0, '7' }, # endif @@ -3939,6 +3947,7 @@ main(int argc, char *argv[]) DL_HANDLE = dlopen(NULL, RTLD_LAZY); #endif + flag_data = 1; #if HAVE_GETOPT_LONG_ONLY for (;;) { if ((opt_short = getopt_long_only(argc, argv, short_options, @@ -3958,6 +3967,9 @@ main(int argc, char *argv[]) else flag_verbose = 1; break; + case 'd': + flag_data = 0; + break; #if defined(__i386__) && __WORDSIZE == 32 case '7': if (optarg) { @@ -4022,9 +4034,11 @@ main(int argc, char *argv[]) } } #else - while ((opt_short 
= getopt(argc, argv, "hv")) >= 0) { + while ((opt_short = getopt(argc, argv, "hvd")) >= 0) { if (opt_short == 'v') ++flag_verbose; + else if (opt_short == 'd') + flag_data = 0; else usage(); } diff --git a/check/nodata.c b/check/nodata.c new file mode 100644 index 000000000..0e594c327 --- /dev/null +++ b/check/nodata.c @@ -0,0 +1,106 @@ +/* + * Simple test of using an alternate buffer for the code. + */ + +#include <lightning.h> +#include <stdio.h> +#include <assert.h> +#include <sys/mman.h> +#if defined(__sgi) +# include <fcntl.h> +#endif + +#ifndef MAP_ANON +# define MAP_ANON MAP_ANONYMOUS +# ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS 0 +# endif +#endif + +#if !defined(__sgi) +#define mmap_fd -1 +#endif + +jit_uint8_t *data; +jit_state_t *_jit; +jit_word_t data_length; +jit_word_t note_length; +#if defined(__sgi) +int mmap_fd; +#endif +void (*function)(void); + +void +gencode(jit_word_t flags) +{ + jit_word_t offset; + jit_word_t length; + + _jit = jit_new_state(); + + jit_name("main"); + jit_prolog(); + jit_prepare(); + jit_pushargi((jit_word_t)"%f\n"); + jit_ellipsis(); + jit_pushargi_d(1.5); + jit_finishi(printf); + jit_note("nodata.c", __LINE__); + + /* call to jit_realize() is only required when using an alternate + * code buffer. Note that not using mmap'ed memory may not work + * on several ports and/or operating system versions */ + jit_realize(); + + if (jit_get_data(&data_length, &note_length) != NULL) + abort(); + + length = 0; + if (!(flags & JIT_DISABLE_DATA)) + length += data_length; + if (!(flags & JIT_DISABLE_NOTE)) + length += note_length; + + /* check that a too small buffer fails */ + if (flags) + jit_set_data(length ? 
data : NULL, length, flags); + + /* and calling again with enough space works */ + offset = (length + 7) & -8; + function = jit_emit(); + if (function == NULL) + abort(); + + jit_clear_state(); + (*function)(); + jit_destroy_state(); +} + +int +main(int argc, char *argv[]) +{ +#if defined(__sgi) + mmap_fd = open("/dev/zero", O_RDWR); +#endif + + data = mmap(NULL, 4096, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + assert(data != MAP_FAILED); +#if defined(__sgi) + close(mmap_fd); +#endif + + init_jit(argv[0]); + + gencode(0); + gencode(JIT_DISABLE_DATA); + gencode(JIT_DISABLE_NOTE); + gencode(JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + finish_jit(); + + munmap(data, 4096); + + return (0); +} diff --git a/configure.ac b/configure.ac index 498d1c1cd..3901d7dd5 100644 --- a/configure.ac +++ b/configure.ac @@ -221,7 +221,9 @@ AM_CONDITIONAL(test_x86_x87, [test x$ac_cv_test_x86_x87 = xyes]) AM_CONDITIONAL(test_arm_arm, [test x$ac_cv_test_arm_arm = xyes]) AM_CONDITIONAL(test_arm_swf, [test x$ac_cv_test_arm_swf = xyes]) -if test $cpu=arm; then +AM_CONDITIONAL(test_nodata, [test cpu-$cpu = cpu-mips -o cpu-$cpu = cpu-ppc -o cpu-$cpu = cpu-sparc -o cpu-$cpu = cpu-x86 -o cpu-$cpu = cpu-ia64 -o cpu-$cpu = cpu-hppa -o cpu-$cpu = cpu-s390x]) + +if test $cpu = arm; then AC_CHECK_LIB(m, sqrtf, , [AC_MSG_ERROR([sqrtf required but not available])]) fi diff --git a/include/lightning.h b/include/lightning.h index 84521c39a..2f6d43f94 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -152,6 +152,9 @@ typedef jit_int32_t jit_fpr_t; #define JIT_V_NUM jit_v_num() #define JIT_F_NUM jit_f_num() +#define JIT_DISABLE_DATA 1 /* force synthesize of constants */ +#define JIT_DISABLE_NOTE 2 /* disable debug info generation */ + #define jit_class_chk 0x02000000 /* just checking */ #define jit_class_arg 0x08000000 /* argument register */ #define jit_class_sav 0x10000000 /* callee save */ @@ -893,6 +896,10 @@ extern void _jit_realize(jit_state_t*); extern jit_pointer_t 
_jit_get_code(jit_state_t*, jit_word_t*); #define jit_set_code(u,v) _jit_set_code(_jit,u,v) extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t); +#define jit_get_data(u,v) _jit_get_data(_jit,u,v) +extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*); +#define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w) +extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 1759bbe69..ed543bc07 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -337,6 +337,7 @@ struct jit_compiler { jit_node_t *head; jit_node_t *tail; jit_uint32_t realize : 1; /* jit_realize() called? */ + jit_uint32_t dataset : 1; /* jit_dataset() called? */ jit_uint32_t done : 1; /* emit state finished */ jit_uint32_t emit : 1; /* emit state entered */ jit_uint32_t again : 1; /* start over emiting function */ @@ -344,6 +345,8 @@ struct jit_compiler { #if DEBUG jit_uint32_t getreg : 1; #endif + jit_uint32_t no_data : 1; + jit_uint32_t no_note : 1; jit_int32_t reglen; /* number of registers */ jit_regset_t regarg; /* cannot allocate */ jit_regset_t regsav; /* automatic spill only once */ @@ -357,6 +360,7 @@ struct jit_compiler { jit_uint8_t *end; } code; struct { + jit_uint8_t *ptr; jit_node_t **table; /* very simple hash table */ jit_word_t size; /* number of vectors in table */ jit_word_t count; /* number of hash table entries */ @@ -467,6 +471,7 @@ struct jit_state { jit_compiler_t *comp; /* Flags to know if user did set the code and data buffers */ jit_uint32_t user_code : 1; + jit_uint32_t user_data : 1; }; struct jit_register { diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index 0421de1df..c1861b1fa 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -680,13 +680,47 @@ _truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) 
static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { - ldi_f(r0, (jit_word_t)i0); + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + stxi_i(alloca_offset - 8, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_f(r0, (jit_word_t)i0); } static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { - ldi_d(r0, (jit_word_t)i0); + union { + jit_int32_t ii[2]; + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + data.d = *i0; + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.ii[0]); + stxi_i(alloca_offset - 8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(alloca_offset - 4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_d(r0, (jit_word_t)i0); } #define fpr_opi(name, type, size) \ diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 392674e64..ab8c77d31 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -256,50 +256,50 @@ 28, 0, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, 4, 4, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 12, 0, 12, 4, 4, - 12, + 20, 4, 12, 4, @@ -309,33 +309,33 @@ 8, 4, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 16, - 28, + 36, 0, 0, 0, diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c index f902df13f..71e325a40 100644 --- a/lib/jit_ia64-fpu.c +++ b/lib/jit_ia64-fpu.c @@ -431,8 +431,10 @@ static void F16_(jit_state_t*,jit_word_t, #define 
movr_f(r0,r1) movr_d(r0,r1) #define movr_d(r0,r1) MOVF(r0,r1) -#define movi_f(r0,i0) ldi_f(r0,(jit_word_t)i0) -#define movi_d(r0,i0) ldi_d(r0,(jit_word_t)i0) +#define movi_f(r0,i0) _movi_f(_jit,r0,i0) +static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*); +#define movi_d(r0,i0) _movi_d(_jit,r0,i0) +static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*); #define movr_f_w(r0,r1) _movr_f_w(_jit,r0,r1) static void _movr_f_w(jit_state_t*,jit_int32_t,jit_int32_t); #define movr_d_w(r0,r1) _movr_d_w(_jit,r0,r1) @@ -999,6 +1001,46 @@ F16_(jit_state_t* _jit, jit_word_t _p, inst((((im>>20)&1L)<<36)|(y<<27)|(1L<<26)|((im&0xffffL)<<6)|_p, INST_F); } +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + SETF_S(r0, rn(reg)); + jit_unget_reg(reg); + } + else + ldi_f(r0, (jit_word_t)i0); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.w); + SETF_D(r0, rn(reg)); + jit_unget_reg(reg); + } + else + ldi_d(r0, (jit_word_t)i0); +} + static void _movr_f_w(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_mips-fpu.c b/lib/jit_mips-fpu.c index 7154359f0..0a56d5f4b 100644 --- a/lib/jit_mips-fpu.c +++ b/lib/jit_mips-fpu.c @@ -726,10 +726,19 @@ _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; data.f = *i0; - if (data.i) - ldi_f(r0, (jit_word_t)i0); + if (data.i) { + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + MTC1(rn(reg), r0); + jit_unget_reg(reg); + } + else + ldi_f(r0, (jit_word_t)i0); + } else MTC1(_ZERO_REGNO, r0); } @@ 
-970,22 +979,62 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_int64_t l; jit_float64_t d; } data; + jit_int32_t reg; data.d = *i0; # if __WORDSIZE == 64 || NEW_ABI - if (data.l) - ldi_d(r0, (jit_word_t)i0); + if (data.l) { + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 64 + movi(rn(reg), data.l); + DMTC1(rn(reg), r0); +# else + if (data.i[0]) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0); + } + else + MTC1(_ZERO_REGNO, r0); + if (data.i[1]) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + 1); + } + else + MTC1(_ZERO_REGNO, r0 + 1); +# endif + jit_unget_reg(reg); + } + else + ldi_d(r0, (jit_word_t)i0); + } else DMTC1(_ZERO_REGNO, r0); # else - if (data.i[0]) - ldi_f(r0, (jit_word_t)i0); + if (_jitc->no_data) + reg = jit_get_reg(jit_class_gpr); + if (data.i[0]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0); + } + else + ldi_f(r0, (jit_word_t)i0); + } else MTC1(_ZERO_REGNO, r0); - if (data.i[1]) - ldi_f(r0 + 1, ((jit_word_t)i0) + 4); + if (data.i[1]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + 1); + } + else + ldi_f(r0 + 1, ((jit_word_t)i0) + 4); + } else MTC1(_ZERO_REGNO, r0 + 1); + if (_jitc->no_data) + jit_unget_reg(reg); # endif } diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index 48c061079..69f6dcf26 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -700,7 +700,7 @@ 0, 0, 0, -#endif /* NEW_ABI */ +#endif /* NEW_ABI */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 diff --git a/lib/jit_note.c b/lib/jit_note.c index 474a8db4a..412eba3de 100644 --- a/lib/jit_note.c +++ b/lib/jit_note.c @@ -90,7 +90,7 @@ _jit_note(jit_state_t *_jit, char *name, int line) (name == NULL && _jitc->note.note != NULL) || (name != NULL && _jitc->note.note == NULL) || (name != NULL && _jitc->note.note != NULL && - strcmp(name, (char *)_jit->data.ptr + _jitc->note.note->v.n->u.w))) + strcmp(name, (char *)_jitc->data.ptr + _jitc->note.note->v.n->u.w))) 
_jitc->note.size += sizeof(jit_line_t); _jitc->note.size += sizeof(jit_int32_t) * 2; return (_jitc->note.note = node); diff --git a/lib/jit_ppc-fpu.c b/lib/jit_ppc-fpu.c index ace978753..2bef7e31a 100644 --- a/lib/jit_ppc-fpu.c +++ b/lib/jit_ppc-fpu.c @@ -439,13 +439,51 @@ _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) { - ldi_f(r0, (jit_word_t)i0); + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(alloca_offset - 4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, alloca_offset - 4); + } + else + ldi_f(r0, (jit_word_t)i0); } static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { - ldi_d(r0, (jit_word_t)i0); + union { + jit_int32_t i[2]; + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 32 + movi(rn(reg), data.i[0]); + stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.i[1]); + stxi(alloca_offset - 4, _FP_REGNO, rn(reg)); +# else + movi(rn(reg), data.w); + stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); +# endif + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + } + else + ldi_d(r0, (jit_word_t)i0); } /* should only work on newer ppc (fcfid is a ppc64 instruction) */ diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index f1652372e..e0c53b9e1 100644 --- a/lib/jit_ppc-sz.c +++ b/lib/jit_ppc-sz.c @@ -527,50 +527,50 @@ 72, 0, 4, - 12, + 16, 4, - 12, + 16, 4, - 12, + 16, 4, - 12, + 16, 4, 4, 4, 12, - 20, - 16, 24, + 16, + 28, 12, - 20, - 16, 24, + 16, + 28, 12, - 20, - 16, 24, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, 16, - 24, + 28, + 16, + 28, 12, - 20, + 24, 12, 0, 20, 4, 4, - 8, + 12, 4, 8, 4, @@ -580,79 
+580,79 @@ 4, 4, 8, - 16, - 12, - 20, - 8, - 16, - 12, - 20, - 8, - 16, - 8, - 16, - 12, - 20, - 8, - 16, - 12, - 20, - 8, - 16, - 12, 20, 12, + 24, + 8, + 20, + 12, + 24, + 8, 20, 8, - 16, + 20, + 12, + 24, 8, - 16, + 20, + 12, + 24, + 8, + 20, + 12, + 24, + 12, + 24, + 8, + 20, + 8, + 20, 0, 4, - 12, + 24, 4, - 12, + 24, 4, - 12, + 24, 4, - 12, + 24, 4, 4, 4, 12, - 20, + 32, 16, - 24, + 36, 12, - 20, + 32, 16, - 24, + 36, 12, - 20, + 32, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 16, - 24, + 36, 12, - 20, + 32, 12, 0, 20, 4, 4, - 8, + 24, 4, 8, 4, @@ -662,33 +662,33 @@ 4, 4, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 32, 12, - 20, + 32, 8, - 16, + 28, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 28, 12, - 20, + 32, 8, - 16, + 28, 12, - 20, + 32, 12, - 20, + 32, 8, - 16, + 28, 8, - 16, + 28, 0, 0, 0, @@ -972,39 +972,39 @@ 4, 4, 12, - 36, - 16, 40, + 16, + 44, 12, - 36, - 16, 40, + 16, + 44, 12, - 36, - 16, 40, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, 16, - 40, + 44, + 16, + 44, 12, - 36, + 40, 12, 12, 12, 4, 4, - 24, + 32, 4, 24, 4, @@ -1018,13 +1018,13 @@ 12, 36, 8, - 32, + 40, 12, + 40, + 8, 36, 8, - 32, - 8, - 32, + 36, 12, 36, 8, @@ -1032,13 +1032,13 @@ 12, 36, 8, - 32, - 12, 36, 12, - 36, + 40, + 12, + 40, 8, - 32, + 36, 8, 32, 0, diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index 343a39a27..fdceaee65 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ -26,6 +26,7 @@ # define _R1_REGNO 1 # define _R7_REGNO 7 # define _R13_REGNO 13 +# define _FP_REGNO _R13_REGNO # define _R14_REGNO 14 # define _R15_REGNO 15 # define u12_p(i0) ((i0) >= 0 && (i0) <= 4095) diff --git a/lib/jit_s390x-fpu.c b/lib/jit_s390x-fpu.c index e263f78ba..d5b681daa 100644 --- a/lib/jit_s390x-fpu.c +++ b/lib/jit_s390x-fpu.c @@ -852,8 +852,22 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_f(jit_state_t *_jit, jit_int32_t 
r0, jit_float32_t *i0) { + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + if (*(jit_int32_t *)i0 == 0) LZER(r0); + else if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg_but_zero(); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(-4, _FP_REGNO, rn(reg)); + jit_unget_reg_but_zero(reg); + ldxi_f(r0, _FP_REGNO, -4); + } else ldi_f(r0, (jit_word_t)i0); } @@ -868,8 +882,22 @@ _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) { + union { + jit_int64_t l; + jit_float64_t d; + } data; + jit_int32_t reg; + if (*(jit_int64_t *)i0 == 0) LZDR(r0); + else if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg_but_zero(); + movi(rn(reg), data.l); + stxi_l(-8, _FP_REGNO, rn(reg)); + jit_unget_reg_but_zero(reg); + ldxi_d(r0, _FP_REGNO, -8); + } else ldi_d(r0, (jit_word_t)i0); } diff --git a/lib/jit_s390x-sz.c b/lib/jit_s390x-sz.c index c12d6aff1..e7b8ea290 100644 --- a/lib/jit_s390x-sz.c +++ b/lib/jit_s390x-sz.c @@ -174,50 +174,50 @@ 36, 0, 6, - 16, + 26, 8, - 16, + 26, 6, - 16, + 26, 8, - 16, + 26, 4, 4, 4, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 4, 4, 4, 4, 2, - 10, + 20, 4, 16, 12, @@ -227,79 +227,79 @@ 12, 4, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 0, 6, - 16, + 26, 8, - 16, + 26, 6, - 16, + 26, 8, - 16, + 26, 4, 4, 4, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 20, - 30, + 40, 16, - 26, + 36, 16, - 26, + 36, 4, 4, 4, 4, 2, - 10, + 24, 4, 16, 12, @@ -309,33 +309,33 @@ 
12, 4, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 34, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 18, - 28, + 38, 10, - 20, + 30, 10, - 20, + 30, 0, 0, 0, diff --git a/lib/jit_s390x.c b/lib/jit_s390x.c index 26f286704..fb6d4e483 100644 --- a/lib/jit_s390x.c +++ b/lib/jit_s390x.c @@ -116,8 +116,10 @@ _jit_prolog(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; _jitc->function->self.size = stack_framesize; _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.aoff = _jitc->function->self.alen = - _jitc->function->self.aoff = 0; + _jitc->function->self.aoff = _jitc->function->self.alen = 0; + /* preallocate 8 bytes if not using a constant data buffer */ + if (_jitc->no_data) + _jitc->function->self.aoff = -8; _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -592,18 +594,6 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(rn(node->u.w), node->v.w); \ break -#define case_rf(name) \ - case jit_code_##name##i_f: \ - assert_data(node); \ - name##_f(rn(node->u.w), \ - (jit_float32_t *)node->v.n->u.w); \ - break -#define case_rd(name) \ - case jit_code_##name##i_d: \ - assert_data(node); \ - name##_d(rn(node->u.w), \ - (jit_float64_t *)node->v.n->u.w); \ - break #define case_wr(name, type) \ case jit_code_##name##i##type: \ name##i##type(node->u.w, rn(node->v.w)); \ diff --git a/lib/jit_sparc-fpu.c b/lib/jit_sparc-fpu.c index 94fed634c..52d760d9f 100644 --- a/lib/jit_sparc-fpu.c +++ b/lib/jit_sparc-fpu.c @@ -150,7 +150,8 @@ static void _extr_f(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_f_i(r0, r1) _truncr_f_i(_jit, r0, r1) static void _truncr_f_i(jit_state_t*, jit_int32_t, jit_int32_t); # define extr_d_f(r0, r1) FDTOS(r1, r0) -# define movi_f(r0, i0) ldi_f(r0, (jit_word_t)i0) +# define 
movi_f(r0, i0) _movi_f(_jit, r0, i0) +static void _movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); # define movr_f(r0, r1) FMOVS(r1, r0) # define negr_f(r0, r1) FNEGS(r1, r0) # define absr_f(r0, r1) FABSS(r1, r0) @@ -162,7 +163,8 @@ static void _extr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define truncr_d_i(r0, r1) _truncr_d_i(_jit, r0, r1) static void _truncr_d_i(jit_state_t*, jit_int32_t, jit_int32_t); # define extr_f_d(r0, r1) FSTOD(r1, r0) -# define movi_d(r0, i0) ldi_d(r0, (jit_word_t)i0) +# define movi_d(r0, i0) _movi_d(_jit, r0, i0) +static void _movi_d(jit_state_t*, jit_int32_t, jit_float64_t*); # define movr_d(r0, r1) _movr_d(_jit, r0, r1) static void _movr_d(jit_state_t*, jit_int32_t, jit_int32_t); # define negr_d(r0, r1) _negr_d(_jit, r0, r1) @@ -381,6 +383,50 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, ii(v.v); } +static void +_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.f = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i & 0xffffffff); + stxi_i(-8, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_f(r0, _FP_REGNO, -8); + } + else + ldi_f(r0, (jit_word_t)i0); +} + +static void +_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_int32_t i[2]; + jit_float64_t d; + } data; + jit_int32_t reg; + + if (_jitc->no_data) { + data.d = *i0; + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i[0]); + stxi_i(-8, _FP_REGNO, rn(reg)); + movi(rn(reg), data.i[1]); + stxi_i(-4, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + ldxi_d(r0, _FP_REGNO, -8); + } + else + ldi_d(r0, (jit_word_t)i0); +} + static void _movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 9e004c0ab..6056af221 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 28 +#define JIT_INSTR_MAX 40 
0, 0, 0, @@ -185,39 +185,39 @@ 4, 4, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 16, - 28, + 32, 12, 0, 12, 4, 4, - 12, + 16, 4, 12, 4, @@ -237,69 +237,69 @@ 12, 24, 12, - 24, + 28, + 12, + 28, + 12, + 28, + 12, + 28, + 12, + 28, + 12, + 28, 12, 24, 12, 24, 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, - 12, - 24, + 28, 0, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 4, - 16, + 24, 8, 8, 4, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 16, - 28, + 40, 12, 0, 16, 4, 8, - 12, + 24, 4, 12, 4, @@ -309,33 +309,33 @@ 4, 4, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 36, 12, - 24, + 32, 12, - 24, + 32, 12, - 24, + 36, 0, 0, 0, diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index c22076aa1..f1f4bffc5 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -624,10 +624,17 @@ _emit_code(jit_state_t *_jit) case jit_code_##name##i##type: \ name##i##type(node->u.w, rn(node->v.w)); \ break -#define case_rf(name, type) \ +#define case_rf(name) \ case jit_code_##name##i##type: \ assert(node->flag & jit_flag_data); \ - name##i##type(rn(node->u.w), node->v.n->u.w); \ + name##_f(rn(node->u.w), \ + (jit_float32_t *)node->v.n->u.w); \ + break +#define case_rd(name) \ + case jit_code_##name##i_d: \ + assert(node->flag & jit_flag_data); \ + name##_d(rn(node->u.w), \ + (jit_float64_t *)node->v.n->u.w); \ break #define case_rrr(name, type) \ case jit_code_##name##r##type: \ @@ -930,7 +937,10 @@ _emit_code(jit_state_t *_jit) case_rrr(stx, _f); case_wrr(stx, _f); case_rr(mov, _f); - case_rf(mov, _f); + case jit_code_movi_f: + 
assert(node->flag & jit_flag_data); + movi_f(rn(node->u.w), (jit_float32_t *)node->v.n->u.w); + break; case_brr(blt, _f); case_brf(blt, _f, 32); case_brr(ble, _f); @@ -1009,7 +1019,10 @@ _emit_code(jit_state_t *_jit) case_rrr(stx, _d); case_wrr(stx, _d); case_rr(mov, _d); - case_rf(mov, _d); + case jit_code_movi_d: + assert(node->flag & jit_flag_data); + movi_d(rn(node->u.w), (jit_float64_t *)node->v.n->u.w); + break; case_brr(blt, _d); case_brf(blt, _d, 64); case_brr(ble, _d); diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c index 5082165a4..ca5f8a4f6 100644 --- a/lib/jit_x86-sse.c +++ b/lib/jit_x86-sse.c @@ -765,24 +765,27 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; + jit_bool_t ldi; data.f = *i0; if (data.f == 0.0 && !(data.i & 0x80000000)) xorpsr(r0, r0); else { + ldi = !_jitc->no_data; #if __WORDSIZE == 64 - if (can_sign_extend_int_p((jit_word_t)i0)) + /* if will allocate a register for offset, just use immediate */ + if (ldi && !can_sign_extend_int_p((jit_word_t)i0)) + ldi = 0; +#endif + if (ldi) sse_ldi_f(r0, (jit_word_t)i0); else { - /* if will allocate a register for offset, just use immediate */ - jit_int32_t reg = jit_get_reg(jit_class_gpr); + reg = jit_get_reg(jit_class_gpr); movi(rn(reg), data.i); movdlxr(r0, rn(reg)); jit_unget_reg(reg); } -#else - sse_ldi_f(r0, (jit_word_t)i0); -#endif } } @@ -1209,24 +1212,36 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_word_t w; jit_float64_t d; } data; + jit_int32_t reg; + jit_bool_t ldi; data.d = *i0; if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) xorpdr(r0, r0); else { + ldi = !_jitc->no_data; #if __WORDSIZE == 64 - if (can_sign_extend_int_p((jit_word_t)i0)) + /* if will allocate a register for offset, just use immediate */ + if (ldi && !can_sign_extend_int_p((jit_word_t)i0)) + ldi = 0; +#endif + if (ldi) sse_ldi_d(r0, (jit_word_t)i0); else { - /* if will allocate a register for offset, just use 
immediate */ - jit_int32_t reg = jit_get_reg(jit_class_gpr); + reg = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 64 movi(rn(reg), data.w); movdqxr(r0, rn(reg)); jit_unget_reg(reg); - } #else - sse_ldi_d(r0, (jit_word_t)i0); + movi(rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); #endif + } } } diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index ea766361c..2e76e0cf2 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -1,6 +1,6 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 26 +#define JIT_INSTR_MAX 42 0, 0, 0, @@ -169,141 +169,59 @@ 9, 0, 5, - 2, - 5, + 4, + 7, 24, 0, 8, - 16, - 12, - 16, - 8, - 16, - 12, - 16, - 12, - 12, - 6, - 13, - 21, - 13, - 21, - 15, - 22, - 13, - 21, - 13, - 21, - 18, - 25, - 13, - 21, - 13, - 21, - 13, - 20, - 13, - 21, - 13, - 21, - 13, - 20, - 13, - 20, - 13, - 20, - 7, - 0, - 7, - 4, - 10, - 8, - 4, - 8, - 5, - 5, - 6, - 10, - 7, - 7, - 10, - 18, - 10, - 18, - 12, - 20, - 10, - 18, - 10, - 18, - 13, - 21, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, - 18, - 10, 17, - 10, - 18, - 0, - 8, - 16, 12, - 16, + 17, 8, - 16, + 17, + 12, + 17, + 12, 12, - 16, - 18, - 13, 6, 13, - 21, + 27, 13, - 21, + 27, 15, - 23, + 29, 13, - 21, + 27, 13, - 21, + 27, 18, - 26, + 32, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, + 27, 13, - 21, - 7, + 27, + 8, 0, - 7, + 8, 4, 10, - 8, + 15, 4, 8, 5, - 8, + 5, 6, 10, 7, @@ -337,6 +255,87 @@ 10, 18, 0, + 8, + 26, + 12, + 26, + 8, + 26, + 12, + 26, + 18, + 13, + 6, + 13, + 37, + 13, + 37, + 15, + 39, + 13, + 37, + 13, + 37, + 18, + 42, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 13, + 37, + 8, + 0, + 8, + 4, + 10, + 24, + 4, + 8, + 5, + 8, + 6, + 10, + 7, + 7, + 10, + 28, + 10, + 28, + 12, + 30, + 10, + 28, + 10, + 28, + 13, + 31, 
+ 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, + 10, + 28, 0, 0, 0, @@ -345,7 +344,8 @@ 0, 0, 0, - 14, + 0, + 10, 10, #endif /* __WORDSIZE */ diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c index 253f6af8d..5f8e014c7 100644 --- a/lib/jit_x86-x87.c +++ b/lib/jit_x86-x87.c @@ -810,6 +810,7 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) jit_int32_t i; jit_float32_t f; } data; + jit_int32_t reg; data.f = *i0; if (data.f == 0.0 && !(data.i & 0x80000000)) @@ -827,7 +828,15 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) else if (data.f == 0.6931471805599453094172323683399f) fldln2(); else { - x87_ldi_f(r0, (jit_word_t)i0); + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); + } + else + x87_ldi_f(r0, (jit_word_t)i0); return; } fstpr(r0 + 1); @@ -897,7 +906,7 @@ _x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_str_f(rn(reg), r0); jit_unget_reg(reg); } @@ -928,7 +937,7 @@ _x87_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_stxr_f(rn(reg), r0, r1); jit_unget_reg(reg); } @@ -966,6 +975,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) jit_word_t w; jit_float64_t d; } data; + jit_int32_t reg; data.d = *i0; if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) @@ -983,7 +993,22 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) else if (data.d == 0.6931471805599453094172323683399) fldln2(); else { - x87_ldi_d(r0, (jit_word_t)i0); + if (_jitc->no_data) { + reg = jit_get_reg(jit_class_gpr); +#if __WORDSIZE == 32 + 
movi(rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); +#else + movi(rn(reg), data.w); + stxi_l(CVT_OFFSET, _RBP_REGNO, rn(reg)); +#endif + jit_unget_reg(reg); + x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); + } + else + x87_ldi_d(r0, (jit_word_t)i0); return; } fstpr(r0 + 1); @@ -1138,7 +1163,7 @@ _x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_str_d(rn(reg), r0); jit_unget_reg(reg); } @@ -1169,7 +1194,7 @@ _x87_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) jit_int32_t reg; if (!can_sign_extend_int_p(i0)) { reg = jit_get_reg(jit_class_gpr); - jit_movi(rn(reg), i0); + movi(rn(reg), i0); x87_stxr_d(rn(reg), r0, r1); jit_unget_reg(reg); } diff --git a/lib/lightning.c b/lib/lightning.c index ee21b1212..fa7743036 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -80,6 +80,10 @@ static void _bmp_clr(jit_state_t*, jit_word_t) maybe_unused; #define bmp_tst(bit) _bmp_tst(_jit, bit) static jit_bool_t _bmp_tst(jit_state_t*, jit_word_t); +#define jit_dataset() _jit_dataset(_jit) +static void +_jit_dataset(jit_state_t *_jit); + #define jit_setup(block) _jit_setup(_jit, block) static void _jit_setup(jit_state_t *_jit, jit_block_t *block); @@ -570,10 +574,10 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, size = (_jit->data.length + length + 4096) & - 4095; assert(size >= _jit->data.length); - if (_jit->data.ptr == NULL) - jit_alloc((jit_pointer_t *)&_jit->data.ptr, size); + if (_jitc->data.ptr == NULL) + jit_alloc((jit_pointer_t *)&_jitc->data.ptr, size); else - jit_realloc((jit_pointer_t *)&_jit->data.ptr, + jit_realloc((jit_pointer_t *)&_jitc->data.ptr, _jit->data.length, size); _jit->data.length = size; } @@ -585,7 +589,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, node = _jitc->data.table[key]; for (; 
node; node = node->next) { if (node->v.w == length && - memcmp(_jit->data.ptr + node->u.w, data, length) == 0) + memcmp(_jitc->data.ptr + node->u.w, data, length) == 0) break; } @@ -608,7 +612,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, } node->u.w = _jitc->data.offset; node->v.w = length; - memcpy(_jit->data.ptr + _jitc->data.offset, data, length); + memcpy(_jitc->data.ptr + _jitc->data.offset, data, length); _jitc->data.offset += length; node->next = _jitc->data.table[key]; @@ -630,7 +634,7 @@ _jit_data(jit_state_t *_jit, jit_pointer_t data, temp = _jitc->data.table[i]; for (; temp; temp = next) { next = temp->next; - key = hash_data(_jit->data.ptr + temp->u.w, temp->v.w) & + key = hash_data(_jitc->data.ptr + temp->u.w, temp->v.w) & ((_jitc->data.size << 1) - 1); temp->next = hash[key]; hash[key] = temp; @@ -889,7 +893,8 @@ _jit_destroy_state(jit_state_t *_jit) { if (!_jit->user_code) munmap(_jit->code.ptr, _jit->code.length); - munmap(_jit->data.ptr, _jit->data.length); + if (!_jit->user_data) + munmap(_jit->data.ptr, _jit->data.length); jit_free((jit_pointer_t *)&_jit); } @@ -1417,15 +1422,11 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) void _jit_optimize(jit_state_t *_jit) { - jit_uint8_t *ptr; jit_bool_t jump; jit_int32_t mask; jit_node_t *node; jit_block_t *block; jit_word_t offset; -#if defined(__sgi) - int mmap_fd; -#endif _jitc->function = NULL; @@ -1534,38 +1535,6 @@ _jit_optimize(jit_state_t *_jit) break; } } - - /* ensure it is aligned */ - _jitc->data.offset = (_jitc->data.offset + 7) & -8; - - /* create read only data buffer */ - _jit->data.length = (_jitc->data.offset + - /* reserve space for annotations */ - _jitc->note.size + 4095) & -4096; -#if defined(__sgi) - mmap_fd = open("/dev/zero", O_RDWR); -#endif - ptr = mmap(NULL, _jit->data.length, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); - assert(ptr != MAP_FAILED); -#if defined(__sgi) - close(mmap_fd); -#endif - memcpy(ptr, 
_jit->data.ptr, _jitc->data.offset); - jit_free((jit_pointer_t *)&_jit->data.ptr); - _jit->data.ptr = ptr; - - /* to be filled with note contents once offsets are known */ - _jitc->note.base = ptr + _jitc->data.offset; - memset(_jitc->note.base, 0, _jit->data.length - _jitc->data.offset); - - for (offset = 0; offset < _jitc->data.size; offset++) { - for (node = _jitc->data.table[offset]; node; node = node->next) { - node->flag |= jit_flag_patch; - node->u.w = (jit_word_t)(_jit->data.ptr + node->u.w); - } - } } void @@ -1713,6 +1682,9 @@ _jit_realize(jit_state_t *_jit) jit_optimize(); _jitc->realize = 1; + /* ensure it is aligned */ + _jitc->data.offset = (_jitc->data.offset + 7) & -8; + #if GET_JIT_SIZE /* Heuristic to guess code buffer size */ _jitc->mult = 4; @@ -1722,6 +1694,68 @@ _jit_realize(jit_state_t *_jit) #endif } +void +_jit_dataset(jit_state_t *_jit) +{ + jit_uint8_t *ptr; + jit_node_t *node; + jit_word_t offset; +#if defined(__sgi) + int mmap_fd; +#endif + + assert(!_jitc->dataset); + if (!_jit->user_data) { + + /* create read only data buffer */ + _jit->data.length = (_jitc->data.offset + + /* reserve space for annotations */ + _jitc->note.size + 4095) & -4096; +#if defined(__sgi) + mmap_fd = open("/dev/zero", O_RDWR); +#endif + _jit->data.ptr = mmap(NULL, _jit->data.length, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + assert(_jit->data.ptr != MAP_FAILED); +#if defined(__sgi) + close(mmap_fd); +#endif + } + + if (!_jitc->no_data) + memcpy(_jit->data.ptr, _jitc->data.ptr, _jitc->data.offset); + + if (_jitc->no_note) { + _jit->note.length = 0; + _jitc->note.size = 0; + } + else { + _jitc->note.base = _jit->data.ptr; + if (!_jitc->no_data) + _jitc->note.base += _jitc->data.offset; + memset(_jitc->note.base, 0, _jitc->note.size); + } + + if (_jit->user_data) + /* Need the temporary hashed data until jit_emit is finished */ + ptr = _jitc->no_data ? 
_jitc->data.ptr : _jit->data.ptr; + else { + ptr = _jit->data.ptr; + /* Temporary hashed data no longer required */ + jit_free((jit_pointer_t *)&_jitc->data.ptr); + } + + for (offset = 0; offset < _jitc->data.size; offset++) { + for (node = _jitc->data.table[offset]; node; node = node->next) { + node->flag |= jit_flag_patch; + node->u.w = (jit_word_t)(ptr + node->u.w); + } + } + + _jitc->dataset = 1; +} + jit_pointer_t _jit_get_code(jit_state_t *_jit, jit_word_t *length) { @@ -1747,6 +1781,39 @@ _jit_set_code(jit_state_t *_jit, jit_pointer_t ptr, jit_word_t length) _jit->user_code = 1; } +jit_pointer_t +_jit_get_data(jit_state_t *_jit, jit_word_t *data_size, jit_word_t *note_size) +{ + assert(_jitc->realize); + if (data_size) + *data_size = _jitc->data.offset; + if (note_size) + *note_size = _jitc->note.size; + return (_jit->data.ptr); +} + +void +_jit_set_data(jit_state_t *_jit, jit_pointer_t ptr, + jit_word_t length, jit_word_t flags) +{ + assert(_jitc->realize); + if (flags & JIT_DISABLE_DATA) + _jitc->no_data = 1; + else + assert(length >= _jitc->data.offset); + if (flags & JIT_DISABLE_NOTE) + _jitc->no_note = 1; + else { + if (flags & JIT_DISABLE_DATA) + assert(length >= _jitc->note.size); + else + assert(length >= _jitc->data.offset + _jitc->note.size); + } + _jit->data.ptr = ptr; + _jit->data.length = length; + _jit->user_data = 1; +} + jit_pointer_t _jit_emit(jit_state_t *_jit) { @@ -1761,6 +1828,9 @@ _jit_emit(jit_state_t *_jit) if (!_jitc->realize) jit_realize(); + if (!_jitc->dataset) + jit_dataset(); + _jitc->emit = 1; if (!_jit->user_code) { @@ -1829,10 +1899,15 @@ _jit_emit(jit_state_t *_jit) #endif _jitc->done = 1; - jit_annotate(); + if (!_jitc->no_note) + jit_annotate(); - result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); - assert(result == 0); + if (_jit->user_data) + jit_free((jit_pointer_t *)&_jitc->data.ptr); + else { + result = mprotect(_jit->data.ptr, _jit->data.length, PROT_READ); + assert(result == 0); + } if 
(!_jit->user_code) { result = mprotect(_jit->code.ptr, _jit->code.length, PROT_READ | PROT_EXEC);