From 839341a498b7b2077d715c2ca66d3debcddc0c30 Mon Sep 17 00:00:00 2001 From: pcpa Date: Tue, 14 Oct 2014 17:04:40 -0300 Subject: [PATCH] Implement jit_frame and jit_tramp interfaces * include/lightning.h, include/lightning/jit_private.h, lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c, lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c, lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c, lib/jit_x86-cpu.c, lib/lightning.c: Implement the new jit_frame and jit_tramp interfaces, that allow writing trampoline like calls, where a single dispatcher jit buffer is written, and later other jit buffers are created, with the same stack frame layout as the dispatcher. This is the logic that GNU Smalltalk used in lightning 1.x, and is required to make a sane port for lightning 2.x. * lib/jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp, and also correct wrong encoding for B4 instructions, that implement jmpr, as well as correct reverse logic in _jmpr, that was moving the branch register to the jump register, and not vice-versa. Also, if a stack frame is to be assumed, always assume it may call a function with up to 8 arguments, regardless of the hint frame argument. * lib/jit_arm.c: Add a new must_align_p() interface to ensure function prologs are always aligned. This condition was previously always true, somewhat by accident, but with jit_tramp it is not guaranteed. * lib/jit_ia64.c, lib/jit_ppc.c: Add minor special handling required to implement jit_tramp, where a function descriptor should not be added before a prolog, as jit_tramp means omit prolog. * check/lightning.c: Update test driver for the new interfaces. * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add a simple test and example of the jit_frame and jit_tramp usage implementing a simple Fibonacci function using a simulation of an interpreter stack and how it would handle state in language specific variables. 
* doc/body.texi: Add documentation for jit_frame and jit_tramp. --- ChangeLog | 44 +++++++++++++++++++++++++++++++++ check/Makefile.am | 9 ++++--- check/lightning.c | 12 ++++++--- doc/body.texi | 38 ++++++++++++++++++++++++++++ include/lightning.h | 4 +++ include/lightning/jit_private.h | 10 ++++++++ lib/jit_aarch64-cpu.c | 9 +++++++ lib/jit_alpha-cpu.c | 9 +++++++ lib/jit_arm-cpu.c | 12 +++++++++ lib/jit_arm.c | 30 ++++++++++++++++++++++ lib/jit_hppa-cpu.c | 9 +++++++ lib/jit_ia64-cpu.c | 25 ++++++++++++++----- lib/jit_ia64.c | 2 +- lib/jit_mips-cpu.c | 9 +++++++ lib/jit_ppc-cpu.c | 9 +++++++ lib/jit_ppc.c | 2 +- lib/jit_s390x-cpu.c | 9 +++++++ lib/jit_sparc-cpu.c | 9 +++++++ lib/jit_x86-cpu.c | 9 +++++++ lib/lightning.c | 39 +++++++++++++++++++++++++++++ 20 files changed, 284 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 578003452..1274690e3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,47 @@ +2014-10-12 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c, + lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c, + lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c, + lib/jit_x86-cpu.c, lib/lightning.c: Implement the new + jit_frame and jit_tramp interfaces, that allow writing + trampoline like calls, where a single dispatcher jit buffer + is written, and later other jit buffers are created, with + the same stack frame layout as the dispatcher. This is the + logic that GNU Smalltalk used in lightning 1.x, and is required + to make a sane port for lighting 2.x. + + * jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp, + and also correct wrong encoding for B4 instructions, that + implement jmpr, as well as correct reverse logic in _jmpr, + that was moving the branch register to the jump register, + and not vice-versa. 
+ Also, if a stack frame is to be assumed, always assume it may + call a function with up to 8 arguments, regardless of the + hint frame argument. + + * lib/jit_arm.c: Add a new must_align_p() interface to ensure + function prologs are always aligned. This condition was + previously always true, somewhat by accident, but with + jit_tramp it is not guaranteed. + + * jit_ia64-cpu.c: lib/jit_ppc.c: Add minor special handling + required to implement jit_tramp, where a function descriptor + should not be added before a prolog, as jit_tramp means omit + prolog. + + * check/lightning.c: Update test driver for the new interfaces. + + * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add + a simple test and example of the jit_frame and jit_tramp + usage implementing a simple Fibonacci function using a + simulation of an interpreter stack and how it would handle + state in language specific variables. + + * doc/body.texi: Add documentation for jit_frame and + jit_tramp. + 2014-09-29 Paulo Andrade * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, diff --git a/check/Makefile.am b/check/Makefile.am index 9c6c54afb..c65622bd7 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -82,6 +82,7 @@ EXTRA_DIST = \ qalu_mul.tst qalu_mul.ok \ qalu_div.tst qalu_div.ok \ ret.tst ret.ok \ + tramp.tst tramp.ok \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ @@ -107,7 +108,7 @@ base_TESTS = \ clobber carry call \ float jmpr \ qalu_mul qalu_div \ - ret + ret tramp $(base_TESTS): check.sh $(LN_S) $(srcdir)/check.sh $@ @@ -178,7 +179,7 @@ arm_TESTS = \ fop_abs.arm fop_sqrt.arm \ varargs.arm stack.arm \ clobber.arm carry.arm call.arm \ - float.arm jmpr.arm + float.arm jmpr.arm tramp.arm $(arm_TESTS): check.arm.sh $(LN_S) $(srcdir)/check.arm.sh $@ TESTS += $(arm_TESTS) @@ -202,7 +203,7 @@ swf_TESTS = \ fop_abs.swf fop_sqrt.swf \ varargs.swf stack.swf \ clobber.swf carry.swf call.swf \ - float.swf jmpr.arm + float.swf jmpr.swf tramp.swf $(swf_TESTS): check.swf.sh $(LN_S) 
$(srcdir)/check.swf.sh $@ TESTS += $(swf_TESTS) @@ -226,7 +227,7 @@ nodata_TESTS = \ fop_abs.nodata fop_sqrt.nodata \ varargs.nodata stack.nodata \ clobber.nodata carry.nodata call.nodata \ - float.nodata jmpr.nodata + float.nodata jmpr.nodata tramp.nodata $(nodata_TESTS): check.nodata.sh $(LN_S) $(srcdir)/check.nodata.sh $@ TESTS += $(nodata_TESTS) diff --git a/check/lightning.c b/check/lightning.c index 83b6ab778..60b75bcf0 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -268,7 +268,9 @@ static void make_arg(void *value); static jit_pointer_t get_arg(void); static long get_imm(void); static void name(void); -static void prolog(void); static void ellipsis(void); +static void prolog(void); +static void frame(void); static void tramp(void); +static void ellipsis(void); static void allocai(void); static void arg(void); static void getarg_c(void); static void getarg_uc(void); @@ -566,7 +568,9 @@ static size_t data_offset, data_length; static instr_t instr_vector[] = { #define entry(value) { NULL, #value, value } entry(name), - entry(prolog), entry(ellipsis), + entry(prolog), + entry(frame), entry(tramp), + entry(ellipsis), entry(allocai), entry(arg), entry(getarg_c), entry(getarg_uc), @@ -1320,7 +1324,9 @@ name(void) { (void)identifier(ch); jit_name(parser.string); } -entry(prolog) entry(ellipsis) +entry(prolog) +entry_im(frame) entry_im(tramp) +entry(ellipsis) void allocai(void) { symbol_t *symbol; diff --git a/doc/body.texi b/doc/body.texi index fe1ae829f..28b942416 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -1444,6 +1444,44 @@ Or to only use a data buffer, if required: @rem{...} @end example +@section Shared stack frame layout +Certain jit generation patterns, for example the original GNU Smalltalk +jit generator, uses an approach of a fixed trampoline jit code, and +later generation of code that jumps around, assuming a fixed layout +stack frame. 
+ +To help with this pattern of code generation, @lightning{} provides +the @code{jit_frame} and the @code{jit_tramp} interfaces, to define +or to assume a stack frame. Either @code{jit_frame} or @code{jit_tramp} +must be the first call after @code{jit_prolog}. + +@deftypefun void jit_frame (jit_int32_t @var{frame}) +@var{frame} defines the size in bytes of the current function +stack frame. To calculate its value, a good formula is the maximum number +of arguments to any called native function times eight, plus the +sum of the arguments to all calls to @code{jit_allocai}. @lightning{} +automatically adjusts this value for any backend-specific stack memory +it may need, or any alignment constraint. +To ensure trampoline code is correct, @lightning{} will save all +callee-save registers in the prolog and reload them in the epilog. +@end deftypefun + +@deftypefun void jit_tramp (jit_int32_t @var{frame}) +@var{frame} must be the same value that the dispatcher defined with its +@code{jit_frame} call. +The only difference between @code{jit_frame} and @code{jit_tramp} is that +@code{jit_tramp} omits generation of a prolog and epilog for the +current function. +Most trampoline-based jit generation implements a single dispatch method +and later emits code that knows how to return to the dispatch routine, +and the later emitted code is called with a non-local goto. In such cases, +emitting a native prolog (and epilog) is just a waste of space. +@end deftypefun + +It is a fatal error if more than @var{frame} bytes are required +either in the dispatcher defined with @code{jit_frame} or the +"trampolined" code, defined with @code{jit_tramp}. 
+ @node Acknowledgements @chapter Acknowledgements diff --git a/include/lightning.h b/include/lightning.h index fc9fb9dfa..600174e87 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -907,6 +907,10 @@ extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t); extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*); #define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w) extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t); +#define jit_frame(u) _jit_frame(_jit,u) +extern void _jit_frame(jit_state_t*, jit_int32_t); +#define jit_tramp(u) _jit_tramp(_jit,u) +extern void _jit_tramp(jit_state_t*, jit_int32_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index b620a1b06..128bd1ad8 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -349,6 +349,13 @@ struct jit_function { jit_int32_t *regoff; jit_regset_t regset; jit_int32_t stack; + + /* Helper for common jit generation pattern, used in GNU Smalltalk + * and possibly others, where a static frame layout is required or + * assumed. 
*/ + jit_int32_t frame; + jit_uint32_t define_frame : 1; + jit_uint32_t assume_frame : 1; }; /* data used only during jit generation */ @@ -559,6 +566,9 @@ _jit_save(jit_state_t*, jit_int32_t); extern void _jit_load(jit_state_t*, jit_int32_t); +#define jit_trampoline(u,v) _jit_trampoline(_jit, u, v) +extern void _jit_trampoline(jit_state_t*, jit_int32_t, jit_bool_t); + #define jit_optimize() _jit_optimize(_jit) extern void _jit_optimize(jit_state_t*); diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 804fecba1..a2d2ce060 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -2145,6 +2145,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; @@ -2188,6 +2195,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; if (_jitc->function->stack) MOV_XSP(SP_REGNO, FP_REGNO); #define LOAD(L, R, O) \ diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index a43c94ce9..8fe43381d 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -2504,6 +2504,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 7) & -8; /* ldgp gp, 0(pv) 
*/ @@ -2544,6 +2551,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; movr(_SP_REGNO, _FP_REGNO); ldxi(_RA_REGNO, _SP_REGNO, 0); ldxi(_FP_REGNO, _SP_REGNO, 8); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 1be40b52b..02edeed21 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -3694,6 +3694,16 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) { + if (jit_thumb_p() && !_jitc->thumb) + _jitc->thumb = _jit->pc.w; + return; + } + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3732,6 +3742,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; addi(_SP_REGNO, _FP_REGNO, 16); if (jit_cpu.abi) VPOP_F64(_D8_REGNO, 8); diff --git a/lib/jit_arm.c b/lib/jit_arm.c index bdafcddd5..7dbf78aa4 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -55,6 +55,8 @@ typedef union _jit_thumb_t { static jit_int32_t _jit_get_reg_pair(jit_state_t*); #define jit_unget_reg_pair(rn) _jit_unget_reg_pair(_jit,rn) static void _jit_unget_reg_pair(jit_state_t*,jit_int32_t); +# define must_align_p(node) _must_align_p(_jit, node) +static jit_bool_t _must_align_p(jit_state_t*,jit_node_t*); #define load_const(uniq,r0,i0) _load_const(_jit,uniq,r0,i0) static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t); #define flush_consts() _flush_consts(_jit) @@ -1042,9 +1044,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_note: case jit_code_name: + if 
(must_align_p(node->next)) + nop(2); node->u.w = _jit->pc.w; break; case jit_code_label: + if (must_align_p(node->next)) + nop(2); /* remember label is defined */ node->flag |= jit_flag_patch; node->u.w = _jit->pc.w; @@ -1665,6 +1671,30 @@ _jit_unget_reg_pair(jit_state_t *_jit, jit_int32_t reg) } } +/* A prolog must be aligned at mod 4 bytes boundary. + * This condition was not being required to be tested by + * accident previously, but with the jit_frame and jit_tramp + * code it is required */ +static jit_bool_t +_must_align_p(jit_state_t *_jit, jit_node_t *node) +{ + if (jit_thumb_p() && (_jit->pc.w & 3)) { + for (; node; node = node->next) { + switch (node->code) { + case jit_code_note: + case jit_code_name: + case jit_code_label: + break; + case jit_code_prolog: + return (1); + default: + return (0); + } + } + } + return (0); +} + static void _load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0) { diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 30de58cc8..479d6cf34 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -2621,6 +2621,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) jit_int32_t regno; jit_word_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + /* hppa stack grows up */ + assert(_jitc->function->self.aoff <= _jitc->function->frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = _jitc->function->frame; + } _jitc->function->stack = ((_jitc->function->self.aoff - _jitc->function->self.alen - _jitc->function->self.size) + 63) & -64; @@ -2651,6 +2658,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) jit_int32_t regno; jit_word_t offset; + if (_jitc->function->assume_frame) + return; /* Restore any modified callee save registers */ offset = alloca_offset - 140; for (regno = 0; regno < jit_size(gr); regno++, offset += 4) { diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 6774e3622..bf701418d 100644 --- a/lib/jit_ia64-cpu.c +++ 
b/lib/jit_ia64-cpu.c @@ -3297,7 +3297,7 @@ _B4(jit_state_t *_jit, jit_word_t _p, assert(!(p & ~0x1L)); assert(!(tp & ~0x7L)); TSTPRED(_p); - inst((d<<37)|(wh<<33)|(x6<<27)|(b<<13)|(p<<12)|(tp<<6)|_p, INST_B); + inst((d<<35)|(wh<<33)|(x6<<27)|(b<<13)|(p<<12)|(tp<<6)|_p, INST_B); } static void @@ -5051,7 +5051,7 @@ _bsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1, static void _jmpr(jit_state_t *_jit, jit_int32_t r0) { - MOV_rn_br(r0, BR_6); + MOV_br_rn(BR_6, r0); BR(BR_6); } @@ -5117,6 +5117,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t reg, ruse, rout; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 15) & -16; @@ -5130,11 +5137,15 @@ _prolog(jit_state_t *_jit, jit_node_t *node) ruse = _jitc->rout - GR_32; /* How many out argument registers required? 
*/ - for (reg = _OUT0; reg <= _OUT7; reg++) { - if (!jit_regset_tstbit(&_jitc->function->regset, reg)) - break; + if (!_jitc->function->define_frame) { + for (reg = _OUT0; reg <= _OUT7; reg++) { + if (!jit_regset_tstbit(&_jitc->function->regset, reg)) + break; + } + rout = reg - _OUT0; } - rout = reg - _OUT0; + else + rout = 8; /* Do not know if will call div/mod functions (sqrt* needs one) */ if (rout < 2) @@ -5181,6 +5192,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; if (jit_regset_tstbit(&_jitc->function->regset, JIT_F0)) LDF_FILL(rn(JIT_F0), GR_4); if (jit_regset_tstbit(&_jitc->function->regset, JIT_F1)) { diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index ee2ebcd92..0a9eccd84 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1256,7 +1256,7 @@ _emit_code(jit_state_t *_jit) undo.prolog_offset = _jitc->prolog.offset; restart_function: _jitc->again = 0; - if (_jitc->jump) { + if (_jitc->jump && !_jitc->function->assume_frame) { /* remember prolog to hide offset adjustment for a jump * to the start of a function, what is expected to be * a common practice as first jit instruction */ diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index ce7e7edda..8976b314b 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -2862,6 +2862,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t index; jit_int32_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } #if NEW_ABI _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ @@ -2900,6 +2907,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t index; jit_int32_t offset; + if (_jitc->function->assume_frame) + return; /* callee save registers */ 
movr(_SP_REGNO, _BP_REGNO); offset = stack_framesize - (sizeof(jit_word_t) << 1); diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 1413875e1..ed9306557 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -3072,6 +3072,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) unsigned long regno; jit_word_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen + _jitc->function->self.size - _jitc->function->self.aoff) + 15) & -16; @@ -3124,6 +3131,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) unsigned long regno; jit_word_t offset; + if (_jitc->function->assume_frame) + return; #if __ppc__ LWZ(_SP_REGNO, _SP_REGNO, 0); ldxi(_R0_REGNO, _SP_REGNO, 8); diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 758f52ad9..08da19f51 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1280,7 +1280,7 @@ _emit_code(jit_state_t *_jit) restart_function: _jitc->again = 0; #if __powerpc__ - if (_jitc->jump) { + if (_jitc->jump && !_jitc->function->assume_frame) { /* remember prolog to hide offset adjustment for a jump * to the start of a function, what is expected to be * a common practice as first jit instruction */ diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index afba9db21..dfa4e56e5 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ -3256,6 +3256,13 @@ static void _prolog(jit_state_t *_jit, jit_node_t *i0) { jit_int32_t regno, offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ 
_jitc->function->self.aoff) + 7) & -8; @@ -3299,6 +3306,8 @@ static void _epilog(jit_state_t *_jit, jit_node_t *i0) { jit_int32_t regno, offset; + if (_jitc->function->assume_frame) + return; for (regno = 0; regno < jit_size(gprs) - 1; regno++) { if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno])) break; diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index c4b5521c2..ba56c6159 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -1620,6 +1620,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } /* align at 16 bytes boundary */ _jitc->function->stack = ((stack_framesize + _jitc->function->self.alen - @@ -1649,6 +1656,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 8a0ce1149..0a5d3302a 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -3326,6 +3326,13 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } #if __WORDSIZE == 64 && __CYGWIN__ _jitc->function->stack = (((/* first 32 bytes must be allocated */ (_jitc->function->self.alen > 32 ? 
@@ -3406,6 +3413,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; /* callee save registers */ movr(_RSP_REGNO, _RBP_REGNO); #if __WORDSIZE == 32 diff --git a/lib/lightning.c b/lib/lightning.c index 8642cf89a..42c3a45f7 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1920,6 +1920,45 @@ fail: return (NULL); } +void +_jit_frame(jit_state_t *_jit, jit_int32_t frame) +{ + jit_trampoline(frame, 1); +} + +void +_jit_tramp(jit_state_t *_jit, jit_int32_t frame) +{ + jit_trampoline(frame, 0); +} + +void +_jit_trampoline(jit_state_t *_jit, jit_int32_t frame, jit_bool_t prolog) +{ + jit_int32_t regno; + + /* Must be called after prolog, actually, just to simplify + * tests and know there is a current function and that + * _jitc->function->self.aoff is at the before any alloca value */ + assert(_jitc->tail && _jitc->tail->code == jit_code_prolog); + + /* + 24 for 3 possible spilled temporaries (that could be a double) */ + frame += 24; +#if defined(__hppa__) + frame += _jitc->function->self.aoff; +#else + frame -= _jitc->function->self.aoff; +#endif + _jitc->function->frame = frame; + if (prolog) + _jitc->function->define_frame = 1; + else + _jitc->function->assume_frame = 1; + for (regno = 0; regno < _jitc->reglen; regno++) + if (jit_class(_rvs[regno].spec) & jit_class_sav) + jit_regset_setbit(&_jitc->function->regset, regno); +} + /* Compute initial reglive and regmask set values of a basic block. * reglive is the set of known live registers * regmask is the set of registers not referenced in the block