diff --git a/ChangeLog b/ChangeLog index 578003452..1274690e3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,47 @@ +2014-10-12 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_alpha-cpu.c, lib/jit_arm-cpu.c, + lib/jit_hppa-cpu.c, lib/jit_ia64-cpu.c, lib/jit_mips-cpu.c, + lib/jit_ppc-cpu.c, lib/jit_s390x-cpu.c, lib/jit_sparc-cpu.c, + lib/jit_x86-cpu.c, lib/lightning.c: Implement the new + jit_frame and jit_tramp interfaces, that allow writing + trampoline like calls, where a single dispatcher jit buffer + is written, and later other jit buffers are created, with + the same stack frame layout as the dispatcher. This is the + logic that GNU Smalltalk used in lightning 1.x, and is required + to make a sane port for lightning 2.x. + + * lib/jit_ia64-cpu.c: Implement support for jit_frame and jit_tramp, + and also correct wrong encoding for B4 instructions, that + implement jmpr, as well as correct reverse logic in _jmpr, + that was moving the branch register to the jump register, + and not vice-versa. + Also, if a stack frame is to be assumed, always assume it may + call a function with up to 8 arguments, regardless of the + hint frame argument. + + * lib/jit_arm.c: Add a new must_align_p() interface to ensure + function prologs are always aligned. This condition was + previously always true, somewhat by accident, but with + jit_tramp it is not guaranteed. + + * lib/jit_ia64.c, lib/jit_ppc.c: Add minor special handling + required to implement jit_tramp, where a function descriptor + should not be added before a prolog, as jit_tramp means omit + prolog. + + * check/lightning.c: Update test driver for the new interfaces. + + * check/Makefile.am, check/tramp.tst, check/tramp.ok: Add + a simple test and example of the jit_frame and jit_tramp + usage implementing a simple Fibonacci function using a + simulation of an interpreter stack and how it would handle + state in language specific variables. 
+ + * doc/body.texi: Add documentation for jit_frame and + jit_tramp. + 2014-09-29 Paulo Andrade * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, diff --git a/check/Makefile.am b/check/Makefile.am index 9c6c54afb..c65622bd7 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -82,6 +82,7 @@ EXTRA_DIST = \ qalu_mul.tst qalu_mul.ok \ qalu_div.tst qalu_div.ok \ ret.tst ret.ok \ + tramp.tst tramp.ok \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ @@ -107,7 +108,7 @@ base_TESTS = \ clobber carry call \ float jmpr \ qalu_mul qalu_div \ - ret + ret tramp $(base_TESTS): check.sh $(LN_S) $(srcdir)/check.sh $@ @@ -178,7 +179,7 @@ arm_TESTS = \ fop_abs.arm fop_sqrt.arm \ varargs.arm stack.arm \ clobber.arm carry.arm call.arm \ - float.arm jmpr.arm + float.arm jmpr.arm tramp.arm $(arm_TESTS): check.arm.sh $(LN_S) $(srcdir)/check.arm.sh $@ TESTS += $(arm_TESTS) @@ -202,7 +203,7 @@ swf_TESTS = \ fop_abs.swf fop_sqrt.swf \ varargs.swf stack.swf \ clobber.swf carry.swf call.swf \ - float.swf jmpr.arm + float.swf jmpr.swf tramp.swf $(swf_TESTS): check.swf.sh $(LN_S) $(srcdir)/check.swf.sh $@ TESTS += $(swf_TESTS) @@ -226,7 +227,7 @@ nodata_TESTS = \ fop_abs.nodata fop_sqrt.nodata \ varargs.nodata stack.nodata \ clobber.nodata carry.nodata call.nodata \ - float.nodata jmpr.nodata + float.nodata jmpr.nodata tramp.nodata $(nodata_TESTS): check.nodata.sh $(LN_S) $(srcdir)/check.nodata.sh $@ TESTS += $(nodata_TESTS) diff --git a/check/lightning.c b/check/lightning.c index 83b6ab778..60b75bcf0 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -268,7 +268,9 @@ static void make_arg(void *value); static jit_pointer_t get_arg(void); static long get_imm(void); static void name(void); -static void prolog(void); static void ellipsis(void); +static void prolog(void); +static void frame(void); static void tramp(void); +static void ellipsis(void); static void allocai(void); static void arg(void); static void getarg_c(void); static void getarg_uc(void); @@ -566,7 +568,9 @@ 
static size_t data_offset, data_length; static instr_t instr_vector[] = { #define entry(value) { NULL, #value, value } entry(name), - entry(prolog), entry(ellipsis), + entry(prolog), + entry(frame), entry(tramp), + entry(ellipsis), entry(allocai), entry(arg), entry(getarg_c), entry(getarg_uc), @@ -1320,7 +1324,9 @@ name(void) { (void)identifier(ch); jit_name(parser.string); } -entry(prolog) entry(ellipsis) +entry(prolog) +entry_im(frame) entry_im(tramp) +entry(ellipsis) void allocai(void) { symbol_t *symbol; diff --git a/doc/body.texi b/doc/body.texi index fe1ae829f..28b942416 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -1444,6 +1444,44 @@ Or to only use a data buffer, if required: @rem{...} @end example +@section Shared stack frame layout +Certain jit generation patterns, for example the original GNU Smalltalk +jit generator, use an approach of a fixed trampoline jit code, and +later generation of code that jumps around, assuming a fixed layout +stack frame. + +To help with this pattern of code generation, @lightning{} provides +the @code{jit_frame} and the @code{jit_tramp} interfaces, to define +or to assume a stack frame. Either @code{jit_frame} or @code{jit_tramp} +must be the first call after @code{jit_prolog}. + +@deftypefun void jit_frame (jit_int32_t @var{frame}) +@var{frame} defines the size in bytes of the current function +stack frame. To calculate its value, a good formula is the maximum number +of arguments to any called native function times eight, plus the +sum of the arguments to any call to @code{jit_allocai}. @lightning{} +automatically adjusts this value for any backend specific stack memory +it may need, or any alignment constraint. +To ensure trampoline code is correct, @lightning{} will save all +callee save registers in the prolog and reload them in the epilog. +@end deftypefun + +@deftypefun void jit_tramp (jit_int32_t @var{frame}) +@var{frame} must be the same value that the dispatcher defined with its +@code{jit_frame} call. 
+The only difference between @code{jit_frame} and @code{jit_tramp} is that +@code{jit_tramp} omits generation of a prolog and epilog for the +current function. +Most trampoline based jit generation implements a single dispatch method +and later emits code that knows how to return to the dispatch routine, +and the later emitted code is called with a non-local goto. In such cases, +emitting a native prolog (and epilog) is just a waste of space. +@end deftypefun + +It is a fatal error if more than @var{frame} bytes are required +either in the dispatcher defined with @code{jit_frame} or the +"trampolined" code, defined with @code{jit_tramp}. + @node Acknowledgements @chapter Acknowledgements diff --git a/include/lightning.h b/include/lightning.h index fc9fb9dfa..600174e87 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -907,6 +907,10 @@ extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t); extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*); #define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w) extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t); +#define jit_frame(u) _jit_frame(_jit,u) +extern void _jit_frame(jit_state_t*, jit_int32_t); +#define jit_tramp(u) _jit_tramp(_jit,u) +extern void _jit_tramp(jit_state_t*, jit_int32_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index b620a1b06..128bd1ad8 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -349,6 +349,13 @@ struct jit_function { jit_int32_t *regoff; jit_regset_t regset; jit_int32_t stack; + + /* Helper for common jit generation pattern, used in GNU Smalltalk + * and possibly others, where a static frame layout is required or + * assumed. 
*/ + jit_int32_t frame; + jit_uint32_t define_frame : 1; + jit_uint32_t assume_frame : 1; }; /* data used only during jit generation */ @@ -559,6 +566,9 @@ _jit_save(jit_state_t*, jit_int32_t); extern void _jit_load(jit_state_t*, jit_int32_t); +#define jit_trampoline(u,v) _jit_trampoline(_jit, u, v) +extern void _jit_trampoline(jit_state_t*, jit_int32_t, jit_bool_t); + #define jit_optimize() _jit_optimize(_jit) extern void _jit_optimize(jit_state_t*); diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index 804fecba1..a2d2ce060 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -2145,6 +2145,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; @@ -2188,6 +2195,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; if (_jitc->function->stack) MOV_XSP(SP_REGNO, FP_REGNO); #define LOAD(L, R, O) \ diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index a43c94ce9..8fe43381d 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -2504,6 +2504,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 7) & -8; /* ldgp gp, 0(pv) 
*/ @@ -2544,6 +2551,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; movr(_SP_REGNO, _FP_REGNO); ldxi(_RA_REGNO, _SP_REGNO, 0); ldxi(_FP_REGNO, _SP_REGNO, 8); diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index 1be40b52b..02edeed21 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -3694,6 +3694,16 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) { + if (jit_thumb_p() && !_jitc->thumb) + _jitc->thumb = _jit->pc.w; + return; + } + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3732,6 +3742,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; addi(_SP_REGNO, _FP_REGNO, 16); if (jit_cpu.abi) VPOP_F64(_D8_REGNO, 8); diff --git a/lib/jit_arm.c b/lib/jit_arm.c index bdafcddd5..7dbf78aa4 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -55,6 +55,8 @@ typedef union _jit_thumb_t { static jit_int32_t _jit_get_reg_pair(jit_state_t*); #define jit_unget_reg_pair(rn) _jit_unget_reg_pair(_jit,rn) static void _jit_unget_reg_pair(jit_state_t*,jit_int32_t); +# define must_align_p(node) _must_align_p(_jit, node) +static jit_bool_t _must_align_p(jit_state_t*,jit_node_t*); #define load_const(uniq,r0,i0) _load_const(_jit,uniq,r0,i0) static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t); #define flush_consts() _flush_consts(_jit) @@ -1042,9 +1044,13 @@ _emit_code(jit_state_t *_jit) jit_regarg_set(node, value); switch (node->code) { case jit_code_note: case jit_code_name: + if 
(must_align_p(node->next)) + nop(2); node->u.w = _jit->pc.w; break; case jit_code_label: + if (must_align_p(node->next)) + nop(2); /* remember label is defined */ node->flag |= jit_flag_patch; node->u.w = _jit->pc.w; @@ -1665,6 +1671,30 @@ _jit_unget_reg_pair(jit_state_t *_jit, jit_int32_t reg) } } +/* A prolog must be aligned at a 4 byte boundary. + * Previously this condition held by accident and + * needed no test, but with the jit_frame and jit_tramp + * code it must be checked explicitly */ +static jit_bool_t +_must_align_p(jit_state_t *_jit, jit_node_t *node) +{ + if (jit_thumb_p() && (_jit->pc.w & 3)) { + for (; node; node = node->next) { + switch (node->code) { + case jit_code_note: + case jit_code_name: + case jit_code_label: + break; + case jit_code_prolog: + return (1); + default: + return (0); + } + } + } + return (0); +} + static void _load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0) { diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 30de58cc8..479d6cf34 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -2621,6 +2621,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) jit_int32_t regno; jit_word_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + /* hppa stack grows up */ + assert(_jitc->function->self.aoff <= _jitc->function->frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = _jitc->function->frame; + } _jitc->function->stack = ((_jitc->function->self.aoff - _jitc->function->self.alen - _jitc->function->self.size) + 63) & -64; @@ -2651,6 +2658,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) jit_int32_t regno; jit_word_t offset; + if (_jitc->function->assume_frame) + return; /* Restore any modified callee save registers */ offset = alloca_offset - 140; for (regno = 0; regno < jit_size(gr); regno++, offset += 4) { diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 6774e3622..bf701418d 100644 --- a/lib/jit_ia64-cpu.c +++ 
b/lib/jit_ia64-cpu.c @@ -3297,7 +3297,7 @@ _B4(jit_state_t *_jit, jit_word_t _p, assert(!(p & ~0x1L)); assert(!(tp & ~0x7L)); TSTPRED(_p); - inst((d<<37)|(wh<<33)|(x6<<27)|(b<<13)|(p<<12)|(tp<<6)|_p, INST_B); + inst((d<<35)|(wh<<33)|(x6<<27)|(b<<13)|(p<<12)|(tp<<6)|_p, INST_B); } static void @@ -5051,7 +5051,7 @@ _bsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1, static void _jmpr(jit_state_t *_jit, jit_int32_t r0) { - MOV_rn_br(r0, BR_6); + MOV_br_rn(BR_6, r0); BR(BR_6); } @@ -5117,6 +5117,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t reg, ruse, rout; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 15) & -16; @@ -5130,11 +5137,15 @@ _prolog(jit_state_t *_jit, jit_node_t *node) ruse = _jitc->rout - GR_32; /* How many out argument registers required? 
*/ - for (reg = _OUT0; reg <= _OUT7; reg++) { - if (!jit_regset_tstbit(&_jitc->function->regset, reg)) - break; + if (!_jitc->function->define_frame) { + for (reg = _OUT0; reg <= _OUT7; reg++) { + if (!jit_regset_tstbit(&_jitc->function->regset, reg)) + break; + } + rout = reg - _OUT0; } - rout = reg - _OUT0; + else + rout = 8; /* Do not know if will call div/mod functions (sqrt* needs one) */ if (rout < 2) @@ -5181,6 +5192,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; if (jit_regset_tstbit(&_jitc->function->regset, JIT_F0)) LDF_FILL(rn(JIT_F0), GR_4); if (jit_regset_tstbit(&_jitc->function->regset, JIT_F1)) { diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index ee2ebcd92..0a9eccd84 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -1256,7 +1256,7 @@ _emit_code(jit_state_t *_jit) undo.prolog_offset = _jitc->prolog.offset; restart_function: _jitc->again = 0; - if (_jitc->jump) { + if (_jitc->jump && !_jitc->function->assume_frame) { /* remember prolog to hide offset adjustment for a jump * to the start of a function, what is expected to be * a common practice as first jit instruction */ diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index ce7e7edda..8976b314b 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -2862,6 +2862,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t index; jit_int32_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } #if NEW_ABI _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ @@ -2900,6 +2907,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) { jit_int32_t index; jit_int32_t offset; + if (_jitc->function->assume_frame) + return; /* callee save registers */ 
movr(_SP_REGNO, _BP_REGNO); offset = stack_framesize - (sizeof(jit_word_t) << 1); diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index 1413875e1..ed9306557 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -3072,6 +3072,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) unsigned long regno; jit_word_t offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen + _jitc->function->self.size - _jitc->function->self.aoff) + 15) & -16; @@ -3124,6 +3131,8 @@ _epilog(jit_state_t *_jit, jit_node_t *node) unsigned long regno; jit_word_t offset; + if (_jitc->function->assume_frame) + return; #if __ppc__ LWZ(_SP_REGNO, _SP_REGNO, 0); ldxi(_R0_REGNO, _SP_REGNO, 8); diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 758f52ad9..08da19f51 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -1280,7 +1280,7 @@ _emit_code(jit_state_t *_jit) restart_function: _jitc->again = 0; #if __powerpc__ - if (_jitc->jump) { + if (_jitc->jump && !_jitc->function->assume_frame) { /* remember prolog to hide offset adjustment for a jump * to the start of a function, what is expected to be * a common practice as first jit instruction */ diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c index afba9db21..dfa4e56e5 100644 --- a/lib/jit_s390x-cpu.c +++ b/lib/jit_s390x-cpu.c @@ -3256,6 +3256,13 @@ static void _prolog(jit_state_t *_jit, jit_node_t *i0) { jit_int32_t regno, offset; + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ 
_jitc->function->self.aoff) + 7) & -8; @@ -3299,6 +3306,8 @@ static void _epilog(jit_state_t *_jit, jit_node_t *i0) { jit_int32_t regno, offset; + if (_jitc->function->assume_frame) + return; for (regno = 0; regno < jit_size(gprs) - 1; regno++) { if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno])) break; diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index c4b5521c2..ba56c6159 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -1620,6 +1620,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } /* align at 16 bytes boundary */ _jitc->function->stack = ((stack_framesize + _jitc->function->self.alen - @@ -1649,6 +1656,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; /* (most) other backends do not save incoming arguments, so, * only save locals here */ if (jit_regset_tstbit(&_jitc->function->regset, _L0)) diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 8a0ce1149..0a5d3302a 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -3326,6 +3326,13 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->define_frame || _jitc->function->assume_frame) { + jit_int32_t frame = -_jitc->function->frame; + assert(_jitc->function->self.aoff >= frame); + if (_jitc->function->assume_frame) + return; + _jitc->function->self.aoff = frame; + } #if __WORDSIZE == 64 && __CYGWIN__ _jitc->function->stack = (((/* first 32 bytes must be allocated */ (_jitc->function->self.alen > 32 ? 
@@ -3406,6 +3413,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + if (_jitc->function->assume_frame) + return; /* callee save registers */ movr(_RSP_REGNO, _RBP_REGNO); #if __WORDSIZE == 32 diff --git a/lib/lightning.c b/lib/lightning.c index 8642cf89a..42c3a45f7 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1920,6 +1920,45 @@ fail: return (NULL); } +void +_jit_frame(jit_state_t *_jit, jit_int32_t frame) +{ + jit_trampoline(frame, 1); +} + +void +_jit_tramp(jit_state_t *_jit, jit_int32_t frame) +{ + jit_trampoline(frame, 0); +} + +void +_jit_trampoline(jit_state_t *_jit, jit_int32_t frame, jit_bool_t prolog) +{ + jit_int32_t regno; + + /* Must be called right after prolog, just to simplify + * tests and know there is a current function and that + * _jitc->function->self.aoff is still at its value before any alloca */ + assert(_jitc->tail && _jitc->tail->code == jit_code_prolog); + + /* + 24 for 3 possible spilled temporaries (that could be a double) */ + frame += 24; +#if defined(__hppa__) + frame += _jitc->function->self.aoff; +#else + frame -= _jitc->function->self.aoff; +#endif + _jitc->function->frame = frame; + if (prolog) + _jitc->function->define_frame = 1; + else + _jitc->function->assume_frame = 1; + for (regno = 0; regno < _jitc->reglen; regno++) + if (jit_class(_rvs[regno].spec) & jit_class_sav) + jit_regset_setbit(&_jitc->function->regset, regno); +} + /* Compute initial reglive and regmask set values of a basic block. * reglive is the set of known live registers * regmask is the set of registers not referenced in the block