diff --git a/ChangeLog b/ChangeLog index 186cea9ca..50b10b1b7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +2015-02-17 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, + lib/jit_alpha-cpu.c, lib/jit_alpha.c, + lib/jit_arm-cpu.c, lib/jit_arm.c, + lib/jit_hppa-cpu.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc.c, + lib/jit_s390-cpu.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc.c, + lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new + jit_allocar(offs, size) interface, which receives + two integer register arguments, allocates space + dynamically on the stack, returns the offset in + the first argument, and uses the second argument + for the size in bytes of the memory to be allocated. + + * check/allocar.ok, check/allocar.tst: New files + implementing test cases for the new jit_allocar + interface. + + * check/Makefile.am, check/lightning.c: Update for + the new test case and interface. + + * doc/body.texi: Add documentation of the new + interface.
+ 2015-02-17 Paulo Andrade * include/lightning/jit_x86.h, lib/jit_x86-cpu.c, diff --git a/check/Makefile.am b/check/Makefile.am index 301e7da04..4844c414d 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -47,6 +47,7 @@ EXTRA_DIST = \ add.tst add.ok \ align.tst align.ok \ allocai.tst allocai.ok \ + allocar.tst allocar.ok \ bp.tst bp.ok \ divi.tst divi.ok \ fib.tst fib.ok \ @@ -106,7 +107,7 @@ EXTRA_DIST = \ base_TESTS = \ 3to2 add align allocai \ - bp divi fib rpn \ + allocar bp divi fib rpn \ ldstr ldsti \ ldstxr ldstxi \ ldstr-c ldstxr-c ldstxi-c \ @@ -133,8 +134,8 @@ if test_x86_x87 #x87_TESTS = $(addsuffix .x87, $(base_TESTS)) x87_TESTS = \ 3to2.x87 add.x87 allocai.x87 \ - bp.x87 divi.x87 fib.x87 rpn.x87 \ - ldstr.x87 ldsti.x87 \ + allocar.x87 bp.x87 divi.x87 fib.x87 \ + rpn.x87 ldstr.x87 ldsti.x87 \ ldstxr.x87 ldstxi.x87 \ ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87 \ cvt.x87 branch.x87 \ @@ -155,8 +156,8 @@ TESTS += $(x87_TESTS) #x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS)) x87_nodata_TESTS = \ 3to2.x87.nodata add.x87.nodata allocai.x87.nodata \ - bp.x87.nodata divi.x87.nodata fib.x87.nodata rpn.x87.nodata \ - ldstr.x87.nodata ldsti.x87.nodata \ + allocar.x87.nodata bp.x87.nodata divi.x87.nodata fib.x87.nodata \ + rpn.x87.nodata ldstr.x87.nodata ldsti.x87.nodata \ ldstxr.x87.nodata ldstxi.x87.nodata \ ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata \ cvt.x87.nodata branch.x87.nodata \ @@ -179,8 +180,8 @@ if test_arm_arm #arm_TESTS = $(addsuffix .arm, $(base_TESTS)) arm_TESTS = \ 3to2.arm add.arm align.arm allocai.arm \ - bp.arm divi.arm fib.arm rpn.arm \ - ldstr.arm ldsti.arm \ + allocar.arm bp.arm divi.arm fib.arm \ + rpn.arm ldstr.arm ldsti.arm \ ldstxr.arm ldstxi.arm \ ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \ cvt.arm hton.arm branch.arm \ @@ -204,8 +205,8 @@ if test_arm_swf #swf_TESTS = $(addsuffix .swf, $(base_TESTS)) swf_TESTS = \ 3to2.swf add.swf allocai.swf \ - bp.swf divi.swf fib.swf rpn.swf \ - ldstr.swf ldsti.swf \ + 
allocar.swf bp.swf divi.swf fib.swf \ + rpn.swf ldstr.swf ldsti.swf \ ldstxr.swf ldstxi.swf \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ cvt.swf hton.swf branch.swf \ @@ -227,8 +228,8 @@ if test_arm_arm #arm_swf_TESTS = $(addsuffix .arm.swf, $(base_TESTS)) arm_swf_TESTS = \ 3to2.arm.swf add.arm.swf allocai.arm.swf \ - bp.arm.swf divi.arm.swf fib.arm.swf rpn.arm.swf \ - ldstr.arm.swf ldsti.arm.swf \ + allocar.arm.swf bp.arm.swf divi.arm.swf fib.arm.swf \ + rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \ ldstxr.arm.swf ldstxi.arm.swf \ ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \ cvt.arm.swf hton.arm.swf branch.arm.swf \ @@ -251,8 +252,8 @@ if test_arm_arm #arm4_swf_TESTS = $(addsuffix .arm4.swf, $(base_TESTS)) arm4_swf_TESTS = \ 3to2.arm4.swf add.arm4.swf allocai.arm4.swf \ - bp.arm4.swf divi.arm4.swf fib.arm4.swf rpn.arm4.swf \ - ldstr.arm4.swf ldsti.arm4.swf \ + allocar.arm4.swf bp.arm4.swf divi.arm4.swf fib.arm4.swf \ + rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \ ldstxr.arm4.swf ldstxi.arm4.swf \ ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \ cvt.arm4.swf hton.arm4.swf branch.arm4.swf \ @@ -277,8 +278,8 @@ if test_nodata #nodata_TESTS = $(addsuffix .nodata, $(base_TESTS)) nodata_TESTS = \ 3to2.nodata add.nodata allocai.nodata \ - bp.nodata divi.nodata fib.nodata rpn.nodata \ - ldstr.nodata ldsti.nodata \ + allocar.nodata bp.nodata divi.nodata fib.nodata \ + rpn.nodata ldstr.nodata ldsti.nodata \ ldstxr.nodata ldstxi.nodata \ ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ cvt.nodata branch.nodata \ diff --git a/check/allocar.ok b/check/allocar.ok new file mode 100644 index 000000000..516b1e77f --- /dev/null +++ b/check/allocar.ok @@ -0,0 +1,4 @@ +1 2 3 +3 4 5 +5 6 7 +7 8 9 diff --git a/check/allocar.tst b/check/allocar.tst new file mode 100644 index 000000000..4870e61cd --- /dev/null +++ b/check/allocar.tst @@ -0,0 +1,402 @@ +#define szof_c 1 +#define szof_uc szof_c +#define szof_s 2 +#define szof_us szof_s +#define szof_i 4 +#if __WORDSIZE == 64 +# 
define szof_ui szof_i +# define szof_l 8 +#endif +#define szof_f 4 +#define szof_d 8 + +#define FILL(T) \ + name fill##T \ +fill##T: \ + prolog \ + arg $argp \ + getarg %v0 $argp \ + arg $argi \ + getarg %r0 $argi \ + muli %r0 %r0 szof##T \ + addr %v1 %v0 %r0 \ + movi %r0 0 \ +fill##T##loop: \ + bger fill##T##done %v0 %v1 \ + str##T %v0 %r0 \ + addi %r0 %r0 1 \ + addi %v0 %v0 szof##T \ + jmpi fill##T##loop \ +fill##T##done: \ + ret \ + epilog +#define FILLF(T) \ + name fill##T \ +fill##T: \ + prolog \ + arg $argp \ + getarg %v0 $argp \ + arg $argi \ + getarg %r0 $argi \ + muli %r0 %r0 szof##T \ + addr %v1 %v0 %r0 \ + movi##T %f0 0.0 \ +fill##T##loop: \ + bger fill##T##done %v0 %v1 \ + str##T %v0 %f0 \ + addi##T %f0 %f0 1.0 \ + addi %v0 %v0 szof##T \ + jmpi fill##T##loop \ +fill##T##done: \ + ret \ + epilog + +#define fill_uc fill_c +#define fill_us fill_s +#define fill_ui fill_i + +#define ARG( T, N) arg $arg##T##N +#define ARGF( T, N) arg##T $arg##T##N +#define ARG1( K, T) ARG##K(T, 0) +#define ARG2( K, T) ARG1( K, T) ARG##K(T, 1) +#define ARG3( K, T) ARG2( K, T) ARG##K(T, 2) +#define ARG4( K, T) ARG3( K, T) ARG##K(T, 3) +#define ARG5( K, T) ARG4( K, T) ARG##K(T, 4) +#define ARG6( K, T) ARG5( K, T) ARG##K(T, 5) +#define ARG7( K, T) ARG6( K, T) ARG##K(T, 6) +#define ARG8( K, T) ARG7( K, T) ARG##K(T, 7) +#define ARG9( K, T) ARG8( K, T) ARG##K(T, 8) +#define ARG10(K, T) ARG9( K, T) ARG##K(T, 9) +#define ARG11(K, T) ARG10(K, T) ARG##K(T, 10) +#define ARG12(K, T) ARG11(K, T) ARG##K(T, 11) +#define ARG13(K, T) ARG12(K, T) ARG##K(T, 12) +#define ARG14(K, T) ARG13(K, T) ARG##K(T, 13) +#define ARG15(K, T) ARG14(K, T) ARG##K(T, 14) +#define ARG16(K, T) ARG15(K, T) ARG##K(T, 15) +#define ARG_c(N) ARG##N( , _c) +#define ARG_uc(N) ARG##N( , _uc) +#define ARG_s(N) ARG##N( , _s) +#define ARG_us(N) ARG##N( , _us) +#define ARG_i(N) ARG##N( , _i) +#define ARG_ui(N) ARG##N( , _ui) +#define ARG_l(N) ARG##N( , _l) +#define ARG_f(N) ARG##N(F, _f) +#define ARG_d(N) ARG##N(F, _d) + 
+#define CHK(N, T, V) \ + getarg %r0 $arg##T##V \ + ldxi##T %r1 %v0 $(V * szof##T) \ + beqr N##T##V %r0 %r1 \ + calli @abort \ +N##T##V: +#define CHKF(N, T, V) \ + getarg##T %f0 $arg##T##V \ + ldxi##T %f1 %v0 $(V * szof##T) \ + beqr##T N##T##V %f0 %f1 \ + calli @abort \ +N##T##V: + +#define GET1( K, N, T, V) CHK##K(N, T, 0) +#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1) +#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2) +#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3) +#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4) +#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5) +#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6) +#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7) +#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8) +#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9) +#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10) +#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11) +#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12) +#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13) +#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14) +#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15) + +#define GET_c(N, M) GET##N( , c##N, _c, M) +#define GET_uc(N, M) GET##N( , uc##N, _uc, M) +#define GET_s(N, M) GET##N( , s##N, _s, M) +#define GET_us(N, M) GET##N( , us##N, _us, M) +#define GET_i(N, M) GET##N( , i##N, _i, M) +#define GET_ui(N, M) GET##N( , ui##N, _ui, M) +#define GET_l(N, M) GET##N( , l##N, _l, M) +#define GET_f(N, M) GET##N(F, f##N, _f, M) +#define GET_d(N, M) GET##N(F, d##N, _d, M) + +#define PUSH( T, V) pushargi V +#define PUSHF( T, V) pushargi##T V +#define PUSH0( K, T) /**/ +#define PUSH1( K, T) PUSH##K(T, 0) +#define PUSH2( K, T) PUSH1( K, T) PUSH##K(T, 1) +#define PUSH3( K, T) PUSH2( K, T) PUSH##K(T, 2) +#define PUSH4( K, T) PUSH3( K, T) PUSH##K(T, 3) +#define PUSH5( K, T) PUSH4( K, T) PUSH##K(T, 4) +#define PUSH6( 
K, T) PUSH5( K, T) PUSH##K(T, 5) +#define PUSH7( K, T) PUSH6( K, T) PUSH##K(T, 6) +#define PUSH8( K, T) PUSH7( K, T) PUSH##K(T, 7) +#define PUSH9( K, T) PUSH8( K, T) PUSH##K(T, 8) +#define PUSH10(K, T) PUSH9( K, T) PUSH##K(T, 9) +#define PUSH11(K, T) PUSH10(K, T) PUSH##K(T, 10) +#define PUSH12(K, T) PUSH11(K, T) PUSH##K(T, 11) +#define PUSH13(K, T) PUSH12(K, T) PUSH##K(T, 12) +#define PUSH14(K, T) PUSH13(K, T) PUSH##K(T, 13) +#define PUSH15(K, T) PUSH14(K, T) PUSH##K(T, 14) +#define PUSH16(K, T) PUSH15(K, T) PUSH##K(T, 15) + +#define PUSH_c( N) PUSH##N( , _c) +#define PUSH_uc(N) PUSH##N( , _uc) +#define PUSH_s( N) PUSH##N( , _s) +#define PUSH_us(N) PUSH##N( , _us) +#define PUSH_i( N) PUSH##N( , _i) +#define PUSH_ui(N) PUSH##N( , _ui) +#define PUSH_l( N) PUSH##N( , _l) +#define PUSH_f( N) PUSH##N(F, _f) +#define PUSH_d( N) PUSH##N(F, _d) + +/* bottom function */ +#define DEF0(T) \ + name test##T##_0 \ +test##T##_0: \ + prolog \ + ret \ + epilog + +#define DEFN(N, M, T) \ + name test##T##_##N \ +test##T##_##N: \ + prolog \ + arg $argp \ + /* stack buffer in %v0 */ \ + getarg %v0 $argp \ + ARG##T(N) \ + /* validate arguments */ \ + GET##T(N, M) \ + /* heap buffer in %v1 */ \ + prepare \ + pushargi $(N * szof##T) \ + finishi @malloc \ + retval %v1 \ + /* copy stack buffer to heap buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v0 \ + pushargi $(N * szof##T) \ + finishi MEMCPY \ + /* stack buffer for next function in %v2 */ \ + movi %r0 $(M * szof##T) \ + allocar %v2 %r0 \ + addr %v2 %v2 %fp \ + /* fill stack buffer for next function */ \ + prepare \ + pushargr %v2 \ + pushargi M \ + finishi fill##T \ + /* call next function */ \ + prepare \ + pushargr %v2 \ + PUSH##T(M) \ + finishi test##T##_##M \ + /* validate stack buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v0 \ + pushargi $(N * szof##T) \ + finishi @memcmp \ + retval %r0 \ + beqi test##T##_##N##_done %r0 0 \ + calli @abort \ +test##T##_##N##_done: \ + /* release heap buffer */ \ + prepare \ + pushargr
%v1 \ + finishi @free \ + ret \ + epilog + +/* top function */ +#define DEFX(T) \ + name test##T##_17 \ +test##T##_17: \ + prolog \ + /* heap buffer in %v1 */ \ + prepare \ + pushargi $(16 * szof##T) \ + finishi @malloc \ + retval %v1 \ + /* stack buffer for next function in %v2 */ \ + movi %r0 $(16 * szof##T) \ + allocar %v2 %r0 \ + addr %v2 %v2 %fp \ + /* fill stack buffer for next function */ \ + prepare \ + pushargr %v2 \ + pushargi 16 \ + finishi fill##T \ + /* copy stack buffer to heap buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v2 \ + pushargi $(16 * szof##T) \ + finishi MEMCPY \ + /* call next function */ \ + prepare \ + pushargr %v2 \ + PUSH##T(16) \ + finishi test##T##_16 \ + /* validate stack buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v2 \ + pushargi $(16 * szof##T) \ + finishi @memcmp \ + retval %r0 \ + beqi test##T##_17_done %r0 0 \ + calli @abort \ +test##T##_17_done: \ + /* release heap buffer */ \ + prepare \ + pushargr %v1 \ + finishi @free \ + ret \ + epilog + +#define DEF( T) \ + DEF0( T) \ + DEFN( 1, 0, T) \ + DEFN( 2, 1, T) \ + DEFN( 3, 2, T) \ + DEFN( 4, 3, T) \ + DEFN( 5, 4, T) \ + DEFN( 6, 5, T) \ + DEFN( 7, 6, T) \ + DEFN( 8, 7, T) \ + DEFN( 9, 8, T) \ + DEFN(10, 9, T) \ + DEFN(11, 10, T) \ + DEFN(12, 11, T) \ + DEFN(13, 12, T) \ + DEFN(14, 13, T) \ + DEFN(15, 14, T) \ + DEFN(16, 15, T) \ + DEFX(T) + +#define CALL(T) calli test##T##_17 + +.data 16 +fmt: +.c "%d %d %d\n" +.code + jmpi main + +#if _AIX +# define MEMCPY memcpy +/* error: Function not implemented (memcpy) */ + name memcpy +memcpy: + prolog + arg $dst + arg $src + arg $len + getarg %r0 $dst + getarg %r1 $src + getarg %r2 $len + movr %v1 %r0 + blti memcpy_done %r2 1 +memcpy_loop: + subi %r2 %r2 1 + ldxr_c %v0 %r1 %r2 + stxr_c %r2 %r0 %v0 + bgti memcpy_loop %r2 0 +memcpy_done: + retr %v1 + epilog +#else +# define MEMCPY @memcpy +#endif + + FILL(_c) + FILL(_s) + FILL(_i) +#if __WORDSIZE == 64 + FILL(_l) +#endif + FILLF(_f) + FILLF(_d) + + DEF(_c) + DEF(_uc) +
DEF(_s) + DEF(_us) + DEF(_i) +#if __WORDSIZE == 64 + DEF(_ui) + DEF(_l) +#endif + DEF(_f) + DEF(_d) + + name main +main: + prolog + + CALL(_c) + CALL(_uc) + CALL(_s) + CALL(_us) + CALL(_i) +#if __WORDSIZE == 64 + CALL(_ui) + CALL(_l) +#endif + CALL(_f) + CALL(_d) + + // loop control + movi %v2 1 + + // loop a few times calling allocar +loop: + // allocate 12 bytes + movi %r0 12 + allocar %v0 %r0 + + // offset + movr %v1 %v0 + + // 1 + stxr_i %v1 %fp %v2 + + // 2 + addi %v2 %v2 1 + addi %v1 %v1 4 + stxr_i %v1 %fp %v2 + + // 3 + addi %v2 %v2 1 + addi %v1 %v1 4 + stxr_i %v1 %fp %v2 + + // reload + movr %v1 %v0 + + // 1 + ldxr_i %r0 %fp %v1 + + // 2 + addi %v1 %v1 4 + ldxr_i %r1 %fp %v1 + + // 3 + addi %v1 %v1 4 + ldxr_i %r2 %fp %v1 + + prepare + pushargi fmt + pushargr %r0 + pushargr %r1 + pushargr %r2 + finishi @printf + blti loop %v2 9 + + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index f1d2c79c8..7dd88fbc8 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -271,7 +271,7 @@ static void align(void); static void name(void); static void prolog(void); static void frame(void); static void tramp(void); static void ellipsis(void); -static void allocai(void); +static void allocai(void); static void allocar(void); static void arg(void); static void getarg_c(void); static void getarg_uc(void); static void getarg_s(void); static void getarg_us(void); @@ -580,7 +580,7 @@ static instr_t instr_vector[] = { entry(prolog), entry(frame), entry(tramp), entry(ellipsis), - entry(allocai), + entry(allocai), entry(allocar), entry(arg), entry(getarg_c), entry(getarg_uc), entry(getarg_s), entry(getarg_us), @@ -1371,6 +1371,7 @@ allocai(void) { symbol->type = type_l; symbol->value.i = i; } +entry_ir_ir(allocar) entry_ca(arg) entry_ia(getarg_c) entry_ia(getarg_uc) entry_ia(getarg_s) entry_ia(getarg_us) diff --git a/doc/body.texi b/doc/body.texi index 9b8931de4..23b8b8fbc 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -175,7 +175,8 @@ operation, there is a the 
@code{_u} modifier. There are at least seven integer registers, of which six are general-purpose, while the last is used to contain the frame pointer (@code{FP}). The frame pointer can be used to allocate and access local -variables on the stack, using the @code{allocai} instruction. +variables on the stack, using the @code{allocai} or @code{allocar} +instruction. Of the general-purpose registers, at least three are guaranteed to be preserved across function calls (@code{V0}, @code{V1} and @@ -540,16 +541,16 @@ bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow} @end example @item Jump and return operations -These accept one argument except @code{ret} which has none; the -difference between @code{finishi} and @code{calli} is that the -latter does not clean the stack from pushed parameters (if any) -and the former must @strong{always} follow a @code{prepare} +These accept one argument except @code{ret} and @code{jmpi} which +have none; the difference between @code{finishi} and @code{calli} +is that the latter does not clean the stack from pushed parameters +(if any) and the former must @strong{always} follow a @code{prepare} instruction. @example -callr (not specified) @r{function call to a register} -calli (not specified) @r{function call to O1} -finishr (not specified) @r{function call to a register} -finishi (not specified) @r{function call to O1} +callr (not specified) @r{function call to register O1} +calli (not specified) @r{function call to immediate O1} +finishr (not specified) @r{function call to register O1} +finishi (not specified) @r{function call to immediate O1} jmpr (not specified) @r{unconditional jump to register} jmpi (not specified) @r{unconditional jump} ret (not specified) @r{return from subroutine} @@ -627,17 +628,32 @@ an "unbound" label. These macros are used to set up a function prolog. The @code{allocai} call accept a single integer argument and returns an offset value -for stack storage access. +for stack storage access. 
The @code{allocar} call accepts two register +arguments: the first is set to the offset for stack access, and the +second holds the size in bytes to allocate. @example prolog (not specified) @r{function prolog} allocai (not specified) @r{reserve space on the stack} +allocar (not specified) @r{allocate space on the stack} @end example @code{allocai} receives the number of bytes to allocate and returns the offset from the frame pointer register @code{FP} to the base of the area. +@code{allocar} receives two register arguments. The first is where +to store the offset from the frame pointer register @code{FP} to the +base of the area. The second argument is the size in bytes. Note +that @code{allocar} performs dynamic allocation, and special care +should be taken when using it. If called in a loop, every iteration +will allocate stack space. Stack space is aligned to between 8 and 64 +bytes depending on backend requirements, even if allocating only one byte. +It is advisable not to use it with @code{frame} and @code{tramp}; it +should work with @code{frame} with special care to call only once, +but is not supported if used in @code{tramp}, even if called only +once. + As a small appetizer, here is a small function that adds 1 to the input parameter (an @code{int}).
I'm using an assembly-like syntax here which is a bit different from the one used when writing real subroutines with diff --git a/include/lightning.h b/include/lightning.h index 0379a2f49..a0204fbe4 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -193,6 +193,7 @@ typedef enum { #define jit_ellipsis() _jit_ellipsis(_jit) #define jit_allocai(u) _jit_allocai(_jit,u) +#define jit_allocar(u, v) _jit_allocar(_jit,u,v) #define jit_arg() _jit_arg(_jit) #define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) @@ -890,6 +891,7 @@ extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*); extern void _jit_prolog(jit_state_t*); extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); +extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); extern void _jit_ellipsis(jit_state_t*); extern jit_node_t *_jit_arg(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index b88463b26..5c754c3f6 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -363,6 +363,11 @@ struct jit_function { jit_int32_t frame; jit_uint32_t define_frame : 1; jit_uint32_t assume_frame : 1; + + /* alloca offset offset */ + jit_int32_t aoffoff; + /* uses allocar flag */ + jit_uint32_t allocar : 1; }; /* data used only during jit generation */ diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index f49ee16ea..5b094b773 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -2187,6 +2187,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -2194,6 +2195,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; _jitc->function->stack = ((_jitc->function->self.alen - 
/* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; @@ -2232,6 +2235,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) #undef SPILL if (_jitc->function->stack) subi(SP_REGNO, SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 411571962..fdd0c8aea 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -173,6 +173,37 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t r0, r1; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + r0 = jit_get_reg(jit_class_gpr); + jit_negr(r0, v); + jit_andi(r0, r0, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, r0); + /* Cannot "addr sp, sp, reg" because in this context "sp" is "[w|x]zr", + * the zero register */ +#if 0 + jit_addr(JIT_SP, JIT_SP, r0); +#else + r1 = jit_get_reg(jit_class_gpr); + /* note that "mov r1, sp" does not work, but the proper encoding + * can be triggered before actually emitting with "add r1, sp, 0" */ + jit_addi(r1, JIT_SP, 0); + jit_addr(r1, r1, r0); + jit_addi(JIT_SP, r1, 0); + jit_unget_reg(r1); +#endif + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(r0); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index a550630f9..4680481e8 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -2567,6 +2567,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame
= -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -2574,6 +2575,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 7) & -8; /* ldgp gp, 0(pv) */ @@ -2609,6 +2612,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index cfd91cf4e..867d2a762 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -182,6 +182,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index a54b4efb3..2f224daad 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -3745,6 +3745,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -3755,6 +3756,8 @@ 
_prolog(jit_state_t *_jit, jit_node_t *node) } _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3788,6 +3791,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) movr(_FP_REGNO, _SP_REGNO); if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 77466383e..1648cb112 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -272,6 +272,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index efe6220f6..8b0b4389f 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -2655,6 +2655,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = _jitc->function->frame; } + if (_jitc->function->allocar) { + _jitc->function->self.aoff += 63; + _jitc->function->self.aoff &= -64; + } _jitc->function->stack = ((_jitc->function->self.aoff - _jitc->function->self.alen - _jitc->function->self.size) + 63) & -64; @@ -2677,6 +2681,13 @@ 
_prolog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, fr[regno])) stxi_d(offset, _FP_REGNO, rn(fr[regno])); } + + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 3a7913821..d0919056f 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -196,6 +196,27 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (offset); } +/* Allocate v bytes (rounded up to a multiple of 64) by raising SP; u receives the FP-relative offset of the area. */ +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_addi(reg, v, 63); + jit_andi(reg, reg, -64); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(JIT_SP, JIT_SP, reg); + /* save offset + size so the next allocar does not return the same area again */ + jit_addr(reg, reg, u); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, reg); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 7a9a97491..5cbd32727 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -5190,6 +5190,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 15) & -16; @@ -5253,6 +5255,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) addi(GR_2, GR_4, 80); STF_SPILL(GR_2, rn(JIT_F5)); } + + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, GR_4, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 59c915392..8575f256b 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -299,6
+299,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 38141ae17..9ba759d5b 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -2909,6 +2909,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; #if NEW_ABI _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ @@ -2940,6 +2942,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + index = jit_get_reg(jit_class_gpr); + movi(rn(index), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index)); + jit_unget_reg(index); + } } static void diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 6ee35fbca..9ae2d96a9 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -212,6 +212,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); 
+ jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index ae4d60360..13078141a 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -3225,6 +3225,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) { + _jitc->function->self.aoff -= 2 * sizeof(jit_word_t); + _jitc->function->self.aoff &= -16; + } _jitc->function->stack = ((_jitc->function->self.alen + _jitc->function->self.size - _jitc->function->self.aoff) + 15) & -16; @@ -3269,6 +3273,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) #else STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); #endif + + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void @@ -3295,7 +3306,10 @@ _epilog(jit_state_t *_jit, jit_node_t *node) } #else /* __powerpc__ */ - addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) + ldr(_SP_REGNO, _SP_REGNO); + else + addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2); offset = -gpr_save_area; for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) { diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 3abaf7557..d58ada51c 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -192,6 +192,29 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t r0, r1; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 
1; + } + r0 = jit_get_reg(jit_class_gpr); + r1 = jit_get_reg(jit_class_gpr); + jit_ldr(r0, JIT_SP); + jit_negr(r1, v); + jit_andi(r1, r1, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, r1); + jit_addr(JIT_SP, JIT_SP, r1); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_str(JIT_SP, r0); + jit_unget_reg(r1); + jit_unget_reg(r0); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 0d13d706c..556ed18ec 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -3498,6 +3498,8 @@ _prolog(jit_state_t *_jit, jit_node_t *i0) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3550,6 +3552,12 @@ _prolog(jit_state_t *_jit, jit_node_t *i0) #undef SPILL movr(_R13_REGNO, _R15_REGNO); subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack); + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void diff --git a/lib/jit_s390.c b/lib/jit_s390.c index c811acedf..560e93d12 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -159,6 +159,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + 
void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 2a9e2a219..236ffd10c 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -1631,6 +1631,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -1638,6 +1639,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; /* align at 16 bytes boundary */ _jitc->function->stack = ((stack_framesize + _jitc->function->self.alen - @@ -1662,6 +1665,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi(24, _SP_REGNO, _L6_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) stxi(28, _SP_REGNO, _L7_REGNO); + + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index a0ebd940c..9c301a6bd 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -155,6 +155,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(_SP, _SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_x86-cpu.c 
b/lib/jit_x86-cpu.c index a26f32ade..c6d80dfec 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -3437,6 +3437,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -3444,6 +3445,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; #if __X64 && __CYGWIN__ _jitc->function->stack = (((/* first 32 bytes must be allocated */ (_jitc->function->self.alen > 32 ? @@ -3519,6 +3522,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 7b43bd3c5..8e9021308 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -398,6 +398,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) {