From ad589fbb0a308f86f7e1e2de815e05d6b39594bb Mon Sep 17 00:00:00 2001 From: pcpa Date: Tue, 17 Feb 2015 14:37:57 -0200 Subject: [PATCH] Implement jit_allocar for dynamic stack allocation * include/lightning.h, include/lightning/jit_private.h, lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, lib/jit_alpha-cpu.c, lib/jit_alpha.c, lib/jit_arm-cpu.c, lib/jit_arm.c, lib/jit_hppa-cpu.c, lib/jit_hppa.c, lib/jit_ia64-cpu.c, lib/jit_ia64.c, lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_ppc-cpu.c, lib/jit_ppc.c, lib/jit_s390-cpu.c, lib/jit_s390.c, lib/jit_sparc-cpu.c, lib/jit_sparc.c, lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new jit_allocar(offs, size) interface, that receives two integer registers arguments, allocates space dynamically in the stack, returns the offset in the first argument, and uses the second argument for the size in bytes of the memory to be allocated. * check/allocar.ok, check/allocar.tst: New files implementing test cases for the new jit_allocar interface. * check/Makefile.am, check/lightning.c: Update for the new test case and interface. * doc/body.texi: Add documentation of the new interface. 
--- ChangeLog | 29 +++ check/Makefile.am | 31 +-- check/allocar.ok | 4 + check/allocar.tst | 402 ++++++++++++++++++++++++++++++++ check/lightning.c | 5 +- doc/body.texi | 36 ++- include/lightning.h | 2 + include/lightning/jit_private.h | 5 + lib/jit_aarch64-cpu.c | 9 + lib/jit_aarch64.c | 31 +++ lib/jit_alpha-cpu.c | 9 + lib/jit_alpha.c | 22 ++ lib/jit_arm-cpu.c | 9 + lib/jit_arm.c | 22 ++ lib/jit_hppa-cpu.c | 11 + lib/jit_hppa.c | 18 ++ lib/jit_ia64-cpu.c | 9 + lib/jit_ia64.c | 22 ++ lib/jit_mips-cpu.c | 8 + lib/jit_mips.c | 19 ++ lib/jit_ppc-cpu.c | 16 +- lib/jit_ppc.c | 23 ++ lib/jit_s390-cpu.c | 8 + lib/jit_s390.c | 19 ++ lib/jit_sparc-cpu.c | 10 + lib/jit_sparc.c | 22 ++ lib/jit_x86-cpu.c | 9 + lib/jit_x86.c | 19 ++ 28 files changed, 801 insertions(+), 28 deletions(-) create mode 100644 check/allocar.ok create mode 100644 check/allocar.tst diff --git a/ChangeLog b/ChangeLog index 186cea9ca..50b10b1b7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +2015-02-17 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_aarch64-cpu.c, lib/jit_aarch64.c, + lib/jit_alpha-cpu.c, lib/jit_alpha.c, + lib/jit_arm-cpu.c, lib/jit_arm.c, + lib/jit_hppa-cpu.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips.c, + lib/jit_ppc-cpu.c, lib/jit_ppc.c, + lib/jit_s390-cpu.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc.c, + lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new + jit_allocar(offs, size) interface, that receives + two integer registers arguments, allocates space + dynamically in the stack, returns the offset in + the first argument, and uses the second argument + for the size in bytes of the memory to be allocated. + + * check/allocar.ok, check/allocar.tst: New files + implementing test cases for the new jit_allocar + interface. + + * check/Makefile.am, check/lightning.c: Update for + the new test case and interface. + + * doc/body.texi: Add documentation of the new + interface. 
+ 2015-02-17 Paulo Andrade * include/lightning/jit_x86.h, lib/jit_x86-cpu.c, diff --git a/check/Makefile.am b/check/Makefile.am index 301e7da04..4844c414d 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -47,6 +47,7 @@ EXTRA_DIST = \ add.tst add.ok \ align.tst align.ok \ allocai.tst allocai.ok \ + allocar.tst allocar.ok \ bp.tst bp.ok \ divi.tst divi.ok \ fib.tst fib.ok \ @@ -106,7 +107,7 @@ EXTRA_DIST = \ base_TESTS = \ 3to2 add align allocai \ - bp divi fib rpn \ + allocar bp divi fib rpn \ ldstr ldsti \ ldstxr ldstxi \ ldstr-c ldstxr-c ldstxi-c \ @@ -133,8 +134,8 @@ if test_x86_x87 #x87_TESTS = $(addsuffix .x87, $(base_TESTS)) x87_TESTS = \ 3to2.x87 add.x87 allocai.x87 \ - bp.x87 divi.x87 fib.x87 rpn.x87 \ - ldstr.x87 ldsti.x87 \ + allocar.x87 bp.x87 divi.x87 fib.x87 \ + rpn.x87 ldstr.x87 ldsti.x87 \ ldstxr.x87 ldstxi.x87 \ ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87 \ cvt.x87 branch.x87 \ @@ -155,8 +156,8 @@ TESTS += $(x87_TESTS) #x87_nodata_TESTS = $(addsuffix .x87.nodata, $(base_TESTS)) x87_nodata_TESTS = \ 3to2.x87.nodata add.x87.nodata allocai.x87.nodata \ - bp.x87.nodata divi.x87.nodata fib.x87.nodata rpn.x87.nodata \ - ldstr.x87.nodata ldsti.x87.nodata \ + allocar.x87.nodata bp.x87.nodata divi.x87.nodata fib.x87.nodata \ + rpn.x87.nodata ldstr.x87.nodata ldsti.x87.nodata \ ldstxr.x87.nodata ldstxi.x87.nodata \ ldstr-c.x87.nodata ldstxr-c.x87.nodata ldstxi-c.x87.nodata \ cvt.x87.nodata branch.x87.nodata \ @@ -179,8 +180,8 @@ if test_arm_arm #arm_TESTS = $(addsuffix .arm, $(base_TESTS)) arm_TESTS = \ 3to2.arm add.arm align.arm allocai.arm \ - bp.arm divi.arm fib.arm rpn.arm \ - ldstr.arm ldsti.arm \ + allocar.arm bp.arm divi.arm fib.arm \ + rpn.arm ldstr.arm ldsti.arm \ ldstxr.arm ldstxi.arm \ ldstr-c.arm ldstxr-c.arm ldstxi-c.arm \ cvt.arm hton.arm branch.arm \ @@ -204,8 +205,8 @@ if test_arm_swf #swf_TESTS = $(addsuffix .swf, $(base_TESTS)) swf_TESTS = \ 3to2.swf add.swf allocai.swf \ - bp.swf divi.swf fib.swf rpn.swf \ - ldstr.swf ldsti.swf \ + 
allocar.swf bp.swf divi.swf fib.swf \ + rpn.swf ldstr.swf ldsti.swf \ ldstxr.swf ldstxi.swf \ ldstr-c.swf ldstxr-c.swf ldstxi-c.swf \ cvt.swf hton.swf branch.swf \ @@ -227,8 +228,8 @@ if test_arm_arm #arm_swf_TESTS = $(addsuffix .arm.swf, $(base_TESTS)) arm_swf_TESTS = \ 3to2.arm.swf add.arm.swf allocai.arm.swf \ - bp.arm.swf divi.arm.swf fib.arm.swf rpn.arm.swf \ - ldstr.arm.swf ldsti.arm.swf \ + allocar.arm.swf bp.arm.swf divi.arm.swf fib.arm.swf \ + rpn.arm.swf ldstr.arm.swf ldsti.arm.swf \ ldstxr.arm.swf ldstxi.arm.swf \ ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf \ cvt.arm.swf hton.arm.swf branch.arm.swf \ @@ -251,8 +252,8 @@ if test_arm_arm #arm4_swf_TESTS = $(addsuffix .arm4.swf, $(base_TESTS)) arm4_swf_TESTS = \ 3to2.arm4.swf add.arm4.swf allocai.arm4.swf \ - bp.arm4.swf divi.arm4.swf fib.arm4.swf rpn.arm4.swf \ - ldstr.arm4.swf ldsti.arm4.swf \ + allocar.arm4.swf bp.arm4.swf divi.arm4.swf fib.arm4.swf \ + rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf \ ldstxr.arm4.swf ldstxi.arm4.swf \ ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf \ cvt.arm4.swf hton.arm4.swf branch.arm4.swf \ @@ -277,8 +278,8 @@ if test_nodata #nodata_TESTS = $(addsuffix .nodata, $(base_TESTS)) nodata_TESTS = \ 3to2.nodata add.nodata allocai.nodata \ - bp.nodata divi.nodata fib.nodata rpn.nodata \ - ldstr.nodata ldsti.nodata \ + allocar.nodata bp.nodata divi.nodata fib.nodata \ + rpn.nodata ldstr.nodata ldsti.nodata \ ldstxr.nodata ldstxi.nodata \ ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata \ cvt.nodata branch.nodata \ diff --git a/check/allocar.ok b/check/allocar.ok new file mode 100644 index 000000000..516b1e77f --- /dev/null +++ b/check/allocar.ok @@ -0,0 +1,4 @@ +1 2 3 +3 4 5 +5 6 7 +7 8 9 diff --git a/check/allocar.tst b/check/allocar.tst new file mode 100644 index 000000000..4870e61cd --- /dev/null +++ b/check/allocar.tst @@ -0,0 +1,402 @@ +#define szof_c 1 +#define szof_uc szof_c +#define szof_s 2 +#define szof_us szof_s +#define szof_i 4 +#if __WORDSIZE == 64 +# 
define szof_ui szof_i +# define szof_l 8 +#endif +#define szof_f 4 +#define szof_d 8 + +#define FILL(T) \ + name fill##T \ +fill##T: \ + prolog \ + arg $argp \ + getarg %v0 $argp \ + arg $argi \ + getarg %r0 $argi \ + muli %r0 %r0 szof##T \ + addr %v1 %v0 %r0 \ + movi %r0 0 \ +fill##T##loop: \ + bger fill##T##done %v0 %v1 \ + str##T %v0 %r0 \ + addi %r0 %r0 1 \ + addi %v0 %v0 szof##T \ + jmpi fill##T##loop \ +fill##T##done: \ + ret \ + epilog +#define FILLF(T) \ + name fill##T \ +fill##T: \ + prolog \ + arg $argp \ + getarg %v0 $argp \ + arg $argi \ + getarg %r0 $argi \ + muli %r0 %r0 szof##T \ + addr %v1 %v0 %r0 \ + movi##T %f0 0.0 \ +fill##T##loop: \ + bger fill##T##done %v0 %v1 \ + str##T %v0 %f0 \ + addi##T %f0 %f0 1.0 \ + addi %v0 %v0 szof##T \ + jmpi fill##T##loop \ +fill##T##done: \ + ret \ + epilog + +#define fill_uc fill_c +#define fill_us fill_s +#define fill_ui fill_i + +#define ARG( T, N) arg $arg##T##N +#define ARGF( T, N) arg##T $arg##T##N +#define ARG1( K, T) ARG##K(T, 0) +#define ARG2( K, T) ARG1( K, T) ARG##K(T, 1) +#define ARG3( K, T) ARG2( K, T) ARG##K(T, 2) +#define ARG4( K, T) ARG3( K, T) ARG##K(T, 3) +#define ARG5( K, T) ARG4( K, T) ARG##K(T, 4) +#define ARG6( K, T) ARG5( K, T) ARG##K(T, 5) +#define ARG7( K, T) ARG6( K, T) ARG##K(T, 6) +#define ARG8( K, T) ARG7( K, T) ARG##K(T, 7) +#define ARG9( K, T) ARG8( K, T) ARG##K(T, 8) +#define ARG10(K, T) ARG9( K, T) ARG##K(T, 9) +#define ARG11(K, T) ARG10(K, T) ARG##K(T, 10) +#define ARG12(K, T) ARG11(K, T) ARG##K(T, 11) +#define ARG13(K, T) ARG12(K, T) ARG##K(T, 12) +#define ARG14(K, T) ARG13(K, T) ARG##K(T, 13) +#define ARG15(K, T) ARG14(K, T) ARG##K(T, 14) +#define ARG16(K, T) ARG15(K, T) ARG##K(T, 15) +#define ARG_c(N) ARG##N( , _c) +#define ARG_uc(N) ARG##N( , _uc) +#define ARG_s(N) ARG##N( , _s) +#define ARG_us(N) ARG##N( , _us) +#define ARG_i(N) ARG##N( , _i) +#define ARG_ui(N) ARG##N( , _ui) +#define ARG_l(N) ARG##N( , _l) +#define ARG_f(N) ARG##N(F, _f) +#define ARG_d(N) ARG##N(F, _d) + 
+#define CHK(N, T, V) \ + getarg %r0 $arg##T##V \ + ldxi##T %r1 %v0 $(V * szof##T) \ + beqr N##T##V %r0 %r1 \ + calli @abort \ +N##T##V: +#define CHKF(N, T, V) \ + getarg##T %f0 $arg##T##V \ + ldxi##T %f1 %v0 $(V * szof##T) \ + beqr##T N##T##V %f0 %f1 \ + calli @abort \ +N##T##V: + +#define GET1( K, N, T, V) CHK##K(N, T, 0) +#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1) +#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2) +#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3) +#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4) +#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5) +#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6) +#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7) +#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8) +#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9) +#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10) +#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11) +#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12) +#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13) +#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14) +#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15) + +#define GET_c(N, M) GET##N( , c##N, _c, M) +#define GET_uc(N, M) GET##N( , uc##N, _uc, M) +#define GET_s(N, M) GET##N( , s##N, _s, M) +#define GET_us(N, M) GET##N( , us##N, _us, M) +#define GET_i(N, M) GET##N( , i##N, _i, M) +#define GET_ui(N, M) GET##N( , ui##N, _ui, M) +#define GET_l(N, M) GET##N( , l##N, _l, M) +#define GET_f(N, M) GET##N(F, f##N, _f, M) +#define GET_d(N, M) GET##N(F, d##N, _d, M) + +#define PUSH( T, V) pushargi V +#define PUSHF( T, V) pushargi##T V +#define PUSH0( K, T) /**/ +#define PUSH1( K, T) PUSH##K(T, 0) +#define PUSH2( K, T) PUSH1( K, T) PUSH##K(T, 1) +#define PUSH3( K, T) PUSH2( K, T) PUSH##K(T, 2) +#define PUSH4( K, T) PUSH3( K, T) PUSH##K(T, 3) +#define PUSH5( K, T) PUSH4( K, T) PUSH##K(T, 4) +#define PUSH6( 
K, T) PUSH5( K, T) PUSH##K(T, 5) +#define PUSH7( K, T) PUSH6( K, T) PUSH##K(T, 6) +#define PUSH8( K, T) PUSH7( K, T) PUSH##K(T, 7) +#define PUSH9( K, T) PUSH8( K, T) PUSH##K(T, 8) +#define PUSH10(K, T) PUSH9( K, T) PUSH##K(T, 9) +#define PUSH11(K, T) PUSH10(K, T) PUSH##K(T, 10) +#define PUSH12(K, T) PUSH11(K, T) PUSH##K(T, 11) +#define PUSH13(K, T) PUSH12(K, T) PUSH##K(T, 12) +#define PUSH14(K, T) PUSH13(K, T) PUSH##K(T, 13) +#define PUSH15(K, T) PUSH14(K, T) PUSH##K(T, 14) +#define PUSH16(K, T) PUSH15(K, T) PUSH##K(T, 15) + +#define PUSH_c( N) PUSH##N( , _c) +#define PUSH_uc(N) PUSH##N( , _uc) +#define PUSH_s( N) PUSH##N( , _s) +#define PUSH_us(N) PUSH##N( , _us) +#define PUSH_i( N) PUSH##N( , _i) +#define PUSH_ui(N) PUSH##N( , _ui) +#define PUSH_l( N) PUSH##N( , _l) +#define PUSH_f( N) PUSH##N(F, _f) +#define PUSH_d( N) PUSH##N(F, _d) + +/* bottom function */ +#define DEF0(T) \ + name test##T##_0 \ +test##T##_0: \ + prolog \ + ret \ + epilog + +#define DEFN(N, M, T) \ + name test##T##_##N \ +test##T##_##N: \ + prolog \ + arg $argp \ + /* stack buffer in %v0 */ \ + getarg %v0 $argp \ + ARG##T(N) \ + /* validate arguments */ \ + GET##T(N, M) \ + /* heap buffer in %v1 */ \ + prepare \ + pushargi $(N * szof##T) \ + finishi @malloc \ + retval %v1 \ + /* copy stack bufer to heap buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v0 \ + pushargi $(N * szof##T) \ + finishi MEMCPY \ + /* stack buffer for next function in %v2 */ \ + movi %r0 $(M * szof##T) \ + allocar %v2 %r0 \ + addr %v2 %v2 %fp \ + /* fill stack buffer for next function */ \ + prepare \ + pushargr %v2 \ + pushargi M \ + finishi fill##T \ + /* call next function */ \ + prepare \ + pushargr %v2 \ + PUSH##T(M) \ + finishi test##T##_##M \ + /* validate stack buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v0 \ + pushargi $(N * szof##T) \ + finishi @memcmp \ + retval %r0 \ + beqi test##T##_##N##_done %r0 0 \ + calli @abort \ +test##T##_##N##_done: \ + /* release heap bufer */ \ + prepare \ + pushargr 
%v1 \ + finishi @free \ + ret \ + epilog + +/* top function */ +#define DEFX(T) \ + name test##T##_17 \ +test##T##_17: \ + prolog \ + /* heap buffer in %v1 */ \ + prepare \ + pushargi $(16 * szof##T) \ + finishi @malloc \ + retval %v1 \ + /* stack buffer for next function in %v2 */ \ + movi %r0 $(16 * szof##T) \ + allocar %v2 %r0 \ + addr %v2 %v2 %fp \ + /* fill stack buffer for next function */ \ + prepare \ + pushargr %v2 \ + pushargi 16 \ + finishi fill##T \ + /* copy stack buffer to heap buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v2 \ + pushargi $(16 * szof##T) \ + finishi MEMCPY \ + /* call next function */ \ + prepare \ + pushargr %v2 \ + PUSH##T(16) \ + finishi test##T##_16 \ + /* validate stack buffer */ \ + prepare \ + pushargr %v1 \ + pushargr %v2 \ + pushargi $(16 * szof##T) \ + finishi @memcmp \ + retval %r0 \ + beqi test##T##_17_done %r0 0 \ + calli @abort \ +test##T##_17_done: \ + /* release heap bufer */ \ + prepare \ + pushargr %v1 \ + finishi @free \ + ret \ + epilog + +#define DEF( T) \ + DEF0( T) \ + DEFN( 1, 0, T) \ + DEFN( 2, 1, T) \ + DEFN( 3, 2, T) \ + DEFN( 4, 3, T) \ + DEFN( 5, 4, T) \ + DEFN( 6, 5, T) \ + DEFN( 7, 6, T) \ + DEFN( 8, 7, T) \ + DEFN( 9, 8, T) \ + DEFN(10, 9, T) \ + DEFN(11, 10, T) \ + DEFN(12, 11, T) \ + DEFN(13, 12, T) \ + DEFN(14, 13, T) \ + DEFN(15, 14, T) \ + DEFN(16, 15, T) \ + DEFX(T) + +#define CALL(T) calli test##T##_17 + +.data 16 +fmt: +.c "%d %d %d\n" +.code + jmpi main + +#if _AIX +# define MEMCPY memcpy +/* error: Function not implemented (memcpy) */ + name memcpy +memcpy: + prolog + arg $dst + arg $src + arg $len + getarg %r0 $dst + getarg %r1 $src + getarg %r2 $len + movr %v1 %r0 + blti memcpy_done %r2 1 +memcpy_loop: + subi %r2 %r2 1 + ldxr_c %v0 %r1 %r2 + stxr_c %r2 %r0 %v0 + bgti memcpy_loop %r2 0 +memcpy_done: + retr %v1 + epilog +#else +# define MEMCPY @memcpy +#endif + + FILL(_c) + FILL(_s) + FILL(_i) +#if __WORDSIZE == 64 + FILL(_l) +#endif + FILLF(_f) + FILLF(_d) + + DEF(_c) + DEF(_uc) + 
DEF(_s) + DEF(_us) + DEF(_i) +#if __WORDSIZE == 64 + DEF(_ui) + DEF(_l) +#endif + DEF(_f) + DEF(_d) + + name main +main: + prolog + + CALL(_c) + CALL(_uc) + CALL(_s) + CALL(_us) + CALL(_i) +#if __WORDSIZE == 64 + CALL(_ui) + CALL(_l) +#endif + CALL(_f) + CALL(_d) + + // loop control + movi %v2 1 + + // loop a few times calling allocar +loop: + // allocate 12 bytes + movi %r0 12 + allocar %v0 %r0 + + // offset + movr %v1 %v0 + + // 1 + stxr_i %v1 %fp %v2 + + // 2 + addi %v2 %v2 1 + addi %v1 %v1 4 + stxr_i %v1 %fp %v2 + + // 3 + addi %v2 %v2 1 + addi %v1 %v1 4 + stxr_i %v1 %fp %v2 + + // reload + movr %v1 %v0 + + // 1 + ldxr_i %r0 %fp %v1 + + // 2 + addi %v1 %v1 4 + ldxr_i %r1 %fp %v1 + + // 3 + addi %v1 %v1 4 + ldxr_i %r2 %fp %v1 + + prepare + pushargi fmt + pushargr %r0 + pushargr %r1 + pushargr %r2 + finishi @printf + blti loop %v2 9 + + ret + epilog diff --git a/check/lightning.c b/check/lightning.c index f1d2c79c8..7dd88fbc8 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -271,7 +271,7 @@ static void align(void); static void name(void); static void prolog(void); static void frame(void); static void tramp(void); static void ellipsis(void); -static void allocai(void); +static void allocai(void); static void allocar(void); static void arg(void); static void getarg_c(void); static void getarg_uc(void); static void getarg_s(void); static void getarg_us(void); @@ -580,7 +580,7 @@ static instr_t instr_vector[] = { entry(prolog), entry(frame), entry(tramp), entry(ellipsis), - entry(allocai), + entry(allocai), entry(allocar), entry(arg), entry(getarg_c), entry(getarg_uc), entry(getarg_s), entry(getarg_us), @@ -1371,6 +1371,7 @@ allocai(void) { symbol->type = type_l; symbol->value.i = i; } +entry_ir_ir(allocar) entry_ca(arg) entry_ia(getarg_c) entry_ia(getarg_uc) entry_ia(getarg_s) entry_ia(getarg_us) diff --git a/doc/body.texi b/doc/body.texi index 9b8931de4..23b8b8fbc 100644 --- a/doc/body.texi +++ b/doc/body.texi @@ -175,7 +175,8 @@ operation, there is a the 
@code{_u} modifier. There are at least seven integer registers, of which six are general-purpose, while the last is used to contain the frame pointer (@code{FP}). The frame pointer can be used to allocate and access local -variables on the stack, using the @code{allocai} instruction. +variables on the stack, using the @code{allocai} or @code{allocar} +instruction. Of the general-purpose registers, at least three are guaranteed to be preserved across function calls (@code{V0}, @code{V1} and @@ -540,16 +541,16 @@ bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow} @end example @item Jump and return operations -These accept one argument except @code{ret} which has none; the -difference between @code{finishi} and @code{calli} is that the -latter does not clean the stack from pushed parameters (if any) -and the former must @strong{always} follow a @code{prepare} +These accept one argument except @code{ret} and @code{jmpi} which +have none; the difference between @code{finishi} and @code{calli} +is that the latter does not clean the stack from pushed parameters +(if any) and the former must @strong{always} follow a @code{prepare} instruction. @example -callr (not specified) @r{function call to a register} -calli (not specified) @r{function call to O1} -finishr (not specified) @r{function call to a register} -finishi (not specified) @r{function call to O1} +callr (not specified) @r{function call to register O1} +calli (not specified) @r{function call to immediate O1} +finishr (not specified) @r{function call to register O1} +finishi (not specified) @r{function call to immediate O1} jmpr (not specified) @r{unconditional jump to register} jmpi (not specified) @r{unconditional jump} ret (not specified) @r{return from subroutine} @@ -627,17 +628,32 @@ an "unbound" label. These macros are used to set up a function prolog. The @code{allocai} call accept a single integer argument and returns an offset value -for stack storage access. +for stack storage access. 
The @code{allocar} call accepts two register +arguments; the first is set to the offset for stack access, and the +second holds the size in bytes. @example prolog (not specified) @r{function prolog} allocai (not specified) @r{reserve space on the stack} +allocar (not specified) @r{allocate space on the stack} @end example @code{allocai} receives the number of bytes to allocate and returns the offset from the frame pointer register @code{FP} to the base of the area. +@code{allocar} receives two register arguments. The first is where +to store the offset from the frame pointer register @code{FP} to the +base of the area. The second argument is the size in bytes. Note +that @code{allocar} performs dynamic allocation, so special care +should be taken when using it. If called in a loop, every iteration +will allocate stack space. Stack space is aligned from 8 to 64 bytes +depending on backend requirements, even if allocating only one byte. +It is advisable to not use it with @code{frame} and @code{tramp}; it +should work with @code{frame} with special care to call only once, +but is not supported if used in @code{tramp}, even if called only +once. + As a small appetizer, here is a small function that adds 1 to the input parameter (an @code{int}). 
I'm using an assembly-like syntax here which is a bit different from the one used when writing real subroutines with diff --git a/include/lightning.h b/include/lightning.h index 0379a2f49..a0204fbe4 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -193,6 +193,7 @@ typedef enum { #define jit_ellipsis() _jit_ellipsis(_jit) #define jit_allocai(u) _jit_allocai(_jit,u) +#define jit_allocar(u, v) _jit_allocar(_jit,u,v) #define jit_arg() _jit_arg(_jit) #define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) @@ -890,6 +891,7 @@ extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*); extern void _jit_prolog(jit_state_t*); extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); +extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); extern void _jit_ellipsis(jit_state_t*); extern jit_node_t *_jit_arg(jit_state_t*); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index b88463b26..5c754c3f6 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -363,6 +363,11 @@ struct jit_function { jit_int32_t frame; jit_uint32_t define_frame : 1; jit_uint32_t assume_frame : 1; + + /* alloca offset offset */ + jit_int32_t aoffoff; + /* uses allocar flag */ + jit_uint32_t allocar : 1; }; /* data used only during jit generation */ diff --git a/lib/jit_aarch64-cpu.c b/lib/jit_aarch64-cpu.c index f49ee16ea..5b094b773 100644 --- a/lib/jit_aarch64-cpu.c +++ b/lib/jit_aarch64-cpu.c @@ -2187,6 +2187,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -2194,6 +2195,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; _jitc->function->stack = ((_jitc->function->self.alen - 
/* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; @@ -2232,6 +2235,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) #undef SPILL if (_jitc->function->stack) subi(SP_REGNO, SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index 411571962..fdd0c8aea 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -173,6 +173,37 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t r0, r1; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + r0 = jit_get_reg(jit_class_gpr); + jit_negr(r0, v); + jit_andi(r0, r0, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, r0); + /* Cannot "addr sp, sp, reg" because in this context "sp" is "[w|x]zr", + * the zero register */ +#if 0 + jit_addr(JIT_SP, JIT_SP, r0); +#else + r1 = jit_get_reg(jit_class_gpr); + /* note that "mov r1, sp" does not work, but the proper encoding + * can be triggered before actually emiting with "add r1, sp, 0" */ + jit_addi(r1, JIT_SP, 0); + jit_addr(r1, r1, r0); + jit_addi(JIT_SP, r1, 0); + jit_unget_reg(r1); +#endif + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(r0); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_alpha-cpu.c b/lib/jit_alpha-cpu.c index a550630f9..4680481e8 100644 --- a/lib/jit_alpha-cpu.c +++ b/lib/jit_alpha-cpu.c @@ -2567,6 +2567,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame 
= -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -2574,6 +2575,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 7) & -8; /* ldgp gp, 0(pv) */ @@ -2609,6 +2612,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index cfd91cf4e..867d2a762 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -182,6 +182,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c index a54b4efb3..2f224daad 100644 --- a/lib/jit_arm-cpu.c +++ b/lib/jit_arm-cpu.c @@ -3745,6 +3745,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -3755,6 +3756,8 @@ 
_prolog(jit_state_t *_jit, jit_node_t *node) } _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3788,6 +3791,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) movr(_FP_REGNO, _SP_REGNO); if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 77466383e..1648cb112 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -272,6 +272,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index efe6220f6..8b0b4389f 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -2655,6 +2655,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = _jitc->function->frame; } + if (_jitc->function->allocar) { + _jitc->function->self.aoff += 63; + _jitc->function->self.aoff &= -64; + } _jitc->function->stack = ((_jitc->function->self.aoff - _jitc->function->self.alen - _jitc->function->self.size) + 63) & -64; @@ -2677,6 +2681,13 @@ 
_prolog(jit_state_t *_jit, jit_node_t *node) if (jit_regset_tstbit(&_jitc->function->regset, fr[regno])) stxi_d(offset, _FP_REGNO, rn(fr[regno])); } + + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 3a7913821..d0919056f 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -196,6 +196,24 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (offset); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_addi(reg, v, 63); + jit_andi(reg, reg, -64); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c index 7a9a97491..5cbd32727 100644 --- a/lib/jit_ia64-cpu.c +++ b/lib/jit_ia64-cpu.c @@ -5190,6 +5190,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; _jitc->function->stack = ((_jitc->function->self.alen - _jitc->function->self.aoff) + 15) & -16; @@ -5253,6 +5255,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) addi(GR_2, GR_4, 80); STF_SPILL(GR_2, rn(JIT_F5)); } + + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, GR_4, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index 59c915392..8575f256b 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -299,6 
+299,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c index 38141ae17..9ba759d5b 100644 --- a/lib/jit_mips-cpu.c +++ b/lib/jit_mips-cpu.c @@ -2909,6 +2909,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; #if NEW_ABI _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ @@ -2940,6 +2942,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + index = jit_get_reg(jit_class_gpr); + movi(rn(index), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index)); + jit_unget_reg(index); + } } static void diff --git a/lib/jit_mips.c b/lib/jit_mips.c index 6ee35fbca..9ae2d96a9 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -212,6 +212,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); 
+ jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_ppc-cpu.c b/lib/jit_ppc-cpu.c index ae4d60360..13078141a 100644 --- a/lib/jit_ppc-cpu.c +++ b/lib/jit_ppc-cpu.c @@ -3225,6 +3225,10 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) { + _jitc->function->self.aoff -= 2 * sizeof(jit_word_t); + _jitc->function->self.aoff &= -16; + } _jitc->function->stack = ((_jitc->function->self.alen + _jitc->function->self.size - _jitc->function->self.aoff) + 15) & -16; @@ -3269,6 +3273,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) #else STDU(_SP_REGNO, _SP_REGNO, -_jitc->function->stack); #endif + + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void @@ -3295,7 +3306,10 @@ _epilog(jit_state_t *_jit, jit_node_t *node) } #else /* __powerpc__ */ - addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) + ldr(_SP_REGNO, _SP_REGNO); + else + addi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); ldxi(_R0_REGNO, _SP_REGNO, sizeof(void*) * 2); offset = -gpr_save_area; for (regno = 0; regno < jit_size(save); regno++, offset += sizeof(void*)) { diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index 3abaf7557..d58ada51c 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -192,6 +192,29 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t r0, r1; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 
1; + } + r0 = jit_get_reg(jit_class_gpr); + r1 = jit_get_reg(jit_class_gpr); + jit_ldr(r0, JIT_SP); + jit_negr(r1, v); + jit_andi(r1, r1, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, r1); + jit_addr(JIT_SP, JIT_SP, r1); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_str(JIT_SP, r0); + jit_unget_reg(r1); + jit_unget_reg(r0); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_s390-cpu.c b/lib/jit_s390-cpu.c index 0d13d706c..556ed18ec 100644 --- a/lib/jit_s390-cpu.c +++ b/lib/jit_s390-cpu.c @@ -3498,6 +3498,8 @@ _prolog(jit_state_t *_jit, jit_node_t *i0) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -8; _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; @@ -3550,6 +3552,12 @@ _prolog(jit_state_t *_jit, jit_node_t *i0) #undef SPILL movr(_R13_REGNO, _R15_REGNO); subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack); + if (_jitc->function->allocar) { + regno = jit_get_reg(jit_class_gpr); + movi(rn(regno), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _R13_REGNO, rn(regno)); + jit_unget_reg(regno); + } } static void diff --git a/lib/jit_s390.c b/lib/jit_s390.c index c811acedf..560e93d12 100644 --- a/lib/jit_s390.c +++ b/lib/jit_s390.c @@ -159,6 +159,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -8); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + 
void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_sparc-cpu.c b/lib/jit_sparc-cpu.c index 2a9e2a219..236ffd10c 100644 --- a/lib/jit_sparc-cpu.c +++ b/lib/jit_sparc-cpu.c @@ -1631,6 +1631,7 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -1638,6 +1639,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; /* align at 16 bytes boundary */ _jitc->function->stack = ((stack_framesize + _jitc->function->self.alen - @@ -1662,6 +1665,13 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi(24, _SP_REGNO, _L6_REGNO); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) stxi(28, _SP_REGNO, _L7_REGNO); + + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _FP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index a0ebd940c..9c301a6bd 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -155,6 +155,28 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(_SP, _SP, reg); + + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) { diff --git a/lib/jit_x86-cpu.c 
b/lib/jit_x86-cpu.c index a26f32ade..c6d80dfec 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -3437,6 +3437,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; assert(_jitc->function->self.aoff >= frame); @@ -3444,6 +3445,8 @@ _prolog(jit_state_t *_jit, jit_node_t *node) return; _jitc->function->self.aoff = frame; } + if (_jitc->function->allocar) + _jitc->function->self.aoff &= -16; #if __X64 && __CYGWIN__ _jitc->function->stack = (((/* first 32 bytes must be allocated */ (_jitc->function->self.alen > 32 ? @@ -3519,6 +3522,12 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* alloca */ subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack); + if (_jitc->function->allocar) { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + } } static void diff --git a/lib/jit_x86.c b/lib/jit_x86.c index 7b43bd3c5..8e9021308 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -398,6 +398,25 @@ _jit_allocai(jit_state_t *_jit, jit_int32_t length) return (_jitc->function->self.aoff); } +void +_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v) +{ + jit_int32_t reg; + assert(_jitc->function); + if (!_jitc->function->allocar) { + _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t)); + _jitc->function->allocar = 1; + } + reg = jit_get_reg(jit_class_gpr); + jit_negr(reg, v); + jit_andi(reg, reg, -16); + jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff); + jit_addr(u, u, reg); + jit_addr(JIT_SP, JIT_SP, reg); + jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u); + jit_unget_reg(reg); +} + void _jit_ret(jit_state_t *_jit) {