1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-19 18:20:22 +02:00

finish jit_allocai implementation

2006-11-04  Paolo Bonzini  <bonzini@gnu.org>

	* lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
	* lightning/ppc/funcs.h: Store frame size into _jitl.  Store R1 before
	the STMW, so that the offset is unchanged when we patch the STMW.
	* lightning/i386/core.h: Define JIT_FP to be EBP.
	* lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
	epilog if jit_allocai was used.
	* lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
	epilog if jit_allocai was used.

git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-36
This commit is contained in:
Paolo Bonzini 2006-11-06 09:06:49 +00:00
parent 4290adb33a
commit be415cc6a5
8 changed files with 125 additions and 46 deletions

View file

@ -1,3 +1,20 @@
2006-11-04 Paolo Bonzini <bonzini@gnu.org>
* lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
* lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before
the STMW, so that the offset is unchanged when we patch the STMW.
* lightning/i386/core.h: Define JIT_FP to be EBP.
* lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
epilog if jit_allocai was used.
* lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
epilog if jit_allocai was used.
2006-11-04 Ludovic Courtes <ludo@chbouib.org>
* lightning/sparc/core.h: Implement jit_allocai.
* tests/allocai.c: New.
* tests/Makefile.am: Point to new tests.
2006-11-03 Paolo Bonzini <bonzini@gnu.org>
* lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.

1
NEWS
View file

@ -9,7 +9,6 @@ o Support for stack-allocated variables. Because of this,
backends defining JIT_FP should now rename it to JIT_AP.
JIT_FP is now a user-visible register used in ldxi/ldxr
to access stack-allocated variables.
[a promise for now, not yet implemented!]
---

View file

@ -41,12 +41,27 @@
/* Per-function code generation state kept in _jitl.  The macros around it
   use PUSHL/SUBL and %ebp/%esp, so this is presumably the 32-bit i386
   backend (lightning/i386/core-32.h) -- TODO confirm, the file name is
   not visible here.  */
struct jit_local_state {
int framesize; /* set to 8 by jit_prolog */
int argssize; /* grown by jit_prepare_i for every prepared call */
int alloca_offset; /* value produced by the latest jit_allocai; jit_prolog
                      resets it to -12, and jit_ret emits LEAVE instead of
                      POPL %ebp once it has dropped below -12 */
int alloca_slack; /* bytes already taken off the stack by jit_allocai but
                     not yet handed out.  NOTE(review): the jit_prolog
                     shown here does not reset this field -- confirm it
                     is initialized elsewhere.  */
};
#define jit_base_prolog() (PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI))
#define jit_prolog(n) (_jitl.framesize = 8, jit_base_prolog())
#define jit_prolog(n) (_jitl.framesize = 8, _jitl.alloca_offset = -12, jit_base_prolog())
/* The += allows for stack pollution */
/* Used internally by jit_allocai to reserve AMOUNT bytes of stack.  SLACK
   is extra padding supplied by the caller; it is used by the Darwin ABI,
   which keeps the stack aligned to 16 bytes.

   If AMOUNT is strictly smaller than _jitl.alloca_slack (the bytes left
   over from earlier reservations) no instruction is emitted.  Otherwise
   AMOUNT + SLACK is added to _jitl.alloca_slack and the stack pointer is
   lowered by that many bytes: with a single PUSHL %eax when the total
   equals sizeof (int), with a SUBL from %esp in every other case.

   In both cases AMOUNT is then subtracted from _jitl.alloca_slack and from
   _jitl.alloca_offset, and the macro evaluates to the updated
   _jitl.alloca_offset (presumably the offset of the new area from JIT_FP;
   confirm against the callers).

   AMOUNT and SLACK are evaluated more than once, so they must not have
   side effects.  */
#define jit_allocai_internal(amount, slack) \
(((amount) < _jitl.alloca_slack \
? 0 \
: (_jitl.alloca_slack += (amount) + (slack), \
((amount) + (slack) == sizeof (int) \
? PUSHLr(_EAX) \
: SUBLir((amount) + (slack), _ESP)))), \
_jitl.alloca_slack -= (amount), \
_jitl.alloca_offset -= (amount))
/* The += in argssize allows for stack pollution */
#ifdef __APPLE__
/* Stack must stay 16-byte aligned: */
@ -54,8 +69,15 @@ struct jit_local_state {
? SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)), JIT_SP) \
: (void)0), \
_jitl.argssize += (((ni) + 3) & ~(0x3)))
/* jit_allocai for the __APPLE__ branch: reserve N bytes through
   jit_allocai_internal, passing the low four bits of
   (_jitl.alloca_slack - N) as the extra padding.  Presumably this rounds
   the reservation so that the stack stays 16-byte aligned -- TODO confirm
   for the case where some slack is already available.  */
#define jit_allocai(n) \
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
#else
# define jit_prepare_i(ni) (_jitl.argssize += (ni))
/* jit_allocai for the non-__APPLE__ branch: reserve N bytes through
   jit_allocai_internal without requesting any extra padding.  */
#define jit_allocai(n) \
jit_allocai_internal ((n), 0)
#endif
#define jit_pusharg_i(rs) PUSHLr(rs)
@ -74,7 +96,7 @@ struct jit_local_state {
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v) - (jump_pc)))
#define jit_patch_at(jump_pc,v) jit_patch_long_at(jump_pc, v)
#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_())
#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), (_jitl.alloca_offset < -12 ? LEAVE_() : POPLr(_EBP)), RET_())
#endif /* __lightning_core_h */

View file

@ -40,8 +40,23 @@ struct jit_local_state {
int long_jumps;
int nextarg_geti;
int argssize;
int alloca_offset;
int alloca_slack;
};
/* Keep the stack 16-byte aligned; the SSE hardware prefers it this way.

   Used internally by jit_allocai to reserve AMOUNT bytes of stack plus
   SLACK bytes of padding.  If AMOUNT is strictly smaller than
   _jitl.alloca_slack (the bytes left over from earlier reservations) no
   instruction is emitted.  Otherwise AMOUNT + SLACK is added to
   _jitl.alloca_slack and a SUBQ lowers the stack pointer by that many
   bytes.

   In both cases AMOUNT is then subtracted from _jitl.alloca_slack and from
   _jitl.alloca_offset, and the macro evaluates to the updated
   _jitl.alloca_offset.  AMOUNT and SLACK are evaluated more than once, so
   they must not have side effects.  */
#define jit_allocai_internal(amount, slack) \
(((amount) < _jitl.alloca_slack \
? 0 \
: (_jitl.alloca_slack += (amount) + (slack), \
SUBQir((amount) + (slack), _ESP))), \
_jitl.alloca_slack -= (amount), \
_jitl.alloca_offset -= (amount))
/* Reserve N bytes of stack through jit_allocai_internal, passing the low
   four bits of (_jitl.alloca_slack - N) as the extra padding.  Presumably
   this is what keeps the reservation a multiple of 16 bytes -- TODO
   confirm for the case where some slack is already available.  */
#define jit_allocai(n) \
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
/* 3-parameter operation */
#define jit_qopr_(d, s1, s2, op1d, op2d) \
( (s2 == d) ? op1d : \
@ -95,7 +110,7 @@ struct jit_local_state {
#define jit_popr_l(rs) POPQr(rs)
#define jit_base_prolog() (PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13))
#define jit_prolog(n) (_jitl.nextarg_geti = 0, jit_base_prolog())
#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = -24, jit_base_prolog())
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = 0)
@ -154,7 +169,7 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v)))
#define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))
#define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v))
#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), POPQr(_EBP), RET_())
#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), (_jitl.alloca_offset < -24 ? LEAVE_() : POPQr(_EBP)), RET_())
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))

View file

@ -35,6 +35,7 @@
#define __lightning_core_i386_h
#define JIT_AP _EBP
#define JIT_FP _EBP
#define JIT_SP _ESP
#define JIT_RET _EAX

View file

@ -42,9 +42,27 @@ struct jit_local_state {
int nextarg_geti; /* Next r20-r25 reg. to be read */
int nextarg_getd; /* The FP args are picked up from FPR1 -> FPR10 */
int nbArgs; /* Number of arguments for the prolog */
int frame_size, slack;
jit_insn *stwu;
};
/* Patch a `stwu' instruction (with immediate operand) so that it decreases
   r1 by AMOUNT.  AMOUNT should already be rounded so that %sp remains
   quadword aligned.

   The instruction patched is the one whose address was saved in
   _jitl.stwu.  The immediate field is first masked out with ~_MASK (16)
   and then replaced by _s16 (AMOUNT); presumably these are the low 16 bits
   of the instruction word and a signed 16-bit encoding of AMOUNT -- the
   helpers are defined elsewhere, confirm there.  Callers pass the negated
   frame size as AMOUNT.  */
#define jit_patch_stwu(amount) \
(*(_jitl.stwu) &= ~_MASK (16), \
*(_jitl.stwu) |= _s16 ((amount)))
/* Reserve N bytes in the current function's frame.

   _jitl.frame_size is grown by N.  When N does not fit in _jitl.slack (the
   padding left between the unrounded and the 16-byte rounded frame size),
   the prolog's `stwu' is re-patched with the negated, newly rounded frame
   size; otherwise no patching takes place.  _jitl.slack is then recomputed
   from the new frame size.

   The macro evaluates to _jitl.frame_size - N, i.e. the frame size before
   this reservation (presumably the offset of the new area from JIT_FP;
   confirm against the callers).  N is evaluated more than once, so it must
   not have side effects.  */
#define jit_allocai(n) \
(_jitl.frame_size += (n), \
((n) <= _jitl.slack \
? 0 : jit_patch_stwu (-((_jitl.frame_size + 15) & ~15))), \
_jitl.slack = ((_jitl.frame_size + 15) & ~15) - _jitl.frame_size, \
_jitl.frame_size - (n))
#define JIT_SP 1
#define JIT_FP 1
#define JIT_RET 3
#define JIT_R_NUM 3
#define JIT_V_NUM 7
@ -52,9 +70,6 @@ struct jit_local_state {
#define JIT_V(i) (31-(i))
#define JIT_AUX JIT_V(JIT_V_NUM) /* for 32-bit operands & shift counts */
#define jit_pfx_start() (_jit.jitl.trampolines)
#define jit_pfx_end() (_jit.jitl.free)
/* If possible, use the `small' instruction (rd, rs, imm)
* else load imm into r26 and use the `big' instruction (rd, rs, r26)
*/

View file

@ -91,13 +91,9 @@ static void
_jit_epilog(jit_state *jit)
{
int n = _jitl.nbArgs;
int frame_size, ofs;
int first_saved_reg = JIT_AUX - n;
int num_saved_regs = 32 - first_saved_reg;
frame_size = 24 + 32 + num_saved_regs * 4; /* r24..r31 + args */
frame_size += 15; /* the stack must be quad-word */
frame_size &= ~15; /* aligned */
int frame_size = (_jitl.frame_size + 15) & ~15;
#ifdef __APPLE__
LWZrm(0, frame_size + 8, 1); /* lwz r0, x+8(r1) (ret.addr.) */
@ -106,19 +102,12 @@ _jit_epilog(jit_state *jit)
#endif
MTLRr(0); /* mtspr LR, r0 */
ofs = frame_size - num_saved_regs * 4;
LMWrm(first_saved_reg, ofs, 1); /* lmw rI, ofs(r1) */
LMWrm(first_saved_reg, 24 + 32, 1); /* lmw rI, ofs(r1) */
ADDIrri(1, 1, frame_size); /* addi r1, r1, x */
BLR(); /* blr */
}
/* Emit a prolog for a function.
Upon entrance to the trampoline:
- LR = address where the real code for the function lies
- R3-R8 = parameters
Upon finishing the trampoline:
- R0 = return address for the function
- R25-R20 = parameters (order is reversed, 1st argument is R25)
The +32 in frame_size computation is to account for the parameter area of
a function frame.
@ -126,7 +115,7 @@ _jit_epilog(jit_state *jit)
On PPC the frame must have space to host the arguments of any callee.
However, as it currently stands, the argument to jit_trampoline (n) is
the number of arguments of the caller we generate. Therefore, the
callee can overwrite a part of the stack (saved register area when it
callee can overwrite a part of the stack (saved register area) when it
flushes its own parameter on the stack. The addition of a constant
offset = 32 is enough to hold eight 4 bytes arguments. This is less
than perfect but is a reasonable work around for now.
@ -134,8 +123,8 @@ _jit_epilog(jit_state *jit)
static void
_jit_prolog(jit_state *jit, int n)
{
int frame_size;
int ofs, i;
int orig_frame_size, frame_size;
int i;
int first_saved_reg = JIT_AUX - n;
int num_saved_regs = 32 - first_saved_reg;
@ -143,20 +132,31 @@ _jit_prolog(jit_state *jit, int n)
_jitl.nextarg_getd = 1;
_jitl.nbArgs = n;
frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */
frame_size += 15; /* the stack must be quad-word */
frame_size &= ~15; /* aligned */
MFLRr(0);
#ifdef __APPLE__
STWrm(0, 8, 1); /* stw r0, 8(r1) */
#else
STWrm(0, 4, 1); /* stw r0, 4(r1) */
#endif
/* 0..55 -> frame data
56..frame_size -> saved registers
The STWU instruction is patched by jit_allocai, thus leaving
the space for the allocai above the 56 bytes. jit_allocai is
also able to reuse the slack space needed to keep the stack
quadword-aligned. */
_jitl.frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */
/* The stack must be quad-word aligned. */
frame_size = (_jitl.frame_size + 15) & ~15;
_jitl.slack = frame_size - _jitl.frame_size;
_jitl.stwu = _jit.x.pc;
STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */
ofs = frame_size - num_saved_regs * 4;
STMWrm(first_saved_reg, ofs, 1); /* stmw rI, ofs(r1) */
#ifdef __APPLE__
STWrm(0, frame_size + 8, 1); /* stw r0, x+8(r1) */
#else
STWrm(0, frame_size + 4, 1); /* stw r0, x+4(r1) */
#endif
STMWrm(first_saved_reg, 24 + 32, 1); /* stmw rI, ofs(r1) */
for (i = 0; i < n; i++)
MRrr(JIT_AUX-1-i, 3+i); /* save parameters below r24 */
}

View file

@ -41,7 +41,7 @@ check_PROGRAMS = fibit$(EXEEXT) incr$(EXEEXT) printf$(EXEEXT) \
printf2$(EXEEXT) rpn$(EXEEXT) fib$(EXEEXT) fibdelay$(EXEEXT) \
add$(EXEEXT) bp$(EXEEXT) testfp$(EXEEXT) funcfp$(EXEEXT) \
rpnfp$(EXEEXT) modi$(EXEEXT) ldxi$(EXEEXT) divi$(EXEEXT) \
movi$(EXEEXT) ret$(EXEEXT)
movi$(EXEEXT) ret$(EXEEXT) allocai$(EXEEXT)
subdir = tests
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@ -56,6 +56,11 @@ add_SOURCES = add.c
add_OBJECTS = add.$(OBJEXT)
add_LDADD = $(LDADD)
@DISASS_TRUE@add_DEPENDENCIES = $(top_builddir)/opcode/libdisass.a
allocai_SOURCES = allocai.c
allocai_OBJECTS = allocai.$(OBJEXT)
allocai_LDADD = $(LDADD)
@DISASS_TRUE@allocai_DEPENDENCIES = \
@DISASS_TRUE@ $(top_builddir)/opcode/libdisass.a
bp_SOURCES = bp.c
bp_OBJECTS = bp.$(OBJEXT)
bp_LDADD = $(LDADD)
@ -129,12 +134,12 @@ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c incr.c \
ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c rpnfp.c \
testfp.c
DIST_SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c \
incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c \
rpnfp.c testfp.c
SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
rpn.c rpnfp.c testfp.c
DIST_SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
rpn.c rpnfp.c testfp.c
DATA = $(noinst_DATA)
ETAGS = etags
CTAGS = ctags
@ -242,12 +247,13 @@ target_vendor = @target_vendor@
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir) -I$(top_srcdir)/lightning/$(cpu)
noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok \
fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok \
bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok
bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok \
allocai.ok
EXTRA_DIST = $(noinst_DATA) run-test
@DISASS_TRUE@LDADD = $(top_builddir)/opcode/libdisass.a
@REGRESSION_TESTING_TRUE@TESTS = fib fibit fibdelay incr printf printf2 rpn add bp \
@REGRESSION_TESTING_TRUE@ testfp funcfp rpnfp modi ldxi divi movi ret
@REGRESSION_TESTING_TRUE@ testfp funcfp rpnfp modi ldxi divi movi ret allocai
@REGRESSION_TESTING_TRUE@TESTS_ENVIRONMENT = $(srcdir)/run-test
all: all-am
@ -289,6 +295,9 @@ clean-checkPROGRAMS:
add$(EXEEXT): $(add_OBJECTS) $(add_DEPENDENCIES)
@rm -f add$(EXEEXT)
$(LINK) $(add_LDFLAGS) $(add_OBJECTS) $(add_LDADD) $(LIBS)
allocai$(EXEEXT): $(allocai_OBJECTS) $(allocai_DEPENDENCIES)
@rm -f allocai$(EXEEXT)
$(LINK) $(allocai_LDFLAGS) $(allocai_OBJECTS) $(allocai_LDADD) $(LIBS)
bp$(EXEEXT): $(bp_OBJECTS) $(bp_DEPENDENCIES)
@rm -f bp$(EXEEXT)
$(LINK) $(bp_LDFLAGS) $(bp_OBJECTS) $(bp_LDADD) $(LIBS)
@ -345,6 +354,7 @@ distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/add.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocai.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bp.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/divi.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fib.Po@am__quote@