mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-19 18:20:22 +02:00
finish jit_allocai implementation
2006-11-04 Paolo Bonzini <bonzini@gnu.org> * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1. * lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before the STMW, so that the offset is unchanged when we patch the STMW. * lightning/i386/core.h: Define JIT_FP to be EBP. * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the epilog if jit_allocai was used. * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the epilog if jit_allocai was used. git-archimport-id: bonzini@gnu.org--2004b/lightning--stable--1.2--patch-36
This commit is contained in:
parent
4290adb33a
commit
be415cc6a5
8 changed files with 125 additions and 46 deletions
17
ChangeLog
17
ChangeLog
|
@ -1,3 +1,20 @@
|
|||
2006-11-04 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
|
||||
* lightning/ppc/funcs.h: Store frame size into _jitl. Store R1 before
|
||||
the STMW, so that the offset is unchanged when we patch the STMW.
|
||||
* lightning/i386/core.h: Define JIT_FP to be EBP.
|
||||
* lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
|
||||
epilog if jit_allocai was used.
|
||||
* lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
|
||||
epilog if jit_allocai was used.
|
||||
|
||||
2006-11-04 Ludovic Courtes <ludo@chbouib.org>
|
||||
|
||||
* lightning/sparc/core.h: Implement jit_allocai.
|
||||
* tests/allocai.c: New.
|
||||
* tests/Makefile.am: Point to new tests.
|
||||
|
||||
2006-11-03 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.
|
||||
|
|
1
NEWS
1
NEWS
|
@ -9,7 +9,6 @@ o Support for stack-allocated variables. Because of this,
|
|||
backends defining JIT_FP should now rename it to JIT_AP.
|
||||
JIT_FP is now a user-visible register used in ldxi/ldxr
|
||||
to access stack-allocated variables.
|
||||
[a promise for now, not yet implemented!]
|
||||
|
||||
|
||||
---
|
||||
|
|
|
@ -41,12 +41,27 @@
|
|||
struct jit_local_state {
|
||||
int framesize;
|
||||
int argssize;
|
||||
int alloca_offset;
|
||||
int alloca_slack;
|
||||
};
|
||||
|
||||
#define jit_base_prolog() (PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI))
|
||||
#define jit_prolog(n) (_jitl.framesize = 8, jit_base_prolog())
|
||||
#define jit_prolog(n) (_jitl.framesize = 8, _jitl.alloca_offset = -12, jit_base_prolog())
|
||||
|
||||
/* The += allows for stack pollution */
|
||||
/* Used internally. SLACK is used by the Darwin ABI which keeps the stack
|
||||
aligned to 16 bytes. */
|
||||
|
||||
#define jit_allocai_internal(amount, slack) \
|
||||
(((amount) < _jitl.alloca_slack \
|
||||
? 0 \
|
||||
: (_jitl.alloca_slack += (amount) + (slack), \
|
||||
((amount) + (slack) == sizeof (int) \
|
||||
? PUSHLr(_EAX) \
|
||||
: SUBLir((amount) + (slack), _ESP)))), \
|
||||
_jitl.alloca_slack -= (amount), \
|
||||
_jitl.alloca_offset -= (amount))
|
||||
|
||||
/* The += in argssize allows for stack pollution */
|
||||
|
||||
#ifdef __APPLE__
|
||||
/* Stack must stay 16-byte aligned: */
|
||||
|
@ -54,8 +69,15 @@ struct jit_local_state {
|
|||
? SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)), JIT_SP) \
|
||||
: (void)0), \
|
||||
_jitl.argssize += (((ni) + 3) & ~(0x3)))
|
||||
|
||||
#define jit_allocai(n) \
|
||||
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
|
||||
|
||||
#else
|
||||
# define jit_prepare_i(ni) (_jitl.argssize += (ni))
|
||||
|
||||
#define jit_allocai(n) \
|
||||
jit_allocai_internal ((n), 0)
|
||||
#endif
|
||||
|
||||
#define jit_pusharg_i(rs) PUSHLr(rs)
|
||||
|
@ -74,7 +96,7 @@ struct jit_local_state {
|
|||
|
||||
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v) - (jump_pc)))
|
||||
#define jit_patch_at(jump_pc,v) jit_patch_long_at(jump_pc, v)
|
||||
#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_())
|
||||
#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), (_jitl.alloca_offset < -12 ? LEAVE_() : POPLr(_EBP)), RET_())
|
||||
|
||||
#endif /* __lightning_core_h */
|
||||
|
||||
|
|
|
@ -40,8 +40,23 @@ struct jit_local_state {
|
|||
int long_jumps;
|
||||
int nextarg_geti;
|
||||
int argssize;
|
||||
int alloca_offset;
|
||||
int alloca_slack;
|
||||
};
|
||||
|
||||
|
||||
/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way. */
|
||||
#define jit_allocai_internal(amount, slack) \
|
||||
(((amount) < _jitl.alloca_slack \
|
||||
? 0 \
|
||||
: (_jitl.alloca_slack += (amount) + (slack), \
|
||||
SUBQir((amount) + (slack), _ESP))), \
|
||||
_jitl.alloca_slack -= (amount), \
|
||||
_jitl.alloca_offset -= (amount))
|
||||
|
||||
#define jit_allocai(n) \
|
||||
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
|
||||
|
||||
/* 3-parameter operation */
|
||||
#define jit_qopr_(d, s1, s2, op1d, op2d) \
|
||||
( (s2 == d) ? op1d : \
|
||||
|
@ -95,7 +110,7 @@ struct jit_local_state {
|
|||
#define jit_popr_l(rs) POPQr(rs)
|
||||
|
||||
#define jit_base_prolog() (PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13))
|
||||
#define jit_prolog(n) (_jitl.nextarg_geti = 0, jit_base_prolog())
|
||||
#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = -24, jit_base_prolog())
|
||||
|
||||
/* Stack isn't used for arguments: */
|
||||
#define jit_prepare_i(ni) (_jitl.argssize = 0)
|
||||
|
@ -154,7 +169,7 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
|
|||
#define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v)))
|
||||
#define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))
|
||||
#define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v))
|
||||
#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), POPQr(_EBP), RET_())
|
||||
#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), (_jitl.alloca_offset < -24 ? LEAVE_() : POPQr(_EBP)), RET_())
|
||||
|
||||
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
|
||||
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#define __lightning_core_i386_h
|
||||
|
||||
#define JIT_AP _EBP
|
||||
#define JIT_FP _EBP
|
||||
#define JIT_SP _ESP
|
||||
#define JIT_RET _EAX
|
||||
|
||||
|
|
|
@ -42,9 +42,27 @@ struct jit_local_state {
|
|||
int nextarg_geti; /* Next r20-r25 reg. to be read */
|
||||
int nextarg_getd; /* The FP args are picked up from FPR1 -> FPR10 */
|
||||
int nbArgs; /* Number of arguments for the prolog */
|
||||
|
||||
int frame_size, slack;
|
||||
jit_insn *stwu;
|
||||
};
|
||||
|
||||
/* Patch a `stwu' instruction (with immediate operand) so that it decreases
|
||||
r1 by AMOUNT. AMOUNT should already be rounded so that %sp remains quadword
|
||||
aligned. */
|
||||
#define jit_patch_stwu(amount) \
|
||||
(*(_jitl.stwu) &= ~_MASK (16), \
|
||||
*(_jitl.stwu) |= _s16 ((amount)))
|
||||
|
||||
#define jit_allocai(n) \
|
||||
(_jitl.frame_size += (n), \
|
||||
((n) <= _jitl.slack \
|
||||
? 0 : jit_patch_stwu (-((_jitl.frame_size + 15) & ~15))), \
|
||||
_jitl.slack = ((_jitl.frame_size + 15) & ~15) - _jitl.frame_size, \
|
||||
_jitl.frame_size - (n))
|
||||
|
||||
#define JIT_SP 1
|
||||
#define JIT_FP 1
|
||||
#define JIT_RET 3
|
||||
#define JIT_R_NUM 3
|
||||
#define JIT_V_NUM 7
|
||||
|
@ -52,9 +70,6 @@ struct jit_local_state {
|
|||
#define JIT_V(i) (31-(i))
|
||||
#define JIT_AUX JIT_V(JIT_V_NUM) /* for 32-bit operands & shift counts */
|
||||
|
||||
#define jit_pfx_start() (_jit.jitl.trampolines)
|
||||
#define jit_pfx_end() (_jit.jitl.free)
|
||||
|
||||
/* If possible, use the `small' instruction (rd, rs, imm)
|
||||
* else load imm into r26 and use the `big' instruction (rd, rs, r26)
|
||||
*/
|
||||
|
|
|
@ -91,13 +91,9 @@ static void
|
|||
_jit_epilog(jit_state *jit)
|
||||
{
|
||||
int n = _jitl.nbArgs;
|
||||
int frame_size, ofs;
|
||||
int first_saved_reg = JIT_AUX - n;
|
||||
int num_saved_regs = 32 - first_saved_reg;
|
||||
|
||||
frame_size = 24 + 32 + num_saved_regs * 4; /* r24..r31 + args */
|
||||
frame_size += 15; /* the stack must be quad-word */
|
||||
frame_size &= ~15; /* aligned */
|
||||
int frame_size = (_jitl.frame_size + 15) & ~15;
|
||||
|
||||
#ifdef __APPLE__
|
||||
LWZrm(0, frame_size + 8, 1); /* lwz r0, x+8(r1) (ret.addr.) */
|
||||
|
@ -106,19 +102,12 @@ _jit_epilog(jit_state *jit)
|
|||
#endif
|
||||
MTLRr(0); /* mtspr LR, r0 */
|
||||
|
||||
ofs = frame_size - num_saved_regs * 4;
|
||||
LMWrm(first_saved_reg, ofs, 1); /* lmw rI, ofs(r1) */
|
||||
LMWrm(first_saved_reg, 24 + 32, 1); /* lmw rI, ofs(r1) */
|
||||
ADDIrri(1, 1, frame_size); /* addi r1, r1, x */
|
||||
BLR(); /* blr */
|
||||
}
|
||||
|
||||
/* Emit a prolog for a function.
|
||||
Upon entrance to the trampoline:
|
||||
- LR = address where the real code for the function lies
|
||||
- R3-R8 = parameters
|
||||
Upon finishing the trampoline:
|
||||
- R0 = return address for the function
|
||||
- R25-R20 = parameters (order is reversed, 1st argument is R25)
|
||||
|
||||
The +32 in frame_size computation is to account for the parameter area of
|
||||
a function frame.
|
||||
|
@ -126,7 +115,7 @@ _jit_epilog(jit_state *jit)
|
|||
On PPC the frame must have space to host the arguments of any callee.
|
||||
However, as it currently stands, the argument to jit_trampoline (n) is
|
||||
the number of arguments of the caller we generate. Therefore, the
|
||||
callee can overwrite a part of the stack (saved register area when it
|
||||
callee can overwrite a part of the stack (saved register area) when it
|
||||
flushes its own parameter on the stack. The addition of a constant
|
||||
offset = 32 is enough to hold eight 4-byte arguments. This is less
|
||||
than perfect but is a reasonable workaround for now.
|
||||
|
@ -134,8 +123,8 @@ _jit_epilog(jit_state *jit)
|
|||
static void
|
||||
_jit_prolog(jit_state *jit, int n)
|
||||
{
|
||||
int frame_size;
|
||||
int ofs, i;
|
||||
int orig_frame_size, frame_size;
|
||||
int i;
|
||||
int first_saved_reg = JIT_AUX - n;
|
||||
int num_saved_regs = 32 - first_saved_reg;
|
||||
|
||||
|
@ -143,20 +132,31 @@ _jit_prolog(jit_state *jit, int n)
|
|||
_jitl.nextarg_getd = 1;
|
||||
_jitl.nbArgs = n;
|
||||
|
||||
frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */
|
||||
frame_size += 15; /* the stack must be quad-word */
|
||||
frame_size &= ~15; /* aligned */
|
||||
|
||||
MFLRr(0);
|
||||
|
||||
#ifdef __APPLE__
|
||||
STWrm(0, 8, 1); /* stw r0, 8(r1) */
|
||||
#else
|
||||
STWrm(0, 4, 1); /* stw r0, 4(r1) */
|
||||
#endif
|
||||
|
||||
/* 0..55 -> frame data
|
||||
56..frame_size -> saved registers
|
||||
|
||||
The STWU instruction is patched by jit_allocai, thus leaving
|
||||
the space for the allocai above the 56 bytes. jit_allocai is
|
||||
also able to reuse the slack space needed to keep the stack
|
||||
quadword-aligned. */
|
||||
|
||||
_jitl.frame_size = 24 + 32 + num_saved_regs * 4; /* r27..r31 + args */
|
||||
|
||||
/* The stack must be quad-word aligned. */
|
||||
frame_size = (_jitl.frame_size + 15) & ~15;
|
||||
_jitl.slack = frame_size - _jitl.frame_size;
|
||||
_jitl.stwu = _jit.x.pc;
|
||||
STWUrm(1, -frame_size, 1); /* stwu r1, -x(r1) */
|
||||
|
||||
ofs = frame_size - num_saved_regs * 4;
|
||||
STMWrm(first_saved_reg, ofs, 1); /* stmw rI, ofs(r1) */
|
||||
#ifdef __APPLE__
|
||||
STWrm(0, frame_size + 8, 1); /* stw r0, x+8(r1) */
|
||||
#else
|
||||
STWrm(0, frame_size + 4, 1); /* stw r0, x+4(r1) */
|
||||
#endif
|
||||
STMWrm(first_saved_reg, 24 + 32, 1); /* stmw rI, ofs(r1) */
|
||||
for (i = 0; i < n; i++)
|
||||
MRrr(JIT_AUX-1-i, 3+i); /* save parameters below r24 */
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ check_PROGRAMS = fibit$(EXEEXT) incr$(EXEEXT) printf$(EXEEXT) \
|
|||
printf2$(EXEEXT) rpn$(EXEEXT) fib$(EXEEXT) fibdelay$(EXEEXT) \
|
||||
add$(EXEEXT) bp$(EXEEXT) testfp$(EXEEXT) funcfp$(EXEEXT) \
|
||||
rpnfp$(EXEEXT) modi$(EXEEXT) ldxi$(EXEEXT) divi$(EXEEXT) \
|
||||
movi$(EXEEXT) ret$(EXEEXT)
|
||||
movi$(EXEEXT) ret$(EXEEXT) allocai$(EXEEXT)
|
||||
subdir = tests
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
|
@ -56,6 +56,11 @@ add_SOURCES = add.c
|
|||
add_OBJECTS = add.$(OBJEXT)
|
||||
add_LDADD = $(LDADD)
|
||||
@DISASS_TRUE@add_DEPENDENCIES = $(top_builddir)/opcode/libdisass.a
|
||||
allocai_SOURCES = allocai.c
|
||||
allocai_OBJECTS = allocai.$(OBJEXT)
|
||||
allocai_LDADD = $(LDADD)
|
||||
@DISASS_TRUE@allocai_DEPENDENCIES = \
|
||||
@DISASS_TRUE@ $(top_builddir)/opcode/libdisass.a
|
||||
bp_SOURCES = bp.c
|
||||
bp_OBJECTS = bp.$(OBJEXT)
|
||||
bp_LDADD = $(LDADD)
|
||||
|
@ -129,12 +134,12 @@ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
|||
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
CCLD = $(CC)
|
||||
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
|
||||
SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c incr.c \
|
||||
ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c rpnfp.c \
|
||||
testfp.c
|
||||
DIST_SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c \
|
||||
incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c \
|
||||
rpnfp.c testfp.c
|
||||
SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
|
||||
funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
|
||||
rpn.c rpnfp.c testfp.c
|
||||
DIST_SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
|
||||
funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
|
||||
rpn.c rpnfp.c testfp.c
|
||||
DATA = $(noinst_DATA)
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
|
@ -242,12 +247,13 @@ target_vendor = @target_vendor@
|
|||
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir) -I$(top_srcdir)/lightning/$(cpu)
|
||||
noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok \
|
||||
fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok \
|
||||
bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok
|
||||
bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok \
|
||||
allocai.ok
|
||||
|
||||
EXTRA_DIST = $(noinst_DATA) run-test
|
||||
@DISASS_TRUE@LDADD = $(top_builddir)/opcode/libdisass.a
|
||||
@REGRESSION_TESTING_TRUE@TESTS = fib fibit fibdelay incr printf printf2 rpn add bp \
|
||||
@REGRESSION_TESTING_TRUE@ testfp funcfp rpnfp modi ldxi divi movi ret
|
||||
@REGRESSION_TESTING_TRUE@ testfp funcfp rpnfp modi ldxi divi movi ret allocai
|
||||
|
||||
@REGRESSION_TESTING_TRUE@TESTS_ENVIRONMENT = $(srcdir)/run-test
|
||||
all: all-am
|
||||
|
@ -289,6 +295,9 @@ clean-checkPROGRAMS:
|
|||
add$(EXEEXT): $(add_OBJECTS) $(add_DEPENDENCIES)
|
||||
@rm -f add$(EXEEXT)
|
||||
$(LINK) $(add_LDFLAGS) $(add_OBJECTS) $(add_LDADD) $(LIBS)
|
||||
allocai$(EXEEXT): $(allocai_OBJECTS) $(allocai_DEPENDENCIES)
|
||||
@rm -f allocai$(EXEEXT)
|
||||
$(LINK) $(allocai_LDFLAGS) $(allocai_OBJECTS) $(allocai_LDADD) $(LIBS)
|
||||
bp$(EXEEXT): $(bp_OBJECTS) $(bp_DEPENDENCIES)
|
||||
@rm -f bp$(EXEEXT)
|
||||
$(LINK) $(bp_LDFLAGS) $(bp_OBJECTS) $(bp_LDADD) $(LIBS)
|
||||
|
@ -345,6 +354,7 @@ distclean-compile:
|
|||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/add.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocai.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bp.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/divi.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fib.Po@am__quote@
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue