From d639674549d2249a28ba6c55d284775bffa11b59 Mon Sep 17 00:00:00 2001 From: pcpa Date: Mon, 27 Apr 2015 21:12:32 -0300 Subject: [PATCH] Add initial support to implement vararg jit functions * include/lightning.h, include/lightning/jit_private.h, lib/jit_names.c, lib/lightning.c: Add initial support for the new jit_va_start, jit_va_arg, jit_va_arg_d, and jit_va_end interfaces. The jit_va_start call is supposed to return a va_list compatible pointer, but not yet decided if it will be "declared" stdarg compatible, as for now only x86 support has been added (and should be compatible), but issues may arise on other backends. * check/lightning.c: Add wrappers to call the new jit_va_* interfaces. * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new jit_va_* for x86. * lib/jit_x86-sz.c: Add fields, but not yet fully updated, as this is an intermediate commit. * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, lib/jit_mips-sz.c, lib/jit_mips.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_s390-sz.c, lib/jit_s390.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Prepare for the new jit_va_* interfaces. Not yet implemented, and will cause an assertion if used. * check/va_list.tst: Simple early test case, that works on x86_64, x32, ix86, cygwin, and cygwin64. 
--- ChangeLog | 35 ++++++ check/lightning.c | 34 ++++- check/va_list.tst | 30 +++++ include/lightning.h | 19 ++- include/lightning/jit_private.h | 5 + lib/jit_aarch64-sz.c | 4 + lib/jit_aarch64.c | 13 ++ lib/jit_alpha-sz.c | 4 + lib/jit_alpha.c | 13 ++ lib/jit_arm-sz.c | 8 ++ lib/jit_arm.c | 13 ++ lib/jit_hppa-sz.c | 4 + lib/jit_hppa.c | 13 ++ lib/jit_ia64-sz.c | 4 + lib/jit_ia64.c | 13 ++ lib/jit_mips-sz.c | 12 ++ lib/jit_mips.c | 14 ++- lib/jit_names.c | 3 + lib/jit_ppc-sz.c | 12 ++ lib/jit_ppc.c | 14 ++- lib/jit_s390-sz.c | 8 ++ lib/jit_s390.c | 13 ++ lib/jit_sparc-sz.c | 4 + lib/jit_sparc.c | 13 ++ lib/jit_x86-cpu.c | 214 ++++++++++++++++++++++++++++++++ lib/jit_x86-sz.c | 16 +++ lib/jit_x86.c | 106 +++++++++++++++- lib/lightning.c | 27 +--- 28 files changed, 639 insertions(+), 29 deletions(-) create mode 100644 check/va_list.tst diff --git a/ChangeLog b/ChangeLog index 50b10b1b7..035468548 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,38 @@ +2015-04-27 Paulo Andrade + + * include/lightning.h, include/lightning/jit_private.h, + lib/jit_names.c, lib/lightning.c: Add initial support + for the new jit_va_start, jit_va_arg, jit_va_arg_d, and + jit_va_end interfaces. The jit_va_start call is supposed + to return a va_list compatible pointer, but not yet + decided if it will be "declared" stdarg compatible, + as for now only x86 support has been added (and should + be compatible), but issues may arise on other backends. + + * check/lightning.c: Add wrappers to call the new jit_va_* + interfaces. + + * lib/jit_x86-cpu.c, lib/jit_x86.c: Implement the new + jit_va_* for x86. + + * lib/jit_x86-sz.c: Add fields, but not yet fully updated, + as this is an intermediate commit. 
+ + * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_arm-sz.c, lib/jit_arm.c, + lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_ppc-sz.c, lib/jit_ppc.c, + lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-sz.c, lib/jit_sparc.c: Prepare for the + new jit_va_* interfaces. Not yet implemented, and will + cause an assertion if used. + + * check/va_list.tst: Simple early test case, that works + on x86_64, x32, ix86, cygwin, and cygwin64. + 2015-02-17 Paulo Andrade * include/lightning.h, include/lightning/jit_private.h, diff --git a/check/lightning.c b/check/lightning.c index 7e40d9669..02d418af8 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -497,6 +497,9 @@ static void bunordr_d(void); static void bunordi_d(void); static void pushargr_d(void); static void pushargi_d(void); static void retr_d(void); static void reti_d(void); static void retval_d(void); +static void vastart(void); +static void vaarg(void); static void vaarg_d(void); +static void vaend(void); static void error(const char *format, ...) noreturn printf_format(1, 2); static void warn(const char *format, ...) 
printf_format(1, 2) maybe_unused; @@ -576,6 +579,7 @@ static char *data; static size_t data_offset, data_length; static instr_t instr_vector[] = { #define entry(value) { NULL, #value, value } +#define entry2(name, function) { NULL, name, function } entry(align), entry(name), entry(prolog), entry(frame), entry(tramp), @@ -806,7 +810,10 @@ static instr_t instr_vector[] = { entry(pushargr_d), entry(pushargi_d), entry(retr_d), entry(reti_d), entry(retval_d), - + entry2("va_start", vastart), + entry2("va_arg", vaarg), + entry2("va_arg_d", vaarg_d), + entry2("va_end", vaend), #undef entry }; @@ -1644,6 +1651,31 @@ entry_lb_fr_fr(bunordr_d) entry_lb_fr_dm(bunordi_d) entry_fr(pushargr_d) entry_dm(pushargi_d) entry_fr(retr_d) entry_dm(reti_d) entry_fr(retval_d) +static void +vastart(void) +{ + jit_gpr_t r0 = get_ireg(); + jit_va_start(r0); +} +static void +vaarg(void) +{ + jit_gpr_t r0 = get_ireg(), r1 = get_ireg(); + jit_va_arg(r0, r1); +} +static void +vaarg_d(void) +{ + jit_fpr_t r0 = get_freg(); + jit_gpr_t r1 = get_ireg(); + jit_va_arg_d(r0, r1); +} +static void +vaend(void) +{ + jit_gpr_t r0 = get_ireg(); + jit_va_end(r0); +} #undef entry_fn #undef entry_fm #undef entry_dm diff --git a/check/va_list.tst b/check/va_list.tst new file mode 100644 index 000000000..76a23de02 --- /dev/null +++ b/check/va_list.tst @@ -0,0 +1,30 @@ +.data 16 +fmt: +.c "%d %f\n" +.code + jmpi main +varargs: + prolog + ellipsis + va_start %v0 + va_arg %r0 %v0 + va_arg_d %f0 %v0 + va_end %v0 + prepare + pushargi fmt + ellipsis + pushargr %r0 + pushargr_d %f0 + finishi @printf + ret + epilog + +main: + prolog + prepare + ellipsis + pushargi 1 + pushargi_d 2 + finishi varargs + ret + epilog diff --git a/include/lightning.h b/include/lightning.h index 55b2d6a5b..23585a434 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -38,6 +38,14 @@ # include #endif +#ifdef STDC_HEADERS +# include <stddef.h> +#else +# if !defined(offsetof) +# define offsetof(type, field) ((char *)&((type *)0)->field - (char 
*)0) +# endif +#endif + #ifndef __WORDSIZE # if defined(WORDSIZE) /* ppc darwin */ # define __WORDSIZE WORDSIZE @@ -854,7 +862,16 @@ typedef enum { #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) jit_code_x86_retval_f, jit_code_x86_retval_d, -#define jit_code_last_code jit_code_x86_retval_d + + /* These should be moved/reordered when bumping library major */ + jit_code_va_start, +#define jit_va_start(u) jit_new_node_w(jit_code_va_start, u) + jit_code_va_arg, jit_code_va_arg_d, +#define jit_va_arg(u, v) jit_new_node_ww(jit_code_va_arg, u, v) +#define jit_va_arg_d(u, v) jit_new_node_ww(jit_code_va_arg_d, u, v) + jit_code_va_end, +#define jit_va_end(u) jit_new_node_w(jit_code_va_end, u) +#define jit_code_last_code jit_code_va_end } jit_code_t; typedef void* (*jit_alloc_func_ptr) (size_t); diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h index 9e07d031e..86e583dd1 100644 --- a/include/lightning/jit_private.h +++ b/include/lightning/jit_private.h @@ -368,6 +368,11 @@ struct jit_function { jit_int32_t aoffoff; /* uses allocar flag */ jit_uint32_t allocar : 1; + + /* varargs state offsets */ + jit_int32_t vaoff; /* offset of jit_va_list */ + jit_int32_t vagp; /* first gp va argument */ + jit_int32_t vafp; /* first fp va argument */ }; /* data used only during jit generation */ diff --git a/lib/jit_aarch64-sz.c b/lib/jit_aarch64-sz.c index bf7812bd1..8b0bfacc2 100644 --- a/lib/jit_aarch64-sz.c +++ b/lib/jit_aarch64-sz.c @@ -353,4 +353,8 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_aarch64.c b/lib/jit_aarch64.c index af8d5d078..77256c958 100644 --- a/lib/jit_aarch64.c +++ b/lib/jit_aarch64.c @@ -285,6 +285,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_f_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + 
assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_alpha-sz.c b/lib/jit_alpha-sz.c index ef8f71b37..d4e3ca73e 100644 --- a/lib/jit_alpha-sz.c +++ b/lib/jit_alpha-sz.c @@ -353,4 +353,8 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_alpha.c b/lib/jit_alpha.c index 6524d2309..7bb92e9da 100644 --- a/lib/jit_alpha.c +++ b/lib/jit_alpha.c @@ -285,6 +285,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_f_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_arm-sz.c b/lib/jit_arm-sz.c index 8a8b71935..53a2367c8 100644 --- a/lib/jit_arm-sz.c +++ b/lib/jit_arm-sz.c @@ -354,6 +354,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -712,5 +716,9 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/lib/jit_arm.c b/lib/jit_arm.c index 5e73ea397..6b3b3a3de 100644 --- a/lib/jit_arm.c +++ b/lib/jit_arm.c @@ -405,6 +405,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t 
*_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_hppa-sz.c b/lib/jit_hppa-sz.c index 0741f7b07..55387abef 100644 --- a/lib/jit_hppa-sz.c +++ b/lib/jit_hppa-sz.c @@ -353,4 +353,8 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index 4dd6b850e..ed6f43ee4 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -285,6 +285,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_ia64-sz.c b/lib/jit_ia64-sz.c index 1b384b2f4..8017a544b 100644 --- a/lib/jit_ia64-sz.c +++ b/lib/jit_ia64-sz.c @@ -353,4 +353,8 @@ 32, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ia64.c b/lib/jit_ia64.c index c5f18f368..dc4ac857e 100644 --- a/lib/jit_ia64.c +++ b/lib/jit_ia64.c @@ -392,6 +392,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & 
jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_mips-sz.c b/lib/jit_mips-sz.c index c7dfbb10b..1576a314a 100644 --- a/lib/jit_mips-sz.c +++ b/lib/jit_mips-sz.c @@ -354,6 +354,10 @@ 12, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -712,6 +716,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -1069,4 +1077,8 @@ 12, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_mips.c b/lib/jit_mips.c index c8f201ff5..1d925797e 100644 --- a/lib/jit_mips.c +++ b/lib/jit_mips.c @@ -294,7 +294,6 @@ _jit_reti_d(jit_state_t *_jit, jit_float64_t u) jit_ret(); } -/* must be called internally only */ void _jit_epilog(jit_state_t *_jit) { @@ -317,6 +316,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) #endif } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_names.c b/lib/jit_names.c index 1de379386..f96342eb8 100644 --- a/lib/jit_names.c +++ b/lib/jit_names.c @@ -201,4 +201,7 @@ static char *code_name[] = { "movr_d_ww", "movi_d_ww", "movr_d_w", "movi_d_w", "x86_retval_f", "x86_retval_d", + "va_start", + "va_arg", "va_arg_d", + "va_end", }; diff --git a/lib/jit_ppc-sz.c b/lib/jit_ppc-sz.c index 1de3b9bd4..33a6ef65c 100644 --- a/lib/jit_ppc-sz.c +++ 
b/lib/jit_ppc-sz.c @@ -354,6 +354,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __ppc__ */ #endif /* __WORDSIZE */ @@ -712,6 +716,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1076,5 +1084,9 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/lib/jit_ppc.c b/lib/jit_ppc.c index f1b6974ba..0b134bf0a 100644 --- a/lib/jit_ppc.c +++ b/lib/jit_ppc.c @@ -278,7 +278,6 @@ _jit_reti_d(jit_state_t *_jit, jit_float64_t u) jit_ret(); } -/* must be called internally only */ void _jit_epilog(jit_state_t *_jit) { @@ -297,6 +296,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_f_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_s390-sz.c b/lib/jit_s390-sz.c index 4b7ec5e5e..c3b09c5cc 100644 --- a/lib/jit_s390-sz.c +++ b/lib/jit_s390-sz.c @@ -353,6 +353,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -709,4 +713,8 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_s390.c b/lib/jit_s390.c index 5412c55e1..77ca27147 100644 --- a/lib/jit_s390.c +++ 
b/lib/jit_s390.c @@ -250,6 +250,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_f_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_sparc-sz.c b/lib/jit_sparc-sz.c index 95ac33a40..5431dc3b6 100644 --- a/lib/jit_sparc-sz.c +++ b/lib/jit_sparc-sz.c @@ -353,4 +353,8 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __WORDSIZE */ diff --git a/lib/jit_sparc.c b/lib/jit_sparc.c index 9d91f0582..5b14e4489 100644 --- a/lib/jit_sparc.c +++ b/lib/jit_sparc.c @@ -258,6 +258,19 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_d_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c index 488885402..c2f5a9ba0 100644 --- a/lib/jit_x86-cpu.c +++ b/lib/jit_x86-cpu.c @@ -24,6 +24,8 @@ # if __X32 || __X64_32 # define WIDE 0 # define ldi(u, v) ldi_i(u, v) +# define ldr(u, v) ldr_i(u, v) +# define ldxr(u, v, w) ldxr_i(u, v, w) # define ldxi(u, v, w) ldxi_i(u, v, w) # define sti(u, v) sti_i(u, v) # define stxi(u, v, w) stxi_i(u, v, w) @@ -33,6 +35,8 @@ # else # define WIDE 1 # define ldi(u, v) ldi_l(u, v) +# define ldr(u, v) ldr_l(u, v) +# define ldxr(u, v, w) ldxr_l(u, v, w) # define ldxi(u, v, w) ldxi_l(u, v, w) # define sti(u, v) sti_l(u, 
v) # define stxi(u, v, w) stxi_l(u, v, w) @@ -661,10 +665,18 @@ static jit_word_t _calli(jit_state_t*, jit_word_t); static void _jmpr(jit_state_t*, jit_int32_t); # define jmpi(i0) _jmpi(_jit, i0) static jit_word_t _jmpi(jit_state_t*, jit_word_t); +# define jmpsi(i0) _jmpsi(_jit, i0) +static void _jmpsi(jit_state_t*, jit_uint8_t); # define prolog(node) _prolog(_jit, node) static void _prolog(jit_state_t*, jit_node_t*); # define epilog(node) _epilog(_jit, node) static void _epilog(jit_state_t*, jit_node_t*); +# define vastart(r0) _vastart(_jit, r0) +static void _vastart(jit_state_t*, jit_int32_t); +# define vaarg(r0, r1) _vaarg(_jit, r0, r1) +static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); +# define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0) +static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t); # define patch_at(node, instr, label) _patch_at(_jit, node, instr, label) static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); # if !defined(HAVE_FFSL) @@ -3434,6 +3446,13 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) return (_jit->pc.w); } +static void +_jmpsi(jit_state_t *_jit, jit_uint8_t i0) +{ + ic(0xeb); + ic(i0); +} + static void _prolog(jit_state_t *_jit, jit_node_t *node) { @@ -3528,6 +3547,39 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi_i(_jitc->function->aoffoff, _RBP_REGNO, rn(reg)); jit_unget_reg(reg); } + +#if __X64 && !__CYGWIN__ + if (_jitc->function->self.call & jit_call_varargs) { + jit_word_t nofp_code; + + /* Save gp registers in the save area, if any is a vararg */ + for (reg = first_gp_from_offset(_jitc->function->vagp); + jit_arg_reg_p(reg); ++reg) + stxi(_jitc->function->vaoff + first_gp_offset + + reg * 8, _RBP_REGNO, rn(JIT_RA0 - reg)); + + reg = first_fp_from_offset(_jitc->function->vafp); + if (jit_arg_f_reg_p(reg)) { + /* Skip over if no float registers were passed as argument */ + /* test %al, %al */ + ic(0x84); + ic(0xc0); + jes(0); + nofp_code = _jit->pc.w; + + /* Save fp registers 
in the save area, if any is a vararg */ + /* Note that the full 16 byte xmm is not saved, because + * lightning only handles float and double, and, while + * attempting to provide a va_list compatible pointer as + * jit_va_start return, does not guarantee it (on all ports). */ + for (; jit_arg_f_reg_p(reg); ++reg) + sse_stxi_d(_jitc->function->vaoff + first_fp_offset + + reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg)); + + patch_rel_char(nofp_code, _jit->pc.w); + } + } +#endif } static void @@ -3599,6 +3651,168 @@ _epilog(jit_state_t *_jit, jit_node_t *node) ic(0xc3); } +static void +_vastart(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t reg; + + /* Return jit_va_list_t in the register argument */ + addi(r0, _RBP_REGNO, _jitc->function->vaoff); + reg = jit_get_reg(jit_class_gpr); + +#if __X64 && !__CYGWIN__ + /* Initialize gp offset in the save area. */ + movi(rn(reg), _jitc->function->vagp); + stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg)); + + /* Initialize fp offset in the save area. */ + movi(rn(reg), _jitc->function->vafp); + stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); +#endif + + /* Initialize overflow pointer to the first stack argument. */ + addi(rn(reg), _RBP_REGNO, _jitc->function->self.size); + stxi(offsetof(jit_va_list_t, over), r0, rn(reg)); + +#if __X64 && !__CYGWIN__ + /* Initialize register save area pointer. */ + addi(rn(reg), r0, first_gp_offset); + stxi(offsetof(jit_va_list_t, save), r0, rn(reg)); +#endif + + jit_unget_reg(reg); +} + +static void +_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t rg0; +#if __X64 && !__CYGWIN__ + jit_int32_t rg1; + jit_word_t ge_code; + jit_word_t lt_code; +#endif + + rg0 = jit_get_reg(jit_class_gpr); +#if __X64 && !__CYGWIN__ + rg1 = jit_get_reg(jit_class_gpr); + + /* Load the gp offset in save area in the first temporary. */ + ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff)); + + /* Jump over if there are no remaining arguments in the save area. 
*/ + icmpi(rn(rg0), va_gp_max_offset); + jaes(0); + ge_code = _jit->pc.w; + + /* Load the save area pointer in the second temporary. */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); + + /* Load the vararg argument in the first argument. */ + ldxr(r0, rn(rg1), rn(rg0)); + + /* Update the gp offset. */ + addi(rn(rg0), rn(rg0), 8); + stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0)); + + /* Will only need one temporary register below. */ + jit_unget_reg(rg1); + + /* Jump over overflow code. */ + jmpsi(0); + lt_code = _jit->pc.w; + + /* Where to land if argument is in overflow area. */ + patch_rel_char(ge_code, _jit->pc.w); +#endif + + /* Load overflow pointer. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); + + /* Load argument. */ + ldr(r0, rn(rg0)); + + /* Update overflow pointer. */ + addi(rn(rg0), rn(rg0), va_gp_increment); + stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); + +#if __X64 && !__CYGWIN__ + /* Where to land if argument is in save area. */ + patch_rel_char(lt_code, _jit->pc.w); +#endif + + jit_unget_reg(rg0); +} + +/* The x87 boolean argument tells if will put the result in a x87 + * register if non false, in a sse register otherwise. */ +static void +_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) +{ + jit_int32_t rg0; +#if __X64 && !__CYGWIN__ + jit_int32_t rg1; + jit_word_t ge_code; + jit_word_t lt_code; +#endif + + rg0 = jit_get_reg(jit_class_gpr); +#if __X64 && !__CYGWIN__ + rg1 = jit_get_reg(jit_class_gpr); + + /* Load the fp offset in save area in the first temporary. */ + ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff)); + + /* Jump over if there are no remaining arguments in the save area. */ + icmpi(rn(rg0), va_fp_max_offset); + jaes(0); + ge_code = _jit->pc.w; + + /* Load the save area pointer in the second temporary. */ + ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); + + /* Load the vararg argument in the first argument. 
*/ + if (x87) + x87_ldxr_d(r0, rn(rg1), rn(rg0)); + else + sse_ldxr_d(r0, rn(rg1), rn(rg0)); + + /* Update the fp offset (must be stored back to fpoff, not gpoff). */ + addi(rn(rg0), rn(rg0), va_fp_increment); + stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0)); + + /* Will only need one temporary register below. */ + jit_unget_reg(rg1); + + /* Jump over overflow code. */ + jmpsi(0); + lt_code = _jit->pc.w; + + /* Where to land if argument is in overflow area. */ + patch_rel_char(ge_code, _jit->pc.w); +#endif + + /* Load overflow pointer. */ + ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); + + /* Load argument. */ + if (x87) + x87_ldr_d(r0, rn(rg0)); + else + sse_ldr_d(r0, rn(rg0)); + + /* Update overflow pointer; a double takes 8 bytes on the stack, regardless of the 16 byte save area stride. */ + addi(rn(rg0), rn(rg0), 8); + stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); + +#if __X64 && !__CYGWIN__ + /* Where to land if argument is in save area. */ + patch_rel_char(lt_code, _jit->pc.w); +#endif + + jit_unget_reg(rg0); +} + static void _patch_at(jit_state_t *_jit, jit_node_t *node, jit_word_t instr, jit_word_t label) diff --git a/lib/jit_x86-sz.c b/lib/jit_x86-sz.c index 99aecbb33..ba79079ed 100644 --- a/lib/jit_x86-sz.c +++ b/lib/jit_x86-sz.c @@ -353,6 +353,10 @@ 0, /* movi_d_w */ 10, /* x86_retval_f */ 10, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __X32 */ #if __X64 @@ -710,6 +714,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #else # if __X64_32 @@ -1066,6 +1074,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ # else #define JIT_INSTR_MAX 43 @@ -1421,6 +1433,10 @@ 0, /* movi_d_w */ 0, /* x86_retval_f */ 0, /* x86_retval_d */ + 0, /* va_start */ + 0, /* va_arg */ + 0, /* va_arg_d */ + 0, /* va_end */ #endif /* __CYGWIN__ */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/lib/jit_x86.c 
b/lib/jit_x86.c index 33f77490e..1806a2735 100644 --- a/lib/jit_x86.c +++ b/lib/jit_x86.c @@ -29,21 +29,72 @@ # define stack_adjust 12 # define CVT_OFFSET -12 # define REAL_WORDSIZE 4 +# define va_gp_increment 4 +# define va_fp_increment 8 #else # if __CYGWIN__ # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) # define jit_arg_f_reg_p(i) jit_arg_reg_p(i) # define stack_framesize 152 +# define va_fp_increment 8 # else # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) # define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) # define stack_framesize 56 +# define first_gp_argument rdi +# define first_gp_offset offsetof(jit_va_list_t, rdi) +# define first_gp_from_offset(gp) ((gp) / 8) +# define last_gp_argument r9 +# define va_gp_max_offset \ + (offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8) +# define first_fp_argument xmm0 +# define first_fp_offset offsetof(jit_va_list_t, xmm0) +# define last_fp_argument xmm7 +# define va_fp_max_offset \ + (offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16) +# define va_fp_increment 16 +# define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16) # endif +# define va_gp_increment 8 # define stack_adjust 8 # define CVT_OFFSET -8 # define REAL_WORDSIZE 8 #endif +typedef struct jit_va_list { +#if __X64 && !__CYGWIN__ + jit_int32_t gpoff; + jit_int32_t fpoff; +#endif + jit_pointer_t over; +#if __X64 && !__CYGWIN__ + jit_pointer_t save; + /* Declared explicitly as int64 for the x32 abi */ + jit_int64_t rdi; + jit_int64_t rsi; + jit_int64_t rdx; + jit_int64_t rcx; + jit_int64_t r8; + jit_int64_t r9; + jit_float64_t xmm0; + jit_float64_t _up0; + jit_float64_t xmm1; + jit_float64_t _up1; + jit_float64_t xmm2; + jit_float64_t _up2; + jit_float64_t xmm3; + jit_float64_t _up3; + jit_float64_t xmm4; + jit_float64_t _up4; + jit_float64_t xmm5; + jit_float64_t _up5; + jit_float64_t xmm6; + jit_float64_t _up6; + jit_float64_t xmm7; + jit_float64_t _up7; +#endif +} jit_va_list_t; + /* * Prototypes */ @@ -500,12 +551,48 @@ 
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) return (jit_arg_f_reg_p(u->u.w)); } +void +_jit_ellipsis(jit_state_t *_jit) +{ + if (_jitc->prepare) { + /* Remember that a varargs function call is being constructed. */ + assert(!(_jitc->function->call.call & jit_call_varargs)); + _jitc->function->call.call |= jit_call_varargs; + } + else { + /* Remember the current function is varargs. */ + assert(!(_jitc->function->self.call & jit_call_varargs)); + _jitc->function->self.call |= jit_call_varargs; + + /* Allocate va_list like object in the stack. + * If applicable, with enough space to save all argument + * registers, and use fixed offsets for them. */ + _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t)); + +#if __X64 && !__CYGWIN__ + /* Initialize gp offset in save area. */ + if (jit_arg_reg_p(_jitc->function->self.argi)) + _jitc->function->vagp = _jitc->function->self.argi * 8; + else + _jitc->function->vagp = va_gp_max_offset; + + /* Initialize fp offset in save area. */ + if (jit_arg_f_reg_p(_jitc->function->self.argf)) + _jitc->function->vafp = _jitc->function->self.argf * 16 + + va_gp_max_offset; + else + _jitc->function->vafp = va_fp_max_offset; +#endif + } +} + jit_node_t * _jit_arg(jit_state_t *_jit) { jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); #if __X64 if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; @@ -528,6 +615,7 @@ _jit_arg_f(jit_state_t *_jit) jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); #if __X64 # if __CYGWIN__ if (jit_arg_reg_p(_jitc->function->self.argi)) { @@ -553,6 +641,7 @@ _jit_arg_d(jit_state_t *_jit) jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); #if __X64 # if __CYGWIN__ if (jit_arg_reg_p(_jitc->function->self.argi)) { @@ -782,6 +871,8 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) jit_movr(JIT_RA0 - 
_jitc->function->call.argi, u); ++_jitc->function->call.argi; # if __CYGWIN__ + if (_jitc->function->call.call & jit_call_varargs) + jit_stxi(_jitc->function->call.size, _RSP, u); _jitc->function->call.size += sizeof(jit_word_t); # endif } @@ -802,10 +893,13 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) #if __X64 if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); - ++_jitc->function->call.argi; # if __CYGWIN__ + if (_jitc->function->call.call & jit_call_varargs) + jit_stxi(_jitc->function->call.size, _RSP, + JIT_RA0 - _jitc->function->call.argi); _jitc->function->call.size += sizeof(jit_word_t); # endif + ++_jitc->function->call.argi; } else #endif @@ -1869,9 +1963,19 @@ _emit_code(jit_state_t *_jit) fstpr(rn(node->u.w) + 1); break; #endif + case jit_code_va_start: + vastart(rn(node->u.w)); + break; + case jit_code_va_arg: + vaarg(rn(node->u.w), rn(node->v.w)); + break; + case jit_code_va_arg_d: + vaarg_d(rn(node->u.w), rn(node->v.w), jit_x87_reg_p(node->u.w)); + break; case jit_code_live: case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: + case jit_code_va_end: break; default: abort(); diff --git a/lib/lightning.c b/lib/lightning.c index 4caa01c02..a7eb27150 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -1166,29 +1166,6 @@ _jit_prepare(jit_state_t *_jit) _jitc->prepare = 1; } -/* If declaring a jit function as varargs, in most backends it does - * not change anything. Currently only exception is arm backend, that - * if running in hardware float abi, switches to software float abi - * if "self" function is varargs. Otherwise, there is no logic to - * handle va_list like objects that need to parse runtime state, and - * that is mainly because jit_arg* and jit_getarg* work only with - * constants values, and one must not expect them to be handled at - * runtime, they are parsed only once (same applies to jit_allocai, - * that has no jit_allocar counterpart). 
- */ -void -_jit_ellipsis(jit_state_t *_jit) -{ - if (_jitc->prepare) { - assert(!(_jitc->function->call.call & jit_call_varargs)); - _jitc->function->call.call |= jit_call_varargs; - } - else { - assert(!(_jitc->function->self.call & jit_call_varargs)); - _jitc->function->self.call |= jit_call_varargs; - } -} - void _jit_patch(jit_state_t* _jit, jit_node_t *instr) { @@ -1210,7 +1187,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_note: case jit_code_prolog: case jit_code_epilog: mask = 0; break; - case jit_code_live: + case jit_code_live: case jit_code_va_end: mask = jit_cc_a0_reg; break; case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: @@ -1224,6 +1201,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) break; case jit_code_x86_retval_f: case jit_code_x86_retval_d: + case jit_code_va_start: mask = jit_cc_a0_reg|jit_cc_a0_chg; break; case jit_code_movi: case jit_code_ldi_c: case jit_code_ldi_uc: @@ -1258,6 +1236,7 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_ldr_d: case jit_code_movr_w_f: case jit_code_movr_f_w: case jit_code_movr_w_d: case jit_code_movr_d_w: + case jit_code_va_arg: case jit_code_va_arg_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg; break; case jit_code_movr_d_ww: