From e82e5be4486728f5b475bb385608ac9306e6ac0c Mon Sep 17 00:00:00 2001
From: pcpa
Date: Sun, 9 Jun 2013 18:31:50 -0300
Subject: [PATCH] Build and pass all test cases on a multiprocessor HP-UX.

* configure.ac, check/Makefile.am, doc/Makefile.am: Do not
explicitly link to -ldl, but instead select the library that
provides dlopen, dlsym, etc. based on the host OS (-ldld on
HP-UX, -ldl elsewhere).

* check/lightning.c: Add workaround for an apparently buggy
getopt in HP-UX that sets optind to the wrong index, and
use RTLD_NEXT on HP-UX instead of RTLD_DEFAULT to dlsym
global symbols.

* include/lightning.h: Rework definitions of wordsize and
byte order to detect proper values on HP-UX.

* lib/lightning.c: Minor correction to use MAP_ANONYMOUS
instead of MAP_ANON on HP-UX.

* lib/jit_hppa.c: Float arguments must be passed in integer
registers on HP-UX, not only for varargs functions.
Add code to properly clear instruction cache. This was
not required on the Debian hppa port, but may have been working
by accident.

* lib/jit_hppa-cpu.c: Follow pattern of HP-UX binaries and
use bve,n instead of bv,n to return from functions.

* lib/jit_hppa-fpu.c: For some reason "fst? frX,rX,(rY)" did
not work on the tested computer (HP-UX B.11.23 U 9000/785 HP-UX)
so the code was changed, at first for __hpux only, to add the
base and offset registers and use the instruction with an
immediate (zero) offset.
--- ChangeLog | 34 ++++++++++++++- check/Makefile.am | 4 +- check/lightning.c | 13 +++++- configure.ac | 6 +++ doc/Makefile.am | 10 ++--- include/lightning.h | 44 +++++++++++++------ lib/jit_hppa-cpu.c | 5 +++ lib/jit_hppa-fpu.c | 52 +++++++++++++++++++++- lib/jit_hppa.c | 102 +++++++++++++++++++++++++++++++++++++++++--- lib/lightning.c | 4 ++ 10 files changed, 244 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 24e97d7d7..fefd19d40 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,36 @@ -2013-07-01 Paulo Andrade +2013-06-09 Paulo Andrade + + * configure.ac, check/Makefile.am, doc/Makefile.am: Do not + explicitly link to -ldl, but instead autodetect the library + with dlopen, dlsym, etc. + + * check/lightning.c: Add workaround to apparently buggy + getopt in HP-UX that sets optind to the wrong index, and + use RTLD_NEXT on HP-UX instead of RTLD_DEFAULT to dlsym + global symbols. + + * include/lightning.h: Rework definitions of wordsize and + byte order to detect proper values on HP-UX. + + * lib/lightning.c: Minor correction to use MAP_ANONYMOUS + instead of MAP_ANON on HP-UX. + + * lib/jit_hppa.c: Float arguments must be passed on integer + registers on HP-UX, not only for varargs functions. + Add code to properly clear instruction cache. This was + not required on Debian hppa port, but may have been working + by accident. + + * lib/jit_hppa-cpu.c: Follow pattern of HP-UX binaries and + use bve,n instead of bv,n to return from functions. + + * lib/jit_hppa-fpu.c: For some reason "fst? frX,rX,(rY)" did + not work on the tested computer (HP-UX B.11.23 U 9000/785 HP-UX) + so the code was changed, at first for __hpux only to add the + base and offset register and use the instruction with an + immediate (zero) offset. 
+ +2013-06-07 Paulo Andrade * check/lightning.c, lib/jit_disasm.c, lib/jit_ppc-cpu.c, lib/jit_ppc-fpu.c, lib/jit_ppc.c, include/lightning.h, diff --git a/check/Makefile.am b/check/Makefile.am index 220c99a37..ac8f14ca2 100644 --- a/check/Makefile.am +++ b/check/Makefile.am @@ -16,10 +16,10 @@ AM_CFLAGS = -I$(top_srcdir)/include -D_GNU_SOURCE check_PROGRAMS = lightning ccall -lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c -ccall_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +ccall_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) ccall_SOURCES = ccall.c $(top_builddir)/lib/liblightning.la: diff --git a/check/lightning.c b/check/lightning.c index 6ffbdc5c0..3f785a31d 100644 --- a/check/lightning.c +++ b/check/lightning.c @@ -38,6 +38,12 @@ # define label_t l_label_t #endif +#if defined(__hpux) +# define DL_HANDLE RTLD_NEXT +#else +# define DL_HANDLE RTLD_DEFAULT +#endif + #if defined(__GNUC__) # define noreturn __attribute__ ((noreturn)) # define printf_format(f, v) __attribute__ ((format (printf, f, v))) @@ -2295,7 +2301,7 @@ dynamic(void) char *string; (void)identifier('@'); if ((label = get_label_by_name(parser.string)) == NULL) { - value = dlsym(RTLD_DEFAULT, parser.string + 1); + value = dlsym(DL_HANDLE, parser.string + 1); if ((string = dlerror())) error("%s", string); label = new_label(label_kind_dynamic, parser.string, value); @@ -3939,6 +3945,11 @@ main(int argc, char *argv[]) #endif opt_index = optind; +#if defined(__hpux) + /* Workaround */ + if (opt_index < argc && argv[opt_index][0] == '-') + ++opt_index; +#endif if (opt_index < 0 || opt_index >= argc) usage(); if (strcmp(argv[opt_index], "-") == 0) diff --git a/configure.ac b/configure.ac index edbb072e8..b833c93df 100644 --- a/configure.ac +++ b/configure.ac @@ -68,6 +68,12 @@ else DEBUG=no fi +case "$host_os" in + *hpux*) SHLIB="-ldld" ;; + *) SHLIB="-ldl" ;; +esac 
+AC_SUBST(SHLIB) + cpu= case "$target_cpu" in i?86|x86_64) cpu=x86 ;; diff --git a/doc/Makefile.am b/doc/Makefile.am index 3f4ff64ab..462b69c66 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -24,17 +24,17 @@ noinst_PROGRAMS = incr printf rpn rfib ifib $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la -incr_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +incr_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) incr_SOURCES = incr.c -printf_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +printf_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) printf_SOURCES = printf.c -rpn_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +rpn_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) rpn_SOURCES = rpn.c -rfib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +rfib_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) rfib_SOURCES = rfib.c -ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm -ldl +ifib_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) ifib_SOURCES = ifib.c diff --git a/include/lightning.h b/include/lightning.h index ef21a1521..b347950b5 100644 --- a/include/lightning.h +++ b/include/lightning.h @@ -27,32 +27,48 @@ #include #include -#ifndef __WORDSIZE -# if defined(_AIX) -# define __WORDSIZE (__SIZEOF_POINTER__ << 3) -# else -# define __WORDSIZE WORDSIZE -# endif +#ifdef __hpux +# include #endif -#ifndef __BYTE_ORDER -# if defined(_AIX) -# define __BYTE_ORDER __BYTE_ORDER__ + +#ifndef __WORDSIZE +# if defined(WORDSIZE) +# define __WORDSIZE WORDSIZE +# elif defined(__SIZEOF_POINTER__) +# define __WORDSIZE (__SIZEOF_POINTER__ << 3) +# elif defined(_ILP32) +# define __WORDSIZE 32 # else -# define __BYTE_ORDER BYTE_ORDER +# error cannot figure __WORDSIZE # endif #endif #ifndef __LITTLE_ENDIAN -# if defined(_AIX) +# if defined(LITTLE_ENDIAN) +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# elif defined(__ORDER_LITTLE_ENDIAN__) # define __LITTLE_ENDIAN 
__ORDER_LITTLE_ENDIAN__ # else -# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __LITTLE_ENDIAN 1234 # endif #endif #ifndef __BIG_ENDIAN -# if defined(_AIX) +# if defined(BIG_ENDIAN) +# define __BIG_ENDIAN BIG_ENDIAN +# elif defined(__ORDER_BIG_ENDIAN__) # define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ # else -# define __BIG_ENDIAN BIG_ENDIAN +# define __BIG_ENDIAN 4321 +# endif +#endif +#ifndef __BYTE_ORDER +# if defined(BYTE_ORDER) +# define __BYTE_ORDER BYTE_ORDER +# elif defined(__BYTE_ORDER__) +# define __BYTE_ORDER __BYTE_ORDER__ +# elif defined(_BIG_ENDIAN) +# define __BYTE_ORDER __BIG_ENDIAN +# else +# error cannot figure __BYTE_ORDER # endif #endif diff --git a/lib/jit_hppa-cpu.c b/lib/jit_hppa-cpu.c index 71a684ccc..24f67499d 100644 --- a/lib/jit_hppa-cpu.c +++ b/lib/jit_hppa-cpu.c @@ -322,6 +322,7 @@ static void _f38(jit_state_t*,jit_int32_t, #define BV(x,b) f21(0x3a,b,x,6,0) #define BV_N(x,b) f21(0x3a,b,x,6,1) #define BVE(b) f22(0x3a,b,6,0,0,0) +#define BVE_N(b) f22(0x3a,b,6,0,1,0) #define BVE_L(b) f22(0x3a,b,7,0,0,0) #define II_C_NONE 0 #define II_C_M (1<<5) @@ -2652,7 +2653,11 @@ _epilog(jit_state_t *_jit, jit_node_t *node) LDWL(-20, _FP_REGNO, _RP_REGNO); LDO(_jitc->function->stack, _FP_REGNO, _SP_REGNO); LDWL_MB(-_jitc->function->stack, _SP_REGNO, _FP_REGNO); +#if defined(__hpux) + BVE_N(_RP_REGNO); +#else BV_N(_R0_REGNO, _RP_REGNO); +#endif } static void diff --git a/lib/jit_hppa-fpu.c b/lib/jit_hppa-fpu.c index ab81891bf..611e1674e 100644 --- a/lib/jit_hppa-fpu.c +++ b/lib/jit_hppa-fpu.c @@ -16,6 +16,15 @@ */ #if PROTO + +/* FIXME should actually be hw model/version/etc or other constraint + * that causes a SIGSEGV if using these instructions */ +#if defined(__hpux) +# define FSTXR 0 +#else +# define FSTXR 1 +#endif + #define f39(o,b,x,t) _f39(_jit,o,b,x,t) static void _f39(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); #define f40(o,b,x,r) _f40(_jit,o,b,x,r) @@ -385,13 +394,20 @@ static void 
_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); #define str_f(r0,r1) FSTWI(r1,0,r0) #define sti_f(i0,r0) _sti_f(_jit,i0,r0) static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t); -#define stxr_f(r0,r1,r2) FSTW(r2,r1,r0) +#if FSTXR +# define stxr_f(r0,r1,r2) FSTW(r2,r1,r0) +# define stxr_d(r0,r1,r2) FSTD(r2,r1,r0) +#else +# define stxr_f(r0,r1,r2) _stxr_f(_jit,r0,r1,r2) +static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define stxr_d(r0,r1,r2) _stxr_d(_jit,r0,r1,r2) +static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +#endif #define stxi_f(i0,r0,r1) _stxi_f(_jit,i0,r0,r1) static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define str_d(r0,r1) FSTDI(r1,0,r0) #define sti_d(i0,r0) _sti_d(_jit,i0,r0) static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t); -#define stxr_d(r0,r1,r2) FSTD(r2,r1,r0) #define stxi_d(i0,r0,r1) _stxi_d(_jit,i0,r0,r1) static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define bcmpr_f(c,i0,r0,r1) _bcmpr_f(_jit,c,i0,r0,r1) @@ -813,6 +829,28 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) } } +#if !FSTXR +static void +_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r0, r1); + str_f(rn(reg), r2); + jit_unget_reg(reg); +} + +static void +_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r0, r1); + str_d(rn(reg), r2); + jit_unget_reg(reg); +} +#endif + static void _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { @@ -824,8 +862,13 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) FSTWL(r1, i0, r0); else { reg = jit_get_reg(jit_class_gpr); +#if FSTXR + movi(rn(reg), i0); + stxr_f(rn(reg), r0, r1); +#else addi(rn(reg), r0, i0); str_f(rn(reg), r1); +#endif jit_unget_reg(reg); } } @@ -856,8 +899,13 @@ 
_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) FSTDL(r1, i0, r0); else { reg = jit_get_reg(jit_class_gpr); +#if FSTXR movi(rn(reg), i0); stxr_d(rn(reg), r0, r1); +#else + addi(rn(reg), r0, i0); + str_d(rn(reg), r1); +#endif jit_unget_reg(reg); } } diff --git a/lib/jit_hppa.c b/lib/jit_hppa.c index d733231e6..43b43c2c2 100644 --- a/lib/jit_hppa.c +++ b/lib/jit_hppa.c @@ -32,6 +32,9 @@ #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); +/* libgcc */ +extern void __clear_cache(void *, void *); + /* * Initialization */ @@ -149,7 +152,7 @@ _jit_prolog(jit_state_t *_jit) _jitc->function->self.size = params_offset; _jitc->function->self.argi = _jitc->function->self.alen = 0; /* float conversion */ - _jitc->function->self.aoff = alloca_offset - 8; + _jitc->function->self.aoff = alloca_offset; _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -409,7 +412,10 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) _jitc->function->call.size -= sizeof(jit_word_t); if (_jitc->function->call.argi < 4) { jit_movr_f(_F4 - _jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { +#if !defined(__hpux) + if (_jitc->function->call.call & jit_call_varargs) +#endif + { jit_stxi_f(alloca_offset - 8, JIT_FP, u); jit_ldxi(_R26 - _jitc->function->call.argi, JIT_FP, alloca_offset - 8); @@ -428,7 +434,10 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) _jitc->function->call.size -= sizeof(jit_word_t); if (_jitc->function->call.argi < 4) { jit_movi_f(_F4 - _jitc->function->call.argi, u); - if (_jitc->function->call.call & jit_call_varargs) { +#if !defined(__hpux) + if (_jitc->function->call.call & jit_call_varargs) +#endif + { jit_stxi_f(alloca_offset - 8, JIT_FP, _F4 - _jitc->function->call.argi); jit_ldxi(_R26 - _jitc->function->call.argi, @@ -455,7 +464,10 @@ 
_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) } if (_jitc->function->call.argi < 4) { jit_movr_d(_F4 - (_jitc->function->call.argi + 1), u); - if (_jitc->function->call.call & jit_call_varargs) { +#if !defined(__hpux) + if (_jitc->function->call.call & jit_call_varargs) +#endif + { jit_stxi_d(alloca_offset - 8, JIT_FP, u); jit_ldxi(_R26 - _jitc->function->call.argi, JIT_FP, alloca_offset - 4); @@ -484,7 +496,10 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) } if (_jitc->function->call.argi < 4) { jit_movi_d(_F4 - (_jitc->function->call.argi + 1), u); - if (_jitc->function->call.call & jit_call_varargs) { +#if !defined(__hpux) + if (_jitc->function->call.call & jit_call_varargs) +#endif + { jit_stxi_d(alloca_offset - 8, JIT_FP, _F4 - (_jitc->function->call.argi + 1)); jit_ldxi(_R26 - _jitc->function->call.argi, @@ -1131,6 +1146,83 @@ _emit_code(jit_state_t *_jit) patch_at(_jitc->patches.ptr[offset].inst, word); } +#if defined(__hppa) +/* --- parisc2.0.pdf --- + Programming Note + +The minimum spacing that is guaranteed to work for "self-modifying code" is +shown in the code segment below. Since instruction prefetching is permitted, +any data cache flushes must be separated from any instruction cache flushes +by a SYNC. This will ensure that the "new" instruction will be written to +memory prior to any attempts at prefetching it as an instruction. + + LDIL l%newinstr,rnew + LDW r%newinstr(0,rnew),temp + LDIL l%instr,rinstr + STW temp,r%instr(0,rinstr) + FDC r%instr(0,rinstr) + SYNC + FIC r%instr(rinstr) + SYNC + instr ... + (at least seven instructions) + +This sequence assumes a uniprocessor system. In a multiprocessor system, +software must ensure no processor is executing code which is in the process +of being modified. +*/ + +/* + Adapted from ffcall/trampoline/cache-hppa.c:__TR_clear_cache to +loop over addresses as it is unlikely from and to addresses would fit in +at most two cachelines. 
+ */ + /* + * Copyright 1995-1997 Bruno Haible, + * + * This is free software distributed under the GNU General Public Licence + * described in the file COPYING. Contact the author if you don't have this + * or can't live with it. There is ABSOLUTELY NO WARRANTY, explicit or implied, + * on this software. + */ + { + /* FIXME this may be required on Linux or other OSes with + * multiprocessor support (was not required for the hppa + * port done on Debian hppa...) */ + jit_word_t f = (jit_word_t)_jit->code.ptr; + jit_word_t t = f + _jit->code.length; + register int u, v; + for (; f <= t; f += 32) { + asm volatile ("fdc 0(0,%0)" + "\n\t" "fdc 0(0,%1)" + "\n\t" "sync" + : + : "r" (f), "r" (t) + ); + asm volatile ("mfsp %%sr0,%1" + "\n\t" "ldsid (0,%4),%0" + "\n\t" "mtsp %0,%%sr0" + "\n\t" "fic 0(%%sr0,%2)" + "\n\t" "fic 0(%%sr0,%3)" + "\n\t" "sync" + "\n\t" "mtsp %1,%%sr0" + "\n\t" "nop" + "\n\t" "nop" + "\n\t" "nop" + "\n\t" "nop" + "\n\t" "nop" + "\n\t" "nop" + : "=r" (u), "=r" (v) + : "r" (f), "r" (t), "r" (f) + ); + } + } +#else + /* This is supposed to work but appears to fail on multiprocessor systems */ + word = sysconf(_SC_PAGE_SIZE); + __clear_cache(_jit->code.ptr, (void *)((_jit->pc.w + word) & -word)); +#endif + return (_jit->code.ptr); } diff --git a/lib/lightning.c b/lib/lightning.c index 06ee0f5a6..f5ba7a4b3 100644 --- a/lib/lightning.c +++ b/lib/lightning.c @@ -19,6 +19,10 @@ #include #include +#ifndef MAP_ANON +# define MAP_ANON MAP_ANONYMOUS +#endif + #define jit_regload_reload 0 /* convert to reload */ #define jit_regload_delete 1 /* just remove node */ #define jit_regload_isdead 2 /* delete and unset live bit */