diff --git a/lightening/aarch64-cpu.c b/lightening/aarch64-cpu.c
index 2eaf74406..ef3e83722 100644
--- a/lightening/aarch64-cpu.c
+++ b/lightening/aarch64-cpu.c
@@ -275,6 +275,10 @@ oxxrs(jit_state_t *_jit, int32_t Op,
 #define A64_MOVK 0x72800000
 #define A64_BRK 0xd4200000
 
+/* ARMv8.1 LSE atomics (64-bit forms) */
+#define A64_SWPAL 0xf8e08000
+#define A64_CASAL 0xc8e0fc00
+
 static void
 SBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS)
 {
@@ -675,6 +679,22 @@ STLXR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
   return oxxx(_jit, A64_STLXR, Rt, Rn, Rm);
 }
 
+/* SWPAL Xs, Xt, [Xn]: atomically store Rs to [Rn] and load the previous
+   contents into Rt, with acquire and release semantics.  */
+static void
+SWPAL(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rs)
+{
+  return oxxx(_jit, A64_SWPAL, Rt, Rn, Rs);
+}
+
+/* CASAL Xs, Xt, [Xn]: if [Rn] equals Rs, store Rt to [Rn]; either way Rs
+   receives the value read from memory.  Acquire and release semantics.  */
+static void
+CASAL(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rs)
+{
+  return oxxx(_jit, A64_CASAL, Rt, Rn, Rs);
+}
+
 static void
 LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
 {
@@ -2532,36 +2552,56 @@ str_atomic(jit_state_t *_jit, int32_t loc, int32_t val)
 static void
 swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
 {
-  int32_t result = jit_gpr_regno(get_temp_gpr(_jit));
-  int32_t val_or_tmp = dst == val ? jit_gpr_regno(get_temp_gpr(_jit)) : val;
-  movr(_jit, val_or_tmp, val);
-  void *retry = jit_address(_jit);
-  LDAXR(_jit, dst, loc);
-  STLXR(_jit, val_or_tmp, loc, result);
-  jit_patch_there(_jit, bnei(_jit, result, 0), retry);
-  if (dst == val) unget_temp_gpr(_jit);
-  unget_temp_gpr(_jit);
+  if (has_lse_atomics) {
+    /* A single SWPAL replaces the LDAXR/STLXR retry loop.  */
+    SWPAL(_jit, dst, loc, val);
+  } else {
+    int32_t result = jit_gpr_regno(get_temp_gpr(_jit));
+    int32_t val_or_tmp = dst == val ? jit_gpr_regno(get_temp_gpr(_jit)) : val;
+    movr(_jit, val_or_tmp, val);
+    void *retry = jit_address(_jit);
+    LDAXR(_jit, dst, loc);
+    STLXR(_jit, val_or_tmp, loc, result);
+    jit_patch_there(_jit, bnei(_jit, result, 0), retry);
+    if (dst == val) unget_temp_gpr(_jit);
+    unget_temp_gpr(_jit);
+  }
 }
 
 static void
 cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
            int32_t desired)
 {
-  int32_t dst_or_tmp;
-  if (dst == loc || dst == expected || dst == expected)
-    dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit));
-  else
-    dst_or_tmp = dst;
-  void *retry = jit_address(_jit);
-  LDAXR(_jit, dst_or_tmp, loc);
-  jit_reloc_t bad = bner(_jit, dst_or_tmp, expected);
-  int result = jit_gpr_regno(get_temp_gpr(_jit));
-  STLXR(_jit, desired, loc, result);
-  jit_patch_there(_jit, bnei(_jit, result, 0), retry);
-  unget_temp_gpr(_jit);
-  jit_patch_here(_jit, bad);
-  movr(_jit, dst, dst_or_tmp);
-  unget_temp_gpr(_jit);
+  if (has_lse_atomics) {
+    /* CASAL compares [loc] against Rs and always overwrites Rs with the
+       value it loaded.  Stage EXPECTED in DST (or in a temporary when DST
+       aliases LOC or DESIRED) so that no input register is clobbered.  */
+    int32_t dst_or_tmp = dst;
+    if (dst == loc || dst == desired)
+      dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    movr(_jit, dst_or_tmp, expected);
+    CASAL(_jit, desired, loc, dst_or_tmp);
+    movr(_jit, dst, dst_or_tmp);
+    if (dst != dst_or_tmp) unget_temp_gpr(_jit);
+  } else {
+    /* LDAXR must not clobber a register that is still needed, so load into
+       a temporary whenever DST aliases one of the inputs.  */
+    int32_t dst_or_tmp;
+    if (dst == loc || dst == expected || dst == desired)
+      dst_or_tmp = jit_gpr_regno(get_temp_gpr(_jit));
+    else
+      dst_or_tmp = dst;
+    void *retry = jit_address(_jit);
+    LDAXR(_jit, dst_or_tmp, loc);
+    jit_reloc_t bad = bner(_jit, dst_or_tmp, expected);
+    int result = jit_gpr_regno(get_temp_gpr(_jit));
+    STLXR(_jit, desired, loc, result);
+    jit_patch_there(_jit, bnei(_jit, result, 0), retry);
+    unget_temp_gpr(_jit);
+    jit_patch_here(_jit, bad);
+    movr(_jit, dst, dst_or_tmp);
+    if (dst != dst_or_tmp) unget_temp_gpr(_jit);
+  }
 }
 
 static void
diff --git a/lightening/aarch64.c b/lightening/aarch64.c
index e67365f23..1018193c4 100644
--- a/lightening/aarch64.c
+++ b/lightening/aarch64.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2020 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2020, 2024 Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -20,6 +20,9 @@
 /* libgcc */
 extern void __clear_cache(void *, void *);
 
+/* Nonzero when the CPU reports LSE atomics; set by jit_get_cpu().  */
+static int has_lse_atomics;
+
 
 static inline int32_t
 read_signed_bitfield(uint32_t word, uint8_t width, uint8_t shift)
@@ -163,11 +166,40 @@ struct abi_arg_iterator
 };
 
 static size_t page_size;
+
+/* AT_HWCAP bit that advertises the LSE atomic instructions.  */
+#define HWCAP_ATOMICS (1UL << 8)
+
+#ifdef __gnu_linux__
+// See
+// https://github.com/gcc-mirror/gcc/blob/master/libgcc/config/aarch64/lse-init.c.
+# define AT_HWCAP 16
+unsigned long __getauxval (unsigned long int);
+static unsigned long get_hwcap(void)
+{
+  return __getauxval (AT_HWCAP);
+}
+#elif defined(DARWIN)
+static unsigned long get_hwcap(void)
+{
+  // All Mac machines have LSE atomics.  Most iOS devices have them too, but
+  // JIT generally isn't allowed there, so assume that it's OK to say we
+  // always have LSE.
+  return HWCAP_ATOMICS;
+}
+#else
+static unsigned long get_hwcap(void)
+{
+  return 0;
+}
+#endif
 
 jit_bool_t
 jit_get_cpu(void)
 {
   page_size = sysconf(_SC_PAGE_SIZE);
+  unsigned long hwcap = get_hwcap();
+  has_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
   return 1;
 }
 