1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00
guile/lightening/lightening.c
Ekaitz Zarraga 7c20ba7767 riscv: float/double call convention implementation
RISC-V uses a0-a7 registers for argument passing. Float/double arguments
use f0-f7 first and continue in a0-a7 if needed.

Once registers are consumed, stack is used.

This commit changes how lightening passes arguments in order to allow
this behavior.
2025-01-29 14:53:04 +01:00

1568 lines
41 KiB
C

/*
* Copyright (C) 2012-2020 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* Authors:
* Paulo Cesar Pereira de Andrade
*/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <sys/mman.h>
#include "../lightening.h"
// ASSERT does not depend on NDEBUG: it always evaluates its argument and
// aborts the process when the condition is false.
#define ASSERT(x) do { if (!(x)) abort(); } while (0)
// maybe_unused marks a definition that may legitimately go unused, and
// UNLIKELY hints that a condition is expected to be false.  When __GNUC__
// is not defined both expand to plain C with no compiler extension.
#if defined(__GNUC__)
# define maybe_unused __attribute__ ((unused))
# define UNLIKELY(exprn) __builtin_expect(exprn, 0)
#else
# define maybe_unused /**/
# define UNLIKELY(exprn) exprn
#endif
// A cursor into the code buffer.  The same address can be viewed as a
// pointer to 8-, 16-, 32- or 64-bit units (used by the emit_uN helpers)
// or as a signed/unsigned integer address.
union jit_pc
{
uint8_t *uc;
uint16_t *us;
uint32_t *ui;
uint64_t *ul;
intptr_t w;
uintptr_t uw;
};
#ifdef JIT_NEEDS_LITERAL_POOL
// One pending literal: the relocation that refers to it and the value
// to be stored.
struct jit_literal_pool_entry
{
jit_reloc_t reloc;
uintptr_t value;
};
// A growable set of pending literals.  `deadline' is a byte offset from
// the start of the code buffer: the emit_*_with_pool helpers flush the
// pool once the emit position reaches it.  `size' is the number of
// entries in use out of `capacity' (flexible array member).
struct jit_literal_pool
{
uint32_t deadline;
uint32_t size;
uint32_t capacity;
struct jit_literal_pool_entry entries[];
};
#endif // JIT_NEEDS_LITERAL_POOL
// Per-assembler state.
//   pc            current emit position inside the buffer
//   start, limit  bounds of the buffer handed to jit_begin; all three
//                 pointers are NULL between jit_end/jit_reset and the
//                 next jit_begin
//   last_instruction_start
//                 not referenced in this part of the file; presumably
//                 maintained by the per-architecture backend (TODO confirm)
//   temp_gpr_saved, temp_fpr_saved
//                 number of scratch registers currently handed out by
//                 get_temp_gpr / get_temp_fpr
//   overflow      set to 1 when an emit would have run past `limit'
//   emitting_data non-zero between jit_begin_data and jit_end_data
//   alloc, free   allocator pair supplied to jit_new_state
struct jit_state
{
union jit_pc pc;
uint8_t *start;
uint8_t *last_instruction_start;
uint8_t *limit;
uint8_t temp_gpr_saved;
uint8_t temp_fpr_saved;
uint8_t overflow;
uint8_t emitting_data;
int frame_size; // Used to know when to align stack.
#ifdef JIT_NEEDS_LITERAL_POOL
struct jit_literal_pool *pool;
#endif
void* (*alloc)(size_t);
void (*free)(void*);
};
// Hooks that are declared here and used by the generic code below; their
// definitions are not in this part of the file (presumably they come from
// the per-architecture file included further down -- TODO confirm).
static jit_bool_t jit_get_cpu(void);
static jit_bool_t jit_init(jit_state_t *);
static void jit_flush(void *fptr, void *tptr);
static void jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc,
jit_pointer_t addr);
static void* bless_function_pointer(void *ptr);
struct abi_arg_iterator;
#ifdef JIT_NEEDS_LITERAL_POOL
// Literal pool management and the branch/load patching primitives used
// by jit_patch_there for the *_WITH_VENEER and LOAD_FROM_POOL relocations.
static struct jit_literal_pool* alloc_literal_pool(jit_state_t *_jit,
size_t capacity);
static void reset_literal_pool(jit_state_t *_jit,
struct jit_literal_pool *pool);
static jit_bool_t add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
uint8_t max_offset_bits);
static void remove_pending_literal(jit_state_t *_jit, jit_reloc_t src);
static void patch_pending_literal(jit_state_t *_jit, jit_reloc_t src,
uintptr_t value);
enum guard_pool { GUARD_NEEDED, NO_GUARD_NEEDED };
static void emit_literal_pool(jit_state_t *_jit, enum guard_pool guard);
static int32_t read_jmp_offset(uint32_t *loc);
static int offset_in_jmp_range(ptrdiff_t offset, int flags);
static void patch_jmp_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer_jmp_offset(uint32_t *loc, ptrdiff_t offset);
static int32_t read_jcc_offset(uint32_t *loc);
static int offset_in_jcc_range(ptrdiff_t offset, int flags);
static void patch_jcc_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer_jcc_offset(uint32_t *loc, ptrdiff_t offset);
static void patch_veneer(uint32_t *loc, jit_pointer_t addr);
static int32_t read_load_from_pool_offset(uint32_t *loc);
#endif
// Calling-convention helpers: classification of operand ABIs and the
// iterator that assigns argument locations.
static jit_bool_t is_fpr_arg(enum jit_operand_abi arg);
static jit_bool_t is_gpr_arg(enum jit_operand_abi arg);
static void reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
const jit_operand_t *args);
static void next_abi_arg(struct abi_arg_iterator *iter,
jit_operand_t *arg);
// Register used by prepare_call_args to hold the callee address of an
// indirect call that has arguments.
static jit_gpr_t get_callr_temp (jit_state_t * _jit);
/* Library-wide initialization.  Returns whatever jit_get_cpu reports.  */
jit_bool_t
init_jit(void)
{
  jit_bool_t cpu_ok = jit_get_cpu ();
  return cpu_ok;
}
/* Allocate and initialize a fresh assembler state.

   ALLOC_FN and FREE_FN form the allocator pair stored in the state; a
   NULL entry selects malloc / free respectively.  Aborts if the
   allocation fails.  Returns NULL (after releasing the state) when
   jit_init reports failure.  */
jit_state_t *
jit_new_state(void* (*alloc_fn)(size_t), void (*free_fn)(void*))
{
  if (alloc_fn == NULL)
    alloc_fn = malloc;
  if (free_fn == NULL)
    free_fn = free;

  jit_state_t *state = alloc_fn (sizeof (*state));
  if (state == NULL)
    abort ();

  memset (state, 0, sizeof (*state));
  state->alloc = alloc_fn;
  state->free = free_fn;

  if (jit_init (state)) {
#ifdef JIT_NEEDS_LITERAL_POOL
    state->pool = alloc_literal_pool (state, 0);
#endif
    return state;
  }

  /* Initialization failed: give everything back.  */
#ifdef JIT_NEEDS_LITERAL_POOL
  free_fn (state->pool);
#endif
  free_fn (state);
  return NULL;
}
/* Release a state created by jit_new_state, using the free function
   that was stored in it.  */
void
jit_destroy_state(jit_state_t *_jit)
{
  void (*release)(void*) = _jit->free;
#ifdef JIT_NEEDS_LITERAL_POOL
  release (_jit->pool);
#endif
  release (_jit);
}
/* Return the current emit position.  While emitting data the raw
   address is returned; otherwise it is passed through
   jit_address_to_function_pointer first.  Requires an active buffer.  */
jit_pointer_t
jit_address(jit_state_t *_jit)
{
  ASSERT (_jit->start);

  jit_pointer_t here = _jit->pc.uc;
  if (_jit->emitting_data)
    return here;
  return jit_address_to_function_pointer (here);
}
/* Start emitting into BUF, which holds LENGTH bytes.  The state must
   not already have an active buffer.  Resets the overflow flag, the
   tracked frame size and the data-emission flag.  */
void
jit_begin(jit_state_t *_jit, uint8_t* buf, size_t length)
{
  ASSERT (!_jit->start);
  _jit->pc.uc = _jit->start = buf;
  _jit->limit = buf + length;
  _jit->overflow = 0;
  _jit->frame_size = 0;
  _jit->emitting_data = 0;
  /* Tested with #ifdef, like every other use of this macro in the file,
     so that the pool field and its initialization are always enabled
     together.  */
#ifdef JIT_NEEDS_LITERAL_POOL
  ASSERT(_jit->pool->size == 0);
  /* With an empty pool the deadline is the end of the buffer.  */
  _jit->pool->deadline = length;
#endif
}
/* Report whether any emit since jit_begin would have run past the end
   of the buffer.  Requires an active buffer.  */
jit_bool_t
jit_has_overflow(jit_state_t *_jit)
{
  ASSERT (_jit->start);

  jit_bool_t overflowed = _jit->overflow;
  return overflowed;
}
/* Abandon the active buffer: clear the buffer pointers and the
   overflow, frame-size and data-emission state so that jit_begin can be
   called again.  Requires an active buffer.  */
void
jit_reset(jit_state_t *_jit)
{
  ASSERT (_jit->start);

  _jit->limit = NULL;
  _jit->start = NULL;
  _jit->pc.uc = NULL;

  _jit->overflow = 0;
  _jit->frame_size = 0;
  _jit->emitting_data = 0;

#ifdef JIT_NEEDS_LITERAL_POOL
  reset_literal_pool (_jit, _jit->pool);
#endif
}
/* Convert a code address into a callable pointer by handing it to
   bless_function_pointer.  */
jit_function_pointer_t
jit_address_to_function_pointer(jit_pointer_t p)
{
  void *blessed = bless_function_pointer (p);
  return blessed;
}
/* Finish emitting.  Any pending literal pool is emitted first.  If the
   buffer overflowed, returns NULL and leaves the state untouched.
   Otherwise calls jit_flush over the emitted range, stores the number of
   emitted bytes in *LENGTH when LENGTH is non-NULL, detaches the buffer
   from the state, and returns the start of the code converted by
   jit_address_to_function_pointer.  */
void*
jit_end(jit_state_t *_jit, size_t *length)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  if (_jit->pool->size != 0)
    emit_literal_pool (_jit, NO_GUARD_NEEDED);
#endif

  if (_jit->overflow)
    return NULL;

  uint8_t *first = _jit->start;
  uint8_t *last = _jit->pc.uc;

  ASSERT (first);
  ASSERT (first <= last);
  ASSERT (last <= _jit->limit);
  ASSERT (!_jit->emitting_data);

  jit_flush (first, last);

  if (length != NULL)
    *length = last - first;

  /* Detach the buffer so that the state can be reused.  */
  _jit->limit = NULL;
  _jit->start = NULL;
  _jit->pc.uc = NULL;
  _jit->overflow = 0;
  _jit->frame_size = 0;
#ifdef JIT_NEEDS_LITERAL_POOL
  reset_literal_pool (_jit, _jit->pool);
#endif

  return jit_address_to_function_pointer (first);
}
/* Return 1 when X has exactly one bit set, 0 otherwise (including for
   X == 0).  */
static int
is_power_of_two (unsigned x)
{
  if (x == 0)
    return 0;
  /* Clearing the lowest set bit leaves zero only for a single-bit value.  */
  return (x & (x - 1)) == 0;
}
/* Hand out the next scratch GPR.  Temporaries are handed out in order
   JIT_TMP0, JIT_TMP1, ... (as many as the backend defines) and must be
   returned with unget_temp_gpr in reverse order.  Aborts when none is
   left.  */
static jit_gpr_t
get_temp_gpr(jit_state_t *_jit)
{
  uint8_t slot = _jit->temp_gpr_saved;
  _jit->temp_gpr_saved = slot + 1;

  switch (slot)
    {
    case 0:
      return JIT_TMP0;
#ifdef JIT_TMP1
    case 1:
      return JIT_TMP1;
#endif
#ifdef JIT_TMP2
    case 2:
      return JIT_TMP2;
#endif
#ifdef JIT_TMP3
    case 3:
      return JIT_TMP3;
#endif
#ifdef JIT_TMP4
    case 4:
      return JIT_TMP4;
#endif
#ifdef JIT_TMP5
    case 5:
      return JIT_TMP5;
#endif
    default:
      abort();
    }
}
/* Hand out the single scratch FPR, JIT_FTMP.  Aborts if it is already
   in use.  Pair with unget_temp_fpr.  */
static jit_fpr_t
get_temp_fpr(jit_state_t *_jit)
{
  uint8_t slot = _jit->temp_fpr_saved++;
  if (slot != 0)
    abort();
  return JIT_FTMP;
}
/* Return the scratch FPR most recently obtained from get_temp_fpr.  */
static void
unget_temp_fpr(jit_state_t *_jit)
{
  ASSERT (_jit->temp_fpr_saved);
  _jit->temp_fpr_saved -= 1;
}
/* Return the scratch GPR most recently obtained from get_temp_gpr.  */
static void
unget_temp_gpr(jit_state_t *_jit)
{
  ASSERT (_jit->temp_gpr_saved);
  _jit->temp_gpr_saved -= 1;
}
/* Append one byte at the emit position, or set the overflow flag (and
   write nothing) if it would not fit.  */
static inline void emit_u8(jit_state_t *_jit, uint8_t u8) {
  uint8_t *next = _jit->pc.uc + 1;
  if (UNLIKELY(next > _jit->limit)) {
    _jit->overflow = 1;
    return;
  }
  *_jit->pc.uc = u8;
  _jit->pc.uc = next;
}
/* Append a 16-bit unit at the emit position, or set the overflow flag
   (and write nothing) if it would not fit.  */
static inline void emit_u16(jit_state_t *_jit, uint16_t u16) {
  uint16_t *next = _jit->pc.us + 1;
  if (UNLIKELY(next > (uint16_t*)_jit->limit)) {
    _jit->overflow = 1;
    return;
  }
  *_jit->pc.us = u16;
  _jit->pc.us = next;
}
/* Append a 32-bit unit at the emit position, or set the overflow flag
   (and write nothing) if it would not fit.  */
static inline void emit_u32(jit_state_t *_jit, uint32_t u32) {
  uint32_t *next = _jit->pc.ui + 1;
  if (UNLIKELY(next > (uint32_t*)_jit->limit)) {
    _jit->overflow = 1;
    return;
  }
  *_jit->pc.ui = u32;
  _jit->pc.ui = next;
}
#ifdef JIT_NEEDS_LITERAL_POOL
/* Emit a 16-bit unit, then flush the literal pool (with a guard) if the
   emit position has reached the pool deadline.  */
static inline void emit_u16_with_pool(jit_state_t *_jit, uint16_t u16) {
  emit_u16(_jit, u16);

  uint8_t *deadline = _jit->start + _jit->pool->deadline;
  if (UNLIKELY(_jit->pc.uc >= deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}
/* Emit a 32-bit unit, then flush the literal pool (with a guard) if the
   emit position has reached the pool deadline.  */
static inline void emit_u32_with_pool(jit_state_t *_jit, uint32_t u32) {
  emit_u32(_jit, u32);

  uint8_t *deadline = _jit->start + _jit->pool->deadline;
  if (UNLIKELY(_jit->pc.uc >= deadline))
    emit_literal_pool(_jit, GUARD_NEEDED);
}
#endif
/* Append a 64-bit unit at the emit position, or set the overflow flag
   (and write nothing) if it would not fit.  */
static inline void emit_u64(jit_state_t *_jit, uint64_t u64) {
  uint64_t *next = _jit->pc.ul + 1;
  if (UNLIKELY(next > (uint64_t*)_jit->limit)) {
    _jit->overflow = 1;
    return;
  }
  *_jit->pc.ul = u64;
  _jit->pc.ul = next;
}
/* Append a pointer-sized unit, choosing the 32- or 64-bit emitter from
   the size of uintptr_t.  */
static inline void emit_uintptr(jit_state_t *_jit, uintptr_t u) {
  if (sizeof(u) != 4) {
    emit_u64 (_jit, u);
    return;
  }
  emit_u32 (_jit, u);
}
/* Build a relocation record.

   LOC is the address of the field to patch, INST_START_OFFSET the
   distance from the start of the instruction to LOC, PC_BASE the
   address that relative displacements are measured from, and RSH the
   number of low bits dropped from the displacement.  PC_BASE is stored
   as an offset from the instruction start and must fit in a byte.  */
static inline jit_reloc_t
jit_reloc(jit_state_t *_jit, enum jit_reloc_kind kind,
          uint8_t inst_start_offset, uint8_t *loc, uint8_t *pc_base,
          uint8_t rsh)
{
  uint8_t *inst_start = loc - inst_start_offset;

  ASSERT (rsh < __WORDSIZE);
  ASSERT (pc_base >= inst_start);
  ASSERT (pc_base - inst_start < 256);

  jit_reloc_t reloc;
  reloc.kind = kind;
  reloc.inst_start_offset = inst_start_offset;
  reloc.pc_base_offset = pc_base - inst_start;
  reloc.rsh = rsh;
  reloc.offset = loc - _jit->start;
  return reloc;
}
/* Emit a zeroed pointer-sized slot and return an absolute relocation
   that refers to it.  INST_START is the offset of the slot from the
   start of its instruction.  */
static inline jit_reloc_t
emit_abs_reloc (jit_state_t *_jit, uint8_t inst_start)
{
  uint8_t *slot = _jit->pc.uc;
  emit_uintptr (_jit, 0);

  uint8_t *after_slot = _jit->pc.uc;
  return jit_reloc (_jit, JIT_RELOC_ABSOLUTE, inst_start, slot, after_slot, 0);
}
/* Resolve RELOC to the current emit position.  */
void
jit_patch_here(jit_state_t *_jit, jit_reloc_t reloc)
{
  jit_pointer_t here = jit_address (_jit);
  jit_patch_there (_jit, reloc, here);
}
/* Resolve RELOC so that it refers to ADDR.

   Does nothing if the buffer has overflowed.  For absolute relocations
   ADDR itself is stored; for relative ones the displacement from the
   relocation's PC base, shifted right by reloc.rsh, is stored after
   checking that it fits.  On targets with a literal pool, the
   veneer/pool kinds either patch the instruction directly, update the
   pending literal, or patch an already-emitted veneer or pool slot.

   If the patched field is the last thing emitted, jit_try_shorten is
   given a chance to act on the instruction.  */
void
jit_patch_there(jit_state_t* _jit, jit_reloc_t reloc, jit_pointer_t addr)
{
  if (_jit->overflow)
    return;

  union jit_pc loc;
  uint8_t *end;
  loc.uc = _jit->start + reloc.offset;

  uint8_t *pc_base = loc.uc - reloc.inst_start_offset + reloc.pc_base_offset;
  ptrdiff_t diff = (uint8_t*)addr - pc_base;

  /* The bits dropped by the shift must be zero.  Build the mask in a
     pointer-sized unsigned type: jit_reloc allows rsh up to
     __WORDSIZE - 1, and shifting a plain int that far is undefined.  */
  uintptr_t low_bits = ((uintptr_t) 1 << reloc.rsh) - 1;
  ASSERT(((uintptr_t) diff & low_bits) == 0);
  diff >>= reloc.rsh;

#ifdef JIT_NEEDS_LITERAL_POOL
  int flags = reloc.kind & ~JIT_RELOC_MASK;
#endif

  switch (reloc.kind & JIT_RELOC_MASK)
    {
    case JIT_RELOC_ABSOLUTE:
      if (sizeof(diff) == 4)
        *loc.ui = (uintptr_t)addr;
      else
        *loc.ul = (uintptr_t)addr;
      end = loc.uc + sizeof(diff);
      break;
    case JIT_RELOC_REL8:
      ASSERT (INT8_MIN <= diff && diff <= INT8_MAX);
      *loc.uc = diff;
      end = loc.uc + 1;
      break;
    case JIT_RELOC_REL16:
      ASSERT (INT16_MIN <= diff && diff <= INT16_MAX);
      *loc.us = diff;
      end = loc.uc + 2;
      break;
#ifdef JIT_NEEDS_LITERAL_POOL
    case JIT_RELOC_JMP_WITH_VENEER: {
      int32_t voff = read_jmp_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        // PC still in range to reify direct branch.
        if (offset_in_jmp_range(diff, flags)) {
          // Target also in range: reify direct branch.
          patch_jmp_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          // Target out of range; branch to veneer.
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        // Already emitted a veneer.  In this case, patch the veneer
        // directly.
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_JCC_WITH_VENEER: {
      // Same three cases as for JIT_RELOC_JMP_WITH_VENEER, using the
      // conditional-branch accessors.
      int32_t voff = read_jcc_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        if (offset_in_jcc_range(diff, flags)) {
          patch_jcc_offset(loc.ui, diff);
          remove_pending_literal(_jit, reloc);
        } else {
          patch_pending_literal(_jit, reloc, (uintptr_t) addr);
        }
      } else {
        patch_veneer((uint32_t *) target, addr);
      }
      return;
    }
    case JIT_RELOC_LOAD_FROM_POOL: {
      int32_t voff = read_load_from_pool_offset(loc.ui);
      uint8_t *target = pc_base + (voff << reloc.rsh);
      if (target == loc.uc) {
        // Literal not emitted yet: record the value in the pending entry.
        patch_pending_literal(_jit, reloc, (uintptr_t) addr);
      } else {
        // Literal already emitted: overwrite its pool slot.
        *(uintptr_t *) target = (uintptr_t) addr;
      }
      return;
    }
#endif
    case JIT_RELOC_REL32:
      ASSERT (INT32_MIN <= diff && diff <= INT32_MAX);
      *loc.ui = diff;
      end = loc.uc + 4;
      break;
    case JIT_RELOC_REL64:
      *loc.ul = diff;
      end = loc.uc + 8;
      break;
    default:
      abort ();
    }

  if (end == _jit->pc.uc)
    jit_try_shorten (_jit, reloc, addr);
}
/* Switch to data emission.  MAX_SIZE_OR_ZERO is an upper bound on the
   number of data bytes that will follow, or zero if unknown.  */
void
jit_begin_data(jit_state_t *j, size_t max_size_or_zero)
{
#ifdef JIT_NEEDS_LITERAL_POOL
  if (j->pool->size) {
    // Emitting data won't add entries to the pool, so flush it now if
    // the data might run over the deadline (or if its size is unknown).
    uint8_t *deadline = j->start + j->pool->deadline;
    int size_unknown = max_size_or_zero == 0;
    if (size_unknown || j->pc.uc + max_size_or_zero >= deadline)
      emit_literal_pool (j, NO_GUARD_NEEDED);
  }
#endif

  ASSERT (!j->emitting_data);
  j->emitting_data = 1;
}
/* Leave data emission; must be paired with jit_begin_data.  */
void
jit_end_data(jit_state_t *j)
{
  ASSERT (j->emitting_data);
  j->emitting_data = 0;
}
/* Append one byte of data.  Only valid between jit_begin_data and
   jit_end_data.  */
void
jit_emit_u8(jit_state_t *j, uint8_t u8)
{
  ASSERT (j->emitting_data);
  emit_u8 (j, u8);
}
/* Append a 16-bit datum.  Only valid between jit_begin_data and
   jit_end_data.  */
void
jit_emit_u16(jit_state_t *j, uint16_t u16)
{
  ASSERT (j->emitting_data);
  emit_u16 (j, u16);
}
/* Append a 32-bit datum.  Only valid between jit_begin_data and
   jit_end_data.  */
void
jit_emit_u32(jit_state_t *j, uint32_t u32)
{
  ASSERT (j->emitting_data);
  emit_u32 (j, u32);
}
/* Append a 64-bit datum.  Only valid between jit_begin_data and
   jit_end_data.  */
void
jit_emit_u64(jit_state_t *j, uint64_t u64)
{
  ASSERT (j->emitting_data);
  emit_u64 (j, u64);
}
/* Append a pointer-sized data slot and return the absolute relocation
   used to fill it in later.  Only valid while emitting data.  */
jit_reloc_t
jit_emit_addr(jit_state_t *j)
{
  ASSERT (j->emitting_data);
  /* A data slot is its own "instruction": it starts at offset 0.  */
  return emit_abs_reloc (j, 0);
}
#if defined(__i386__) || defined(__x86_64__)
# include "x86.c"
#elif defined(__mips__)
# include "mips.c"
#elif defined(__arm__)
# include "arm.c"
#elif defined(__ppc__) || defined(__powerpc__)
# include "ppc.c"
#elif defined(__aarch64__)
# include "aarch64.c"
#elif defined(__s390__) || defined(__s390x__)
# include "s390.c"
#elif defined(__riscv__) || defined(__riscv)
# include "riscv.c"
#endif
// JIT_IMPL_<n>(stem, ret, types...) defines the public wrapper
// jit_<stem>, taking <n> typed operands after the state pointer.  Each
// operand of type jit_<t>_t is converted with unwrap_<t> and the result
// is forwarded to the internal function <stem>.
#define JIT_IMPL_0(stem, ret) \
ret jit_##stem (jit_state_t* _jit) \
{ \
return stem(_jit); \
}
#define JIT_IMPL_1(stem, ret, ta) \
ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a) \
{ \
return stem(_jit, unwrap_##ta(a)); \
}
#define JIT_IMPL_2(stem, ret, ta, tb) \
ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b) \
{ \
return stem(_jit, unwrap_##ta(a), unwrap_##tb(b)); \
}
#define JIT_IMPL_3(stem, ret, ta, tb, tc) \
ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c) \
{ \
return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c)); \
}
#define JIT_IMPL_4(stem, ret, ta, tb, tc, td) \
ret jit_##stem (jit_state_t* _jit, jit_##ta##_t a, jit_##tb##_t b, jit_##tc##_t c, jit_##td##_t d) \
{ \
return stem(_jit, unwrap_##ta(a), unwrap_##tb(b), unwrap_##tc(c), unwrap_##td(d)); \
}
// One alias per instruction signature.  The five-character suffix
// encodes the return type followed by up to four operand types:
//   first character:  R = returns jit_reloc_t, _ = returns void
//   operand letters:  G = gpr, F = fpr, i = imm, u = uimm, o = off,
//                     p = pointer, f = float32, d = float64,
//                     _ = no operand in that position
#define JIT_IMPL_RFF__(stem) JIT_IMPL_2(stem, jit_reloc_t, fpr, fpr)
#define JIT_IMPL_RGG__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, gpr)
#define JIT_IMPL_RG___(stem) JIT_IMPL_1(stem, jit_reloc_t, gpr)
#define JIT_IMPL_RGi__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, imm)
#define JIT_IMPL_RGu__(stem) JIT_IMPL_2(stem, jit_reloc_t, gpr, uimm)
#define JIT_IMPL_R____(stem) JIT_IMPL_0(stem, jit_reloc_t)
#define JIT_IMPL__FFF_(stem) JIT_IMPL_3(stem, void, fpr, fpr, fpr)
#define JIT_IMPL__FF__(stem) JIT_IMPL_2(stem, void, fpr, fpr)
#define JIT_IMPL__FGG_(stem) JIT_IMPL_3(stem, void, fpr, gpr, gpr)
#define JIT_IMPL__FG__(stem) JIT_IMPL_2(stem, void, fpr, gpr)
#define JIT_IMPL__FGo_(stem) JIT_IMPL_3(stem, void, fpr, gpr, off)
#define JIT_IMPL__F___(stem) JIT_IMPL_1(stem, void, fpr)
#define JIT_IMPL__Fd__(stem) JIT_IMPL_2(stem, void, fpr, float64)
#define JIT_IMPL__Ff__(stem) JIT_IMPL_2(stem, void, fpr, float32)
#define JIT_IMPL__Fp__(stem) JIT_IMPL_2(stem, void, fpr, pointer)
#define JIT_IMPL__GF__(stem) JIT_IMPL_2(stem, void, gpr, fpr)
#define JIT_IMPL__GGF_(stem) JIT_IMPL_3(stem, void, gpr, gpr, fpr)
#define JIT_IMPL__GGGG(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, gpr)
#define JIT_IMPL__GGG_(stem) JIT_IMPL_3(stem, void, gpr, gpr, gpr)
#define JIT_IMPL__GGGi(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, imm)
#define JIT_IMPL__GGGu(stem) JIT_IMPL_4(stem, void, gpr, gpr, gpr, uimm)
#define JIT_IMPL__GG__(stem) JIT_IMPL_2(stem, void, gpr, gpr)
#define JIT_IMPL__GGi_(stem) JIT_IMPL_3(stem, void, gpr, gpr, imm)
#define JIT_IMPL__GGo_(stem) JIT_IMPL_3(stem, void, gpr, gpr, off)
#define JIT_IMPL__GGu_(stem) JIT_IMPL_3(stem, void, gpr, gpr, uimm)
#define JIT_IMPL__G___(stem) JIT_IMPL_1(stem, void, gpr)
#define JIT_IMPL__Gi__(stem) JIT_IMPL_2(stem, void, gpr, imm)
#define JIT_IMPL__Gp__(stem) JIT_IMPL_2(stem, void, gpr, pointer)
#define JIT_IMPL______(stem) JIT_IMPL_0(stem, void)
#define JIT_IMPL__i___(stem) JIT_IMPL_1(stem, void, imm)
#define JIT_IMPL__oGF_(stem) JIT_IMPL_3(stem, void, off, gpr, fpr)
#define JIT_IMPL__oGG_(stem) JIT_IMPL_3(stem, void, off, gpr, gpr)
#define JIT_IMPL__pF__(stem) JIT_IMPL_2(stem, void, pointer, fpr)
#define JIT_IMPL__pG__(stem) JIT_IMPL_2(stem, void, pointer, gpr)
#define JIT_IMPL__p___(stem) JIT_IMPL_1(stem, void, pointer)
// Conversions from the public operand types to what the internal
// instruction functions take: registers become register numbers,
// pointers become uintptr_t, everything else passes through unchanged.
#define unwrap_gpr(r) jit_gpr_regno(r)
#define unwrap_fpr(r) jit_fpr_regno(r)
#define unwrap_imm(i) i
#define unwrap_uimm(u) u
#define unwrap_off(o) o
#define unwrap_pointer(p) ((uintptr_t) p)
#define unwrap_float32(f) f
#define unwrap_float64(d) d
// Expand one jit_<stem> wrapper for every (kind, stem) pair that
// FOR_EACH_INSTRUCTION lists (that macro is defined outside this file).
#define IMPL_INSTRUCTION(kind, stem) JIT_IMPL_##kind(stem)
FOR_EACH_INSTRUCTION(IMPL_INSTRUCTION)
#undef IMPL_INSTRUCTION
/* Pad with nops until the emit position is a multiple of ALIGN, which
   must be a power of two.  Emits nothing if the position is already
   aligned.  */
void
jit_align(jit_state_t *_jit, unsigned align)
{
  ASSERT (is_power_of_two (align));

  /* Build the mask at pointer width.  Computing ~(align - 1) in
     `unsigned' and then widening it would zero-extend the mask and
     clear the upper half of a 64-bit address.  */
  uintptr_t mask = (uintptr_t) align - 1;
  uintptr_t here = _jit->pc.uw;
  uintptr_t there = (here + mask) & ~mask;

  if (there != here)
    nop(_jit, there - here);
}
/* Return 1 if an operand with ABI type ARG is a floating-point value
   (float or double), 0 if it is an integer or pointer.  Aborts on any
   other value.  */
static jit_bool_t
is_fpr_arg(enum jit_operand_abi arg)
{
  switch (arg)
    {
    case JIT_OPERAND_ABI_FLOAT:
    case JIT_OPERAND_ABI_DOUBLE:
      return 1;

    case JIT_OPERAND_ABI_UINT8:
    case JIT_OPERAND_ABI_INT8:
    case JIT_OPERAND_ABI_UINT16:
    case JIT_OPERAND_ABI_INT16:
    case JIT_OPERAND_ABI_UINT32:
    case JIT_OPERAND_ABI_INT32:
    case JIT_OPERAND_ABI_UINT64:
    case JIT_OPERAND_ABI_INT64:
    case JIT_OPERAND_ABI_POINTER:
      return 0;

    default:
      abort();
    }
}
/* Complement of is_fpr_arg: 1 for integer and pointer ABI types, 0 for
   float and double.  */
static jit_bool_t
is_gpr_arg(enum jit_operand_abi arg)
{
  return is_fpr_arg (arg) ? 0 : 1;
}
/* Load the immediate IMM into DST, after checking that it is
   representable in the ABI type ABI.  Aborts on a range violation or on
   an ABI type that cannot be held in a GPR.  */
static void
abi_imm_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t dst,
               intptr_t imm)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    ASSERT(0 <= imm && imm <= UINT8_MAX);
    break;
  case JIT_OPERAND_ABI_INT8:
    ASSERT(INT8_MIN <= imm && imm <= INT8_MAX);
    break;
  case JIT_OPERAND_ABI_UINT16:
    ASSERT(0 <= imm && imm <= UINT16_MAX);
    break;
  case JIT_OPERAND_ABI_INT16:
    ASSERT(INT16_MIN <= imm && imm <= INT16_MAX);
    break;
  case JIT_OPERAND_ABI_UINT32:
    ASSERT(0 <= imm && imm <= UINT32_MAX);
    break;
  case JIT_OPERAND_ABI_INT32:
    ASSERT(INT32_MIN <= imm && imm <= INT32_MAX);
    break;
#if __WORDSIZE > 32
  /* Any intptr_t fits a 64-bit slot; nothing to check.  */
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
    break;
#endif
  case JIT_OPERAND_ABI_POINTER:
    break;
  default:
    abort();
  }

  jit_movi (_jit, dst, imm);
}
/* Store the GPR SRC to BASE + OFFSET as an operand of ABI type ABI.
   Aborts on an ABI type that is not an integer or pointer type of at
   most word size.  */
static void
abi_gpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t base, ptrdiff_t offset, jit_gpr_t src)
{
  // Invariant: GPR memory destination operand sizes are rounded up to words.
  // True for ARM, AArch64, IA32, and X86-64.  Some ABIs expect to be able to
  // load operands from the stack via a full-word read, so we need to make sure
  // we don't leave garbage in the high bytes of (for example) the stack slot
  // for a uint8_t arg.  Hence every sub-word type gets a full-word store.
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
  case JIT_OPERAND_ABI_INT8:
  case JIT_OPERAND_ABI_UINT16:
  case JIT_OPERAND_ABI_INT16:
  case JIT_OPERAND_ABI_UINT32:
  case JIT_OPERAND_ABI_INT32:
#if __WORDSIZE == 32
  case JIT_OPERAND_ABI_POINTER:
#endif
    jit_stxi(_jit, offset, base, src);
    return;
#if __WORDSIZE == 64
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
  case JIT_OPERAND_ABI_POINTER:
    jit_stxi_l(_jit, offset, base, src);
    return;
#endif
  default:
    abort();
  }
}
/* Store the FPR SRC to BASE + OFFSET as a float or double, according to
   ABI.  Aborts on any other ABI type.  */
static void
abi_fpr_to_mem(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t base, ptrdiff_t offset, jit_fpr_t src)
{
  if (abi == JIT_OPERAND_ABI_FLOAT)
    jit_stxi_f(_jit, offset, base, src);
  else if (abi == JIT_OPERAND_ABI_DOUBLE)
    jit_stxi_d(_jit, offset, base, src);
  else
    abort();
}
/* Load the operand of ABI type ABI stored at BASE + OFFSET into the GPR
   DST, using the load whose width and signedness match the type.
   Floating-point operands are loaded through the scratch FPR and their
   bits moved into DST; doubles are only supported on 64-bit words.
   Aborts on an unsupported ABI type.  */
static void
abi_mem_to_gpr(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_gpr_t dst, jit_gpr_t base, ptrdiff_t offset)
{
  switch (abi) {
  case JIT_OPERAND_ABI_UINT8:
    jit_ldxi_uc(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT8:
    jit_ldxi_c(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT16:
    jit_ldxi_us(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT16:
    jit_ldxi_s(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_INT32:
#if __WORDSIZE == 32
  /* On 32-bit words these are all plain word loads.  */
  case JIT_OPERAND_ABI_UINT32:
  case JIT_OPERAND_ABI_POINTER:
#endif
    jit_ldxi_i(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_FLOAT:
    {
      jit_fpr_t scratch = get_temp_fpr(_jit);
      jit_ldxi_f(_jit, scratch, base, offset);
      jit_movr_i_f(_jit, dst, scratch);
      unget_temp_fpr(_jit);
      break;
    }
#if __WORDSIZE == 64
  case JIT_OPERAND_ABI_UINT32:
    jit_ldxi_ui(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_UINT64:
  case JIT_OPERAND_ABI_INT64:
  case JIT_OPERAND_ABI_POINTER:
    jit_ldxi_l(_jit, dst, base, offset);
    break;
  case JIT_OPERAND_ABI_DOUBLE:
    {
      jit_fpr_t scratch = get_temp_fpr(_jit);
      jit_ldxi_d(_jit, scratch, base, offset);
      jit_movr_l_d(_jit, dst, scratch);
      unget_temp_fpr(_jit);
      break;
    }
#endif
  default:
    abort();
  }
}
/* Load a float or double (according to ABI) from BASE + OFFSET into the
   FPR DST.  Aborts on any other ABI type.  */
static void
abi_mem_to_fpr(jit_state_t *_jit, enum jit_operand_abi abi,
               jit_fpr_t dst, jit_gpr_t base, ptrdiff_t offset)
{
  if (abi == JIT_OPERAND_ABI_FLOAT)
    jit_ldxi_f(_jit, dst, base, offset);
  else if (abi == JIT_OPERAND_ABI_DOUBLE)
    jit_ldxi_d(_jit, dst, base, offset);
  else
    abort();
}
/* Store the immediate IMM to BASE + OFFSET as an operand of the
   (non-floating-point) ABI type ABI, going through a scratch GPR.  */
static void
abi_imm_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
               ptrdiff_t offset, jit_imm_t imm)
{
  ASSERT(is_gpr_arg(abi));

  jit_gpr_t scratch = get_temp_gpr(_jit);
  abi_imm_to_gpr(_jit, abi, scratch, imm);
  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
  unget_temp_gpr(_jit);
}
/* Copy an operand of ABI type ABI from SRC_BASE + SRC_OFFSET to
   BASE + OFFSET, through a scratch register of the matching class.  */
static void
abi_mem_to_mem(jit_state_t *_jit, enum jit_operand_abi abi, jit_gpr_t base,
               ptrdiff_t offset, jit_gpr_t src_base, ptrdiff_t src_offset)
{
  if (is_fpr_arg (abi)) {
    jit_fpr_t scratch = get_temp_fpr(_jit);
    abi_mem_to_fpr(_jit, abi, scratch, src_base, src_offset);
    abi_fpr_to_mem(_jit, abi, base, offset, scratch);
    unget_temp_fpr(_jit);
    return;
  }

  jit_gpr_t scratch = get_temp_gpr(_jit);
  abi_mem_to_gpr(_jit, abi, scratch, src_base, src_offset);
  abi_gpr_to_mem(_jit, abi, base, offset, scratch);
  unget_temp_gpr(_jit);
}
// MOVE_KIND packs a (source kind, destination kind) pair into one
// integer: the source kind in the bits above bit 3, the destination
// kind in the low four bits.
#define MOVE_KIND(a, b) ((((int) a) << 4) | ((int) b))
// Defines the enumerator MOVE_<a>_TO_<b> with the packed value for that
// pair of JIT_OPERAND_KIND_* constants.
#define MOVE_KIND_ENUM(a, b) \
MOVE_##a##_TO_##b = MOVE_KIND(JIT_OPERAND_KIND_##a, JIT_OPERAND_KIND_##b)
// The source/destination combinations that move_operand knows how to
// emit.  Any other combination makes move_operand abort.
enum move_kind {
MOVE_KIND_ENUM(IMM, GPR),
MOVE_KIND_ENUM(GPR, GPR),
MOVE_KIND_ENUM(MEM, GPR),
MOVE_KIND_ENUM(FPR, FPR),
MOVE_KIND_ENUM(MEM, FPR),
MOVE_KIND_ENUM(IMM, MEM),
MOVE_KIND_ENUM(GPR, MEM),
MOVE_KIND_ENUM(FPR, MEM),
MOVE_KIND_ENUM(MEM, MEM),
MOVE_KIND_ENUM(FPR, GPR)
};
#undef MOVE_KIND_ENUM
/* Emit the code that copies the operand SRC into the location DST,
   dispatching on the (source kind, destination kind) pair.  Addends are
   not applied here.  Aborts on a combination that is not listed in enum
   move_kind.  */
static void
move_operand(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
{
  switch (MOVE_KIND (src.kind, dst.kind)) {
  case MOVE_IMM_TO_GPR:
    abi_imm_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.imm);
    break;

  case MOVE_GPR_TO_GPR:
    jit_movr(_jit, dst.loc.gpr.gpr, src.loc.gpr.gpr);
    break;

  case MOVE_FPR_TO_GPR:
    /* Raw bit copy from an FPR into a GPR.  */
#if __WORDSIZE > 32
    if (src.abi == JIT_OPERAND_ABI_DOUBLE) {
      jit_movr_l_d(_jit, dst.loc.gpr.gpr, src.loc.fpr);
      break;
    }
#endif
    jit_movr_i_f(_jit, dst.loc.gpr.gpr, src.loc.fpr);
    break;

  case MOVE_MEM_TO_GPR:
    abi_mem_to_gpr(_jit, src.abi, dst.loc.gpr.gpr, src.loc.mem.base,
                   src.loc.mem.offset);
    break;

  case MOVE_FPR_TO_FPR:
    ASSERT(src.abi == dst.abi);
    if (src.abi == JIT_OPERAND_ABI_DOUBLE)
      jit_movr_d(_jit, dst.loc.fpr, src.loc.fpr);
    else
      jit_movr_f(_jit, dst.loc.fpr, src.loc.fpr);
    break;

  case MOVE_MEM_TO_FPR:
    abi_mem_to_fpr(_jit, src.abi, dst.loc.fpr, src.loc.mem.base,
                   src.loc.mem.offset);
    break;

  case MOVE_IMM_TO_MEM:
    abi_imm_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                   src.loc.imm);
    break;

  case MOVE_GPR_TO_MEM:
    abi_gpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                   src.loc.gpr.gpr);
    break;

  case MOVE_FPR_TO_MEM:
    abi_fpr_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                   src.loc.fpr);
    break;

  case MOVE_MEM_TO_MEM:
    abi_mem_to_mem(_jit, src.abi, dst.loc.mem.base, dst.loc.mem.offset,
                   src.loc.mem.base, src.loc.mem.offset);
    break;

  default:
    abort();
  }
}
// A direct transliteration of "Tilting at windmills with Coq: formal
// verification of a compilation algorithm for parallel moves" by
// Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy:
// https://xavierleroy.org/publi/parallel-move.pdf
//
// Per-operand progress of the parallel move: not yet started
// (TO_MOVE), currently on move_one's recursion stack (BEING_MOVED), or
// emitted (MOVED).
enum move_status { TO_MOVE, BEING_MOVED, MOVED };
/* Return non-zero when SRC and DST name the same location: the same
   GPR, the same FPR, or the same base register and offset in memory.
   Pairs of different kinds are never considered in place.  */
static inline int
already_in_place(jit_operand_t src, jit_operand_t dst)
{
  int kind = MOVE_KIND(src.kind, dst.kind);

  if (kind == MOVE_GPR_TO_GPR)
    return jit_same_gprs (src.loc.gpr.gpr, dst.loc.gpr.gpr);

  if (kind == MOVE_FPR_TO_FPR)
    return jit_same_fprs (src.loc.fpr, dst.loc.fpr);

  if (kind == MOVE_MEM_TO_MEM)
    return jit_same_gprs (src.loc.mem.base, dst.loc.mem.base) &&
      src.loc.mem.offset == dst.loc.mem.offset;

  return 0;
}
/* Return non-zero when writing DST would destroy the value that SRC
   still has to be read from: either they are the same location, or SRC
   is a memory operand whose base register is the GPR DST.  */
static inline int
write_would_clobber(jit_operand_t src, jit_operand_t dst)
{
  if (already_in_place (src, dst))
    return 1;

  if (MOVE_KIND(src.kind, dst.kind) != MOVE_MEM_TO_GPR)
    return 0;

  return jit_same_gprs(src.loc.mem.base, dst.loc.gpr.gpr);
}
/* Return the addend carried by a GPR or MEM operand.  Aborts for any
   other operand kind.  */
static inline ptrdiff_t
operand_addend(jit_operand_t op)
{
  if (op.kind == JIT_OPERAND_KIND_GPR)
    return op.loc.gpr.addend;
  if (op.kind == JIT_OPERAND_KIND_MEM)
    return op.loc.mem.addend;
  abort();
}
/* Emit the move for operand I of a parallel move.

   Before dst[i] is written, every source that the write would clobber
   is dealt with: one that has not been started yet is moved first
   (recursively); one that is already being moved -- i.e. we have found
   a cycle -- is parked in a scratch register, and src[j] is redirected
   to that register.  STATUS tracks the progress of each of the ARGC
   operands.  Does nothing if operand I is already in place.  */
static void
move_one(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
         size_t argc, enum move_status *status, size_t i)
{
  int tmp_gpr = 0, tmp_fpr = 0;

  if (already_in_place(src[i], dst[i]))
    return;

  status[i] = BEING_MOVED;
  for (size_t j = 0; j < argc; j++) {
    if (write_would_clobber(src[j], dst[i])) {
      switch (status[j]) {
      case TO_MOVE:
        move_one(_jit, dst, src, argc, status, j);
        break;
      case BEING_MOVED: {
        jit_operand_t tmp;
        /* Pick the scratch register class from where the value lives:
           an FPR source, or a float/double held in memory, is parked in
           the scratch FPR; everything else goes to a scratch GPR.  The
           test must look at the operand's kind and ABI type;
           is_fpr_arg classifies ABI types and must not be handed an
           operand kind.  */
        int park_in_fpr =
          src[j].kind == JIT_OPERAND_KIND_FPR
          || (src[j].kind == JIT_OPERAND_KIND_MEM
              && is_fpr_arg (src[j].abi));
        if (park_in_fpr) {
          tmp_fpr = 1;
          tmp = jit_operand_fpr(src[j].abi, get_temp_fpr(_jit));
        } else {
          tmp_gpr = 1;
          /* Preserve addend, if any, from source operand, to be applied
             at the end.  */
          tmp = jit_operand_gpr_with_addend(src[j].abi, get_temp_gpr(_jit),
                                            operand_addend(src[j]));
        }
        move_operand (_jit, tmp, src[j]);
        src[j] = tmp;
        break;
      }
      case MOVED:
        break;
      default:
        abort ();
      }
    }
  }

  move_operand (_jit, dst[i], src[i]);
  status[i] = MOVED;

  /* Give back the scratch register used to break a cycle, if any.  */
  if (tmp_gpr)
    unget_temp_gpr(_jit);
  else if (tmp_fpr)
    unget_temp_fpr(_jit);
}
/* After SRC has been moved into DST, add SRC's addend (if non-zero) to
   the value now held in DST.  Only GPR and MEM sources carry addends.
   A register destination is incremented in place; a memory destination
   is reloaded into a scratch GPR, incremented and stored back.  */
static void
apply_addend(jit_state_t *_jit, jit_operand_t dst, jit_operand_t src)
{
  switch (MOVE_KIND(src.kind, dst.kind)) {
  case MOVE_GPR_TO_GPR:
  case MOVE_MEM_TO_GPR: {
    ptrdiff_t addend = operand_addend(src);
    if (addend)
      jit_addi(_jit, dst.loc.gpr.gpr, dst.loc.gpr.gpr, addend);
    break;
  }

  case MOVE_GPR_TO_MEM:
  case MOVE_MEM_TO_MEM: {
    ptrdiff_t addend = operand_addend(src);
    if (addend) {
      jit_gpr_t scratch = get_temp_gpr(_jit);
      abi_mem_to_gpr(_jit, dst.abi, scratch, dst.loc.mem.base,
                     dst.loc.mem.offset);
      jit_addi(_jit, scratch, scratch, addend);
      abi_gpr_to_mem(_jit, dst.abi, dst.loc.mem.base, dst.loc.mem.offset,
                     scratch);
      unget_temp_gpr(_jit);
    }
    break;
  }

  default:
    break;
  }
}
/* Emit a parallel move of the ARGC operands SRC[i] into DST[i], as if
   all moves happened at once.  SRC may be modified (a source that is
   part of a cycle is redirected to a scratch register).

   Preconditions: No dest operand is IMM.  No dest operand aliases
   another dest operand.  No dest MEM operand uses a base register which
   is used as a dest GPR.  No dst operand has an addend.  The registers
   returned by get_temp_gpr and get_temp_fpr do not appear in source or
   dest args.  */
void
jit_move_operands(jit_state_t *_jit, jit_operand_t *dst, jit_operand_t *src,
                  size_t argc)
{
  // Nothing to move.  Returning here also keeps the variable-length
  // status array below from being declared with zero elements.
  if (argc == 0)
    return;

  // Check preconditions, except the condition about tmp registers.
  {
    uint64_t src_gprs = 0;
    uint64_t dst_gprs = 0;
    uint64_t dst_fprs = 0;
    uint64_t dst_mem_base_gprs = 0;
    for (size_t i = 0; i < argc; i++) {
      switch (src[i].kind) {
      case JIT_OPERAND_KIND_GPR:
        src_gprs |= 1ULL << jit_gpr_regno(src[i].loc.gpr.gpr);
        break;
      case JIT_OPERAND_KIND_FPR:
      case JIT_OPERAND_KIND_IMM:
      case JIT_OPERAND_KIND_MEM:
        break;
      default:
        abort();
      }
      switch (dst[i].kind) {
      case JIT_OPERAND_KIND_GPR: {
        ASSERT(dst[i].loc.gpr.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.gpr.gpr);
        ASSERT((dst_gprs & bit) == 0);
        dst_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_FPR: {
        uint64_t bit = 1ULL << jit_fpr_regno(dst[i].loc.fpr);
        ASSERT((dst_fprs & bit) == 0);
        dst_fprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_MEM: {
        ASSERT(dst[i].loc.mem.addend == 0);
        uint64_t bit = 1ULL << jit_gpr_regno(dst[i].loc.mem.base);
        dst_mem_base_gprs |= bit;
        break;
      }
      case JIT_OPERAND_KIND_IMM:
      default:
        abort();
        break;
      }
    }
    // A base register of a memory destination may be neither read as a
    // GPR source nor written as a GPR destination.
    ASSERT(((src_gprs | dst_gprs) & dst_mem_base_gprs) == 0);
  }

  enum move_status status[argc];
  for (size_t i = 0; i < argc; i++)
    status[i] = TO_MOVE;

  // Mem-to-mem moves require a temp register but don't overwrite
  // other argument registers.  Perform them first to free up the tmp
  // for other uses.
  for (size_t i = 0; i < argc; i++)
    if ((status[i] == TO_MOVE)
        && (MOVE_KIND (src[i].kind, dst[i].kind) == MOVE_MEM_TO_MEM))
      move_one(_jit, dst, src, argc, status, i);

  for (size_t i = 0; i < argc; i++)
    if (status[i] == TO_MOVE)
      move_one(_jit, dst, src, argc, status, i);

  // Apply addends at the end.  We could do it earlier in some cases but
  // at least at the end we know that an in-place increment of one
  // operand won't alias another.
  for (size_t i = 0; i < argc; i++)
    apply_addend(_jit, dst[i], src[i]);
}
/* Grow the stack frame by at least EXPAND bytes, rounding the resulting
   frame size up to the platform stack alignment.  Emits the stack
   pointer adjustment, records the new frame size, and returns the
   number of bytes actually subtracted from SP (pass it to
   jit_shrink_stack to undo).  */
size_t
jit_align_stack(jit_state_t *_jit, size_t expand)
{
  // Align stack to double-word boundaries.  This isn't really a
  // principle but it does work for Aarch32, AArch64 and x86-64.
  size_t align = jit_stack_alignment ();
  size_t wanted = _jit->frame_size + expand;
  size_t rounded = (wanted + align - 1) & ~(align - 1);
  size_t grow_by = rounded - _jit->frame_size;

  if (grow_by != 0)
    jit_subi (_jit, JIT_SP, JIT_SP, grow_by);

  _jit->frame_size = rounded;
  return grow_by;
}
/* Pop DIFF bytes off the stack frame: emit the stack pointer adjustment
   (when DIFF is non-zero) and reduce the recorded frame size.  */
void
jit_shrink_stack(jit_state_t *_jit, size_t diff)
{
  if (diff != 0)
    jit_addi (_jit, JIT_SP, JIT_SP, diff);

  _jit->frame_size -= diff;
}
// Callee-save GPRs that jit_enter_jit_abi always saves and
// jit_leave_jit_abi always restores; the list comes from the backend
// through JIT_PLATFORM_CALLEE_SAVE_GPRS.
static const jit_gpr_t platform_callee_save_gprs[] = {
JIT_PLATFORM_CALLEE_SAVE_GPRS
};
// The JIT_V* registers, in order.  jit_enter_jit_abi / jit_leave_jit_abi
// save and restore the first `v' of them.  JIT_V0..JIT_V2 are assumed to
// exist on every backend; the rest are included only when defined.
static const jit_gpr_t user_callee_save_gprs[] = {
JIT_V0, JIT_V1, JIT_V2
#ifdef JIT_V3
, JIT_V3
#endif
#ifdef JIT_V4
, JIT_V4
#endif
#ifdef JIT_V5
, JIT_V5
#endif
#ifdef JIT_V6
, JIT_V6
#endif
#ifdef JIT_V7
, JIT_V7
#endif
#ifdef JIT_V8
, JIT_V8
#endif
#ifdef JIT_V9
, JIT_V9
#endif
#ifdef JIT_V10
, JIT_V10
#endif
};
// The JIT_VF* registers, in order.  jit_enter_jit_abi /
// jit_leave_jit_abi save and restore the first `vf' of them.  Every
// entry is conditional on the backend defining it.
// NOTE(review): the comma placement assumes that JIT_VFn being defined
// implies JIT_VF0..JIT_VF(n-1) are defined too -- confirm per backend.
static const jit_fpr_t user_callee_save_fprs[] = {
#ifdef JIT_VF0
JIT_VF0
#endif
#ifdef JIT_VF1
, JIT_VF1
#endif
#ifdef JIT_VF2
, JIT_VF2
#endif
#ifdef JIT_VF3
, JIT_VF3
#endif
#ifdef JIT_VF4
, JIT_VF4
#endif
#ifdef JIT_VF5
, JIT_VF5
#endif
#ifdef JIT_VF6
, JIT_VF6
#endif
#ifdef JIT_VF7
, JIT_VF7
#endif
#ifdef JIT_VF8
, JIT_VF8
#endif
#ifdef JIT_VF9
, JIT_VF9
#endif
#ifdef JIT_VF10
, JIT_VF10
#endif
#ifdef JIT_VF11
, JIT_VF11
#endif
};
// Number of elements in a true array (not valid for pointers).
#define ARRAY_SIZE(X) (sizeof (X)/sizeof ((X)[0]))
// Element counts of the three callee-save register tables.
static const size_t pv_count = ARRAY_SIZE(platform_callee_save_gprs);
static const size_t v_count = ARRAY_SIZE(user_callee_save_gprs);
static const size_t vf_count = ARRAY_SIZE(user_callee_save_fprs);
/* Emit a prologue that saves callee-save registers on the stack.

   Saves the first VF user FPRs (8 bytes each), then the first V user
   GPRs, then all platform callee-save GPRs (one word each), at
   increasing offsets from the new stack pointer.  The frame size must
   be zero on entry; it is first set to jit_initial_frame_size () and
   then grown by jit_align_stack.  FRAME_SIZE is not used.

   Returns the number of bytes reserved, to be passed back to
   jit_leave_jit_abi.  */
size_t
jit_enter_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  const size_t word_bytes = __WORDSIZE / 8;
  const size_t fpr_bytes = 8;

  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);
  ASSERT(_jit->frame_size == 0);
  _jit->frame_size = jit_initial_frame_size();

  size_t save_area = (pv_count + v) * word_bytes + vf * fpr_bytes;
  size_t reserved = jit_align_stack(_jit, save_area);

  size_t offset = 0;
  for (size_t n = 0; n < vf; n++) {
    jit_stxi_d(_jit, offset, JIT_SP, user_callee_save_fprs[n]);
    offset += fpr_bytes;
  }
  for (size_t n = 0; n < v; n++) {
    jit_stxi(_jit, offset, JIT_SP, user_callee_save_gprs[n]);
    offset += word_bytes;
  }
  for (size_t n = 0; n < pv_count; n++) {
    jit_stxi(_jit, offset, JIT_SP, platform_callee_save_gprs[n]);
    offset += word_bytes;
  }

  ASSERT(offset <= reserved);
  return reserved;
}
/* Emit the epilogue matching jit_enter_jit_abi: reload the first VF
   user FPRs, the first V user GPRs and all platform callee-save GPRs
   from the same offsets they were saved at, then pop FRAME_SIZE bytes
   (the value jit_enter_jit_abi returned) off the stack.  */
void
jit_leave_jit_abi(jit_state_t *_jit, size_t v, size_t vf, size_t frame_size)
{
  const size_t word_bytes = __WORDSIZE / 8;
  const size_t fpr_bytes = 8;

  ASSERT(v <= v_count);
  ASSERT(vf <= vf_count);
  ASSERT((pv_count + v) * word_bytes + vf * fpr_bytes <= frame_size);

  size_t offset = 0;
  for (size_t n = 0; n < vf; n++) {
    jit_ldxi_d(_jit, user_callee_save_fprs[n], JIT_SP, offset);
    offset += fpr_bytes;
  }
  for (size_t n = 0; n < v; n++) {
    jit_ldxi(_jit, user_callee_save_gprs[n], JIT_SP, offset);
    offset += word_bytes;
  }
  for (size_t n = 0; n < pv_count; n++) {
    jit_ldxi(_jit, platform_callee_save_gprs[n], JIT_SP, offset);
    offset += word_bytes;
  }

  ASSERT(offset <= frame_size);
  jit_shrink_stack(_jit, frame_size);
}
// Move the ARGC operands in ARGS to the locations the calling
// convention assigns them, reserving (aligned) stack space for any
// that are passed on the stack.  Returns the number of bytes by which
// the stack was grown; the caller pops them after the call.
//
// For an indirect call FUN points at the register holding the callee
// address.  When there are arguments, that register takes part in the
// parallel move with get_callr_temp () as its destination, and *FUN is
// updated to the register to call through.  FUN is NULL for a direct
// call.
//
// ARGS itself is not modified.
//
// Precondition: stack is already aligned.
static size_t
prepare_call_args(jit_state_t *_jit, size_t argc, jit_operand_t args[],
                  jit_gpr_t *fun)
{
  size_t count = argc + (fun == NULL ? 0 : 1);
  // A variable-length array must have at least one element, so reserve
  // one slot even when there is nothing to move.
  size_t slots = count ? count : 1;
  jit_operand_t src[slots];
  jit_operand_t dst[slots];

  if (argc)
    memcpy (src, args, sizeof (jit_operand_t) * argc);
  if (fun != NULL) {
    jit_gpr_t fun_tmp = argc == 0 ? *fun : get_callr_temp (_jit);
    src[argc] = jit_operand_gpr (JIT_OPERAND_ABI_POINTER, *fun);
    dst[argc] = jit_operand_gpr (JIT_OPERAND_ABI_POINTER, fun_tmp);
    *fun = fun_tmp;
  }

  struct abi_arg_iterator iter;

  // Compute shuffle destinations and space for spilled arguments.
  reset_abi_arg_iterator(&iter, argc, args);
  for (size_t i = 0; i < argc; i++)
    next_abi_arg(&iter, &dst[i]);

  // Reserve space for spilled arguments and ensure stack alignment.
  size_t stack_size = jit_align_stack(_jit, iter.stack_size);

  // Fix up SP-relative operands: SP has just moved down by stack_size,
  // so sources expressed relative to the old SP must be rebased.  The
  // fix-up has to be applied to SRC, the copy that is actually handed
  // to jit_move_operands, not to the caller's ARGS array.
  for (size_t i = 0; i < argc; i++) {
    switch(src[i].kind) {
    case JIT_OPERAND_KIND_GPR:
      if (jit_same_gprs (src[i].loc.gpr.gpr, JIT_SP))
        src[i].loc.gpr.addend += stack_size;
      break;
    case JIT_OPERAND_KIND_MEM:
      if (jit_same_gprs (src[i].loc.mem.base, JIT_SP))
        src[i].loc.mem.offset += stack_size;
      break;
    default:
      break;
    }
  }

  if (count)
    jit_move_operands(_jit, dst, src, count);

  return stack_size;
}
// Emit a call to the fixed address F with ARGC arguments described by
// ARGS: marshal the arguments, emit the call, then give back whatever
// stack the marshalling reserved.
void
jit_calli(jit_state_t *_jit, jit_pointer_t f, size_t argc, jit_operand_t args[])
{
  // The target is an immediate, so there is no callee register to
  // protect during the argument shuffle.
  const size_t reserved = prepare_call_args(_jit, argc, args, NULL);

  calli(_jit, (jit_word_t)f);

  jit_shrink_stack(_jit, reserved);
}
// Emit a call through register F with ARGC arguments described by
// ARGS: marshal the arguments, emit the call, then give back whatever
// stack the marshalling reserved.
void
jit_callr(jit_state_t *_jit, jit_gpr_t f, size_t argc, jit_operand_t args[])
{
  // F is passed by address because prepare_call_args() may substitute
  // a different register for the callee; the call below must use
  // whichever register it leaves in F.
  const size_t reserved = prepare_call_args(_jit, argc, args, &f);

  callr(_jit, jit_gpr_regno(f));

  jit_shrink_stack(_jit, reserved);
}
// Overwrite each of the ARGC operands in ARGS with the location the
// ABI argument iterator yields for it.  The iterator's stack size is
// advanced by the current frame size before any location is produced.
void
jit_locate_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  struct abi_arg_iterator walker;

  reset_abi_arg_iterator(&walker, argc, args);
  walker.stack_size += _jit->frame_size;

  size_t idx = 0;
  while (idx < argc) {
    next_abi_arg(&walker, &args[idx]);
    idx++;
  }
}
/* Emit the moves that bring the ARGC incoming arguments from the
   locations computed by jit_locate_args() into the operands named by
   ARGS.  ARGS itself is left unmodified; the locations are computed on
   a private copy.

   Precondition: args are distinct locations of type GPR or FPR.  All
   addends of arg operands are zero.  No GPR arg is SP.  */
void
jit_load_args(jit_state_t *_jit, size_t argc, jit_operand_t args[])
{
  jit_operand_t incoming[argc];

  for (size_t idx = 0; idx < argc; idx++)
    incoming[idx] = args[idx];

  jit_locate_args(_jit, argc, incoming);
  jit_move_operands(_jit, args, incoming, argc);
}
#ifdef JIT_NEEDS_LITERAL_POOL
// Conservative upper bound, in bytes, on the space needed to emit POOL
// in its current state.
static uint32_t
literal_pool_byte_size(struct jit_literal_pool *pool)
{
  // One instruction word that may be needed to branch over the table.
  const size_t guard_bytes = sizeof(uint32_t);
  // Worst-case padding to align the table.
  const size_t align_slack = 7;
  // No entry is assumed to take more than two words.
  const size_t entry_bytes = sizeof(uintptr_t) * 2;

  return guard_bytes + align_slack + pool->size * entry_bytes;
}
// Return POOL to the empty state: zero the entries that were in use,
// set the entry count to zero, and push the deadline out to the size
// of the code buffer (limit - start).
static void
reset_literal_pool(jit_state_t *_jit, struct jit_literal_pool *pool)
{
  const size_t used = pool->size;

  memset(pool->entries, 0, used * sizeof(pool->entries[0]));
  pool->size = 0;
  pool->deadline = _jit->limit - _jit->start;
}
#define INITIAL_LITERAL_POOL_CAPACITY 12
// Allocate an empty literal pool with room for CAPACITY entries using
// the state's allocator.  A CAPACITY of zero selects
// INITIAL_LITERAL_POOL_CAPACITY.  Aborts (via ASSERT) if the allocator
// returns NULL.
static struct jit_literal_pool*
alloc_literal_pool(jit_state_t *_jit, size_t capacity)
{
  if (capacity == 0)
    capacity = INITIAL_LITERAL_POOL_CAPACITY;

  // Header plus the flexible array of entries.
  const size_t bytes = sizeof (struct jit_literal_pool) +
    sizeof (struct jit_literal_pool_entry) * capacity;

  struct jit_literal_pool *pool = _jit->alloc (bytes);
  ASSERT (pool);

  pool->capacity = capacity;
  // reset_literal_pool() reads the size to decide how much to zero, so
  // it has to be valid before the call.
  pool->size = 0;
  reset_literal_pool(_jit, pool);

  return pool;
}
// Replace the state's literal pool with one of twice the capacity,
// carrying over the existing entries and deadline, and release the old
// pool with the state's free function.
static void
grow_literal_pool(jit_state_t *_jit)
{
  struct jit_literal_pool *old_pool = _jit->pool;
  struct jit_literal_pool *bigger =
    alloc_literal_pool(_jit, old_pool->capacity * 2);

  // The fresh pool starts empty; append the old entries in order.
  size_t idx = 0;
  while (idx < old_pool->size) {
    bigger->entries[bigger->size++] = old_pool->entries[idx];
    idx++;
  }
  bigger->deadline = old_pool->deadline;

  _jit->free (old_pool);
  _jit->pool = bigger;
}
// Try to queue ENTRY in the state's literal pool.  MAX_OFFSET is the
// furthest distance, in bytes, at which the referring instruction can
// reach its pool slot.
//
// Returns 1 if the entry was queued, or if the state has already
// overflowed (in which case nothing is done).  Returns 0 if the pool's
// estimated size already meets or exceeds MAX_OFFSET: the pool is then
// emitted with a guard and ENTRY is NOT queued.
static jit_bool_t
add_literal_pool_entry(jit_state_t *_jit, struct jit_literal_pool_entry entry,
                       uint32_t max_offset)
{
  if (_jit->overflow)
    return 1;

  if (max_offset <= literal_pool_byte_size(_jit->pool)) {
    emit_literal_pool(_jit, GUARD_NEEDED);
    return 0;
  }

  if (_jit->pool->size == _jit->pool->capacity)
    grow_literal_pool (_jit);

  // Fetch the pool only now: growing replaces _jit->pool.
  struct jit_literal_pool *pool = _jit->pool;

  // Offset, from the start of the buffer, of the pc base that the
  // referring instruction's displacement is measured from.
  uint32_t here = _jit->pc.uc - _jit->start;
  uint32_t inst_start = here - entry.reloc.inst_start_offset;
  uint32_t pc_base = inst_start + entry.reloc.pc_base_offset;

  // Latest buffer offset at which the pool may begin while keeping
  // this entry in range, allowing for the pool's own estimated size.
  uint32_t slack = max_offset - literal_pool_byte_size(pool);
  uint32_t latest = pc_base + slack;
  if (latest < pool->deadline)
    pool->deadline = latest;

  pool->entries[pool->size++] = entry;
  return 1;
}
// Queue SRC as a pending literal-pool reference with a zero value (to
// be filled in later).  MAX_OFFSET_BITS, combined with SRC.rsh, gives
// the number of bits of byte displacement the referring instruction
// can express; one 32-bit instruction's worth is subtracted from that
// reach as a safety margin.
//
// Returns whatever add_literal_pool_entry() returns: 1 if the entry
// was queued (or the state has overflowed), 0 if the pool was emitted
// instead and the entry was not queued.
static jit_bool_t
add_pending_literal(jit_state_t *_jit, jit_reloc_t src,
                    uint8_t max_offset_bits)
{
  struct jit_literal_pool_entry entry = { src, 0 };
  uint32_t max_inst_size = sizeof(uint32_t);

  // Shift an unsigned 32-bit one: shifting a signed int into or past
  // its sign bit is undefined behaviour.  The reach must also fit in
  // the 32-bit offset type.
  unsigned reach_bits = (unsigned) max_offset_bits + src.rsh;
  ASSERT(reach_bits < 32);
  uint32_t max_offset = ((uint32_t) 1 << reach_bits) - max_inst_size;

  return add_literal_pool_entry(_jit, entry, max_offset);
}
// Drop the pending pool entry whose relocation offset equals
// SRC.offset, preserving the order of the remaining entries.  The
// search runs from the most recently added entry backwards.  Aborts if
// no such entry exists.
static void
remove_pending_literal(jit_state_t *_jit, jit_reloc_t src)
{
  struct jit_literal_pool *pool = _jit->pool;
  size_t idx = pool->size;

  while (idx-- > 0) {
    if (pool->entries[idx].reloc.offset != src.offset)
      continue;

    // Close the gap by sliding every later entry down one slot.
    for (size_t next = idx + 1; next < pool->size; next++)
      pool->entries[next - 1] = pool->entries[next];
    pool->size--;
    return;
  }

  abort();
}
// Record VALUE in the pending pool entry whose relocation offset
// equals SRC.offset.  The search runs from the most recently added
// entry backwards.  The entry must not have been given a value yet
// (asserted), and the function aborts if no such entry exists.
static void
patch_pending_literal(jit_state_t *_jit, jit_reloc_t src, uintptr_t value)
{
  struct jit_literal_pool_entry *entries = _jit->pool->entries;
  size_t idx = _jit->pool->size;

  while (idx-- > 0) {
    struct jit_literal_pool_entry *candidate = &entries[idx];
    if (candidate->reloc.offset != src.offset)
      continue;

    ASSERT(candidate->value == 0);
    candidate->value = value;
    return;
  }

  abort();
}
// Write every pending entry of _jit->pool into the instruction stream
// at the current pc, patch each referring instruction so that its
// displacement reaches the emitted slot, and then reset the pool.
//
// Does nothing if the state has already overflowed or the pool is
// empty.  When GUARD is GUARD_NEEDED a jump is emitted ahead of the
// pool and patched once the pool is complete, presumably so that
// straight-line execution branches over the pool data.
//
// If overflow is detected part-way through, the function returns
// without patching the guard jump and without resetting the pool.
static void
emit_literal_pool(jit_state_t *_jit, enum guard_pool guard)
{
if (_jit->overflow)
return;
if (!_jit->pool->size)
return;
// Location of the guard jump, if one is emitted; patched at the end.
uint32_t *patch_loc = NULL;
if (guard == GUARD_NEEDED)
patch_loc = jmp_without_veneer(_jit);
// FIXME: Could de-duplicate constants.
for (size_t i = 0; i < _jit->pool->size; i++) {
// Align to word boundary without emitting pool.
if (_jit->pc.w & 1) emit_u8(_jit, 0);
if (_jit->pc.w & 2) emit_u16(_jit, 0);
if (sizeof(uintptr_t) > 4 && (_jit->pc.w & 4))
emit_u32(_jit, 0);
// NOTE(review): if the padding emitters above can flag overflow
// without advancing pc, this assertion would fire before the overflow
// check further down -- confirm against emit_u8/emit_u16/emit_u32.
ASSERT((_jit->pc.w & (sizeof(uintptr_t) - 1)) == 0);
struct jit_literal_pool_entry *entry = &_jit->pool->entries[i];
// Address of the referring instruction's relocated field, and the pc
// base its displacement is measured from.
uint8_t *loc = _jit->start + entry->reloc.offset;
uint8_t *pc_base =
loc - entry->reloc.inst_start_offset + entry->reloc.pc_base_offset;
// Distance from that pc base to the slot about to be emitted, scaled
// down by the relocation's right shift.
ptrdiff_t diff = _jit->pc.uc - pc_base;
diff >>= entry->reloc.rsh;
// Do not patch anything once an emit has flagged overflow.
if (_jit->overflow)
return;
// Patch the referring instruction first, then emit the slot contents
// it now points at: a veneer for jumps, a raw word for pool loads.
switch (entry->reloc.kind & JIT_RELOC_MASK) {
case JIT_RELOC_JMP_WITH_VENEER:
patch_veneer_jmp_offset((uint32_t*) loc, diff);
emit_veneer(_jit, (void*) entry->value);
break;
case JIT_RELOC_JCC_WITH_VENEER:
patch_veneer_jcc_offset((uint32_t*) loc, diff);
emit_veneer(_jit, (void*) entry->value);
break;
case JIT_RELOC_LOAD_FROM_POOL:
patch_load_from_pool_offset((uint32_t*) loc, diff);
emit_uintptr(_jit, entry->value);
break;
default:
// Any other relocation kind must never be queued in the pool.
abort();
}
}
// The last entry's emit may itself have overflowed.
if (_jit->overflow)
return;
if (guard == GUARD_NEEDED)
patch_jmp_without_veneer(_jit, patch_loc);
reset_literal_pool(_jit, _jit->pool);
}
#endif