1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-04 14:20:26 +02:00
guile/jit/x86.c
Andy Wingo bad7e34c83 Implementation of new design
Documentation to come, as tests get added and things settle down.
2019-03-25 12:21:23 +01:00

443 lines
14 KiB
C

/*
* Copyright (C) 2012-2018 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* Authors:
* Paulo Cesar Pereira de Andrade
*/
/*
 * Compile-time ABI parameters.  Three configurations are distinguished:
 * __X32, __CYGWIN__ (when __X32 is not set), and the remaining default.
 * NOTE(review): stack_framesize, stack_adjust, CVT_OFFSET and the va_*
 * values are not used in the visible part of this file; presumably they
 * are consumed by the included x86-cpu.c / x86-sse.c -- confirm.
 */
#if __X32
/* Both predicates are constant 0: no argument index maps to a register. */
# define jit_arg_reg_p(i) 0
# define jit_arg_f_reg_p(i) 0
# define stack_framesize 20
# define stack_adjust 12
# define CVT_OFFSET -12
# define REAL_WORDSIZE 4
# define va_gp_increment 4
# define va_fp_increment 8
#else
# if __CYGWIN__
/* Indices 0..3 qualify; the float predicate reuses the integer one. */
# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4)
# define jit_arg_f_reg_p(i) jit_arg_reg_p(i)
# define stack_framesize 152
# define va_fp_increment 8
# else
/* Indices 0..5 qualify for integer registers, 0..7 for float registers. */
# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6)
# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
# define stack_framesize 56
/* The names and offsets below refer to fields of jit_va_list_t. */
# define first_gp_argument rdi
# define first_gp_offset offsetof(jit_va_list_t, rdi)
/* Converts a byte offset into an 8-byte slot index. */
# define first_gp_from_offset(gp) ((gp) / 8)
# define last_gp_argument r9
/* Byte distance from the start of the rdi field to the end of the r9 field. */
# define va_gp_max_offset \
(offsetof(jit_va_list_t, r9) - offsetof(jit_va_list_t, rdi) + 8)
# define first_fp_argument xmm0
# define first_fp_offset offsetof(jit_va_list_t, xmm0)
# define last_fp_argument xmm7
/* Byte distance from the rdi field to 16 bytes past the start of xmm7. */
# define va_fp_max_offset \
(offsetof(jit_va_list_t, xmm7) - offsetof(jit_va_list_t, rdi) + 16)
# define va_fp_increment 16
/* Converts an offset past the integer area into a 16-byte slot index. */
# define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16)
# endif
/* Shared by both non-__X32 configurations. */
# define va_gp_increment 8
# define stack_adjust 8
# define CVT_OFFSET -8
# define REAL_WORDSIZE 8
#endif
/*
* Types
*/
/*
 * jit_va_list_t: the variadic-argument cursor type.
 * Under __X32 or __CYGWIN__ it is a plain jit_pointer_t.  Otherwise it is
 * a record whose field order is relied upon by the offsetof()-based
 * first_gp_offset / first_fp_offset / va_*_max_offset macros, so fields
 * must not be reordered.
 */
#if __X32 || __CYGWIN__
typedef jit_pointer_t jit_va_list_t;
#else
typedef struct jit_va_list {
/* NOTE(review): gpoff/fpoff/over/save look like the gp_offset, fp_offset,
 * overflow_arg_area and reg_save_area members of the System V x86-64
 * va_list -- confirm against the code that fills them in. */
int32_t gpoff;
int32_t fpoff;
jit_pointer_t over;
jit_pointer_t save;
/* Declared explicitly as int64 for the x32 abi */
/* One slot per integer argument register, rdi first and r9 last. */
int64_t rdi;
int64_t rsi;
int64_t rdx;
int64_t rcx;
int64_t r8;
int64_t r9;
/* One slot per xmm argument register.  Each xmmN is followed by an
 * unnamed-purpose _upN field, so consecutive xmm fields are two
 * jit_float64_t apart (presumably the unused upper half of a 16-byte
 * register image -- confirm). */
jit_float64_t xmm0;
jit_float64_t _up0;
jit_float64_t xmm1;
jit_float64_t _up1;
jit_float64_t xmm2;
jit_float64_t _up2;
jit_float64_t xmm3;
jit_float64_t _up3;
jit_float64_t xmm4;
jit_float64_t _up4;
jit_float64_t xmm5;
jit_float64_t _up5;
jit_float64_t xmm6;
jit_float64_t _up6;
jit_float64_t xmm7;
jit_float64_t _up7;
} jit_va_list_t;
#endif
/* Global record of detected CPU features; written by jit_get_cpu() and
 * read by jit_get_cpu() and jit_init() for their return values. */
jit_cpu_t jit_cpu;
/*
 * Register description table, one variant per ABI configuration.
 * Each entry pairs a flag word with a printable register name.  The flag
 * word ORs rc(...) class bits with a small integer; those integers match
 * the x86 machine encoding of the named register (e.g. 4 for %esp/%rsp,
 * 5 for %ebp/%rbp).  The table ends with a _NOREG sentinel entry.
 * NOTE(review): rc(sav) / rc(arg) presumably mark callee-saved and
 * argument registers, and rc(rg8) registers usable as 8-bit operands --
 * confirm against the rc() definition.  Entry order is presumably
 * significant to code that indexes this table; do not reorder.
 */
static const jit_register_t _rvs[] = {
#if __X32
/* 32-bit general purpose registers, then %xmm0..%xmm7. */
{ rc(gpr) | rc(rg8) | 0, "%eax" },
{ rc(gpr) | rc(rg8) | 1, "%ecx" },
{ rc(gpr) | rc(rg8) | 2, "%edx" },
{ rc(sav) | rc(rg8) | rc(gpr) | 3, "%ebx" },
{ rc(sav) | rc(gpr) | 6, "%esi" },
{ rc(sav) | rc(gpr) | 7, "%edi" },
{ rc(sav) | 4, "%esp" },
{ rc(sav) | 5, "%ebp" },
{ rc(xpr) | rc(fpr) | 0, "%xmm0" },
{ rc(xpr) | rc(fpr) | 1, "%xmm1" },
{ rc(xpr) | rc(fpr) | 2, "%xmm2" },
{ rc(xpr) | rc(fpr) | 3, "%xmm3" },
{ rc(xpr) | rc(fpr) | 4, "%xmm4" },
{ rc(xpr) | rc(fpr) | 5, "%xmm5" },
{ rc(xpr) | rc(fpr) | 6, "%xmm6" },
{ rc(xpr) | rc(fpr) | 7, "%xmm7" },
#elif __CYGWIN__
/* 64-bit registers; %rcx, %rdx, %r8, %r9 and %xmm0..%xmm3 carry rc(arg). */
{ rc(gpr) | rc(rg8) | 0, "%rax" },
{ rc(gpr) | rc(rg8) | rc(rg8) | 10, "%r10" },
{ rc(gpr) | rc(rg8) | rc(rg8) | 11, "%r11" },
{ rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
{ rc(sav) | rc(gpr) | 7, "%rdi" },
{ rc(sav) | rc(gpr) | 6, "%rsi" },
{ rc(sav) | rc(gpr) | 12, "%r12" },
{ rc(sav) | rc(gpr) | 13, "%r13" },
{ rc(sav) | rc(gpr) | 14, "%r14" },
{ rc(sav) | rc(gpr) | 15, "%r15" },
{ rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
{ rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
{ rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
{ rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
{ rc(sav) | 4, "%rsp" },
{ rc(sav) | 5, "%rbp" },
{ rc(xpr) | rc(fpr) | 4, "%xmm4" },
{ rc(xpr) | rc(fpr) | 5, "%xmm5" },
{ rc(sav) | rc(xpr) | rc(fpr) | 6, "%xmm6" },
{ rc(sav) | rc(xpr) | rc(fpr) | 7, "%xmm7" },
{ rc(sav) | rc(xpr) | rc(fpr) | 8, "%xmm8" },
{ rc(sav) | rc(xpr) | rc(fpr) | 9, "%xmm9" },
{ rc(sav) | rc(xpr) | rc(fpr) | 10, "%xmm10" },
{ rc(sav) | rc(xpr) | rc(fpr) | 11, "%xmm11" },
{ rc(sav) | rc(xpr) | rc(fpr) | 12, "%xmm12" },
{ rc(sav) | rc(xpr) | rc(fpr) | 13, "%xmm13" },
{ rc(sav) | rc(xpr) | rc(fpr) | 14, "%xmm14" },
{ rc(sav) | rc(xpr) | rc(fpr) | 15, "%xmm15" },
{ rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
{ rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
{ rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
{ rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
#else
/* 64-bit registers; %rdi, %rsi, %rdx, %rcx, %r8, %r9 and %xmm0..%xmm7
 * carry rc(arg). */
/* %rax is a pseudo flag argument for varargs functions */
{ rc(arg) | rc(gpr) | rc(rg8) | 0, "%rax" },
{ rc(gpr) | rc(rg8) | 10, "%r10" },
{ rc(gpr) | rc(rg8) | 11, "%r11" },
{ rc(gpr) | rc(rg8) | 12, "%r12" },
{ rc(sav) | rc(rg8) | rc(gpr) | 3, "%rbx" },
{ rc(sav) | rc(rg8) | rc(gpr) | 13, "%r13" },
{ rc(sav) | rc(rg8) | rc(gpr) | 14, "%r14" },
{ rc(sav) | rc(rg8) | rc(gpr) | 15, "%r15" },
{ rc(arg) | rc(rg8) | rc(gpr) | 9, "%r9" },
{ rc(arg) | rc(rg8) | rc(gpr) | 8, "%r8" },
{ rc(arg) | rc(rg8) | rc(gpr) | 1, "%rcx" },
{ rc(arg) | rc(rg8) | rc(gpr) | 2, "%rdx" },
{ rc(arg) | rc(rg8) | rc(gpr) | 6, "%rsi" },
{ rc(arg) | rc(rg8) | rc(gpr) | 7, "%rdi" },
{ rc(sav) | 4, "%rsp" },
{ rc(sav) | 5, "%rbp" },
{ rc(xpr) | rc(fpr) | 8, "%xmm8" },
{ rc(xpr) | rc(fpr) | 9, "%xmm9" },
{ rc(xpr) | rc(fpr) | 10, "%xmm10" },
{ rc(xpr) | rc(fpr) | 11, "%xmm11" },
{ rc(xpr) | rc(fpr) | 12, "%xmm12" },
{ rc(xpr) | rc(fpr) | 13, "%xmm13" },
{ rc(xpr) | rc(fpr) | 14, "%xmm14" },
{ rc(xpr) | rc(fpr) | 15, "%xmm15" },
{ rc(xpr) | rc(arg) | rc(fpr) | 7, "%xmm7" },
{ rc(xpr) | rc(arg) | rc(fpr) | 6, "%xmm6" },
{ rc(xpr) | rc(arg) | rc(fpr) | 5, "%xmm5" },
{ rc(xpr) | rc(arg) | rc(fpr) | 4, "%xmm4" },
{ rc(xpr) | rc(arg) | rc(fpr) | 3, "%xmm3" },
{ rc(xpr) | rc(arg) | rc(fpr) | 2, "%xmm2" },
{ rc(xpr) | rc(arg) | rc(fpr) | 1, "%xmm1" },
{ rc(xpr) | rc(arg) | rc(fpr) | 0, "%xmm0" },
#endif
/* Sentinel marking the end of the table. */
{ _NOREG, "<none>" },
};
#include "x86-cpu.c"
#include "x86-sse.c"
/*
 * Query the host processor with CPUID and record the feature bits in the
 * global jit_cpu.
 *
 * On __X32 builds the function first checks, by toggling EFLAGS bits,
 * whether the CPUID instruction exists at all and returns false if not.
 * CPUID function 1 is then decoded from %ecx/%edx.  On __X64 builds the
 * extended function 0x80000001 is queried as well, and bit 0 of its %ecx
 * result is stored in jit_cpu.lahf.
 *
 * Returns the detected sse2 flag.
 *
 * The anonymous bit-field structs mirror the CPUID function 1 register
 * layout with the first member at bit 0; this relies on the compiler
 * allocating bit-fields starting at the least significant bit.
 */
jit_bool_t
jit_get_cpu(void)
{
  union {
    struct {
      uint32_t sse3 : 1;
      uint32_t pclmulqdq : 1;
      uint32_t dtes64 : 1; /* amd reserved */
      uint32_t monitor : 1;
      uint32_t ds_cpl : 1; /* amd reserved */
      uint32_t vmx : 1; /* amd reserved */
      uint32_t smx : 1; /* amd reserved */
      uint32_t est : 1; /* amd reserved */
      uint32_t tm2 : 1; /* amd reserved */
      uint32_t ssse3 : 1;
      uint32_t cntx_id : 1; /* amd reserved */
      uint32_t __reserved0 : 1;
      uint32_t fma : 1;
      uint32_t cmpxchg16b : 1;
      uint32_t xtpr : 1; /* amd reserved */
      uint32_t pdcm : 1; /* amd reserved */
      uint32_t __reserved1 : 1;
      uint32_t pcid : 1; /* amd reserved */
      uint32_t dca : 1; /* amd reserved */
      uint32_t sse4_1 : 1;
      uint32_t sse4_2 : 1;
      uint32_t x2apic : 1; /* amd reserved */
      uint32_t movbe : 1; /* amd reserved */
      uint32_t popcnt : 1;
      uint32_t tsc : 1; /* amd reserved */
      uint32_t aes : 1;
      uint32_t xsave : 1;
      uint32_t osxsave : 1;
      uint32_t avx : 1;
      uint32_t __reserved2 : 1; /* amd F16C */
      uint32_t __reserved3 : 1;
      uint32_t __alwayszero : 1; /* amd RAZ */
    } bits;
    jit_uword_t cpuid;
  } ecx;
  union {
    struct {
      uint32_t fpu : 1;
      uint32_t vme : 1;
      uint32_t de : 1;
      uint32_t pse : 1;
      uint32_t tsc : 1;
      uint32_t msr : 1;
      uint32_t pae : 1;
      uint32_t mce : 1;
      uint32_t cmpxchg8b : 1;
      uint32_t apic : 1;
      uint32_t __reserved0 : 1;
      uint32_t sep : 1;
      uint32_t mtrr : 1;
      uint32_t pge : 1;
      uint32_t mca : 1;
      uint32_t cmov : 1;
      uint32_t pat : 1;
      uint32_t pse36 : 1;
      uint32_t psn : 1; /* amd reserved */
      uint32_t clfsh : 1;
      uint32_t __reserved1 : 1;
      uint32_t ds : 1; /* amd reserved */
      uint32_t acpi : 1; /* amd reserved */
      uint32_t mmx : 1;
      uint32_t fxsr : 1;
      uint32_t sse : 1;
      uint32_t sse2 : 1;
      uint32_t ss : 1; /* amd reserved */
      uint32_t htt : 1;
      uint32_t tm : 1; /* amd reserved */
      uint32_t __reserved2 : 1;
      uint32_t pbe : 1; /* amd reserved */
    } bits;
    jit_uword_t cpuid;
  } edx;
#if __X32
  int ac, flags;
#endif
  jit_uword_t eax, ebx;

#if __X32
  /* adapted from glibc __sysconf */
  /* Flip bits 0x240000 of EFLAGS, read the register back, then restore
     the original value; `ac' ends up holding the bits that actually
     changed. */
  __asm__ volatile ("pushfl;\n\t"
                    "popl %0;\n\t"
                    "movl $0x240000, %1;\n\t"
                    "xorl %0, %1;\n\t"
                    "pushl %1;\n\t"
                    "popfl;\n\t"
                    "pushfl;\n\t"
                    "popl %1;\n\t"
                    "xorl %0, %1;\n\t"
                    "pushl %0;\n\t"
                    "popfl"
                    : "=r" (flags), "=r" (ac));
  /* i386 or i486 without cpuid */
  if ((ac & (1 << 21)) == 0)
    /* probably without x87 as well */
    return false;
#endif

  /* query %eax = 1 function */
  /* %ebx/%rbx is swapped with a scratch operand around cpuid so that the
     register holds its original value again when the asm finishes. */
  __asm__ volatile (
#if __X32 || __X64_32
                    "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
#else
                    "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
#endif
                    : "=a" (eax), "=r" (ebx),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                    : "0" (1));

  /* Features reported in %edx. */
  jit_cpu.fpu = edx.bits.fpu;
  jit_cpu.cmpxchg8b = edx.bits.cmpxchg8b;
  jit_cpu.cmov = edx.bits.cmov;
  jit_cpu.mmx = edx.bits.mmx;
  jit_cpu.sse = edx.bits.sse;
  jit_cpu.sse2 = edx.bits.sse2;

  /* Features reported in %ecx. */
  jit_cpu.sse3 = ecx.bits.sse3;
  jit_cpu.pclmulqdq = ecx.bits.pclmulqdq;
  jit_cpu.ssse3 = ecx.bits.ssse3;
  jit_cpu.fma = ecx.bits.fma;
  jit_cpu.cmpxchg16b = ecx.bits.cmpxchg16b;
  jit_cpu.sse4_1 = ecx.bits.sse4_1;
  jit_cpu.sse4_2 = ecx.bits.sse4_2;
  jit_cpu.movbe = ecx.bits.movbe;
  jit_cpu.popcnt = ecx.bits.popcnt;
  jit_cpu.aes = ecx.bits.aes;
  jit_cpu.avx = ecx.bits.avx;

#if __X64
  /* query %eax = 0x80000001 function */
  /* The whole statement lives inside the conditional: leaving the
     `__asm__ volatile (' opener outside it would produce an unterminated
     statement on builds where __X64 is not set. */
  __asm__ volatile (
# if __X64_32
                    "xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
# else
                    "xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
# endif
                    : "=a" (eax), "=r" (ebx),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                    : "0" (0x80000001));
  jit_cpu.lahf = ecx.cpuid & 1;
#endif

  return jit_cpu.sse2;
}
/*
 * Returns the sse2 flag currently stored in the global jit_cpu.
 * The _jit argument is not consulted.
 */
jit_bool_t
jit_init(jit_state_t *_jit)
{
  jit_bool_t have_sse2 = jit_cpu.sse2;

  (void) _jit;
  return have_sse2;
}
/*
 * Function epilogue hook.  Currently a placeholder that emits nothing.
 */
void
jit_epilog(jit_state_t *_jit)
{
  /* TODO: Restore registers. */
  (void) _jit;
}
/*
 * Emit a call to the absolute address `f' by forwarding to calli().
 * argc, abi and args are accepted but not yet used: no argument set-up
 * is performed before the call.
 */
void
jit_calli(jit_state_t *_jit, jit_pointer_t f,
          size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[])
{
  jit_word_t target = (jit_word_t) f;

  (void) argc;
  (void) abi;
  (void) args;

  /* TODO: Do the call! */
  calli(_jit, target);
}
/*
 * Emit a call through register `f' by forwarding to callr().
 * argc, abi and args are accepted but not yet used: no argument set-up
 * is performed before the call.
 */
void
jit_callr(jit_state_t *_jit, jit_gpr_t f,
          size_t argc, const jit_arg_abi_t abi[], const jit_arg_t args[])
{
  (void) argc;
  (void) abi;
  (void) args;

  /* TODO: Do the call! */
  callr(_jit, f);
}
/*
 * Assign a register location to each of the `argc' incoming arguments.
 *
 * For every i, abi[i] selects the argument class: integer and pointer
 * kinds take the next entry of gpr_args, float and double take the next
 * entry of fpr_args.  The chosen location is written to args[i].
 *
 * Under __CYGWIN__ consuming a register of either class advances both
 * indices, so the two tables are walked in lock-step.  Otherwise the two
 * classes are counted independently.
 *
 * Stack-passed arguments are not supported: running out of registers
 * calls abort().  Under __X32 both tables are empty, so any argument
 * aborts.  An abi[] value not handled below also aborts rather than
 * leaving args[i] unassigned.
 */
void
jit_receive(jit_state_t *_jit,
            size_t argc, const jit_arg_abi_t abi[], jit_arg_t args[])
{
  const jit_reg_t gpr_args[] = {
#if __X32
    /* No GPRs in args. */
#elif __CYGWIN__
    _RCX, _RDX, _R8, _R9
#else
    _RDI, _RSI, _RDX, _RCX, _R8, _R9
#endif
  };
  const jit_reg_t fpr_args[] = {
#if __X32
    /* No FPRs in args. */
#elif __CYGWIN__
    _XMM0, _XMM1, _XMM2, _XMM3
#else
    _XMM0, _XMM1, _XMM2, _XMM3, _XMM4, _XMM5, _XMM6, _XMM7
#endif
  };
  size_t gpr_arg_idx = 0;
  size_t fpr_arg_idx = 0;
  /* size_t stack_offset = 0; */
  /* Element counts derived from the arrays themselves. */
  size_t gpr_arg_count = sizeof(gpr_args) / sizeof(gpr_args[0]);
  size_t fpr_arg_count = sizeof(fpr_args) / sizeof(fpr_args[0]);

  (void) _jit;

#if __CYGWIN__
#define NEXT_GPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0)
#define NEXT_FPR() do { gpr_arg_idx++; fpr_arg_idx++; } while (0)
#else
#define NEXT_GPR() do { gpr_arg_idx++; } while (0)
#define NEXT_FPR() do { fpr_arg_idx++; } while (0)
#endif

  for (size_t i = 0; i < argc; i++) {
    switch (abi[i]) {
    case JIT_ARG_ABI_UINT8:
    case JIT_ARG_ABI_INT8:
    case JIT_ARG_ABI_UINT16:
    case JIT_ARG_ABI_INT16:
    case JIT_ARG_ABI_UINT32:
    case JIT_ARG_ABI_INT32:
    case JIT_ARG_ABI_UINT64:
    case JIT_ARG_ABI_INT64:
    case JIT_ARG_ABI_POINTER:
      if (gpr_arg_idx < gpr_arg_count) {
        args[i].kind = JIT_ARG_LOC_GPR;
        args[i].loc.gpr = gpr_args[gpr_arg_idx];
        NEXT_GPR();
      } else {
        /* Out of integer argument registers. */
        abort();
      }
      break;
    case JIT_ARG_ABI_FLOAT:
    case JIT_ARG_ABI_DOUBLE:
      if (fpr_arg_idx < fpr_arg_count) {
        args[i].kind = JIT_ARG_LOC_FPR;
        args[i].loc.fpr = fpr_args[fpr_arg_idx];
        NEXT_FPR();
      } else {
        /* Out of floating-point argument registers. */
        abort();
      }
      break;
    default:
      /* Unknown argument class: fail loudly instead of silently
         leaving args[i] without a location. */
      abort();
    }
  }
}
/*
 * Flush hook for the code range [fptr, tptr).  The body is empty: the
 * function does nothing with either pointer.
 */
void
jit_flush(void *fptr, void *tptr)
{
  (void) fptr;
  (void) tptr;
}
/*
 * Hook for shortening the relocation `reloc'.  The body is empty: the
 * function leaves both arguments untouched.
 */
static void
jit_try_shorten(jit_state_t *_jit, jit_reloc_t reloc)
{
  (void) _jit;
  (void) reloc;
}