mirror of
https://https.git.savannah.gnu.org/git/guix.git/
synced 2025-07-12 01:50:46 +02:00
* gnu/packages/lua.scm (luajit)[arguments]: Use G-expression, when target riscv64, Add luajit-add-riscv64-support.patch. [supported-systems]: Add riscv64-linux. * gnu/packages/patches/luajit-add-riscv64-support.patch: New file. * gnu/local.mk (dist_patch_DATA): Register it. Change-Id: Ibdb40a99bc0d7709887cfe772e6d9dae5083592c Signed-off-by: Christopher Baines <mail@cbaines.net>
12439 lines
377 KiB
Diff
12439 lines
377 KiB
Diff
From https://patch-diff.githubusercontent.com/raw/LuaJIT/LuaJIT/pull/1267.patch
|
|
|
|
From 459094e89900b94a9a764e1bab1ec735f2b10421 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Tue, 5 Mar 2024 17:09:31 +0800
|
|
Subject: [PATCH 01/22] riscv(support): add RISC-V 64 arch base definition
|
|
|
|
---
|
|
src/lj_arch.h | 23 +++++++++++++++++++++++
|
|
1 file changed, 23 insertions(+)
|
|
|
|
diff --git a/src/lj_arch.h b/src/lj_arch.h
|
|
index a4eecf27e0..bf2cd4e581 100644
|
|
--- a/src/lj_arch.h
|
|
+++ b/src/lj_arch.h
|
|
@@ -31,6 +31,8 @@
|
|
#define LUAJIT_ARCH_mips32 6
|
|
#define LUAJIT_ARCH_MIPS64 7
|
|
#define LUAJIT_ARCH_mips64 7
|
|
+#define LUAJIT_ARCH_riscv64 8
|
|
+#define LUAJIT_ARCH_RISCV64 8
|
|
|
|
/* Target OS. */
|
|
#define LUAJIT_OS_OTHER 0
|
|
@@ -65,6 +67,8 @@
|
|
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
|
|
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
|
|
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
|
|
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
|
|
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
|
|
#else
|
|
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
|
|
#endif
|
|
@@ -439,6 +443,21 @@
|
|
#define LJ_ARCH_VERSION 10
|
|
#endif
|
|
|
|
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
|
|
+
|
|
+#define LJ_ARCH_NAME "riscv64"
|
|
+#define LJ_ARCH_BITS 64
|
|
+#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */
|
|
+#define LJ_TARGET_RISCV64 1
|
|
+#define LJ_TARGET_GC64 1
|
|
+#define LJ_TARGET_EHRETREG 10
|
|
+#define LJ_TARGET_EHRAREG 1
|
|
+#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\
|
|
+ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
|
|
+#define LJ_TARGET_MASKSHIFT 1
|
|
+#define LJ_TARGET_MASKROT 1
|
|
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
|
|
+
|
|
#else
|
|
#error "No target architecture defined"
|
|
#endif
|
|
@@ -531,6 +550,10 @@
|
|
#error "Only n64 ABI supported for MIPS64"
|
|
#undef LJ_TARGET_MIPS
|
|
#endif
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#if !defined(__riscv_float_abi_double)
|
|
+#error "Only RISC-V 64 double float supported for now"
|
|
+#endif
|
|
#endif
|
|
#endif
|
|
|
|
|
|
From 865b39a9707a5fede21ffa7bcd96385eb9fc2466 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Tue, 5 Mar 2024 17:11:11 +0800
|
|
Subject: [PATCH 02/22] riscv(dynasm): add RISC-V support
|
|
|
|
---
|
|
dynasm/dasm_riscv.h | 433 ++++++++++++++++++
|
|
dynasm/dasm_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++
|
|
dynasm/dasm_riscv32.lua | 12 +
|
|
dynasm/dasm_riscv64.lua | 12 +
|
|
4 files changed, 1436 insertions(+)
|
|
create mode 100644 dynasm/dasm_riscv.h
|
|
create mode 100644 dynasm/dasm_riscv.lua
|
|
create mode 100644 dynasm/dasm_riscv32.lua
|
|
create mode 100644 dynasm/dasm_riscv64.lua
|
|
|
|
diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h
|
|
new file mode 100644
|
|
index 0000000000..523bb66113
|
|
--- /dev/null
|
|
+++ b/dynasm/dasm_riscv.h
|
|
@@ -0,0 +1,433 @@
|
|
+/*
|
|
+** DynASM RISC-V encoding engine.
|
|
+** Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+** Released under the MIT license. See dynasm.lua for full copyright notice.
|
|
+*/
|
|
+
|
|
+#include <stddef.h>
|
|
+#include <stdarg.h>
|
|
+#include <string.h>
|
|
+#include <stdlib.h>
|
|
+
|
|
+#define DASM_ARCH "riscv"
|
|
+
|
|
+#ifndef DASM_EXTERN
|
|
+#define DASM_EXTERN(a,b,c,d) 0
|
|
+#endif
|
|
+
|
|
+/* Action definitions. */
|
|
+enum {
|
|
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
|
|
+ /* The following actions need a buffer position. */
|
|
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
|
|
+ /* The following actions also have an argument. */
|
|
+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
|
|
+ DASM__MAX
|
|
+};
|
|
+
|
|
+/* Maximum number of section buffer positions for a single dasm_put() call. */
|
|
+#define DASM_MAXSECPOS 25
|
|
+
|
|
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
|
|
+#define DASM_S_OK 0x00000000
|
|
+#define DASM_S_NOMEM 0x01000000
|
|
+#define DASM_S_PHASE 0x02000000
|
|
+#define DASM_S_MATCH_SEC 0x03000000
|
|
+#define DASM_S_RANGE_I 0x11000000
|
|
+#define DASM_S_RANGE_SEC 0x12000000
|
|
+#define DASM_S_RANGE_LG 0x13000000
|
|
+#define DASM_S_RANGE_PC 0x14000000
|
|
+#define DASM_S_RANGE_REL 0x15000000
|
|
+#define DASM_S_UNDEF_LG 0x21000000
|
|
+#define DASM_S_UNDEF_PC 0x22000000
|
|
+
|
|
+/* Macros to convert positions (8 bit section + 24 bit index). */
|
|
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
|
|
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
|
|
+#define DASM_SEC2POS(sec) ((sec)<<24)
|
|
+#define DASM_POS2SEC(pos) ((pos)>>24)
|
|
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
|
|
+
|
|
+/* Action list type. */
|
|
+typedef const unsigned int *dasm_ActList;
|
|
+
|
|
+/* Per-section structure. */
|
|
+typedef struct dasm_Section {
|
|
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
|
|
+ int *buf; /* True buffer pointer. */
|
|
+ size_t bsize; /* Buffer size in bytes. */
|
|
+ int pos; /* Biased buffer position. */
|
|
+ int epos; /* End of biased buffer position - max single put. */
|
|
+ int ofs; /* Byte offset into section. */
|
|
+} dasm_Section;
|
|
+
|
|
+/* Core structure holding the DynASM encoding state. */
|
|
+struct dasm_State {
|
|
+ size_t psize; /* Allocated size of this structure. */
|
|
+ dasm_ActList actionlist; /* Current actionlist pointer. */
|
|
+ int *lglabels; /* Local/global chain/pos ptrs. */
|
|
+ size_t lgsize;
|
|
+ int *pclabels; /* PC label chains/pos ptrs. */
|
|
+ size_t pcsize;
|
|
+ void **globals; /* Array of globals. */
|
|
+ dasm_Section *section; /* Pointer to active section. */
|
|
+ size_t codesize; /* Total size of all code sections. */
|
|
+ int maxsection; /* 0 <= sectionidx < maxsection. */
|
|
+ int status; /* Status code. */
|
|
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
|
|
+};
|
|
+
|
|
+/* The size of the core structure depends on the max. number of sections. */
|
|
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
|
|
+
|
|
+
|
|
+/* Initialize DynASM state. */
|
|
+void dasm_init(Dst_DECL, int maxsection)
|
|
+{
|
|
+ dasm_State *D;
|
|
+ size_t psz = 0;
|
|
+ Dst_REF = NULL;
|
|
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
|
|
+ D = Dst_REF;
|
|
+ D->psize = psz;
|
|
+ D->lglabels = NULL;
|
|
+ D->lgsize = 0;
|
|
+ D->pclabels = NULL;
|
|
+ D->pcsize = 0;
|
|
+ D->globals = NULL;
|
|
+ D->maxsection = maxsection;
|
|
+ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
|
|
+}
|
|
+
|
|
+/* Free DynASM state. */
|
|
+void dasm_free(Dst_DECL)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ int i;
|
|
+ for (i = 0; i < D->maxsection; i++)
|
|
+ if (D->sections[i].buf)
|
|
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
|
|
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
|
|
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
|
|
+ DASM_M_FREE(Dst, D, D->psize);
|
|
+}
|
|
+
|
|
+/* Setup global label array. Must be called before dasm_setup(). */
|
|
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ D->globals = gl;
|
|
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
|
|
+}
|
|
+
|
|
+/* Grow PC label array. Can be called after dasm_setup(), too. */
|
|
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ size_t osz = D->pcsize;
|
|
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
|
|
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
|
|
+}
|
|
+
|
|
+/* Setup encoder. */
|
|
+void dasm_setup(Dst_DECL, const void *actionlist)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ int i;
|
|
+ D->actionlist = (dasm_ActList)actionlist;
|
|
+ D->status = DASM_S_OK;
|
|
+ D->section = &D->sections[0];
|
|
+ memset((void *)D->lglabels, 0, D->lgsize);
|
|
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
|
|
+ for (i = 0; i < D->maxsection; i++) {
|
|
+ D->sections[i].pos = DASM_SEC2POS(i);
|
|
+ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
|
|
+ D->sections[i].ofs = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+#ifdef DASM_CHECKS
|
|
+#define CK(x, st) \
|
|
+ do { if (!(x)) { \
|
|
+ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
|
|
+#define CKPL(kind, st) \
|
|
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
|
|
+ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
|
|
+#else
|
|
+#define CK(x, st) ((void)0)
|
|
+#define CKPL(kind, st) ((void)0)
|
|
+#endif
|
|
+
|
|
/* Range check for a signed 12 bit immediate: returns n unchanged when it
** lies in [-2048, 2047], otherwise the out-of-range marker 4096.
*/
static int dasm_imms(int n)
{
  if (n < -2048 || n >= 2048)
    return 4096;
  return n;
}
|
|
/* Pass 1: Store actions and args, link branches/labels, estimate offsets.
**
** start is the offset of the first action word in D->actionlist. The
** variable arguments supply one int for every action numbered DASM_REL_PC
** or higher. The start offset and the per-action values are appended to the
** active section's buffer; sec->pos and sec->ofs are updated on exit.
*/
void dasm_put(Dst_DECL, int start, ...)
{
  va_list ap;
  dasm_State *D = Dst_REF;
  dasm_ActList p = D->actionlist + start;
  dasm_Section *sec = D->section;
  int pos = sec->pos, ofs = sec->ofs;
  int *b;

  /* Grow the section buffer once the position reaches the end marker. */
  if (pos >= sec->epos) {
    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
  }

  b = sec->rbuf;
  b[pos++] = start;  /* Each put starts with its action list offset. */

  va_start(ap, start);
  while (1) {
    unsigned int ins = *p++;
    unsigned int action = (ins >> 20);
    /* Words with a non-zero low nibble (or an action code that is out of
    ** range) are counted as 4 bytes of output.
    */
    if (action >= DASM__MAX || (ins & 0xf)) {
      ofs += 4;
    } else {
      ins >>= 4;
      /* Actions from DASM_REL_PC onward consume one int argument. */
      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
      switch (action) {
      case DASM_STOP: goto stop;
      case DASM_SECTION:
        n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
        D->section = &D->sections[n]; goto stop;
      case DASM_ESC: p++; ofs += 4; break;  /* Skip the escaped word. */
      case DASM_REL_EXT: break;
      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
      case DASM_REL_LG:
        n = (ins & 2047) - 10; pl = D->lglabels + n;
        /* Bkwd rel or global. */
        if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
        pl += 10; n = *pl;
        if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
        goto linkrel;
      case DASM_REL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putrel:
        n = *pl;
        if (n < 0) {  /* Label exists. Get label pos and store it. */
          b[pos] = -n;
        } else {
      linkrel:
          b[pos] = n;  /* Else link to rel chain, anchored at label. */
          *pl = pos;
        }
        pos++;
        break;
      case DASM_LABEL_LG:
        pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
      case DASM_LABEL_PC:
        pl = D->pclabels + n; CKPL(pc, PC);
      putlabel:
        n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
        while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
        }
        *pl = -pos;  /* Label exists now. */
        b[pos++] = ofs;  /* Store pass1 offset estimate. */
        break;
      case DASM_IMM:
#ifdef DASM_CHECKS
        /* The bits that get shifted out below must all be zero. */
        CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
        n >>= ((ins>>10)&31);
#ifdef DASM_CHECKS
        /* Flag 0x8000 selects the signed range check, else unsigned. */
        if (ins & 0x8000)
          CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
        else
          CK((n>>((ins>>5)&31)) == 0, RANGE_I);
#endif
        b[pos++] = n;
        break;
      case DASM_IMMS:
#ifdef DASM_CHECKS
        /* dasm_imms() yields 4096 for values outside [-2048, 2047]. */
        CK(dasm_imms(n) != 4096, RANGE_I);
#endif
        b[pos++] = n;
        break;
      }
    }
  }
stop:
  va_end(ap);
  /* Write the advanced position and offset estimate back to the section. */
  sec->pos = pos;
  sec->ofs = ofs;
}
|
|
+#undef CK
|
|
+
|
|
/* Pass 2: Link sections, shrink aligns, fix label offsets.
**
** Stores the total code size in D->codesize and *szp and returns
** DASM_S_OK. With DASM_CHECKS defined, *szp is zeroed first and a pending
** error status or the first still-undefined PC label is returned instead.
*/
int dasm_link(Dst_DECL, size_t *szp)
{
  dasm_State *D = Dst_REF;
  int secnum;
  int ofs = 0;

#ifdef DASM_CHECKS
  *szp = 0;
  if (D->status != DASM_S_OK) return D->status;
  {
    int pc;
    /* A positive entry is a rel chain whose label was never defined. */
    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
  }
#endif

  { /* Handle globals not defined in this translation unit. */
    int idx;
    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
      int n = D->lglabels[idx];
      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
    }
  }

  /* Combine all code sections. No support for data sections (yet). */
  for (secnum = 0; secnum < D->maxsection; secnum++) {
    dasm_Section *sec = D->sections + secnum;
    int *b = sec->rbuf;
    int pos = DASM_SEC2POS(secnum);
    int lastpos = sec->pos;

    /* Replay every dasm_put() recorded for this section. */
    while (pos != lastpos) {
      dasm_ActList p = D->actionlist + b[pos++];
      while (1) {
        unsigned int ins = *p++;
        unsigned int action = (ins >> 20);
        /* Words with a non-zero low nibble need no linking work. */
        if (ins & 0xf) continue; else ins >>= 4;
        switch (action) {
        case DASM_STOP: case DASM_SECTION: goto stop;
        case DASM_ESC: p++; break;
        case DASM_REL_EXT: break;
        /* ofs accumulates the (non-positive) adjustment from shrunk aligns. */
        case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
        case DASM_REL_LG: case DASM_REL_PC: pos++; break;
        /* Turn the pass 1 estimate into the final label offset. */
        case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
        case DASM_IMM: case DASM_IMMS: pos++; break;
        }
      }
      stop: (void)0;
    }
    ofs += sec->ofs;  /* Next section starts right after current section. */
  }

  D->codesize = ofs;  /* Total size of all code sections */
  *szp = ofs;
  return DASM_S_OK;
}
|
|
+
|
|
+#ifdef DASM_CHECKS
|
|
+#define CK(x, st) \
|
|
+ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
|
|
+#else
|
|
+#define CK(x, st) ((void)0)
|
|
+#endif
|
|
+
|
|
+/* Pass 3: Encode sections. */
|
|
+int dasm_encode(Dst_DECL, void *buffer)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ char *base = (char *)buffer;
|
|
+ unsigned int *cp = (unsigned int *)buffer;
|
|
+ int secnum;
|
|
+
|
|
+ /* Encode all code sections. No support for data sections (yet). */
|
|
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
|
|
+ dasm_Section *sec = D->sections + secnum;
|
|
+ int *b = sec->buf;
|
|
+ int *endb = sec->rbuf + sec->pos;
|
|
+
|
|
+ while (b != endb) {
|
|
+ dasm_ActList p = D->actionlist + *b++;
|
|
+ while (1) {
|
|
+ unsigned int ins = *p++;
|
|
+ if (ins & 0xf) { *cp++ = ins; continue; }
|
|
+ unsigned int action = (ins >> 20);
|
|
+ unsigned int val = (ins >> 4);
|
|
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
|
|
+ switch (action) {
|
|
+ case DASM_STOP: case DASM_SECTION: goto stop;
|
|
+ case DASM_ESC: *cp++ = *p++; break;
|
|
+ case DASM_REL_EXT:
|
|
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1);
|
|
+ goto patchrel;
|
|
+ case DASM_ALIGN:
|
|
+ val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x60000000;
|
|
+ break;
|
|
+ case DASM_REL_LG:
|
|
+ if (n < 0) {
|
|
+ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
|
|
+ goto patchrel;
|
|
+ }
|
|
+ /* fallthrough */
|
|
+ case DASM_REL_PC:
|
|
+ CK(n >= 0, UNDEF_PC);
|
|
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
|
|
+ patchrel:
|
|
+ if (val & 2048) { /* B */
|
|
+ CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL);
|
|
+ cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000)
|
|
+ | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080);
|
|
+ } else { /* J */
|
|
+ CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
|
|
+ cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000)
|
|
+ | ((n << 9) & 0x00100000) | (n & 0x000ff000);
|
|
+ }
|
|
+ break;
|
|
+ case DASM_LABEL_LG:
|
|
+ val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n);
|
|
+ break;
|
|
+ case DASM_LABEL_PC: break;
|
|
+ case DASM_IMM:
|
|
+ cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31);
|
|
+ break;
|
|
+ case DASM_IMMS:
|
|
+ cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80));
|
|
+ break;
|
|
+ default: *cp++ = ins; break;
|
|
+ }
|
|
+ }
|
|
+ stop: (void)0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
|
|
+ return DASM_S_PHASE;
|
|
+ return DASM_S_OK;
|
|
+}
|
|
+#undef CK
|
|
+
|
|
+/* Get PC label offset. */
|
|
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ if (pc*sizeof(int) < D->pcsize) {
|
|
+ int pos = D->pclabels[pc];
|
|
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
|
|
+ if (pos > 0) return -1; /* Undefined. */
|
|
+ }
|
|
+ return -2; /* Unused or out of range. */
|
|
+}
|
|
+
|
|
+#ifdef DASM_CHECKS
|
|
+/* Optional sanity checker to call between isolated encoding steps. */
|
|
+int dasm_checkstep(Dst_DECL, int secmatch)
|
|
+{
|
|
+ dasm_State *D = Dst_REF;
|
|
+ if (D->status == DASM_S_OK) {
|
|
+ int i;
|
|
+ for (i = 1; i <= 9; i++) {
|
|
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
|
|
+ D->lglabels[i] = 0;
|
|
+ }
|
|
+ }
|
|
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
|
|
+ D->section != &D->sections[secmatch])
|
|
+ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
|
|
+ return D->status;
|
|
+}
|
|
+#endif
|
|
+
|
|
diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua
|
|
new file mode 100644
|
|
index 0000000000..4c8518f163
|
|
--- /dev/null
|
|
+++ b/dynasm/dasm_riscv.lua
|
|
@@ -0,0 +1,979 @@
|
|
+------------------------------------------------------------------------------
|
|
+-- DynASM RISC-V module.
|
|
+--
|
|
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+-- See dynasm.lua for full copyright notice.
|
|
+--
|
|
+-- Contributed by gns from PLCT Lab, ISCAS.
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+local riscv32 = riscv32
|
|
+local riscv64 = riscv64
|
|
+
|
|
+-- Module information:
|
|
+local _info = {
|
|
+ arch = riscv32 and "riscv32" or riscv64 and "riscv64",
|
|
+ description = "DynASM RISC-V module",
|
|
+ version = "1.5.0",
|
|
+ vernum = 10500,
|
|
+ release = "2022-07-12",
|
|
+ author = "Mike Pall",
|
|
+ license = "MIT",
|
|
+}
|
|
+
|
|
+-- Exported glue functions for the arch-specific module.
|
|
+local _M = { _info = _info }
|
|
+
|
|
+-- Cache library functions.
|
|
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
|
|
+local assert, setmetatable = assert, setmetatable
|
|
+local _s = string
|
|
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
|
|
+local match, gmatch = _s.match, _s.gmatch
|
|
+local concat, sort = table.concat, table.sort
|
|
+local bit = bit or require("bit")
|
|
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
|
|
+local tohex = bit.tohex
|
|
+
|
|
-- Collect all keys of t into a new array and return it sorted.
local function __orderedIndexGen(t)
  local keys, count = {}, 0
  for k in pairs(t) do
    count = count + 1
    keys[count] = k
  end
  table.sort(keys)
  return keys
end
|
|
+
|
|
-- Iterator step over t in sorted key order: returns the key that follows
-- state together with its value, or nothing once the keys are exhausted.
-- The sorted key list is kept in t.__orderedIndex while iterating and is
-- removed again when the traversal ends.
local function __orderedNext(t, state)
  local nextkey
  if state ~= nil then
    local index = t.__orderedIndex
    for i = 1, #index do
      if index[i] == state then
        nextkey = index[i+1]
      end
    end
  else
    -- First step: build the sorted key list and start at its head.
    t.__orderedIndex = __orderedIndexGen(t)
    nextkey = t.__orderedIndex[1]
  end

  if nextkey then
    return nextkey, t[nextkey]
  end

  -- Traversal finished: drop the cached key list.
  t.__orderedIndex = nil
  return
end
|
|
+
|
|
-- Ordered variant of pairs(): returns the iterator triple for a generic
-- `for` loop that steps through t using __orderedNext.
local function opairs(t)
  return __orderedNext, t, nil
end
|
|
+
|
|
+-- Inherited tables and callbacks.
|
|
+local g_opt, g_arch
|
|
+local wline, werror, wfatal, wwarn
|
|
+
|
|
+-- Action name list.
|
|
+-- CHECK: Keep this in sync with the C code!
|
|
+local action_names = {
|
|
+ "STOP", "SECTION", "ESC", "REL_EXT",
|
|
+ "ALIGN", "REL_LG", "LABEL_LG",
|
|
+ "REL_PC", "LABEL_PC", "IMM", "IMMS",
|
|
+}
|
|
+
|
|
+-- Maximum number of section buffer positions for dasm_put().
|
|
+-- CHECK: Keep this in sync with the C code!
|
|
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
|
|
+
|
|
+-- Action name -> action number.
|
|
+local map_action = {}
|
|
+for n,name in ipairs(action_names) do
|
|
+ map_action[name] = n-1
|
|
+end
|
|
+
|
|
+-- Action list buffer.
|
|
+local actlist = {}
|
|
+
|
|
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
|
|
+local actargs = { 0 }
|
|
+
|
|
+-- Current number of section buffer positions for dasm_put().
|
|
+local secpos = 1
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
-- Print every action name with its number in hex and decimal to out.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _, actname in ipairs(action_names) do
    local code = map_action[actname]
    local line = format(" %-10s %02X %d\n", actname, code, code)
    out:write(line)
  end
  out:write("\n")
end
|
|
+
|
|
-- Write action list buffer as a huge static C array.
-- out is the output file handle, name the C identifier of the array.
-- An empty action list is written as a one-element array holding a single
-- STOP action, so the generated array is never empty.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  if nn == 0 then
    -- Nothing was recorded: the sole element is the STOP action.
    nn = 1
    last = map_action.STOP
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The final element is written without a trailing comma.
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
-- Append one word to the action list. The word must be an integer in the
-- unsigned 32 bit range.
local function wputxw(n)
  local is_u32 = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(is_u32, "word out of range")
  local slot = #actlist + 1
  actlist[slot] = n
end
|
|
+
|
|
-- Append an action word (action number in the upper bits, val shifted left
-- by 4) to the action list. If a is given it is queued as an argument for
-- the next dasm_put(). The buffer position advances by num (default 1)
-- whenever a or num is given.
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  local payload = val or 0
  wputxw(code * 0x100000 + payload * 16)
  if a then
    actargs[#actargs+1] = a
  end
  if a or num then
    local step = num or 1
    secpos = secpos + step
  end
end
|
|
+
|
|
-- Flush the pending actions as one dasm_put() call (needed before
-- intervening C code or on buffer position overflow). Unless term is set,
-- the action list is terminated with STOP first.
local function wflush(term)
  local pending = #actlist ~= actargs[1]
  if not pending then return end -- Nothing to flush.
  if not term then
    waction("STOP") -- Terminate action list.
  end
  local arglist = concat(actargs, ", ")
  wline(format("dasm_put(Dst, %s);", arglist), true)
  -- The next dasm_put() starts at the current end of the action list, and
  -- that offset occupies the first buffer position.
  actargs = { #actlist }
  secpos = 1
end
|
|
+
|
|
-- Append an instruction word, preceded by an ESC action when its low
-- nibble is zero.
local function wputw(n)
  local low_nibble = band(n, 0xf)
  if low_nibble == 0 then
    waction("ESC")
  end
  wputxw(n)
end
|
|
+
|
|
-- Reserve the next action list slot with an empty-string placeholder and
-- return its index.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
|
|
+
|
|
-- Store word n at the action list index pos. n must be an integer between
-- -0x80000000 and 0xffffffff.
local function wputpos(pos, n)
  local in_range = n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[pos] = n
end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
-- Global label name -> global label number. Numbers are handed out on first
-- lookup, starting at 20 and limited to 2047.
local next_global = 20

-- __index handler: validate the name, assign the next free number and cache it.
local function alloc_global(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local num = next_global
  if num > 2047 then werror("too many global labels") end
  next_global = num + 1
  t[name] = num
  return num
end

local map_global = setmetatable({}, { __index = alloc_global })
|
|
+
|
|
-- Print the names of all global labels to out, in numbering order.
local function dumpglobals(out, lvl)
  -- Invert the name -> number map.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  local last = next_global - 1
  for num = 20, last do
    local line = format(" %s\n", names[num])
    out:write(line)
  end
  out:write("\n")
end
|
|
+
|
|
-- Write a C enum with one prefixed entry per global label, in numbering
-- order, closed by a prefixed _MAX entry.
local function writeglobals(out, prefix)
  -- Invert the name -> number map.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
|
|
+
|
|
-- Write a null-terminated C array of global label name strings, in
-- numbering order. name is the C identifier of the array.
local function writeglobalnames(out, name)
  -- Invert the name -> number map.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
-- Extern label name -> extern label number. Numbers are handed out on first
-- lookup, starting at 0 and limited to 2047. map_extern_ holds the reverse
-- mapping (number -> name).
local next_extern = 0
local map_extern_ = {}

-- __index handler: assign the next free number and record both directions.
local function alloc_extern(t, name)
  -- No restrictions on the name for now.
  local num = next_extern
  if num > 2047 then werror("too many extern labels") end
  next_extern = num + 1
  t[name] = num
  map_extern_[num] = name
  return num
end

local map_extern = setmetatable({}, { __index = alloc_extern })
|
|
+
|
|
-- Print the names of all extern labels to out, in numbering order.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local last = next_extern - 1
  for num = 0, last do
    local line = format(" %s\n", map_extern_[num])
    out:write(line)
  end
  out:write("\n")
end
|
|
+
|
|
-- Write a null-terminated C array of extern label name strings, in
-- numbering order. name is the C identifier of the array.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Arch-specific maps.
|
|
+local map_archdef = {
|
|
+ ra = "x1", sp = "x2",
|
|
+} -- Ext. register name -> int. name.
|
|
+
|
|
+local map_type = {} -- Type name -> { ctype, reg }
|
|
+local ctypenum = 0 -- Type number (for Dt... macros).
|
|
+
|
|
-- Reverse defines for registers: map the internal names x1 and x2 back to
-- ra and sp. Any other value is returned unchanged.
function _M.revdef(s)
  if s == "x1" then
    return "ra"
  end
  if s == "x2" then
    return "sp"
  end
  return s
end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Template strings for RISC-V instructions.
|
|
+local map_op = {}
|
|
+
|
|
+local map_op_rv32imafd = {
|
|
+
|
|
+ -- RV32I
|
|
+ lui_2 = "00000037DU",
|
|
+ auipc_2 = "00000017DA",
|
|
+
|
|
+ jal_2 = "0000006fDJ",
|
|
+ jalr_3 = "00000067DRJ",
|
|
+ -- pseudo-instrs
|
|
+ j_1 = "0000006fJ",
|
|
+ jal_1 = "000000efJ",
|
|
+ jr_1 = "00000067R",
|
|
+ jalr_1 = "000000e7R",
|
|
+ jalr_2 = "000000e7RJ",
|
|
+
|
|
+ beq_3 = "00000063RrB",
|
|
+ bne_3 = "00001063RrB",
|
|
+ blt_3 = "00004063RrB",
|
|
+ bge_3 = "00005063RrB",
|
|
+ bltu_3 = "00006063RrB",
|
|
+ bgeu_3 = "00007063RrB",
|
|
+ -- pseudo-instrs
|
|
+ bnez_2 = "00001063RB",
|
|
+ beqz_2 = "00000063RB",
|
|
+ blez_2 = "00005063rB",
|
|
+ bgez_2 = "00005063RB",
|
|
+ bltz_2 = "00004063RB",
|
|
+ bgtz_2 = "00004063rB",
|
|
+ bgt_3 = "00004063rRB",
|
|
+ ble_3 = "00005063rRB",
|
|
+ bgtu_3 = "00006063rRB",
|
|
+ bleu_3 = "00007063rRB",
|
|
+
|
|
+ lb_2 = "00000003DL",
|
|
+ lh_2 = "00001003DL",
|
|
+ lw_2 = "00002003DL",
|
|
+ lbu_2 = "00004003DL",
|
|
+ lhu_2 = "00005003DL",
|
|
+
|
|
+ sb_2 = "00000023rS",
|
|
+ sh_2 = "00001023rS",
|
|
+ sw_2 = "00002023rS",
|
|
+
|
|
+ addi_3 = "00000013DRI",
|
|
+ slti_3 = "00002013DRI",
|
|
+ sltiu_3 = "00003013DRI",
|
|
+ xori_3 = "00004013DRI",
|
|
+ ori_3 = "00006013DRI",
|
|
+ andi_3 = "00007013DRI",
|
|
+ slli_3 = "00001013DRi",
|
|
+ srli_3 = "00005013DRi",
|
|
+ srai_3 = "40005013DRi",
|
|
+ -- pseudo-instrs
|
|
+ seqz_2 = "00103013DR",
|
|
+ ["zext.b_2"] = "0ff07013DR",
|
|
+
|
|
+ add_3 = "00000033DRr",
|
|
+ sub_3 = "40000033DRr",
|
|
+ sll_3 = "00001033DRr",
|
|
+ slt_3 = "00002033DRr",
|
|
+ sltu_3 = "00003033DRr",
|
|
+ xor_3 = "00004033DRr",
|
|
+ srl_3 = "00005033DRr",
|
|
+ sra_3 = "40005033DRr",
|
|
+ or_3 = "00006033DRr",
|
|
+ and_3 = "00007033DRr",
|
|
+ -- pseudo-instrs
|
|
+ snez_2 = "00003033Dr",
|
|
+ sltz_2 = "00002033DR",
|
|
+ sgtz_2 = "00002033Dr",
|
|
+
|
|
+ ecall_0 = "00000073",
|
|
+ ebreak_0 = "00100073",
|
|
+
|
|
+ nop_0 = "00000013",
|
|
+ li_2 = "00000013DI",
|
|
+ mv_2 = "00000013DR",
|
|
+ not_2 = "fff04013DR",
|
|
+ neg_2 = "40000033Dr",
|
|
+ ret_0 = "00008067",
|
|
+
|
|
+ -- RV32M
|
|
+ mul_3 = "02000033DRr",
|
|
+ mulh_3 = "02001033DRr",
|
|
+ mulhsu_3 = "02002033DRr",
|
|
+ mulhu_3 = "02003033DRr",
|
|
+ div_3 = "02004033DRr",
|
|
+ divu_3 = "02005033DRr",
|
|
+ rem_3 = "02006033DRr",
|
|
+ remu_3 = "02007033DRr",
|
|
+
|
|
+ -- RV32A
|
|
+ ["lr.w_2"] = "c0000053FR",
|
|
+ ["sc.w_2"] = "c0001053FRr",
|
|
+ ["amoswap.w_3"] = "c0002053FRr",
|
|
+ ["amoadd.w_3"] = "c0003053FRr",
|
|
+ ["amoxor.w_3"] = "c0004053FRr",
|
|
+ ["amoor.w_3"] = "c0005053FRr",
|
|
+ ["amoand.w_3"] = "c0006053FRr",
|
|
+ ["amomin.w_3"] = "c0007053FRr",
|
|
+ ["amomax.w_3"] = "c0008053FRr",
|
|
+ ["amominu.w_3"] = "c0009053FRr",
|
|
+ ["amomaxu.w_3"] = "c000a053FRr",
|
|
+
|
|
+ -- RV32F
|
|
+ ["flw_2"] = "00002007FL",
|
|
+ ["fsw_2"] = "00002027gS",
|
|
+
|
|
+ ["fmadd.s_4"] = "00000043FGgH",
|
|
+ ["fmsub.s_4"] = "00000047FGgH",
|
|
+ ["fnmsub.s_4"] = "0000004bFGgH",
|
|
+ ["fnmadd.s_4"] = "0000004fFGgH",
|
|
+ ["fmadd.s_5"] = "00000043FGgHM",
|
|
+ ["fmsub.s_5"] = "00000047FGgHM",
|
|
+ ["fnmsub.s_5"] = "0000004bFGgHM",
|
|
+ ["fnmadd.s_5"] = "0000004fFGgHM",
|
|
+
|
|
+ ["fadd.s_3"] = "00000053FGg",
|
|
+ ["fsub.s_3"] = "08000053FGg",
|
|
+ ["fmul.s_3"] = "10000053FGg",
|
|
+ ["fdiv.s_3"] = "18000053FGg",
|
|
+ ["fsqrt.s_2"] = "58000053FG",
|
|
+ ["fadd.s_4"] = "00000053FGgM",
|
|
+ ["fsub.s_4"] = "08000053FGgM",
|
|
+ ["fmul.s_4"] = "10000053FGgM",
|
|
+ ["fdiv.s_4"] = "18000053FGgM",
|
|
+ ["fsqrt.s_3"] = "58000053FGM",
|
|
+
|
|
+ ["fsgnj.s_3"] = "20000053FGg",
|
|
+ ["fsgnjn.s_3"] = "20001053FGg",
|
|
+ ["fsgnjx.s_3"] = "20002053FGg",
|
|
+
|
|
+ ["fmin.s_3"] = "28000053FGg",
|
|
+ ["fmax.s_3"] = "28001053FGg",
|
|
+
|
|
+ ["fcvt.w.s_2"] = "c0000053DG",
|
|
+ ["fcvt.wu.s_2"] = "c0100053DG",
|
|
+ ["fcvt.w.s_3"] = "c0000053DGM",
|
|
+ ["fcvt.wu.s_3"] = "c0100053DGM",
|
|
+ ["fmv.x.w_2"] = "e0000053DG",
|
|
+
|
|
+ ["feq.s_3"] = "a0002053DGg",
|
|
+ ["flt.s_3"] = "a0001053DGg",
|
|
+ ["fle.s_3"] = "a0000053DGg",
|
|
+
|
|
+ ["fclass.s_2"] = "e0001053DG",
|
|
+
|
|
+ ["fcvt.s.w_2"] = "d0000053FR",
|
|
+ ["fcvt.s.wu_2"] = "d0100053FR",
|
|
+ ["fcvt.s.w_3"] = "d0000053FRM",
|
|
+ ["fcvt.s.wu_3"] = "d0100053FRM",
|
|
+ ["fmv.w.x_2"] = "f0000053FR",
|
|
+
|
|
+ -- RV32D
|
|
+ ["fld_2"] = "00003007FL",
|
|
+ ["fsd_2"] = "00003027gS",
|
|
+
|
|
+ ["fmadd.d_4"] = "02000043FGgH",
|
|
+ ["fmsub.d_4"] = "02000047FGgH",
|
|
+ ["fnmsub.d_4"] = "0200004bFGgH",
|
|
+ ["fnmadd.d_4"] = "0200004fFGgH",
|
|
+ ["fmadd.d_5"] = "02000043FGgHM",
|
|
+ ["fmsub.d_5"] = "02000047FGgHM",
|
|
+ ["fnmsub.d_5"] = "0200004bFGgHM",
|
|
+ ["fnmadd.d_5"] = "0200004fFGgHM",
|
|
+
|
|
+ ["fadd.d_3"] = "02000053FGg",
|
|
+ ["fsub.d_3"] = "0a000053FGg",
|
|
+ ["fmul.d_3"] = "12000053FGg",
|
|
+ ["fdiv.d_3"] = "1a000053FGg",
|
|
+ ["fsqrt.d_2"] = "5a000053FG",
|
|
+ ["fadd.d_4"] = "02000053FGgM",
|
|
+ ["fsub.d_4"] = "0a000053FGgM",
|
|
+ ["fmul.d_4"] = "12000053FGgM",
|
|
+ ["fdiv.d_4"] = "1a000053FGgM",
|
|
+ ["fsqrt.d_3"] = "5a000053FGM",
|
|
+
|
|
+ ["fsgnj.d_3"] = "22000053FGg",
|
|
+ ["fsgnjn.d_3"] = "22001053FGg",
|
|
+ ["fsgnjx.d_3"] = "22002053FGg",
|
|
+ ["fmin.d_3"] = "2a000053FGg",
|
|
+ ["fmax.d_3"] = "2a001053FGg",
|
|
+ ["fcvt.s.d_2"] = "40100053FG",
|
|
+ ["fcvt.d.s_2"] = "42000053FG",
|
|
+ ["feq.d_3"] = "a2002053DGg",
|
|
+ ["flt.d_3"] = "a2001053DGg",
|
|
+ ["fle.d_3"] = "a2000053DGg",
|
|
+ ["fclass.d_2"] = "e2001053DG",
|
|
+ ["fcvt.w.d_2"] = "c2000053DG",
|
|
+ ["fcvt.wu.d_2"] = "c2100053DG",
|
|
+ ["fcvt.d.w_2"] = "d2000053FR",
|
|
+ ["fcvt.d.wu_2"] = "d2100053FR",
|
|
+ ["fcvt.w.d_3"] = "c2000053DGM",
|
|
+ ["fcvt.wu.d_3"] = "c2100053DGM",
|
|
+ ["fcvt.d.w_3"] = "d2000053FRM",
|
|
+ ["fcvt.d.wu_3"] = "d2100053FRM",
|
|
+
|
|
+ ["fmv.d_2"] = "22000053FY",
|
|
+ ["fneg.d_2"] = "22001053FY",
|
|
+ ["fabs.d_2"] = "22002053FY",
|
|
+
|
|
+}
|
|
+
|
|
+local map_op_rv64imafd = {
|
|
+
|
|
+ -- RV64I
|
|
+ lwu_2 = "00006003DL",
|
|
+ ld_2 = "00003003DL",
|
|
+
|
|
+ sd_2 = "00003023rS",
|
|
+
|
|
+ slli_3 = "00001013DRj",
|
|
+ srli_3 = "00005013DRj",
|
|
+ srai_3 = "40005013DRj",
|
|
+
|
|
+ addiw_3 = "0000001bDRI",
|
|
+ slliw_3 = "0000101bDRi",
|
|
+ srliw_3 = "0000501bDRi",
|
|
+ sraiw_3 = "4000501bDRi",
|
|
+
|
|
+ addw_3 = "0000003bDRr",
|
|
+ subw_3 = "4000003bDRr",
|
|
+ sllw_3 = "0000103bDRr",
|
|
+ srlw_3 = "0000503bDRr",
|
|
+ sraw_3 = "4000503bDRr",
|
|
+
|
|
+ negw_2 = "4000003bDr",
|
|
+ ["sext.w_2"] = "0000001bDR",
|
|
+
|
|
+ -- RV64M
|
|
+ mulw_3 = "0200003bDRr",
|
|
+ divw_3 = "0200403bDRr",
|
|
+ divuw_3 = "0200503bDRr",
|
|
+ remw_3 = "0200603bDRr",
|
|
+ remuw_3 = "0200703bDRr",
|
|
+
|
|
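+  -- RV64A. NOTE(review): the templates below end in opcode byte 0x53 and use the F (fpr rd) operand letter like the FP entries, and "sc.d_2" lists three operand letters; presumably placeholders -- verify against the RISC-V "A" extension encodings before relying on them.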
|
|
+ ["lr.d_2"] = "c2000053FR",
|
|
+ ["sc.d_2"] = "c2001053FRr",
|
|
+ ["amoswap.d_3"] = "c2002053FRr",
|
|
+ ["amoadd.d_3"] = "c2003053FRr",
|
|
+ ["amoxor.d_3"] = "c2004053FRr",
|
|
+ ["amoor.d_3"] = "c2005053FRr",
|
|
+ ["amoand.d_3"] = "c2006053FRr",
|
|
+ ["amomin.d_3"] = "c2007053FRr",
|
|
+ ["amomax.d_3"] = "c2008053FRr",
|
|
+ ["amominu.d_3"] = "c2009053FRr",
|
|
+ ["amomaxu.d_3"] = "c200a053FRr",
|
|
+
|
|
+ -- RV64F
|
|
+ ["fcvt.l.s_2"] = "c0200053DG",
|
|
+ ["fcvt.lu.s_2"] = "c0300053DG",
|
|
+ ["fcvt.l.s_3"] = "c0200053DGM",
|
|
+ ["fcvt.lu.s_3"] = "c0300053DGM",
|
|
+ ["fcvt.s.l_2"] = "d0200053FR",
|
|
+ ["fcvt.s.lu_2"] = "d0300053FR",
|
|
+ ["fcvt.s.l_3"] = "d0200053FRM",
|
|
+ ["fcvt.s.lu_3"] = "d0300053FRM",
|
|
+
|
|
+ -- RV64D
|
|
+ ["fcvt.l.d_2"] = "c2200053DG",
|
|
+ ["fcvt.lu.d_2"] = "c2300053DG",
|
|
+ ["fcvt.l.d_3"] = "c2200053DGM",
|
|
+ ["fcvt.lu.d_3"] = "c2300053DGM",
|
|
+ ["fmv.x.d_2"] = "e2000053DG",
|
|
+ ["fcvt.d.l_2"] = "d2200053FR",
|
|
+ ["fcvt.d.lu_2"] = "d2300053FR",
|
|
+ ["fcvt.d.l_3"] = "d2200053FRM",
|
|
+ ["fcvt.d.lu_3"] = "d2300053FRM",
|
|
+ ["fmv.d.x_2"] = "f2000053FR",
|
|
+
|
|
+}
|
|
+
|
|
+local map_op_zicsr = {
|
|
+ csrrw_3 = "00001073DCR",
|
|
+ csrrs_3 = "00002073DCR",
|
|
+ csrrc_3 = "00003073DCR",
|
|
+ csrrwi_3 = "00005073DCu",
|
|
+ csrrsi_3 = "00006073DCu",
|
|
+ csrrci_3 = "00007073DCu",
|
|
+
|
|
+ -- pseudo-ops
|
|
+ csrrw_2 = "00001073DC",
|
|
+ csrrs_2 = "00002073CR",
|
|
+ csrrc_2 = "00003073CR",
|
|
+ csrrwi_2 = "00005073Cu",
|
|
+ csrrsi_2 = "00006073Cu",
|
|
+ csrrci_2 = "00007073Cu",
|
|
+
|
|
+ rdinstret_1 = "C0202073D",
|
|
+ rdcycle_1 = "C0002073D",
|
|
+ rdtime_1 = "C0102073D",
|
|
+ rdinstreth_1 = "C8202073D",
|
|
+ rdcycleh_1 = "C8002073D",
|
|
+ rdtimeh_1 = "C8102073D",
|
|
+
|
|
+ frcsr_1 = "00302073D",
|
|
+ fscsr_2 = "00301073DR",
|
|
+ fscsr_1 = "00301073R",
|
|
+ frrm_1 = "00202073D",
|
|
+ fsrm_2 = "00201073DR",
|
|
+ fsrm_1 = "00201073R",
|
|
+ fsrmi_2 = "00205073Du",
|
|
+ fsrmi_1 = "00205073u",
|
|
+ frflags_1 = "00102073D",
|
|
+ fsflags_2 = "00101073DR",
|
|
+ fsflagsi_2 = "00105073Du",
|
|
+ fsflagsi_1 = "00105073u",
|
|
+}
|
|
+
|
|
+local map_op_zifencei = {
|
|
+ ["fence.i_3"] = "0000100fDRI",
|
|
+}
|
|
+
|
|
+local list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr }
|
|
+local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr }
|
|
+
|
|
+if riscv32 then for _, map in opairs(list_map_op_rv32) do
|
|
+ for k, v in pairs(map) do map_op[k] = v end
|
|
+ end
|
|
+end
|
|
+if riscv64 then for _, map in opairs(list_map_op_rv64) do
|
|
+ for k, v in pairs(map) do map_op[k] = v end
|
|
+ end
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+local function parse_gpr(expr) -- Parse a GPR operand: "xN", a .type name, or "type:xN". Returns the register number and the type entry (if any).
|
|
+  local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$") -- Split an optional "type:xN" register override.
|
|
+  local tp = map_type[tname or expr] -- Without an override the whole operand is tried as a type name.
|
|
+  if tp then
|
|
+    local reg = ovreg or tp.reg -- An explicit override wins over the register given in the .type definition.
|
|
+    if not reg then
|
|
+      werror("type `"..(tname or expr).."' needs a register override")
|
|
+    end
|
|
+    expr = reg -- Continue with the register the type resolved to.
|
|
+  end
|
|
+  local r = match(expr, "^x([1-3]?[0-9])$")
|
|
+  if r then
|
|
+    r = tonumber(r)
|
|
+    if r <= 31 then return r, tp end -- The pattern also admits x32..x39, so range-check here.
|
|
+  end
|
|
+  werror("bad register name `"..expr.."'")
|
|
+end
|
|
+
|
|
+local function parse_fpr(expr) -- Parse an FPR operand "fN" and return N; only f0..f31 are accepted.
|
|
+  local r = match(expr, "^f([1-3]?[0-9])$")
|
|
+  if r then
|
|
+    r = tonumber(r)
|
|
+    if r <= 31 then return r end -- The pattern also admits f32..f39, so range-check here.
|
|
+  end
|
|
+  werror("bad register name `"..expr.."'")
|
|
+end
|
|
+
|
|
+local function parse_imm(imm, bits, shift, scale, signed, action) -- Encode an immediate field of `bits` bits placed at bit `shift`; non-numeric operands are emitted as an action instead.
|
|
+  local n = tonumber(imm)
|
|
+  if n then
|
|
+    local m = sar(n, scale) -- Drop the low `scale` bits ...
|
|
+    if shl(m, scale) == n then -- ... and accept only values where those bits were zero.
|
|
+      if signed then
|
|
+        local s = sar(m, bits-1) -- Everything above the field must be pure sign: 0 or -1.
|
|
+        if s == 0 then return shl(m, shift) -- Non-negative value that fits.
|
|
+        elseif s == -1 then return shl(m + shl(1, bits), shift) end -- Negative value: add 2^bits before shifting into place.
|
|
+      else
|
|
+        if sar(m, bits) == 0 then return shl(m, shift) end -- Unsigned value that fits.
|
|
+      end
|
|
+    end
|
|
+    werror("out of range immediate `"..imm.."'")
|
|
+  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
|
|
+	 match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
|
|
+    werror("expected immediate operand, got register")
|
|
+  else
|
|
+    waction(action or "IMM", -- Action name defaults to "IMM".
|
|
+	    (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) -- Argument packs signed flag (32768), scale, bits and shift.
|
|
+    return 0 -- Nothing is added to the opcode word for this operand here.
|
|
+  end
|
|
+end
|
|
+
|
|
+local function parse_csr(expr) -- Parse a CSR operand given as a decimal number; values above 4095 are rejected.
|
|
+  local r = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$") -- One to four decimal digits, leading digit of a four-digit form limited to 1-4.
|
|
+  if r then
|
|
+    r = tonumber(r)
|
|
+    if r <= 4095 then return r end
|
|
+  end
|
|
+  werror("bad register name `"..expr.."'") -- NOTE(review): message says "register" although the operand is a CSR number.
|
|
+end
|
|
+
|
|
+local function parse_imms(imm) -- Encode a signed 12-bit immediate split into two opcode fields (used for store displacements by parse_disp).
|
|
+  local n = tonumber(imm)
|
|
+  if n then
|
|
+    if n >= -2048 and n < 2048 then
|
|
+      local imm5, imm7 = band(n, 0x1f), shr(band(n, 0xfe0), 5) -- Low 5 bits and the next 7 bits of the value.
|
|
+      return shl(imm5, 7) + shl(imm7, 25) -- Low part goes to bit 7 upwards, high part to bit 25 upwards.
|
|
+    end
|
|
+    werror("out of range immediate `"..imm.."'")
|
|
+  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
|
|
+	 match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
|
|
+    werror("expected immediate operand, got register")
|
|
+  else
|
|
+    waction("IMMS", 0, imm); return 0 -- Non-numeric expression: emit an IMMS action and add nothing to the opcode here.
|
|
+  end
|
|
+end
|
|
+
|
|
+local function parse_rm(mode) -- Map a rounding-mode mnemonic to its numeric code (the "M" template letter shifts it to bit 12).
|
|
+  local rnd_mode = {
|
|
+    rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7
|
|
+  }
|
|
+  local n = rnd_mode[mode]
|
|
+  if n then return n
|
|
+  else werror("bad rounding mode `"..mode.."'") end -- Any other mnemonic is rejected.
|
|
+end
|
|
+
|
|
+local function parse_disp(disp, mode)
|
|
+ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
|
|
+ if imm then
|
|
+ local r = shl(parse_gpr(reg), 15)
|
|
+ local extname = match(imm, "^extern%s+(%S+)$")
|
|
+ if extname then
|
|
+ waction("REL_EXT", map_extern[extname], nil, 1)
|
|
+ return r
|
|
+ else
|
|
+ if mode == "load" then
|
|
+ return r + parse_imm(imm, 12, 20, 0, true)
|
|
+ elseif mode == "store" then
|
|
+ return r + parse_imms(imm)
|
|
+ else
|
|
+ werror("bad displacement mode '"..mode.."'")
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
|
|
+ if reg and tailr ~= "" then
|
|
+ local r, tp = parse_gpr(reg)
|
|
+ if tp then
|
|
+ if mode == "load" then
|
|
+ waction("IMM", 32768+12*32+20, format(tp.ctypefmt, tailr))
|
|
+ elseif mode == "store" then
|
|
+ waction("IMMS", 0, format(tp.ctypefmt, tailr))
|
|
+ else
|
|
+ werror("bad displacement mode '"..mode.."'")
|
|
+ end
|
|
+ return shl(r, 15)
|
|
+ end
|
|
+ end
|
|
+ werror("bad displacement `"..disp.."'")
|
|
+end
|
|
+
|
|
+local function parse_label(label, def)
|
|
+ local prefix = sub(label, 1, 2)
|
|
+ -- =>label (pc label reference)
|
|
+ if prefix == "=>" then
|
|
+ return "PC", 0, sub(label, 3)
|
|
+ end
|
|
+ -- ->name (global label reference)
|
|
+ if prefix == "->" then
|
|
+ return "LG", map_global[sub(label, 3)]
|
|
+ end
|
|
+ if def then
|
|
+ -- [1-9] (local label definition)
|
|
+ if match(label, "^[1-9]$") then
|
|
+ return "LG", 10+tonumber(label)
|
|
+ end
|
|
+ else
|
|
+ -- [<>][1-9] (local label reference)
|
|
+ local dir, lnum = match(label, "^([<>])([1-9])$")
|
|
+ if dir then -- Fwd: 1-9, Bkwd: 11-19.
|
|
+ return "LG", lnum + (dir == ">" and 0 or 10)
|
|
+ end
|
|
+ -- extern label (extern label reference)
|
|
+ local extname = match(label, "^extern%s+(%S+)$")
|
|
+ if extname then
|
|
+ return "EXT", map_extern[extname]
|
|
+ end
|
|
+ end
|
|
+ werror("bad label `"..label.."'")
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Handle opcodes defined with template strings.
|
|
+map_op[".template__"] = function(params, template, nparams)
|
|
+ if not params then return sub(template, 9) end
|
|
+ local op = tonumber(sub(template, 1, 8), 16)
|
|
+ local n = 1
|
|
+
|
|
+ -- Limit number of section buffer positions used by a single dasm_put().
|
|
+ -- A single opcode needs a maximum of 2 positions (ins/ext).
|
|
+ if secpos+2 > maxsecpos then wflush() end
|
|
+ local pos = wpos()
|
|
+
|
|
+ -- Process each character.
|
|
+ for p in gmatch(sub(template, 9), ".") do
|
|
+ if p == "D" then -- gpr rd
|
|
+ op = op + shl(parse_gpr(params[n]), 7); n = n + 1
|
|
+ elseif p == "R" then -- gpr rs1
|
|
+ op = op + shl(parse_gpr(params[n]), 15); n = n + 1
|
|
+ elseif p == "r" then -- gpr rs2
|
|
+ op = op + shl(parse_gpr(params[n]), 20); n = n + 1
|
|
+ elseif p == "F" then -- fpr rd
|
|
+ op = op + shl(parse_fpr(params[n]), 7); n = n + 1
|
|
+ elseif p == "G" then -- fpr rs1
|
|
+ op = op + shl(parse_fpr(params[n]), 15); n = n + 1
|
|
+ elseif p == "g" then -- fpr rs2
|
|
+ op = op + shl(parse_fpr(params[n]), 20); n = n + 1
|
|
+ elseif p == "H" then -- fpr rs3
|
|
+ op = op + shl(parse_fpr(params[n]), 27); n = n + 1
|
|
+ elseif p == "C" then -- csr
|
|
+ op = op + shl(parse_csr(params[n]), 20); n = n + 1
|
|
+ elseif p == "M" then -- fpr rounding mode
|
|
+ op = op + shl(parse_rm(params[n]), 12); n = n + 1
|
|
+    elseif p == "Y" then	-- fpr pseudo-op
|
|
+ local r = parse_fpr(params[n])
|
|
+ op = op + shl(r, 15) + shl(r, 20); n = n + 1
|
|
+ elseif p == "I" then -- I-type imm12
|
|
+ op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1
|
|
+ elseif p == "i" then -- I-type shamt5
|
|
+ op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1
|
|
+ elseif p == "j" then -- I-type shamt6
|
|
+ op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1
|
|
+ elseif p == "u" then -- I-type uimm
|
|
+ op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1
|
|
+ elseif p == "U" then -- U-type imm20
|
|
+ op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1
|
|
+ elseif p == "L" then -- load
|
|
+ op = op + parse_disp(params[n], "load"); n = n + 1
|
|
+ elseif p == "S" then -- store
|
|
+ op = op + parse_disp(params[n], "store"); n = n + 1
|
|
+ elseif p == "B" or p == "J" then -- control flow
|
|
+ local mode, m, s = parse_label(params[n], false)
|
|
+ if p == "B" then m = m + 2048 end
|
|
+ waction("REL_"..mode, m, s, 1); n = n + 1
|
|
+ elseif p == "A" then -- AUIPC
|
|
+ local mode, m, s = parse_label(params[n], false)
|
|
+ waction("REL_"..mode, m, s, 1); n = n + 1
|
|
+ else
|
|
+ assert(false)
|
|
+ end
|
|
+ end
|
|
+ wputpos(pos, op)
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
|
|
+map_op[".actionlist_1"] = function(params)
|
|
+ if not params then return "cvar" end
|
|
+ local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
+ wline(function(out) writeactions(out, name) end)
|
|
+end
|
|
+
|
|
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
|
|
+map_op[".globals_1"] = function(params)
|
|
+ if not params then return "prefix" end
|
|
+ local prefix = params[1] -- No syntax check. You get to keep the pieces.
|
|
+ wline(function(out) writeglobals(out, prefix) end)
|
|
+end
|
|
+
|
|
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
|
|
+map_op[".globalnames_1"] = function(params)
|
|
+ if not params then return "cvar" end
|
|
+ local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
+ wline(function(out) writeglobalnames(out, name) end)
|
|
+end
|
|
+
|
|
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
|
|
+map_op[".externnames_1"] = function(params)
|
|
+ if not params then return "cvar" end
|
|
+ local name = params[1] -- No syntax check. You get to keep the pieces.
|
|
+ wline(function(out) writeexternnames(out, name) end)
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Label pseudo-opcode (converted from trailing colon form).
|
|
+map_op[".label_1"] = function(params)
|
|
+ if not params then return "[1-9] | ->global | =>pcexpr" end
|
|
+ if secpos+1 > maxsecpos then wflush() end
|
|
+ local mode, n, s = parse_label(params[1], true)
|
|
+ if mode == "EXT" then werror("bad label definition") end
|
|
+ waction("LABEL_"..mode, n, s, 1)
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Pseudo-opcodes for data storage.
|
|
+map_op[".long_*"] = function(params)
|
|
+ if not params then return "imm..." end
|
|
+ for _,p in ipairs(params) do
|
|
+ local n = tonumber(p)
|
|
+ if not n then werror("bad immediate `"..p.."'") end
|
|
+ if n < 0 then n = n + 2^32 end
|
|
+ wputw(n)
|
|
+ if secpos+2 > maxsecpos then wflush() end
|
|
+ end
|
|
+end
|
|
+
|
|
+-- Alignment pseudo-opcode.
|
|
+map_op[".align_1"] = function(params)
|
|
+ if not params then return "numpow2" end
|
|
+ if secpos+1 > maxsecpos then wflush() end
|
|
+ local align = tonumber(params[1])
|
|
+ if align then
|
|
+ local x = align
|
|
+ -- Must be a power of 2 in the range (2 ... 256).
|
|
+ for i=1,8 do
|
|
+ x = x / 2
|
|
+ if x == 1 then
|
|
+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
|
|
+ return
|
|
+ end
|
|
+ end
|
|
+ end
|
|
+ werror("bad alignment")
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
|
|
+map_op[".type_3"] = function(params, nparams)
|
|
+ if not params then
|
|
+ return nparams == 2 and "name, ctype" or "name, ctype, reg"
|
|
+ end
|
|
+ local name, ctype, reg = params[1], params[2], params[3]
|
|
+ if not match(name, "^[%a_][%w_]*$") then
|
|
+ werror("bad type name `"..name.."'")
|
|
+ end
|
|
+ local tp = map_type[name]
|
|
+ if tp then
|
|
+ werror("duplicate type `"..name.."'")
|
|
+ end
|
|
+ -- Add #type to defines. A bit unclean to put it in map_archdef.
|
|
+ map_archdef["#"..name] = "sizeof("..ctype..")"
|
|
+ -- Add new type and emit shortcut define.
|
|
+ local num = ctypenum + 1
|
|
+ map_type[name] = {
|
|
+ ctype = ctype,
|
|
+ ctypefmt = format("Dt%X(%%s)", num),
|
|
+ reg = reg,
|
|
+ }
|
|
+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
|
|
+ ctypenum = num
|
|
+end
|
|
+map_op[".type_2"] = map_op[".type_3"]
|
|
+
|
|
+-- Dump type definitions.
|
|
+local function dumptypes(out, lvl)
|
|
+ local t = {}
|
|
+ for name in pairs(map_type) do t[#t+1] = name end
|
|
+ sort(t)
|
|
+ out:write("Type definitions:\n")
|
|
+ for _,name in ipairs(t) do
|
|
+ local tp = map_type[name]
|
|
+ local reg = tp.reg or ""
|
|
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
|
|
+ end
|
|
+ out:write("\n")
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Set the current section.
|
|
+function _M.section(num)
|
|
+ waction("SECTION", num)
|
|
+ wflush(true) -- SECTION is a terminal action.
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Dump architecture description.
|
|
+function _M.dumparch(out)
|
|
+ out:write(format("DynASM %s version %s, released %s\n\n",
|
|
+ _info.arch, _info.version, _info.release))
|
|
+ dumpactions(out)
|
|
+end
|
|
+
|
|
+-- Dump all user defined elements.
|
|
+function _M.dumpdef(out, lvl)
|
|
+ dumptypes(out, lvl)
|
|
+ dumpglobals(out, lvl)
|
|
+ dumpexterns(out, lvl)
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Pass callbacks from/to the DynASM core.
|
|
+function _M.passcb(wl, we, wf, ww)
|
|
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
|
|
+ return wflush
|
|
+end
|
|
+
|
|
+-- Setup the arch-specific module.
|
|
+function _M.setup(arch, opt)
|
|
+ g_arch, g_opt = arch, opt
|
|
+end
|
|
+
|
|
+-- Merge the core maps and the arch-specific maps.
|
|
+function _M.mergemaps(map_coreop, map_def)
|
|
+ setmetatable(map_op, { __index = map_coreop })
|
|
+ setmetatable(map_def, { __index = map_archdef })
|
|
+ return map_op, map_def
|
|
+end
|
|
+
|
|
+return _M
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua
|
|
new file mode 100644
|
|
index 0000000000..f194ce1dcb
|
|
--- /dev/null
|
|
+++ b/dynasm/dasm_riscv32.lua
|
|
@@ -0,0 +1,12 @@
|
|
+------------------------------------------------------------------------------
|
|
+-- DynASM RISC-V 32 module.
|
|
+--
|
|
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+-- See dynasm.lua for full copyright notice.
|
|
+------------------------------------------------------------------------------
|
|
+-- This module just sets 32 bit mode for the combined RISC-V module.
|
|
+-- All the interesting stuff is there.
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+riscv32 = true -- Using a global is an ugly, but effective solution.
|
|
+return require("dasm_riscv")
|
|
diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua
|
|
new file mode 100644
|
|
index 0000000000..25274395dc
|
|
--- /dev/null
|
|
+++ b/dynasm/dasm_riscv64.lua
|
|
@@ -0,0 +1,12 @@
|
|
+------------------------------------------------------------------------------
|
|
+-- DynASM RISC-V 64 module.
|
|
+--
|
|
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+-- See dynasm.lua for full copyright notice.
|
|
+------------------------------------------------------------------------------
|
|
+-- This module just sets 64 bit mode for the combined RISC-V module.
|
|
+-- All the interesting stuff is there.
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+riscv64 = true -- Using a global is an ugly, but effective solution.
|
|
+return require("dasm_riscv")
|
|
|
|
From 5a704727730307a71d1b5b9df634db76b367fab9 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Tue, 5 Mar 2024 18:03:33 +0800
|
|
Subject: [PATCH 03/22] riscv(interp): add register definition
|
|
|
|
---
|
|
src/vm_riscv64.dasc | 82 +++++++++++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 82 insertions(+)
|
|
create mode 100644 src/vm_riscv64.dasc
|
|
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
new file mode 100644
|
|
index 0000000000..bfe324c2ef
|
|
--- /dev/null
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -0,0 +1,82 @@
|
|
+|// Low-level VM code for RISC-V 64 CPUs.
|
|
+|// Bytecode interpreter, fast functions and helper functions.
|
|
+|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
|
|
+|//
|
|
+|// Contributed by gns from PLCT Lab, ISCAS.
|
|
+|// Sponsored by PLCT Lab, ISCAS.
|
|
+|
|
|
+|.arch riscv64
|
|
+|.section code_op, code_sub
|
|
+|
|
|
+|.actionlist build_actionlist
|
|
+|.globals GLOB_
|
|
+|.globalnames globnames
|
|
+|.externnames extnames
|
|
+|
|
|
+|// Note: The ragged indentation of the instructions is intentional.
|
|
+|// The starting columns indicate data dependencies.
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Fixed register assignments for the interpreter.
|
|
+|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp
|
|
+|
|
|
+|
|
|
+|// The following must be C callee-save (but BASE is often refetched).
|
|
+|.define BASE, x18 // Base of current Lua stack frame.
|
|
+|.define KBASE, x19 // Constants of current Lua function.
|
|
+|.define PC, x20 // Next PC.
|
|
+|.define GLREG, x21 // Global state.
|
|
+|.define DISPATCH, x22 // Opcode dispatch table.
|
|
+|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
|
|
+|.define MULTRES, x24 // Size of multi-result: (nresults+1)*8.
|
|
+|
|
|
+|// Constants for type-comparisons, stores and conversions. C callee-save.
|
|
+|.define TISNIL, x8
|
|
+|.define TISNUM, x25
|
|
+|.define TOBIT, f27 // 2^52 + 2^51.
|
|
+|
|
|
+|// The following temporaries are not saved across C calls, except for RA.
|
|
+|.define RA, x9 // Callee-save.
|
|
+|.define RB, x14
|
|
+|.define RC, x15
|
|
+|.define RD, x16
|
|
+|.define INS, x17
|
|
+|
|
|
+|.define TMP0, x6
|
|
+|.define TMP1, x7
|
|
+|.define TMP2, x28
|
|
+|.define TMP3, x29
|
|
+|.define TMP4, x30
|
|
+|
|
|
+|// RISC-V lp64d calling convention.
|
|
+|.define CFUNCADDR, x5
|
|
+|.define CARG1, x10
|
|
+|.define CARG2, x11
|
|
+|.define CARG3, x12
|
|
+|.define CARG4, x13
|
|
+|.define CARG5, x14
|
|
+|.define CARG6, x15
|
|
+|.define CARG7, x16
|
|
+|.define CARG8, x17
|
|
+|
|
|
+|.define CRET1, x10
|
|
+|.define CRET2, x11
|
|
+|
|
|
+|.define FARG1, f10
|
|
+|.define FARG2, f11
|
|
+|.define FARG3, f12
|
|
+|.define FARG4, f13
|
|
+|.define FARG5, f14
|
|
+|.define FARG6, f15
|
|
+|.define FARG7, f16
|
|
+|.define FARG8, f17
|
|
+|
|
|
+|.define FRET1, f10
|
|
+|.define FRET2, f11
|
|
+|
|
|
+|.define FTMP0, f0
|
|
+|.define FTMP1, f1
|
|
+|.define FTMP2, f2
|
|
+|.define FTMP3, f3
|
|
+|.define FTMP4, f4
|
|
|
|
From e3b19286cd1e849eb71fd834247a14d8e0ce2095 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Tue, 5 Mar 2024 18:05:22 +0800
|
|
Subject: [PATCH 04/22] riscv(interp): add frame definition
|
|
|
|
---
|
|
src/lj_frame.h | 9 +++++
|
|
src/vm_riscv64.dasc | 83 +++++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 92 insertions(+)
|
|
|
|
diff --git a/src/lj_frame.h b/src/lj_frame.h
|
|
index a7e07d8969..06a3d8eebb 100644
|
|
--- a/src/lj_frame.h
|
|
+++ b/src/lj_frame.h
|
|
@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
|
|
#endif
|
|
#define CFRAME_OFS_MULTRES 0
|
|
#define CFRAME_SHIFT_MULTRES 3
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#define CFRAME_OFS_ERRF 252
|
|
+#define CFRAME_OFS_NRES 248
|
|
+#define CFRAME_OFS_PREV 240
|
|
+#define CFRAME_OFS_L 232
|
|
+#define CFRAME_OFS_PC 224
|
|
+#define CFRAME_OFS_MULTRES 0
|
|
+#define CFRAME_SIZE 256
|
|
+#define CFRAME_SHIFT_MULTRES 3
|
|
#else
|
|
#error "Missing CFRAME_* definitions for this architecture"
|
|
#endif
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index bfe324c2ef..4499c7df36 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -80,3 +80,86 @@
|
|
|.define FTMP2, f2
|
|
|.define FTMP3, f3
|
|
|.define FTMP4, f4
|
|
+|
|
|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
|
|
+|// RISC-V 64 lp64d.
|
|
+|
|
|
+|.define CFRAME_SPACE, 256 // Delta for sp.
|
|
+|
|
|
+|//----- 16 byte aligned, <-- sp entering interpreter
|
|
+|.define SAVE_ERRF, 252 // 32 bit values.
|
|
+|.define SAVE_NRES, 248
|
|
+|.define SAVE_CFRAME, 240 // 64 bit values.
|
|
+|.define SAVE_L, 232
|
|
+|.define SAVE_PC, 224
|
|
+|//----- 16 byte aligned
|
|
+|// Padding 216
|
|
+|.define SAVE_GPR_, 112 // .. 112+13*8: 64 bit GPR saves.
|
|
+|.define SAVE_FPR_, 16 // .. 16+12*8: 64 bit FPR saves.
|
|
+|
|
|
+|
|
|
+|.define TMPD, 0
|
|
+|//----- 16 byte aligned
|
|
+|
|
|
+|.define TMPD_OFS, 0
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|.macro saveregs
|
|
+| addi sp, sp, -CFRAME_SPACE
|
|
+| fsd f27, SAVE_FPR_+11*8(sp)
|
|
+| fsd f26, SAVE_FPR_+10*8(sp)
|
|
+| fsd f25, SAVE_FPR_+9*8(sp)
|
|
+| fsd f24, SAVE_FPR_+8*8(sp)
|
|
+| fsd f23, SAVE_FPR_+7*8(sp)
|
|
+| fsd f22, SAVE_FPR_+6*8(sp)
|
|
+| fsd f21, SAVE_FPR_+5*8(sp)
|
|
+| fsd f20, SAVE_FPR_+4*8(sp)
|
|
+| fsd f19, SAVE_FPR_+3*8(sp)
|
|
+| fsd f18, SAVE_FPR_+2*8(sp)
|
|
+| fsd f9, SAVE_FPR_+1*8(sp)
|
|
+| fsd f8, SAVE_FPR_+0*8(sp)
|
|
+| sd ra, SAVE_GPR_+12*8(sp)
|
|
+| sd x27, SAVE_GPR_+11*8(sp)
|
|
+| sd x26, SAVE_GPR_+10*8(sp)
|
|
+| sd x25, SAVE_GPR_+9*8(sp)
|
|
+| sd x24, SAVE_GPR_+8*8(sp)
|
|
+| sd x23, SAVE_GPR_+7*8(sp)
|
|
+| sd x22, SAVE_GPR_+6*8(sp)
|
|
+| sd x21, SAVE_GPR_+5*8(sp)
|
|
+| sd x20, SAVE_GPR_+4*8(sp)
|
|
+| sd x19, SAVE_GPR_+3*8(sp)
|
|
+| sd x18, SAVE_GPR_+2*8(sp)
|
|
+| sd x9, SAVE_GPR_+1*8(sp)
|
|
+| sd x8, SAVE_GPR_+0*8(sp)
|
|
+|.endmacro
|
|
+|
|
|
+|.macro restoreregs_ret
|
|
+| ld ra, SAVE_GPR_+12*8(sp)
|
|
+| ld x27, SAVE_GPR_+11*8(sp)
|
|
+| ld x26, SAVE_GPR_+10*8(sp)
|
|
+| ld x25, SAVE_GPR_+9*8(sp)
|
|
+| ld x24, SAVE_GPR_+8*8(sp)
|
|
+| ld x23, SAVE_GPR_+7*8(sp)
|
|
+| ld x22, SAVE_GPR_+6*8(sp)
|
|
+| ld x21, SAVE_GPR_+5*8(sp)
|
|
+| ld x20, SAVE_GPR_+4*8(sp)
|
|
+| ld x19, SAVE_GPR_+3*8(sp)
|
|
+| ld x18, SAVE_GPR_+2*8(sp)
|
|
+| ld x9, SAVE_GPR_+1*8(sp)
|
|
+| ld x8, SAVE_GPR_+0*8(sp)
|
|
+| fld f27, SAVE_FPR_+11*8(sp)
|
|
+| fld f26, SAVE_FPR_+10*8(sp)
|
|
+| fld f25, SAVE_FPR_+9*8(sp)
|
|
+| fld f24, SAVE_FPR_+8*8(sp)
|
|
+| fld f23, SAVE_FPR_+7*8(sp)
|
|
+| fld f22, SAVE_FPR_+6*8(sp)
|
|
+| fld f21, SAVE_FPR_+5*8(sp)
|
|
+| fld f20, SAVE_FPR_+4*8(sp)
|
|
+| fld f19, SAVE_FPR_+3*8(sp)
|
|
+| fld f18, SAVE_FPR_+2*8(sp)
|
|
+| fld f9, SAVE_FPR_+1*8(sp)
|
|
+| fld f8, SAVE_FPR_+0*8(sp)
|
|
+| addi sp, sp, CFRAME_SPACE
|
|
+| ret
|
|
+|.endmacro
|
|
|
|
From c27634a26d4b2ec296724c6d9387d6b34c03b93c Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Tue, 5 Mar 2024 18:07:52 +0800
|
|
Subject: [PATCH 05/22] riscv(interp): add helper macros and typedefs
|
|
|
|
---
|
|
src/vm_riscv64.dasc | 353 ++++++++++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 353 insertions(+)
|
|
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index 4499c7df36..f8f338ce89 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -163,3 +163,356 @@
|
|
| addi sp, sp, CFRAME_SPACE
|
|
| ret
|
|
|.endmacro
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Pseudo-instruction macros
|
|
+|// Be careful with local label 9: the bx* macros below define and use it.
|
|
+|.macro bxeq, a, b, tgt
|
|
+| bne a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxne, a, b, tgt
|
|
+| beq a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxlt, a, b, tgt
|
|
+| bge a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxge, a, b, tgt
|
|
+| blt a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxgt, a, b, tgt
|
|
+| bge b, a, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxle, a, b, tgt
|
|
+| blt b, a, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxltu, a, b, tgt
|
|
+| bgeu a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxgeu, a, b, tgt
|
|
+| bltu a, b, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxgtu, a, b, tgt
|
|
+| bgeu b, a, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxleu, a, b, tgt
|
|
+| bltu b, a, >9
|
|
+| j tgt
|
|
+|9:
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxeqz, a, tgt
|
|
+| bxeq a, x0, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxnez, a, tgt
|
|
+| bxne a, x0, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxlez, a, tgt
|
|
+| bxge x0, a, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxgez, a, tgt
|
|
+| bxge a, x0, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxltz, a, tgt
|
|
+| bxlt a, x0, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bxgtz, a, tgt
|
|
+| bxlt x0, a, tgt
|
|
+|.endmacro
|
|
+|
|
|
+|.macro lxi, a, b
|
|
+| lui a, (b)&0xfffff
|
|
+| srai a, a, 12
|
|
+|.endmacro
|
|
+|
|
|
+|.macro lzi, a, b
|
|
+|  lui a, (b)&0xfffff  // a = (b << 12), sign-extended to 64 bits on RV64.
|
|
+|  srliw a, a, 12  // Word shift so the result is the zero-extended 20 bit value; srli would keep the sign-extended upper bits for b >= 0x80000.
|
|
+|.endmacro
|
|
+|
|
|
+|.macro addxi, a, b, c
|
|
+| lui x31, (c)&0xfffff
|
|
+| srai x31, x31, 12
|
|
+| add a, x31, b
|
|
+|.endmacro
|
|
+|
|
|
+|.macro sext.b, a, b
|
|
+| slli a, b, 56
|
|
+| srai a, a, 56
|
|
+|.endmacro
|
|
+|
|
|
+|.macro sext.h, a, b
|
|
+| slli a, b, 48
|
|
+| srai a, a, 48
|
|
+|.endmacro
|
|
+|
|
|
+|.macro zext.h, a, b
|
|
+| slli a, b, 48
|
|
+| srli a, a, 48
|
|
+|.endmacro
|
|
+|
|
|
+|.macro zext.w, a, b
|
|
+| slli a, b, 32
|
|
+| srli a, a, 32
|
|
+|.endmacro
|
|
+|
|
|
+|.macro bfextri, a, b, c, d
|
|
+| slli a, b, (63-c)
|
|
+| srli a, a, (d+63-c)
|
|
+|.endmacro
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Type definitions. Some of these are only used for documentation.
|
|
+|.type L, lua_State, LREG
|
|
+|.type GL, global_State, GLREG
|
|
+|.type TVALUE, TValue
|
|
+|.type GCOBJ, GCobj
|
|
+|.type STR, GCstr
|
|
+|.type TAB, GCtab
|
|
+|.type LFUNC, GCfuncL
|
|
+|.type CFUNC, GCfuncC
|
|
+|.type PROTO, GCproto
|
|
+|.type UPVAL, GCupval
|
|
+|.type NODE, Node
|
|
+|.type NARGS8, int
|
|
+|.type TRACE, GCtrace
|
|
+|.type SBUF, SBuf
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Trap for not-yet-implemented parts.
|
|
+|.macro NYI; .long 0x00100073; .endmacro
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Access to frame relative to BASE.
|
|
+|.define FRAME_PC, -8
|
|
+|.define FRAME_FUNC, -16
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|// Endian-specific defines. RISC-V only has little endian ABI for now.
|
|
+|.define OFS_RD, 2
|
|
+|.define OFS_RA, 1
|
|
+|.define OFS_OP, 0
|
|
+|
|
|
+|// Instruction decode.
|
|
+|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
|
|
+|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro
|
|
+|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro
|
|
+|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro
|
|
+|
|
|
+|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro
|
|
+|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro
|
|
+|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro
|
|
+|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro
|
|
+|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro
|
|
+|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro
|
|
+|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro
|
|
+|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro
|
|
+|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro
|
|
+|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro
|
|
+|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro
|
|
+|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
|
|
+|
|
|
+|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro
|
|
+|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro
|
|
+|.macro decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro
|
|
+|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro
|
|
+|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro
|
|
+|
|
|
+|// Instruction fetch.
|
|
+|.macro ins_NEXT1
|
|
+| lw INS, 0(PC)
|
|
+| addi PC, PC, 4
|
|
+|.endmacro
|
|
+|// Instruction decode+dispatch.
|
|
+|.macro ins_NEXT2
|
|
+| decode_OP8 TMP1, INS
|
|
+| add TMP0, DISPATCH, TMP1
|
|
+| decode_RD8a RD, INS
|
|
+| ld TMP4, 0(TMP0)
|
|
+| decode_RA8a RA, INS
|
|
+| decode_RD8b RD
|
|
+| decode_RA8b RA
|
|
+| jr TMP4
|
|
+|.endmacro
|
|
+|.macro ins_NEXT
|
|
+| ins_NEXT1
|
|
+| ins_NEXT2
|
|
+|.endmacro
|
|
+|
|
|
+|// Instruction footer.
|
|
+|.if 1
|
|
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|
|
+| .define ins_next, ins_NEXT
|
|
+| .define ins_next_, ins_NEXT
|
|
+| .define ins_next1, ins_NEXT1
|
|
+| .define ins_next2, ins_NEXT2
|
|
+|.else
|
|
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|
|
+| // Affects only certain kinds of benchmarks (and only with -j off).
|
|
+| .macro ins_next
|
|
+| j ->ins_next
|
|
+| .endmacro
|
|
+| .macro ins_next1
|
|
+| .endmacro
|
|
+| .macro ins_next2
|
|
+| j ->ins_next
|
|
+| .endmacro
|
|
+| .macro ins_next_
|
|
+| ->ins_next:
|
|
+| ins_NEXT
|
|
+| .endmacro
|
|
+|.endif
|
|
+|
|
|
+|// Call decode and dispatch.
|
|
+|.macro ins_callt
|
|
+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|
|
+| ld PC, LFUNC:RB->pc
|
|
+| lw INS, 0(PC)
|
|
+| addi PC, PC, 4
|
|
+| decode_OP8 TMP1, INS
|
|
+| decode_RA8 RA, INS
|
|
+| add TMP0, DISPATCH, TMP1
|
|
+| ld TMP0, 0(TMP0)
|
|
+| add RA, RA, BASE
|
|
+| jr TMP0
|
|
+|.endmacro
|
|
+|
|
|
+|.macro ins_call
|
|
+| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
|
|
+| sd PC, FRAME_PC(BASE)
|
|
+| ins_callt
|
|
+|.endmacro
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
+|
|
|
+|.macro branch_RD
|
|
+| srliw TMP0, RD, 1  // TMP0 = RD >> 1 (presumably RD holds operand*8 as left by decode_RD8b, giving operand*4 -- confirm at call sites).
|
|
+| lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff  // TMP4 = -(BCBIAS_J*4); only the upper 20 bits are loaded, so this is exact only if BCBIAS_J*4 is a multiple of 4096.
|
|
+| addw TMP0, TMP0, TMP4  // 32-bit add with sign-extended result: signed byte offset of the branch.
|
|
+| add PC, PC, TMP0  // PC += offset. The macro overwrites TMP0 and TMP4.
|
|
+|.endmacro
|
|
+|
|
|
+|// Assumes J is relative to GL. Some J members might be out of range though.
|
|
+#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
|
|
+|
|
|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|
+|
|
|
+|.macro call_intern, curfunc, func
|
|
+|->curfunc .. _pcrel_ .. func:
|
|
+| auipc CFUNCADDR, extern %pcrel_hi(func)
|
|
+| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func)
|
|
+|.endmacro
|
|
+|.macro call_extern, func
|
|
+| call extern func
|
|
+| empty
|
|
+|.endmacro
|
|
+|
|
|
+|// Set current VM state. Uses TMP0.
|
|
+|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
|
|
+|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro
|
|
+|
|
|
+|// Move table write barrier back. Overwrites mark and tmp.
|
|
+|.macro barrierback, tab, mark, tmp, target
|
|
+| ld tmp, GL->gc.grayagain
|
|
+| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
|
|
+| sd tab, GL->gc.grayagain
|
|
+| sb mark, tab->marked
|
|
+| sd tmp, tab->gclist
|
|
+| j target
|
|
+|.endmacro
|
|
+|
|
|
+|// Clear type tag. Isolate lowest 64-17=47 bits of reg.
|
|
+|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro
|
|
+|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro
|
|
+|
|
|
+|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. settp_b (and thus settp) overwrites x31.
|
|
+|.macro settp_a, dst; cleartp dst; .endmacro
|
|
+|.macro settp_a, dst, src; cleartp dst, src; .endmacro
|
|
+|.macro settp_b, dst, tp;
|
|
+| slli x31, tp, 47  // x31 = tp << 47; only the low 17 bits of tp survive the shift. x31 is used as scratch.
|
|
+| or dst, dst, x31  // Merge the tag; bits [47, 63] of dst must already be clear (see settp_a).
|
|
+|.endmacro
|
|
+|.macro settp_b, dst, src, tp;
|
|
+| slli x31, tp, 47  // x31 = tp << 47; x31 is used as scratch.
|
|
+| or dst, src, x31  // dst = src | tag; bits [47, 63] of src must already be clear.
|
|
+|.endmacro
|
|
+|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro
|
|
+|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro
|
|
+|
|
|
+|// Extract (negative) type tag.
|
|
+|.macro gettp, dst, src; srai dst, src, 47; .endmacro
|
|
+|
|
|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure. Overwrites TMP4.
|
|
+|.macro checktp, reg, tp, target
|
|
+| gettp TMP4, reg  // TMP4 = reg >> 47 (arithmetic shift): the type tag.
|
|
+| addi TMP4, TMP4, tp  // TMP4 = tag + tp; zero iff tag == -tp (callers pass tp = -LJ_Txxx).
|
|
+| cleartp reg  // Done before the branch, so reg is untagged on both the match and the mismatch path.
|
|
+| bxnez TMP4, target  // Tag mismatch: go to target.
|
|
+|.endmacro
|
|
+|.macro checktp, dst, reg, tp, target
|
|
+| gettp TMP4, reg  // TMP4 = reg >> 47 (arithmetic shift): the type tag.
|
|
+| addi TMP4, TMP4, tp  // TMP4 = tag + tp; zero iff tag == -tp.
|
|
+| cleartp dst, reg  // dst = untagged reg (written on both paths); reg is unchanged unless dst aliases it.
|
|
+| bxnez TMP4, target  // Tag mismatch: go to target.
|
|
+|.endmacro
|
|
+|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
|
|
+|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
|
|
+|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
|
|
+|.macro checkint, reg, target
|
|
+| gettp TMP4, reg
|
|
+| bxne TMP4, TISNUM, target
|
|
+|.endmacro
|
|
+|.macro checknum, reg, target
|
|
+| gettp TMP4, reg
|
|
+| sltiu TMP4, TMP4, LJ_TISNUM
|
|
+| bxeqz TMP4, target
|
|
+|.endmacro
|
|
+|
|
|
+|.macro mov_false, reg
|
|
+| li reg, 0x001
|
|
+| slli reg, reg, 47  // reg = 1 << 47.
|
|
+| not reg, reg  // reg = ~(1 << 47): all ones except bit 47 (presumably the tagged 'false' TValue -- confirm against LJ_TFALSE).
|
|
+|.endmacro
|
|
+|.macro mov_true, reg
|
|
+| li reg, 0x001
|
|
+| slli reg, reg, 48  // reg = 1 << 48.
|
|
+| not reg, reg  // reg = ~(1 << 48): all ones except bit 48 (presumably the tagged 'true' TValue -- confirm against LJ_TTRUE).
|
|
+|.endmacro
|
|
+|
|
|
+|//-----------------------------------------------------------------------
|
|
|
|
From caeefe25d08c45e1339fc59f63ca8b466403c151 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 08:40:08 +0800
|
|
Subject: [PATCH 06/22] riscv(interp): add base assembly interpreter VM
|
|
|
|
---
|
|
src/lj_vm.h | 3 +
|
|
src/lj_vmmath.c | 3 +-
|
|
src/vm_riscv64.dasc | 3584 ++++++++++++++++++++++++++++++++++++++++++-
|
|
3 files changed, 3586 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/src/lj_vm.h b/src/lj_vm.h
|
|
index 9cc42613d3..774944725d 100644
|
|
--- a/src/lj_vm.h
|
|
+++ b/src/lj_vm.h
|
|
@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
|
|
#if LJ_TARGET_PPC
|
|
void lj_vm_cachesync(void *start, void *end);
|
|
#endif
|
|
+#if LJ_TARGET_RISCV64
|
|
+void lj_vm_fence_rw_rw(void);  /* Explicit (void): a bare () is not a prototype before C23. */
|
|
+#endif
|
|
LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
|
|
#if LJ_HASJIT
|
|
LJ_ASMF double lj_vm_foldfpm(double x, int op);
|
|
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
|
|
index 2c9b96cce4..a31a4adf51 100644
|
|
--- a/src/lj_vmmath.c
|
|
+++ b/src/lj_vmmath.c
|
|
@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op)
|
|
|
|
/* -- Helper functions for generated machine code ------------------------- */
|
|
|
|
-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
|
|
+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
|
|
+ || LJ_TARGET_RISCV64
|
|
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
|
|
{
|
|
uint32_t y, ua, ub;
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index f8f338ce89..c86f94bddb 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -438,9 +438,11 @@
|
|
| auipc CFUNCADDR, extern %pcrel_hi(func)
|
|
| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func)
|
|
|.endmacro
|
|
-|.macro call_extern, func
|
|
-| call extern func
|
|
-| empty
|
|
+|.macro call_extern, curfunc, func
|
|
+|->curfunc .. _got_pcrel_ .. func:
|
|
+| auipc CFUNCADDR, extern %got_pcrel_hi(func)
|
|
+| ld CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _got_pcrel_ .. func)(CFUNCADDR)
|
|
+| jalr CFUNCADDR
|
|
|.endmacro
|
|
|
|
|
|// Set current VM state. Uses TMP0.
|
|
@@ -516,3 +518,3579 @@
|
|
|.endmacro
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
+
|
|
+/* Generate subroutines used by opcodes and other parts of the VM. */
|
|
+/* The .code_sub section should be last to help static branch prediction. */
|
|
+static void build_subroutines(BuildCtx *ctx)
|
|
+{
|
|
+ |.code_sub
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Return handling ----------------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_returnp:
|
|
+ | // See vm_return. Also: TMP2 = previous base.
|
|
+ | andi TMP0, PC, FRAME_P
|
|
+ |
|
|
+ | // Return from pcall or xpcall fast func.
|
|
+ | mov_true TMP1
|
|
+ | bxeqz TMP0, ->cont_dispatch
|
|
+ | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
|
|
+ | mv BASE, TMP2 // Restore caller base.
|
|
+ | // Prepending may overwrite the pcall frame, so do it at the end.
|
|
+ | sd TMP1, -8(RA) // Prepend true to results.
|
|
+ | addi RA, RA, -8
|
|
+ |
|
|
+ |->vm_returnc:
|
|
+ | addiw RD, RD, 8 // RD = (nresults+1)*8.
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | li CRET1, LUA_YIELD
|
|
+ | bxeqz RD, ->vm_unwind_c_eh
|
|
+ | mv MULTRES, RD
|
|
+ | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
|
|
+ |
|
|
+ |->vm_return:
|
|
+ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
|
|
+ | // TMP0 = PC & FRAME_TYPE
|
|
+ | andi TMP2, PC, ~FRAME_TYPEP
|
|
+ | xori TMP0, TMP0, FRAME_C
|
|
+ | sub TMP2, BASE, TMP2 // TMP2 = previous base.
|
|
+ | bxnez TMP0, ->vm_returnp
|
|
+ |
|
|
+ | addiw TMP1, RD, -8
|
|
+ | sd TMP2, L->base
|
|
+ | li_vmstate C
|
|
+ | lw TMP2, SAVE_NRES(sp)
|
|
+ | addi BASE, BASE, -16
|
|
+ | st_vmstate
|
|
+ | slliw TMP2, TMP2, 3
|
|
+ | beqz TMP1, >2
|
|
+ |1:
|
|
+ | addiw TMP1, TMP1, -8
|
|
+ | ld CRET1, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | sd CRET1, 0(BASE)
|
|
+ | addi BASE, BASE, 8
|
|
+ | bnez TMP1, <1
|
|
+ |
|
|
+ |2:
|
|
+ | bne TMP2, RD, >6
|
|
+ |3:
|
|
+ | sd BASE, L->top // Store new top.
|
|
+ |
|
|
+ |->vm_leave_cp:
|
|
+ | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame.
|
|
+ | mv CRET1, x0 // Ok return status for vm_pcall.
|
|
+ | sd TMP0, L->cframe
|
|
+ |
|
|
+ |->vm_leave_unw:
|
|
+ | restoreregs_ret
|
|
+ |
|
|
+ |6:
|
|
+ | ld TMP1, L->maxstack
|
|
+ | blt TMP2, RD, >7
|
|
+ | // More results wanted. Check stack size and fill up results with nil.
|
|
+ | bge BASE, TMP1, >9
|
|
+ | sd TISNIL, 0(BASE)
|
|
+ | addiw RD, RD, 8
|
|
+ | addi BASE, BASE, 8
|
|
+ | j <2
|
|
+ |
|
|
+ |7: // Less results wanted.
|
|
+ | subw TMP0, RD, TMP2
|
|
+ | sub TMP0, BASE, TMP0 // Either keep top or shrink it.
|
|
+ | beqz TMP2, >8
|
|
+ | mv BASE, TMP0 // LUA_MULTRET+1 case
|
|
+ |8:
|
|
+ | j <3
|
|
+ |
|
|
+ |9: // Corner case: need to grow stack for filling up results.
|
|
+ | // This can happen if:
|
|
+ | // - A C function grows the stack (a lot).
|
|
+ | // - The GC shrinks the stack in between.
|
|
+ | // - A return back from a lua_call() with (high) nresults adjustment.
|
|
+ |
|
|
+ | sd BASE, L->top // Save current top held in BASE (yes).
|
|
+ | mv MULTRES, RD
|
|
+ | srliw CARG2, TMP2, 3
|
|
+ | mv CARG1, L
|
|
+ | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n)
|
|
+ | lw TMP2, SAVE_NRES(sp)
|
|
+ | ld BASE, L->top // Need the (realloced) L->top in BASE.
|
|
+ | mv RD, MULTRES
|
|
+ | slliw TMP2, TMP2, 3
|
|
+ | j <2
|
|
+ |
|
|
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
|
|
+ | // (void *cframe, int errcode)
|
|
+ | mv sp, CARG1
|
|
+ | mv CRET1, CARG2
|
|
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
|
|
+ | ld L, SAVE_L(sp)
|
|
+ | li TMP0, ~LJ_VMST_C
|
|
+ | ld GL, L->glref
|
|
+ | sw TMP0, GL->vmstate
|
|
+ | j ->vm_leave_unw
|
|
+ |
|
|
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
|
|
+ | // (void *cframe)
|
|
+ | andi sp, CARG1, CFRAME_RAWMASK
|
|
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
|
|
+ | ld L, SAVE_L(sp)
|
|
+ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
|
|
+ | li TISNIL, LJ_TNIL
|
|
+ | li TISNUM, LJ_TISNUM
|
|
+ | ld BASE, L->base
|
|
+ | ld GL, L->glref // Setup pointer to global state.
|
|
+ | slli TMP3, TMP3, 32
|
|
+ | mov_false TMP1
|
|
+ | li_vmstate INTERP
|
|
+ | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
|
|
+ | fmv.d.x TOBIT, TMP3
|
|
+ | addi RA, BASE, -8 // Results start at BASE-8.
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sd TMP1, -8(BASE) // Prepend false to error message.
|
|
+ | st_vmstate
|
|
+ | li RD, 16 // 2 results: false + error message.
|
|
+ | j ->vm_returnc
|
|
+ |
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Grow stack for calls -----------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_growstack_c: // Grow stack for C function.
|
|
+ | li CARG2, LUA_MINSTACK
|
|
+ | j >2
|
|
+ |
|
|
+ |->vm_growstack_l: // Grow stack for Lua function.
|
|
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
|
|
+ | add RC, BASE, RC
|
|
+ | sub RA, RA, BASE
|
|
+ | sd BASE, L->base
|
|
+ | addi PC, PC, 4 // Must point after first instruction.
|
|
+ | sd RC, L->top
|
|
+ | srliw CARG2, RA, 3
|
|
+ |2:
|
|
+ | // L->base = new base, L->top = top
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv CARG1, L
|
|
+ | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n)
|
|
+ | ld BASE, L->base
|
|
+ | ld RC, L->top
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | sub RC, RC, BASE
|
|
+ | cleartp LFUNC:RB
|
|
+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|
|
+ | ins_callt // Just retry the call.
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Entry points into the assembler VM ---------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_resume: // Setup C frame and resume thread.
|
|
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
|
|
+ | saveregs
|
|
+ | mv L, CARG1
|
|
+ | ld GL, L->glref // Setup pointer to global state.
|
|
+ | mv BASE, CARG2
|
|
+ | lbu TMP1, L->status
|
|
+ | sd L, SAVE_L(sp)
|
|
+ | li PC, FRAME_CP
|
|
+ | addi TMP0, sp, CFRAME_RESUME
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sw x0, SAVE_NRES(sp)
|
|
+ | sw x0, SAVE_ERRF(sp)
|
|
+ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
|
|
+ | sd x0, SAVE_CFRAME(sp)
|
|
+ | sd TMP0, L->cframe
|
|
+ | beqz TMP1, >3
|
|
+ |
|
|
+ | // Resume after yield (like a return).
|
|
+ | sd L, GL->cur_L
|
|
+ | mv RA, BASE
|
|
+ | ld BASE, L->base
|
|
+ | ld TMP1, L->top
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
|
|
+ | sub RD, TMP1, BASE
|
|
+ | slli TMP3, TMP3, 32
|
|
+ | sb x0, L->status
|
|
+ | fmv.d.x TOBIT, TMP3
|
|
+ | li_vmstate INTERP
|
|
+ | addi RD, RD, 8
|
|
+ | st_vmstate
|
|
+ | mv MULTRES, RD
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | li TISNIL, LJ_TNIL
|
|
+ | li TISNUM, LJ_TISNUM
|
|
+ | bxeqz TMP0, ->BC_RET_Z
|
|
+ | j ->vm_return
|
|
+ |
|
|
+ |->vm_pcall: // Setup protected C frame and enter VM.
|
|
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
|
|
+ | saveregs
|
|
+ | sw CARG4, SAVE_ERRF(sp)
|
|
+ | li PC, FRAME_CP
|
|
+ | j >1
|
|
+ |
|
|
+ |->vm_call: // Setup C frame and enter VM.
|
|
+ | // (lua_State *L, TValue *base, int nres1)
|
|
+ | saveregs
|
|
+ | li PC, FRAME_C
|
|
+ |
|
|
+ |1: // Entry point for vm_pcall above (PC = ftype).
|
|
+ | ld TMP1, L:CARG1->cframe
|
|
+ | mv L, CARG1
|
|
+ | sw CARG3, SAVE_NRES(sp)
|
|
+ | ld GL, L->glref // Setup pointer to global state.
|
|
+ | sd CARG1, SAVE_L(sp)
|
|
+ | mv BASE, CARG2
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
|
|
+ | sd TMP1, SAVE_CFRAME(sp)
|
|
+ | sd sp, L->cframe // Add our C frame to cframe chain.
|
|
+ |
|
|
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
|
|
+ | sd L, GL->cur_L
|
|
+ | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
|
|
+ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
|
|
+ | ld TMP1, L->top
|
|
+ | slli TMP3, TMP3, 32
|
|
+ | add PC, PC, BASE
|
|
+ | sub NARGS8:RC, TMP1, BASE
|
|
+ | li TISNUM, LJ_TISNUM
|
|
+ | sub PC, PC, TMP2 // PC = frame delta + frame type
|
|
+ | fmv.d.x TOBIT, TMP3
|
|
+ | li_vmstate INTERP
|
|
+ | li TISNIL, LJ_TNIL
|
|
+ | st_vmstate
|
|
+ |
|
|
+ |->vm_call_dispatch:
|
|
+ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | checkfunc LFUNC:RB, ->vmeta_call
|
|
+ |
|
|
+ |->vm_call_dispatch_f:
|
|
+ | ins_call
|
|
+ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
|
|
+ |
|
|
+ |->vm_cpcall: // Setup protected C frame, call C.
|
|
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
|
|
+ | saveregs
|
|
+ | mv L, CARG1
|
|
+ | ld TMP0, L:CARG1->stack
|
|
+ | sd CARG1, SAVE_L(sp)
|
|
+ | ld TMP1, L->top
|
|
+ | ld GL, L->glref // Setup pointer to global state.
|
|
+ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok.
|
|
+ | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
|
|
+ | ld TMP1, L->cframe
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame.
|
|
+ | sw x0, SAVE_ERRF(sp) // No error function.
|
|
+ | sd TMP1, SAVE_CFRAME(sp)
|
|
+ | sd sp, L->cframe // Add our C frame to cframe chain.
|
|
+ | sd L, GL->cur_L
|
|
+ | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
|
|
+ | mv BASE, CRET1
|
|
+ | li PC, FRAME_CP
|
|
+ | bnez CRET1, <3 // Else continue with the call.
|
|
+ | j ->vm_leave_cp // No base? Just remove C frame.
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Metamethod handling ------------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |//-- Continuation dispatch ----------------------------------------------
|
|
+ |
|
|
+ |->cont_dispatch:
|
|
+ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
|
|
+ | ld TMP0, -32(BASE) // Continuation.
|
|
+ | mv RB, BASE
|
|
+ | mv BASE, TMP2 // Restore caller BASE.
|
|
+ | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
|
|
+ | ld PC, -24(RB) // Restore PC from [cont|PC].
|
|
+ | cleartp LFUNC:TMP1
|
|
+ | add TMP2, RA, RD
|
|
+ | ld TMP1, LFUNC:TMP1->pc
|
|
+ | sd TISNIL, -8(TMP2) // Ensure one valid arg.
|
|
+ | // BASE = base, RA = resultptr, RB = meta base
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1)
|
|
+ | jr TMP0 // Jump to continuation.
|
|
+ |
|
|
+ |->cont_cat: // RA = resultptr, RB = meta base
|
|
+ | lw INS, -4(PC)
|
|
+ | addi CARG2, RB, -32
|
|
+ | ld TMP0, 0(RA)
|
|
+ | decode_RB8 MULTRES, INS
|
|
+ | decode_RA8 RA, INS
|
|
+ | add TMP1, BASE, MULTRES
|
|
+ | sd BASE, L->base
|
|
+ | sub CARG3, CARG2, TMP1
|
|
+ | sd TMP0, 0(CARG2)
|
|
+ | bxne TMP1, CARG2, ->BC_CAT_Z
|
|
+ | add RA, BASE, RA
|
|
+ | sd TMP0, 0(RA)
|
|
+ | j ->cont_nop
|
|
+ |
|
|
+ |//-- Table indexing metamethods -----------------------------------------
|
|
+ |
|
|
+  |->vmeta_tgets1:  // String key in RC; CARG2 (the object) is not set here -- presumably set by the caller.
|
|
+  |  addi CARG3, GL, offsetof(global_State, tmptv)  // CARG3 = &g->tmptv, used as the key argument.
|
|
+  |  li TMP0, LJ_TSTR
|
|
+  |  settp STR:RC, TMP0  // Tag the string pointer as LJ_TSTR.
|
|
+  |  sd STR:RC, 0(CARG3)  // g->tmptv = tagged key.
|
|
+  |  j >1  // Join the common path at label 1 under vmeta_tgetv.
|
|
+ |
|
|
+ |->vmeta_tgets:
|
|
+ | addi CARG2, GL, offsetof(global_State, tmptv)
|
|
+ | addi CARG3, GL, offsetof(global_State, tmptv2)
|
|
+ | li TMP0, LJ_TTAB
|
|
+ | li TMP1, LJ_TSTR
|
|
+ | settp TAB:RB, TMP0
|
|
+ | settp STR:RC, TMP1
|
|
+ | sd TAB:RB, 0(CARG2)
|
|
+ | sd STR:RC, 0(CARG3)
|
|
+ | j >1
|
|
+ |
|
|
+ |->vmeta_tgetb: // TMP0 = index
|
|
+ | addi CARG3, GL, offsetof(global_State, tmptv)
|
|
+ | settp TMP0, TISNUM
|
|
+ | sd TMP0, 0(CARG3)
|
|
+ |
|
|
+ |->vmeta_tgetv:
|
|
+ |1:
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | // (lua_State *L, TValue *o, TValue *k)
|
|
+ | call_intern vmeta_tgetv, lj_meta_tget
|
|
+ | // Returns TValue * (finished) or NULL (metamethod).
|
|
+ | beqz CRET1, >3
|
|
+ | ld TMP0, 0(CRET1)
|
|
+ | ins_next1
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next2
|
|
+ |
|
|
+ |3: // Call __index metamethod.
|
|
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
|
|
+ | addi TMP1, BASE, -FRAME_CONT
|
|
+ | li NARGS8:RC, 16 // 2 args for func(t, k).
|
|
+ | ld BASE, L->top
|
|
+ | sd PC, -24(BASE) // [cont|PC]
|
|
+ | sub PC, BASE, TMP1
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
|
|
+ | cleartp LFUNC:RB
|
|
+ | j ->vm_call_dispatch_f
|
|
+ |
|
|
+ |->vmeta_tgetr:
|
|
+ | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key)
|
|
+ | // Returns cTValue * or NULL.
|
|
+ | mv TMP1, TISNIL
|
|
+ | bxeqz CRET1, ->BC_TGETR_Z
|
|
+ | ld TMP1, 0(CRET1)
|
|
+ | j ->BC_TGETR_Z
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+  |->vmeta_tsets1:  // String key in RC; CARG2 (the object) is not set here -- presumably set by the caller.
|
|
+  |  addi CARG3, GL, offsetof(global_State, tmptv)  // CARG3 = &g->tmptv, used as the key argument.
|
|
+  |  li TMP0, LJ_TSTR
|
|
+  |  settp STR:RC, TMP0  // Tag the string pointer as LJ_TSTR.
|
|
+  |  sd STR:RC, 0(CARG3)  // g->tmptv = tagged key.
|
|
+  |  j >1  // Join the common path at label 1 under vmeta_tsetv.
|
|
+ |
|
|
+ |->vmeta_tsets:
|
|
+ | addi CARG2, GL, offsetof(global_State, tmptv)
|
|
+ | addi CARG3, GL, offsetof(global_State, tmptv2)
|
|
+ | li TMP0, LJ_TTAB
|
|
+ | li TMP1, LJ_TSTR
|
|
+ | settp TAB:RB, TMP0
|
|
+ | settp STR:RC, TMP1
|
|
+ | sd TAB:RB, 0(CARG2)
|
|
+ | sd STR:RC, 0(CARG3)
|
|
+ | j >1
|
|
+ |
|
|
+ |->vmeta_tsetb: // TMP0 = index
|
|
+ | addi CARG3, GL, offsetof(global_State, tmptv)
|
|
+ | settp TMP0, TISNUM
|
|
+ | sd TMP0, 0(CARG3)
|
|
+ |
|
|
+ |->vmeta_tsetv:
|
|
+ |1:
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | // (lua_State *L, TValue *o, TValue *k)
|
|
+ | call_intern vmeta_tsetv, lj_meta_tset
|
|
+ | // Returns TValue * (finished) or NULL (metamethod).
|
|
+ | ld TMP2, 0(RA)
|
|
+ | beqz CRET1, >3
|
|
+ | ins_next1
|
|
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
|
|
+ | sd TMP2, 0(CRET1)
|
|
+ | ins_next2
|
|
+ |
|
|
+ |3: // Call __newindex metamethod.
|
|
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
|
|
+ | addi TMP1, BASE, -FRAME_CONT
|
|
+ | ld BASE, L->top
|
|
+ | sd PC, -24(BASE) // [cont|PC]
|
|
+ | sub PC, BASE, TMP1
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
|
|
+ | li NARGS8:RC, 24 // 3 args for func(t, k, v)
|
|
+ | cleartp LFUNC:RB
|
|
+ | sd TMP2, 16(BASE) // Copy value to third argument.
|
|
+ | j ->vm_call_dispatch_f
|
|
+ |
|
|
+ |->vmeta_tsetr:
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | // (lua_State *L, GCtab *t, int32_t key)
|
|
+ | call_intern vmeta_tsetr, lj_tab_setinth
|
|
+ | // Returns TValue *.
|
|
+ | j ->BC_TSETR_Z
|
|
+ |
|
|
+ |//-- Comparison metamethods ---------------------------------------------
|
|
+ |
|
|
+ |->vmeta_comp:
|
|
+ | // RA/RD point to o1/o2.
|
|
+ | mv CARG2, RA
|
|
+ | mv CARG3, RD
|
|
+ | addi PC, PC, -4
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | decode_OP1 CARG4, INS
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | // (lua_State *L, TValue *o1, *o2, int op)
|
|
+ | call_intern vmeta_comp, lj_meta_comp
|
|
+ | // Returns 0/1 or TValue * (metamethod).
|
|
+ |3:
|
|
+ | sltiu TMP1, CRET1, 2
|
|
+ | bxeqz TMP1, ->vmeta_binop
|
|
+ | negw TMP2, CRET1
|
|
+ |4:
|
|
+ | lhu RD, OFS_RD(PC)
|
|
+ | addi PC, PC, 4
|
|
+ | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff
|
|
+ | slliw RD, RD, 2
|
|
+ | addw RD, RD, TMP1
|
|
+ | and RD, RD, TMP2
|
|
+ | add PC, PC, RD
|
|
+ |->cont_nop:
|
|
+ | ins_next
|
|
+ |
|
|
+ |->cont_ra: // RA = resultptr
|
|
+ | lbu TMP1, -4+OFS_RA(PC)
|
|
+ | ld TMP2, 0(RA)
|
|
+ | slliw TMP1, TMP1, 3
|
|
+ | add TMP1, BASE, TMP1
|
|
+ | sd TMP2, 0(TMP1)
|
|
+ | j ->cont_nop
|
|
+ |
|
|
+ |->cont_condt: // RA = resultptr
|
|
+ | ld TMP0, 0(RA)
|
|
+ | gettp TMP0, TMP0
|
|
+ | sltiu TMP1, TMP0, LJ_TISTRUECOND
|
|
+ | negw TMP2, TMP1 // Branch if result is true.
|
|
+ | j <4
|
|
+ |
|
|
+ |->cont_condf: // RA = resultptr
|
|
+ | ld TMP0, 0(RA)
|
|
+ | gettp TMP0, TMP0
|
|
+ | sltiu TMP1, TMP0, LJ_TISTRUECOND
|
|
+ | addiw TMP2, TMP1, -1 // Branch if result is false.
|
|
+ | j <4
|
|
+ |
|
|
+ |->vmeta_equal:
|
|
+ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
|
|
+ | cleartp LFUNC:CARG3, CARG2
|
|
+ | cleartp LFUNC:CARG2, CARG1
|
|
+ | mv CARG4, TMP0
|
|
+ | addi PC, PC, -4
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | // (lua_State *L, GCobj *o1, *o2, int ne)
|
|
+ | call_intern vmeta_equal, lj_meta_equal
|
|
+ | // Returns 0/1 or TValue * (metamethod).
|
|
+ | j <3
|
|
+ |
|
|
+  |->vmeta_istype:
|
|
+  |  addi PC, PC, -4  // ins_NEXT1 already advanced PC by 4; step back to the current instruction.
|
|
+  |  sd BASE, L->base
|
|
+  |  mv CARG1, L
|
|
+  |  srliw CARG2, RA, 3  // Pass RA >> 3, not a pointer (presumably RA is still slot*8 here, giving the slot index -- confirm).
|
|
+  |  srliw CARG3, RD, 3  // Likewise RD >> 3.
|
|
+  |  sd PC, SAVE_PC(sp)
|
|
+  |  // (lua_State *L, BCReg ra, BCReg tp)
|
|
+  |  call_intern vmeta_istype, lj_meta_istype
|
|
+  |  j ->cont_nop
|
|
+ |
|
|
+ |//-- Arithmetic metamethods ---------------------------------------------
|
|
+ |
|
|
+ |->vmeta_unm:
|
|
+ | mv RC, RB
|
|
+ |
|
|
+ |->vmeta_arith:
|
|
+ | mv CARG1, L
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG2, RA
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv CARG3, RB
|
|
+ | mv CARG4, RC
|
|
+ | decode_OP1 CARG5, INS
|
|
+ | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|
|
+ | call_intern vmeta_arith, lj_meta_arith
|
|
+ | // Returns NULL (finished) or TValue * (metamethod).
|
|
+ | bxeqz CRET1, ->cont_nop
|
|
+ |
|
|
+ | // Call metamethod for binary op.
|
|
+ |->vmeta_binop:
|
|
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
|
|
+ | sub TMP1, CRET1, BASE
|
|
+ | sd PC, -24(CRET1) // [cont|PC]
|
|
+ | mv TMP2, BASE
|
|
+ | addi PC, TMP1, FRAME_CONT
|
|
+ | mv BASE, CRET1
|
|
+ | li NARGS8:RC, 16 // 2 args for func(o1, o2).
|
|
+ | j ->vm_call_dispatch
|
|
+ |
|
|
+ |->vmeta_len:
|
|
+ | // CARG2 already set by BC_LEN.
|
|
+#if LJ_52
|
|
+ | mv MULTRES, CARG1
|
|
+#endif
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o)
|
|
+ | // Returns NULL (retry) or TValue * (metamethod base).
|
|
+#if LJ_52
|
|
+ | bxnez CRET1, ->vmeta_binop // Binop call for compatibility.
|
|
+ | mv CARG1, MULTRES
|
|
+ | j ->BC_LEN_Z
|
|
+#else
|
|
+ | j ->vmeta_binop // Binop call for compatibility.
|
|
+#endif
|
|
+ |
|
|
+ |//-- Call metamethod ----------------------------------------------------
|
|
+ |
|
|
+ |->vmeta_call: // Resolve and call __call metamethod.
|
|
+ | // TMP2 = old base, BASE = new base, RC = nargs*8
|
|
+ | mv CARG1, L
|
|
+ | sd TMP2, L->base // This is the callers base!
|
|
+ | addi CARG2, BASE, -16
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | add CARG3, BASE, RC
|
|
+ | mv MULTRES, NARGS8:RC
|
|
+ | // (lua_State *L, TValue *func, TValue *top)
|
|
+ | call_intern vmeta_call, lj_meta_call
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
|
|
+ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now.
|
|
+ | cleartp LFUNC:RB
|
|
+ | ins_call
|
|
+ |
|
|
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
|
|
+ | // BASE = old base, RA = new base, RC = nargs*8
|
|
+ | mv CARG1, L
|
|
+ | sd BASE, L->base
|
|
+ | addi CARG2, RA, -16
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | add CARG3, RA, RC
|
|
+ | mv MULTRES, NARGS8:RC
|
|
+ | // (lua_State *L, TValue *func, TValue *top)
|
|
+ | call_intern vmeta_callt, lj_meta_call
|
|
+ | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
|
|
+ | ld TMP1, FRAME_PC(BASE)
|
|
+ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now.
|
|
+ | cleartp LFUNC:CARG3, RB
|
|
+ | j ->BC_CALLT_Z
|
|
+ |
|
|
+ |//-- Argument coercion for 'for' statement ------------------------------
|
|
+ |
|
|
+ |->vmeta_for:
|
|
+ | mv CARG1, L
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG2, RA
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv MULTRES, INS
|
|
+ | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base)
|
|
+ | decode_RA8 RA, MULTRES
|
|
+ | decode_RD8 RD, MULTRES
|
|
+ | j =>BC_FORI
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Fast functions -----------------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |.macro .ffunc, name
|
|
+ |->ff_ .. name:
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro .ffunc_1, name
|
|
+ |->ff_ .. name:
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro .ffunc_2, name
|
|
+ |->ff_ .. name:
|
|
+ | sltiu TMP0, NARGS8:RC, 16
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | ld CARG2, 8(BASE)
|
|
+ | bxnez TMP0, ->fff_fallback
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro .ffunc_n, name
|
|
+ |->ff_ .. name:
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | fld FARG1, 0(BASE)
|
|
+ | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | checknum CARG1, ->fff_fallback
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro .ffunc_nn, name
|
|
+ |->ff_ .. name:
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | sltiu TMP0, NARGS8:RC, 16
|
|
+ | ld CARG2, 8(BASE)
|
|
+ | bxnez TMP0, ->fff_fallback
|
|
+ | gettp TMP1, CARG1
|
|
+ | gettp TMP2, CARG2
|
|
+ | sltiu TMP1, TMP1, LJ_TISNUM
|
|
+ | sltiu TMP2, TMP2, LJ_TISNUM
|
|
+ | fld FARG1, 0(BASE)
|
|
+ | and TMP1, TMP1, TMP2
|
|
+ | fld FARG2, 8(BASE)
|
|
+ | bxeqz TMP1, ->fff_fallback
|
|
+ |.endmacro
|
|
+ |
|
|
+ |// Inlined GC threshold check.
|
|
+ |.macro ffgccheck
|
|
+ | ld TMP0, GL->gc.total
|
|
+ | ld TMP1, GL->gc.threshold
|
|
+ | bltu TMP0, TMP1, >1
|
|
+ | jal ->fff_gcstep
|
|
+ |1:
|
|
+ |.endmacro
|
|
+ |
|
|
+ |//-- Base library: checks -----------------------------------------------
|
|
+ |.ffunc_1 assert
|
|
+ | gettp TMP1, CARG1
|
|
+ | sltiu TMP1, TMP1, LJ_TISTRUECOND
|
|
+ | addi RA, BASE, -16
|
|
+ | bxeqz TMP1, ->fff_fallback
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
|
|
+ | addi TMP1, BASE, 8
|
|
+ | add TMP2, RA, RD
|
|
+ | sd CARG1, -16(BASE)
|
|
+ | bne BASE, TMP2, >1
|
|
+ | j ->fff_res // Done if exactly 1 argument.
|
|
+ |1:
|
|
+ | ld TMP0, 0(TMP1)
|
|
+ | sd TMP0, -16(TMP1)
|
|
+ | mv TMP3, TMP1
|
|
+ | addi TMP1, TMP1, 8
|
|
+ | bne TMP3, TMP2, <1
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |.ffunc_1 type
|
|
+ | gettp TMP0, CARG1
|
|
+ | not TMP3, TMP0
|
|
+ | bltu TISNUM, TMP0, >1
|
|
+ | li TMP3, ~LJ_TISNUM
|
|
+ |1:
|
|
+ | slli TMP3, TMP3, 3
|
|
+ | add TMP3, CFUNC:RB, TMP3
|
|
+ | ld CARG1, CFUNC:TMP3->upvalue
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |//-- Base library: getters and setters ---------------------------------
|
|
+ |
|
|
+ |.ffunc_1 getmetatable
|
|
+ | gettp TMP2, CARG1
|
|
+ | addi TMP0, TMP2, -LJ_TTAB
|
|
+ | addi TMP1, TMP2, -LJ_TUDATA
|
|
+ | snez TMP0, TMP0
|
|
+ | neg TMP0, TMP0
|
|
+ | and TMP0, TMP0, TMP1
|
|
+ | cleartp TAB:CARG1
|
|
+ | bnez TMP0, >6
|
|
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
|
|
+ | ld TAB:RB, TAB:CARG1->metatable
|
|
+ |2:
|
|
+ | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
|
|
+ | li CARG1, LJ_TNIL
|
|
+ | bxeqz TAB:RB, ->fff_restv
|
|
+ | lw TMP0, TAB:RB->hmask
|
|
+ | lw TMP1, STR:RC->sid
|
|
+ | ld NODE:TMP2, TAB:RB->node
|
|
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
|
|
+ | slli TMP0, TMP1, 5
|
|
+ | slli TMP1, TMP1, 3
|
|
+ | sub TMP1, TMP0, TMP1
|
|
+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|
|
+ | li CARG4, LJ_TSTR
|
|
+ | settp STR:RC, CARG4 // Tagged key to look for.
|
|
+ |3: // Rearranged logic, because we expect _not_ to find the key.
|
|
+ | ld TMP0, NODE:TMP2->key
|
|
+ | ld CARG1, NODE:TMP2->val
|
|
+ | ld NODE:TMP2, NODE:TMP2->next
|
|
+ | li TMP3, LJ_TTAB
|
|
+ | beq RC, TMP0, >5
|
|
+ | bnez NODE:TMP2, <3
|
|
+ |4:
|
|
+ | settp CARG1, RB, TMP3
|
|
+ | j ->fff_restv // Not found, keep default result.
|
|
+ |5:
|
|
+ | bxne CARG1, TISNIL, ->fff_restv
|
|
+ | j <4 // Ditto for nil value.
|
|
+ |
|
|
+ |6:
|
|
+ | sltiu TMP3, TMP2, LJ_TISNUM
|
|
+ | neg TMP4, TMP3
|
|
+ | xor TMP0, TMP2, TISNUM // TMP2 = TMP3 ? TISNUM : TMP2
|
|
+ | and TMP0, TMP0, TMP4
|
|
+ | xor TMP2, TMP0, TMP2
|
|
+ | slli TMP2, TMP2, 3
|
|
+ | sub TMP0, GL, TMP2
|
|
+ | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0)
|
|
+ | j <2
|
|
+ |
|
|
+ |.ffunc_2 setmetatable
|
|
+ | // Fast path: no mt for table yet and not clearing the mt.
|
|
+ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
|
|
+ | gettp TMP3, CARG2
|
|
+ | ld TAB:TMP0, TAB:TMP1->metatable
|
|
+ | lbu TMP2, TAB:TMP1->marked
|
|
+ | addi TMP3, TMP3, -LJ_TTAB
|
|
+ | cleartp TAB:CARG2
|
|
+ | or TMP3, TMP3, TAB:TMP0
|
|
+ | bxnez TMP3, ->fff_fallback
|
|
+ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table)
|
|
+ | sd TAB:CARG2, TAB:TMP1->metatable
|
|
+ | bxeqz TMP3, ->fff_restv
|
|
+ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
|
|
+ |
|
|
+ |.ffunc rawget
|
|
+ | ld CARG2, 0(BASE)
|
|
+ | sltiu TMP0, NARGS8:RC, 16
|
|
+ | gettp TMP1, CARG2
|
|
+ | cleartp CARG2
|
|
+ | addi TMP1, TMP1, -LJ_TTAB
|
|
+ | or TMP0, TMP0, TMP1
|
|
+ | addi CARG3, BASE, 8
|
|
+ | bxnez TMP0, ->fff_fallback
|
|
+ | mv CARG1, L
|
|
+ | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
|
|
+ | // Returns cTValue *.
|
|
+ | ld CARG1, 0(CRET1)
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |//-- Base library: conversions ------------------------------------------
|
|
+ |
|
|
+ |.ffunc tonumber
|
|
+ | // Only handles the number case inline (without a base argument).
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | xori TMP0, NARGS8:RC, 8 // TMP0 == 0 only for exactly one argument (nargs*8 == 8).
|
|
+ | gettp TMP1, CARG1
|
|
+ | sltu TMP1, TISNUM, TMP1 // TMP1 = 1 if the tag is above TISNUM (unsigned), i.e. neither int nor number.
|
|
+ | or TMP0, TMP0, TMP1
|
|
+ | bxnez TMP0, ->fff_fallback // Not exactly one arg, or CARG1 is not a number.
|
|
+ | j ->fff_restv // Return the argument unchanged.
|
|
+ |
|
|
+ |.ffunc_1 tostring
|
|
+ | // Only handles the string or number case inline.
|
|
+ | gettp TMP0, CARG1
|
|
+ | addi TMP1, TMP0, -LJ_TSTR
|
|
+ | // A __tostring method in the string base metatable is ignored.
|
|
+ | bxeqz TMP1, ->fff_restv // String key?
|
|
+ | // Handle numbers inline, unless a number base metatable is present.
|
|
+ | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
|
|
+ | sltu TMP0, TISNUM, TMP0
|
|
+ | sd BASE, L->base // Add frame since C call can throw.
|
|
+ | or TMP0, TMP0, TMP1
|
|
+ | bxnez TMP0, ->fff_fallback
|
|
+ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
|
|
+ | ffgccheck
|
|
+ | mv CARG1, L
|
|
+ | mv CARG2, BASE
|
|
+ | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o)
|
|
+ | // Returns GCstr *.
|
|
+ | li TMP1, LJ_TSTR
|
|
+ | ld BASE, L->base
|
|
+ | settp CARG1, TMP1
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |//-- Base library: iterators -------------------------------------------
|
|
+ |
|
|
+ |.ffunc_1 next
|
|
+ | checktp CARG1, -LJ_TTAB, ->fff_fallback
|
|
+ | add TMP0, BASE, NARGS8:RC
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil.
|
|
+ | addi CARG2, BASE, 8
|
|
+ | addi CARG3, BASE, -16
|
|
+ | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
|
|
+ | // Returns 1=found, 0=end, -1=error.
|
|
+ | li RD, (2+1)*8
|
|
+ | bxgtz CRET1, ->fff_res // Found key/value.
|
|
+ | mv TMP1, CRET1
|
|
+ | mv CARG1, TISNIL
|
|
+ | bxeqz TMP1, ->fff_restv // End of traversal: return nil.
|
|
+ | ld CFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | li RC, 2*8
|
|
+ | cleartp CFUNC:RB
|
|
+ | j ->fff_fallback // Invalid key.
|
|
+ |
|
|
+ |.ffunc_1 pairs
|
|
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+#if LJ_52
|
|
+ | ld TAB:TMP2, TAB:TMP1->metatable
|
|
+ | ld TMP0, CFUNC:RB->upvalue[0]
|
|
+ | bxnez TAB:TMP2, ->fff_fallback
|
|
+#else
|
|
+ | ld TMP0, CFUNC:RB->upvalue[0]
|
|
+#endif
|
|
+ | sd TISNIL, 0(BASE)
|
|
+ | sd CARG1, -8(BASE)
|
|
+ | sd TMP0, -16(BASE)
|
|
+ | li RD, (3+1)*8
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |.ffunc_2 ipairs_aux
|
|
+ | checktab CARG1, ->fff_fallback
|
|
+ | checkint CARG2, ->fff_fallback
|
|
+ | lw TMP0, TAB:CARG1->asize
|
|
+ | ld TMP1, TAB:CARG1->array
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | sext.w TMP2, CARG2
|
|
+ | addiw TMP2, TMP2, 1
|
|
+ | sltu TMP3, TMP2, TMP0
|
|
+ | zext.w TMP0, TMP2
|
|
+ | settp_b TMP0, TISNUM
|
|
+ | sd TMP0, -16(BASE)
|
|
+ | beqz TMP3, >2 // Not in array part?
|
|
+ | slli TMP3, TMP2, 3
|
|
+ | add TMP3, TMP1, TMP3
|
|
+ | ld TMP1, 0(TMP3)
|
|
+ |1:
|
|
+ | li RD, (0+1)*8
|
|
+ | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
|
|
+ | sd TMP1, -8(BASE)
|
|
+ | li RD, (2+1)*8
|
|
+ | j ->fff_res
|
|
+ |2: // Check for empty hash part first. Otherwise call C function.
|
|
+ | lw TMP0, TAB:CARG1->hmask
|
|
+ | li RD, (0+1)*8
|
|
+ | bxeqz TMP0, ->fff_res
|
|
+ | mv CARG2, TMP2
|
|
+ | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key)
|
|
+ | // Returns cTValue * or NULL.
|
|
+ | li RD, (0+1)*8
|
|
+ | bxeqz CRET1, ->fff_res
|
|
+ | ld TMP1, 0(CRET1)
|
|
+ | j <1
|
|
+ |
|
|
+ |.ffunc_1 ipairs
|
|
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+#if LJ_52
|
|
+ | ld TAB:TMP2, TAB:TMP1->metatable
|
|
+#endif
|
|
+ | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
|
|
+#if LJ_52
|
|
+ | bxnez TAB:TMP2, ->fff_fallback
|
|
+#endif
|
|
+ | slli TMP1, TISNUM, 47
|
|
+ | sd CARG1, -8(BASE)
|
|
+ | sd TMP1, 0(BASE)
|
|
+ | sd CFUNC:TMP0, -16(BASE)
|
|
+ | li RD, (3+1)*8
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |//-- Base library: catch errors ----------------------------------------
|
|
+ |
|
|
+ |.ffunc pcall // Fast path for pcall(f, ...): sets up a FRAME_PCALL frame and dispatches the call.
|
|
+ | ld TMP1, L->maxstack
|
|
+ | add TMP2, BASE, NARGS8:RC
|
|
+ | bxltu TMP1, TMP2, ->fff_fallback // Argument top is beyond maxstack: let the fallback handle it.
|
|
+ | addi NARGS8:TMP0, NARGS8:RC, -8 // Argument bytes remaining after the function itself.
|
|
+ | lbu TMP3, GL->hookmask
|
|
+ | mv TMP2, BASE // TMP2 = old base; presumably consumed by vm_call_dispatch (cf. vm_call_tail).
|
|
+ | bxltz NARGS8:TMP0, ->fff_fallback // No arguments at all.
|
|
+ | mv NARGS8:RC, NARGS8:TMP0
|
|
+ | addi BASE, BASE, 16
|
|
+ | // Remember active hook before pcall.
|
|
+ | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
|
|
+ | andi TMP3, TMP3, 1 // TMP3 = hook-active bit (0 or 1).
|
|
+ | addi PC, TMP3, 16+FRAME_PCALL // Frame link: delta 16 + FRAME_PCALL + hook-active bit.
|
|
+ | bxeqz NARGS8:RC, ->vm_call_dispatch // Nothing to move.
|
|
+ |1: // Also entered from xpcall (its 'bnez NARGS8:RC, <1').
|
|
+ | add TMP0, BASE, NARGS8:RC
|
|
+ |2: // Move the arguments up by one slot, starting from the top.
|
|
+ | ld TMP1, -16(TMP0)
|
|
+ | sd TMP1, -8(TMP0)
|
|
+ | addi TMP0, TMP0, -8
|
|
+ | bne TMP0, BASE, <2
|
|
+ | j ->vm_call_dispatch
|
|
+ |
|
|
+ |.ffunc xpcall // Fast path for xpcall(f, traceback, ...): like pcall, but with a 24 byte frame delta.
|
|
+ | ld TMP1, L->maxstack
|
|
+ | add TMP2, BASE, NARGS8:RC
|
|
+ | bxltu TMP1, TMP2, ->fff_fallback // Argument top is beyond maxstack: let the fallback handle it.
|
|
+ | addi NARGS8:TMP0, NARGS8:RC, -16 // Argument bytes remaining after f and traceback.
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | ld CARG2, 8(BASE)
|
|
+ | lbu TMP3, GL->hookmask // Must land in TMP3: the hook-active bit is extracted from TMP3 below.
|
|
+ | bxltz NARGS8:TMP0, ->fff_fallback // Fewer than two arguments.
|
|
+ | gettp TMP2, CARG2
|
|
+ | addi TMP2, TMP2, -LJ_TFUNC
|
|
+ | bxnez TMP2, ->fff_fallback // Traceback must be a function.
|
|
+ | mv TMP2, BASE // TMP2 = old base; presumably consumed by vm_call_dispatch (cf. vm_call_tail).
|
|
+ | mv NARGS8:RC, NARGS8:TMP0
|
|
+ | addi BASE, BASE, 24
|
|
+ | // Remember active hook before pcall.
|
|
+ | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
|
|
+ | sd CARG2, 0(TMP2) // Swap function and traceback.
|
|
+ | andi TMP3, TMP3, 1 // TMP3 = hook-active bit (0 or 1).
|
|
+ | sd CARG1, 8(TMP2)
|
|
+ | addi PC, TMP3, 24+FRAME_PCALL // Frame link: delta 24 + FRAME_PCALL + hook-active bit.
|
|
+ | bnez NARGS8:RC, <1 // Reuse the argument move loop of pcall.
|
|
+ | j ->vm_call_dispatch
|
|
+ |
|
|
+ |//-- Coroutine library --------------------------------------------------
|
|
+ |
|
|
+ |.macro coroutine_resume_wrap, resume
|
|
+ |.if resume
|
|
+ |.ffunc_1 coroutine_resume
|
|
+ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
|
|
+ |.else
|
|
+ |.ffunc coroutine_wrap_aux
|
|
+ | ld L:CARG1, CFUNC:RB->upvalue[0].gcr
|
|
+ | cleartp L:CARG1
|
|
+ |.endif
|
|
+ | lbu TMP0, L:CARG1->status
|
|
+ | ld TMP1, L:CARG1->cframe
|
|
+ | ld CARG2, L:CARG1->top
|
|
+ | ld TMP2, L:CARG1->base
|
|
+ | addiw CARG4, TMP0, -LUA_YIELD
|
|
+ | add CARG3, CARG2, TMP0
|
|
+ | addi TMP3, CARG2, 8
|
|
+ | seqz TMP4, CARG4
|
|
+ | neg TMP4, TMP4
|
|
+ | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3
|
|
+ | and CARG2, CARG2, TMP4
|
|
+ | xor CARG2, TMP3, CARG2
|
|
+ | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD?
|
|
+ | xor TMP2, TMP2, CARG3
|
|
+ | or CARG4, TMP2, TMP0
|
|
+ | bxnez TMP1, ->fff_fallback // cframe != 0?
|
|
+ | ld TMP0, L:CARG1->maxstack
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | bxeqz CARG4, ->fff_fallback // base == top && st == 0?
|
|
+ | add TMP2, CARG2, NARGS8:RC
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow?
|
|
+ |1:
|
|
+ |.if resume
|
|
+ | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
|
|
+ | addi NARGS8:RC, NARGS8:RC, -8
|
|
+ | addi TMP2, TMP2, -8
|
|
+ |.endif
|
|
+ | sd TMP2, L:CARG1->top
|
|
+ | sd BASE, L->top
|
|
+ | add TMP1, BASE, NARGS8:RC
|
|
+ | mv CARG3, CARG2
|
|
+ |2: // Move args to coroutine.
|
|
+ | ld TMP0, 0(BASE)
|
|
+ | sltu TMP3, BASE, TMP1
|
|
+ | addi BASE, BASE, 8
|
|
+ | beqz TMP3, >3
|
|
+ | sd TMP0, 0(CARG3)
|
|
+ | addi CARG3, CARG3, 8
|
|
+ | j <2
|
|
+ |3:
|
|
+ | mv L:RA, L:CARG1
|
|
+ | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
|
|
+ | // Returns thread status.
|
|
+ |4:
|
|
+ | ld TMP2, L:RA->base
|
|
+ | sltiu TMP1, CRET1, LUA_YIELD+1
|
|
+ | ld TMP3, L:RA->top
|
|
+ | li_vmstate INTERP
|
|
+ | ld BASE, L->base
|
|
+ | sd L, GL->cur_L
|
|
+ | st_vmstate
|
|
+ | sub RD, TMP3, TMP2
|
|
+ | beqz TMP1, >8
|
|
+ | ld TMP0, L->maxstack
|
|
+ | add TMP1, BASE, RD
|
|
+ | beqz RD, >6 // No results?
|
|
+ | add TMP3, TMP2, RD
|
|
+ | bltu TMP0, TMP1, >9 // Need to grow stack?
|
|
+ | sd TMP2, L:RA->top // Clear coroutine stack.
|
|
+ | mv TMP1, BASE
|
|
+ |5: // Move results from coroutine.
|
|
+ | ld TMP0, 0(TMP2)
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | sd TMP0, 0(TMP1)
|
|
+ | addi TMP1, TMP1, 8
|
|
+ | bltu TMP2, TMP3, <5
|
|
+ |6:
|
|
+ |.if resume
|
|
+ | mov_true TMP1
|
|
+ | addi RD, RD, 16
|
|
+ |7:
|
|
+ | sd TMP1, -8(BASE) // Prepend true/false to results.
|
|
+ | addi RA, BASE, -8
|
|
+ |.else
|
|
+ | mv RA, BASE
|
|
+ | addi RD, RD, 8
|
|
+ |.endif
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv MULTRES, RD
|
|
+ |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use
|
|
+ | bnez TMP0, >6
|
|
+ | j ->BC_RET_Z
|
|
+ |6:
|
|
+ | j ->vm_return
|
|
+ |
|
|
+ |8: // Coroutine returned with error (at co->top-1).
|
|
+ |.if resume
|
|
+ | addi TMP3, TMP3, -8
|
|
+ | mov_false TMP1
|
|
+ | li RD, (2+1)*8
|
|
+ | ld TMP0, 0(TMP3)
|
|
+ | sd TMP3, L:RA->top // Remove error from coroutine stack.
|
|
+ | sd TMP0, 0(BASE) // Copy error message.
|
|
+ | j <7
|
|
+ |.else
|
|
+ | mv CARG1, L
|
|
+ | mv CARG2, L:RA
|
|
+ | // (lua_State *L, lua_State *co)
|
|
+ | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err
|
|
+ |.endif
|
|
+ |
|
|
+ |9: // Handle stack expansion on return from yield.
|
|
+ | mv CARG1, L
|
|
+ | srliw CARG2, RD, 3
|
|
+ | // (lua_State *L, int n)
|
|
+ |.if resume
|
|
+ | call_intern ff_coroutine_resume, lj_state_growstack
|
|
+ |.else
|
|
+ | call_intern ff_coroutine_wrap_aux, lj_state_growstack
|
|
+ |.endif
|
|
+ | mv CRET1, x0
|
|
+ | j <4
|
|
+ |.endmacro
|
|
+ |
|
|
+ | coroutine_resume_wrap 1 // coroutine.resume
|
|
+ | coroutine_resume_wrap 0 // coroutine.wrap
|
|
+ |
|
|
+ |.ffunc coroutine_yield // Fast path for coroutine.yield(...): only taken inside a resume frame.
|
|
+ | ld TMP0, L->cframe
|
|
+ | add TMP1, BASE, NARGS8:RC // TMP1 = end of the arguments, stored as L->top below.
|
|
+ | li CRET1, LUA_YIELD // Value written to L->status below.
|
|
+ | sd BASE, L->base
|
|
+ | andi TMP0, TMP0, CFRAME_RESUME
|
|
+ | sd TMP1, L->top
|
|
+ | bxeqz TMP0, ->fff_fallback // CFRAME_RESUME bit not set in cframe: use the fallback.
|
|
+ | sd x0, L->cframe // Clear L->cframe.
|
|
+ | sb CRET1, L->status // L->status = LUA_YIELD.
|
|
+ | j ->vm_leave_unw
|
|
+ |
|
|
+ |//-- Math library -------------------------------------------------------
|
|
+ |
|
|
+ |.macro math_round, func, rm
|
|
+ |->ff_math_ .. func:
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | gettp TMP0, CARG1
|
|
+ | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | fmv.d.x FARG1, CARG1
|
|
+ | bxeq TMP0, TISNUM, ->fff_restv
|
|
+ | srli TMP1, CARG1, 52 // Extract exponent (and sign).
|
|
+ | bxgeu TMP0, TISNUM, ->fff_fallback
|
|
+ | andi TMP1, TMP1, 0x7ff // Extract exponent.
|
|
+ | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction
|
|
+ | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN?
|
|
+ | fcvt.l.d TMP3, FARG1, rm
|
|
+ | fcvt.d.l FTMP1, TMP3
|
|
+ | fsgnj.d FRET1, FTMP1, FARG1
|
|
+ | j ->fff_resn
|
|
+ |.endmacro
|
|
+ |
|
|
+ | math_round floor, rdn
|
|
+ | math_round ceil, rup
|
|
+ |
|
|
+ |.ffunc_1 math_abs // math.abs(x) for an integer or a number argument; falls through into fff_restv.
|
|
+ | gettp CARG2, CARG1
|
|
+ | addi TMP2, CARG2, -LJ_TISNUM // TMP2 == 0 if the argument is an integer.
|
|
+ | sext.w TMP1, CARG1
|
|
+ | bnez TMP2, >1
|
|
+ | sraiw TMP0, TMP1, 31 // Integer case: TMP0 = sign mask (0 or -1).
|
|
+ | xor TMP1, TMP1, TMP0
|
|
+ | sub CARG1, TMP1, TMP0 // abs(x) = (x ^ mask) - mask.
|
|
+ | slli TMP3, CARG1, 32 // Moves bit 31 into the sign bit: set only for abs(-2^31).
|
|
+ | settp CARG1, TISNUM
|
|
+ | bxgez TMP3, ->fff_restv // Result fits in an int32.
|
|
+ | lui CARG1, 0x41e00 // 2^31 as a double.
|
|
+ | slli CARG1, CARG1, 32 // CARG1 = 0x41e0000000000000.
|
|
+ | j ->fff_restv
|
|
+ |1: // Not an integer.
|
|
+ | sltiu TMP2, CARG2, LJ_TISNUM // TMP2 = 1 if the tag is below LJ_TISNUM (unsigned), i.e. a number.
|
|
+ | slli CARG1, CARG1, 1
|
|
+ | srli CARG1, CARG1, 1 // Clear bit 63 (the sign bit of a double).
|
|
+ | bxeqz TMP2, ->fff_fallback // Neither an integer nor a number.
|
|
+ |// fallthrough
|
|
+ |
|
|
+ |->fff_restv:
|
|
+ | // CARG1 = TValue result.
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | sd CARG1, -16(BASE)
|
|
+ |->fff_res1:
|
|
+ | // RA = results, PC = return.
|
|
+ | li RD, (1+1)*8
|
|
+ |->fff_res:
|
|
+ | // RA = results, RD = (nresults+1)*8, PC = return.
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | mv MULTRES, RD
|
|
+ | addi RA, BASE, -16
|
|
+ | bxnez TMP0, ->vm_return
|
|
+ | lw INS, -4(PC)
|
|
+ | decode_RB8 RB, INS
|
|
+ |5:
|
|
+ | bltu RD, RB, >6 // More results expected?
|
|
+ | decode_RA8a TMP0, INS
|
|
+ | ins_next1
|
|
+ | decode_RA8b TMP0
|
|
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
|
|
+ | sub BASE, RA, TMP0
|
|
+ | ins_next2
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | add TMP1, RA, RD
|
|
+ | addi RD, RD, 8
|
|
+ | sd TISNIL, -8(TMP1)
|
|
+ | j <5
|
|
+ |
|
|
+ |.macro math_extern, func
|
|
+ | .ffunc_n math_ .. func
|
|
+ | call_extern ff_math_extern, func
|
|
+ | j ->fff_resn
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro math_extern2, func
|
|
+ | .ffunc_nn math_ .. func
|
|
+ | call_extern ff_math_extern2, func
|
|
+ | j ->fff_resn
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_n math_sqrt
|
|
+ | fsqrt.d FRET1, FARG1
|
|
+ |->fff_resn:
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | fsd FRET1, -16(BASE)
|
|
+ | j ->fff_res1
|
|
+ |
|
|
+ |.ffunc math_log
|
|
+ | li TMP1, 8
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | fld FARG1, 0(BASE)
|
|
+ | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
|
|
+ | checknum CARG1, ->fff_fallback
|
|
+ | call_extern ff_math_log, log
|
|
+ | j ->fff_resn
|
|
+ |
|
|
+ | math_extern log10
|
|
+ | math_extern exp
|
|
+ | math_extern sin
|
|
+ | math_extern cos
|
|
+ | math_extern tan
|
|
+ | math_extern asin
|
|
+ | math_extern acos
|
|
+ | math_extern atan
|
|
+ | math_extern sinh
|
|
+ | math_extern cosh
|
|
+ | math_extern tanh
|
|
+ | math_extern2 pow
|
|
+ | math_extern2 atan2
|
|
+ | math_extern2 fmod
|
|
+ |
|
|
+ |.ffunc_2 math_ldexp
|
|
+ | checknum CARG1, ->fff_fallback
|
|
+ | checkint CARG2, ->fff_fallback
|
|
+ | fld FARG1, 0(BASE)
|
|
+ | lw CARG1, 8(BASE)
|
|
+ | call_extern ff_math_ldexp, ldexp // (double x, int exp)
|
|
+ | j ->fff_resn
|
|
+ |
|
|
+ |.ffunc_n math_frexp
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | addi CARG1, GL, offsetof(global_State, tmptv)
|
|
+ | call_extern ff_math_frexp, frexp
|
|
+ | lw TMP1, GL->tmptv
|
|
+ | fcvt.d.w FARG2, TMP1
|
|
+ | fsd FRET1, -16(BASE)
|
|
+ | fsd FARG2, -8(BASE)
|
|
+ | li RD, (2+1)*8
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |.ffunc_n math_modf
|
|
+ | addi CARG1, BASE, -16
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | call_extern ff_math_modf, modf
|
|
+ | fsd FRET1, -8(BASE)
|
|
+ | li RD, (2+1)*8
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |.macro math_minmax, name, ismax
|
|
+ | .ffunc_1 name
|
|
+ | add RB, BASE, NARGS8:RC
|
|
+ | addi RA, BASE, 8
|
|
+ | checkint CARG1, >4
|
|
+ |1: // Handle integers.
|
|
+ | ld CARG2, 0(RA)
|
|
+ | bxeq RA, RB, ->fff_restv
|
|
+ | sext.w CARG1, CARG1
|
|
+ | checkint CARG2, >3
|
|
+ | sext.w CARG2, CARG2
|
|
+ | slt TMP0, CARG1, CARG2
|
|
+ |.if ismax
|
|
+ | addi TMP1, TMP0, -1
|
|
+ |.else
|
|
+ | neg TMP1, TMP0
|
|
+ |.endif
|
|
+ | xor TMP2, CARG1, CARG2 // CARG1 = TMP1 ? CARG1 : CARG2
|
|
+ | and TMP2, TMP2, TMP1
|
|
+ | xor CARG1, CARG2, TMP2
|
|
+ | addi RA, RA, 8
|
|
+ | zext.w CARG1, CARG1
|
|
+ | settp_b CARG1, TISNUM
|
|
+ | j <1
|
|
+ |3: // Convert intermediate result to number and continue below.
|
|
+ | fcvt.d.w FARG1, CARG1
|
|
+ | checknum CARG2, ->fff_fallback
|
|
+ | fld FARG2, 0(RA)
|
|
+ | j >6
|
|
+ |
|
|
+ |4:
|
|
+ | fld FARG1, 0(BASE)
|
|
+ | checknum CARG1, ->fff_fallback
|
|
+ |5: // Handle numbers.
|
|
+ | ld CARG2, 0(RA)
|
|
+ | fld FARG2, 0(RA)
|
|
+ | bxgeu RA, RB, ->fff_resn
|
|
+ | checknum CARG2, >7
|
|
+ |6:
|
|
+ |.if ismax
|
|
+ | flt.d TMP0, FARG2, FARG1
|
|
+ |.else // min
|
|
+ | flt.d TMP0, FARG1, FARG2
|
|
+ |.endif
|
|
+ | bnez TMP0, >8 // skip swap
|
|
+ | fmv.d FARG1, FARG2
|
|
+ |8:
|
|
+ | addi RA, RA, 8
|
|
+ | j <5
|
|
+ |7: // Convert integer to number and continue above.
|
|
+ | checkint CARG2, ->fff_fallback
|
|
+ | fcvt.d.w FARG2, CARG2
|
|
+ | j <6
|
|
+ |.endmacro
|
|
+ |
|
|
+ | math_minmax math_min, 0
|
|
+ | math_minmax math_max, 1
|
|
+ |
|
|
+ |//-- String library -----------------------------------------------------
|
|
+ |
|
|
+ |.ffunc string_byte // Only handle the 1-arg case here.
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | gettp TMP0, CARG1
|
|
+ | xori TMP1, NARGS8:RC, 8
|
|
+ | addi TMP0, TMP0, -LJ_TSTR
|
|
+ | or TMP1, TMP1, TMP0
|
|
+ | cleartp STR:CARG1
|
|
+ | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument.
|
|
+ | lw TMP0, STR:CARG1->len
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | snez RD, TMP0
|
|
+ | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end).
|
|
+ | addiw RD, RD, 1
|
|
+ | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8
|
|
+ | settp_b TMP2, TISNUM
|
|
+ | sd TMP2, -16(BASE)
|
|
+ | j ->fff_res
|
|
+ |
|
|
+ |.ffunc string_char // Only handle the 1-arg case here.
|
|
+ | ffgccheck
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | gettp TMP0, CARG1
|
|
+ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument.
|
|
+ | addi TMP0, TMP0, -LJ_TISNUM // Integer.
|
|
+ | li TMP2, 255
|
|
+ | sext.w CARG1, CARG1
|
|
+ | or TMP1, TMP1, TMP0
|
|
+ | sltu TMP2, TMP2, CARG1 // !(255 < n).
|
|
+ | or TMP1, TMP1, TMP2
|
|
+ | li CARG3, 1
|
|
+ | bxnez TMP1, ->fff_fallback
|
|
+ | addi CARG2, sp, TMPD_OFS
|
|
+ | sb CARG1, TMPD(sp)
|
|
+ |->fff_newstr:
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv CARG1, L
|
|
+ | // (lua_State *L, const char *str, size_t l)
|
|
+ | call_intern fff_newstr, lj_str_new
|
|
+ | // Returns GCstr *.
|
|
+ | ld BASE, L->base
|
|
+ |->fff_resstr:
|
|
+ | li TMP1, LJ_TSTR
|
|
+ | settp CRET1, TMP1
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |.ffunc string_sub
|
|
+ | ffgccheck
|
|
+ | ld CARG1, 0(BASE)
|
|
+ | ld CARG2, 8(BASE)
|
|
+ | ld CARG3, 16(BASE)
|
|
+ | addi TMP0, NARGS8:RC, -16
|
|
+ | gettp TMP1, CARG1
|
|
+ | bxltz TMP0, ->fff_fallback
|
|
+ | cleartp STR:CARG1, CARG1
|
|
+ | li CARG4, -1
|
|
+ | beqz TMP0, >1
|
|
+ | sext.w CARG4, CARG3
|
|
+ | checkint CARG3, ->fff_fallback
|
|
+ |1:
|
|
+ | checkint CARG2, ->fff_fallback
|
|
+ | addi TMP0, TMP1, -LJ_TSTR
|
|
+ | sext.w CARG3, CARG2
|
|
+ | bxnez TMP0, ->fff_fallback
|
|
+ | lw CARG2, STR:CARG1->len
|
|
+ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
|
|
+ | addiw TMP0, CARG2, 1
|
|
+ | bgez CARG4, >2
|
|
+ | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1
|
|
+ |2:
|
|
+ | bgez CARG3, >3
|
|
+ | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1
|
|
+ |3:
|
|
+ | bgez CARG4, >4
|
|
+ | mv CARG4, x0 // if (end < 0) end = 0
|
|
+ |4:
|
|
+ | bgtz CARG3, >5
|
|
+ | li CARG3, 1 // if (start < 1) start = 1
|
|
+ |5:
|
|
+ | ble CARG4, CARG2, >6
|
|
+ | mv CARG4, CARG2 // if (end > len) end = len
|
|
+ |6:
|
|
+ | add CARG2, STR:CARG1, CARG3
|
|
+ | sub CARG3, CARG4, CARG3 // len = end - start
|
|
+ | addi CARG2, CARG2, sizeof(GCstr)-1
|
|
+ | addiw CARG3, CARG3, 1 // len += 1
|
|
+ | bxgez CARG3, ->fff_newstr
|
|
+ |->fff_emptystr: // Return empty string.
|
|
+ | li TMP1, LJ_TSTR
|
|
+ | addi STR:CARG1, GL, offsetof(global_State, strempty)
|
|
+ | settp CARG1, TMP1
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |.macro ffstring_op, name
|
|
+ | .ffunc string_ .. name
|
|
+ | ffgccheck
|
|
+ | ld CARG2, 0(BASE)
|
|
+ | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | checkstr STR:CARG2, ->fff_fallback
|
|
+ | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf)
|
|
+ | ld TMP0, SBUF:CARG1->b
|
|
+ | sd L, SBUF:CARG1->L
|
|
+ | sd BASE, L->base
|
|
+ | sd TMP0, SBUF:CARG1->w
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | call_intern ff_string_ .. name, lj_buf_putstr_ .. name
|
|
+ | call_intern ff_string_ .. name, lj_buf_tostr // CARG1 = CRET1
|
|
+ | ld BASE, L->base
|
|
+ | j ->fff_resstr
|
|
+ |.endmacro
|
|
+ |
|
|
+ |ffstring_op reverse
|
|
+ |ffstring_op lower
|
|
+ |ffstring_op upper
|
|
+ |
|
|
+ |//-- Bit library --------------------------------------------------------
|
|
+ |
|
|
+ |->vm_tobit_fb: // Non-integer helper for the bit.* fast functions, reached via 'jal'; callers set TMP1 = (tag < LJ_TISNUM).
|
|
+ | fld FARG1, 0(BASE) // Reload the first argument as a double.
|
|
+ | bxeqz TMP1, ->fff_fallback // First argument is not a number.
|
|
+ | fadd.d FARG1, FARG1, TOBIT // NOTE(review): presumably the usual add-bias trick; TOBIT is set up outside this view.
|
|
+ | fmv.x.w CRET1, FARG1 // Take the low 32 bits of the sum.
|
|
+ | zext.w CRET1, CRET1
|
|
+ | ret // Back to the instruction after the caller's 'jal'.
|
|
+ |
|
|
+ |.macro .ffunc_bit, name
|
|
+ | .ffunc_1 bit_..name
|
|
+ | gettp TMP0, CARG1
|
|
+ | zext.w CRET1, CARG1
|
|
+ | beq TMP0, TISNUM, >1
|
|
+ | sltiu TMP1, TMP0, LJ_TISNUM
|
|
+ | jal ->vm_tobit_fb
|
|
+ |1:
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro .ffunc_bit_op, name, bins
|
|
+ | .ffunc_bit name
|
|
+ | addi TMP2, BASE, 8
|
|
+ | add TMP3, BASE, NARGS8:RC
|
|
+ |1:
|
|
+ | ld TMP1, 0(TMP2)
|
|
+ | bxeq TMP2, TMP3, ->fff_resi
|
|
+ | gettp TMP0, TMP1
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | bne TMP0, TISNUM, >2
|
|
+ | zext.w TMP1, TMP1
|
|
+ | bins CRET1, CRET1, TMP1
|
|
+ | j <1
|
|
+ |2:
|
|
+ | fld FARG1, -8(TMP2)
|
|
+ | sltiu TMP0, TMP0, LJ_TISNUM
|
|
+ | fadd.d FARG1, FARG1, TOBIT
|
|
+ | bxeqz TMP0, ->fff_fallback
|
|
+ | fmv.x.w TMP1, FARG1
|
|
+ | zext.w TMP1, TMP1
|
|
+ | bins CRET1, CRET1, TMP1
|
|
+ | j <1
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit_op band, and
|
|
+ |.ffunc_bit_op bor, or
|
|
+ |.ffunc_bit_op bxor, xor
|
|
+ |
|
|
+ |.ffunc_bit bswap // bit.bswap(x): reverse the four bytes of the 32 bit value in CARG1.
|
|
+ | srliw CARG2, CARG1, 8
|
|
+ | lui CARG3, 16 // CARG3 = 0x10000.
|
|
+ | addiw CARG3, CARG3, -256 // CARG3 = 0xff00.
|
|
+ | and CARG2, CARG2, CARG3 // Byte 2 -> byte 1.
|
|
+ | srliw CARG3, CARG1, 24 // Byte 3 -> byte 0.
|
|
+ | or CARG2, CARG2, CARG3
|
|
+ | slli CARG3, CARG1, 8
|
|
+ | lui CARG4, 0x00ff0 // CARG4 = 0x00ff0000.
|
|
+ | and CARG3, CARG3, CARG4 // Byte 1 -> byte 2.
|
|
+ | slli CARG1, CARG1, 24 // Byte 0 -> byte 3 (bits above 31 are cleared below).
|
|
+ | or CARG1, CARG1, CARG3
|
|
+ | or CARG1, CARG1, CARG2
|
|
+ | slli CARG1, CARG1, 32
|
|
+ | srli CARG1, CARG1, 32 // Zero-extend the low 32 bits.
|
|
+ | j ->fff_resi
|
|
+ |
|
|
+ |.ffunc_bit tobit
|
|
+ |->fff_resi:
|
|
+ | settp CARG1, TISNUM // CARG1 = CRET1
|
|
+ | j ->fff_restv
|
|
+ |
|
|
+ |.ffunc_bit bnot
|
|
+ | not CRET1, CRET1
|
|
+ | zext.w CRET1, CRET1
|
|
+ | j ->fff_resi
|
|
+ |
|
|
+ |.macro .ffunc_bit_sh, name, shins
|
|
+ | .ffunc_2 bit_..name
|
|
+ | gettp TMP0, CARG1
|
|
+ | beq TMP0, TISNUM, >1
|
|
+ | sltiu TMP1, TMP0, LJ_TISNUM
|
|
+ | jal ->vm_tobit_fb
|
|
+ |// mv CARG1, CRET1 // CARG1 = CRET1
|
|
+ |1:
|
|
+ | gettp TMP0, CARG2
|
|
+ | zext.w CARG2, CARG2
|
|
+ | bxne TMP0, TISNUM, ->fff_fallback
|
|
+ | sext.w CARG1, CARG1
|
|
+ | shins CRET1, CARG1, CARG2
|
|
+ | zext.w CRET1, CRET1
|
|
+ | j ->fff_resi
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit_sh lshift, sllw
|
|
+ |.ffunc_bit_sh rshift, srlw
|
|
+ |.ffunc_bit_sh arshift, sraw
|
|
+ |
|
|
+ |.macro .ffunc_bit_rot, name, rotinsa, rotinsb
|
|
+ | .ffunc_2 bit_..name
|
|
+ | gettp TMP0, CARG1
|
|
+ | beq TMP0, TISNUM, >1
|
|
+ | sltiu TMP1, TMP0, LJ_TISNUM
|
|
+ | jal ->vm_tobit_fb
|
|
+ |// mv CARG1, CRET1 // CARG1 = CRET1
|
|
+ |1:
|
|
+ | gettp TMP0, CARG2
|
|
+ | zext.w CARG2, CARG2
|
|
+ | bxne TMP0, TISNUM, ->fff_fallback
|
|
+ | sext.w CARG1, CARG1
|
|
+ | neg TMP2, CARG2
|
|
+ | rotinsa TMP1, CARG1, CARG2
|
|
+ | rotinsb TMP0, CARG1, TMP2
|
|
+ | or CRET1, TMP0, TMP1
|
|
+ | zext.w CRET1, CRET1
|
|
+ | j ->fff_resi
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit_rot rol, sllw, srlw
|
|
+ |.ffunc_bit_rot ror, srlw, sllw
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->fff_fallback: // Call fast function fallback handler.
|
|
+ | // BASE = new base, RB = CFUNC, RC = nargs*8
|
|
+ | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
|
|
+ | ld CARG3, CFUNC:RB->f
|
|
+ | add TMP1, BASE, NARGS8:RC
|
|
+ | sd BASE, L->base
|
|
+ | addi TMP0, TMP1, 8*LUA_MINSTACK
|
|
+ | ld TMP2, L->maxstack
|
|
+ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
|
|
+ | sd TMP1, L->top
|
|
+ | mv CARG1, L
|
|
+ | bltu TMP2, TMP0, >5 // Need to grow stack.
|
|
+ | jalr CARG3 // (lua_State *L)
|
|
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|
|
+ | ld BASE, L->base
|
|
+ | slliw RD, CRET1, 3
|
|
+ | bxgtz CRET1, ->fff_res // Returned nresults+1?
|
|
+ |1: // Returned 0 or -1: retry fast path.
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | ld TMP0, L->top
|
|
+ | sub NARGS8:RC, TMP0, BASE
|
|
+ | cleartp LFUNC:RB
|
|
+ | bxnez CRET1, ->vm_call_tail // Returned -1?
|
|
+ | ins_callt // Returned 0: retry fast path.
|
|
+ |
|
|
+ |// Reconstruct previous base for vmeta_call during tailcall.
|
|
+ |->vm_call_tail:
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | andi TMP1, PC, ~FRAME_TYPEP // TODO
|
|
+ | bnez TMP0, >3
|
|
+ | lbu TMP1, OFS_RA(PC)
|
|
+ | slliw TMP1, TMP1, 3
|
|
+ | addiw TMP1, TMP1, 16
|
|
+ |3:
|
|
+ | sub TMP2, BASE, TMP1
|
|
+ | j ->vm_call_dispatch // Resolve again for tailcall.
|
|
+ |
|
|
+ |5: // Grow stack for fallback handler.
|
|
+ | li CARG2, LUA_MINSTACK
|
|
+ | mv CARG1, L
|
|
+ | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n)
|
|
+ | ld BASE, L->base
|
|
+ | mv CRET1, x0 // Set zero-flag to force retry.
|
|
+ | j <1
|
|
+ |
|
|
+ |->fff_gcstep: // Call GC step function.
|
|
+ | // BASE = new base, RC = nargs*8
|
|
+ | mv MULTRES, ra // Keep the return address in MULTRES across the C call.
|
|
+ | add TMP0, BASE, NARGS8:RC // Calculate L->top.
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp) // Redundant (but a defined value).
|
|
+ | mv CARG1, L
|
|
+ | sd TMP0, L->top
|
|
+ | call_intern fff_gc_step, lj_gc_step // (lua_State *L)
|
|
+ | ld BASE, L->base // Reload BASE from L->base after the call.
|
|
+ | mv ra, MULTRES // Help return address predictor.
|
|
+ | ld TMP0, L->top
|
|
+ | ld CFUNC:RB, FRAME_FUNC(BASE) // Re-establish RB = CFUNC for the fast function.
|
|
+ | cleartp CFUNC:RB
|
|
+ | sub NARGS8:RC, TMP0, BASE // Re-establish RC = nargs*8 from L->top - BASE.
|
|
+ | ret // Resume the fast function after its GC check.
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Special dispatch targets -------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_record: // Dispatch target for recording phase.
|
|
+ |
|
|
+ |->vm_rethook: // Dispatch target for return hooks.
|
|
+ | lbu TMP3, GL->hookmask
|
|
+ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
|
|
+ | beqz TMP1, >1
|
|
+ |5: // Re-dispatch to static ins.
|
|
+ | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
|
|
+ | jr TMP1
|
|
+ |
|
|
+ |->vm_inshook: // Dispatch target for instr/line hooks.
|
|
+ | lbu TMP3, GL->hookmask
|
|
+ | lw TMP2, GL->hookcount
|
|
+ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active?
|
|
+ | bnez TMP1, <5
|
|
+ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
|
|
+ | addiw TMP2, TMP2, -1
|
|
+ | beqz TMP1, <5
|
|
+ | sw TMP2, GL->hookcount
|
|
+ | beqz TMP2, >1
|
|
+ | andi TMP1, TMP3, LUA_MASKLINE
|
|
+ | beqz TMP1, <5
|
|
+ |1:
|
|
+ | sw MULTRES, TMPD(sp)
|
|
+ | mv CARG2, PC
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
|
|
+ | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc)
|
|
+ |3:
|
|
+ | ld BASE, L->base
|
|
+ |4: // Re-dispatch to static ins.
|
|
+ | lw INS, -4(PC)
|
|
+ | decode_OP8 TMP1, INS
|
|
+ | add TMP0, DISPATCH, TMP1
|
|
+ | decode_RD8a RD, INS
|
|
+ | ld TMP1, GG_DISP2STATIC(TMP0)
|
|
+ | decode_RA8 RA, INS
|
|
+ | decode_RD8b RD
|
|
+ | jr TMP1
|
|
+ |
|
|
+ |->cont_hook: // Continue from hook yield.
|
|
+ | addi PC, PC, 4
|
|
+ | lw MULTRES, -24(RB) // Restore MULTRES for *M ins.
|
|
+ | j <4
|
|
+ |
|
|
+ |
|
|
+ |->vm_callhook: // Dispatch target for call hooks.
|
|
+ | mv CARG2, PC
|
|
+ |
|
|
+ |->cont_stitch: // Trace stitching.
|
|
+ |
|
|
+ |->vm_profhook: // Dispatch target for profiler hook.
|
|
+#if LJ_HASPROFILE
|
|
+ | mv CARG1, L
|
|
+ | mv CARG2, PC
|
|
+ | sd BASE, L->base
|
|
+ | sw MULTRES, TMPD(sp)
|
|
+ | // (lua_State *L, const BCIns *pc)
|
|
+ | call_intern vm_profhook, lj_dispatch_profile
|
|
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
|
|
+ | addi PC, PC, -4
|
|
+ | ld BASE, L->base
|
|
+ | j ->cont_nop
|
|
+#endif
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Math helper functions ----------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |
|
|
+ |// Hard-float round to integer.
|
|
+ |// Modifies TMP0, FARG1, FARG5
|
|
+ |.macro vm_round, rm
|
|
+ | fmv.x.d TMP0, FARG1 // Raw bits of the input double.
|
|
+ | srli TMP0, TMP0, 52 // Extract exponent (and sign).
|
|
+ | andi TMP0, TMP0, 0x7ff // Extract exponent.
|
|
+ | addi TMP0, TMP0, -1075 // 1075 = 1023 (bias) + 52 (fraction bits).
|
|
+ | bgtz TMP0, >1 // Biased exponent > 1075 (includes Inf/NaN): skip the conversion, input stays as is.
|
|
+ | fcvt.l.d TMP0, FARG1, rm // To 64 bit integer, using the rounding mode given to the macro.
|
|
+ | fcvt.d.l FARG5, TMP0 // And back to a double.
|
|
+ | fsgnj.d FRET1, FARG5, FARG1 // Take the sign from the input, so a zero result keeps the input's sign.
|
|
+ |1: // NOTE(review): the skip path relies on FRET1 aliasing FARG1 -- confirm against the register defs.
|
|
+ | ret
|
|
+ |.endmacro
|
|
+ |
|
|
+ |
|
|
+ |->vm_floor:
|
|
+ | vm_round rdn
|
|
+ |->vm_ceil:
|
|
+ | vm_round rup
|
|
+ |
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Miscellaneous functions --------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |// void lj_vm_fence_rw_rw()
|
|
+ |->vm_fence_rw_rw: // Memory fence helper; the body is only assembled when JIT or FFI is enabled.
|
|
+ |.if JIT or FFI
|
|
+ | .long 0x0330000f // Raw encoding of 'fence rw,rw' (pred = rw, succ = rw).
|
|
+ | ret
|
|
+ |.endif
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+}
|
|
+
|
|
+/* Generate the code for a single instruction. */
|
|
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
+{
|
|
+ int vk = 0;
|
|
+ |=>defop:
|
|
+
|
|
+ switch (op) {
|
|
+
|
|
+ /* -- Comparison ops ---------------------------------------------------- */
|
|
+
|
|
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
|
|
+
|
|
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|
|
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | add RD, BASE, RD
|
|
+ if (op == BC_ISLT || op == BC_ISGE) {
|
|
+ | ld CARG1, 0(RA)
|
|
+ | ld CARG2, 0(RD)
|
|
+ | gettp CARG3, CARG1
|
|
+ | gettp CARG4, CARG2
|
|
+ } else {
|
|
+ | ld CARG2, 0(RA)
|
|
+ | ld CARG1, 0(RD)
|
|
+ | gettp CARG3, CARG2
|
|
+ | gettp CARG4, CARG1
|
|
+ }
|
|
+ | lhu TMP2, OFS_RD(PC) // TMP2=jump
|
|
+ | addi PC, PC, 4
|
|
+ | bne CARG3, TISNUM, >2
|
|
+ | decode_BC4b TMP2
|
|
+ | bne CARG4, TISNUM, >5
|
|
+ | sext.w CARG1, CARG1
|
|
+ | sext.w CARG2, CARG2
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | slt TMP1, CARG1, CARG2
|
|
+ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
|
|
+ if (op == BC_ISLT || op == BC_ISGT) {
|
|
+ | neg TMP1, TMP1
|
|
+ } else {
|
|
+ | addi TMP1, TMP1, -1
|
|
+ }
|
|
+ | and TMP2, TMP2, TMP1
|
|
+ |1:
|
|
+ | add PC, PC, TMP2
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // RA is not an integer.
|
|
+ | sltiu TMP1, CARG3, LJ_TISNUM
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | bxeqz TMP1, ->vmeta_comp
|
|
+ | sltiu TMP1, CARG4, LJ_TISNUM
|
|
+ | decode_BC4b TMP2
|
|
+ | beqz TMP1, >4
|
|
+ | fmv.d.x FTMP0, CARG1
|
|
+ | fmv.d.x FTMP2, CARG2
|
|
+ |3: // RA and RD are both numbers.
|
|
+ | addw TMP2, TMP2, TMP3
|
|
+ if (op == BC_ISLT) {
|
|
+ | flt.d TMP3, FTMP0, FTMP2
|
|
+ | neg TMP3, TMP3
|
|
+ } else if (op == BC_ISGE) {
|
|
+ | flt.d TMP3, FTMP0, FTMP2
|
|
+ | addi TMP3, TMP3, -1
|
|
+ } else if (op == BC_ISLE) {
|
|
+ | fle.d TMP3, FTMP2, FTMP0
|
|
+ | neg TMP3, TMP3
|
|
+ } else if (op == BC_ISGT) {
|
|
+ | fle.d TMP3, FTMP2, FTMP0
|
|
+ | addi TMP3, TMP3, -1
|
|
+ }
|
|
+ | and TMP2, TMP2, TMP3
|
|
+ | j <1
|
|
+ |
|
|
+ |4: // RA is a number, RD is not a number.
|
|
+ | // RA is a number, RD is an integer. Convert RD to a number.
|
|
+ | bxne CARG4, TISNUM, ->vmeta_comp
|
|
+ if (op == BC_ISLT || op == BC_ISGE) {
|
|
+ | fcvt.d.w FTMP2, CARG2
|
|
+ | fmv.d.x FTMP0, CARG1
|
|
+ } else {
|
|
+ | fcvt.d.w FTMP0, CARG1
|
|
+ | fmv.d.x FTMP2, CARG2
|
|
+ }
|
|
+ | j <3
|
|
+ |
|
|
+ |5: // RA is an integer, RD is not an integer
|
|
+ | sltiu TMP1, CARG4, LJ_TISNUM
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | bxeqz TMP1, ->vmeta_comp
|
|
+ | // RA is an integer, RD is a number. Convert RA to a number.
|
|
+ if (op == BC_ISLT || op == BC_ISGE) {
|
|
+ | fcvt.d.w FTMP0, CARG1
|
|
+ | fmv.d.x FTMP2, CARG2
|
|
+ } else {
|
|
+ | fcvt.d.w FTMP2, CARG2
|
|
+ | fmv.d.x FTMP0, CARG1
|
|
+ }
|
|
+ | j <3
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQV: case BC_ISNEV:
|
|
+ vk = op == BC_ISEQV;
|
|
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | add RD, BASE, RD
|
|
+ | addi PC, PC, 4
|
|
+ | ld CARG1, 0(RA)
|
|
+ | ld CARG2, 0(RD)
|
|
+ | lhu TMP2, -4+OFS_RD(PC)
|
|
+ | gettp CARG3, CARG1
|
|
+ | gettp CARG4, CARG2
|
|
+ | sltu TMP0, TISNUM, CARG3
|
|
+ | sltu TMP1, TISNUM, CARG4
|
|
+ | or TMP0, TMP0, TMP1
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ if (vk) {
|
|
+ | beqz TMP0, ->BC_ISEQN_Z
|
|
+ } else {
|
|
+ | beqz TMP0, ->BC_ISNEN_Z
|
|
+ }
|
|
+ |// Either or both types are not numbers.
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | decode_BC4b TMP2
|
|
+ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
|
|
+ | bne CARG1, CARG2, >2
|
|
+ | // Tag and value are equal.
|
|
+ if (vk) {
|
|
+ |->BC_ISEQV_Z:
|
|
+ | add PC, PC, TMP2
|
|
+ }
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check if the tags are the same and it's a table or userdata.
|
|
+ | xor TMP3, CARG3, CARG4 // Same type?
|
|
+ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1
|
|
+ | beqz TMP3, >3
|
|
+ | mv TMP0, x0 // Types differ: force TMP0=0. At 3: TMP0=1 only for same-type table/userdata.
|
|
+ |3:
|
|
+ | cleartp TAB:TMP1, CARG1
|
|
+ if (vk) {
|
|
+ | beqz TMP0, <1
|
|
+ } else {
|
|
+ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
|
|
+ }
|
|
+ | // Different tables or userdatas. Need to check __eq metamethod.
|
|
+ | // Field metatable must be at same offset for GCtab and GCudata!
|
|
+ | ld TAB:TMP3, TAB:TMP1->metatable
|
|
+ if (vk) {
|
|
+ | beqz TAB:TMP3, <1 // No metatable?
|
|
+ | lbu TMP3, TAB:TMP3->nomm
|
|
+ | andi TMP3, TMP3, 1<<MM_eq
|
|
+ | li TMP0, 0 // ne = 0
|
|
+ | bnez TMP3, <1 // Or 'no __eq' flag set?
|
|
+ } else {
|
|
+ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
|
|
+ | lbu TMP3, TAB:TMP3->nomm
|
|
+ | andi TMP3, TMP3, 1<<MM_eq
|
|
+ | li TMP0, 1 // ne = 1
|
|
+ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
|
|
+ }
|
|
+ | j ->vmeta_equal // Handle __eq metamethod.
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQS: case BC_ISNES:
|
|
+ vk = op == BC_ISEQS;
|
|
+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | addi PC, PC, 4
|
|
+ | ld CARG1, 0(RA)
|
|
+ | sub RD, KBASE, RD
|
|
+ | lhu TMP2, -4+OFS_RD(PC)
|
|
+ | ld CARG2, -8(RD) // KBASE-8-str_const*8
|
|
+ | li TMP0, LJ_TSTR
|
|
+ | decode_BC4b TMP2
|
|
+ | settp CARG2, TMP0
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
|
|
+ | addw TMP2, TMP2, TMP3
|
|
+ if (vk) {
|
|
+ | seqz TMP4, TMP0
|
|
+ } else {
|
|
+ | snez TMP4, TMP0
|
|
+ }
|
|
+ | neg TMP4, TMP4 // 0/1 -> 0/-1 mask
|
|
+ | and TMP2, TMP2, TMP4 // Jump offset, or 0 if the branch is not taken
|
|
+ | add PC, PC, TMP2
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQN: case BC_ISNEN:
|
|
+ vk = op == BC_ISEQN;
|
|
+ | // RA = src*8, RD = num_const*8, JMP with RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | add RD, KBASE, RD
|
|
+ | ld CARG1, 0(RA)
|
|
+ | ld CARG2, 0(RD)
|
|
+ | lhu TMP2, OFS_RD(PC)
|
|
+ | gettp CARG3, CARG1
|
|
+ | gettp CARG4, CARG2
|
|
+ | addi PC, PC, 4
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ if (vk) {
|
|
+ |->BC_ISEQN_Z:
|
|
+ } else {
|
|
+ |->BC_ISNEN_Z:
|
|
+ }
|
|
+ | decode_BC4b TMP2
|
|
+ | bne CARG3, TISNUM, >4
|
|
+ | addw TMP2, TMP2, TMP3
|
|
+ | bne CARG4, TISNUM, >6
|
|
+ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D
|
|
+ |1:
|
|
+ if (vk) {
|
|
+ | seqz TMP4, TMP0
|
|
+ | neg TMP4, TMP4
|
|
+ | and TMP2, TMP2, TMP4
|
|
+ | add PC, PC, TMP2
|
|
+ |2:
|
|
+ } else {
|
|
+ | snez TMP4, TMP0
|
|
+ | neg TMP4, TMP4
|
|
+ | and TMP2, TMP2, TMP4
|
|
+ |2:
|
|
+ | add PC, PC, TMP2
|
|
+ }
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |4: // RA is not an integer.
|
|
+ | addw TMP2, TMP2, TMP3
|
|
+ | bgeu CARG3, TISNUM, <2
|
|
+ | fmv.d.x FTMP0, CARG1
|
|
+ | fmv.d.x FTMP2, CARG2
|
|
+ | bne CARG4, TISNUM, >5
|
|
+ |// RA is a number, RD is an integer.
|
|
+ | fcvt.d.w FTMP2, CARG2
|
|
+ |
|
|
+ |5: // RA and RD are both numbers.
|
|
+ | feq.d TMP0, FTMP0, FTMP2
|
|
+ | seqz TMP0, TMP0
|
|
+ | j <1
|
|
+ |
|
|
+ |6: // RA is an integer, RD is a number.
|
|
+ | bgeu CARG4, TISNUM, <2
|
|
+ | fcvt.d.w FTMP0, CARG1
|
|
+ | fmv.d.x FTMP2, CARG2
|
|
+ | j <5
|
|
+ |
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQP: case BC_ISNEP:
|
|
+ vk = op == BC_ISEQP;
|
|
+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | srliw TMP0, RD, 3
|
|
+ | ld TMP1, 0(RA)
|
|
+ | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2
|
|
+ | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target
|
|
+ | gettp TMP1, TMP1
|
|
+ | addi PC, PC, 4
|
|
+ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
|
|
+ | decode_BC4b TMP2
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
|
|
+ if (vk) {
|
|
+ | seqz TMP4, TMP0
|
|
+ } else {
|
|
+ | snez TMP4, TMP0
|
|
+ }
|
|
+ | neg TMP4, TMP4
|
|
+ | and TMP2, TMP2, TMP4
|
|
+ | add PC, PC, TMP2
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Unary test and copy ops ------------------------------------------- */
|
|
+
|
|
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
|
|
+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
|
|
+ | add RD, BASE, RD
|
|
+ | lhu TMP2, OFS_RD(PC)
|
|
+ | ld TMP0, 0(RD)
|
|
+ | addi PC, PC, 4
|
|
+ | gettp TMP0, TMP0
|
|
+ | add RA, BASE, RA
|
|
+ | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false
|
|
+ | decode_BC4b TMP2
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | ld CRET1, 0(RD)
|
|
+ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
|
|
+ if (op == BC_IST || op == BC_ISTC) {
|
|
+ | beqz TMP0, >1
|
|
+ if (op == BC_ISTC) {
|
|
+ | sd CRET1, 0(RA)
|
|
+ }
|
|
+ } else {
|
|
+ | bnez TMP0, >1
|
|
+ if (op == BC_ISFC) {
|
|
+ | sd CRET1, 0(RA)
|
|
+ }
|
|
+ }
|
|
+ | add PC, PC, TMP2
|
|
+ |1:
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_ISTYPE:
|
|
+ | // RA = src*8, RD = -type*8
|
|
+ | add TMP0, BASE, RA
|
|
+ | srliw TMP1, RD, 3
|
|
+ | ld TMP0, 0(TMP0)
|
|
+ | gettp TMP0, TMP0
|
|
+ | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0
|
|
+ | bxnez TMP0, ->vmeta_istype
|
|
+ | ins_next
|
|
+ break;
|
|
+ case BC_ISNUM:
|
|
+ | // RA = src*8, RD = -(TISNUM-1)*8
|
|
+ | add TMP0, BASE, RA
|
|
+ | ld TMP0, 0(TMP0)
|
|
+ | checknum TMP0, ->vmeta_istype
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Unary ops --------------------------------------------------------- */
|
|
+
|
|
+ case BC_MOV:
|
|
+ | // RA = dst*8, RD = src*8
|
|
+ | add RD, BASE, RD
|
|
+ | add RA, BASE, RA
|
|
+ | ld TMP0, 0(RD)
|
|
+ | ins_next1
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_NOT:
|
|
+ | // RA = dst*8, RD = src*8
|
|
+ | add RD, BASE, RD
|
|
+ | add RA, BASE, RA
|
|
+ | ld TMP0, 0(RD)
|
|
+ | li TMP1, LJ_TTRUE
|
|
+ | ins_next1
|
|
+ | gettp TMP0, TMP0
|
|
+ | sltu TMP0, TMP1, TMP0
|
|
+ | addiw TMP0, TMP0, 1
|
|
+ | slli TMP0, TMP0, 47
|
|
+ | not TMP0, TMP0
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_UNM:
|
|
+ | // RA = dst*8, RD = src*8
|
|
+ | add RB, BASE, RD
|
|
+ | add RA, BASE, RA
|
|
+ | ld TMP0, 0(RB)
|
|
+ | lui TMP1, 0x80000
|
|
+ | gettp CARG3, TMP0
|
|
+ | bne CARG3, TISNUM, >1
|
|
+ | negw TMP0, TMP0
|
|
+ | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
|
|
+ | zext.w TMP0, TMP0
|
|
+ | settp_b TMP0, TISNUM
|
|
+ | j >2
|
|
+ |1:
|
|
+ | sltiu TMP3, CARG3, LJ_TISNUM
|
|
+ | slli TMP1, TMP1, 32
|
|
+ | bxeqz TMP3, ->vmeta_unm
|
|
+ | xor TMP0, TMP0, TMP1 // sign => ~sign
|
|
+ |2:
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next
|
|
+ break;
|
|
+ case BC_LEN:
|
|
+ | // RA = dst*8, RD = src*8
|
|
+ | add CARG2, BASE, RD
|
|
+ | ld TMP0, 0(CARG2)
|
|
+ | add RA, BASE, RA
|
|
+ | gettp TMP1, TMP0
|
|
+ | addi TMP2, TMP1, -LJ_TSTR
|
|
+ | cleartp STR:CARG1, TMP0
|
|
+ | bnez TMP2, >2
|
|
+ | lwu CARG1, STR:CARG1->len
|
|
+ |1:
|
|
+ | settp_b CARG1, TISNUM
|
|
+ | sd CARG1, 0(RA)
|
|
+ | ins_next
|
|
+ |2:
|
|
+ | addi TMP2, TMP1, -LJ_TTAB
|
|
+ | bxnez TMP2, ->vmeta_len
|
|
+#if LJ_52
|
|
+ | ld TAB:TMP2, TAB:CARG1->metatable
|
|
+ | bnez TAB:TMP2, >9
|
|
+ |3:
|
|
+#endif
|
|
+ |->BC_LEN_Z:
|
|
+ | call_intern BC_LEN, lj_tab_len // (GCtab *t)
|
|
+ | // Returns uint32_t (but less than 2^31).
|
|
+ | j <1
|
|
+#if LJ_52
|
|
+ |9:
|
|
+ | lbu TMP0, TAB:TMP2->nomm
|
|
+ | andi TMP0, TMP0, 1<<MM_len
|
|
+ | bnez TMP0, <3 // 'no __len' flag set: done.
|
|
+ | j ->vmeta_len
|
|
+#endif
|
|
+ break;
|
|
+
|
|
+ /* -- Binary ops -------------------------------------------------------- */
|
|
+
|
|
+ |.macro fpmod, a, b, c
|
|
+ | fdiv.d FARG1, b, c
|
|
+ | jal ->vm_floor // floor(b/c)
|
|
+ | fmul.d a, FRET1, c
|
|
+ | fsub.d a, b, a // b - floor(b/c)*c
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithpre
|
|
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
|
|
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
|
|
+ ||if (vk == 1) {
|
|
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
|
|
+ | decode_RB8 RC, INS
|
|
+ | decode_RDtoRC8 RB, RD
|
|
+ ||} else {
|
|
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ ||}
|
|
+ ||switch (vk) {
|
|
+ ||case 0: // suffix is VN
|
|
+ | add RB, BASE, RB
|
|
+ | add RC, KBASE, RC
|
|
+ || break;
|
|
+ ||case 1: // suffix is NV
|
|
+ | add RC, BASE, RC
|
|
+ | add RB, KBASE, RB
|
|
+ || break;
|
|
+ ||default: // CAT or suffix is VV
|
|
+ | add RB, BASE, RB
|
|
+ | add RC, BASE, RC
|
|
+ || break;
|
|
+ ||}
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithfp, fpins, itype1, itype2
|
|
+ | fld FTMP0, 0(RB)
|
|
+ | sltu itype1, itype1, TISNUM
|
|
+ | sltu itype2, itype2, TISNUM
|
|
+ | fld FTMP2, 0(RC)
|
|
+ | and itype1, itype1, itype2
|
|
+ | add RA, BASE, RA
|
|
+ | bxeqz itype1, ->vmeta_arith
|
|
+ | fpins FRET1, FTMP0, FTMP2
|
|
+ | ins_next1
|
|
+ | fsd FRET1, 0(RA)
|
|
+ | ins_next2
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithead, itype1, itype2, tval1, tval2
|
|
+ | ld tval1, 0(RB)
|
|
+ | ld tval2, 0(RC)
|
|
+ | // Check for two integers.
|
|
+ | gettp itype1, tval1
|
|
+ | gettp itype2, tval2
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithdn, intins, fpins
|
|
+ | ins_arithpre
|
|
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
|
|
+ | bne TMP0, TISNUM, >1
|
|
+ | bne TMP1, TISNUM, >1
|
|
+ | sext.w CARG3, CARG1
|
|
+ | sext.w CARG4, CARG2
|
|
+ |.if "intins" == "addw"
|
|
+ | intins CRET1, CARG3, CARG4
|
|
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
|
|
+ | xor TMP2, CRET1, CARG4
|
|
+ | and TMP1, TMP1, TMP2
|
|
+ | add RA, BASE, RA
|
|
+ | bxltz TMP1, ->vmeta_arith
|
|
+ |.elif "intins" == "subw"
|
|
+ | intins CRET1, CARG3, CARG4
|
|
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
|
|
+ | xor TMP2, CARG3, CARG4
|
|
+ | and TMP1, TMP1, TMP2
|
|
+ | add RA, BASE, RA
|
|
+ | bxltz TMP1, ->vmeta_arith
|
|
+ |.elif "intins" == "mulw"
|
|
+ | mul TMP2, CARG3, CARG4
|
|
+ | add RA, BASE, RA
|
|
+ | sext.w CRET1, TMP2
|
|
+ | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow.
|
|
+ |.endif
|
|
+ | zext.w CRET1, CRET1
|
|
+ | settp_b CRET1, TISNUM
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next
|
|
+ |1: // Check for two numbers.
|
|
+ | ins_arithfp, fpins, TMP0, TMP1
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithdiv, fpins
|
|
+ | ins_arithpre
|
|
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
|
|
+ | ins_arithfp, fpins, TMP0, TMP1
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithmod, fpins, BC
|
|
+ | ins_arithpre
|
|
+ | ins_arithead TMP0, TMP1, CARG1, CARG2
|
|
+ | bne TMP0, TISNUM, >1
|
|
+ | bne TMP1, TISNUM, >1
|
|
+ | sext.w CARG1, CARG1
|
|
+ | sext.w CARG2, CARG2
|
|
+ | add RA, BASE, RA
|
|
+ | bxeqz CARG2, ->vmeta_arith
|
|
+ | call_intern BC, lj_vm_modi
|
|
+ | zext.w CRET1, CRET1
|
|
+ | settp_b CRET1, TISNUM
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next
|
|
+ |1: // Check for two numbers.
|
|
+ | ins_arithfp, fpins, TMP0, TMP1
|
|
+ |.endmacro
|
|
+
|
|
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
|
|
+ | ins_arithdn addw, fadd.d
|
|
+ break;
|
|
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
|
|
+ | ins_arithdn subw, fsub.d
|
|
+ break;
|
|
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
|
|
+ | ins_arithdn mulw, fmul.d
|
|
+ break;
|
|
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
|
|
+ | ins_arithdiv fdiv.d
|
|
+ break;
|
|
+ case BC_MODVN:
|
|
+ | ins_arithmod fpmod, BC_MODVN
|
|
+ break;
|
|
+ case BC_MODNV:
|
|
+ | ins_arithmod fpmod, BC_MODNV
|
|
+ break;
|
|
+ case BC_MODVV:
|
|
+ | ins_arithmod fpmod, BC_MODVV
|
|
+ break;
|
|
+ case BC_POW:
|
|
+ | ins_arithpre
|
|
+ | ld CARG1, 0(RB)
|
|
+ | ld CARG2, 0(RC)
|
|
+ | gettp TMP0, CARG1
|
|
+ | gettp TMP1, CARG2
|
|
+ | sltiu TMP0, TMP0, LJ_TISNUM
|
|
+ | sltiu TMP1, TMP1, LJ_TISNUM
|
|
+ | and TMP0, TMP0, TMP1
|
|
+ | add RA, BASE, RA
|
|
+ | bxeqz TMP0, ->vmeta_arith
|
|
+ | fld FARG1, 0(RB)
|
|
+ | fld FARG2, 0(RC)
|
|
+ | call_extern BC_POW, pow
|
|
+ | ins_next1
|
|
+ | fsd FRET1, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+
|
|
+ case BC_CAT:
|
|
+ | // RA = dst*8, RB = src_start*8, RC = src_end*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | sub CARG3, RC, RB
|
|
+ | sd BASE, L->base
|
|
+ | add CARG2, BASE, RC
|
|
+ | mv MULTRES, RB
|
|
+ |->BC_CAT_Z:
|
|
+ | srliw CARG3, CARG3, 3
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv CARG1, L
|
|
+ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left)
|
|
+ | // Returns NULL (finished) or TValue * (metamethod).
|
|
+ | ld BASE, L->base
|
|
+ | bxnez CRET1, ->vmeta_binop
|
|
+ | add RB, BASE, MULTRES
|
|
+ | ld TMP0, 0(RB)
|
|
+ | add RA, BASE, RA
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Constant ops ------------------------------------------------------ */
|
|
+
|
|
+ case BC_KSTR:
|
|
+ | // RA = dst*8, RD = str_const*8 (~)
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | li TMP2, LJ_TSTR
|
|
+ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
|
|
+ | add RA, BASE, RA
|
|
+ | settp TMP0, TMP2
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next
|
|
+ break;
|
|
+ case BC_KCDATA:
|
|
+ break;
|
|
+ case BC_KSHORT:
|
|
+ | // RA = dst*8, RD = int16_literal*8
|
|
+ | sraiw RD, INS, 16
|
|
+ | add RA, BASE, RA
|
|
+ | zext.w RD, RD
|
|
+ | ins_next1
|
|
+ | settp_b RD, TISNUM
|
|
+ | sd RD, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_KNUM:
|
|
+ | // RA = dst*8, RD = num_const*8
|
|
+ | add RD, KBASE, RD
|
|
+ | add RA, BASE, RA
|
|
+ | ld TMP0, 0(RD)
|
|
+ | ins_next1
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_KPRI:
|
|
+ | // RA = dst*8, RD = primitive_type*8 (~)
|
|
+ | add RA, BASE, RA
|
|
+ | slli TMP0, RD, 44 // 44+3
|
|
+ | not TMP0, TMP0
|
|
+ | ins_next1
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_KNIL:
|
|
+ | // RA = base*8, RD = end*8
|
|
+ | add RA, BASE, RA
|
|
+ | sd TISNIL, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | add RD, BASE, RD
|
|
+ |1:
|
|
+ | sd TISNIL, 0(RA)
|
|
+ | slt TMP0, RA, RD
|
|
+ | addi RA, RA, 8
|
|
+ | bnez TMP0, <1
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Upvalue and function ops ------------------------------------------ */
|
|
+
|
|
+ case BC_UGET:
|
|
+ | // RA = dst*8, RD = uvnum*8
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | add RA, BASE, RA
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | add RD, RD, LFUNC:TMP0
|
|
+ | ld UPVAL:TMP0, LFUNC:RD->uvptr
|
|
+ | ld TMP1, UPVAL:TMP0->v
|
|
+ | ld TMP2, 0(TMP1)
|
|
+ | ins_next1
|
|
+ | sd TMP2, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+ case BC_USETV:
|
|
+ | // RA = uvnum*8, RD = src*8
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | add RD, BASE, RD
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | add RA, RA, LFUNC:TMP0
|
|
+ | ld UPVAL:TMP0, LFUNC:RA->uvptr
|
|
+ | ld CRET1, 0(RD)
|
|
+ | lbu TMP3, UPVAL:TMP0->marked
|
|
+ | ld CARG2, UPVAL:TMP0->v
|
|
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
|
|
+ | lbu TMP0, UPVAL:TMP0->closed
|
|
+ | gettp TMP2, CRET1
|
|
+ | sd CRET1, 0(CARG2)
|
|
+ | or TMP3, TMP3, TMP0
|
|
+ | li TMP0, LJ_GC_BLACK|1
|
|
+ | addi TMP2, TMP2, -(LJ_TNUMX+1)
|
|
+ | beq TMP3, TMP0, >2 // Upvalue is closed and black?
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check if new value is collectable.
|
|
+ | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
|
|
+ | cleartp GCOBJ:CRET1, CRET1
|
|
+ | beqz TMP0, <1 // tvisgcv(v)
|
|
+ | lbu TMP3, GCOBJ:CRET1->gch.marked
|
|
+ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
|
|
+ | beqz TMP3, <1
|
|
+ | // Crossed a write barrier. Move the barrier forward.
|
|
+ | mv CARG1, GL
|
|
+ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ | j <1
|
|
+ break;
|
|
+ case BC_USETS:
|
|
+ | // RA = uvnum*8, RD = str_const*8 (~)
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | add RA, RA, LFUNC:TMP0
|
|
+ | ld UPVAL:TMP0, LFUNC:RA->uvptr
|
|
+ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
|
|
+ | lbu TMP2, UPVAL:TMP0->marked
|
|
+ | ld CARG2, UPVAL:TMP0->v
|
|
+ | lbu TMP3, STR:TMP1->marked
|
|
+ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv)
|
|
+ | lbu TMP2, UPVAL:TMP0->closed
|
|
+ | li TMP0, LJ_TSTR
|
|
+ | settp TMP1, TMP0
|
|
+ | sd TMP1, 0(CARG2)
|
|
+ | bnez TMP4, >2
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check if string is white and ensure upvalue is closed.
|
|
+ | beqz TMP2, <1
|
|
+ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str)
|
|
+ | beqz TMP0, <1
|
|
+ | // Crossed a write barrier. Move the barrier forward.
|
|
+ | mv CARG1, GL
|
|
+ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ | j <1
|
|
+ break;
|
|
+ case BC_USETN:
|
|
+ | // RA = uvnum*8, RD = num_const*8
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | add RD, KBASE, RD
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | add TMP0, RA, LFUNC:TMP0
|
|
+ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr
|
|
+ | ld TMP1, 0(RD)
|
|
+ | ld TMP0, UPVAL:TMP0->v
|
|
+ | sd TMP1, 0(TMP0)
|
|
+ | ins_next
|
|
+ break;
|
|
+ case BC_USETP:
|
|
+ | // RA = uvnum*8, RD = primitive_type*8 (~)
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | slli TMP2, RD, 44
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | add TMP0, RA, LFUNC:TMP0
|
|
+ | not TMP2, TMP2
|
|
+ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr
|
|
+ | ld TMP1, UPVAL:TMP0->v
|
|
+ | sd TMP2, 0(TMP1)
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_UCLO:
|
|
+ | // RA = level*8, RD = target
|
|
+ | ld TMP2, L->openupval
|
|
+ | branch_RD // Do this first since RD is not saved.
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG1, L
|
|
+ | beqz TMP2, >1
|
|
+ | add CARG2, BASE, RA
|
|
+ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level)
|
|
+ | ld BASE, L->base
|
|
+ |1:
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_FNEW:
|
|
+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | ld CARG3, FRAME_FUNC(BASE)
|
|
+ | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | cleartp CARG3
|
|
+ | mv CARG1, L
|
|
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
|
|
+ | call_intern BC_FNEW, lj_func_newL_gc
|
|
+ | // Returns GCfuncL *.
|
|
+ | li TMP0, LJ_TFUNC
|
|
+ | ld BASE, L->base
|
|
+ | settp CRET1, TMP0
|
|
+ | add RA, BASE, RA
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Table ops --------------------------------------------------------- */
|
|
+
|
|
+ case BC_TNEW:
|
|
+ case BC_TDUP:
|
|
+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
|
|
+ | ld TMP0, GL->gc.total
|
|
+ | ld TMP1, GL->gc.threshold
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | bgeu TMP0, TMP1, >5
|
|
+ |1:
|
|
+ if (op == BC_TNEW) {
|
|
+ | srliw CARG2, RD, 3
|
|
+ | andi CARG2, CARG2, 0x7ff
|
|
+ | lzi TMP0, 0x801
|
|
+ | addiw TMP2, CARG2, -0x7ff
|
|
+ | srliw CARG3, RD, 14
|
|
+ | seqz TMP3, TMP2
|
|
+ | neg TMP4, TMP3
|
|
+ | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2
|
|
+ | and CARG1, CARG1, TMP4
|
|
+ | xor CARG2, CARG2, CARG1
|
|
+ | mv CARG1, L
|
|
+ | // (lua_State *L, int32_t asize, uint32_t hbits)
|
|
+ | call_intern BC_TNEW, lj_tab_new
|
|
+ | // Returns Table *.
|
|
+ } else {
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | mv CARG1, L
|
|
+ | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
|
|
+ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt)
|
|
+ | // Returns Table *.
|
|
+ }
|
|
+ | li TMP0, LJ_TTAB
|
|
+ | ld BASE, L->base
|
|
+ | ins_next1
|
|
+ | settp CRET1, TMP0
|
|
+ | add RA, BASE, RA
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next2
|
|
+ |5:
|
|
+ | mv MULTRES, RD // Keep RD across the call; restored below.
|
|
+ | mv CARG1, L
|
|
+ if (op == BC_TNEW) {
|
|
+ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L)
|
|
+ } else {
|
|
+ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L)
|
|
+ }
|
|
+ | mv RD, MULTRES
|
|
+ | j <1
|
|
+ break;
|
|
+
|
|
+ case BC_GGET:
|
|
+ | // RA = dst*8, RD = str_const*8 (~)
|
|
+ case BC_GSET:
|
|
+ | // RA = src*8, RD = str_const*8 (~)
|
|
+ | ld LFUNC:TMP0, FRAME_FUNC(BASE)
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
|
|
+ | cleartp LFUNC:TMP0
|
|
+ | ld TAB:RB, LFUNC:TMP0->env
|
|
+ | add RA, BASE, RA
|
|
+ if (op == BC_GGET) {
|
|
+ | j ->BC_TGETS_Z
|
|
+ } else {
|
|
+ | j ->BC_TSETS_Z
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case BC_TGETV:
|
|
+ | // RA = dst*8, RB = table*8, RC = key*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG2, BASE, RB
|
|
+ | add CARG3, BASE, RC
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | ld TMP2, 0(CARG3)
|
|
+ | add RA, BASE, RA
|
|
+ | checktab TAB:RB, ->vmeta_tgetv
|
|
+ | gettp TMP3, TMP2
|
|
+ | lw TMP0, TAB:RB->asize
|
|
+ | bne TMP3, TISNUM, >5 // Integer key?
|
|
+ | sext.w TMP2, TMP2
|
|
+ | ld TMP1, TAB:RB->array
|
|
+ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part?
|
|
+ | slliw TMP2, TMP2, 3
|
|
+ | add TMP2, TMP1, TMP2
|
|
+ | ld CRET1, 0(TMP2)
|
|
+ | beq CRET1, TISNIL, >2
|
|
+ |1:
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check for __index if table value is nil.
|
|
+ | ld TAB:TMP2, TAB:RB->metatable
|
|
+ | beqz TAB:TMP2, <1 // No metatable: done.
|
|
+ | lbu TMP0, TAB:TMP2->nomm
|
|
+ | andi TMP0, TMP0, 1<<MM_index
|
|
+ | bnez TMP0, <1 // 'no __index' flag set: done.
|
|
+ | j ->vmeta_tgetv
|
|
+ |
|
|
+ |5:
|
|
+ | li TMP0, LJ_TSTR
|
|
+ | cleartp RC, TMP2
|
|
+ | bxne TMP3, TMP0, ->vmeta_tgetv // String key?
|
|
+ | j ->BC_TGETS_Z
|
|
+ break;
|
|
+ case BC_TGETS:
|
|
+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG2, BASE, RB
|
|
+ | sub CARG3, KBASE, RC
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | add RA, BASE, RA
|
|
+ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
|
|
+ | checktab TAB:RB, ->vmeta_tgets1
|
|
+ |->BC_TGETS_Z:
|
|
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+dst*8
|
|
+ | lw TMP0, TAB:RB->hmask
|
|
+ | lw TMP1, STR:RC->sid
|
|
+ | ld NODE:TMP2, TAB:RB->node
|
|
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
|
|
+ | slliw TMP0, TMP1, 5
|
|
+ | slliw TMP1, TMP1, 3
|
|
+ | subw TMP1, TMP0, TMP1
|
|
+ | li TMP3, LJ_TSTR
|
|
+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|
|
+ | settp STR:RC, TMP3 // Tagged key to look for.
|
|
+ |1:
|
|
+ | ld CARG1, NODE:TMP2->key
|
|
+ | ld CARG2, NODE:TMP2->val
|
|
+ | ld NODE:TMP1, NODE:TMP2->next
|
|
+ | ld TAB:TMP3, TAB:RB->metatable
|
|
+ | bne CARG1, RC, >4
|
|
+ | beq CARG2, TISNIL, >5 // Key found, but nil value?
|
|
+ |3:
|
|
+ | sd CARG2, 0(RA)
|
|
+ | ins_next
|
|
+ |
|
|
+ |4: // Follow hash chain.
|
|
+ | mv NODE:TMP2, NODE:TMP1
|
|
+ | bnez NODE:TMP1, <1
|
|
+ | // End of hash chain: key not found, nil result.
|
|
+ |
|
|
+ |5: // Check for __index if table value is nil.
|
|
+ | mv CARG2, TISNIL
|
|
+ | beqz TAB:TMP3, <3 // No metatable: done.
|
|
+ | lbu TMP0, TAB:TMP3->nomm
|
|
+ | andi TMP0, TMP0, 1<<MM_index
|
|
+ | bnez TMP0, <3 // 'no __index' flag set: done.
|
|
+ | j ->vmeta_tgets
|
|
+ break;
|
|
+ case BC_TGETB:
|
|
+ | // RA = dst*8, RB = table*8, RC = index*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | add CARG2, BASE, RB
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | add RA, BASE, RA
|
|
+ | srliw TMP0, RC, 3
|
|
+ | checktab TAB:RB, ->vmeta_tgetb
|
|
+ | lw TMP1, TAB:RB->asize
|
|
+ | ld TMP2, TAB:RB->array
|
|
+ | bxgeu TMP0, TMP1, ->vmeta_tgetb
|
|
+ | add RC, TMP2, RC
|
|
+ | ld CRET1, 0(RC)
|
|
+ | beq CRET1, TISNIL, >5
|
|
+ |1:
|
|
+ | sd CRET1, 0(RA)
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Check for __index if table value is nil.
|
|
+ | ld TAB:TMP2, TAB:RB->metatable
|
|
+ | beqz TAB:TMP2, <1 // No metatable: done.
|
|
+ | lbu TMP1, TAB:TMP2->nomm
|
|
+ | andi TMP1, TMP1, 1<<MM_index
|
|
+ | bnez TMP1, <1 // 'no __index' flag set: done.
|
|
+ | j ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
|
|
+ break;
|
|
+ case BC_TGETR:
|
|
+ | // RA = dst*8, RB = table*8, RC = key*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add RB, BASE, RB
|
|
+ | add RC, BASE, RC
|
|
+ | ld TAB:CARG1, 0(RB)
|
|
+ | lw CARG2, 0(RC)
|
|
+ | add RA, BASE, RA
|
|
+ | cleartp TAB:CARG1
|
|
+ | lw TMP0, TAB:CARG1->asize
|
|
+ | ld TMP1, TAB:CARG1->array
|
|
+ | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part?
|
|
+ | slliw TMP2, CARG2, 3
|
|
+ | add TMP3, TMP1, TMP2
|
|
+ | ld TMP1, 0(TMP3)
|
|
+ |->BC_TGETR_Z:
|
|
+ | ins_next1
|
|
+ | sd TMP1, 0(RA)
|
|
+ | ins_next2
|
|
+ break;
|
|
+
|
|
+ case BC_TSETV:
|
|
+ | // RA = src*8, RB = table*8, RC = key*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG2, BASE, RB
|
|
+ | add CARG3, BASE, RC
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | ld TMP2, 0(CARG3)
|
|
+ | add RA, BASE, RA
|
|
+ | checktab TAB:RB, ->vmeta_tsetv
|
|
+ | sext.w RC, TMP2
|
|
+ | checkint TMP2, >5
|
|
+ | lw TMP0, TAB:RB->asize
|
|
+ | ld TMP1, TAB:RB->array
|
|
+ | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part?
|
|
+ | slliw TMP2, RC, 3
|
|
+ | add TMP1, TMP1, TMP2
|
|
+ | lbu TMP3, TAB:RB->marked
|
|
+ | ld TMP0, 0(TMP1)
|
|
+ | ld CRET1, 0(RA)
|
|
+ | beq TMP0, TISNIL, >3
|
|
+ |1:
|
|
+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ | sd CRET1, 0(TMP1)
|
|
+ | bnez TMP2, >7
|
|
+ |2:
|
|
+ | ins_next
|
|
+ |
|
|
+ |3: // Check for __newindex if previous value is nil.
|
|
+ | ld TAB:TMP2, TAB:RB->metatable
|
|
+ | beqz TAB:TMP2, <1 // No metatable: done.
|
|
+ | lbu TMP2, TAB:TMP2->nomm
|
|
+ | andi TMP2, TMP2, 1<<MM_newindex
|
|
+ | bnez TMP2, <1 // 'no __newindex' flag set: done.
|
|
+ | j ->vmeta_tsetv
|
|
+ |5:
|
|
+ | gettp TMP0, TMP2
|
|
+ | addi TMP0, TMP0, -LJ_TSTR
|
|
+ | bxnez TMP0, ->vmeta_tsetv
|
|
+ | cleartp STR:RC, TMP2
|
|
+ | j ->BC_TSETS_Z // String key?
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:RB, TMP3, TMP0, <2
|
|
+ break;
|
|
+ case BC_TSETS:
|
|
+ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG2, BASE, RB
|
|
+ | sub CARG3, KBASE, RC
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | ld RC, -8(CARG3) // KBASE-8-str_const*8
|
|
+ | add RA, BASE, RA
|
|
+ | cleartp STR:RC
|
|
+ | checktab TAB:RB, ->vmeta_tsets1
|
|
+ |->BC_TSETS_Z:
|
|
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
|
|
+ | lw TMP0, TAB:RB->hmask
|
|
+ | lw TMP1, STR:RC->sid
|
|
+ | ld NODE:TMP2, TAB:RB->node
|
|
+ | sb x0, TAB:RB->nomm // Clear metamethod cache.
|
|
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
|
|
+ | slliw TMP0, TMP1, 5
|
|
+ | slliw TMP1, TMP1, 3
|
|
+ | subw TMP1, TMP0, TMP1
|
|
+ | li TMP3, LJ_TSTR
|
|
+ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|
|
+ | settp STR:RC, TMP3 // Tagged key to look for.
|
|
+ | fld FTMP0, 0(RA)
|
|
+ |1:
|
|
+ | ld TMP0, NODE:TMP2->key
|
|
+ | ld CARG2, NODE:TMP2->val
|
|
+ | ld NODE:TMP1, NODE:TMP2->next
|
|
+ | lbu TMP3, TAB:RB->marked
|
|
+ | bne TMP0, RC, >5
|
|
+ | ld TAB:TMP0, TAB:RB->metatable
|
|
+ | beq CARG2, TISNIL, >4 // Key found, but nil value?
|
|
+ |2:
|
|
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ | fsd FTMP0, NODE:TMP2->val
|
|
+ | bnez TMP3, >7
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |4: // Check for __newindex if previous value is nil.
|
|
+ | beqz TAB:TMP0, <2 // No metatable: done.
|
|
+ | lbu TMP0, TAB:TMP0->nomm
|
|
+ | andi TMP0, TMP0, 1<<MM_newindex
|
|
+ | bnez TMP0, <2 // 'no __newindex' flag set: done.
|
|
+ | j ->vmeta_tsets
|
|
+ |
|
|
+ |5: // Follow hash chain.
|
|
+ | mv NODE:TMP2, NODE:TMP1
|
|
+ | bnez NODE:TMP1, <1
|
|
+ | // End of hash chain: key not found, add a new one
|
|
+ |
|
|
+ | // But check for __newindex first.
|
|
+ | ld TAB:TMP2, TAB:RB->metatable
|
|
+ | addi CARG3, GL, offsetof(global_State, tmptv)
|
|
+ | beqz TAB:TMP2, >6 // No metatable: continue.
|
|
+ | lbu TMP0, TAB:TMP2->nomm
|
|
+ | andi TMP0, TMP0, 1<<MM_newindex
|
|
+ | bxeqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|
|
+ |6:
|
|
+ | sd RC, 0(CARG3)
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG2, TAB:RB
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv CARG1, L
|
|
+ | // (lua_State *L, GCtab *t, TValue *k)
|
|
+ | call_intern BC_TSETS, lj_tab_newkey
|
|
+ | // Returns TValue *.
|
|
+ | ld BASE, L->base
|
|
+ | fsd FTMP0, 0(CRET1)
|
|
+ | j <3 // No 2nd write barrier needed.
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:RB, TMP3, TMP0, <3
|
|
+ break;
|
|
+ case BC_TSETB:
|
|
+ | // RA = src*8, RB = table*8, RC = index*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG2, BASE, RB
|
|
+ | add RA, BASE, RA
|
|
+ | ld TAB:RB, 0(CARG2)
|
|
+ | srliw TMP0, RC, 3
|
|
+ | checktab RB, ->vmeta_tsetb
|
|
+ | lw TMP1, TAB:RB->asize
|
|
+ | ld TMP2, TAB:RB->array
|
|
+ | bxgeu TMP0, TMP1, ->vmeta_tsetb
|
|
+ | add RC, TMP2, RC
|
|
+ | ld TMP1, 0(RC)
|
|
+ | lbu TMP3, TAB:RB->marked
|
|
+ | beq TMP1, TISNIL, >5
|
|
+ |1:
|
|
+ | ld CRET1, 0(RA)
|
|
+ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ | sd CRET1, 0(RC)
|
|
+ | bnez TMP1, >7
|
|
+ |2:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Check for __newindex if previous value is nil.
|
|
+ | ld TAB:TMP2, TAB:RB->metatable
|
|
+ | beqz TAB:TMP2, <1 // No metatable: done.
|
|
+ | lbu TMP1, TAB:TMP2->nomm
|
|
+ | andi TMP1, TMP1, 1<<MM_newindex
|
|
+ | bnez TMP1, <1 // 'no __newindex' flag set: done.
|
|
+ | j ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:RB, TMP3, TMP0, <2
|
|
+ break;
|
|
+ case BC_TSETR:
|
|
+ | // RA = src*8, RB = table*8, RC = key*8
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | add CARG1, BASE, RB
|
|
+ | add CARG3, BASE, RC
|
|
+ | ld TAB:CARG2, 0(CARG1)
|
|
+ | lw CARG3, 0(CARG3)
|
|
+ | cleartp TAB:CARG2
|
|
+ | lbu TMP3, TAB:CARG2->marked
|
|
+ | lw TMP0, TAB:CARG2->asize
|
|
+ | ld TMP1, TAB:CARG2->array
|
|
+ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ | add RA, BASE, RA
|
|
+ | bnez TMP2, >7
|
|
+ |2:
|
|
+ | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part?
|
|
+ | slliw TMP2, CARG3, 3
|
|
+ | add CRET1, TMP1, TMP2
|
|
+ |->BC_TSETR_Z:
|
|
+ | ld TMP1, 0(RA)
|
|
+ | ins_next1
|
|
+ | sd TMP1, 0(CRET1)
|
|
+ | ins_next2
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG2, TMP3, CRET1, <2
|
|
+ break;
|
|
+
|
|
+ case BC_TSETM:
|
|
+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
|
|
+ | add RA, BASE, RA
|
|
+ |1:
|
|
+ | add TMP3, KBASE, RD
|
|
+ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
|
|
+ | addiw TMP0, MULTRES, -8
|
|
+ | lw TMP3, 0(TMP3) // Integer constant is in lo-word.
|
|
+ | srliw CARG3, TMP0, 3
|
|
+ | beqz TMP0, >4 // Nothing to copy?
|
|
+ | cleartp TAB:CARG2
|
|
+ | addw CARG3, CARG3, TMP3
|
|
+ | lw TMP2, TAB:CARG2->asize
|
|
+ | slliw TMP1, TMP3, 3
|
|
+ | lbu TMP3, TAB:CARG2->marked
|
|
+ | ld CARG1, TAB:CARG2->array
|
|
+ | bltu TMP2, CARG3, >5
|
|
+ | add TMP2, RA, TMP0
|
|
+ | add TMP1, TMP1, CARG1
|
|
+ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|
|
+ |3: // Copy result slots to table.
|
|
+ | ld CRET1, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | sd CRET1, 0(TMP1)
|
|
+ | addi TMP1, TMP1, 8
|
|
+ | bltu RA, TMP2, <3
|
|
+ | bnez TMP0, >7
|
|
+ |4:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Need to resize array part.
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | mv BASE, RD
|
|
+ | mv CARG1, L
|
|
+ | // (lua_State *L, GCtab *t, int nasize)
|
|
+ | call_intern BC_TSETM, lj_tab_reasize
|
|
+ | // Must not reallocate the stack.
|
|
+ | mv RD, BASE
|
|
+ | ld BASE, L->base // Reload BASE for lack of a saved register.
|
|
+ | j <1
|
|
+ |
|
|
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG2, TMP3, TMP0, <4
|
|
+ break;
|
|
+
|
|
+ /* -- Calls and vararg handling ----------------------------------------- */
|
|
+
|
|
+ case BC_CALLM:
|
|
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
|
|
+ | decode_RDtoRC8 NARGS8:RC, RD
|
|
+ | addw NARGS8:RC, NARGS8:RC, MULTRES
|
|
+ | j ->BC_CALL_Z
|
|
+ break;
|
|
+ case BC_CALL:
|
|
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
|
|
+ | decode_RDtoRC8 NARGS8:RC, RD
|
|
+ |->BC_CALL_Z:
|
|
+ | mv TMP2, BASE
|
|
+ | add BASE, BASE, RA
|
|
+ | ld LFUNC:RB, 0(BASE)
|
|
+ | addi BASE, BASE, 16
|
|
+ | addiw NARGS8:RC, NARGS8:RC, -8
|
|
+ | checkfunc RB, ->vmeta_call
|
|
+ | ins_call
|
|
+ break;
|
|
+
|
|
+ case BC_CALLMT:
|
|
+ | // RA = base*8, (RB = 0,) RC = extra_nargs*8
|
|
+ | addw NARGS8:RD, NARGS8:RD, MULTRES
|
|
+ | j ->BC_CALLT_Z1
|
|
+ break;
|
|
+ case BC_CALLT:
|
|
+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
|
|
+ |->BC_CALLT_Z1:
|
|
+ | add RA, BASE, RA
|
|
+ | ld LFUNC:RB, 0(RA)
|
|
+ | mv NARGS8:RC, RD
|
|
+ | ld TMP1, FRAME_PC(BASE)
|
|
+ | addi RA, RA, 16
|
|
+ | addiw NARGS8:RC, NARGS8:RC, -8
|
|
+ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt
|
|
+ |->BC_CALLT_Z:
|
|
+ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
|
|
+ | lbu TMP3, LFUNC:CARG3->ffid
|
|
+ | xori TMP2, TMP1, FRAME_VARG
|
|
+ | bnez TMP0, >7
|
|
+ |1:
|
|
+ | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
|
|
+ | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function?
|
|
+ | mv TMP2, BASE
|
|
+ | mv RB, CARG3
|
|
+ | mv TMP3, NARGS8:RC
|
|
+ | beqz NARGS8:RC, >3
|
|
+ |2:
|
|
+ | ld CRET1, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | addiw TMP3, TMP3, -8
|
|
+ | sd CRET1, 0(TMP2)
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | bnez TMP3, <2
|
|
+ |3:
|
|
+ | or TMP0, TMP0, CARG4
|
|
+ | beqz TMP0, >5
|
|
+ |4:
|
|
+ | ins_callt
|
|
+ |
|
|
+ |5: // Tailcall to a fast function with a Lua frame below.
|
|
+ | lw INS, -4(TMP1)
|
|
+ | decode_RA8 RA, INS
|
|
+ | sub TMP1, BASE, RA
|
|
+ | ld TMP1, -32(TMP1)
|
|
+ | cleartp LFUNC:TMP1
|
|
+ | ld TMP1, LFUNC:TMP1->pc
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
|
|
+ | j <4
|
|
+ |
|
|
+ |7: // Tailcall from a vararg function.
|
|
+ | andi CARG4, TMP2, FRAME_TYPEP
|
|
+ | sub TMP2, BASE, TMP2 // Relocate BASE down.
|
|
+ | bnez CARG4, <1 // Vararg frame below?
|
|
+ | mv BASE, TMP2
|
|
+ | ld TMP1, FRAME_PC(TMP2)
|
|
+ | andi TMP0, TMP1, FRAME_TYPE
|
|
+ | j <1
|
|
+ break;
|
|
+
|
|
+ case BC_ITERC:
|
|
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
|
|
+ | mv TMP2, BASE // Save old BASE for vmeta_call.
|
|
+ | add BASE, BASE, RA
|
|
+ | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1.
|
|
+ | ld CARG1, -16(BASE)
|
|
+ | ld CARG2, -8(BASE)
|
|
+ | li NARGS8:RC, 16 // Iterators get 2 arguments.
|
|
+ | sd RB, 0(BASE) // Copy callable.
|
|
+ | sd CARG1, 16(BASE) // Copy state.
|
|
+ | sd CARG2, 24(BASE) // Copy control var.
|
|
+ | addi BASE, BASE, 16
|
|
+ | checkfunc RB, ->vmeta_call
|
|
+ | ins_call
|
|
+ break;
|
|
+
|
|
+ case BC_ITERN:
|
|
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
|
|
+ |->vm_IITERN:
|
|
+ | add RA, BASE, RA
|
|
+ | ld TAB:RB, -16(RA)
|
|
+ | lw RC, -8(RA) // Get index from control var.
|
|
+ | cleartp TAB:RB
|
|
+ | addi PC, PC, 4
|
|
+ | lw TMP0, TAB:RB->asize
|
|
+ | ld TMP1, TAB:RB->array
|
|
+ | slli CARG3, TISNUM, 47
|
|
+ |1: // Traverse array part.
|
|
+ | bleu TMP0, RC, >5 // Index points after array part?
|
|
+ | slliw TMP3, RC, 3
|
|
+ | add TMP3, TMP1, TMP3
|
|
+ | ld CARG1, 0(TMP3)
|
|
+ | lhu RD, -4+OFS_RD(PC) // ITERL RD
|
|
+ | or TMP2, RC, CARG3
|
|
+ | addiw RC, RC, 1
|
|
+ | beq CARG1, TISNIL, <1 // Skip holes in array part.
|
|
+ | sd TMP2, 0(RA)
|
|
+ | sd CARG1, 8(RA)
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | decode_BC4b RD
|
|
+ | add RD, RD, TMP3
|
|
+ | sw RC, -8(RA) // Update control var.
|
|
+ | add PC, PC, RD
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Traverse hash part.
|
|
+ | lw TMP1, TAB:RB->hmask
|
|
+ | subw RC, RC, TMP0
|
|
+ | ld TMP2, TAB:RB->node
|
|
+ |6:
|
|
+ | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1.
|
|
+ | slliw TMP3, RC, 5
|
|
+ | slliw RB, RC, 3
|
|
+ | subw TMP3, TMP3, RB
|
|
+ | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8)
|
|
+ | ld CARG1, 0(NODE:TMP3)
|
|
+ | lhu RD, -4+OFS_RD(PC) // ITERL RD
|
|
+ | addiw RC, RC, 1
|
|
+ | beq CARG1, TISNIL, <6 // Skip holes in hash part.
|
|
+ | ld CARG2, NODE:TMP3->key
|
|
+ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | sd CARG1, 8(RA)
|
|
+ | addw RC, RC, TMP0
|
|
+ | decode_BC4b RD
|
|
+ | addw RD, RD, TMP3
|
|
+ | sd CARG2, 0(RA)
|
|
+ | add PC, PC, RD
|
|
+ | sw RC, -8(RA) // Update control var.
|
|
+ | j <3
|
|
+ break;
|
|
+
|
|
+ case BC_ISNEXT:
|
|
+ | // RA = base*8, RD = target (points to ITERN)
|
|
+ | add RA, BASE, RA
|
|
+ | srliw TMP0, RD, 1
|
|
+ | ld CFUNC:CARG1, -24(RA)
|
|
+ | add TMP0, PC, TMP0
|
|
+ | ld CARG2, -16(RA)
|
|
+ | ld CARG3, -8(RA)
|
|
+ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ | checkfunc CFUNC:CARG1, >5
|
|
+ | gettp CARG2, CARG2
|
|
+ | addi CARG2, CARG2, -LJ_TTAB
|
|
+ | lbu TMP1, CFUNC:CARG1->ffid
|
|
+ | addi CARG3, CARG3, -LJ_TNIL
|
|
+ | or TMP3, CARG2, CARG3
|
|
+ | addi TMP1, TMP1, -FF_next_N
|
|
+ | or TMP3, TMP3, TMP1
|
|
+ | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff
|
|
+ | bnez TMP3, >5
|
|
+ | add PC, TMP0, TMP2
|
|
+ | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)
|
|
+ | slli TMP1, TMP1, 32
|
|
+ | sd TMP1, -8(RA)
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |5: // Despecialize bytecode if any of the checks fail.
|
|
+ | li TMP3, BC_JMP
|
|
+ | li TMP1, BC_ITERC
|
|
+ | sb TMP3, -4+OFS_OP(PC)
|
|
+ | add PC, TMP0, TMP2
|
|
+ | sb TMP1, OFS_OP(PC)
|
|
+ | j <1
|
|
+ break;
|
|
+
|
|
+ case BC_VARG:
|
|
+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
|
|
+ | ld TMP0, FRAME_PC(BASE)
|
|
+ | decode_RDtoRC8 RC, RD
|
|
+ | decode_RB8 RB, INS
|
|
+ | add RC, BASE, RC
|
|
+ | add RA, BASE, RA
|
|
+ | addi RC, RC, FRAME_VARG
|
|
+ | add TMP2, RA, RB
|
|
+ | addi TMP3, BASE, -16 // TMP3 = vtop
|
|
+ | sub RC, RC, TMP0 // RC = vbase
|
|
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
|
|
+ | sub TMP1, TMP3, RC
|
|
+ | beqz RB, >5 // Copy all varargs?
|
|
+ | addi TMP2, TMP2, -16
|
|
+ |1: // Copy vararg slots to destination slots.
|
|
+ | ld CARG1, 0(RC)
|
|
+ | sltu TMP0, RC, TMP3
|
|
+ | addi RC, RC, 8
|
|
+ | bnez TMP0, >2
|
|
+ | mv CARG1, TISNIL
|
|
+ |2:
|
|
+ | sd CARG1, 0(RA)
|
|
+ | sltu TMP0, RA, TMP2
|
|
+ | addi RA, RA, 8
|
|
+ | bnez TMP0, <1
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Copy all varargs.
|
|
+ | ld TMP0, L->maxstack
|
|
+ | li MULTRES, 8 // MULTRES = (0+1)*8
|
|
+ | blez TMP1, <3 // No vararg slots?
|
|
+ | add TMP2, RA, TMP1
|
|
+ | addi MULTRES, TMP1, 8
|
|
+ | bltu TMP0, TMP2, >7
|
|
+ |6:
|
|
+ | ld CRET1, 0(RC)
|
|
+ | addi RC, RC, 8
|
|
+ | sd CRET1, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | bltu RC, TMP3, <6 // More vararg slots?
|
|
+ | j <3
|
|
+ |
|
|
+ |7: // Grow stack for varargs.
|
|
+ | sd RA, L->top
|
|
+ | sub RA, RA, BASE
|
|
+ | sd BASE, L->base
|
|
+ | sub BASE, RC, BASE // Need delta, because BASE may change.
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | srliw CARG2, TMP1, 3
|
|
+ | mv CARG1, L
|
|
+ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n)
|
|
+ | mv RC, BASE
|
|
+ | ld BASE, L->base
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, BASE, RC
|
|
+ | addi TMP3, BASE, -16
|
|
+ | j <6
|
|
+ break;
|
|
+
|
|
+ /* -- Returns ----------------------------------------------------------- */
|
|
+
|
|
+ case BC_RETM:
|
|
+ | // RA = results*8, RD = extra_nresults*8
|
|
+ | addw RD, RD, MULTRES
|
|
+ | j ->BC_RET_Z1
|
|
+ break;
|
|
+
|
|
+ case BC_RET:
|
|
+ | // RA = results*8, RD = (nresults+1)*8
|
|
+ |->BC_RET_Z1:
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | add RA, BASE, RA
|
|
+ | mv MULTRES, RD
|
|
+ |1:
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | xori TMP1, PC, FRAME_VARG
|
|
+ | bnez TMP0, ->BC_RETV_Z
|
|
+ |
|
|
+ |->BC_RET_Z:
|
|
+ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
|
|
+ | lw INS, -4(PC)
|
|
+ | addi TMP2, BASE, -16
|
|
+ | addi RC, RD, -8
|
|
+ | decode_RA8 TMP0, INS
|
|
+ | decode_RB8 RB, INS
|
|
+ | sub BASE, TMP2, TMP0
|
|
+ | add TMP3, TMP2, RB
|
|
+ | beqz RC, >3
|
|
+ |2:
|
|
+ | ld CRET1, 0(RA)
|
|
+ | addi RA, RA, 8
|
|
+ | addi RC, RC, -8
|
|
+ | sd CRET1, 0(TMP2)
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | bnez RC, <2
|
|
+ |3:
|
|
+ | addi TMP3, TMP3, -8
|
|
+ |5:
|
|
+ | bltu TMP2, TMP3, >6
|
|
+ | ld LFUNC:TMP1, FRAME_FUNC(BASE)
|
|
+ | cleartp LFUNC:TMP1
|
|
+ | ld TMP1, LFUNC:TMP1->pc
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1)
|
|
+ | ins_next
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | sd TISNIL, 0(TMP2)
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | j <5
|
|
+ |
|
|
+ |->BC_RETV_Z: // Non-standard return case.
|
|
+ | andi TMP2, TMP1, FRAME_TYPEP
|
|
+ | bxnez TMP2, ->vm_return
|
|
+ | // Return from vararg function: relocate BASE down.
|
|
+ | sub BASE, BASE, TMP1
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | j <1
|
|
+ break;
|
|
+
|
|
+ case BC_RET0: case BC_RET1:
|
|
+ | // RA = results*8, RD = (nresults+1)*8
|
|
+ | ld PC, FRAME_PC(BASE)
|
|
+ | add RA, BASE, RA
|
|
+ | mv MULTRES, RD
|
|
+ | andi TMP0, PC, FRAME_TYPE
|
|
+ | xori TMP1, PC, FRAME_VARG
|
|
+ | bnez TMP0, ->BC_RETV_Z
|
|
+ | lw INS, -4(PC)
|
|
+ | addi TMP2, BASE, -16
|
|
+ if (op == BC_RET1) {
|
|
+ | ld CRET1, 0(RA)
|
|
+ }
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RA8 RA, INS
|
|
+ | sub BASE, TMP2, RA
|
|
+ if (op == BC_RET1) {
|
|
+ | sd CRET1, 0(TMP2)
|
|
+ }
|
|
+ |5:
|
|
+ | bltu RD, RB, >6
|
|
+ | ld TMP1, FRAME_FUNC(BASE)
|
|
+ | cleartp LFUNC:TMP1
|
|
+ | ld TMP1, LFUNC:TMP1->pc
|
|
+ | ins_next1
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1)
|
|
+ | ins_next2
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | addi RD, RD, 8
|
|
+ if (op == BC_RET1) {
|
|
+ | sd TISNIL, 0(TMP2)
|
|
+ } else {
|
|
+ | sd TISNIL, -8(TMP2)
|
|
+ }
|
|
+ | j <5
|
|
+ break;
|
|
+
|
|
+ /* -- Loops and branches ------------------------------------------------ */
|
|
+
|
|
+ case BC_FORL:
|
|
+ | // Fall through. Assumes BC_IFORL follows.
|
|
+ break;
|
|
+
|
|
+ case BC_JFORI:
|
|
+ case BC_JFORL:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_FORI:
|
|
+ case BC_IFORL:
|
|
+ | // RA = base*8, RD = target (after end of loop or start of loop)
|
|
+ vk = (op == BC_IFORL || op == BC_JFORL);
|
|
+ | add RA, BASE, RA
|
|
+ | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX
|
|
+ | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP
|
|
+ | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP
|
|
+ | gettp CARG4, CARG1
|
|
+ | gettp CARG5, CARG2
|
|
+ | gettp CARG6, CARG3
|
|
+ if (op != BC_JFORL) {
|
|
+ | srliw RD, RD, 1
|
|
+ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2
|
|
+ | add TMP2, RD, TMP2
|
|
+ }
|
|
+ | bne CARG4, TISNUM, >3
|
|
+ | sext.w CARG4, CARG1 // start
|
|
+ | sext.w CARG3, CARG3 // stop
|
|
+ if (!vk) { // init
|
|
+ | bxne CARG6, TISNUM, ->vmeta_for
|
|
+ | bxne CARG5, TISNUM, ->vmeta_for
|
|
+ | bfextri TMP0, CARG2, 31, 31 // sign
|
|
+ | slt CARG2, CARG3, CARG4
|
|
+ | slt TMP1, CARG4, CARG3
|
|
+ | neg TMP4, TMP0
|
|
+ | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2
|
|
+ | and TMP0, TMP0, TMP4
|
|
+ | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop
|
|
+ } else {
|
|
+ | sext.w CARG5, CARG2 // step
|
|
+ | addw CARG1, CARG4, CARG5 // start + step
|
|
+ | xor TMP3, CARG1, CARG4 // y^a
|
|
+ | xor TMP1, CARG1, CARG5 // y^b
|
|
+ | and TMP3, TMP3, TMP1
|
|
+ | slt TMP1, CARG1, CARG3 // start+step < stop ?
|
|
+ | slt CARG3, CARG3, CARG1 // stop < start+step ?
|
|
+ | sltz TMP0, CARG5 // step < 0 ?
|
|
+ | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow.
|
|
+ | neg TMP4, TMP0
|
|
+ | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? TMP1 : CARG3
|
|
+ | and TMP1, TMP1, TMP4
|
|
+ | xor CARG3, CARG3, TMP1
|
|
+ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue
|
|
+ | zext.w CARG1, CARG1
|
|
+ | settp_b CARG1, TISNUM
|
|
+ | sd CARG1, FORL_IDX*8(RA)
|
|
+ }
|
|
+ |1:
|
|
+ if (op == BC_FORI) {
|
|
+ | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS
|
|
+ | and TMP2, TMP2, TMP4
|
|
+ | add PC, PC, TMP2
|
|
+ } else if (op == BC_JFORI) {
|
|
+ | add PC, PC, TMP2
|
|
+ | lhu RD, -4+OFS_RD(PC)
|
|
+ } else if (op == BC_IFORL) {
|
|
+ | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back
|
|
+ | and TMP2, TMP2, TMP4
|
|
+ | add PC, PC, TMP2
|
|
+ }
|
|
+ | ins_next1
|
|
+ | sd CARG1, FORL_EXT*8(RA)
|
|
+ |2:
|
|
+ if (op == BC_JFORI) {
|
|
+ | decode_RD8b RD
|
|
+ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: execute the loop
|
|
+ } else if (op == BC_JFORL) {
|
|
+ | beqz CARG2, =>BC_JLOOP
|
|
+ }
|
|
+ | ins_next2
|
|
+ |
|
|
+ |3: // FP loop.
|
|
+ | fld FTMP0, FORL_IDX*8(RA) // start
|
|
+ | fld FTMP1, FORL_STOP*8(RA) // stop
|
|
+ | ld TMP0, FORL_STEP*8(RA) // step
|
|
+ | sltz CARG2, TMP0 // step < 0 ?
|
|
+ | neg CARG2, CARG2
|
|
+ if (!vk) {
|
|
+ | sltiu TMP3, CARG4, LJ_TISNUM // start is number ?
|
|
+ | sltiu TMP0, CARG5, LJ_TISNUM // step is number ?
|
|
+ | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ?
|
|
+ | and TMP3, TMP3, TMP1
|
|
+ | and TMP0, TMP0, TMP3
|
|
+ | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number
|
|
+ | flt.d TMP3, FTMP0, FTMP1 // start < stop ?
|
|
+ | flt.d TMP4, FTMP1, FTMP0 // stop < start ?
|
|
+ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4
|
|
+ | and TMP0, TMP0, CARG2
|
|
+ | xor CARG2, TMP4, TMP0 // CARG2=0:+,start<stop or -,start>stop
|
|
+ | j <1
|
|
+ } else {
|
|
+ | fld FTMP3, FORL_STEP*8(RA)
|
|
+ | fadd.d FTMP0, FTMP0, FTMP3 // start + step
|
|
+ | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ?
|
|
+ | flt.d TMP4, FTMP1, FTMP0
|
|
+ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4
|
|
+ | and TMP0, TMP0, CARG2
|
|
+ | xor CARG2, TMP4, TMP0
|
|
+ if (op == BC_IFORL) {
|
|
+ | addi TMP3, CARG2, -1
|
|
+ | and TMP2, TMP2, TMP3
|
|
+ | add PC, PC, TMP2
|
|
+ }
|
|
+ | fsd FTMP0, FORL_IDX*8(RA)
|
|
+ | ins_next1
|
|
+ | fsd FTMP0, FORL_EXT*8(RA)
|
|
+ | j <2
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case BC_ITERL:
|
|
+ | // Fall through. Assumes BC_IITERL follows.
|
|
+ break;
|
|
+
|
|
+ case BC_JITERL:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_IITERL:
|
|
+ | // RA = base*8, RD = target
|
|
+ | add RA, BASE, RA
|
|
+ | ld TMP1, 0(RA)
|
|
+ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
|
|
+ if (op == BC_JITERL) {
|
|
+ | sd TMP1,-8(RA)
|
|
+ | j =>BC_JLOOP
|
|
+ } else {
|
|
+ | branch_RD // Otherwise save control var + branch.
|
|
+ | sd TMP1, -8(RA)
|
|
+ }
|
|
+ |1:
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_LOOP:
|
|
+ | // Fall through. Assumes BC_ILOOP follows.
|
|
+ break;
|
|
+
|
|
+ case BC_ILOOP:
|
|
+ | // RA = base*8, RD = target (loop extent)
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_JLOOP:
|
|
+ break;
|
|
+
|
|
+ case BC_JMP:
|
|
+ | // RA = base*8 (only used by trace recorder), RD = target
|
|
+ | branch_RD // PC + (jump - 0x8000)<<2
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Function headers -------------------------------------------------- */
|
|
+
|
|
+ case BC_FUNCF:
|
|
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
|
|
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
|
|
+ break;
|
|
+
|
|
+ case BC_JFUNCF:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_IFUNCF:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
|
|
+ | ld TMP2, L->maxstack
|
|
+ | lbu TMP1, -4+PC2PROTO(numparams)(PC)
|
|
+ | ld KBASE, -4+PC2PROTO(k)(PC)
|
|
+ | bxltu TMP2, RA, ->vm_growstack_l
|
|
+ | slliw TMP1, TMP1, 3 // numparams*8
|
|
+ |2:
|
|
+ | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters.
|
|
+ if (op == BC_JFUNCF) {
|
|
+ | decode_RD8 RD, INS
|
|
+ | j =>BC_JLOOP
|
|
+ } else {
|
|
+ | ins_next
|
|
+ }
|
|
+ |
|
|
+ |3: // Clear missing parameters.
|
|
+ | add TMP0, BASE, NARGS8:RC
|
|
+ | sd TISNIL, 0(TMP0)
|
|
+ | addiw NARGS8:RC, NARGS8:RC, 8
|
|
+ | j <2
|
|
+ break;
|
|
+
|
|
+ case BC_JFUNCV:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ | NYI // NYI: compiled vararg functions
|
|
+ break; /* NYI: compiled vararg functions. */
|
|
+
|
|
+ case BC_IFUNCV:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
|
|
+ | li TMP0, LJ_TFUNC
|
|
+ | add TMP1, BASE, RC
|
|
+ | ld TMP2, L->maxstack
|
|
+ | settp LFUNC:RB, TMP0
|
|
+ | add TMP0, RA, RC
|
|
+ | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
|
|
+ | addi TMP2, TMP2, -8
|
|
+ | addi TMP3, RC, 16+FRAME_VARG
|
|
+ | ld KBASE, -4+PC2PROTO(k)(PC)
|
|
+ | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
|
|
+ | bxgeu TMP0, TMP2, ->vm_growstack_l
|
|
+ | lbu TMP2, -4+PC2PROTO(numparams)(PC)
|
|
+ | mv RA, BASE
|
|
+ | mv RC, TMP1
|
|
+ | ins_next1
|
|
+ | addi BASE, TMP1, 16
|
|
+ | beqz TMP2, >2
|
|
+ |1:
|
|
+ | ld TMP0, 0(RA)
|
|
+ | sltu CARG2, RA, RC // Less args than parameters?
|
|
+ | addi RA, RA, 8
|
|
+ | addi TMP1, TMP1, 8
|
|
+ | addiw TMP2, TMP2, -1
|
|
+ | beqz CARG2, >3
|
|
+ | neg TMP4, CARG2 // Clear old fixarg slot (help the GC).
|
|
+ | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0
|
|
+ | and TMP3, TMP3, TMP4
|
|
+ | xor CARG1, TMP0, TMP3
|
|
+ | sd CARG1, -8(RA)
|
|
+ | sd TMP0, 8(TMP1)
|
|
+ | bnez TMP2, <1
|
|
+ |2:
|
|
+ | ins_next2
|
|
+ |3:
|
|
+ | neg TMP4, CARG2 // Clear missing fixargs.
|
|
+ | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL
|
|
+ | and TMP3, TMP3, TMP4
|
|
+ | xor TMP0, TISNIL, TMP3
|
|
+ | sd TMP0, 8(TMP1)
|
|
+ | bnez TMP2, <1
|
|
+ | j <2
|
|
+ break;
|
|
+
|
|
+ case BC_FUNCC:
|
|
+ case BC_FUNCCW:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
|
|
+ if (op == BC_FUNCC) {
|
|
+ | ld CARG4, CFUNC:RB->f
|
|
+ } else {
|
|
+ | ld CARG4, GL->wrapf
|
|
+ }
|
|
+ | add TMP1, RA, NARGS8:RC
|
|
+ | ld TMP2, L->maxstack
|
|
+ | add RC, BASE, NARGS8:RC
|
|
+ | sd BASE, L->base // base of currently executing function
|
|
+ | sd RC, L->top
|
|
+ | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack.
|
|
+ | li_vmstate C // li TMP0, ~LJ_VMST_C
|
|
+ if (op == BC_FUNCCW) {
|
|
+ | ld CARG2, CFUNC:RB->f
|
|
+ }
|
|
+ | mv CARG1, L
|
|
+ | st_vmstate // sw TMP0, GL->vmstate
|
|
+ | jalr CARG4 // (lua_State *L [, lua_CFunction f])
|
|
+ | // Returns nresults.
|
|
+ | ld BASE, L->base
|
|
+ | ld TMP1, L->top
|
|
+ | sd L, GL->cur_L
|
|
+ | slliw RD, CRET1, 3
|
|
+ | li_vmstate INTERP
|
|
+ | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
|
|
+ | sub RA, TMP1, RD // RA = L->top - nresults*8
|
|
+ | st_vmstate
|
|
+ | j ->vm_returnc
|
|
+ break;
|
|
+
|
|
+ /* ---------------------------------------------------------------------- */
|
|
+
|
|
+ default:
|
|
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
|
|
+ exit(2);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Build the backend: shared subroutines first, then one handler per bytecode opcode; returns BC__MAX. */
|
|
+static int build_backend(BuildCtx *ctx)
|
|
+{
|
|
+ int op;
|
|
+
|
|
+ dasm_growpc(Dst, BC__MAX); /* Reserve BC__MAX DynASM pc labels (handlers are referenced as =>BC_*). */
|
|
+
|
|
+ build_subroutines(ctx);
|
|
+
|
|
+ |.code_op
|
|
+ for (op = 0; op < BC__MAX; op++) /* Emit every opcode's handler in numeric order. */
|
|
+ build_ins(ctx, (BCOp)op, op);
|
|
+
|
|
+ return BC__MAX;
|
|
+}
|
|
+
|
|
+/* Emit pseudo frame-info for all assembler functions (currently a no-op in this RISC-V backend). */
|
|
+static void emit_asm_debug(BuildCtx *ctx)
|
|
+{
|
|
+ /* Empty in this version: no frame-info is emitted here and ctx is unused. */
|
|
+}
|
|
|
|
From 33d45379ae5fa65fc0824a2463e319d3f275e918 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:19:25 +0800
|
|
Subject: [PATCH 07/22] riscv(support): add target definition
|
|
|
|
---
|
|
src/lj_target.h | 4 +-
|
|
src/lj_target_riscv.h | 542 ++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 545 insertions(+), 1 deletion(-)
|
|
create mode 100644 src/lj_target_riscv.h
|
|
|
|
diff --git a/src/lj_target.h b/src/lj_target.h
|
|
index 47592432f0..08637f6934 100644
|
|
--- a/src/lj_target.h
|
|
+++ b/src/lj_target.h
|
|
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
|
|
/* Bitset for registers. 32 registers suffice for most architectures.
|
|
** Note that one set holds bits for both GPRs and FPRs.
|
|
*/
|
|
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
|
|
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
|
|
typedef uint64_t RegSet;
|
|
#define RSET_BITS 6
|
|
#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
|
|
@@ -143,6 +143,8 @@ typedef uint32_t RegCost;
|
|
#include "lj_target_ppc.h"
|
|
#elif LJ_TARGET_MIPS
|
|
#include "lj_target_mips.h"
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#include "lj_target_riscv.h"
|
|
#else
|
|
#error "Missing include for target CPU"
|
|
#endif
|
|
diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h
|
|
new file mode 100644
|
|
index 0000000000..a2a907dbad
|
|
--- /dev/null
|
|
+++ b/src/lj_target_riscv.h
|
|
@@ -0,0 +1,542 @@
|
|
+/*
|
|
+** Definitions for RISC-V CPUs.
|
|
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
|
|
+*/
|
|
+
|
|
+#ifndef _LJ_TARGET_RISCV_H
|
|
+#define _LJ_TARGET_RISCV_H
|
|
+
|
|
+/* -- Registers IDs ------------------------------------------------------- */
|
|
+
|
|
+#define GPRDEF(_) \
|
|
+ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
|
|
+ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
|
|
+ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
|
|
+ _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31)
|
|
+#define FPRDEF(_) \
|
|
+ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
|
|
+ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
|
|
+ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
|
|
+ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
|
|
+#define VRIDDEF(_)
|
|
+
|
|
+#define RIDENUM(name) RID_##name,
|
|
+
|
|
+enum { /* Register IDs: the 32 GPRs, then the 32 FPRs, then aliases and range bounds. */
|
|
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
|
|
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
|
|
+ RID_MAX,
|
|
+ RID_ZERO = RID_X0,
|
|
+ RID_TMP = RID_RA, /* Aliases the RA entry (index 1) of GPRDEF. */
|
|
+ RID_GP = RID_X3,
|
|
+ RID_TP = RID_X4,
|
|
+
|
|
+ /* Calling conventions. */
|
|
+ RID_RET = RID_X10,
|
|
+ RID_RETLO = RID_X10,
|
|
+ RID_RETHI = RID_X11,
|
|
+ RID_FPRET = RID_F10,
|
|
+ RID_CFUNCADDR = RID_X5,
|
|
+
|
|
+ /* These definitions must match with the *.dasc file(s): */
|
|
+ RID_BASE = RID_X18, /* Interpreter BASE. */
|
|
+ RID_LPC = RID_X20, /* Interpreter PC. */
|
|
+ RID_GL = RID_X21, /* Interpreter GL. */
|
|
+ RID_LREG = RID_X23, /* Interpreter L. */
|
|
+
|
|
+ /* Register ranges [min, max) and number of registers. */
|
|
+ RID_MIN_GPR = RID_X0,
|
|
+ RID_MAX_GPR = RID_X31+1,
|
|
+ RID_MIN_FPR = RID_MAX_GPR,
|
|
+ RID_MAX_FPR = RID_F31+1,
|
|
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
|
|
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* All 32 FPRs (F0-F31). */
|
|
+};
|
|
+
|
|
+#define RID_NUM_KREF RID_NUM_GPR
|
|
+#define RID_MIN_KREF RID_X0
|
|
+
|
|
+/* -- Register sets ------------------------------------------------------- */
|
|
+
|
|
+/* Make use of all registers, except ZERO, TMP, SP, GP, TP and GL (CFUNCADDR is not part of RSET_FIXED). */
|
|
+#define RSET_FIXED \
|
|
+ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
|
|
+ RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL))
|
|
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
|
|
+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
|
|
+
|
|
+#define RSET_ALL (RSET_GPR|RSET_FPR)
|
|
+#define RSET_INIT RSET_ALL
|
|
+
|
|
+#define RSET_SCRATCH_GPR \
|
|
+ (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\
|
|
+ RSET_RANGE(RID_X10, RID_X17+1))
|
|
+
|
|
+#define RSET_SCRATCH_FPR \
|
|
+ (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\
|
|
+ RSET_RANGE(RID_F28, RID_F31+1))
|
|
+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
|
|
+
|
|
+#define REGARG_FIRSTGPR RID_X10
|
|
+#define REGARG_LASTGPR RID_X17
|
|
+#define REGARG_NUMGPR 8
|
|
+
|
|
+#define REGARG_FIRSTFPR RID_F10
|
|
+#define REGARG_LASTFPR RID_F17
|
|
+#define REGARG_NUMFPR 8
|
|
+
|
|
+/* -- Spill slots --------------------------------------------------------- */
|
|
+
|
|
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
|
|
+**
|
|
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
|
|
+** This definition must match with the *.dasc file(s).
|
|
+**
|
|
+** SPS_FIRST: First spill slot for general use.
|
|
+*/
|
|
+#if LJ_32
|
|
+#define SPS_FIXED 5
|
|
+#else
|
|
+#define SPS_FIXED 4
|
|
+#endif
|
|
+#define SPS_FIRST 4
|
|
+
|
|
+#define SPOFS_TMP 0
|
|
+
|
|
+#define sps_scale(slot) (4 * (int32_t)(slot))
|
|
+#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
|
|
+
|
|
+/* -- Exit state ---------------------------------------------------------- */
|
|
+/* This definition must match with the *.dasc file(s). */
|
|
+typedef struct { /* Layout is shared with the *.dasc file(s): keep field order and sizes. */
|
|
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
|
|
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
|
|
+ int32_t spill[256]; /* Spill slots (32 bits wide each). */
|
|
+} ExitState;
|
|
+
|
|
+/* Highest exit + 1 indicates stack check. */
|
|
+#define EXITSTATE_CHECKEXIT 1
|
|
+
|
|
+/* Return the address of per-trace exit stub number exitno, searching forward from p. */
|
|
+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
|
|
+{
|
|
+ uint32_t *stub = p;
|
|
+ for (; *stub == 0x00000013; stub++) {} /* Step over NOP words (0x00000013 is RISCVI_NOP). */
|
|
+ return stub + 4 + exitno; /* 4 words past the first non-NOP word, then one word per exit. */
|
|
+}
|
|
+/* Avoid dependence on lj_jit.h if only including lj_target.h. */
|
|
+#define exitstub_trace_addr(T, exitno) \
|
|
+ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
|
|
+
|
|
+/* -- Instructions -------------------------------------------------------- */
|
|
+
|
|
+/* Instruction fields. */
|
|
+#define RISCVF_D(d) (((d)&31) << 7)
|
|
+#define RISCVF_S1(r) (((r)&31) << 15)
|
|
+#define RISCVF_S2(r) (((r)&31) << 20)
|
|
+#define RISCVF_S3(r) (((r)&31) << 27)
|
|
+#define RISCVF_FUNCT2(f) (((f)&3) << 25)
|
|
+#define RISCVF_FUNCT3(f) (((f)&7) << 12)
|
|
+#define RISCVF_FUNCT7(f) (((f)&127) << 25)
|
|
+#define RISCVF_SHAMT(s) ((s) << 20)
|
|
+#define RISCVF_RM(m) (((m)&7) << 12)
|
|
+#define RISCVF_IMMI(i) ((i) << 20)
|
|
+#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7)
|
|
+#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7)
|
|
+#define RISCVF_IMMU(i) (((i)&0xfffff) << 12)
|
|
+#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20)
|
|
+
|
|
+/* Encode helpers. */
|
|
+#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800))
|
|
+#define RISCVF_W_LO(w) ((w)&0xfff)
|
|
+#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff)
|
|
+#define RISCVF_LO(i) RISCVF_W_LO(i)
|
|
+
|
|
+/* Check for valid field range. Each macro is nonzero when the value fits the named immediate field. */
|
|
+#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << ((b)-1))) >> (b)) == 0) /* x fits a b-bit signed field. */
|
|
+#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) /* x fits a b-bit unsigned field. */
|
|
+#define checku11(i) RISCVF_UIMM_OK(i, 11)
|
|
+#define checki12(i) RISCVF_SIMM_OK(i, 12)
|
|
+#define checki13(i) RISCVF_SIMM_OK(i, 13)
|
|
+#define checki20(i) RISCVF_SIMM_OK(i, 20)
|
|
+#define checki21(i) RISCVF_SIMM_OK(i, 21)
|
|
+#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) /* NOTE(review): upper bound presumably leaves room for the +0x800 rounding in RISCVF_W_HI - confirm. */
|
|
+
|
|
+typedef enum RISCVIns {
|
|
+
|
|
+ /* --- RVI --- */
|
|
+ RISCVI_LUI = 0x00000037,
|
|
+ RISCVI_AUIPC = 0x00000017,
|
|
+
|
|
+ RISCVI_JAL = 0x0000006f,
|
|
+ RISCVI_JALR = 0x00000067,
|
|
+
|
|
+ RISCVI_ADDI = 0x00000013,
|
|
+ RISCVI_SLTI = 0x00002013,
|
|
+ RISCVI_SLTIU = 0x00003013,
|
|
+ RISCVI_XORI = 0x00004013,
|
|
+ RISCVI_ORI = 0x00006013,
|
|
+ RISCVI_ANDI = 0x00007013,
|
|
+
|
|
+ RISCVI_SLLI = 0x00001013,
|
|
+ RISCVI_SRLI = 0x00005013,
|
|
+ RISCVI_SRAI = 0x40005013,
|
|
+
|
|
+ RISCVI_ADD = 0x00000033,
|
|
+ RISCVI_SUB = 0x40000033,
|
|
+ RISCVI_SLL = 0x00001033,
|
|
+ RISCVI_SLT = 0x00002033,
|
|
+ RISCVI_SLTU = 0x00003033,
|
|
+ RISCVI_XOR = 0x00004033,
|
|
+ RISCVI_SRL = 0x00005033,
|
|
+ RISCVI_SRA = 0x40005033,
|
|
+ RISCVI_OR = 0x00006033,
|
|
+ RISCVI_AND = 0x00007033,
|
|
+
|
|
+ RISCVI_LB = 0x00000003,
|
|
+ RISCVI_LH = 0x00001003,
|
|
+ RISCVI_LW = 0x00002003,
|
|
+ RISCVI_LBU = 0x00004003,
|
|
+ RISCVI_LHU = 0x00005003,
|
|
+ RISCVI_SB = 0x00000023,
|
|
+ RISCVI_SH = 0x00001023,
|
|
+ RISCVI_SW = 0x00002023,
|
|
+
|
|
+ RISCVI_BEQ = 0x00000063,
|
|
+ RISCVI_BNE = 0x00001063,
|
|
+ RISCVI_BLT = 0x00004063,
|
|
+ RISCVI_BGE = 0x00005063,
|
|
+ RISCVI_BLTU = 0x00006063,
|
|
+ RISCVI_BGEU = 0x00007063,
|
|
+
|
|
+ RISCVI_ECALL = 0x00000073,
|
|
+ RISCVI_EBREAK = 0x00100073,
|
|
+
|
|
+ RISCVI_NOP = 0x00000013,
|
|
+ RISCVI_MV = 0x00000013,
|
|
+ RISCVI_NOT = 0xfff04013,
|
|
+ RISCVI_NEG = 0x40000033,
|
|
+ RISCVI_RET = 0x00008067,
|
|
+ RISCVI_ZEXT_B = 0x0ff07013,
|
|
+
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_LWU = 0x00007003,
|
|
+ RISCVI_LD = 0x00003003,
|
|
+ RISCVI_SD = 0x00003023,
|
|
+
|
|
+ RISCVI_ADDIW = 0x0000001b,
|
|
+
|
|
+ RISCVI_SLLIW = 0x0000101b,
|
|
+ RISCVI_SRLIW = 0x0000501b,
|
|
+ RISCVI_SRAIW = 0x4000501b,
|
|
+
|
|
+ RISCVI_ADDW = 0x0000003b,
|
|
+ RISCVI_SUBW = 0x4000003b,
|
|
+ RISCVI_SLLW = 0x0000103b,
|
|
+ RISCVI_SRLW = 0x0000503b,
|
|
+ RISCVI_SRAW = 0x4000503b,
|
|
+
|
|
+ RISCVI_NEGW = 0x4000003b,
|
|
+ RISCVI_SEXT_W = 0x0000001b,
|
|
+#endif
|
|
+
|
|
+ /* --- RVM --- */
|
|
+ RISCVI_MUL = 0x02000033,
|
|
+ RISCVI_MULH = 0x02001033,
|
|
+ RISCVI_MULHSU = 0x02002033,
|
|
+ RISCVI_MULHU = 0x02003033,
|
|
+ RISCVI_DIV = 0x02004033,
|
|
+ RISCVI_DIVU = 0x02005033,
|
|
+ RISCVI_REM = 0x02006033,
|
|
+ RISCVI_REMU = 0x02007033,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_MULW = 0x0200003b,
|
|
+ RISCVI_DIVW = 0x0200403b,
|
|
+ RISCVI_DIVUW = 0x0200503b,
|
|
+ RISCVI_REMW = 0x0200603b,
|
|
+ RISCVI_REMUW = 0x0200703b,
|
|
+#endif
|
|
+
|
|
+ /* --- RVF --- */
|
|
+ RISCVI_FLW = 0x00002007,
|
|
+ RISCVI_FSW = 0x00002027,
|
|
+
|
|
+ RISCVI_FMADD_S = 0x00000043,
|
|
+ RISCVI_FMSUB_S = 0x00000047,
|
|
+ RISCVI_FNMSUB_S = 0x0000004b,
|
|
+ RISCVI_FNMADD_S = 0x0000004f,
|
|
+
|
|
+ RISCVI_FADD_S = 0x00000053,
|
|
+ RISCVI_FSUB_S = 0x08000053,
|
|
+ RISCVI_FMUL_S = 0x10000053,
|
|
+ RISCVI_FDIV_S = 0x18000053,
|
|
+ RISCVI_FSQRT_S = 0x58000053,
|
|
+
|
|
+ RISCVI_FSGNJ_S = 0x20000053,
|
|
+ RISCVI_FSGNJN_S = 0x20001053,
|
|
+ RISCVI_FSGNJX_S = 0x20002053,
|
|
+
|
|
+ RISCVI_FMIN_S = 0x28000053,
|
|
+ RISCVI_FMAX_S = 0x28001053,
|
|
+
|
|
+ RISCVI_FCVT_W_S = 0xc0000053,
|
|
+ RISCVI_FCVT_WU_S = 0xc0100053,
|
|
+
|
|
+ RISCVI_FMV_X_W = 0xe0000053,
|
|
+
|
|
+ RISCVI_FEQ_S = 0xa0002053,
|
|
+ RISCVI_FLT_S = 0xa0001053,
|
|
+ RISCVI_FLE_S = 0xa0000053,
|
|
+
|
|
+ RISCVI_FCLASS_S = 0xe0001053,
|
|
+
|
|
+ RISCVI_FCVT_S_W = 0xd0000053,
|
|
+ RISCVI_FCVT_S_WU = 0xd0100053,
|
|
+ RISCVI_FMV_W_X = 0xf0000053,
|
|
+
|
|
+ RISCVI_FMV_S = 0x20000053,
|
|
+ RISCVI_FNEG_S = 0x20001053,
|
|
+ RISCVI_FABS_S = 0x20002053,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_FCVT_L_S = 0xc0200053,
|
|
+ RISCVI_FCVT_LU_S = 0xc0300053,
|
|
+ RISCVI_FCVT_S_L = 0xd0200053,
|
|
+ RISCVI_FCVT_S_LU = 0xd0300053,
|
|
+#endif
|
|
+
|
|
+ /* --- RVD --- */
|
|
+ RISCVI_FLD = 0x00003007,
|
|
+ RISCVI_FSD = 0x00003027,
|
|
+
|
|
+ RISCVI_FMADD_D = 0x02000043,
|
|
+ RISCVI_FMSUB_D = 0x02000047,
|
|
+ RISCVI_FNMSUB_D = 0x0200004b,
|
|
+ RISCVI_FNMADD_D = 0x0200004f,
|
|
+
|
|
+ RISCVI_FADD_D = 0x02000053,
|
|
+ RISCVI_FSUB_D = 0x0a000053,
|
|
+ RISCVI_FMUL_D = 0x12000053,
|
|
+ RISCVI_FDIV_D = 0x1a000053,
|
|
+ RISCVI_FSQRT_D = 0x5a000053,
|
|
+
|
|
+ RISCVI_FSGNJ_D = 0x22000053,
|
|
+ RISCVI_FSGNJN_D = 0x22001053,
|
|
+ RISCVI_FSGNJX_D = 0x22002053,
|
|
+
|
|
+ RISCVI_FMIN_D = 0x2a000053,
|
|
+ RISCVI_FMAX_D = 0x2a001053,
|
|
+
|
|
+ RISCVI_FCVT_S_D = 0x40100053,
|
|
+ RISCVI_FCVT_D_S = 0x42000053,
|
|
+
|
|
+ RISCVI_FEQ_D = 0xa2002053,
|
|
+ RISCVI_FLT_D = 0xa2001053,
|
|
+ RISCVI_FLE_D = 0xa2000053,
|
|
+
|
|
+ RISCVI_FCLASS_D = 0xe2001053,
|
|
+
|
|
+ RISCVI_FCVT_W_D = 0xc2000053,
|
|
+ RISCVI_FCVT_WU_D = 0xc2100053,
|
|
+ RISCVI_FCVT_D_W = 0xd2000053,
|
|
+ RISCVI_FCVT_D_WU = 0xd2100053,
|
|
+
|
|
+ RISCVI_FMV_D = 0x22000053,
|
|
+ RISCVI_FNEG_D = 0x22001053,
|
|
+ RISCVI_FABS_D = 0x22002053,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_FCVT_L_D = 0xc2200053,
|
|
+ RISCVI_FCVT_LU_D = 0xc2300053,
|
|
+ RISCVI_FMV_X_D = 0xe2000053,
|
|
+ RISCVI_FCVT_D_L = 0xd2200053,
|
|
+ RISCVI_FCVT_D_LU = 0xd2300053,
|
|
+ RISCVI_FMV_D_X = 0xf2000053,
|
|
+#endif
|
|
+
|
|
+ /* --- Zifencei --- */
|
|
+ RISCVI_FENCE = 0x0000000f,
|
|
+ RISCVI_FENCE_I = 0x0000100f,
|
|
+
|
|
+ /* --- Zicsr --- */
|
|
+ RISCVI_CSRRW = 0x00001073,
|
|
+ RISCVI_CSRRS = 0x00002073,
|
|
+ RISCVI_CSRRC = 0x00003073,
|
|
+ RISCVI_CSRRWI = 0x00005073,
|
|
+ RISCVI_CSRRSI = 0x00006073,
|
|
+ RISCVI_CSRRCI = 0x00007073,
|
|
+
|
|
+ /* --- RVB --- */
|
|
+ /* Zba */
|
|
+ RISCVI_SH1ADD = 0x20002033,
|
|
+ RISCVI_SH2ADD = 0x20004033,
|
|
+ RISCVI_SH3ADD = 0x20006033,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_ADD_UW = 0x0800003b,
|
|
+
|
|
+ RISCVI_SH1ADD_UW = 0x2000203b,
|
|
+ RISCVI_SH2ADD_UW = 0x2000403b,
|
|
+ RISCVI_SH3ADD_UW = 0x2000603b,
|
|
+
|
|
+ RISCVI_SLLI_UW = 0x0800101b,
|
|
+
|
|
+ RISCVI_ZEXT_W = 0x0800003b,
|
|
+#endif
|
|
+ /* Zbb */
|
|
+ RISCVI_ANDN = 0x40007033,
|
|
+ RISCVI_ORN = 0x40006033,
|
|
+ RISCVI_XNOR = 0x40004033,
|
|
+
|
|
+ RISCVI_CLZ = 0x60001013,
|
|
+ RISCVI_CTZ = 0x60101013,
|
|
+
|
|
+ RISCVI_CPOP = 0x60201013,
|
|
+
|
|
+ RISCVI_MAX = 0x0a006033,
|
|
+ RISCVI_MAXU = 0x0a007033,
|
|
+ RISCVI_MIN = 0x0a004033,
|
|
+ RISCVI_MINU = 0x0a005033,
|
|
+
|
|
+ RISCVI_SEXT_B = 0x60401013,
|
|
+ RISCVI_SEXT_H = 0x60501013,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_ZEXT_H = 0x0800403b,
|
|
+#endif
|
|
+
|
|
+ RISCVI_ROL = 0x60001033,
|
|
+ RISCVI_ROR = 0x60005033,
|
|
+ RISCVI_RORI = 0x60005013,
|
|
+
|
|
+ RISCVI_ORC_B = 0x28705013,
|
|
+
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_REV8 = 0x6b805013,
|
|
+
|
|
+ RISCVI_CLZW = 0x6000101b,
|
|
+ RISCVI_CTZW = 0x6010101b,
|
|
+
|
|
+ RISCVI_CPOPW = 0x6020101b,
|
|
+
|
|
+ RISCVI_ROLW = 0x6000103b,
|
|
+ RISCVI_RORIW = 0x6000501b,
|
|
+ RISCVI_RORW = 0x6000503b,
|
|
+#endif
|
|
+ /* NYI: Zbc, Zbs */
|
|
+
|
|
+ /* --- Zicond --- */
|
|
+ RISCVI_CZERO_EQZ = 0x0e005033,
|
|
+ RISCVI_CZERO_NEZ = 0x0e007033,
|
|
+
|
|
+ /* --- Zfa --- */
|
|
+ RISCVI_FLI_S = 0xf0100053,
|
|
+ RISCVI_FMINM_S = 0x28002053,
|
|
+ RISCVI_FMAXM_S = 0x28003053,
|
|
+ RISCVI_FROUND_S = 0x40400053,
|
|
+ RISCVI_FROUNDNX_S = 0x40500053,
|
|
+ RISCVI_FCVTMOD_W_D = 0xc2801053,
|
|
+ RISCVI_FLEQ_S = 0xa0004053,
|
|
+ RISCVI_FLTQ_S = 0xa0005053,
|
|
+ RISCVI_FLI_D = 0xf2100053,
|
|
+ RISCVI_FMINM_D = 0x2a002053,
|
|
+ RISCVI_FMAXM_D = 0x2a003053,
|
|
+ RISCVI_FROUND_D = 0x42400053,
|
|
+ RISCVI_FROUNDNX_D = 0x42500053,
|
|
+ RISCVI_FLEQ_D = 0xa2004053,
|
|
+ RISCVI_FLTQ_D = 0xa2005053,
|
|
+
|
|
+ RISCVI_FROUND_S_RTZ = 0x40401053,
|
|
+ RISCVI_FROUND_S_RDN = 0x40402053,
|
|
+ RISCVI_FROUND_S_RUP = 0x40403053,
|
|
+ RISCVI_FROUNDNX_S_RTZ = 0x40501053,
|
|
+ RISCVI_FROUNDNX_S_RDN = 0x40502053,
|
|
+ RISCVI_FROUNDNX_S_RUP = 0x40503053,
|
|
+ RISCVI_FROUND_D_RTZ = 0x42401053,
|
|
+ RISCVI_FROUND_D_RDN = 0x42402053,
|
|
+ RISCVI_FROUND_D_RUP = 0x42403053,
|
|
+ RISCVI_FROUNDNX_D_RTZ = 0x42501053,
|
|
+ RISCVI_FROUNDNX_D_RDN = 0x42502053,
|
|
+ RISCVI_FROUNDNX_D_RUP = 0x42503053,
|
|
+
|
|
+ /* TBD: RVV?, RVP?, RVJ? */
|
|
+
|
|
+ /* --- XThead* --- */
|
|
+ /* XTHeadBa */
|
|
+ RISCVI_TH_ADDSL = 0x0000100b,
|
|
+
|
|
+ /* XTHeadBb */
|
|
+ RISCVI_TH_SRRI = 0x1000100b,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_TH_SRRIW = 0x1400100b,
|
|
+#endif
|
|
+ RISCVI_TH_EXT = 0x0000200b,
|
|
+ RISCVI_TH_EXTU = 0x0000300b,
|
|
+ RISCVI_TH_FF0 = 0x8400100b,
|
|
+ RISCVI_TH_FF1 = 0x8600100b,
|
|
+ RISCVI_TH_REV = 0x8200100b,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_TH_REVW = 0x9000100b,
|
|
+#endif
|
|
+ RISCVI_TH_TSTNBZ = 0x8000100b,
|
|
+
|
|
+ /* XTHeadBs */
|
|
+ RISCVI_TH_TST = 0x8800100b,
|
|
+
|
|
+ /* XTHeadCondMov */
|
|
+ RISCVI_TH_MVEQZ = 0x4000100b,
|
|
+ RISCVI_TH_MVNEZ = 0x4200100b,
|
|
+
|
|
+ /* XTHeadMac */
|
|
+ RISCVI_TH_MULA = 0x2000100b,
|
|
+ RISCVI_TH_MULAH = 0x2800100b,
|
|
+#if LJ_TARGET_RISCV64
|
|
+ RISCVI_TH_MULAW = 0x2400100b,
|
|
+#endif
|
|
+ RISCVI_TH_MULS = 0x2200100b,
|
|
+ RISCVI_TH_MULSH = 0x2a00100b,
|
|
+ RISCVI_TH_MULSW = 0x2600100b,
|
|
+
|
|
+ /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */
|
|
+} RISCVIns;
|
|
+
|
|
+typedef enum RISCVRM {
|
|
+ RISCVRM_RNE = 0,
|
|
+ RISCVRM_RTZ = 1,
|
|
+ RISCVRM_RDN = 2,
|
|
+ RISCVRM_RUP = 3,
|
|
+ RISCVRM_RMM = 4,
|
|
+ RISCVRM_DYN = 7,
|
|
+} RISCVRM;
|
|
+
|
|
+static const uint16_t riscv_fli_map_hi16[32] = {
|
|
+ 0xbff0u, // -1
|
|
+ 0x0010u, // min
|
|
+ 0x3ef0u, // 2^-16
|
|
+ 0x3f00u, // 2^-15
|
|
+ 0x3f70u, // 2^-8
|
|
+ 0x3f80u, // 2^-7
|
|
+ 0x3fb0u, // 2^-4
|
|
+ 0x3fc0u, // 2^-3, 0.125
|
|
+ 0x3fd0u, // 2^-2, 0.25
|
|
+ 0x3fd4u, // 0.3125
|
|
+ 0x3fd8u, // 0.375
|
|
+ 0x3fdcu, // 0.4375
|
|
+ 0x3fe0u, // 0.5
|
|
+ 0x3fe4u, // 0.625
|
|
+ 0x3fe8u, // 0.75
|
|
+ 0x3fecu, // 0.875
|
|
+ 0x3ff0u, // 1
|
|
+ 0x3ff4u, // 1.25
|
|
+ 0x3ff8u, // 1.5
|
|
+ 0x3ffcu, // 1.75
|
|
+ 0x4000u, // 2
|
|
+ 0x4004u, // 2.5
|
|
+ 0x4008u, // 3
|
|
+ 0x4010u, // 4
|
|
+ 0x4020u, // 8
|
|
+ 0x4030u, // 16
|
|
+ 0x4060u, // 128
|
|
+ 0x4070u, // 256
|
|
+ 0x40e0u, // 2^15, 32768
|
|
+ 0x40f0u, // 2^16, 65536
|
|
+ 0x7ff0u, // inf
|
|
+ 0x7ff8u, // canonical nan
|
|
+};
|
|
+
|
|
+#endif
|
|
|
|
From 9eb94f1afc108b216e5161977bdbf8093bf5ecb3 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:21:02 +0800
|
|
Subject: [PATCH 08/22] riscv(ffi): add call convention and support framework
|
|
|
|
---
|
|
src/lj_ccall.c | 156 +++++++++++++++++++++++++++++++-
|
|
src/lj_ccall.h | 17 +++-
|
|
src/lj_ccallback.c | 64 +++++++++++++-
|
|
src/lj_target_riscv.h | 2 +-
|
|
src/vm_riscv64.dasc | 201 ++++++++++++++++++++++++++++++++++++++++++
|
|
5 files changed, 435 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
|
|
index ae69cd28d1..bb4da414f4 100644
|
|
--- a/src/lj_ccall.c
|
|
+++ b/src/lj_ccall.c
|
|
@@ -575,6 +575,97 @@
|
|
goto done; \
|
|
}
|
|
|
|
+#elif LJ_TARGET_RISCV64
|
|
+/* -- RISC-V lp64d calling conventions ------------------------------------ */
|
|
+
|
|
+#define CCALL_HANDLE_STRUCTRET \
|
|
+ /* Return structs of size > 16 by reference. */ \
|
|
+ cc->retref = !(sz <= 16); \
|
|
+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
|
|
+
|
|
+#define CCALL_HANDLE_STRUCTRET2 \
|
|
+ unsigned int cl = ccall_classify_struct(cts, ctr); \
|
|
+ if ((cl & 4) && (cl >> 8) <= 2) { \
|
|
+ CTSize i = (cl >> 8) - 1; \
|
|
+ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
|
|
+ } else { \
|
|
+ if (cl > 1) { \
|
|
+ sp = (uint8_t *)&cc->fpr[0]; \
|
|
+ if ((cl >> 8) > 2) \
|
|
+ sp = (uint8_t *)&cc->gpr[0]; \
|
|
+ } \
|
|
+ memcpy(dp, sp, ctr->size); \
|
|
+ } \
|
|
+
|
|
+#define CCALL_HANDLE_COMPLEXRET \
|
|
+ /* Complex values are returned in 1 or 2 FPRs. */ \
|
|
+ cc->retref = 0;
|
|
+
|
|
+#define CCALL_HANDLE_COMPLEXRET2 \
|
|
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
|
|
+ ((float *)dp)[0] = cc->fpr[0].f; \
|
|
+ ((float *)dp)[1] = cc->fpr[1].f; \
|
|
+ } else { /* Copy complex double from FPRs. */ \
|
|
+ ((double *)dp)[0] = cc->fpr[0].d; \
|
|
+ ((double *)dp)[1] = cc->fpr[1].d; \
|
|
+ }
|
|
+
|
|
+#define CCALL_HANDLE_COMPLEXARG \
|
|
+ /* Pass long double complex by reference. */ \
|
|
+ if (sz == 2*sizeof(long double)) { \
|
|
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
|
|
+ sz = CTSIZE_PTR; \
|
|
+ } \
|
|
+ /* Pass complex in two FPRs or on stack. */ \
|
|
+ else if (sz == 2*sizeof(float)) { \
|
|
+ isfp = 2; \
|
|
+ sz = 2*CTSIZE_PTR; \
|
|
+ } else { \
|
|
+ isfp = 1; \
|
|
+ sz = 2*CTSIZE_PTR; \
|
|
+ }
|
|
+
|
|
+#define CCALL_HANDLE_RET \
|
|
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
|
+ sp = (uint8_t *)&cc->fpr[0].f;
|
|
+
|
|
+#define CCALL_HANDLE_STRUCTARG \
|
|
+ /* Pass structs of size >16 by reference. */ \
|
|
+ unsigned int cl = ccall_classify_struct(cts, d); \
|
|
+ nff = cl >> 8; \
|
|
+ if (sz > 16) { \
|
|
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
|
|
+ sz = CTSIZE_PTR; \
|
|
+ } \
|
|
+ /* Pass struct in FPRs. */ \
|
|
+ if (cl > 1) { \
|
|
+ isfp = (cl & 4) ? 2 : 1; \
|
|
+ }
|
|
+
|
|
+
|
|
+#define CCALL_HANDLE_REGARG \
|
|
+ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \
|
|
+ int n2 = ctype_isvector(d->info) ? 1 : \
|
|
+ isfp == 1 ? n : 2; \
|
|
+ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
|
|
+ dp = &cc->fpr[nfpr]; \
|
|
+ nfpr += n2; \
|
|
+ goto done; \
|
|
+ } else { \
|
|
+ if (ngpr + n2 <= maxgpr) { \
|
|
+ dp = &cc->gpr[ngpr]; \
|
|
+ ngpr += n2; \
|
|
+ goto done; \
|
|
+ } \
|
|
+ } \
|
|
+ } else { /* Try to pass argument in GPRs. */ \
|
|
+ if (ngpr + n <= maxgpr) { \
|
|
+ dp = &cc->gpr[ngpr]; \
|
|
+ ngpr += n; \
|
|
+ goto done; \
|
|
+ } \
|
|
+ }
|
|
+
|
|
#else
|
|
#error "Missing calling convention definitions for this architecture"
|
|
#endif
|
|
@@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
|
|
|
|
#endif
|
|
|
|
+/* -- RISC-V ABI struct classification ---------------------------- */
|
|
+
|
|
+#if LJ_TARGET_RISCV64
|
|
+
|
|
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
|
|
+{
|
|
+ CTSize sz = ct->size;
|
|
+ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
|
|
+ while (ct->sib) {
|
|
+ CType *sct;
|
|
+ ct = ctype_get(cts, ct->sib);
|
|
+ if (ctype_isfield(ct->info)) {
|
|
+ sct = ctype_rawchild(cts, ct);
|
|
+ if (ctype_isfp(sct->info)) {
|
|
+ r |= sct->size;
|
|
+ if (!isu) n++; else if (n == 0) n = 1;
|
|
+ } else if (ctype_iscomplex(sct->info)) {
|
|
+ r |= (sct->size >> 1);
|
|
+ if (!isu) n += 2; else if (n < 2) n = 2;
|
|
+ } else if (ctype_isstruct(sct->info)) {
|
|
+ goto substruct;
|
|
+ } else {
|
|
+ goto noth;
|
|
+ }
|
|
+ } else if (ctype_isbitfield(ct->info)) {
|
|
+ goto noth;
|
|
+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
|
|
+ sct = ctype_rawchild(cts, ct);
|
|
+ substruct:
|
|
+ if (sct->size > 0) {
|
|
+ unsigned int s = ccall_classify_struct(cts, sct);
|
|
+ if (s <= 1) goto noth;
|
|
+ r |= (s & 255);
|
|
+ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if ((r == 4 || r == 8) && n <= 4)
|
|
+ return r + (n << 8);
|
|
+noth: /* Not a homogeneous float/double aggregate. */
|
|
+ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
/* -- Common C call handling ---------------------------------------------- */
|
|
|
|
/* Infer the destination CTypeID for a vararg argument. */
|
|
@@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
|
#endif
|
|
#endif
|
|
|
|
+#if LJ_TARGET_RISCV64
|
|
+ int nff = 0;
|
|
+#endif
|
|
+
|
|
/* Clear unused regs to get some determinism in case of misdeclaration. */
|
|
memset(cc->gpr, 0, sizeof(cc->gpr));
|
|
#if CCALL_NUM_FPR
|
|
@@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
|
if (isfp && d->size == sizeof(float))
|
|
((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
|
|
#endif
|
|
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
|
|
+#if LJ_TARGET_RISCV64
|
|
+ if (isfp && d->size == sizeof(float))
|
|
+ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */
|
|
+#endif
|
|
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
|
|
if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
|
|
#if LJ_TARGET_MIPS64
|
|
|| (isfp && nsp == 0)
|
|
@@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
|
CTSize i = (sz >> 2) - 1;
|
|
do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
|
|
}
|
|
+#elif LJ_TARGET_RISCV64
|
|
+ if (isfp == 2 && nff <= 2) {
|
|
+ /* Split complex float into separate registers. */
|
|
+ CTSize i = (sz >> 2) - 1;
|
|
+ do {
|
|
+ ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
|
|
+ } while (i--);
|
|
+ }
|
|
#else
|
|
UNUSED(isfp);
|
|
#endif
|
|
@@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
|
|
if ((int32_t)nsp < 0) nsp = 0;
|
|
#endif
|
|
|
|
-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
|
|
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
|
|
cc->nfpr = nfpr; /* Required for vararg functions. */
|
|
#endif
|
|
cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
|
|
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
|
|
index 3528fca55e..d8c59fd8d8 100644
|
|
--- a/src/lj_ccall.h
|
|
+++ b/src/lj_ccall.h
|
|
@@ -129,6 +129,21 @@ typedef union FPRArg {
|
|
struct { LJ_ENDIAN_LOHI(float f; , float g;) };
|
|
} FPRArg;
|
|
|
|
+#elif LJ_TARGET_RISCV64
|
|
+
|
|
+#define CCALL_NARG_GPR 8
|
|
+#define CCALL_NARG_FPR 8
|
|
+#define CCALL_NRET_GPR 2
|
|
+#define CCALL_NRET_FPR 2
|
|
+#define CCALL_SPS_EXTRA 3
|
|
+#define CCALL_SPS_FREE 1
|
|
+
|
|
+typedef intptr_t GPRArg;
|
|
+typedef union FPRArg {
|
|
+ double d;
|
|
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
|
|
+} FPRArg;
|
|
+
|
|
#else
|
|
#error "Missing calling convention definitions for this architecture"
|
|
#endif
|
|
@@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
|
|
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
|
|
#elif LJ_TARGET_ARM64
|
|
void *retp; /* Aggregate return pointer in x8. */
|
|
-#elif LJ_TARGET_PPC
|
|
+#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
|
|
uint8_t nfpr; /* Number of arguments in FPRs. */
|
|
#endif
|
|
#if LJ_32
|
|
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
|
|
index 52f92932f0..e904c49339 100644
|
|
--- a/src/lj_ccallback.c
|
|
+++ b/src/lj_ccallback.c
|
|
@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
|
|
|
|
#define CALLBACK_MCODE_HEAD 52
|
|
|
|
+#elif LJ_TARGET_RISCV64
|
|
+
|
|
+#define CALLBACK_MCODE_HEAD 68
|
|
+
|
|
#else
|
|
|
|
/* Missing support for this architecture. */
|
|
@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
|
|
}
|
|
return p;
|
|
}
|
|
+#elif LJ_TARGET_RISCV64
|
|
+static void *callback_mcode_init(global_State *g, uint32_t *page)
|
|
+{
|
|
+ uint32_t *p = page;
|
|
+ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
|
|
+ uintptr_t ug = (uintptr_t)(void *)g;
|
|
+ uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
|
|
+ uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
|
|
+ MSize slot;
|
|
+ *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));
|
|
+ *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
|
|
+ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
|
|
+ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
|
|
+ *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0);
|
|
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
|
|
+ *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
|
|
+ *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));
|
|
+ p++;
|
|
+ }
|
|
+ return p;
|
|
+}
|
|
#else
|
|
/* Missing support for this architecture. */
|
|
#define callback_mcode_init(g, p) (p)
|
|
@@ -512,6 +549,31 @@ void lj_ccallback_mcode_free(CTState *cts)
|
|
}
|
|
#endif
|
|
|
|
+#define CALLBACK_HANDLE_RET \
|
|
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
|
+ ((float *)dp)[1] = *(float *)dp;
|
|
+
|
|
+#elif LJ_TARGET_RISCV64
|
|
+
|
|
+#define CALLBACK_HANDLE_REGARG \
|
|
+ if (isfp) { \
|
|
+ if (nfpr + n <= CCALL_NARG_FPR) { \
|
|
+ sp = &cts->cb.fpr[nfpr]; \
|
|
+ nfpr += n; \
|
|
+ goto done; \
|
|
+ } else if (ngpr + n <= maxgpr) { \
|
|
+ sp = &cts->cb.gpr[ngpr]; \
|
|
+ ngpr += n; \
|
|
+ goto done; \
|
|
+ } \
|
|
+ } else { \
|
|
+ if (ngpr + n <= maxgpr) { \
|
|
+ sp = &cts->cb.gpr[ngpr]; \
|
|
+ ngpr += n; \
|
|
+ goto done; \
|
|
+ } \
|
|
+ }
|
|
+
|
|
#define CALLBACK_HANDLE_RET \
|
|
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
|
|
((float *)dp)[1] = *(float *)dp;
|
|
@@ -662,7 +724,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
|
|
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
|
|
(int32_t)*(int16_t *)dp;
|
|
}
|
|
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
|
|
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
|
|
/* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
|
|
if (ctr->size <= 4 &&
|
|
(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
|
|
diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h
|
|
index a2a907dbad..22948dc5a4 100644
|
|
--- a/src/lj_target_riscv.h
|
|
+++ b/src/lj_target_riscv.h
|
|
@@ -1,6 +1,6 @@
|
|
/*
|
|
** Definitions for RISC-V CPUs.
|
|
-** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
|
|
+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
|
|
*/
|
|
|
|
#ifndef _LJ_TARGET_RISCV_H
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index c86f94bddb..0a8970a109 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -812,14 +812,29 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| mv BASE, TMP2 // Restore caller BASE.
|
|
| ld LFUNC:TMP1, FRAME_FUNC(TMP2)
|
|
| ld PC, -24(RB) // Restore PC from [cont|PC].
|
|
+ |.if FFI
|
|
+ | sltiu TMP3, TMP0, 2
|
|
+ |.endif
|
|
| cleartp LFUNC:TMP1
|
|
| add TMP2, RA, RD
|
|
| ld TMP1, LFUNC:TMP1->pc
|
|
| sd TISNIL, -8(TMP2) // Ensure one valid arg.
|
|
+ |.if FFI
|
|
+ | bnez TMP3, >1
|
|
+ |.endif
|
|
| // BASE = base, RA = resultptr, RB = meta base
|
|
| ld KBASE, PC2PROTO(k)(TMP1)
|
|
| jr TMP0 // Jump to continuation.
|
|
|
|
|
+ |.if FFI
|
|
+ |1:
|
|
+ | addi TMP1, RB, -32
|
|
+ | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
|
|
+ | // cont = 0: tailcall from C function.
|
|
+ | sub RC, TMP1, BASE
|
|
+ | j ->vm_call_tail
|
|
+ |.endif
|
|
+ |
|
|
|->cont_cat: // RA = resultptr, RB = meta base
|
|
| lw INS, -4(PC)
|
|
| addi CARG2, RB, -32
|
|
@@ -1019,6 +1034,18 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| // Returns 0/1 or TValue * (metamethod).
|
|
| j <3
|
|
|
|
|
+ |->vmeta_equal_cd:
|
|
+ |.if FFI
|
|
+ | addi PC, PC, -4
|
|
+ | mv CARG1, L
|
|
+ | mv CARG2, INS
|
|
+ | sd BASE, L->base
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op)
|
|
+ | // Returns 0/1 or TValue * (metamethod).
|
|
+ | j <3
|
|
+ |.endif
|
|
+ |
|
|
|->vmeta_istype:
|
|
| addi PC, PC, -4
|
|
| sd BASE, L->base
|
|
@@ -2219,6 +2246,133 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
+ |//-- FFI helper functions -----------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |// Handler for callback functions. Callback slot number in x5, g in x7.
|
|
+ |->vm_ffi_callback:
|
|
+ |.if FFI
|
|
+ |.type CTSTATE, CTState, PC
|
|
+ | saveregs
|
|
+ | ld CTSTATE, GL:x7->ctype_state
|
|
+ | mv GL, x7
|
|
+ | addxi DISPATCH, x7, GG_G2DISP
|
|
+ | srli x5, x5, 12
|
|
+ | sw x5, CTSTATE->cb.slot
|
|
+ | sd CARG1, CTSTATE->cb.gpr[0]
|
|
+ | fsd FARG1, CTSTATE->cb.fpr[0]
|
|
+ | sd CARG2, CTSTATE->cb.gpr[1]
|
|
+ | fsd FARG2, CTSTATE->cb.fpr[1]
|
|
+ | sd CARG3, CTSTATE->cb.gpr[2]
|
|
+ | fsd FARG3, CTSTATE->cb.fpr[2]
|
|
+ | sd CARG4, CTSTATE->cb.gpr[3]
|
|
+ | fsd FARG4, CTSTATE->cb.fpr[3]
|
|
+ | sd CARG5, CTSTATE->cb.gpr[4]
|
|
+ | fsd FARG5, CTSTATE->cb.fpr[4]
|
|
+ | sd CARG6, CTSTATE->cb.gpr[5]
|
|
+ | fsd FARG6, CTSTATE->cb.fpr[5]
|
|
+ | sd CARG7, CTSTATE->cb.gpr[6]
|
|
+ | fsd FARG7, CTSTATE->cb.fpr[6]
|
|
+ | sd CARG8, CTSTATE->cb.gpr[7]
|
|
+ | fsd FARG8, CTSTATE->cb.fpr[7]
|
|
+ | addi TMP0, sp, CFRAME_SPACE
|
|
+ | sd TMP0, CTSTATE->cb.stack
|
|
+ | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok.
|
|
+ | mv CARG1, CTSTATE
|
|
+ | mv CARG2, sp
|
|
+ | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf)
|
|
+ | // Returns lua_State *.
|
|
+ | ld BASE, L:CRET1->base
|
|
+ | ld RC, L:CRET1->top
|
|
+ | mv L, CRET1
|
|
+ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | li TISNIL, LJ_TNIL
|
|
+ | li TISNUM, LJ_TISNUM
|
|
+ | slli TMP3, TMP3, 32
|
|
+ | li_vmstate INTERP
|
|
+ | subw RC, RC, BASE
|
|
+ | cleartp LFUNC:RB
|
|
+ | st_vmstate
|
|
+ | fmv.d.x TOBIT, TMP3
|
|
+ | ins_callt
|
|
+ |.endif
|
|
+ |
|
|
+ |->cont_ffi_callback: // Return from FFI callback.
|
|
+ |.if FFI
|
|
+ | ld CTSTATE, GL->ctype_state
|
|
+ | sd BASE, L->base
|
|
+ | sd RB, L->top
|
|
+ | sd L, CTSTATE->L
|
|
+ | mv CARG1, CTSTATE
|
|
+ | mv CARG2, RA
|
|
+ | // (CTState *cts, TValue *o)
|
|
+ | call_intern cont_ffi_callback, lj_ccallback_leave
|
|
+ | fld FRET1, CTSTATE->cb.fpr[0]
|
|
+ | ld CRET1, CTSTATE->cb.gpr[0]
|
|
+ | fld FRET2, CTSTATE->cb.fpr[1]
|
|
+ | ld CRET2, CTSTATE->cb.gpr[1]
|
|
+ | j ->vm_leave_unw
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_ffi_call: // Call C function via FFI.
|
|
+ | // Caveat: needs special frame unwinding, see below.
|
|
+ |.if FFI
|
|
+ | .type CCSTATE, CCallState, CARG1
|
|
+ | lw TMP1, CCSTATE->spadj
|
|
+ | lbu CARG2, CCSTATE->nsp
|
|
+ | lbu CARG3, CCSTATE->nfpr
|
|
+ | mv TMP2, sp
|
|
+ | sub sp, sp, TMP1
|
|
+ | sd ra, -8(TMP2)
|
|
+ | sd x18, -16(TMP2)
|
|
+ | sd CCSTATE, -24(TMP2)
|
|
+ | mv x18, TMP2
|
|
+ | addi TMP1, CCSTATE, offsetof(CCallState, stack)
|
|
+ | mv TMP2, sp
|
|
+ | add TMP3, TMP1, CARG2
|
|
+ | beqz CARG2, >2
|
|
+ |1:
|
|
+ | ld TMP0, 0(TMP1)
|
|
+ | addi TMP1, TMP1, 8
|
|
+ | sd TMP0, 0(TMP2)
|
|
+ | addi TMP2, TMP2, 8
|
|
+ | bltu TMP1, TMP3, <1
|
|
+ |2:
|
|
+ | beqz CARG3, >3
|
|
+ | fld FARG1, CCSTATE->fpr[0]
|
|
+ | fld FARG2, CCSTATE->fpr[1]
|
|
+ | fld FARG3, CCSTATE->fpr[2]
|
|
+ | fld FARG4, CCSTATE->fpr[3]
|
|
+ | fld FARG5, CCSTATE->fpr[4]
|
|
+ | fld FARG6, CCSTATE->fpr[5]
|
|
+ | fld FARG7, CCSTATE->fpr[6]
|
|
+ | fld FARG8, CCSTATE->fpr[7]
|
|
+ |3:
|
|
+ | ld CFUNCADDR, CCSTATE->func
|
|
+ | ld CARG2, CCSTATE->gpr[1]
|
|
+ | ld CARG3, CCSTATE->gpr[2]
|
|
+ | ld CARG4, CCSTATE->gpr[3]
|
|
+ | ld CARG5, CCSTATE->gpr[4]
|
|
+ | ld CARG6, CCSTATE->gpr[5]
|
|
+ | ld CARG7, CCSTATE->gpr[6]
|
|
+ | ld CARG8, CCSTATE->gpr[7]
|
|
+ | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
|
|
+ | jalr CFUNCADDR
|
|
+ | ld CCSTATE:TMP1, -24(x18)
|
|
+ | ld TMP0, -16(x18)
|
|
+ | ld ra, -8(x18)
|
|
+ | sd CRET1, CCSTATE:TMP1->gpr[0]
|
|
+ | sd CRET2, CCSTATE:TMP1->gpr[1]
|
|
+ | fsd FRET1, CCSTATE:TMP1->fpr[0]
|
|
+ | fsd FRET2, CCSTATE:TMP1->fpr[1]
|
|
+ | mv sp, x18
|
|
+ | mv x18, TMP0
|
|
+ | ret
|
|
+ |.endif
|
|
+ |// Note: vm_ffi_call must be the last function in this object file!
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
}
|
|
|
|
/* Generate the code for a single instruction. */
|
|
@@ -2343,6 +2497,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| beqz TMP0, ->BC_ISNEN_Z
|
|
}
|
|
|// Either or both types are not numbers.
|
|
+ |.if FFI
|
|
+ | // Check if RA or RD is a cdata.
|
|
+ | xori TMP0, CARG3, LJ_TCDATA
|
|
+ | xori TMP1, CARG4, LJ_TCDATA
|
|
+ | and TMP0, TMP0, TMP1
|
|
+ | bxeqz TMP0, ->vmeta_equal_cd
|
|
+ |.endif
|
|
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
| decode_BC4b TMP2
|
|
| addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2
|
|
@@ -2395,10 +2556,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| sub RD, KBASE, RD
|
|
| lhu TMP2, -4+OFS_RD(PC)
|
|
| ld CARG2, -8(RD) // KBASE-8-str_const*8
|
|
+ |.if FFI
|
|
+ | gettp CARG3, CARG1
|
|
+ | li TMP1, LJ_TCDATA
|
|
+ |.endif
|
|
| li TMP0, LJ_TSTR
|
|
| decode_BC4b TMP2
|
|
| settp CARG2, TMP0
|
|
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
+ |.if FFI
|
|
+ | bxeq CARG3, TMP1, ->vmeta_equal_cd
|
|
+ |.endif
|
|
| xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D
|
|
| addw TMP2, TMP2, TMP3
|
|
if (vk) {
|
|
@@ -2453,7 +2621,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
|
|
|
|4: // RA is not an integer.
|
|
| addw TMP2, TMP2, TMP3
|
|
+ |.if FFI
|
|
+ | bgeu CARG3, TISNUM, >7
|
|
+ |.else
|
|
| bgeu CARG3, TISNUM, <2
|
|
+ |.endif
|
|
| fmv.d.x FTMP0, CARG1
|
|
| fmv.d.x FTMP2, CARG2
|
|
| bne CARG4, TISNUM, >5
|
|
@@ -2466,11 +2638,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| j <1
|
|
|
|
|
|6: // RA is an integer, RD is a number.
|
|
+ |.if FFI
|
|
+ | bgeu CARG4, TISNUM, >8
|
|
+ |.else
|
|
| bgeu CARG4, TISNUM, <2
|
|
+ |.endif
|
|
| fcvt.d.w FTMP0, CARG1
|
|
| fmv.d.x FTMP2, CARG2
|
|
| j <5
|
|
|
|
|
+ |.if FFI
|
|
+ |7: // RA not int, not number
|
|
+ | li TMP0, LJ_TCDATA
|
|
+ | bne CARG3, TMP0, <2
|
|
+ | j ->vmeta_equal_cd
|
|
+ |
|
|
+ |8: // RD not int, not number
|
|
+ | li TMP0, LJ_TCDATA
|
|
+ | bne CARG4, TMP0, <2
|
|
+ | j ->vmeta_equal_cd
|
|
+ |.endif
|
|
break;
|
|
|
|
case BC_ISEQP: case BC_ISNEP:
|
|
@@ -2484,6 +2671,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| gettp TMP1, TMP1
|
|
| addi PC, PC, 4
|
|
| xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D
|
|
+ |.if FFI
|
|
+ | li TMP3, LJ_TCDATA
|
|
+ | bxeq TMP1, TMP3, ->vmeta_equal_cd
|
|
+ |.endif
|
|
| decode_BC4b TMP2
|
|
| lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4
|
|
| addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2
|
|
@@ -2824,6 +3015,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| ins_next
|
|
break;
|
|
case BC_KCDATA:
|
|
+ |.if FFI
|
|
+ | // RA = dst*8, RD = cdata_const*8 (~)
|
|
+ | sub TMP1, KBASE, RD
|
|
+ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
|
|
+ | li TMP2, LJ_TCDATA
|
|
+ | add RA, BASE, RA
|
|
+ | settp TMP0, TMP2
|
|
+ | sd TMP0, 0(RA)
|
|
+ | ins_next
|
|
+ |.endif
|
|
break;
|
|
case BC_KSHORT:
|
|
| // RA = dst*8, RD = int16_literal*8
|
|
|
|
From e1ce400d1fe432e5eee562ec4d000082e0189a0e Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:27:26 +0800
|
|
Subject: [PATCH 09/22] riscv(support): add extension detection
|
|
|
|
---
|
|
src/lib_jit.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
src/lj_jit.h | 11 +++++
|
|
2 files changed, 134 insertions(+)
|
|
|
|
diff --git a/src/lib_jit.c b/src/lib_jit.c
|
|
index fd8e585b83..e97e4d45e8 100644
|
|
--- a/src/lib_jit.c
|
|
+++ b/src/lib_jit.c
|
|
@@ -631,6 +631,111 @@ JIT_PARAMDEF(JIT_PARAMINIT)
|
|
#include <sys/utsname.h>
|
|
#endif
|
|
|
|
+#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX
|
|
+#include <setjmp.h>
|
|
+#include <signal.h>
|
|
+static sigjmp_buf sigbuf = {0};
|
|
+static void detect_sigill(int sig)
|
|
+{
|
|
+ siglongjmp(sigbuf, 1);
|
|
+}
|
|
+
|
|
+static int riscv_compressed()
|
|
+{
|
|
+#if defined(__riscv_c) || defined(__riscv_compressed)
|
|
+ /* Don't bother checking for RVC -- would crash before getting here. */
|
|
+ return 1;
|
|
+#elif defined(__GNUC__)
|
|
+ /* c.nop; c.nop; */
|
|
+ __asm__(".4byte 0x00010001");
|
|
+ return 1;
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int riscv_zba()
|
|
+{
|
|
+#if defined(__riscv_b) || defined(__riscv_zba)
|
|
+ /* Don't bother checking for Zba -- would crash before getting here. */
|
|
+ return 1;
|
|
+#elif defined(__GNUC__)
|
|
+ /* Don't bother verifying the result, just check if the instruction exists. */
|
|
+ /* add.uw zero, zero, zero */
|
|
+ __asm__(".4byte 0x0800003b");
|
|
+ return 1;
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int riscv_zbb()
|
|
+{
|
|
+#if defined(__riscv_b) || defined(__riscv_zbb)
|
|
+ /* Don't bother checking for Zbb -- would crash before getting here. */
|
|
+ return 1;
|
|
+#elif defined(__GNUC__)
|
|
+ register int t asm ("a0");
|
|
+ /* addi a0, zero, 255; sext.b a0, a0; */
|
|
+ __asm__("addi a0, zero, 255\n\t.4byte 0x60451513");
|
|
+ return t < 0;
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int riscv_zicond()
|
|
+{
|
|
+#if defined(__riscv_zicond)
|
|
+ /* Don't bother checking for Zicond -- would crash before getting here. */
|
|
+ return 1;
|
|
+#elif defined(__GNUC__)
|
|
+ /* czero.eqz zero, zero, zero; */
|
|
+ __asm__(".4byte 0x0e005033");
|
|
+ return 1;
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int riscv_zfa()
|
|
+{
|
|
+#if defined(__riscv_zfa)
|
|
+ /* Don't bother checking for Zfa -- would crash before getting here. */
|
|
+ return 1;
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int riscv_xthead()
|
|
+{
|
|
+#if (defined(__riscv_xtheadba) \
|
|
+ && defined(__riscv_xtheadbb) \
|
|
+ && defined(__riscv_xtheadcondmov) \
|
|
+ && defined(__riscv_xtheadmac))
|
|
+ /* Don't bother checking for XThead -- would crash before getting here. */
|
|
+ return 1;
|
|
+#elif defined(__GNUC__)
|
|
+ register int t asm ("a0");
|
|
+ /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */
|
|
+ /* Therefore assume XThead* are present if XTheadBb is present. */
|
|
+ /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */
|
|
+ __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b");
|
|
+ return t == -1; /* In case of collision with other vendor extensions. */
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
|
|
+{
|
|
+ if (sigsetjmp(sigbuf, 1) == 0) {
|
|
+ return func() ? flag : 0;
|
|
+ } else return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
/* Arch-dependent CPU feature detection. */
|
|
static uint32_t jit_cpudetect(void)
|
|
{
|
|
@@ -702,6 +807,24 @@ static uint32_t jit_cpudetect(void)
|
|
}
|
|
#endif
|
|
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#if LJ_HASJIT
|
|
+ /* SIGILL-based detection of RVC, Zba, Zbb, Zicond, Zfa and XThead. Welcome to the future. */
|
|
+ struct sigaction old = {0}, act = {0};
|
|
+ act.sa_handler = detect_sigill;
|
|
+ sigaction(SIGILL, &act, &old);
|
|
+ flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
|
|
+ flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
|
|
+ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
|
|
+ flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond);
|
|
+ flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa);
|
|
+ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
|
|
+ sigaction(SIGILL, &old, NULL);
|
|
+
|
|
+ /* Detect V/P? */
|
|
+ /* V has no hardware available, P is not ratified yet. */
|
|
+#endif
|
|
+
|
|
#else
|
|
#error "Missing CPU detection for this architecture"
|
|
#endif
|
|
diff --git a/src/lj_jit.h b/src/lj_jit.h
|
|
index 102ba0b4b7..e61d99ffa9 100644
|
|
--- a/src/lj_jit.h
|
|
+++ b/src/lj_jit.h
|
|
@@ -67,6 +67,17 @@
|
|
#endif
|
|
#endif
|
|
|
|
+#elif LJ_TARGET_RISCV64
|
|
+
|
|
+#define JIT_F_RVC (JIT_F_CPU << 0)
|
|
+#define JIT_F_RVZba (JIT_F_CPU << 1)
|
|
+#define JIT_F_RVZbb (JIT_F_CPU << 2)
|
|
+#define JIT_F_RVZicond (JIT_F_CPU << 3)
|
|
+#define JIT_F_RVZfa (JIT_F_CPU << 4)
|
|
+#define JIT_F_RVXThead (JIT_F_CPU << 5)
|
|
+
|
|
+#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead"
|
|
+
|
|
#else
|
|
|
|
#define JIT_F_CPUSTRING ""
|
|
|
|
From 160c28acb57e27189f439ebb4ad2a2ad07301b36 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:29:41 +0800
|
|
Subject: [PATCH 10/22] riscv(jit): add insn emitter
|
|
|
|
---
|
|
src/lj_emit_riscv.h | 574 ++++++++++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 574 insertions(+)
|
|
create mode 100644 src/lj_emit_riscv.h
|
|
|
|
diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h
|
|
new file mode 100644
|
|
index 0000000000..d4160663e2
|
|
--- /dev/null
|
|
+++ b/src/lj_emit_riscv.h
|
|
@@ -0,0 +1,574 @@
|
|
+/*
|
|
+** RISC-V instruction emitter.
|
|
+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
|
|
+**
|
|
+** Contributed by gns from PLCT Lab, ISCAS.
|
|
+*/
|
|
+
|
|
+static intptr_t get_k64val(ASMState *as, IRRef ref)
|
|
+{
|
|
+ IRIns *ir = IR(ref);
|
|
+ if (ir->o == IR_KINT64) {
|
|
+ return (intptr_t)ir_kint64(ir)->u64;
|
|
+ } else if (ir->o == IR_KGC) {
|
|
+ return (intptr_t)ir_kgc(ir);
|
|
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
|
|
+ return (intptr_t)ir_kptr(ir);
|
|
+ } else {
|
|
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
|
|
+ "bad 64 bit const IR op %d", ir->o);
|
|
+ return ir->i; /* Sign-extended. */
|
|
+ }
|
|
+}
|
|
+
|
|
+#define get_kval(as, ref) get_k64val(as, ref)
|
|
+
|
|
+/* -- Emit basic instructions --------------------------------------------- */
|
|
+
|
|
+static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2);
|
|
+}
|
|
+
|
|
+#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0)
|
|
+#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2)
|
|
+#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2)
|
|
+
|
|
+static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3);
|
|
+}
|
|
+
|
|
+#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3)
|
|
+
|
|
+static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff);
|
|
+}
|
|
+
|
|
+#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i)
|
|
+#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i)
|
|
+#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f)
|
|
+
|
|
+static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff);
|
|
+}
|
|
+
|
|
+#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i)
|
|
+
|
|
+/*
|
|
+static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe);
|
|
+}
|
|
+*/
|
|
+
|
|
+static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff);
|
|
+}
|
|
+
|
|
+#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i)
|
|
+
|
|
+/*
|
|
+static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i)
|
|
+{
|
|
+ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe);
|
|
+}
|
|
+*/
|
|
+
|
|
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
|
|
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
|
|
+static Reg ra_scratch(ASMState *as, RegSet allow);
|
|
+
|
|
+static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs)
|
|
+{
|
|
+ lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs);
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB:
|
|
+ case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU:
|
|
+ case RISCVI_FLW: case RISCVI_FLD:
|
|
+ emit_dsi(as, riscvi, data, base, ofs);
|
|
+ break;
|
|
+ case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB:
|
|
+ case RISCVI_FSW: case RISCVI_FSD:
|
|
+ emit_s1s2i(as, riscvi, base, data, ofs);
|
|
+ break;
|
|
+ default: lj_assertA(0, "invalid lso"); break;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp,
|
|
+ int32_t shamt)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) {
|
|
+ if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) {
|
|
+ case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break;
|
|
+ case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break;
|
|
+ default: lj_assertA(0, "invalid roti op"); break;
|
|
+ }
|
|
+ emit_dsshamt(as, riscvi, rd, rs1, shamt);
|
|
+ } else {
|
|
+ RISCVIns ai, bi;
|
|
+ int32_t shwid, shmsk;
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_RORI:
|
|
+ ai = RISCVI_SRLI, bi = RISCVI_SLLI;
|
|
+ shwid = 64, shmsk = 63;
|
|
+ break;
|
|
+ case RISCVI_RORIW:
|
|
+ ai = RISCVI_SRLIW, bi = RISCVI_SLLIW;
|
|
+ shwid = 32, shmsk = 31;
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid roti op");
|
|
+ return;
|
|
+ }
|
|
+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
|
|
+ emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk);
|
|
+ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ emit_ds1s2(as, riscvi, rd, rs1, rs2);
|
|
+ } else {
|
|
+ RISCVIns sai, sbi;
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_ROL:
|
|
+ sai = RISCVI_SLL, sbi = RISCVI_SRL;
|
|
+ break;
|
|
+ case RISCVI_ROR:
|
|
+ sai = RISCVI_SRL, sbi = RISCVI_SLL;
|
|
+ break;
|
|
+ case RISCVI_ROLW:
|
|
+ sai = RISCVI_SLLW, sbi = RISCVI_SRLW;
|
|
+ break;
|
|
+ case RISCVI_RORW:
|
|
+ sai = RISCVI_SRLW, sbi = RISCVI_SLLW;
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid rot op");
|
|
+ return;
|
|
+ }
|
|
+ if (rd == rs2) {
|
|
+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
|
|
+ emit_ds1s2(as, sbi, tmp, rs1, tmp);
|
|
+ emit_ds1s2(as, sai, rd, rs1, rs2);
|
|
+ emit_ds2(as, RISCVI_NEG, tmp, rs2);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
|
|
+ emit_ds1s2(as, sai, rd, rs1, rs2);
|
|
+ emit_ds1s2(as, sbi, tmp, rs1, tmp);
|
|
+ emit_ds2(as, RISCVI_NEG, tmp, rs2);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1)
|
|
+{
|
|
+ if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) ||
|
|
+ (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) {
|
|
+ emit_ds(as, riscvi, rd, rs1);
|
|
+ } else if (as->flags & JIT_F_RVXThead) {
|
|
+ uint32_t hi, sext;
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_ZEXT_B:
|
|
+ case RISCVI_SEXT_W:
|
|
+ emit_ds(as, riscvi, rd, rs1);
|
|
+ return;
|
|
+ case RISCVI_ZEXT_H:
|
|
+ hi = 15, sext = 0;
|
|
+ break;
|
|
+ case RISCVI_ZEXT_W:
|
|
+ hi = 31, sext = 0;
|
|
+ break;
|
|
+ case RISCVI_SEXT_B:
|
|
+ hi = 7, sext = 1;
|
|
+ break;
|
|
+ case RISCVI_SEXT_H:
|
|
+ hi = 15, sext = 1;
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid ext op");
|
|
+ return;
|
|
+ }
|
|
+ emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU,
|
|
+ rd, rs1, hi << 6);
|
|
+ } else {
|
|
+ RISCVIns sli, sri;
|
|
+ int32_t shamt;
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_ZEXT_B:
|
|
+ case RISCVI_SEXT_W:
|
|
+ emit_ds(as, riscvi, rd, rs1);
|
|
+ return;
|
|
+ case RISCVI_ZEXT_H:
|
|
+ sli = RISCVI_SLLI, sri = RISCVI_SRLI;
|
|
+ shamt = 48;
|
|
+ break;
|
|
+ case RISCVI_ZEXT_W:
|
|
+ sli = RISCVI_SLLI, sri = RISCVI_SRLI;
|
|
+ shamt = 32;
|
|
+ break;
|
|
+ case RISCVI_SEXT_B:
|
|
+ sli = RISCVI_SLLI, sri = RISCVI_SRAI;
|
|
+ shamt = 56;
|
|
+ break;
|
|
+ case RISCVI_SEXT_H:
|
|
+ sli = RISCVI_SLLI, sri = RISCVI_SRAI;
|
|
+ shamt = 48;
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid ext op");
|
|
+ return;
|
|
+ }
|
|
+ emit_dsshamt(as, sri, rd, rd, shamt);
|
|
+ emit_dsshamt(as, sli, rd, rs1, shamt);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void emit_cleartp(ASMState *as, Reg rd, Reg rs1)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVXThead) {
|
|
+ emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6);
|
|
+ } else {
|
|
+ emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp);
|
|
+ emit_ds(as, RISCVI_NOT, tmp, rs2);
|
|
+ }
|
|
+}
|
|
+*/
|
|
+
|
|
+/*
|
|
+static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp);
|
|
+ emit_ds(as, RISCVI_NOT, tmp, rs2);
|
|
+ }
|
|
+}
|
|
+*/
|
|
+
|
|
+static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2);
|
|
+ } else {
|
|
+ emit_ds(as, RISCVI_NOT, rd, rd);
|
|
+ emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt)
|
|
+{
|
|
+ if (as->flags & JIT_F_RVZba) {
|
|
+ switch (shamt) {
|
|
+ case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break;
|
|
+ case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break;
|
|
+ case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break;
|
|
+ default: return;
|
|
+ }
|
|
+ } else if (as->flags & JIT_F_RVXThead) {
|
|
+ emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt);
|
|
+ }
|
|
+}
|
|
+
|
|
+#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1)
|
|
+#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2)
|
|
+#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3)
|
|
+
|
|
+static void emit_loadk12(ASMState *as, Reg rd, int32_t i)
|
|
+{
|
|
+ emit_di(as, RISCVI_ADDI, rd, i);
|
|
+}
|
|
+
|
|
+static void emit_loadk32(ASMState *as, Reg rd, int32_t i)
|
|
+{
|
|
+ if (checki12((int64_t)i)) {
|
|
+ emit_loadk12(as, rd, i);
|
|
+ } else {
|
|
+ if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0))
|
|
+ emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i));
|
|
+ else
|
|
+ emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i));
|
|
+ emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i));
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Emit loads/stores --------------------------------------------------- */
|
|
+
|
|
+/* Prefer rematerialization of BASE/L from global_State over spills. */
|
|
+#define emit_canremat(ref) ((ref) <= REF_BASE)
|
|
+
|
|
+
|
|
+/* Load a 32 bit constant into a GPR. */
|
|
+#define emit_loadi(as, r, i) emit_loadk32(as, r, i)
|
|
+
|
|
+/* Load a 64 bit constant into a GPR. */
|
|
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
|
|
+{
|
|
+ int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2));
|
|
+ if (checki32((int64_t)u64)) {
|
|
+ emit_loadk32(as, r, (int32_t)u64);
|
|
+ } else if (checki32auipc(u64_delta)) {
|
|
+ emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta));
|
|
+ emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta));
|
|
+ } else {
|
|
+ uint32_t lo32 = u64 & 0xfffffffful;
|
|
+ if (checku11(lo32)) {
|
|
+ if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, r, r, 32);
|
|
+ } else {
|
|
+ RISCVIns li_insn[7] = {0};
|
|
+ int shamt = 0, step = 0;
|
|
+ for(int bit = 0; bit < 32; bit++) {
|
|
+ if (lo32 & (1u << bit)) {
|
|
+ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
|
|
+ int inc = bit+10 > 31 ? 31-bit : 10;
|
|
+ bit += inc, shamt = inc+1;
|
|
+ uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1);
|
|
+ uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0);
|
|
+ li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload);
|
|
+ } else shamt++;
|
|
+ }
|
|
+ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
|
|
+
|
|
+ if (step < 6) {
|
|
+ for(int i = 0; i < step; i++)
|
|
+ *--as->mcp = li_insn[i];
|
|
+ } else {
|
|
+ emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, r, r, 10);
|
|
+ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
|
|
+ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ uint32_t hi32 = u64 >> 32;
|
|
+ if (hi32 & 0xfff) emit_loadk32(as, r, hi32);
|
|
+ else emit_du(as, RISCVI_LUI, r, hi32 >> 12);
|
|
+ }
|
|
+}
|
|
+
|
|
+#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
|
|
+
|
|
+/* Get/set from constant pointer. */
|
|
+static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow)
|
|
+{
|
|
+ emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0);
|
|
+}
|
|
+
|
|
+/* Load 64 bit IR constant into register. */
|
|
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
|
|
+{
|
|
+ const uint64_t *k = &ir_k64(ir)->u64;
|
|
+ Reg r64 = r;
|
|
+ if (rset_test(RSET_FPR, r)) {
|
|
+ if (as->flags & JIT_F_RVZfa) {
|
|
+ uint8_t sign = (*k >> 63) & 1;
|
|
+ uint16_t k_hi16 = (*k >> 48) & 0xffff;
|
|
+ uint64_t k_lo48 = *k & 0xffffffffffff;
|
|
+ uint16_t mk_hi16 = k_hi16 & 0x7fff;
|
|
+ if (!k_lo48) {
|
|
+ if (riscv_fli_map_hi16[0] == k_hi16) {
|
|
+ emit_ds(as, RISCVI_FLI_D, r, 0);
|
|
+ return;
|
|
+ }
|
|
+ for (int i = 1; i < 32; i++) {
|
|
+ if (riscv_fli_map_hi16[i] == mk_hi16) {
|
|
+ if (sign)
|
|
+ emit_ds1s2(as, RISCVI_FNEG_D, r, r, r);
|
|
+ emit_ds(as, RISCVI_FLI_D, r, i);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ r64 = RID_TMP;
|
|
+ emit_ds(as, RISCVI_FMV_D_X, r, r64);
|
|
+ }
|
|
+ emit_loadu64(as, r64, *k);
|
|
+}
|
|
+
|
|
+/* Get/set global_State fields. */
|
|
+static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs)
|
|
+{
|
|
+ emit_lso(as, riscvi, r, RID_GL, ofs);
|
|
+}
|
|
+
|
|
+#define emit_getgl(as, r, field) \
|
|
+ emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field))
|
|
+#define emit_setgl(as, r, field) \
|
|
+ emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field))
|
|
+
|
|
+/* Trace number is determined from per-trace exit stubs. */
|
|
+#define emit_setvmstate(as, i) UNUSED(i)
|
|
+
|
|
+/* -- Emit control-flow instructions -------------------------------------- */
|
|
+
|
|
+/* Label for internal jumps. */
|
|
+typedef MCode *MCLabel;
|
|
+
|
|
+/* Return label pointing to current PC. */
|
|
+#define emit_label(as) ((as)->mcp)
|
|
+
|
|
+static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump)
|
|
+{
|
|
+ MCode *p = as->mcp;
|
|
+ ptrdiff_t delta = (char *)target - (char *)(p - 1);
|
|
+ switch (jump) {
|
|
+ case -1:
|
|
+ lj_assertA(((delta + 0x10000) >> 13) == 0, "branch target out of range"); /* B */
|
|
+ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
|
|
+ break;
|
|
+ case 0: case 1:
|
|
+ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */
|
|
+ if (checki13(delta) && !jump) {
|
|
+ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
|
|
+ *--p = RISCVI_NOP;
|
|
+ } else {
|
|
+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */
|
|
+ *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid jump type");
|
|
+ break;
|
|
+ }
|
|
+ as->mcp = p;
|
|
+}
|
|
+
|
|
+static void emit_jump(ASMState *as, MCode *target, int jump)
|
|
+{
|
|
+ MCode *p = as->mcp;
|
|
+ ptrdiff_t delta;
|
|
+ switch(jump) {
|
|
+ case -1:
|
|
+ delta = (char *)target - (char *)(p - 1);
|
|
+ lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */
|
|
+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
|
|
+ break;
|
|
+ case 0: case 1:
|
|
+ delta = (char *)target - (char *)(p - 2);
|
|
+ lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */
|
|
+ if (checki21(delta) && !jump) {
|
|
+ *--p = RISCVI_NOP;
|
|
+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
|
|
+ } else {
|
|
+ *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
|
|
+ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "invalid jump type");
|
|
+ break;
|
|
+ }
|
|
+ as->mcp = p;
|
|
+}
|
|
+
|
|
+#define emit_jmp(as, target) emit_jump(as, target, 0)
|
|
+
|
|
+#define emit_mv(as, dst, src) \
|
|
+ emit_ds(as, RISCVI_MV, (dst), (src))
|
|
+
|
|
+static void emit_call(ASMState *as, void *target, int needcfa)
|
|
+{
|
|
+ MCode *p = as->mcp;
|
|
+ ptrdiff_t delta = (char *)target - (char *)(p - 2);
|
|
+ if (checki21(delta)) {
|
|
+ *--p = RISCVI_NOP;
|
|
+ *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta);
|
|
+ } else if (checki32auipc(delta)) { /* AUIPC+JALR, same range check as emit_jump. */
|
|
+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
|
|
+ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
|
|
+ needcfa = 1;
|
|
+ } else {
|
|
+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0);
|
|
+ needcfa = 2;
|
|
+ }
|
|
+ as->mcp = p;
|
|
+ if (needcfa > 1)
|
|
+ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
|
|
+}
|
|
+
|
|
+/* -- Emit generic operations --------------------------------------------- */
|
|
+
|
|
+/* Generic move between two regs. */
|
|
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
|
|
+{
|
|
+ if (src < RID_MAX_GPR && dst < RID_MAX_GPR)
|
|
+ emit_mv(as, dst, src);
|
|
+ else if (src < RID_MAX_GPR)
|
|
+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src);
|
|
+ else if (dst < RID_MAX_GPR)
|
|
+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src);
|
|
+ else
|
|
+ emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
|
|
+}
|
|
+
|
|
+/* Emit an arithmetic operation with a constant operand. */
|
|
+static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
|
|
+ Reg tmp, intptr_t k)
|
|
+{
|
|
+ if (checki12(k)) emit_dsi(as, riscvi, dest, src, k);
|
|
+ else {
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_ADDI: riscvi = RISCVI_ADD; break;
|
|
+ case RISCVI_XORI: riscvi = RISCVI_XOR; break;
|
|
+ case RISCVI_ORI: riscvi = RISCVI_OR; break;
|
|
+ case RISCVI_ANDI: riscvi = RISCVI_AND; break;
|
|
+ default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
|
|
+ }
|
|
+ emit_ds1s2(as, riscvi, dest, src, tmp);
|
|
+ emit_loadu64(as, tmp, (uintptr_t)k);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Generic load of register with base and (small) offset address. */
|
|
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
|
|
+{
|
|
+ if (r < RID_MAX_GPR)
|
|
+ emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs);
|
|
+ else
|
|
+ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs);
|
|
+}
|
|
+
|
|
+/* Generic store of register with base and (small) offset address. */
|
|
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
|
|
+{
|
|
+ if (r < RID_MAX_GPR)
|
|
+ emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs);
|
|
+ else
|
|
+ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs);
|
|
+}
|
|
+
|
|
+/* Add offset to pointer. */
|
|
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
|
|
+{
|
|
+ if (ofs)
|
|
+ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs);
|
|
+}
|
|
+
|
|
+
|
|
+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
|
|
|
|
From 83f3cc575e7488ef00c32db6b4aea2a6a0a51237 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:32:53 +0800
|
|
Subject: [PATCH 11/22] riscv(jit): add IR assembler
|
|
|
|
---
|
|
src/lj_asm.c | 4 +
|
|
src/lj_asm_riscv64.h | 2037 ++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 2041 insertions(+)
|
|
create mode 100644 src/lj_asm_riscv64.h
|
|
|
|
diff --git a/src/lj_asm.c b/src/lj_asm.c
|
|
index fec4351251..d9ac8cf131 100644
|
|
--- a/src/lj_asm.c
|
|
+++ b/src/lj_asm.c
|
|
@@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs)
|
|
#include "lj_emit_ppc.h"
|
|
#elif LJ_TARGET_MIPS
|
|
#include "lj_emit_mips.h"
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#include "lj_emit_riscv.h"
|
|
#else
|
|
#error "Missing instruction emitter for target CPU"
|
|
#endif
|
|
@@ -1708,6 +1710,8 @@ static void asm_loop(ASMState *as)
|
|
#include "lj_asm_ppc.h"
|
|
#elif LJ_TARGET_MIPS
|
|
#include "lj_asm_mips.h"
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#include "lj_asm_riscv64.h"
|
|
#else
|
|
#error "Missing assembler for target CPU"
|
|
#endif
|
|
diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h
|
|
new file mode 100644
|
|
index 0000000000..c40b57eb73
|
|
--- /dev/null
|
|
+++ b/src/lj_asm_riscv64.h
|
|
@@ -0,0 +1,2037 @@
|
|
+/*
|
|
+** RISC-V IR assembler (SSA IR -> machine code).
|
|
+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
|
|
+**
|
|
+** Contributed by gns from PLCT Lab, ISCAS.
|
|
+*/
|
|
+
|
|
+/* -- Register allocator extensions --------------------------------------- */
|
|
+
|
|
+/* Allocate a register with a hint. */
|
|
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
|
|
+{
|
|
+ Reg r = IR(ref)->r;
|
|
+ if (ra_noreg(r)) {
|
|
+ if (!ra_hashint(r) && !iscrossref(as, ref))
|
|
+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
|
|
+ r = ra_allocref(as, ref, allow);
|
|
+ }
|
|
+ ra_noweak(as, r);
|
|
+ return r;
|
|
+}
|
|
+
|
|
+/* Allocate a register or RID_ZERO. */
|
|
+static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
|
|
+{
|
|
+ Reg r = IR(ref)->r;
|
|
+ if (ra_noreg(r)) {
|
|
+ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
|
|
+ return RID_ZERO;
|
|
+ r = ra_allocref(as, ref, allow);
|
|
+ } else {
|
|
+ ra_noweak(as, r);
|
|
+ }
|
|
+ return r;
|
|
+}
|
|
+
|
|
+/* Allocate two source registers for three-operand instructions. */
|
|
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
|
|
+{
|
|
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
|
+ Reg left = irl->r, right = irr->r;
|
|
+ if (ra_hasreg(left)) {
|
|
+ ra_noweak(as, left);
|
|
+ if (ra_noreg(right))
|
|
+ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
|
|
+ else
|
|
+ ra_noweak(as, right);
|
|
+ } else if (ra_hasreg(right)) {
|
|
+ ra_noweak(as, right);
|
|
+ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
|
|
+ } else if (ra_hashint(right)) {
|
|
+ right = ra_alloc1z(as, ir->op2, allow);
|
|
+ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
|
|
+ } else {
|
|
+ left = ra_alloc1z(as, ir->op1, allow);
|
|
+ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
|
|
+ }
|
|
+ return left | (right << 8);
|
|
+}
|
|
+
|
|
+/* -- Guard handling ------------------------------------------------------ */
|
|
+
|
|
+/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */
|
|
+#define RISCV_SPAREJUMP 4
|
|
+
|
|
+/* Setup spare long-range jump (trampoline?) slots per mcarea. */
|
|
+
|
|
+static void asm_sparejump_setup(ASMState *as)
|
|
+{
|
|
+ MCode *mxp = as->mctop;
|
|
+ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
|
|
+ for (int i = RISCV_SPAREJUMP*2; i--; )
|
|
+ *--mxp = RISCVI_EBREAK;
|
|
+ as->mctop = mxp;
|
|
+ }
|
|
+}
|
|
+
|
|
+static MCode *asm_sparejump_use(MCode *mcarea, MCode *target)
|
|
+{
|
|
+ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
|
|
+ int slot = RISCV_SPAREJUMP;
|
|
+ RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr;
|
|
+ while (slot--) {
|
|
+ mxp -= 2;
|
|
+ ptrdiff_t delta = (char *)target - (char *)mxp;
|
|
+ tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)),
|
|
+ tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
|
|
+ if (mxp[0] == tauipc && mxp[1] == tjalr) {
|
|
+ return mxp;
|
|
+ } else if (mxp[0] == tslot) {
|
|
+ mxp[0] = tauipc, mxp[1] = tjalr;
|
|
+ return mxp;
|
|
+ }
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+/* Setup exit stub after the end of each trace. */
|
|
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
|
|
+{
|
|
+ ExitNo i;
|
|
+ MCode *mxp = as->mctop;
|
|
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
|
|
+ asm_mclimit(as);
|
|
+ for (i = nexits-1; (int32_t)i >= 0; i--)
|
|
+ *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i)));
|
|
+ ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3);
|
|
+ /* 1: sd ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */
|
|
+ *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno);
|
|
+ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP)
|
|
+ | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta));
|
|
+ *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP)
|
|
+ | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta));
|
|
+ *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP);
|
|
+ as->mctop = mxp;
|
|
+}
|
|
+
|
|
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
|
|
+{
|
|
+ /* Keep this in-sync with exitstub_trace_addr(). */
|
|
+ return as->mctop + exitno + 4;
|
|
+}
|
|
+
|
|
+/* Emit conditional branch to exit for guard. */
|
|
+static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2)
|
|
+{
|
|
+ MCode *target = asm_exitstub_addr(as, as->snapno);
|
|
+ MCode *p = as->mcp;
|
|
+ if (LJ_UNLIKELY(p == as->invmcp)) {
|
|
+ as->loopinv = 1;
|
|
+ as->mcp = ++p;
|
|
+ *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p);
|
|
+ riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */
|
|
+ target = p - 1; /* Patch target later in asm_loop_fixup. */
|
|
+ }
|
|
+ ptrdiff_t delta = (char *)target - (char *)(p - 1);
|
|
+ *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
|
|
+ *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
|
|
+ as->mcp = p;
|
|
+}
|
|
+
|
|
+/* -- Operand fusion ------------------------------------------------------ */
|
|
+
|
|
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
|
|
+#define CONFLICT_SEARCH_LIM 31
|
|
+
|
|
+/* Check if there's no conflicting instruction between curins and ref. */
|
|
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
|
|
+{
|
|
+ IRIns *ir = as->ir;
|
|
+ IRRef i = as->curins;
|
|
+ if (i > ref + CONFLICT_SEARCH_LIM)
|
|
+ return 0; /* Give up, ref is too far away. */
|
|
+ while (--i > ref)
|
|
+ if (ir[i].o == conflict)
|
|
+ return 0; /* Conflict found. */
|
|
+ return 1; /* Ok, no conflict. */
|
|
+}
|
|
+
|
|
+/* Fuse the array base of colocated arrays. */
|
|
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
|
|
+{
|
|
+ IRIns *ir = IR(ref);
|
|
+ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
|
|
+ !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
|
|
+ return (int32_t)sizeof(GCtab);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Fuse array/hash/upvalue reference into register+offset operand. */
|
|
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
|
|
+{
|
|
+ IRIns *ir = IR(ref);
|
|
+ if (ra_noreg(ir->r)) {
|
|
+ if (ir->o == IR_AREF) {
|
|
+ if (mayfuse(as, ref)) {
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ IRRef tab = IR(ir->op1)->op1;
|
|
+ int32_t ofs = asm_fuseabase(as, tab);
|
|
+ IRRef refa = ofs ? tab : ir->op1;
|
|
+ ofs += 8*IR(ir->op2)->i;
|
|
+ if (checki12(ofs)) {
|
|
+ *ofsp = ofs;
|
|
+ return ra_alloc1(as, refa, allow);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else if (ir->o == IR_HREFK) {
|
|
+ if (mayfuse(as, ref)) {
|
|
+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
|
|
+ if (checki12(ofs)) {
|
|
+ *ofsp = ofs;
|
|
+ return ra_alloc1(as, ir->op1, allow);
|
|
+ }
|
|
+ }
|
|
+ } else if (ir->o == IR_UREFC) {
|
|
+ if (irref_isk(ir->op1)) {
|
|
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
|
|
+ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
|
|
+ intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g));
|
|
+ if (checki12(ofs)) {
|
|
+ *ofsp = (int32_t)ofs;
|
|
+ return RID_GL;
|
|
+ }
|
|
+ }
|
|
+ } else if (ir->o == IR_TMPREF) {
|
|
+ *ofsp = (int32_t)offsetof(global_State, tmptv);
|
|
+ return RID_GL;
|
|
+ }
|
|
+ }
|
|
+ *ofsp = 0;
|
|
+ return ra_alloc1(as, ref, allow);
|
|
+}
|
|
+
|
|
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
|
|
+static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref,
|
|
+ RegSet allow, int32_t ofs)
|
|
+{
|
|
+ IRIns *ir = IR(ref);
|
|
+ Reg base;
|
|
+ if (ra_noreg(ir->r) && canfuse(as, ir)) {
|
|
+ intptr_t ofs2;
|
|
+ if (ir->o == IR_ADD) {
|
|
+ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
|
|
+ checki12(ofs2))) {
|
|
+ ref = ir->op1;
|
|
+ ofs = (int32_t)ofs2;
|
|
+ }
|
|
+ } else if (ir->o == IR_STRREF) {
|
|
+ ofs2 = 4096;
|
|
+ lj_assertA(ofs == 0, "bad usage");
|
|
+ ofs = (int32_t)sizeof(GCstr);
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ ofs2 = ofs + get_kval(as, ir->op2);
|
|
+ ref = ir->op1;
|
|
+ } else if (irref_isk(ir->op1)) {
|
|
+ ofs2 = ofs + get_kval(as, ir->op1);
|
|
+ ref = ir->op2;
|
|
+ }
|
|
+ if (!checki12(ofs2)) {
|
|
+ /* NYI: Fuse ADD with constant. */
|
|
+ Reg right, left = ra_alloc2(as, ir, allow);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ emit_lso(as, riscvi, rd, RID_TMP, ofs);
|
|
+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right);
|
|
+ return;
|
|
+ }
|
|
+ ofs = ofs2;
|
|
+ }
|
|
+ }
|
|
+ base = ra_alloc1(as, ref, allow);
|
|
+ emit_lso(as, riscvi, rd, base, ofs);
|
|
+}
|
|
+
|
|
+/* Fuse Integer multiply-accumulate. */
|
|
+/* Returns 1 after emitting the fused form when op1 != op2 and one operand is a fusable IR_MUL without a register; returns 0 otherwise. */
|
|
+static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi)
|
|
+{
|
|
+ IRRef lref = ir->op1, rref = ir->op2;
|
|
+ IRIns *irm;
|
|
+ if (lref != rref &&
|
|
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
|
|
+ ra_noreg(irm->r)) ||
|
|
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
|
|
+ (rref = lref, ra_noreg(irm->r))))) { /* MUL on the right: op1 becomes the addend (rref). */
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg add = ra_hintalloc(as, rref, dest, RSET_GPR);
|
|
+ Reg left = ra_alloc2(as, irm,
|
|
+ rset_exclude(rset_exclude(RSET_GPR, dest), add));
|
|
+ Reg right = (left >> 8); left &= 255; /* Unpack the two MUL operand regs from ra_alloc2(). */
|
|
+ emit_ds1s2(as, riscvi, dest, left, right);
|
|
+ if (dest != add) emit_mv(as, dest, add);
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Fuse FP multiply-add/sub. */
|
|
+/* Only attempted when JIT_F_OPT_FMA is set. riscvir replaces riscvi when the IR_MUL is the right operand. Returns 1 if fused, else 0. */
|
|
+static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir)
|
|
+{
|
|
+ IRRef lref = ir->op1, rref = ir->op2;
|
|
+ IRIns *irm;
|
|
+ if ((as->flags & JIT_F_OPT_FMA) &&
|
|
+ lref != rref &&
|
|
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
|
|
+ ra_noreg(irm->r)) ||
|
|
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
|
|
+ (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { /* MUL on the right: swap addend and switch to the reversed opcode. */
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
|
|
+ Reg left = ra_alloc2(as, irm,
|
|
+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
|
|
+ Reg right = (left >> 8); left &= 255; /* Unpack the two MUL operand regs from ra_alloc2(). */
|
|
+ emit_ds1s2s3(as, riscvi, dest, left, right, add);
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+/* -- Calls --------------------------------------------------------------- */
|
|
+
|
|
+/* Generate a call to a C function. Arguments are placed in FPR/GPR argument registers while available, then in 8-byte stack slots. A null ci->func emits no call instruction (asm_callx emits its own indirect jump). */
|
|
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
|
+{
|
|
+ uint32_t n, nargs = CCI_XNARGS(ci);
|
|
+ int32_t ofs = 0;
|
|
+ Reg gpr, fpr = REGARG_FIRSTFPR;
|
|
+ if ((void *)ci->func)
|
|
+ emit_call(as, (void *)ci->func, 1);
|
|
+ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
|
|
+ as->cost[gpr] = REGCOST(~0u, ASMREF_L);
|
|
+ gpr = REGARG_FIRSTGPR;
|
|
+ for (n = 0; n < nargs; n++) { /* Setup args. */
|
|
+ IRRef ref = args[n];
|
|
+ IRIns *ir = IR(ref);
|
|
+ if (ref) { /* A zero ref is skipped without consuming a register or stack slot. */
|
|
+ if (irt_isfp(ir->t)) {
|
|
+ if (fpr <= REGARG_LASTFPR) {
|
|
+ lj_assertA(rset_test(as->freeset, fpr),
|
|
+ "reg %d not free", fpr); /* Must have been evicted. */
|
|
+ ra_leftov(as, fpr, ref);
|
|
+ fpr++; if(ci->flags & CCI_VARARG) gpr++; /* Varargs: an FP arg also advances the GPR index. */
|
|
+ } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) {
|
|
+ lj_assertA(rset_test(as->freeset, gpr),
|
|
+ "reg %d not free", gpr); /* Must have been evicted. */
|
|
+ ra_leftov(as, gpr, ref);
|
|
+ gpr++;
|
|
+ } else {
|
|
+ Reg r = ra_alloc1(as, ref, RSET_FPR);
|
|
+ emit_spstore(as, ir, r, ofs);
|
|
+ ofs += 8;
|
|
+ }
|
|
+ } else {
|
|
+ if (gpr <= REGARG_LASTGPR) {
|
|
+ lj_assertA(rset_test(as->freeset, gpr),
|
|
+ "reg %d not free", gpr); /* Must have been evicted. */
|
|
+ ra_leftov(as, gpr, ref);
|
|
+ gpr++; if(ci->flags & CCI_VARARG) fpr++; /* Varargs: an integer arg also advances the FPR index. */
|
|
+ } else {
|
|
+ Reg r = ra_alloc1z(as, ref, RSET_GPR);
|
|
+ emit_spstore(as, ir, r, ofs);
|
|
+ ofs += 8;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Setup result reg/sp for call. Evict scratch regs. The result is bound to RID_FPRET, RID_RET, or a register pair; with CCI_CASTU64 an FP result is taken from RID_RET via FMV. */
|
|
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
|
+{
|
|
+ RegSet drop = RSET_SCRATCH;
|
|
+ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); /* A following non-nil IR_HIOP selects ra_destpair() below. */
|
|
+ if (ra_hasreg(ir->r))
|
|
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
|
|
+ if (hiop && ra_hasreg((ir+1)->r))
|
|
+ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
|
|
+ ra_evictset(as, drop); /* Evictions must be performed first. */
|
|
+ if (ra_used(ir)) {
|
|
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
|
|
+ if (irt_isfp(ir->t)) {
|
|
+ if ((ci->flags & CCI_CASTU64)) {
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X,
|
|
+ dest, RID_RET);
|
|
+ } else {
|
|
+ ra_destreg(as, ir, RID_FPRET);
|
|
+ }
|
|
+ } else if (hiop) {
|
|
+ ra_destpair(as, ir);
|
|
+ } else {
|
|
+ ra_destreg(as, ir, RID_RET);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* Assemble a call whose target is ir->op2 (optionally wrapped in IR_CARG): a constant address is passed to asm_gencall(), a non-constant target gets an indirect JALR emitted here. */
|
|
+static void asm_callx(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IRRef args[CCI_NARGS_MAX*2];
|
|
+ CCallInfo ci;
|
|
+ IRRef func;
|
|
+ IRIns *irf;
|
|
+ ci.flags = asm_callx_flags(as, ir);
|
|
+ asm_collectargs(as, ir, &ci, args);
|
|
+ asm_setupresult(as, ir, &ci);
|
|
+ func = ir->op2; irf = IR(func);
|
|
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
|
|
+ if (irref_isk(func)) { /* Call to constant address. */
|
|
+ ci.func = (ASMFunction)(void *)get_kval(as, func);
|
|
+ } else { /* Need specific register for indirect calls. */
|
|
+ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
|
|
+ MCode *p = as->mcp; /* Instructions are written downwards from as->mcp, so the JALR stored first sits after the move stored second. */
|
|
+ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r);
|
|
+ if (r == RID_CFUNCADDR)
|
|
+ *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
|
|
+ else
|
|
+ *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
|
|
+ as->mcp = p;
|
|
+ ci.func = (ASMFunction)(void *)0; /* Null func: asm_gencall() then emits no call of its own. */
|
|
+ }
|
|
+ asm_gencall(as, &ci, args);
|
|
+}
|
|
+
|
|
+/* -- Returns ------------------------------------------------------------- */
|
|
+
|
|
+/* Return to lower frame. Guard that it goes to the right spot. Also lowers as->topslot by delta and stores the adjusted base to jit_base. */
|
|
+static void asm_retf(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
|
|
+ void *pc = ir_kptr(IR(ir->op2));
|
|
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
|
|
+ as->topslot -= (BCReg)delta;
|
|
+ if ((int32_t)as->topslot < 0) as->topslot = 0; /* Clamp at zero. */
|
|
+ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
|
|
+ emit_setgl(as, base, jit_base);
|
|
+ emit_addptr(as, base, -8*delta);
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP,
|
|
+ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
|
|
+ emit_lso(as, RISCVI_LD, RID_TMP, base, -8); /* The 64-bit word at base-8 is what the guard above compares with pc. */
|
|
+}
|
|
+
|
|
+/* -- Buffer operations --------------------------------------------------- */
|
|
+
|
|
+#if LJ_HASBUFFER
|
|
+static void asm_bufhdr_write(ASMState *as, Reg sb)
|
|
+{ /* Rewrites the L field of the SBuf in sb. NOTE(review): emit order looks reversed (cf. asm_callx), i.e. load old L, mask with SBUF_MASK_FLAG, OR with cur_L, store -- confirm. */
|
|
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
|
|
+ IRIns irgc;
|
|
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
|
|
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
|
|
+ emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp);
|
|
+ emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
|
|
+ emit_getgl(as, RID_TMP, cur_L);
|
|
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
|
|
+}
|
|
+#endif
|
|
+
|
|
+/* -- Type conversions ---------------------------------------------------- */
|
|
+/* Guarded double to int32 conversion of FPR `left`: converts to int, converts back to double, and guards on FEQ.D between the round-tripped value and `left`. */
|
|
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
|
|
+{
|
|
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
|
|
+ asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); /* Guard branches when cmp == 0, i.e. the FEQ.D result was false. */
|
|
+ emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left);
|
|
+ emit_ds(as, RISCVI_FCVT_D_W, tmp, dest);
|
|
+ emit_ds(as, RISCVI_FCVT_W_D, dest, left);
|
|
+}
|
|
+/* TOBIT: FADD.D of op1 and op2 into an FPR scratch, then FMV.X.W of that scratch into the integer dest. */
|
|
+static void asm_tobit(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ RegSet allow = RSET_FPR;
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, allow);
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
|
|
+ Reg tmp = ra_scratch(as, rset_clear(allow, right));
|
|
+ emit_ds(as, RISCVI_FMV_X_W, dest, tmp);
|
|
+ emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right);
|
|
+}
|
|
+/* Type conversion (CONV) from source type st (low bits of op2) to ir->t. Branches: FP result held in a GPR argument register, FP result in an FPR, FP to integer, narrow integer extension, and 32/64 bit integer casts. */
|
|
+static void asm_conv(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
|
|
+ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
|
|
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
|
|
+ IRRef lref = ir->op1;
|
|
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
|
|
+ /* Use GPR to pass floating-point arguments */
|
|
+ if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg ftmp = ra_scratch(as, RSET_FPR);
|
|
+ if (stfp) { /* FP to FP conversion. */
|
|
+ emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp);
|
|
+ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
|
|
+ ftmp, ra_alloc1(as, lref, RSET_FPR));
|
|
+ } else { /* Integer to FP conversion. */
|
|
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ RISCVIns riscvi = irt_isfloat(ir->t) ?
|
|
+ (((IRT_IS64 >> st) & 1) ?
|
|
+ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
|
|
+ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
|
|
+ (((IRT_IS64 >> st) & 1) ?
|
|
+ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
|
|
+ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
|
|
+ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); /* Move width follows the FP result type (as in the FP to FP case), not the integer source width. */
|
|
+ emit_ds(as, riscvi, ftmp, left);
|
|
+ }
|
|
+ } else if (irt_isfp(ir->t)) {
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ if (stfp) { /* FP to FP conversion. */
|
|
+ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
|
|
+ dest, ra_alloc1(as, lref, RSET_FPR));
|
|
+ } else { /* Integer to FP conversion. */
|
|
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ RISCVIns riscvi = irt_isfloat(ir->t) ?
|
|
+ (((IRT_IS64 >> st) & 1) ?
|
|
+ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
|
|
+ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
|
|
+ (((IRT_IS64 >> st) & 1) ?
|
|
+ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
|
|
+ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
|
|
+ emit_ds(as, riscvi, dest, left);
|
|
+ }
|
|
+ } else if (stfp) { /* FP to integer conversion. */
|
|
+ if (irt_isguard(ir->t)) {
|
|
+ /* Checked conversions are only supported from number to int. */
|
|
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
|
|
+ "bad type for checked CONV");
|
|
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
|
|
+ } else {
|
|
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ RISCVIns riscvi = irt_is64(ir->t) ?
|
|
+ (st == IRT_NUM ?
|
|
+ (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) :
|
|
+ (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) :
|
|
+ (st == IRT_NUM ?
|
|
+ (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) :
|
|
+ (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S));
|
|
+ emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); /* Unchecked FP to int uses the RTZ rounding mode. */
|
|
+ }
|
|
+ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B :
|
|
+ st == IRT_U8 ? RISCVI_ZEXT_B :
|
|
+ st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H;
|
|
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
|
|
+ emit_ext(as, riscvi, dest, left);
|
|
+ } else { /* 32/64 bit integer conversions. */
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ if (irt_is64(ir->t)) {
|
|
+ if (st64) {
|
|
+ /* 64/64 bit no-op (cast)*/
|
|
+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
|
|
+ } else { /* 32 to 64 bit sign extension. */
|
|
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
|
|
+ emit_ext(as, RISCVI_SEXT_W, dest, left);
|
|
+ } else { /* 32 to 64 bit zero extension. */
|
|
+ emit_ext(as, RISCVI_ZEXT_W, dest, left);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ if (st64 && !(ir->op2 & IRCONV_NONE)) {
|
|
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
|
|
+ ** or a load of the loword from a 64 bit address.
|
|
+ */
|
|
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ emit_ext(as, RISCVI_ZEXT_W, dest, left);
|
|
+ } else { /* 32/32 bit no-op (cast). */
|
|
+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* String to number via lj_strscan_num(str, n). n points at SP+ofs, where ofs is the IR's spill slot or SPOFS_TMP when it has none; a zero return value takes the guard. */
|
|
+static void asm_strto(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
|
|
+ IRRef args[2];
|
|
+ int32_t ofs = SPOFS_TMP;
|
|
+ RegSet drop = RSET_SCRATCH;
|
|
+ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
|
|
+ ra_evictset(as, drop);
|
|
+ if (ir->s) ofs = sps_scale(ir->s);
|
|
+ asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
|
|
+ args[0] = ir->op1; /* GCstr *str */
|
|
+ args[1] = ASMREF_TMP1; /* TValue *n */
|
|
+ asm_gencall(as, ci, args);
|
|
+ /* Store the result to the spill slot or temp slots. */
|
|
+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
|
|
+ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs);
|
|
+}
|
|
+
|
|
+/* -- Memory references --------------------------------------------------- */
|
|
+
|
|
+/* Store tagged value for ref at base+ofs. Constants are stored as one precomputed 64-bit word; otherwise the value is combined in RID_TMP with its tag (itype << 47) and stored with SD. */
|
|
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
|
|
+{
|
|
+ RegSet allow = rset_exclude(RSET_GPR, base);
|
|
+ IRIns *ir = IR(ref);
|
|
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
|
|
+ "store of IR type %d", irt_type(ir->t));
|
|
+ if (irref_isk(ref)) {
|
|
+ TValue k;
|
|
+ lj_ir_kvalue(as->J->L, &k, ir);
|
|
+ emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
|
|
+ } else {
|
|
+ Reg src = ra_alloc1(as, ref, allow);
|
|
+ rset_clear(allow, src);
|
|
+ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
|
|
+ emit_lso(as, RISCVI_SD, RID_TMP, base, ofs);
|
|
+ if (irt_isinteger(ir->t)) {
|
|
+ if (as->flags & JIT_F_RVZba) {
|
|
+ emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type);
|
|
+ } else { /* Without Zba: ZEXT.W of src into RID_TMP, then ADD of the tag. */
|
|
+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type);
|
|
+ emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src);
|
|
+ }
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Get pointer to TValue. dest receives RID_GL + offsetof(global_State, tmptv), or the address of a number constant when that can be used directly. */
|
|
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) /* mode & IRTMPREF_IN1: store ref into the slot; mode & IRTMPREF_OUT1: never alias a number constant. */
|
|
+{
|
|
+ if ((mode & IRTMPREF_IN1)) {
|
|
+ IRIns *ir = IR(ref);
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
|
|
+ /* Use the number constant itself as a TValue. */
|
|
+ ra_allockreg(as, igcptr(ir_knum(ir)), dest);
|
|
+ return;
|
|
+ }
|
|
+ emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0);
|
|
+ } else {
|
|
+ asm_tvstore64(as, dest, 0, ref);
|
|
+ }
|
|
+ }
|
|
+ /* g->tmptv holds the TValue(s). */
|
|
+ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv));
|
|
+}
|
|
+/* Array slot address. Constant index: a single ADDI with byte offset 8*index (plus asm_fuseabase() offset) when it passes checki12(); otherwise emit_sh3add() on base and index registers. */
|
|
+static void asm_aref(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg idx, base;
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ IRRef tab = IR(ir->op1)->op1;
|
|
+ int32_t ofs = asm_fuseabase(as, tab);
|
|
+ IRRef refa = ofs ? tab : ir->op1; /* Non-zero fuse offset: address relative to the table itself instead of op1. */
|
|
+ ofs += 8*IR(ir->op2)->i;
|
|
+ if (checki12(ofs)) {
|
|
+ base = ra_alloc1(as, refa, RSET_GPR);
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, base, ofs);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ base = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
|
|
+ emit_sh3add(as, dest, base, idx, RID_TMP);
|
|
+}
|
|
+
|
|
+/* Inlined hash lookup. Specialized for key type and for const keys.
|
|
+** The equivalent C code is:
|
|
+** Node *n = hashkey(t, key);
|
|
+** do {
|
|
+** if (lj_obj_equal(&n->key, key)) return &n->val;
|
|
+** } while ((n = nextnode(n)));
|
|
+** return niltv(L);
|
|
+*/
|
|
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
|
|
+{
|
|
+ RegSet allow = RSET_GPR;
|
|
+ int destused = ra_used(ir);
|
|
+ Reg dest = ra_dest(as, ir, allow);
|
|
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
|
|
+ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2;
|
|
+ Reg cmp64 = RID_NONE;
|
|
+ IRRef refkey = ir->op2;
|
|
+ IRIns *irkey = IR(refkey);
|
|
+ int isk = irref_isk(refkey);
|
|
+ IRType1 kt = irkey->t;
|
|
+ uint32_t khash;
|
|
+ MCLabel l_end, l_loop, l_next;
|
|
+ rset_clear(allow, tab);
|
|
+ tmp1 = ra_scratch(as, allow);
|
|
+ rset_clear(allow, tmp1);
|
|
+ tmp2 = ra_scratch(as, allow);
|
|
+ rset_clear(allow, tmp2);
|
|
+
|
|
+ if (irt_isnum(kt)) {
|
|
+ key = ra_alloc1(as, refkey, RSET_FPR);
|
|
+ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
|
|
+ } else {
|
|
+ /* Allocate cmp64 register used for 64-bit comparisons */
|
|
+ if (!isk && irt_isaddr(kt)) {
|
|
+ cmp64 = tmp2;
|
|
+ } else {
|
|
+ int64_t k;
|
|
+ if (isk && irt_isaddr(kt)) {
|
|
+ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
|
|
+ } else {
|
|
+ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
|
|
+ k = ~((int64_t)~irt_toitype(kt) << 47);
|
|
+ }
|
|
+ cmp64 = ra_allock(as, k, allow);
|
|
+ rset_clear(allow, cmp64);
|
|
+ }
|
|
+ if (!irt_ispri(kt)) {
|
|
+ key = ra_alloc1(as, refkey, allow);
|
|
+ rset_clear(allow, key);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
|
|
+ l_end = emit_label(as);
|
|
+ int is_lend_exit = 0;
|
|
+ as->invmcp = NULL;
|
|
+ if (merge == IR_NE)
|
|
+ asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO);
|
|
+ else if (destused)
|
|
+ emit_loada(as, dest, niltvg(J2G(as->J)));
|
|
+
|
|
+ /* Follow hash chain until the end. */
|
|
+ l_loop = --as->mcp; /* Reserve one instruction slot; it is filled with the loop branch (*l_loop = ...) further down. */
|
|
+ emit_mv(as, dest, tmp1);
|
|
+ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next));
|
|
+ l_next = emit_label(as);
|
|
+
|
|
+ /* Type and value comparison. */
|
|
+ if (merge == IR_EQ) { /* Must match asm_guard(). */
|
|
+ l_end = asm_exitstub_addr(as, as->snapno);
|
|
+ is_lend_exit = 1;
|
|
+ }
|
|
+ if (irt_isnum(kt)) {
|
|
+ emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit);
|
|
+ emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key);
|
|
+ emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1);
|
|
+ emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM));
|
|
+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47);
|
|
+ emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1);
|
|
+ } else {
|
|
+ emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit);
|
|
+ }
|
|
+ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
|
|
+ *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO)
|
|
+ | RISCVF_IMMB((char *)as->mcp-(char *)l_loop);
|
|
+ if (!isk && irt_isaddr(kt)) {
|
|
+ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
|
|
+ emit_ds1s2(as, RISCVI_ADD, tmp2, key, type);
|
|
+ rset_clear(allow, type);
|
|
+ }
|
|
+
|
|
+ /* Load main position relative to tab->node into dest. */
|
|
+ khash = isk ? ir_khash(as, irkey) : 1; /* Non-constant keys use 1 only to select the hashed path below. */
|
|
+ if (khash == 0) {
|
|
+ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
|
|
+ } else {
|
|
+ Reg tmphash = tmp1;
|
|
+ if (isk)
|
|
+ tmphash = ra_allock(as, khash, allow);
|
|
+ /* node = tab->node + (idx*32-idx*8) */
|
|
+ emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1);
|
|
+ lj_assertA(sizeof(Node) == 24, "bad Node size");
|
|
+ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1);
|
|
+ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3);
|
|
+ emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5);
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask
|
|
+ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
|
|
+ emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
|
|
+ if (isk) {
|
|
+ /* Nothing to do. */
|
|
+ } else if (irt_isstr(kt)) {
|
|
+ emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
|
|
+ } else { /* Must match with hash*() in lj_tab.c. */
|
|
+ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2);
|
|
+ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f);
|
|
+ emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2);
|
|
+ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f);
|
|
+ emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest);
|
|
+ emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1);
|
|
+ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f);
|
|
+ if (irt_isnum(kt)) {
|
|
+ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1);
|
|
+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi
|
|
+ emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo
|
|
+ emit_ds(as, RISCVI_FMV_X_D, tmp1, key);
|
|
+ } else {
|
|
+ checkmclim(as);
|
|
+ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi
|
|
+ emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo
|
|
+ emit_ds1s2(as, RISCVI_ADD, tmp1, key, type);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* HREFK: guard that the key stored in node slot kslot->op2 (byte offset ofs from the node base in op1) equals the expected constant; dest, if allocated, receives node+ofs. */
|
|
+static void asm_hrefk(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IRIns *kslot = IR(ir->op2);
|
|
+ IRIns *irkey = IR(kslot->op1);
|
|
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
|
|
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
|
|
+ int bigofs = !checki12(kofs); /* Key offset fails checki12(): node+ofs is computed into dest first and the key is loaded relative to it. */
|
|
+ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
|
|
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ RegSet allow = rset_exclude(RSET_GPR, node);
|
|
+ Reg idx = node;
|
|
+ int64_t k;
|
|
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
|
|
+ if (bigofs) {
|
|
+ idx = dest;
|
|
+ rset_clear(allow, dest);
|
|
+ kofs = (int32_t)offsetof(Node, key);
|
|
+ } else if (ra_hasreg(dest)) {
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, node, ofs);
|
|
+ }
|
|
+ if (irt_ispri(irkey->t)) {
|
|
+ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
|
|
+ k = ~((int64_t)~irt_toitype(irkey->t) << 47);
|
|
+ } else if (irt_isnum(irkey->t)) {
|
|
+ k = (int64_t)ir_knum(irkey)->u64;
|
|
+ } else {
|
|
+ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
|
|
+ }
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow));
|
|
+ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs);
|
|
+ if (bigofs)
|
|
+ emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow));
|
|
+}
|
|
+/* Upvalue reference (UREFO/UREFC). Unguarded constant function: load uv.v from a fixed address. Otherwise load the GCupval pointer, optionally guard on its `closed` byte, then take uv->v or, for IR_UREFC, the address of uv->tv. */
|
|
+static void asm_uref(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
|
|
+ if (irref_isk(ir->op1) && !guarded) {
|
|
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
|
|
+ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
|
|
+ emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR);
|
|
+ } else {
|
|
+ if (guarded)
|
|
+ asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO);
|
|
+ if (ir->o == IR_UREFC)
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
|
|
+ else
|
|
+ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v));
|
|
+ if (guarded)
|
|
+ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
|
|
+ if (irref_isk(ir->op1)) {
|
|
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
|
|
+ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
|
|
+ emit_loada(as, dest, o);
|
|
+ } else {
|
|
+ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR),
|
|
+ (int32_t)offsetof(GCfuncL, uvptr) +
|
|
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* FREF emits no code here; it only asserts that its result is unused (asm_fstore reads the FREF operands directly). */
|
|
+static void asm_fref(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ UNUSED(as); UNUSED(ir);
|
|
+ lj_assertA(!ra_used(ir), "unfused FREF");
|
|
+}
|
|
+/* String data address: op1 + op2 + sizeof(GCstr). A constant op2 whose sum with sizeof(GCstr) passes checki12() needs a single ADDI; otherwise an ADD followed by an ADDI is emitted. */
|
|
+static void asm_strref(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ RegSet allow = RSET_GPR;
|
|
+ Reg dest = ra_dest(as, ir, allow);
|
|
+ Reg base = ra_alloc1(as, ir->op1, allow);
|
|
+ IRIns *irr = IR(ir->op2);
|
|
+ int32_t ofs = sizeof(GCstr);
|
|
+ rset_clear(allow, base);
|
|
+ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) {
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i);
|
|
+ } else {
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, dest, ofs);
|
|
+ emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow));
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Loads and stores ---------------------------------------------------- */
|
|
+/* Map the IR result type of a field/extended load to the RISC-V load opcode. */
|
|
+static RISCVIns asm_fxloadins(IRIns *ir)
|
|
+{
|
|
+ const int ty = (int)irt_type(ir->t);
|
|
+ if (ty == IRT_I8) return RISCVI_LB;
|
|
+ if (ty == IRT_U8) return RISCVI_LBU;
|
|
+ if (ty == IRT_I16) return RISCVI_LH;
|
|
+ if (ty == IRT_U16) return RISCVI_LHU;
|
|
+ if (ty == IRT_NUM) return RISCVI_FLD;
|
|
+ if (ty == IRT_FLOAT) return RISCVI_FLW;
|
|
+ /* Everything else is a plain integer/pointer load: width chosen by irt_is64(). */
|
|
+ return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW;
|
|
+}
|
|
+/* Map the IR type of a field/extended store to the RISC-V store opcode. */
|
|
+static RISCVIns asm_fxstoreins(IRIns *ir)
|
|
+{
|
|
+ const int ty = (int)irt_type(ir->t);
|
|
+ if (ty == IRT_I8 || ty == IRT_U8) return RISCVI_SB;
|
|
+ if (ty == IRT_I16 || ty == IRT_U16) return RISCVI_SH;
|
|
+ if (ty == IRT_NUM) return RISCVI_FSD;
|
|
+ if (ty == IRT_FLOAT) return RISCVI_FSW;
|
|
+ /* Everything else is a plain integer/pointer store: width chosen by irt_is64(). */
|
|
+ return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW;
|
|
+}
|
|
+/* Field load. op1 == REF_NIL loads relative to RID_GL at (op2 << 2) - GG_OFS(g); otherwise the offset comes from field_ofs[op2], and IRFL_TAB_ARRAY may become an ADDI when asm_fuseabase() returns a non-zero offset. */
|
|
+static void asm_fload(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ RegSet allow = RSET_GPR;
|
|
+ Reg idx, dest = ra_dest(as, ir, allow);
|
|
+ rset_clear(allow, dest);
|
|
+ RISCVIns riscvi = asm_fxloadins(ir);
|
|
+ int32_t ofs;
|
|
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
|
|
+ idx = RID_GL;
|
|
+ ofs = (ir->op2 << 2) - GG_OFS(g);
|
|
+ } else {
|
|
+ idx = ra_alloc1(as, ir->op1, allow);
|
|
+ if (ir->op2 == IRFL_TAB_ARRAY) {
|
|
+ ofs = asm_fuseabase(as, ir->op1);
|
|
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, idx, ofs);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ ofs = field_ofs[ir->op2];
|
|
+ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
|
|
+ }
|
|
+ rset_clear(allow, idx);
|
|
+ emit_lso(as, riscvi, dest, idx, ofs);
|
|
+}
|
|
+/* Field store; nothing is emitted when ir->r is RID_SINK. The object (irf->op1) and field index (irf->op2) are read from the instruction referenced by op1. */
|
|
+static void asm_fstore(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (ir->r != RID_SINK) {
|
|
+ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
|
|
+ IRIns *irf = IR(ir->op1);
|
|
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
|
|
+ int32_t ofs = field_ofs[irf->op2];
|
|
+ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
|
|
+ emit_lso(as, asm_fxstoreins(ir), src, idx, ofs);
|
|
+ }
|
|
+}
|
|
+/* XLOAD: load of the IR's type (opcode from asm_fxloadins) from the address in op1, with address fusion done by asm_fusexref(). */
|
|
+static void asm_xload(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
|
|
+ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
|
|
+ "unaligned XLOAD");
|
|
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
|
|
+}
|
|
+/* XSTORE of op2 to the address in op1 plus byte offset ofs; nothing is emitted when ir->r is RID_SINK. */
|
|
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
|
|
+{
|
|
+ if (ir->r != RID_SINK) {
|
|
+ Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
|
|
+ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
|
|
+ rset_exclude(RSET_GPR, src), ofs);
|
|
+ }
|
|
+}
|
|
+/* Plain XSTORE is the zero-offset case. */
|
|
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
|
|
+/* Typed load from the slot addressed by op1 (IR_VLOAD adds 8*op2 to the offset): loads the 64-bit word, guards on the tag obtained by SRAI 47, and post-processes dest for address/int results. */
|
|
+static void asm_ahuvload(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = RID_NONE, type = RID_TMP, idx;
|
|
+ RegSet allow = RSET_GPR;
|
|
+ int32_t ofs = 0;
|
|
+ IRType1 t = ir->t;
|
|
+ if (ra_used(ir)) {
|
|
+ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t),
|
|
+ "bad load type %d", irt_type(ir->t));
|
|
+ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
|
|
+ rset_clear(allow, dest);
|
|
+ if (irt_isaddr(t)) {
|
|
+ emit_cleartp(as, dest, dest);
|
|
+ } else if (irt_isint(t))
|
|
+ emit_ext(as, RISCVI_SEXT_W, dest, dest);
|
|
+ }
|
|
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
|
|
+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
|
|
+ rset_clear(allow, idx);
|
|
+ if (irt_isnum(t)) {
|
|
+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);
|
|
+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
|
|
+ } else {
|
|
+ asm_guard(as, RISCVI_BNE, type,
|
|
+ ra_allock(as, (int32_t)irt_toitype(t), allow));
|
|
+ }
|
|
+ if (ra_hasreg(dest)) {
|
|
+ if (irt_isnum(t)) {
|
|
+ emit_lso(as, RISCVI_FLD, dest, idx, ofs);
|
|
+ dest = type; /* Number: the FPR gets its own FLD; the tag word is loaded into the type register below. */
|
|
+ }
|
|
+ } else {
|
|
+ dest = type; /* Result unused: load the word into the type register just for the check. */
|
|
+ }
|
|
+ emit_dsshamt(as, RISCVI_SRAI, type, dest, 47);
|
|
+ emit_lso(as, RISCVI_LD, dest, idx, ofs);
|
|
+}
|
|
+/* Store to the slot addressed by op1; nothing is emitted when ir->r is RID_SINK. Numbers are stored with FSD, primitives as a constant word, other values as src combined with the tag (itype << 47). */
|
|
+static void asm_ahustore(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ RegSet allow = RSET_GPR;
|
|
+ Reg idx, src = RID_NONE, type = RID_NONE;
|
|
+ int32_t ofs = 0;
|
|
+ if (ir->r == RID_SINK)
|
|
+ return;
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ src = ra_alloc1(as, ir->op2, RSET_FPR);
|
|
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
|
|
+ emit_lso(as, RISCVI_FSD, src, idx, ofs);
|
|
+ } else {
|
|
+ Reg tmp = RID_TMP;
|
|
+ if (irt_ispri(ir->t)) {
|
|
+ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
|
|
+ rset_clear(allow, tmp);
|
|
+ } else {
|
|
+ src = ra_alloc1(as, ir->op2, allow);
|
|
+ rset_clear(allow, src);
|
|
+ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
|
|
+ rset_clear(allow, type);
|
|
+ }
|
|
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
|
|
+ emit_lso(as, RISCVI_SD, tmp, idx, ofs);
|
|
+ if (ra_hasreg(src)) {
|
|
+ if (irt_isinteger(ir->t)) {
|
|
+ if (as->flags & JIT_F_RVZba) {
|
|
+ emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type);
|
|
+ } else { /* Without Zba: ZEXT.W of src into tmp, then ADD of the tag. */
|
|
+ emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type);
|
|
+ emit_ext(as, RISCVI_ZEXT_W, tmp, src);
|
|
+ }
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_ADD, tmp, src, type);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* Stack slot load from BASE at byte offset 8*(op1-2), with optional type check (IRSLOAD_TYPECHECK), key-index check (IRSLOAD_KEYINDEX) and int/num conversion (IRSLOAD_CONVERT). */
|
|
+static void asm_sload(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = RID_NONE, type = RID_NONE, base;
|
|
+ RegSet allow = RSET_GPR;
|
|
+ IRType1 t = ir->t;
|
|
+ int32_t ofs = 8*((int32_t)ir->op1-2);
|
|
+ lj_assertA(checki12(ofs), "sload IR operand out of range");
|
|
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
|
|
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
|
|
+ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
|
|
+ "inconsistent SLOAD variant");
|
|
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
|
|
+ dest = ra_scratch(as, RSET_FPR);
|
|
+ asm_tointg(as, ir, dest);
|
|
+ t.irt = IRT_NUM; /* Continue with a regular number type check. */
|
|
+ } else if (ra_used(ir)) {
|
|
+ Reg tmp = RID_NONE;
|
|
+ if ((ir->op2 & IRSLOAD_CONVERT))
|
|
+ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
|
|
+ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
|
|
+ "bad SLOAD type %d", irt_type(t));
|
|
+ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
|
|
+ rset_clear(allow, dest);
|
|
+ base = ra_alloc1(as, REF_BASE, allow);
|
|
+ rset_clear(allow, base);
|
|
+ if (irt_isaddr(t)) { /* Clear type from pointers. */
|
|
+ emit_cleartp(as, dest, dest);
|
|
+ } else if (ir->op2 & IRSLOAD_CONVERT) {
|
|
+ if (irt_isint(t)) {
|
|
+ emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp);
|
|
+ /* If value is already loaded for type check, move it to FPR. */
|
|
+ if ((ir->op2 & IRSLOAD_TYPECHECK))
|
|
+ emit_ds(as, RISCVI_FMV_D_X, tmp, dest);
|
|
+ else
|
|
+ dest = tmp;
|
|
+ t.irt = IRT_NUM; /* Check for original type. */
|
|
+ } else {
|
|
+ emit_ds(as, RISCVI_FCVT_D_W, dest, tmp);
|
|
+ dest = tmp;
|
|
+ t.irt = IRT_INT; /* Check for original type. */
|
|
+ }
|
|
+ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
|
|
+ /* Sign-extend integers. */
|
|
+ emit_ext(as, RISCVI_SEXT_W, dest, dest);
|
|
+ }
|
|
+ goto dotypecheck;
|
|
+ }
|
|
+ base = ra_alloc1(as, REF_BASE, allow);
|
|
+ rset_clear(allow, base);
|
|
+dotypecheck:
|
|
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
|
|
+ type = dest < RID_MAX_GPR ? dest : RID_TMP; /* Reuse dest for the tagged word only when it is a GPR; otherwise use RID_TMP. */
|
|
+ if (irt_ispri(t)) {
|
|
+ asm_guard(as, RISCVI_BNE, type,
|
|
+ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
|
|
+ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP,
|
|
+ ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
|
|
+ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32);
|
|
+ } else {
|
|
+ if (irt_isnum(t)) {
|
|
+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);
|
|
+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM);
|
|
+ if (ra_hasreg(dest)) {
|
|
+ emit_lso(as, RISCVI_FLD, dest, base, ofs);
|
|
+ }
|
|
+ } else {
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP,
|
|
+ ra_allock(as, (int32_t)irt_toitype(t), allow));
|
|
+ }
|
|
+ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47);
|
|
+ }
|
|
+ emit_lso(as, RISCVI_LD, type, base, ofs);
|
|
+ } else if (ra_hasreg(dest)) {
|
|
+ emit_lso(as, irt_isnum(t) ? RISCVI_FLD :
|
|
+ irt_isint(t) ? RISCVI_LW : RISCVI_LD,
|
|
+ dest, base, ofs);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Allocations --------------------------------------------------------- */
|
|
+
|
|
+#if LJ_HASFFI
|
|
+static void asm_cnew(ASMState *as, IRIns *ir)
|
|
+{ /* CNEW/CNEWI: allocates cdata through lj_mem_newgco() and fills in gct/ctypeid; a non-CNEWI instruction with op2 != REF_NIL calls lj_cdata_newv() instead and returns early. */
|
|
+ CTState *cts = ctype_ctsG(J2G(as->J));
|
|
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
|
|
+ CTSize sz;
|
|
+ CTInfo info = lj_ctype_info(cts, id, &sz);
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
|
|
+ IRRef args[4];
|
|
+ RegSet drop = RSET_SCRATCH;
|
|
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
|
|
+ "bad CNEW/CNEWI operands");
|
|
+
|
|
+ as->gcsteps++;
|
|
+ if (ra_hasreg(ir->r))
|
|
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
|
|
+ ra_evictset(as, drop);
|
|
+ if (ra_used(ir))
|
|
+ ra_destreg(as, ir, RID_RET); /* GCcdata * */
|
|
+
|
|
+ /* Initialize immutable cdata object. */
|
|
+ if (ir->o == IR_CNEWI) {
|
|
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
|
|
+ emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow),
|
|
+ RID_RET, (sizeof(GCcdata)));
|
|
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
|
|
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
|
|
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
|
|
+ args[0] = ASMREF_L; /* lua_State *L */
|
|
+ args[1] = ir->op1; /* CTypeID id */
|
|
+ args[2] = ir->op2; /* CTSize sz */
|
|
+ args[3] = ASMREF_TMP1; /* CTSize align */
|
|
+ asm_gencall(as, ci, args);
|
|
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
|
|
+ emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct)));
|
|
+ emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid)));
|
|
+ emit_loadk12(as, RID_RET+1, ~LJ_TCDATA);
|
|
+ emit_loadk32(as, RID_TMP, id);
|
|
+ args[0] = ASMREF_L; /* lua_State *L */
|
|
+ args[1] = ASMREF_TMP1; /* MSize size */
|
|
+ asm_gencall(as, ci, args);
|
|
+ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
|
|
+ ra_releasetmp(as, ASMREF_TMP1));
|
|
+}
|
|
+#endif
|
|
+
|
|
+/* -- Write barriers ------------------------------------------------------ */
|
|
+/* Assemble TBAR (table write barrier): a black table gets its black bit cleared and is linked into g->gc.grayagain. Code is emitted in reverse. */
|
|
+static void asm_tbar(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
|
|
+ Reg link = RID_TMP;
|
|
+ MCLabel l_end = emit_label(as);
|
|
+ emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist));
|
|
+ emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
|
|
+ emit_setgl(as, tab, gc.grayagain); // make tab gray again
|
|
+ emit_getgl(as, link, gc.grayagain);
|
|
+ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // not black: skip the barrier
|
|
+ emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); // clear the black bit: gray
|
|
+ emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
|
|
+ emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked)));
|
|
+}
|
|
+/* Assemble OBAR (closed upvalue write barrier): call lj_gc_barrieruv(g, tv) only if the value is white and the upvalue is black. */
|
|
+static void asm_obar(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
|
|
+ IRRef args[2];
|
|
+ MCLabel l_end;
|
|
+ Reg obj, val, tmp;
|
|
+ /* No need for other object barriers (yet). */
|
|
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue
|
|
+ ra_evictset(as, RSET_SCRATCH);
|
|
+ l_end = emit_label(as);
|
|
+ args[0] = ASMREF_TMP1; /* global_State *g */
|
|
+ args[1] = ir->op1; /* TValue *tv */
|
|
+ asm_gencall(as, ci, args);
|
|
+ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
|
|
+ obj = IR(ir->op1)->r;
|
|
+ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
|
|
+ emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); // upvalue not black: skip the call
|
|
+ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // value not white: skip the call
|
|
+ emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK);
|
|
+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
|
|
+ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
|
|
+ emit_lso(as, RISCVI_LBU, tmp, obj, /* obj is addressed relative to the tv field, hence the rebased offset. */
|
|
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)));
|
|
+ emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked)));
|
|
+}
|
|
+
|
|
+/* -- Arithmetic and logic operations ------------------------------------- */
|
|
+/* Emit a two-operand FP instruction `riscvi`: dest = left op right, with all three registers taken from RSET_FPR. */
|
|
+static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ right = (left >> 8); left &= 255; /* ra_alloc2() result is unpacked: low byte = left, next byte = right. */
|
|
+ emit_ds1s2(as, riscvi, dest, left, right);
|
|
+}
|
|
+/* Emit a one-operand FP instruction `riscvi`. Only the opcodes listed in the switch are accepted; anything else asserts. */
|
|
+static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
|
|
+ switch(riscvi) {
|
|
+ case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP:
|
|
+ case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP:
|
|
+ case RISCVI_FSQRT_S: case RISCVI_FSQRT_D:
|
|
+ emit_ds(as, riscvi, dest, left); /* Single source register form. */
|
|
+ break;
|
|
+ case RISCVI_FMV_S: case RISCVI_FMV_D:
|
|
+ case RISCVI_FABS_S: case RISCVI_FABS_D:
|
|
+ case RISCVI_FNEG_S: case RISCVI_FNEG_D:
|
|
+ emit_ds1s2(as, riscvi, dest, left, left); /* Source goes into both rs1 and rs2. */
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "bad fp unary instruction");
|
|
+ return;
|
|
+ }
|
|
+}
|
|
+/* Round a double to an integral double: FP->int64 (`riscvi` carries the rounding mode) and back. Emitted in reverse order. */
|
|
+static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
|
|
+ MCLabel l_end = emit_label(as);
|
|
+
|
|
+ if (dest != left) {
|
|
+ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); /* Give the result the sign of the input. */
|
|
+ emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP);
|
|
+ } else {
|
|
+ Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); /* dest aliases the input, so convert via a scratch FPR. */
|
|
+ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left);
|
|
+ emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP);
|
|
+ }
|
|
+ emit_ds(as, riscvi, RID_TMP, left);
|
|
+ emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, -1); /* Biased exponent > 1075: skip the conversion, dest keeps the input. */
|
|
+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); /* 1075 = 1023 (exponent bias) + 52 (mantissa bits). */
|
|
+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); /* Isolate the 11 bit exponent field. */
|
|
+ emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52);
|
|
+ if (dest != left)
|
|
+ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left);
|
|
+ emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); /* Raw bits of the input. */
|
|
+}
|
|
+/* Assemble FPMATH: floor/ceil/trunc inline (Zfa FROUND.D or an FCVT.L.D round-trip), sqrt via FSQRT.D, all other ops via a call. */
|
|
+static void asm_fpmath(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
|
|
+ if (fpm <= IRFPM_TRUNC) { /* Braced: no reliance on dangling-else binding. */
|
|
+ if (as->flags & JIT_F_RVZfa) {
|
|
+ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN :
|
|
+ fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ);
|
|
+ } else {
|
|
+ asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) :
|
|
+ fpm == IRFPM_CEIL ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) :
|
|
+ RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ));
|
|
+ }
|
|
+ } else if (fpm == IRFPM_SQRT)
|
|
+ asm_fpunary(as, ir, RISCVI_FSQRT_D);
|
|
+ else
|
|
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); /* Call ID is indexed by fpm starting at lj_vm_floor. */
|
|
+}
|
|
+/* Assemble ADD: FP add (or FMADD.D if asm_fusemadd() succeeds); integer add via TH.MULA fusion, ADDI/ADDIW for 12 bit constants, else ADD/ADDW. */
|
|
+static void asm_add(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IRType1 t = ir->t;
|
|
+ if (irt_isnum(t)) {
|
|
+ if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D))
|
|
+ asm_fparith(as, ir, RISCVI_FADD_D);
|
|
+ return;
|
|
+ } else {
|
|
+ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA))
|
|
+ return;
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ intptr_t k = get_kval(as, ir->op2);
|
|
+ if (checki12(k)) { /* Constant fits the 12 bit immediate. */
|
|
+ if (irt_is64(t)) {
|
|
+ emit_dsi(as, RISCVI_ADDI, dest, left, k);
|
|
+ } else {
|
|
+ emit_dsi(as, RISCVI_ADDIW, dest, left, k); /* 32 bit add for non-64 bit types. */
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest,
|
|
+ left, right);
|
|
+ }
|
|
+}
|
|
+/* Assemble SUB: FP subtract (or FMSUB.D/FNMSUB.D if asm_fusemadd() succeeds); integer subtract via TH.MULS fusion, else SUB/SUBW. */
|
|
+static void asm_sub(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D))
|
|
+ asm_fparith(as, ir, RISCVI_FSUB_D);
|
|
+ return;
|
|
+ } else {
|
|
+ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS))
|
|
+ return;
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
|
|
+ right = (left >> 8); left &= 255; /* Unpack the two registers returned by ra_alloc2(). */
|
|
+ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
|
|
+ left, right);
|
|
+ }
|
|
+}
|
|
+/* Assemble MUL: FMUL.D for numbers, MUL for 64 bit integer types, MULW otherwise. */
|
|
+static void asm_mul(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ asm_fparith(as, ir, RISCVI_FMUL_D);
|
|
+ } else {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
|
|
+ right = (left >> 8); left &= 255; /* Unpack the two registers returned by ra_alloc2(). */
|
|
+ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_MUL : RISCVI_MULW, dest,
|
|
+ left, right);
|
|
+ }
|
|
+}
|
|
+/* Assemble an FP division as a single FDIV.D. */
|
|
+static void asm_fpdiv(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ asm_fparith(as, ir, RISCVI_FDIV_D);
|
|
+}
|
|
+/* Assemble NEG: FNEG.D for numbers, otherwise an integer subtract from the zero register (SUB or SUBW). */
|
|
+static void asm_neg(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ asm_fpunary(as, ir, RISCVI_FNEG_D);
|
|
+ } else {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
|
|
+ RID_ZERO, left); /* dest = 0 - left */
|
|
+ }
|
|
+}
|
|
+/* ABS is always assembled as the FP instruction FABS.D. */
|
|
+#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D)
|
|
+/* Assemble ADDOV/SUBOV: 32 bit add/subtract followed by an overflow guard. Operands are expected sign-extended. Emitted in reverse. */
|
|
+static void asm_arithov(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
|
|
+ lj_assertA(!irt_is64(ir->t), "bad usage");
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ int k = IR(ir->op2)->i;
|
|
+ if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); /* Negate without signed overflow: x - k == x + (-k). */
|
|
+ if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */
|
|
+ left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left);
|
|
+ emit_dsi(as, RISCVI_ADDIW, dest, left, k); /* Must be the 32 bit form: 64 bit ADDI never wraps, so the guard could never fire. */
|
|
+ if (dest == left) emit_mv(as, RID_TMP, left); /* Keep the original operand for the compare. */
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ left = ra_alloc2(as, ir, RSET_GPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
|
|
+ right), dest));
|
|
+ asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); /* Sign bit set in the combined mask => overflow. */
|
|
+ emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp);
|
|
+ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */
|
|
+ emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
|
|
+ } else { /* ((dest^left) & (dest^~right)) < 0 */
|
|
+ emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right);
|
|
+ }
|
|
+ emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left);
|
|
+ emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right);
|
|
+ if (dest == left || dest == right)
|
|
+ emit_mv(as, RID_TMP, dest == left ? left : right); /* Save the operand that dest is about to overwrite. */
|
|
+}
|
|
+/* ADDOV and SUBOV share asm_arithov(), which dispatches on ir->o. */
|
|
+#define asm_addov(as, ir) asm_arithov(as, ir)
|
|
+#define asm_subov(as, ir) asm_arithov(as, ir)
|
|
+/* Assemble MULOV: 64 bit multiply, then guard that the sign-extended low 32 bits equal the full product. Emitted in reverse. */
|
|
+static void asm_mulov(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ asm_guard(as, RISCVI_BNE, dest, RID_TMP); /* Mismatch => product does not fit in 32 bits. */
|
|
+ emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend
|
|
+ emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0]
|
|
+}
|
|
+/* Assemble BNOT: with Zbb, a fusable BXOR operand is folded into a single XNOR; otherwise a plain NOT is emitted. */
|
|
+static void asm_bnot(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
|
|
+ IRIns *irl = IR(ir->op1);
|
|
+ if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) {
|
|
+ left = ra_alloc2(as, irl, RSET_GPR); /* Allocate the operands of the fused BXOR, not of the BNOT. */
|
|
+ right = (left >> 8); left &= 255;
|
|
+ emit_ds1s2(as, RISCVI_XNOR, dest, left, right);
|
|
+ } else {
|
|
+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ emit_ds(as, RISCVI_NOT, dest, left);
|
|
+ }
|
|
+}
|
|
+/* Assemble BSWAP: REV8 (Zbb), TH.REV/TH.REVW (XThead), or a generic shift/mask/or sequence using scratch registers. Emitted in reverse. */
|
|
+static void asm_bswap(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left);
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ if (!irt_is64(ir->t))
|
|
+ emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); /* 32 bit: move the swapped word down from the upper half, sign-extending. */
|
|
+ emit_ds(as, RISCVI_REV8, dest, left);
|
|
+ } else if (as->flags & JIT_F_RVXThead) {
|
|
+ emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW,
|
|
+ dest, left);
|
|
+ } else if (irt_is64(ir->t)) { /* Generic 64 bit sequence: needs four scratch registers plus RID_TMP. */
|
|
+ Reg tmp1, tmp2, tmp3, tmp4;
|
|
+ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
|
|
+ tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2);
|
|
+ tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3);
|
|
+ tmp4 = ra_scratch(as, allow);
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4);
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3);
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, dest, left, 56);
|
|
+ emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3);
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24);
|
|
+ emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24);
|
|
+ emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2);
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1);
|
|
+ emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24);
|
|
+ emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24);
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1);
|
|
+ emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8);
|
|
+ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24);
|
|
+ emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3);
|
|
+ emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); /* tmp1 = mask constant from 0xff0000. */
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP);
|
|
+ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56);
|
|
+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); /* RID_TMP = 0xff00 mask, built by LUI+ADDI. */
|
|
+ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u));
|
|
+ emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40);
|
|
+ } else { /* Generic 32 bit sequence: two scratch registers plus RID_TMP. */
|
|
+ Reg tmp1, tmp2;
|
|
+ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
|
|
+ tmp2 = ra_scratch(as, allow);
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1);
|
|
+ emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8);
|
|
+ emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24);
|
|
+ emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2);
|
|
+ emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP);
|
|
+ emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP);
|
|
+ emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24);
|
|
+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); /* RID_TMP = 0xff00 mask, built by LUI+ADDI. */
|
|
+ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u));
|
|
+ emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8);
|
|
+ }
|
|
+}
|
|
+/* Assemble AND/OR/XOR. riscvi = register form, riscvik = 12 bit immediate form, riscvin = Zbb form with the second source inverted. */
|
|
+static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left, right;
|
|
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ intptr_t k = get_kval(as, ir->op2);
|
|
+ if (checki12(k)) {
|
|
+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ emit_dsi(as, riscvik, dest, left, k);
|
|
+ return;
|
|
+ } /* Larger constants fall through to the register form below. */
|
|
+ } else if (as->flags & JIT_F_RVZbb) {
|
|
+ if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) {
|
|
+ left = ra_alloc1(as, irl->op1, RSET_GPR);
|
|
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ emit_ds1s2(as, riscvin, dest, right, left); /* Operands swapped so the BNOT operand is the inverted source. */
|
|
+ return;
|
|
+ } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) {
|
|
+ left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
|
|
+ emit_ds1s2(as, riscvin, dest, left, right);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ emit_ds1s2(as, riscvi, dest, left, right);
|
|
+}
|
|
+/* BAND/BOR/BXOR: each passes its register, immediate and inverted-operand opcode to asm_bitop(). */
|
|
+#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN)
|
|
+#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN)
|
|
+#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR)
|
|
+/* Assemble shifts/rotates. riscvi = register-count opcode; riscvik = constant-count opcode (ADDI/ADDIW stand in for rotate-left). */
|
|
+static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ uint32_t shmsk = irt_is64(ir->t) ? 63 : 31;
|
|
+ if (irref_isk(ir->op2)) { /* Constant shifts. */
|
|
+ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk);
|
|
+ switch (riscvik) {
|
|
+ case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI:
|
|
+ case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW:
|
|
+ emit_dsshamt(as, riscvik, dest, left, shift);
|
|
+ break;
|
|
+ case RISCVI_ADDI: shift = (-shift) & shmsk; /* Rotate left by k == rotate right by -k. Fallthrough. */
|
|
+ case RISCVI_RORI:
|
|
+ emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift);
|
|
+ break;
|
|
+ case RISCVI_ADDIW: shift = (-shift) & shmsk; /* 32 bit variant of the above. Fallthrough. */
|
|
+ case RISCVI_RORIW:
|
|
+ emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift);
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "bad shift instruction");
|
|
+ return;
|
|
+ }
|
|
+ } else { /* Shift count in a register. */
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ switch (riscvi) {
|
|
+ case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL:
|
|
+ case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW:
|
|
+ emit_ds1s2(as, riscvi, dest, left, right);
|
|
+ break;
|
|
+ case RISCVI_ROR: case RISCVI_ROL:
|
|
+ case RISCVI_RORW: case RISCVI_ROLW:
|
|
+ emit_rot(as, riscvi, dest, left, right, RID_TMP); /* RID_TMP is handed to emit_rot() as a temporary. */
|
|
+ break;
|
|
+ default:
|
|
+ lj_assertA(0, "bad shift instruction");
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* Shift/rotate entry points: each selects the 64 bit or the 32 bit (W) opcode pair for asm_bitshift() based on the IR type. */
|
|
+#define asm_bshl(as, ir) (irt_is64(ir->t) ? \
|
|
+ asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \
|
|
+ asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW))
|
|
+#define asm_bshr(as, ir) (irt_is64(ir->t) ? \
|
|
+ asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \
|
|
+ asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW))
|
|
+#define asm_bsar(as, ir) (irt_is64(ir->t) ? \
|
|
+ asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \
|
|
+ asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW))
|
|
+#define asm_brol(as, ir) (irt_is64(ir->t) ? \
|
|
+ asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \
|
|
+ asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW))
|
|
+ /* asm_brol: ADDI/ADDIW are placeholders for the missing ROLI/ROLIW; asm_bitshift() maps them to RORI/RORIW with a negated count. */
|
|
+#define asm_bror(as, ir) (irt_is64(ir->t) ? \
|
|
+ asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \
|
|
+ asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW))
|
|
+/* Assemble MIN (ismax == 0) or MAX (ismax != 0). FP uses FLT.D plus branches; integers use Zbb MIN/MAX or an SLT-based select. Emitted in reverse. */
|
|
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
|
|
+{
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ MCLabel l_ret_left, l_end;
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ l_end = emit_label(as);
|
|
+
|
|
+ if (dest != left)
|
|
+ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); /* l_ret_left path: result is left. */
|
|
+ l_ret_left = emit_label(as);
|
|
+
|
|
+ if (dest != left)
|
|
+ emit_jump(as, l_end, -1); /* Result is right: jump over the move at l_ret_left. */
|
|
+ if (dest != right)
|
|
+ emit_ds1s2(as, RISCVI_FMV_D, dest, right, right);
|
|
+
|
|
+ emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1);
|
|
+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, /* TMP != 0 when left is the wanted operand. */
|
|
+ ismax ? left : right);
|
|
+ } else {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ if (as->flags & JIT_F_RVZbb) {
|
|
+ emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right);
|
|
+ } else {
|
|
+ if (as->flags & JIT_F_RVXThead) { /* Conditional moves on RID_TMP. */
|
|
+ if (left == right) {
|
|
+ if (dest != left) emit_mv(as, dest, left);
|
|
+ } else {
|
|
+ if (dest == left) {
|
|
+ emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP);
|
|
+ if (dest != right) emit_mv(as, dest, right);
|
|
+ }
|
|
+ }
|
|
+ } else if (as->flags & JIT_F_RVZicond) { /* CZERO.EQZ/CZERO.NEZ zero one candidate, OR merges them. */
|
|
+ emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP);
|
|
+ if (dest != right) {
|
|
+ emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP);
|
|
+ emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP);
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP);
|
|
+ emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP);
|
|
+ }
|
|
+ } else { /* Base ISA: branch-free select via XOR/AND masking. */
|
|
+ if (dest != right) {
|
|
+ emit_ds1s2(as, RISCVI_XOR, dest, right, dest);
|
|
+ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP);
|
|
+ emit_ds1s2(as, RISCVI_XOR, dest, right, left);
|
|
+ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); /* 0/1 flag -> all-ones/zero mask. */
|
|
+ } else {
|
|
+ emit_ds1s2(as, RISCVI_XOR, dest, left, dest);
|
|
+ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP);
|
|
+ emit_ds1s2(as, RISCVI_XOR, dest, left, right);
|
|
+ emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); /* 0/1 flag -> zero/all-ones mask. */
|
|
+ }
|
|
+ }
|
|
+ emit_ds1s2(as, RISCVI_SLT, RID_TMP, /* Comparison flag consumed by all three non-Zbb variants above. */
|
|
+ ismax ? left : right, ismax ? right : left);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/* MIN and MAX share asm_min_max(); the last argument is the ismax flag. */
|
|
+#define asm_min(as, ir) asm_min_max(as, ir, 0)
|
|
+#define asm_max(as, ir) asm_min_max(as, ir, 1)
|
|
+
|
|
+/* -- Comparisons --------------------------------------------------------- */
|
|
+
|
|
+/* FP comparisons: FLT.D/FLE.D/FEQ.D write 0/1 to RID_TMP, and the guard tests it against zero. Emitted in reverse. */
|
|
+static void asm_fpcomp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ IROp op = ir->o;
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) /* Guard polarity: bit 2 for ordered/unordered ops, bit 0 for EQ/NE. */
|
|
+ ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
|
|
+ switch (op) {
|
|
+ case IR_LT: case IR_UGE: /* Each pair shares one compare; only the guard polarity differs. */
|
|
+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right);
|
|
+ break;
|
|
+ case IR_LE: case IR_UGT: case IR_ABC:
|
|
+ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right);
|
|
+ break;
|
|
+ case IR_GT: case IR_ULE:
|
|
+ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); /* Operands swapped: a > b is b < a. */
|
|
+ break;
|
|
+ case IR_GE: case IR_ULT:
|
|
+ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left);
|
|
+ break;
|
|
+ case IR_EQ: case IR_NE:
|
|
+ emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right);
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Integer comparisons: compare against zero directly, use SLTI/SLTIU for 12 bit constants, else a register-register branch. */
|
|
+static void asm_intcomp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
|
|
+ /* 00 01 10 11 100 101 110 111 */
|
|
+ IROp op = ir->o;
|
|
+ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ if (op == IR_ABC) op = IR_UGT; /* ABC is assembled as UGT. */
|
|
+ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { /* Signed compare with zero: use the zero register. */
|
|
+ switch (op) {
|
|
+ case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break;
|
|
+ case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break;
|
|
+ case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break;
|
|
+ case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break;
|
|
+ default: break;
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ intptr_t k = get_kval(as, ir->op2);
|
|
+ if ((op&2)) k++; /* LE/GT variants become "< k+1" tests. */
|
|
+ if (checki12(k)) {
|
|
+ asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
|
|
+ emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), /* XOR of funct3 bit 0 selects BLT(U) vs BGE(U). */
|
|
+ (op&2) ? right : left, (op&2) ? left : right); /* LE/GT variants swap the operands. */
|
|
+}
|
|
+/* Assemble an ordered comparison: numbers go to asm_fpcomp(), everything else to asm_intcomp(). */
|
|
+static void asm_comp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (irt_isnum(ir->t))
|
|
+ asm_fpcomp(as, ir);
|
|
+ else
|
|
+ asm_intcomp(as, ir);
|
|
+}
|
|
+/* Assemble EQ/NE: numbers go to asm_fpcomp(); other types guard with a BEQ/BNE on the two registers. */
|
|
+static void asm_equal(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ asm_fpcomp(as, ir);
|
|
+ } else {
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_GPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); /* Bit 0 of the opcode selects the guard polarity. */
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Split register ops -------------------------------------------------- */
|
|
+
|
|
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. Only call results are handled on this target. */
|
|
+static void asm_hiop(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ /* HIOP is marked as a store because it needs its own DCE logic. */
|
|
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
|
|
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; /* Without DCE, treat both halves as used. */
|
|
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
|
|
+ switch ((ir-1)->o) {
|
|
+ case IR_CALLN:
|
|
+ case IR_CALLL:
|
|
+ case IR_CALLS:
|
|
+ case IR_CALLXS:
|
|
+ if (!uselo)
|
|
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
|
|
+ break;
|
|
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Profiling ----------------------------------------------------------- */
|
|
+/* Assemble PROF: load g->hookmask, mask it with HOOK_PROFILE and guard on a non-zero result. Emitted in reverse. */
|
|
+static void asm_prof(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ UNUSED(ir);
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
|
|
+ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
|
|
+ emit_lsglptr(as, RISCVI_LBU, RID_TMP,
|
|
+ (int32_t)offsetof(global_State, hookmask));
|
|
+}
|
|
+
|
|
+/* -- Stack handling ------------------------------------------------------ */
|
|
+
|
|
+/* Check Lua stack size for overflow. Use exit handler as fallback. Guards on (L->maxstack - base) < 8*topslot. Emitted in reverse. */
|
|
+static void asm_stack_check(ASMState *as, BCReg topslot,
|
|
+ IRIns *irp, RegSet allow, ExitNo exitno)
|
|
+{
|
|
+ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
|
|
+ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
|
|
+ ExitNo oldsnap = as->snapno;
|
|
+ rset_clear(allow, pbase);
|
|
+ as->snapno = exitno; /* The guard is emitted with exitno as the current snapshot number ... */
|
|
+ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
|
|
+ as->snapno = oldsnap; /* ... and the previous value is restored afterwards. */
|
|
+ if (allow) {
|
|
+ tmp = rset_pickbot(allow);
|
|
+ ra_modified(as, tmp);
|
|
+ } else { // allow == RSET_EMPTY
|
|
+ tmp = RID_RET;
|
|
+ emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */
|
|
+ }
|
|
+ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
|
|
+ emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase);
|
|
+ emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack));
|
|
+ if (pbase == RID_TMP)
|
|
+ emit_getgl(as, RID_TMP, jit_base); /* Parent BASE has no register: reload it from g->jit_base. */
|
|
+ emit_getgl(as, tmp, cur_L);
|
|
+ if (allow == RSET_EMPTY) /* Spill temp register. */
|
|
+ emit_lso(as, RISCVI_SD, tmp, RID_SP, 0);
|
|
+}
|
|
+
|
|
+/* Restore Lua stack from on-trace state: emits one store per snapshot entry that is not marked SNAP_NORESTORE. */
|
|
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
|
+{
|
|
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
|
+#ifdef LUA_USE_ASSERT
|
|
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; /* Only needed by the assertion at the end. */
|
|
+#endif
|
|
+ MSize n, nent = snap->nent;
|
|
+ /* Store the value of all modified slots to the Lua stack. */
|
|
+ for (n = 0; n < nent; n++) {
|
|
+ SnapEntry sn = map[n];
|
|
+ BCReg s = snap_slot(sn);
|
|
+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); /* Byte offset of the slot relative to RID_BASE. */
|
|
+ IRRef ref = snap_ref(sn);
|
|
+ IRIns *ir = IR(ref);
|
|
+ if ((sn & SNAP_NORESTORE))
|
|
+ continue;
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
|
|
+ emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs);
|
|
+ } else {
|
|
+ if ((sn & SNAP_KEYINDEX)) { /* Key index slot: LJ_KEYINDEX goes into the upper 32 bits. */
|
|
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
|
|
+ int64_t kki = (int64_t)LJ_KEYINDEX << 32;
|
|
+ if (irref_isk(ref)) {
|
|
+ emit_lso(as, RISCVI_SD,
|
|
+ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
|
|
+ RID_BASE, ofs);
|
|
+ } else {
|
|
+ Reg src = ra_alloc1(as, ref, allow);
|
|
+ Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
|
|
+ emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs);
|
|
+ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki);
|
|
+ }
|
|
+ } else {
|
|
+ asm_tvstore64(as, RID_BASE, ofs, ref);
|
|
+ }
|
|
+ }
|
|
+ checkmclim(as);
|
|
+ }
|
|
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
|
|
+}
|
|
+
|
|
+/* -- GC handling --------------------------------------------------------- */
|
|
+
|
|
+/* Marker to prevent patching the GC check exit. Encoded as `or TMP, TMP, TMP`; lj_asm_patchexit() skips jumps that follow it. */
|
|
+#define RISCV_NOPATCH_GC_CHECK \
|
|
+ (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP))
|
|
+
|
|
+/* Check GC threshold and do one or more GC steps. Calls lj_gc_step_jit(g, steps); a non-zero return exits the trace. Emitted in reverse. */
|
|
+static void asm_gc_check(ASMState *as)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
|
|
+ IRRef args[2];
|
|
+ MCLabel l_end;
|
|
+ Reg tmp;
|
|
+ ra_evictset(as, RSET_SCRATCH);
|
|
+ l_end = emit_label(as);
|
|
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
|
|
+ asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */
|
|
+ *--as->mcp = RISCV_NOPATCH_GC_CHECK; /* Marker word placed directly before the guard's code. */
|
|
+ args[0] = ASMREF_TMP1; /* global_State *g */
|
|
+ args[1] = ASMREF_TMP2; /* MSize steps */
|
|
+ asm_gencall(as, ci, args);
|
|
+ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
|
|
+ tmp = ra_releasetmp(as, ASMREF_TMP2);
|
|
+ emit_loadi(as, tmp, as->gcsteps);
|
|
+ /* Jump around GC step if GC total < GC threshold. */
|
|
+ emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1);
|
|
+ emit_getgl(as, tmp, gc.threshold);
|
|
+ emit_getgl(as, RID_TMP, gc.total);
|
|
+ as->gcsteps = 0; /* Steps have been consumed by this check. */
|
|
+ checkmclim(as);
|
|
+}
|
|
+
|
|
+/* -- Loop handling ------------------------------------------------------- */
|
|
+
|
|
+/* Fixup the loop branch: patch the jump at the end of the trace so it targets the current code position (the loop start). */
|
|
+static void asm_loop_fixup(ASMState *as)
|
|
+{
|
|
+ MCode *p = as->mctop;
|
|
+ MCode *target = as->mcp;
|
|
+ ptrdiff_t delta;
|
|
+ if (as->loopinv) { /* Inverted loop branch? */
|
|
+ delta = (char *)target - (char *)(p - 2);
|
|
+ /* asm_guard* already inverted the branch, and patched the final b. */
|
|
+ lj_assertA(checki21(delta), "branch target out of range");
|
|
+ p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); /* Keep the low 12 bits, replace the jump offset. */
|
|
+ } else {
|
|
+ delta = (char *)target - (char *)(p - 1); /* Plain J back to the loop start. */
|
|
+ lj_assertA(checki21(delta), "branch target out of range");
|
|
+ p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Fixup the tail of the loop. Currently a no-op on this target. */
|
|
+static void asm_loop_tail_fixup(ASMState *as)
|
|
+{
|
|
+ UNUSED(as); /* Nothing to do(?) */
|
|
+}
|
|
+
|
|
+/* -- Head of trace ------------------------------------------------------- */
|
|
+
|
|
+/* Coalesce BASE register for a root trace: free the register held by REF_BASE and copy RID_BASE into it if they differ. */
|
|
+static void asm_head_root_base(ASMState *as)
|
|
+{
|
|
+ IRIns *ir = IR(REF_BASE);
|
|
+ Reg r = ir->r;
|
|
+ if (ra_hasreg(r)) {
|
|
+ ra_free(as, r);
|
|
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
|
|
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
|
|
+ if (r != RID_BASE)
|
|
+ emit_mv(as, r, RID_BASE);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Coalesce BASE register for a side trace. Returns the parent register that was reused, or RID_NONE if none was. */
|
|
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
|
|
+{
|
|
+ IRIns *ir = IR(REF_BASE);
|
|
+ Reg r = ir->r;
|
|
+ if (ra_hasreg(r)) {
|
|
+ ra_free(as, r);
|
|
+ if (rset_test(as->modset, r) || irt_ismarked(ir->t))
|
|
+ ir->r = RID_INIT; /* No inheritance for modified BASE register. */
|
|
+ if (irp->r == r) {
|
|
+ return r; /* Same BASE register already coalesced. */
|
|
+ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
|
|
+ emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */
|
|
+ return irp->r;
|
|
+ } else {
|
|
+ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
|
|
+ }
|
|
+ }
|
|
+ return RID_NONE;
|
|
+}
|
|
+
|
|
+/* -- Tail of trace ------------------------------------------------------- */
|
|
+
|
|
+/* Fixup the tail code: the last three words of the trace become [SP adjust or NOP][AUIPC][JALR] to the linked trace or the interpreter. */
|
|
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
|
|
+{
|
|
+ MCode *p = as->mctop;
|
|
+ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
|
|
+ int32_t spadj = as->T->spadjust;
|
|
+ if (spadj == 0) {
|
|
+ p[-3] = RISCVI_NOP;
|
|
+ /* No stack adjustment needed: the reserved slot stays a NOP and mctop is left unchanged. */
|
|
+ } else {
|
|
+ /* Patch stack adjustment. */
|
|
+ p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); /* NOTE(review): presumably spadj always fits the 12 bit immediate - confirm. */
|
|
+ }
|
|
+ /* Patch exit jump. */
|
|
+ ptrdiff_t delta = (char *)target - (char *)(p - 2); /* PC-relative to the AUIPC at p[-2]. */
|
|
+ p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
|
|
+ p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
|
|
+}
|
|
+
|
|
+/* Prepare tail of code: reserve two words for the exit jump (pre-filled with NOPs) and, for non-loop traces, one more for the SP adjustment. */
|
|
+static void asm_tail_prep(ASMState *as)
|
|
+{
|
|
+ MCode *p = as->mctop - 2; /* Leave room for exitstub. */
|
|
+ if (as->loopref) {
|
|
+ as->invmcp = as->mcp = p;
|
|
+ } else {
|
|
+ as->mcp = p-1; /* Leave room for stack pointer adjustment. */
|
|
+ as->invmcp = NULL;
|
|
+ }
|
|
+ p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */
|
|
+}
|
|
+
|
|
+/* -- Trace setup --------------------------------------------------------- */
|
|
+
|
|
+/* Ensure there are enough stack slots for call arguments. Returns a register hint for the call result (RID_FPRET or RID_RET). */
|
|
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
|
+{
|
|
+ IRRef args[CCI_NARGS_MAX*2];
|
|
+ uint32_t i, nargs = CCI_XNARGS(ci);
|
|
+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
|
|
+ asm_collectargs(as, ir, ci, args);
|
|
+ for (i = 0; i < nargs; i++) {
|
|
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
|
|
+ if (nfpr > 0) {
|
|
+ nfpr--; if(ci->flags & CCI_VARARG) ngpr--; /* Vararg calls consume a GPR and an FPR together. */
|
|
+ } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; /* FPRs exhausted: FP arg takes a GPR. */
|
|
+ else nslots += 2;
|
|
+ } else {
|
|
+ if (ngpr > 0) {
|
|
+ ngpr--; if(ci->flags & CCI_VARARG) nfpr--;
|
|
+ } else nslots += 2; /* Each stack argument is counted as two slots. */
|
|
+ }
|
|
+ }
|
|
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
|
|
+ as->evenspill = nslots;
|
|
+ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
|
|
+}
|
|
+/* Target-specific trace setup: prepare spare jump slots and the exit stubs (one per snapshot, plus one for a side trace). */
|
|
+static void asm_setup_target(ASMState *as)
|
|
+{
|
|
+ asm_sparejump_setup(as);
|
|
+ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
|
|
+}
|
|
+
|
|
+/* -- Trace patching ------------------------------------------------------ */
|
|
+
|
|
+/* Patch exit jumps of existing machine code to a new target. Scans trace T for jumps to the stub of exitno and redirects them. */
|
|
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
|
|
+{
|
|
+ MCode *p = T->mcode;
|
|
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
|
|
+ MCode *px = exitstub_trace_addr(T, exitno);
|
|
+ MCode *cstart = NULL; /* First patched word; NULL while nothing has been patched. */
|
|
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
|
|
+
|
|
+ for (; p < pe; p++) { /* NOTE(review): the match reads p[1] and p[-1]; confirm both stay inside the mcode area at the trace boundaries. */
|
|
+ /* Look for exitstub branch, replace with branch to target. */
|
|
+ ptrdiff_t odelta = (char *)px - (char *)(p+1),
|
|
+ ndelta = (char *)target - (char *)(p+1);
|
|
+ if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && /* p[0]: conditional branch with offset 8 ... */
|
|
+ ((p[0] & 0x0000007fu) == 0x63u) &&
|
|
+ ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && /* ... followed by a JAL to the old exit stub. */
|
|
+ ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) ||
|
|
+ (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && /* Or: any JAL to the old stub not preceded by the marker. */
|
|
+ ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) {
|
|
+ lj_assertJ(checki32(ndelta), "branch target out of range");
|
|
+ /* Patch jump, if within range. */
|
|
+ patchbranch:
|
|
+ if (checki21(ndelta)) { /* Patch jump */
|
|
+ p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta);
|
|
+ if (!cstart) cstart = p + 1;
|
|
+ } else { /* Branch out of range. Use spare jump slot in mcarea. */
|
|
+ MCode *mcjump = asm_sparejump_use(mcarea, target);
|
|
+ if (mcjump) {
|
|
+ lj_mcode_sync(mcjump, mcjump+2);
|
|
+ ndelta = (char *)mcjump - (char *)(p+1);
|
|
+ if (checki21(ndelta)) {
|
|
+ goto patchbranch; /* Retry with the spare jump slot as the target. */
|
|
+ } else {
|
|
+ lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
|
|
+ }
|
|
+ }
|
|
+ /* Ignore jump slot overflow. Child trace is simply not attached. */
|
|
+ }
|
|
+ } else if (p+2 == pe) { /* Last two words of the trace. */
|
|
+ if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) { /* Still the NOP pair written by asm_tail_prep(). */
|
|
+ ptrdiff_t delta = (char *)target - (char *)p;
|
|
+ lj_assertJ(checki32(delta), "jump target out of range");
|
|
+ p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
|
|
+ p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
|
|
+ if (!cstart) cstart = p;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if (cstart) lj_mcode_sync(cstart, px+1);
|
|
+ lj_mcode_patch(J, mcarea, 1);
|
|
+}
|
|
|
|
From 843a01e1586bed915ef0ab0e2a20d515a2d6674b Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:36:58 +0800
|
|
Subject: [PATCH 12/22] riscv(interp): add VM builder support
|
|
|
|
---
|
|
src/host/buildvm.c | 2 ++
|
|
src/host/buildvm_asm.c | 31 +++++++++++++++++++++++++++++++
|
|
2 files changed, 33 insertions(+)
|
|
|
|
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
|
|
index 24db75f40b..6d9bfd6b9e 100644
|
|
--- a/src/host/buildvm.c
|
|
+++ b/src/host/buildvm.c
|
|
@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
|
|
#include "../dynasm/dasm_ppc.h"
|
|
#elif LJ_TARGET_MIPS
|
|
#include "../dynasm/dasm_mips.h"
|
|
+#elif LJ_TARGET_RISCV64
|
|
+#include "../dynasm/dasm_riscv.h"
|
|
#else
|
|
#error "No support for this architecture (yet)"
|
|
#endif
|
|
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
|
|
index 3870b8fe5e..e1ef296f31 100644
|
|
--- a/src/host/buildvm_asm.c
|
|
+++ b/src/host/buildvm_asm.c
|
|
@@ -156,6 +156,34 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
|
|
"Error: unsupported opcode %08x for %s symbol relocation.\n",
|
|
ins, sym);
|
|
exit(1);
|
|
+#elif LJ_TARGET_RISCV64
|
|
+ if ((ins & 0x7f) == 0x17u) {
|
|
+ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym);
|
|
+ } else if ((ins & 0x7f) == 0x67u) {
|
|
+ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym);
|
|
+ } else if ((ins & 0x7f) == 0x6fu) {
|
|
+ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym);
|
|
+ } else if ((ins & 0x7f) == 0x03u) {
|
|
+ uint8_t funct3 = (ins >> 12) & 7;
|
|
+ uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31;
|
|
+ switch (funct3) {
|
|
+ case 0: fprintf(ctx->fp, "\tlb"); break;
|
|
+ case 1: fprintf(ctx->fp, "\tlh"); break;
|
|
+ case 2: fprintf(ctx->fp, "\tlw"); break;
|
|
+ case 3: fprintf(ctx->fp, "\tld"); break;
|
|
+ case 4: fprintf(ctx->fp, "\tlbu"); break;
|
|
+ case 5: fprintf(ctx->fp, "\tlhu"); break;
|
|
+ case 6: fprintf(ctx->fp, "\tlwu"); break;
|
|
+ default: goto rv_reloc_err;
|
|
+ }
|
|
+ fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1);
|
|
+ } else {
|
|
+rv_reloc_err:
|
|
+ fprintf(stderr,
|
|
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
|
|
+ ins, sym);
|
|
+ exit(1);
|
|
+ }
|
|
#else
|
|
#error "missing relocation support for this architecture"
|
|
#endif
|
|
@@ -248,6 +276,9 @@ void emit_asm(BuildCtx *ctx)
|
|
#endif
|
|
#if LJ_TARGET_MIPS
|
|
fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
|
|
+#endif
|
|
+#if LJ_TARGET_RISCV64
|
|
+ fprintf(ctx->fp, ".option arch, -c\n.option norelax\n");
|
|
#endif
|
|
emit_asm_align(ctx, 4);
|
|
|
|
|
|
From a4def325033641166e3db1d15754360b3310fe74 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:38:50 +0800
|
|
Subject: [PATCH 13/22] riscv(misc): add bytecode listing support
|
|
|
|
---
|
|
src/jit/bcsave.lua | 1 +
|
|
1 file changed, 1 insertion(+)
|
|
|
|
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
|
|
index a30a34b6be..05b1ae4261 100644
|
|
--- a/src/jit/bcsave.lua
|
|
+++ b/src/jit/bcsave.lua
|
|
@@ -101,6 +101,7 @@ local map_arch = {
|
|
mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
|
|
mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
|
|
mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
|
|
+ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, },
|
|
}
|
|
|
|
local map_os = {
|
|
|
|
From bd6f9fcdb109fa37e69d5b4be4b97320fb9bf608 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:40:51 +0800
|
|
Subject: [PATCH 14/22] riscv(jit): add hooks in interpreter
|
|
|
|
---
|
|
src/vm_riscv64.dasc | 387 ++++++++++++++++++++++++++++++++++++++++++++
|
|
1 file changed, 387 insertions(+)
|
|
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index 0a8970a109..ee45afef99 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -449,6 +449,24 @@
|
|
|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
|
|
|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro
|
|
|
|
|
+|.macro hotcheck, delta, target
|
|
+| srli TMP1, PC, 1
|
|
+| andi TMP1, TMP1, 126
|
|
+| add TMP1, TMP1, DISPATCH
|
|
+| lhu TMP2, GG_DISP2HOT(TMP1)
|
|
+| addiw TMP2, TMP2, -delta
|
|
+| sh TMP2, GG_DISP2HOT(TMP1)
|
|
+| bxltz TMP2, target
|
|
+|.endmacro
|
|
+|
|
|
+|.macro hotloop
|
|
+| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
|
|
+|.endmacro
|
|
+|
|
|
+|.macro hotcall
|
|
+| hotcheck HOTCOUNT_CALL, ->vm_hotcall
|
|
+|.endmacro
|
|
+|
|
|
|// Move table write barrier back. Overwrites mark and tmp.
|
|
|.macro barrierback, tab, mark, tmp, target
|
|
| ld tmp, GL->gc.grayagain
|
|
@@ -1146,8 +1164,15 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sd PC, SAVE_PC(sp)
|
|
| mv MULTRES, INS
|
|
| call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base)
|
|
+ |.if JIT
|
|
+ | decode_OP1 TMP0, MULTRES
|
|
+ | li TMP1, BC_JFORI
|
|
+ |.endif
|
|
| decode_RA8 RA, MULTRES
|
|
| decode_RD8 RD, MULTRES
|
|
+ |.if JIT
|
|
+ | bxeq TMP0, TMP1, =>BC_JFORI
|
|
+ |.endif
|
|
| j =>BC_FORI
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
@@ -2142,6 +2167,20 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|//-----------------------------------------------------------------------
|
|
|
|
|
|->vm_record: // Dispatch target for recording phase.
|
|
+ |.if JIT
|
|
+ | lbu TMP3, GL->hookmask
|
|
+ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent.
|
|
+ | bnez TMP1, >5
|
|
+ | // Decrement the hookcount for consistency, but always do the call.
|
|
+ | lw TMP2, GL->hookcount
|
|
+ | andi TMP1, TMP3, HOOK_ACTIVE
|
|
+ | bnez TMP1, >1
|
|
+ | addiw TMP2, TMP2, -1
|
|
+ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
|
|
+ | beqz TMP1, >1
|
|
+ | sw TMP2, GL->hookcount
|
|
+ | j >1
|
|
+ |.endif
|
|
|
|
|
|->vm_rethook: // Dispatch target for return hooks.
|
|
| lbu TMP3, GL->hookmask
|
|
@@ -2187,11 +2226,103 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| lw MULTRES, -24(RB) // Restore MULTRES for *M ins.
|
|
| j <4
|
|
|
|
|
+ |->vm_hotloop: // Hot loop counter underflow.
|
|
+ |.if JIT
|
|
+ | ld LFUNC:TMP1, FRAME_FUNC(BASE)
|
|
+ | addi CARG1, GL, GG_G2J
|
|
+ | cleartp LFUNC:TMP1
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | ld TMP1, LFUNC:TMP1->pc
|
|
+ | mv CARG2, PC
|
|
+ | sd L, (offsetof(jit_State, L))(CARG1)
|
|
+ | lbu TMP1, PC2PROTO(framesize)(TMP1)
|
|
+ | sd BASE, L->base
|
|
+ | slli TMP1, TMP1, 3
|
|
+ | add TMP1, BASE, TMP1
|
|
+ | sd TMP1, L->top
|
|
+ | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc)
|
|
+ | j <3
|
|
+ |.endif
|
|
+ |
|
|
|
|
|
|->vm_callhook: // Dispatch target for call hooks.
|
|
| mv CARG2, PC
|
|
+ |.if JIT
|
|
+ | j >1
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_hotcall: // Hot call counter underflow.
|
|
+ |.if JIT
|
|
+ | ori CARG2, PC, 1
|
|
+ |1:
|
|
+ |.endif
|
|
+ | add TMP0, BASE, RC
|
|
+ | sd PC, SAVE_PC(sp)
|
|
+ | sd BASE, L->base
|
|
+ | sub RA, RA, BASE
|
|
+ | sd TMP0, L->top
|
|
+ | mv CARG1, L
|
|
+ | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc)
|
|
+ | // Returns ASMFunction.
|
|
+ | ld BASE, L->base
|
|
+ | ld TMP0, L->top
|
|
+ | sd x0, SAVE_PC(sp) // Invalidate for subsequent line hook.
|
|
+ | add RA, BASE, RA
|
|
+ | sub NARGS8:RC, TMP0, BASE
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | cleartp LFUNC:RB
|
|
+ | lw INS, -4(PC)
|
|
+ | jr CRET1
|
|
|
|
|
|->cont_stitch: // Trace stitching.
|
|
+ |.if JIT
|
|
+ | // RA = resultptr, RB = meta base
|
|
+ | lw INS, -4(PC)
|
|
+ | ld TRACE:TMP2, -40(RB) // Save previous trace.
|
|
+ | decode_RA8 RC, INS
|
|
+ | addi TMP1, MULTRES, -8
|
|
+ | cleartp TRACE:TMP2
|
|
+ | add RC, BASE, RC // Call base.
|
|
+ | beqz TMP1, >2
|
|
+ |1: // Move results down.
|
|
+ | ld CARG1, 0(RA)
|
|
+ | addi TMP1, TMP1, -8
|
|
+ | addi RA, RA, 8
|
|
+ | sd CARG1, 0(RC)
|
|
+ | addi RC, RC, 8
|
|
+ | bnez TMP1, <1
|
|
+ |2:
|
|
+ | decode_RA8 RA, INS
|
|
+ | decode_RB8 RB, INS
|
|
+ | add RA, RA, RB
|
|
+ | add RA, BASE, RA
|
|
+ |3:
|
|
+ | bltu RC, RA, >8 // More results wanted?
|
|
+ |
|
|
+ | lhu TMP3, TRACE:TMP2->traceno
|
|
+ | lhu RD, TRACE:TMP2->link
|
|
+ | bxeq RD, TMP3, ->cont_nop // Blacklisted.
|
|
+ | slliw RD, RD, 3
|
|
+ | bxnez RD, =>BC_JLOOP // Jump to stitched trace.
|
|
+ |
|
|
+ | // Stitch a new trace to the previous trace.
|
|
+ | addi CARG1, GL, GG_G2J
|
|
+ | // addi CARG2, CARG1, 1 // We don't care what's on the verge.
|
|
+ | addi CARG2, CARG1, 2047 // jit_State too large.
|
|
+ | sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2)
|
|
+ | sd L, (offsetof(jit_State, L)-2047)(CARG2)
|
|
+ | sd BASE, L->base
|
|
+ | mv CARG2, PC
|
|
+ | // (jit_State *J, const BCIns *pc)
|
|
+ | call_intern cont_stitch, lj_dispatch_stitch
|
|
+ | ld BASE, L->base
|
|
+ | j ->cont_nop
|
|
+ |
|
|
+ |8:
|
|
+ | sd TISNIL, 0(RC)
|
|
+ | addi RC, RC, 8
|
|
+ | j <3
|
|
+ |.endif
|
|
|
|
|
|->vm_profhook: // Dispatch target for profiler hook.
|
|
#if LJ_HASPROFILE
|
|
@@ -2206,6 +2337,149 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| ld BASE, L->base
|
|
| j ->cont_nop
|
|
#endif
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Trace exit handler -------------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |.macro savex_, a, b
|
|
+ | fsd f..a, a*8(sp)
|
|
+ | fsd f..b, b*8(sp)
|
|
+ | sd x..a, 32*8+a*8(sp)
|
|
+ | sd x..b, 32*8+b*8(sp)
|
|
+ |.endmacro
|
|
+ |
|
|
+ |->vm_exit_handler:
|
|
+ |.if JIT
|
|
+ | addi sp, sp, -(32*8+32*8)
|
|
+ | savex_ 0, 5
|
|
+ | savex_ 6, 7
|
|
+ | savex_ 8, 9
|
|
+ | savex_ 10, 11
|
|
+ | savex_ 12, 13
|
|
+ | savex_ 14, 15
|
|
+ | savex_ 16, 17
|
|
+ | savex_ 18, 19
|
|
+ | savex_ 20, 21
|
|
+ | savex_ 22, 23
|
|
+ | savex_ 24, 25
|
|
+ | savex_ 26, 27
|
|
+ | savex_ 28, 29
|
|
+ | savex_ 30, 31
|
|
+ | fsd f1, 1*8(sp)
|
|
+ | fsd f2, 2*8(sp)
|
|
+ | fsd f3, 3*8(sp)
|
|
+ | fsd f4, 4*8(sp)
|
|
+ | sd x0, 32*8+1*8(sp) // Clear RID_TMP.
|
|
+ | ld TMP1, 32*8+32*8(sp) // Load exit pc.
|
|
+ | addi TMP2, sp, 32*8+32*8 // Recompute original value of sp.
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sd TMP2, 32*8+2*8(sp) // Store sp in RID_SP
|
|
+ | addi CARG1, GL, GG_G2J
|
|
+ | li_vmstate EXIT
|
|
+ | // addi CARG2, CARG1, 1 // We don't care what's on the verge.
|
|
+ | addi CARG2, CARG1, 2047 // jit_State too large.
|
|
+ | sub TMP1, TMP1, ra
|
|
+ | lw TMP2, 0(ra) // Load trace number.
|
|
+ | st_vmstate
|
|
+ | srli TMP1, TMP1, 2
|
|
+ | ld L, GL->cur_L
|
|
+ | ld BASE, GL->jit_base
|
|
+ | srli TMP2, TMP2, 12
|
|
+ | addi TMP1, TMP1, -2
|
|
+ | sd L, (offsetof(jit_State, L)-2047)(CARG2)
|
|
+ | sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2) // Store trace number.
|
|
+ | sd BASE, L->base
|
|
+ | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number.
|
|
+ | sd x0, GL->jit_base
|
|
+ | mv CARG2, sp
|
|
+ | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex)
|
|
+ | // Returns MULTRES (unscaled) or negated error code.
|
|
+ | ld TMP1, L->cframe
|
|
+ | ld BASE, L->base
|
|
+ | andi sp, TMP1, CFRAME_RAWMASK
|
|
+ | ld PC, SAVE_PC(sp) // Get SAVE_PC.
|
|
+ | sd L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield).
|
|
+ | j >1
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_exit_interp:
|
|
+ |.if JIT
|
|
+ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
|
|
+ | ld L, SAVE_L(sp)
|
|
+ | addxi DISPATCH, GL, GG_G2DISP
|
|
+ | sd BASE, L->base
|
|
+ |1:
|
|
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
+ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit.
|
|
+ | beqz TMP0, >9
|
|
+ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double).
|
|
+ | slli MULTRES, CRET1, 3
|
|
+ | cleartp LFUNC:RB
|
|
+ | sw MULTRES, TMPD(sp)
|
|
+ | li TISNIL, LJ_TNIL
|
|
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
|
|
+ | slli TMP3, TMP3, 32
|
|
+ | ld TMP1, LFUNC:RB->pc
|
|
+ | sd x0, GL->jit_base
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1)
|
|
+ | fmv.d.x TOBIT, TMP3
|
|
+ | // Modified copy of ins_next which handles function header dispatch, too.
|
|
+ | lw INS, 0(PC)
|
|
+ | addi PC, PC, 4
|
|
+ | addiw CRET1, CRET1, 17 // Static dispatch?
|
|
+ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
|
|
+ | sw TISNIL, GL->vmstate
|
|
+ | decode_RD8a RD, INS
|
|
+ | beqz CRET1, >5
|
|
+ | decode_OP8 TMP1, INS
|
|
+ | add TMP0, DISPATCH, TMP1
|
|
+ | sltiu TMP2, TMP1, BC_FUNCF*8
|
|
+ | ld TMP3, 0(TMP0)
|
|
+ | decode_RA8 RA, INS
|
|
+ | beqz TMP2, >2
|
|
+ | decode_RD8b RD
|
|
+ | jr TMP3
|
|
+ |2:
|
|
+ | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
|
|
+ | ld TMP1, FRAME_PC(BASE)
|
|
+ | bnez TMP2, >3
|
|
+ | // Check frame below fast function.
|
|
+ | andi TMP0, TMP1, FRAME_TYPE
|
|
+ | bnez TMP0, >3 // Trace stitching continuation?
|
|
+ | // Otherwise set KBASE for Lua function below fast function.
|
|
+ | lw TMP2, -4(TMP1)
|
|
+ | decode_RA8 TMP0, TMP2
|
|
+ | sub TMP1, BASE, TMP0
|
|
+ | ld LFUNC:TMP2, -32(TMP1)
|
|
+ | cleartp LFUNC:TMP2
|
|
+ | ld TMP1, LFUNC:TMP2->pc
|
|
+ | ld KBASE, PC2PROTO(k)(TMP1)
|
|
+ |3:
|
|
+ | addi RC, MULTRES, -8
|
|
+ | add RA, RA, BASE
|
|
+ | jr TMP3
|
|
+ |
|
|
+ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
|
|
+ | ld TMP0, GL_J(trace)(GL)
|
|
+ | decode_RD8b RD
|
|
+ | add TMP0, TMP0, RD
|
|
+ | ld TRACE:TMP2, 0(TMP0)
|
|
+ | lw INS, TRACE:TMP2->startins
|
|
+ | decode_OP8 TMP1, INS
|
|
+ | add TMP0, DISPATCH, TMP1
|
|
+ | decode_RD8a RD, INS
|
|
+ | ld TMP3, GG_DISP2STATIC(TMP0)
|
|
+ | decode_RA8a RA, INS
|
|
+ | decode_RD8b RD
|
|
+ | decode_RA8b RA
|
|
+ | jr TMP3
|
|
+ |
|
|
+ |9: // Rethrow error from the right C frame.
|
|
+ | negw CARG2, CRET1
|
|
+ | mv CARG1, L
|
|
+ | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode)
|
|
+ |.endif
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
|//-- Math helper functions ----------------------------------------------
|
|
@@ -2232,6 +2506,10 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| vm_round rdn
|
|
|->vm_ceil:
|
|
| vm_round rup
|
|
+ |->vm_trunc:
|
|
+ |.if JIT
|
|
+ | vm_round rtz
|
|
+ |.endif
|
|
|
|
|
|
|
|
|//-----------------------------------------------------------------------
|
|
@@ -2245,6 +2523,67 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| ret
|
|
|.endif
|
|
|
|
|
+ |.define NEXT_TAB, TAB:CARG1
|
|
+ |.define NEXT_IDX, CARG2
|
|
+ |.define NEXT_ASIZE, CARG3
|
|
+ |.define NEXT_NIL, CARG4
|
|
+ |.define NEXT_TMP0, TMP0
|
|
+ |.define NEXT_TMP1, TMP1
|
|
+ |.define NEXT_TMP2, TMP2
|
|
+ |.define NEXT_RES_VK, CRET1
|
|
+ |.define NEXT_RES_IDX, CRET2
|
|
+ |.define NEXT_RES_PTR, sp
|
|
+ |.define NEXT_RES_VAL, 0(sp)
|
|
+ |.define NEXT_RES_KEY, 8(sp)
|
|
+ |
|
|
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
|
|
+ |// Next idx returned in CRET2.
|
|
+ |->vm_next:
|
|
+ |.if JIT
|
|
+ | lw NEXT_ASIZE, NEXT_TAB->asize
|
|
+ | ld NEXT_TMP0, NEXT_TAB->array
|
|
+ | li NEXT_NIL, LJ_TNIL
|
|
+ |1: // Traverse array part.
|
|
+ | bgeu NEXT_IDX, NEXT_ASIZE, >5
|
|
+ | slliw NEXT_TMP1, NEXT_IDX, 3
|
|
+ | add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
|
|
+ | li TMP3, LJ_TISNUM
|
|
+ | ld NEXT_TMP2, 0(NEXT_TMP1)
|
|
+ | slli TMP3, TMP3, 47
|
|
+ | or NEXT_TMP1, NEXT_IDX, TMP3
|
|
+ | addiw NEXT_IDX, NEXT_IDX, 1
|
|
+ | beq NEXT_TMP2, NEXT_NIL, <1
|
|
+ | sd NEXT_TMP2, NEXT_RES_VAL
|
|
+ | sd NEXT_TMP1, NEXT_RES_KEY
|
|
+ | mv NEXT_RES_VK, NEXT_RES_PTR
|
|
+ | mv NEXT_RES_IDX, NEXT_IDX
|
|
+ | ret
|
|
+ |
|
|
+ |5: // Traverse hash part.
|
|
+ | subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
|
|
+ | lw NEXT_TMP0, NEXT_TAB->hmask
|
|
+ | ld NODE:NEXT_RES_VK, NEXT_TAB->node
|
|
+ | slliw NEXT_TMP2, NEXT_RES_IDX, 5
|
|
+ | slliw TMP3, NEXT_RES_IDX, 3
|
|
+ | subw TMP3, NEXT_TMP2, TMP3
|
|
+ | add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3
|
|
+ |6:
|
|
+ | bltu NEXT_TMP0, NEXT_RES_IDX, >8
|
|
+ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val
|
|
+ | addiw NEXT_RES_IDX, NEXT_RES_IDX, 1
|
|
+ | bne NEXT_TMP2, NEXT_NIL, >9
|
|
+ | // Skip holes in hash part.
|
|
+ | addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
|
|
+ | j <6
|
|
+ |
|
|
+ |8: // End of iteration. Set the key to nil (not the value).
|
|
+ | sd NEXT_NIL, NEXT_RES_KEY
|
|
+ | mv NEXT_RES_VK, NEXT_RES_PTR
|
|
+ |9:
|
|
+ | addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
|
|
+ | ret
|
|
+ |.endif
|
|
+ |
|
|
|//-----------------------------------------------------------------------
|
|
|//-- FFI helper functions -----------------------------------------------
|
|
|//-----------------------------------------------------------------------
|
|
@@ -3735,6 +4074,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
|
|
case BC_ITERN:
|
|
| // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
|->vm_IITERN:
|
|
| add RA, BASE, RA
|
|
| ld TAB:RB, -16(RA)
|
|
@@ -3819,8 +4161,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| li TMP1, BC_ITERC
|
|
| sb TMP3, -4+OFS_OP(PC)
|
|
| add PC, TMP0, TMP2
|
|
+ |.if JIT
|
|
+ | lb TMP0, OFS_OP(PC)
|
|
+ | li TMP3, BC_ITERN
|
|
+ | lhu TMP2, OFS_RD(PC)
|
|
+ | bne TMP0, TMP3, >6
|
|
+ |.endif
|
|
| sb TMP1, OFS_OP(PC)
|
|
| j <1
|
|
+ |.if JIT
|
|
+ |6: // Unpatch JLOOP.
|
|
+ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL.
|
|
+ | slliw TMP2, TMP2, 3
|
|
+ | add TMP0, TMP0, TMP2
|
|
+ | ld TRACE:TMP2, 0(TMP0)
|
|
+ | lw TMP0, TRACE:TMP2->startins
|
|
+ | andi TMP0, TMP0, -256
|
|
+ | or TMP0, TMP0, TMP1
|
|
+ | sw TMP0, 0(PC)
|
|
+ | j <1
|
|
+ |.endif
|
|
break;
|
|
|
|
case BC_VARG:
|
|
@@ -3986,6 +4346,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
/* -- Loops and branches ------------------------------------------------ */
|
|
|
|
case BC_FORL:
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
| // Fall through. Assumes BC_IFORL follows.
|
|
break;
|
|
|
|
@@ -4106,6 +4469,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
break;
|
|
|
|
case BC_ITERL:
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
| // Fall through. Assumes BC_IITERL follows.
|
|
break;
|
|
|
|
@@ -4130,6 +4496,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
break;
|
|
|
|
case BC_LOOP:
|
|
+ | // RA = base*8, RD = target (loop extent)
|
|
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
|
|
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
| // Fall through. Assumes BC_ILOOP follows.
|
|
break;
|
|
|
|
@@ -4139,6 +4511,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
break;
|
|
|
|
case BC_JLOOP:
|
|
+ |.if JIT
|
|
+ | // RA = base*8 (ignored), RD = traceno*8
|
|
+ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL.
|
|
+ | add TMP0, TMP0, RD
|
|
+ | // Traces on RISC-V don't store the trace number, so use 0.
|
|
+ | sw x0, GL->vmstate // Word store, same width as st_vmstate.
|
|
+ | ld TRACE:TMP1, 0(TMP0)
|
|
+ | sd BASE, GL->jit_base // store Current JIT code L->base
|
|
+ | ld TMP1, TRACE:TMP1->mcode
|
|
+ | sd L, GL->tmpbuf.L
|
|
+ | jr TMP1
|
|
+ |.endif
|
|
break;
|
|
|
|
case BC_JMP:
|
|
@@ -4150,6 +4534,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
/* -- Function headers -------------------------------------------------- */
|
|
|
|
case BC_FUNCF:
|
|
+ |.if JIT
|
|
+ | hotcall
|
|
+ |.endif
|
|
case BC_FUNCV: /* NYI: compiled vararg functions. */
|
|
| // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
|
|
break;
|
|
|
|
From 6a2c877aa8460069806c311bcf4cae055dc7fce3 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:42:05 +0800
|
|
Subject: [PATCH 15/22] riscv(interp): add DWARF info
|
|
|
|
---
|
|
src/vm_riscv64.dasc | 132 +++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 131 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index ee45afef99..1fee1f7ed4 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -4680,5 +4680,135 @@ static int build_backend(BuildCtx *ctx)
|
|
/* Emit pseudo frame-info for all assembler functions. */
|
|
static void emit_asm_debug(BuildCtx *ctx)
|
|
{
|
|
-
|
|
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
|
|
+ int i;
|
|
+ switch (ctx->mode) {
|
|
+ case BUILD_elfasm:
|
|
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".Lframe0:\n"
|
|
+ "\t.4byte .LECIE0-.LSCIE0\n"
|
|
+ ".LSCIE0:\n"
|
|
+ "\t.4byte 0xffffffff\n"
|
|
+ "\t.byte 0x1\n"
|
|
+ "\t.string \"\"\n"
|
|
+ "\t.uleb128 0x1\n"
|
|
+ "\t.sleb128 -4\n"
|
|
+ "\t.byte 1\n" /* Return address is in ra. */
|
|
+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
|
|
+ "\t.align 3\n"
|
|
+ ".LECIE0:\n\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE0:\n"
|
|
+ "\t.4byte .LEFDE0-.LASFDE0\n"
|
|
+ ".LASFDE0:\n"
|
|
+ "\t.4byte .Lframe0\n"
|
|
+ "\t.8byte .Lbegin\n"
|
|
+ "\t.8byte %d\n"
|
|
+ "\t.byte 0xe\n\t.uleb128 %d\n"
|
|
+ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */,
|
|
+ fcofs, CFRAME_SIZE);
|
|
+ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */
|
|
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
|
|
+ fprintf(ctx->fp,
|
|
+ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */
|
|
+ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */);
|
|
+ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */
|
|
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
|
|
+ fprintf(ctx->fp,
|
|
+ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */
|
|
+ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */
|
|
+ "\t.align 3\n"
|
|
+ ".LEFDE0:\n\n");
|
|
+#if LJ_HASFFI
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE1:\n"
|
|
+ "\t.4byte .LEFDE1-.LASFDE1\n"
|
|
+ ".LASFDE1:\n"
|
|
+ "\t.4byte .Lframe0\n"
|
|
+ "\t.8byte lj_vm_ffi_call\n" /* Same field width as .LSFDE0's .Lbegin. */
|
|
+ "\t.8byte %d\n" /* Same field width as .LSFDE0's range. */
|
|
+ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */
|
|
+ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */
|
|
+ "\t.byte 0xd\n\t.uleb128 0x12\n"
|
|
+ "\t.align 3\n"
|
|
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
|
|
+#endif
|
|
+#if !LJ_NO_UNWIND
|
|
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".Lframe1:\n"
|
|
+ "\t.4byte .LECIE1-.LSCIE1\n"
|
|
+ ".LSCIE1:\n"
|
|
+ "\t.4byte 0\n"
|
|
+ "\t.byte 0x1\n"
|
|
+ "\t.string \"zPR\"\n"
|
|
+ "\t.uleb128 0x1\n"
|
|
+ "\t.sleb128 -4\n"
|
|
+ "\t.byte 1\n" /* Return address is in ra. */
|
|
+ "\t.uleb128 6\n" /* augmentation length */
|
|
+ "\t.byte 0x1b\n"
|
|
+ "\t.4byte lj_err_unwind_dwarf-.\n"
|
|
+ "\t.byte 0x1b\n"
|
|
+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
|
|
+ "\t.align 2\n"
|
|
+ ".LECIE1:\n\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE2:\n"
|
|
+ "\t.4byte .LEFDE2-.LASFDE2\n"
|
|
+ ".LASFDE2:\n"
|
|
+ "\t.4byte .LASFDE2-.Lframe1\n"
|
|
+ "\t.4byte .Lbegin-.\n"
|
|
+ "\t.4byte %d\n"
|
|
+ "\t.uleb128 0\n" /* augmentation length */
|
|
+ "\t.byte 0xe\n\t.uleb128 %d\n"
|
|
+ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */
|
|
+ fcofs, CFRAME_SIZE);
|
|
+ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */
|
|
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
|
|
+ fprintf(ctx->fp,
|
|
+ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */
|
|
+ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */);
|
|
+ for (i = 27; i >= 18; i--) /* offset f27-f18 (fs11-fs2) */
|
|
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
|
|
+ fprintf(ctx->fp,
|
|
+ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */
|
|
+ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */
|
|
+ "\t.align 2\n"
|
|
+ ".LEFDE2:\n\n");
|
|
+#if LJ_HASFFI
|
|
+ fprintf(ctx->fp,
|
|
+ ".Lframe2:\n"
|
|
+ "\t.4byte .LECIE2-.LSCIE2\n"
|
|
+ ".LSCIE2:\n"
|
|
+ "\t.4byte 0\n"
|
|
+ "\t.byte 0x1\n"
|
|
+ "\t.string \"zR\"\n"
|
|
+ "\t.uleb128 0x1\n"
|
|
+ "\t.sleb128 -4\n"
|
|
+ "\t.byte 1\n" /* Return address is in ra. */
|
|
+ "\t.uleb128 1\n" /* augmentation length */
|
|
+ "\t.byte 0x1b\n"
|
|
+ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */
|
|
+ "\t.align 2\n"
|
|
+ ".LECIE2:\n\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE3:\n"
|
|
+ "\t.4byte .LEFDE3-.LASFDE3\n"
|
|
+ ".LASFDE3:\n"
|
|
+ "\t.4byte .LASFDE3- .Lframe2\n"
|
|
+ "\t.4byte lj_vm_ffi_call-.\n"
|
|
+ "\t.4byte %d\n"
|
|
+ "\t.uleb128 0\n" /* augmentation length */
|
|
+ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */
|
|
+ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */
|
|
+ "\t.byte 0xd\n\t.uleb128 0x12\n"
|
|
+ "\t.align 2\n"
|
|
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
|
|
+#endif
|
|
+#endif
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
}
|
|
|
|
From dae3b8fc1e0d4929b6c4b253df8d43b8f6f0c6a1 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:42:34 +0800
|
|
Subject: [PATCH 16/22] riscv(jit): add GDBJIT support
|
|
|
|
---
|
|
src/lj_gdbjit.c | 15 +++++++++++++++
|
|
1 file changed, 15 insertions(+)
|
|
|
|
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
|
|
index 9e68932a8c..7d677c7c74 100644
|
|
--- a/src/lj_gdbjit.c
|
|
+++ b/src/lj_gdbjit.c
|
|
@@ -306,6 +306,9 @@ enum {
|
|
#elif LJ_TARGET_MIPS
|
|
DW_REG_SP = 29,
|
|
DW_REG_RA = 31,
|
|
+#elif LJ_TARGET_RISCV64
|
|
+ DW_REG_SP = 2,
|
|
+ DW_REG_RA = 1,
|
|
#else
|
|
#error "Unsupported target architecture"
|
|
#endif
|
|
@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = {
|
|
.machine = 20,
|
|
#elif LJ_TARGET_MIPS
|
|
.machine = 8,
|
|
+#elif LJ_TARGET_RISCV64
|
|
+ .machine = 243,
|
|
#else
|
|
#error "Unsupported target architecture"
|
|
#endif
|
|
@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
|
|
for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); }
|
|
for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); }
|
|
}
|
|
+#elif LJ_TARGET_RISCV64
|
|
+ {
|
|
+ int i;
|
|
+ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); }
|
|
+ DB(DW_CFA_offset|9); DUV(17);
|
|
+ DB(DW_CFA_offset|8); DUV(18);
|
|
+ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); }
|
|
+ DB(DW_CFA_offset|32|9); DUV(29);
|
|
+ DB(DW_CFA_offset|32|8); DUV(30);
|
|
+ }
|
|
#else
|
|
#error "Unsupported target architecture"
|
|
#endif
|
|
|
|
From e2a45eb4bc14787b374996a485527e9353d63e67 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:43:46 +0800
|
|
Subject: [PATCH 17/22] riscv(support,linux): add Linux specific icache sync
|
|
codepath
|
|
|
|
---
|
|
src/lj_mcode.c | 17 +++++++++++++++++
|
|
1 file changed, 17 insertions(+)
|
|
|
|
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
|
|
index 864da7fb4c..3b88cfcfea 100644
|
|
--- a/src/lj_mcode.c
|
|
+++ b/src/lj_mcode.c
|
|
@@ -38,6 +38,12 @@
|
|
void sys_icache_invalidate(void *start, size_t len);
|
|
#endif
|
|
|
|
+#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
|
|
+#include <unistd.h>
|
|
+#include <sys/syscall.h>
|
|
+#include <sys/cachectl.h>
|
|
+#endif
|
|
+
|
|
/* Synchronize data/instruction cache. */
|
|
void lj_mcode_sync(void *start, void *end)
|
|
{
|
|
@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *end)
|
|
sys_icache_invalidate(start, (char *)end-(char *)start);
|
|
#elif LJ_TARGET_PPC
|
|
lj_vm_cachesync(start, end);
|
|
+#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
|
|
+#if (defined(__GNUC__) || defined(__clang__))
|
|
+ __asm__ volatile("fence rw, rw");
|
|
+#else
|
|
+ lj_vm_fence_rw_rw();
|
|
+#endif
|
|
+#ifdef __GLIBC__
|
|
+ __riscv_flush_icache(start, end, 0);
|
|
+#else
|
|
+ syscall(__NR_riscv_flush_icache, start, end, 0UL);
|
|
+#endif
|
|
#elif defined(__GNUC__) || defined(__clang__)
|
|
__clear_cache(start, end);
|
|
#else
|
|
|
|
From 751c8b6396a2f0e7557052edb7a5788ff78004e2 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:47:58 +0800
|
|
Subject: [PATCH 18/22] riscv(support,linux): make mremap() non-moving due to
|
|
VA space woes
|
|
|
|
---
|
|
src/lj_alloc.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
|
|
index cb704f7b3f..9039d80537 100644
|
|
--- a/src/lj_alloc.c
|
|
+++ b/src/lj_alloc.c
|
|
@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
|
|
#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
|
|
#define CALL_MREMAP_NOMOVE 0
|
|
#define CALL_MREMAP_MAYMOVE 1
|
|
-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
|
|
+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64)
|
|
#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
|
|
#else
|
|
#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
|
|
|
|
From 55f24bab73c59dd99644e27da90ca5e591888b17 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:48:43 +0800
|
|
Subject: [PATCH 19/22] riscv(misc): add disassembler support
|
|
|
|
---
|
|
src/jit/dis_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++
|
|
src/jit/dis_riscv64.lua | 16 +
|
|
2 files changed, 995 insertions(+)
|
|
create mode 100644 src/jit/dis_riscv.lua
|
|
create mode 100644 src/jit/dis_riscv64.lua
|
|
|
|
diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua
|
|
new file mode 100644
|
|
index 0000000000..8de563a724
|
|
--- /dev/null
|
|
+++ b/src/jit/dis_riscv.lua
|
|
@@ -0,0 +1,979 @@
|
|
+------------------------------------------------------------------------------
|
|
+-- LuaJIT RISC-V disassembler module.
|
|
+--
|
|
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+-- Released under the MIT license. See Copyright Notice in luajit.h
|
|
+--
|
|
+-- Contributed by Milos Poletanovic from Syrmia.com.
|
|
+-- Contributed by gns from PLCT Lab, ISCAS.
|
|
+------------------------------------------------------------------------------
|
|
+-- This is a helper module used by the LuaJIT machine code dumper module.
|
|
+--
|
|
+-- It disassembles most standard RISC-V instructions.
|
|
+-- Mode is little-endian
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+local type = type
|
|
+local byte, format = string.byte, string.format
|
|
+local match, gmatch = string.match, string.gmatch
|
|
+local concat = table.concat
|
|
+local bit = require("bit")
|
|
+local band, bor, tohex = bit.band, bit.bor, bit.tohex
|
|
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
|
|
+local jit = require("jit")
|
|
+
|
|
+local jstat = { jit.status() }
|
|
+local function is_opt_enabled(opt)
|
|
+ for _, v in ipairs(jstat) do
|
|
+ if v == opt then
|
|
+ return true
|
|
+ end
|
|
+ end
|
|
+ return false
|
|
+end
|
|
+local xthead = is_opt_enabled("XThead")
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+-- Opcode maps
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+--RVC32 extension
|
|
+
|
|
+local map_quad0 = {
|
|
+ shift = 13, mask = 7,
|
|
+ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn",
|
|
+ false, "c.fsdNMh", "c.swZMn", "c.fswNMn"
|
|
+}
|
|
+
|
|
+local map_sub2quad1 = {
|
|
+ shift = 5, mask = 3,
|
|
+ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ"
|
|
+}
|
|
+
|
|
+local map_sub1quad1 = {
|
|
+ shift = 10, mask = 3,
|
|
+ [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1
|
|
+}
|
|
+
|
|
+local map_quad1 = {
|
|
+ shift = 13, mask = 7,
|
|
+ [0] = {
|
|
+ shift = 7, mask = 31,
|
|
+ [0] = "c.nop", _ = "c.addiDx"
|
|
+ },
|
|
+ [1] = "c.jalT", [2] = "c.liDx",
|
|
+ [3] = {
|
|
+ shift = 7, mask = 31,
|
|
+ [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX",
|
|
+ _ = "c.luiDK"
|
|
+ },
|
|
+ [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq"
|
|
+}
|
|
+
|
|
+local map_sub1quad2 = {
|
|
+ shift = 12, mask = 1,
|
|
+ [0] = {
|
|
+ shift = 2, mask = 31,
|
|
+ [0] = "c.jrD", _ = "c.mvDE"
|
|
+ },
|
|
+ [1] = {
|
|
+ shift = 2, mask = 31,
|
|
+ [0] = {
|
|
+ shift = 7, mask = 31,
|
|
+ [0] = "c.ebreak", _ = "c.jalrD"
|
|
+ },
|
|
+ _ = "c.addDE"
|
|
+ }
|
|
+}
|
|
+
|
|
+local map_quad2 = {
|
|
+ shift = 13, mask = 7,
|
|
+ [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY",
|
|
+ [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu"
|
|
+}
|
|
+
|
|
+local map_compr = {
|
|
+ [0] = map_quad0, map_quad1, map_quad2
|
|
+}
|
|
+
|
|
+--RV32M
|
|
+local map_mext = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr",
|
|
+ "divDRr", "divuDRr", "remDRr", "remuDRr"
|
|
+}
|
|
+
|
|
+--RV64M
|
|
+local map_mext64 = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr",
|
|
+ [7] = "remuwDRr"
|
|
+}
|
|
+
|
|
+--RV32F, RV64F, RV32D, RV64D
|
|
+local map_fload = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "flwFL", [3] = "fldFL"
|
|
+}
|
|
+
|
|
+local map_fstore = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "fswSg", [3] = "fsdSg"
|
|
+}
|
|
+
|
|
+local map_fmadd = {
|
|
+ shift = 25, mask = 3,
|
|
+ [0] = "fmadd.sFGgHo", "fmadd.dFGgHo"
|
|
+}
|
|
+
|
|
+local map_fmsub = {
|
|
+ shift = 25, mask = 3,
|
|
+ [0] = "fmsub.sFGgHo", "fmsub.dFGgHo"
|
|
+}
|
|
+
|
|
+local map_fnmsub = {
|
|
+ shift = 25, mask = 3,
|
|
+ [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo"
|
|
+}
|
|
+
|
|
+local map_fnmadd = {
|
|
+ shift = 25, mask = 3,
|
|
+ [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo"
|
|
+}
|
|
+
|
|
+local map_fsgnjs = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6"
|
|
+}
|
|
+
|
|
+local map_fsgnjd = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6"
|
|
+}
|
|
+
|
|
+local map_fms = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg"
|
|
+}
|
|
+
|
|
+local map_fmd = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg"
|
|
+}
|
|
+
|
|
+local map_fcomps = { -- single-precision compares, selected by funct3
|
|
+  shift = 12, mask = 7,
|
|
+  [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg",
|
|
+  [4] = "fleq.sDGg", [5] = "fltq.sDGg" -- explicit key: a bare entry here would be stored at index 3
|
|
+}
|
|
+
|
|
+local map_fcompd = { -- double-precision compares, selected by funct3
|
|
+  shift = 12, mask = 7,
|
|
+  [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg",
|
|
+  [4] = "fleq.dDGg", [5] = "fltq.dDGg" -- explicit key: a bare entry here would be stored at index 3
|
|
+}
|
|
+
|
|
+local map_fcvtwls = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo"
|
|
+}
|
|
+
|
|
+local map_fcvtwld = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo",
|
|
+ [8] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [1] = "fcvtmodw.dDG"
|
|
+ }
|
|
+}
|
|
+
|
|
+local map_fcvts = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo"
|
|
+}
|
|
+
|
|
+local map_fcvtd = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo"
|
|
+}
|
|
+
|
|
+local map_fcvtsd = { -- funct7 = 32, selected by the rs2 field
|
|
+  shift = 20, mask = 31,
|
|
+  [1] = "fcvt.s.dFGo", -- fcvt.s.d is encoded with rs2 = 1 (double-precision source)
|
|
+  [4] = "fround.sFGo", [5] = "froundnx.sFGo"
|
|
+}
|
|
+
|
|
+local map_fcvtds = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fcvt.d.sFGo",
|
|
+ [4] = "fround.dFGo", [5] = "froundnx.dFGo"
|
|
+}
|
|
+
|
|
+local map_fmvwx = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fmv.w.xFR", [1] = "fli.sFy"
|
|
+}
|
|
+
|
|
+local map_fmvdx = {
|
|
+ shift = 20, mask = 31,
|
|
+ [0] = "fmv.d.xFR", [1] = "fli.dFy"
|
|
+}
|
|
+
|
|
+local map_fext = {
|
|
+ shift = 25, mask = 127,
|
|
+ [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo",
|
|
+ [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo",
|
|
+ [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd,
|
|
+ [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo",
|
|
+ [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld,
|
|
+ [104] = map_fcvts, [105] = map_fcvtd,
|
|
+ [112] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fmv.x.wDG", "fclass.sDG"
|
|
+ },
|
|
+ [113] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "fmv.x.dDG", "fclass.dDG"
|
|
+ },
|
|
+ [120] = map_fmvwx, [121] = map_fmvdx
|
|
+}
|
|
+
|
|
+--RV32A, RV64A
|
|
+local map_aext = {
|
|
+ shift = 27, mask = 31,
|
|
+ [0] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amoadd.wDrO", [3] = "amoadd.dDrO"
|
|
+ },
|
|
+ {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amoswap.wDrO", [3] = "amoswap.dDrO"
|
|
+ },
|
|
+ {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "lr.wDO", [3] = "lr.dDO"
|
|
+ },
|
|
+ {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "sc.wDrO", [3] = "sc.dDrO"
|
|
+ },
|
|
+ {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amoxor.wDrO", [3] = "amoxor.dDrO"
|
|
+ },
|
|
+ [8] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amoor.wDrO", [3] = "amoor.dDrO"
|
|
+ },
|
|
+ [12] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amoand.wDrO", [3] = "amoand.dDrO"
|
|
+ },
|
|
+ [16] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amomin.wDrO", [3] = "amomin.dDrO"
|
|
+ },
|
|
+ [20] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amomax.wDrO", [3] = "amomax.dDrO"
|
|
+ },
|
|
+ [24] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amominu.wDrO", [3] = "amominu.dDrO"
|
|
+ },
|
|
+ [28] = {
|
|
+ shift = 12, mask = 7,
|
|
+ [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO"
|
|
+ },
|
|
+}
|
|
+
|
|
+-- RV32I, RV64I
|
|
+local map_load = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "lbDL", "lhDL", "lwDL", "ldDL",
|
|
+ "lbuDL", "lhuDL", "lwuDL"
|
|
+}
|
|
+
|
|
+local map_opimm = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = {
|
|
+ shift = 7, mask = 0x1ffffff,
|
|
+ [0] = "nop", _ = "addi|li|mvDR0I2"
|
|
+ },
|
|
+ {
|
|
+ shift = 25, mask = 127,
|
|
+ [48] = {
|
|
+ shift = 20, mask = 31,
|
|
+ [4] = "sext.bDR", [5] = "sext.hDR"
|
|
+ },
|
|
+ _ = "slliDRi",
|
|
+ }, "sltiDRI", "sltiu|seqzDRI5",
|
|
+ "xori|notDRI4",
|
|
+ {
|
|
+ shift = 26, mask = 63,
|
|
+ [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi",
|
|
+ [26] = {
|
|
+ shift = 20, mask = 63,
|
|
+ [56] = "rev8DR"
|
|
+ }
|
|
+ },
|
|
+ "oriDRI", "andiDRI"
|
|
+}
|
|
+
|
|
+local map_branch = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false,
|
|
+ "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB"
|
|
+}
|
|
+
|
|
+local map_store = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "sbSr", "shSr", "swSr", "sdSr"
|
|
+}
|
|
+
|
|
+local map_op = { -- OP major opcode, selected by funct7 and then funct3
|
|
+  shift = 25, mask = 127,
|
|
+  [0] = {
|
|
+    shift = 12, mask = 7,
|
|
+    [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r",
|
|
+    "xorDRr", "srlDRr", "orDRr", "andDRr"
|
|
+  },
|
|
+  [1] = map_mext,
|
|
+  [4] = { -- no opcodes listed; shift/mask keep the decode loop from shifting by nil
|
|
+    shift = 12, mask = 7,
|
|
+  },
|
|
+  [5] = { -- Zbb
|
|
+    shift = 12, mask = 7,
|
|
+    [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr"
|
|
+  },
|
|
+  [7] = { -- Zicond
|
|
+    shift = 12, mask = 7,
|
|
+    [5] = "czero.eqzDRr", [7] = "czero.nezDRr"
|
|
+  },
|
|
+  [16] = { -- Zba
|
|
+    shift = 12, mask = 7,
|
|
+    [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr"
|
|
+  },
|
|
+  [32] = { -- Zbb
|
|
+    shift = 12, mask = 7,
|
|
+    [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr"
|
|
+  },
|
|
+  [48] = { -- Zbb
|
|
+    shift = 12, mask = 7,
|
|
+    [1] = "rolDRr", [5] = "rorDRr"
|
|
+  }
|
|
+}
|
|
+
|
|
+--- 64I
|
|
+local map_opimm32 = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = "addiw|sext.wDRI0", "slliwDRi",
|
|
+ [2] = { -- Zba
|
|
+ shift = 25, mask = 127,
|
|
+ [1] = "slli.uwDRi"
|
|
+ },
|
|
+ [5] = { -- 64I
|
|
+ shift = 25, mask = 127,
|
|
+ [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi"
|
|
+ },
|
|
+ [48] = { -- Zbb
|
|
+ shift = 25, mask = 127,
|
|
+ [5] = "roriwDRi"
|
|
+ }
|
|
+}
|
|
+
|
|
+local map_op32 = { -- OP-32 major opcode, selected by funct7 and then funct3
|
|
+  shift = 25, mask = 127,
|
|
+  [0] = { -- 64I
|
|
+    shift = 12, mask = 7,
|
|
+    [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr"
|
|
+  },
|
|
+  [1] = map_mext64,
|
|
+  [4] = { -- Zba & Zbb
|
|
+    shift = 12, mask = 7,
|
|
+    [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr"
|
|
+  },
|
|
+  [16] = { -- Zba (operand codes added so rd, rs1, rs2 are printed, as for sh1add in map_op)
|
|
+    shift = 12, mask = 7,
|
|
+    [2] = "sh1add.uwDRr", [4] = "sh2add.uwDRr", [6] = "sh3add.uwDRr"
|
|
+  },
|
|
+  [32] = { -- 64I
|
|
+    shift = 12, mask = 7,
|
|
+    [0] = "subw|negwDR0r", [5] = "srawDRr"
|
|
+  },
|
|
+  [48] = { -- Zbb
|
|
+    shift = 12, mask = 7,
|
|
+    [1] = "rolwDRr", [5] = "rorwDRr"
|
|
+  }
|
|
+}
|
|
+
|
|
+local map_ecabre = {
|
|
+ shift = 12, mask = 7,
|
|
+ [0] = {
|
|
+ shift = 20, mask = 4095,
|
|
+ [0] = "ecall", "ebreak"
|
|
+ }
|
|
+}
|
|
+
|
|
+local map_fence = {
|
|
+ shift = 12, mask = 1,
|
|
+ [0] = "fence", --"fence.i" ZIFENCEI EXTENSION
|
|
+}
|
|
+
|
|
+local map_jalr = {
|
|
+ shift = 7, mask = 0x1ffffff,
|
|
+ _ = "jalr|jrDRI7", [256] = "ret"
|
|
+}
|
|
+
|
|
+local map_xthead_custom0 = { -- custom-0 major opcode (XThead), selected by funct3
|
|
+  shift = 12, mask = 7,
|
|
+  [1] = { -- Arithmetic
|
|
+    shift = 27, mask = 31,
|
|
+    [0] = "th.addslDRrv",
|
|
+    [2] = {
|
|
+      shift = 26, mask = 63,
|
|
+      [4] = "th.srriDRi",
|
|
+      [5] = {
|
|
+        shift = 25, mask = 127,
|
|
+        [10] = "th.srriwDRi"
|
|
+      }
|
|
+    },
|
|
+    [4] = { -- XTheadMac
|
|
+      shift = 25, mask = 3,
|
|
+      [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr"
|
|
+    },
|
|
+    [5] = { -- XTheadMac
|
|
+      shift = 25, mask = 3,
|
|
+      [0] = "th.mulahDRr", "th.mulshDRr"
|
|
+    },
|
|
+    [8] = { -- XTheadCondMov
|
|
+      shift = 25, mask = 3,
|
|
+      [0] = "th.mveqzDRr", "th.mvnezDRr"
|
|
+    },
|
|
+    [16] = { -- XTheadBb
|
|
+      shift = 20, mask = 31,
|
|
+      [0] = {
|
|
+        shift = 25, mask = 3,
|
|
+        [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR"
|
|
+      }
|
|
+    },
|
|
+    [17] = { -- XTheadBb
|
|
+      shift = 26, mask = 1,
|
|
+      [0] = "th.tstDRi"
|
|
+    },
|
|
+    [18] = { -- XTheadBb
|
|
+      shift = 20, mask = 31,
|
|
+      [0] = {
|
|
+        shift = 25, mask = 3,
|
|
+        [0] = "th.revwDR"
|
|
+      }
|
|
+    }
|
|
+  },
|
|
+  [2] = "th.extDRji", [3] = "th.extuDRji",
|
|
+  [4] = { -- MemLoad (explicit key: a positional entry would be stored at index 1)
|
|
+    shift = 29, mask = 7,
|
|
+    [7] = { -- XTheadMemPair
|
|
+      shift = 27, mask = 3, -- low two bits of funct5; bits 26:25 hold imm2 (operand "P")
|
|
+      [0] = "th.lwdDrP", [2] = "th.lwudDrP", [3] = "th.lddDrP"
|
|
+    }
|
|
+  },
|
|
+  [5] = { -- MemStore (explicit key: a positional entry would be stored at index 2)
|
|
+    shift = 29, mask = 7,
|
|
+    [7] = { -- XTheadMemPair
|
|
+      shift = 27, mask = 3, -- low two bits of funct5; bits 26:25 hold imm2 (operand "P")
|
|
+      [0] = "th.swdDrP", [3] = "th.sddDrP"
|
|
+    }
|
|
+  }
|
|
+}
|
|
+
|
|
+local map_custom0 = xthead and map_xthead_custom0 or nil
|
|
+
|
|
+local map_pri = {
|
|
+ [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm,
|
|
+ [23] = "auipcDA", [27] = map_opimm32,
|
|
+ [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op,
|
|
+ [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub,
|
|
+ [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext,
|
|
+ [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre
|
|
+}
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+local map_gpr = {
|
|
+ [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7",
|
|
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
|
|
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
|
|
+ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31",
|
|
+}
|
|
+
|
|
+local map_fgpr = {
|
|
+ [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
|
|
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
|
|
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
|
|
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
|
|
+}
|
|
+
|
|
+local map_rm = {
|
|
+ [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn"
|
|
+}
|
|
+
|
|
+local map_fli = {
|
|
+ [0] = "-1.0",
|
|
+ "min",
|
|
+ "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7",
|
|
+ "0.0625", "0.125",
|
|
+ "0.25", "0.3125", "0.375", "0.4375",
|
|
+ "0.5", "0.625", "0.75", "0.875",
|
|
+ "1.0", "1.25", "1.5", "1.75",
|
|
+ "2.0", "2.5", "3.0",
|
|
+ "4.0", "8.0", "16.0", "128.0", "256.0",
|
|
+ "32768.0", "65536.0", "inf", "nan"
|
|
+}
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Output a nicely formatted line with an opcode and operands.
|
|
+local function putop(ctx, text, operands)
|
|
+  local pos = ctx.pos
|
|
+  local extra = ""
|
|
+  if ctx.rel then
|
|
+    local sym = ctx.symtab[ctx.rel]
|
|
+    if sym then extra = "\t->"..sym end
|
|
+  end
|
|
+  if ctx.hexdump > 0 then
|
|
+    -- ctx.out is a writer function (create() defaults it to io.write), so call it directly.
|
|
+    ctx.out(format("%08x %s %-7s %s%s\n",
|
|
+      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))
|
|
+  else
|
|
+    ctx.out(format("%08x %-7s %s%s\n",
|
|
+      ctx.addr+pos, text, concat(operands, ", "), extra))
|
|
+  end
|
|
+  -- Step over the instruction just printed: the two low bits of its first byte
|
|
+  -- are both set for a 32-bit encoding, otherwise it is a 16-bit compressed one.
|
|
+  if band(byte(ctx.code, pos+1), 3) < 3 then
|
|
+    ctx.pos = pos + 2
|
|
+  else
|
|
+    ctx.pos = pos + 4
|
|
+  end
|
|
+end
|
|
+
|
|
+-- Fallback for unknown opcodes.
|
|
+local function unknown(ctx)
|
|
+ return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
|
|
+end
|
|
+
|
|
+local function get_le(ctx)
|
|
+ local pos = ctx.pos
|
|
+ --Examine if the next instruction is 16-bits or 32-bits
|
|
+ local first_byte = byte(ctx.code, pos+1)
|
|
+ if(band(first_byte, 3) < 3) then --checking first two bits of opcode
|
|
+ local b0, b1 = byte(ctx.code, pos+1, pos+2)
|
|
+ return bor(lshift(b1, 8), b0)
|
|
+ else
|
|
+ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
|
|
+ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_W(opcode)
|
|
+ local part1 = band(rshift(opcode, 7), 15) --9:6
|
|
+ local part2 = band(rshift(opcode, 11), 3) --5:4
|
|
+ local part3 = band(rshift(opcode, 5), 1)--3
|
|
+ local part4 = band(rshift(opcode, 6), 1)--2
|
|
+ return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4),
|
|
+ lshift(part3, 3), lshift(part4, 2))
|
|
+end
|
|
+
|
|
+local function parse_x(opcode)
|
|
+ local part1 = band(rshift(opcode, 12), 1) --5
|
|
+ local part2 = band(rshift(opcode, 2), 31) --4:0
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2)
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part1, 5), part2)
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_X(opcode)
|
|
+ local part1 = band(rshift(opcode, 12), 1) --12
|
|
+ local part2 = band(rshift(opcode, 3), 3) --8:7
|
|
+ local part3 = band(rshift(opcode, 5), 1) --6
|
|
+ local part4 = band(rshift(opcode, 2), 1) --5
|
|
+ local part5 = band(rshift(opcode, 6), 1) --4
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7),
|
|
+ lshift(part3, 6), lshift(part4, 5), lshift(part5, 4))
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6),
|
|
+ lshift(part4, 5), lshift(part5, 4))
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_S(opcode)
|
|
+ local part1 = band(rshift(opcode, 25), 127) --11:5
|
|
+ local sign = band(rshift(part1, 6), 1)
|
|
+ local part2 = band(rshift(opcode, 7), 31) --4:0
|
|
+ if (sign == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 5), part2)
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part1, 5), part2)
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_B(opcode) -- B-type branch offset, sign-extended from imm[12] (opcode bit 31)
|
|
+  local sign = band(rshift(opcode, 31), 1) --12
|
|
+  local part1 = band(rshift(opcode, 7), 1) --11
|
|
+  local part2 = band(rshift(opcode, 25), 63) --10:5
|
|
+  local part3 = band(rshift(opcode, 8), 15) -- 4 : 1
|
|
+  local imm = bor(lshift(part1, 11), lshift(part2, 5), lshift(part3, 1))
|
|
+  if (sign == 1) then
|
|
+    -- Negative offset: fill bits 12..31 so the result is a negative 32-bit value.
|
|
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), imm)
|
|
+  end
|
|
+  return imm
|
|
+end
|
|
+
|
|
+local function parse_q(opcode)
|
|
+ local part1 = band(rshift(opcode, 12), 1) --8
|
|
+ local part2 = band(rshift(opcode, 5), 3) --7:6
|
|
+ local part3 = band(rshift(opcode, 2), 1) --5
|
|
+ local part4 = band(rshift(opcode, 10), 3) --4:3
|
|
+ local part5 = band(rshift(opcode, 3), 3) --2:1
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6),
|
|
+ lshift(part3, 5), lshift(part4, 3), lshift(part5, 1))
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5),
|
|
+ lshift(part4, 3), lshift(part5, 1))
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_J(opcode)
|
|
+ local part1 = band(rshift(opcode, 31), 1) --20
|
|
+ local part2 = band(rshift(opcode, 12), 255) -- 19:12
|
|
+ local part3 = band(rshift(opcode, 20), 1) --11
|
|
+ local part4 = band(rshift(opcode, 21), 1023) --10:1
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12),
|
|
+ lshift(part3, 11), lshift(part4, 1))
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12),
|
|
+ lshift(part3, 11), lshift(part4, 1))
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_T(opcode)
|
|
+ local part1 = band(rshift(opcode, 12), 1) --11
|
|
+ local part2 = band(rshift(opcode, 8), 1) --10
|
|
+ local part3 = band(rshift(opcode, 9), 3)--9:8
|
|
+ local part4 = band(rshift(opcode, 6), 1) --7
|
|
+ local part5 = band(rshift(opcode, 7), 1) -- 6
|
|
+ local part6 = band(rshift(opcode, 2), 1) --5
|
|
+ local part7 = band(rshift(opcode, 11), 1) --4
|
|
+ local part8 = band(rshift(opcode, 3), 7) --3:1
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
|
|
+ lshift(part2, 10), lshift(part3, 8), lshift(part4, 7),
|
|
+ lshift(part5, 6), lshift(part6, 5), lshift(part7, 4),
|
|
+ lshift(part8, 1))
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10),
|
|
+ lshift(part3, 8), lshift(part4, 7), lshift(part5, 6),
|
|
+ lshift(part6, 5), lshift(part7, 4), lshift(part8, 1))
|
|
+ end
|
|
+end
|
|
+
|
|
+local function parse_K(opcode)
|
|
+ local part1 = band(rshift(opcode, 12), 1) --5 17
|
|
+ local part2 = band(rshift(opcode, 2), 31) --4:0 16:12
|
|
+ if(part1 == 1) then
|
|
+ return bor(lshift(0, 31), lshift(0x7fff, 5), part2)
|
|
+ else
|
|
+ return bor(lshift(0, 31), lshift(part1, 5), part2)
|
|
+ end
|
|
+end
|
|
+
|
|
+-- Disassemble a single instruction.
|
|
+local function disass_ins(ctx)
|
|
+  local op = ctx:get()
|
|
+  local operands = {}
|
|
+  local last = nil
|
|
+  ctx.op = op
|
|
+  ctx.rel = nil
|
|
+
|
|
+  local opat = 0
|
|
+  --for compressed instructions (low two opcode bits are not both set)
|
|
+  if(band(op, 3) < 3) then
|
|
+    opat = ctx.map_compr[band(op, 3)]
|
|
+    while type(opat) ~= "string" do
|
|
+      if not opat then return unknown(ctx) end
|
|
+      -- Descend one decode table; opat._ is that table's default entry.
|
|
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
|
|
+    end
|
|
+  else
|
|
+    opat = ctx.map_pri[band(op,127)]
|
|
+    while type(opat) ~= "string" do
|
|
+      if not opat then return unknown(ctx) end
|
|
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
|
|
+    end
|
|
+  end
|
|
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
|
|
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
|
|
+  -- altname holds the "|"-separated pseudo-instruction names, pat2 the operand codes after them.
|
|
+  if altname then
|
|
+    pat = pat2
|
|
+  end
|
|
+
|
|
+  local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0
|
|
+
|
|
+  for p in gmatch(pat, ".") do
|
|
+    local x = nil
|
|
+    if p == "D" then
|
|
+      x = map_gpr[band(rshift(op, 7), 31)]
|
|
+    elseif p == "F" then
|
|
+      x = map_fgpr[band(rshift(op, 7), 31)]
|
|
+    elseif p == "R" then
|
|
+      x = map_gpr[band(rshift(op, 15), 31)]
|
|
+    elseif p == "G" then
|
|
+      x = map_fgpr[band(rshift(op, 15), 31)]
|
|
+    elseif p == "r" then
|
|
+      x = map_gpr[band(rshift(op, 20), 31)]
|
|
+      if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then
|
|
+        local temp = last --because of the different order of the characters
|
|
+        operands[#operands] = x
|
|
+        x = temp
|
|
+      end
|
|
+    elseif p == "g" then
|
|
+      x = map_fgpr[band(rshift(op, 20), 31)]
|
|
+      if(name == "fsw" or name == "fsd") then
|
|
+        local temp = last
|
|
+        operands[#operands] = x
|
|
+        x = temp
|
|
+      end
|
|
+    elseif p == "Z" then
|
|
+      x = map_gpr[8 + band(rshift(op, 2), 7)]
|
|
+    elseif p == "N" then
|
|
+      x = map_fgpr[8 + band(rshift(op, 2), 7)]
|
|
+    elseif p == "M" then
|
|
+      x = map_gpr[8 + band(rshift(op, 7), 7)]
|
|
+    elseif p == "E" then
|
|
+      x = map_gpr[band(rshift(op, 2), 31)]
|
|
+    elseif p == "W" then
|
|
+      local uimm = parse_W(op)
|
|
+      x = format("%s,%d", "sp", uimm)
|
|
+    elseif p == "x" then
|
|
+      x = parse_x(op)
|
|
+    elseif p == "h" then
|
|
+      local part1 = band(rshift(op, 5), 3) --7:6
|
|
+      local part2 = band(rshift(op, 10), 7) --5:3
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3))
|
|
+      operands[#operands] = format("%d(%s)", uimm, last)
|
|
+    elseif p == "X" then
|
|
+      local imm = parse_X(op)
|
|
+      x = format("%s,%d", "sp", imm)
|
|
+    elseif p == "O" then
|
|
+      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
|
|
+    elseif p == "H" then
|
|
+      x = map_fgpr[band(rshift(op, 27), 31)]
|
|
+    elseif p == "L" then
|
|
+      local register = map_gpr[band(rshift(op, 15), 31)]
|
|
+      local disp = arshift(op, 20)
|
|
+      x = format("%d(%s)", disp, register)
|
|
+    elseif p == "P" then -- XTheadMemPair
|
|
+      local register = map_gpr[band(rshift(op, 15), 31)]
|
|
+      local disp = band(arshift(op, 25), 3)
|
|
+      local isword = name ~= "th.ldd" and name ~= "th.sdd" -- a real boolean; only the dword pairs print shift 4
|
|
+      x = format("(%s), %d, %d", register, disp, isword and 3 or 4)
|
|
+    elseif p == "I" then
|
|
+      x = arshift(op, 20)
|
|
+      --different for jalr
|
|
+      if(name == "jalr") then
|
|
+        local reg = map_gpr[band(rshift(op, 15), 31)]
|
|
+        if(ctx.reltab[reg] == nil) then
|
|
+          operands[#operands] = format("%d(%s)", x, last)
|
|
+        else
|
|
+          local target = ctx.reltab[reg] + x
|
|
+          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
|
|
+          ctx.rel = target
|
|
+          ctx.reltab[reg] = nil --assume no reuses of the register
|
|
+        end
|
|
+        x = nil --not to add additional operand
|
|
+      end
|
|
+    elseif p == "i" then
|
|
+      --both for RV32I AND RV64I
|
|
+      local value = band(arshift(op, 20), 63)
|
|
+      x = string.format("%d", value)
|
|
+    elseif p == "j" then -- XThead imm1[31..26]
|
|
+      local value = band(rshift(op, 26), 63)
|
|
+      x = string.format("%d", value)
|
|
+    elseif p == "v" then --XThead imm[2][26..25]
|
|
+      local value = band(rshift(op, 25), 3)
|
|
+      x = string.format("%d", value)
|
|
+    elseif p == "S" then
|
|
+      local register = map_gpr[band(rshift(op, 15), 31)] --register
|
|
+      local imm = parse_S(op)
|
|
+      x = format("%d(%s)", imm, register)
|
|
+    elseif p == "n" then
|
|
+      local part1 = band(rshift(op, 5), 1) --6
|
|
+      local part2 = band(rshift(op, 10), 7) --5:3
|
|
+      local part3 = band(rshift(op, 6), 1) --2
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3),
|
|
+        lshift(part3, 2))
|
|
+      operands[#operands] = format("%d(%s)", uimm, last)
|
|
+    elseif p == "A" then
|
|
+      local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)]
|
|
+      ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12)
|
|
+      x = format("0x%x", value)
|
|
+    elseif p == "B" then
|
|
+      x = ctx.addr + ctx.pos + parse_B(op)
|
|
+      ctx.rel = x
|
|
+      x = format("0x%08x", x)
|
|
+    elseif p == "U" then
|
|
+      local value = band(rshift(op, 12), 0xfffff)
|
|
+      x = string.format("0x%x", value)
|
|
+    elseif p == "Q" then
|
|
+      local part1 = band(rshift(op, 2), 7) --8:6
|
|
+      local part2 = band(rshift(op, 12), 1) --5
|
|
+      local part3 = band(rshift(op, 5), 3) --4:3
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
|
|
+        lshift(part3, 3))
|
|
+      x = format("%d(%s)", uimm, "sp")
|
|
+    elseif p == "q" then
|
|
+      x = ctx.addr + ctx.pos + parse_q(op)
|
|
+      ctx.rel = x
|
|
+      x = format("0x%08x", x)
|
|
+    elseif p == "J" then
|
|
+      x = ctx.addr + ctx.pos + parse_J(op)
|
|
+      ctx.rel = x
|
|
+      x = format("0x%08x", x)
|
|
+    elseif p == "K" then
|
|
+      local value = parse_K(op)
|
|
+      x = string.format("0x%x", value)
|
|
+    elseif p == "Y" then
|
|
+      local part1 = band(rshift(op, 2), 3) --7:6
|
|
+      local part2 = band(rshift(op, 12), 1) --5
|
|
+      local part3 = band(rshift(op, 4), 7) --4:2
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
|
|
+        lshift(part3, 2))
|
|
+      x = format("%d(%s)", uimm, "sp")
|
|
+    elseif p == "o" then -- rounding mode
|
|
+      x = map_rm[band(rshift(op, 12), 7)]
|
|
+    elseif p == "y" then -- fli lut
|
|
+      x = map_fli[band(rshift(op, 15), 31)]
|
|
+    elseif p == "1" then
|
|
+      local part1 = band(rshift(op, 12), 1) --5
|
|
+      local part2 = band(rshift(op, 2), 31) --4:0
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 5), part2)
|
|
+      x = string.format("0x%x", uimm)
|
|
+    elseif p == "T" then
|
|
+      x = ctx.addr + ctx.pos + parse_T(op)
|
|
+      ctx.rel = x
|
|
+      x = format("0x%08x", x)
|
|
+    elseif p == "t" then
|
|
+      local part1 = band(rshift(op, 7), 7) --8:6
|
|
+      local part2 = band(rshift(op, 10), 7) --5:3
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
|
|
+      x = format("%d(%s)", uimm, "sp")
|
|
+    elseif p == "u" then
|
|
+      local part1 = band(rshift(op, 7), 3) --7:6
|
|
+      local part2 = band(rshift(op, 9), 15) --5:2
|
|
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2))
|
|
+      x = format("%d(%s)", uimm, "sp")
|
|
+    elseif p == "V" then
|
|
+      x = map_fgpr[band(rshift(op, 2), 31)]
|
|
+    elseif p == "0" then --PSEUDOINSTRUCTIONS
|
|
+      if (last == "zero" or last == 0) then
|
|
+        local n = #operands
|
|
+        operands[n] = nil
|
|
+        last = operands[n-1]
|
|
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
|
|
+        if a1 then name, altname = a1, a2
|
|
+        else name = altname end
|
|
+        alias_done = true
|
|
+      end
|
|
+    elseif (p == "4") then
|
|
+      if(last == -1) then
|
|
+        name = altname
|
|
+        operands[#operands] = nil
|
|
+      end
|
|
+    elseif (p == "5") then
|
|
+      if(last == 1) then
|
|
+        name = altname
|
|
+        operands[#operands] = nil
|
|
+      end
|
|
+    elseif (p == "6") then
|
|
+      if(last == operands[#operands - 1]) then
|
|
+        name = altname
|
|
+        operands[#operands] = nil
|
|
+      end
|
|
+    elseif (p == "7") then --jalr rs
|
|
+      local value = string.sub(operands[#operands], 1, 1)
|
|
+      local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1)
|
|
+      if(value == "0" and
|
|
+        (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then
|
|
+        if(operands[#operands - 1] == "zero") then
|
|
+          name = altname
|
|
+        end
|
|
+        operands[#operands] = nil
|
|
+        operands[#operands] = reg
|
|
+      end
|
|
+    elseif (p == "2" and alias_done == false) then
|
|
+      if (last == "zero" or last == 0) then
|
|
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
|
|
+        name = a2
|
|
+        operands[#operands] = nil
|
|
+      end
|
|
+    end
|
|
+    if x then operands[#operands+1] = x; last = x end
|
|
+  end
|
|
+  return putop(ctx, name, operands)
|
|
+end
|
|
+
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+-- Disassemble a block of code.
|
|
+local function disass_block(ctx, ofs, len)
|
|
+ if not ofs then
|
|
+ ofs = 0
|
|
+ end
|
|
+ local stop = len and ofs+len or #ctx.code
|
|
+ --instructions can be both 32 and 16 bits
|
|
+ stop = stop - stop % 2
|
|
+ ctx.pos = ofs - ofs % 2
|
|
+ ctx.rel = nil
|
|
+ while ctx.pos < stop do disass_ins(ctx) end
|
|
+end
|
|
+
|
|
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
|
|
+local function create(code, addr, out)
|
|
+ local ctx = {}
|
|
+ ctx.code = code
|
|
+ ctx.addr = addr or 0
|
|
+ ctx.out = out or io.write
|
|
+ ctx.symtab = {}
|
|
+ ctx.disass = disass_block
|
|
+ ctx.hexdump = 8
|
|
+ ctx.get = get_le
|
|
+ ctx.map_pri = map_pri
|
|
+ ctx.map_compr = map_compr
|
|
+ ctx.reltab = {}
|
|
+ return ctx
|
|
+end
|
|
+
|
|
+-- Simple API: disassemble code (a string) at address and output via out.
|
|
+local function disass(code, addr, out)
|
|
+  create(code, addr, out):disass() -- addr is only the displayed base address; decoding starts at offset 0 of code
|
|
+end
|
|
+
|
|
+-- Return register name for RID.
|
|
+local function regname(r)
|
|
+ if r < 32 then return map_gpr[r] end
|
|
+ return "f"..(r-32)
|
|
+end
|
|
+
|
|
+-- Public module functions.
|
|
+return {
|
|
+ create = create,
|
|
+ disass = disass,
|
|
+ regname = regname
|
|
+}
|
|
diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua
|
|
new file mode 100644
|
|
index 0000000000..fd6ce27689
|
|
--- /dev/null
|
|
+++ b/src/jit/dis_riscv64.lua
|
|
@@ -0,0 +1,16 @@
|
|
+----------------------------------------------------------------------------
|
|
+-- LuaJIT RISC-V 64 disassembler wrapper module.
|
|
+--
|
|
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
|
|
+-- Released under the MIT license. See Copyright Notice in luajit.h
|
|
+----------------------------------------------------------------------------
|
|
+-- This module just exports the default riscv little-endian functions from the
|
|
+-- RISC-V disassembler module. All the interesting stuff is there.
|
|
+------------------------------------------------------------------------------
|
|
+
|
|
+local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv")
|
|
+return {
|
|
+ create = dis_riscv.create,
|
|
+ disass = dis_riscv.disass,
|
|
+ regname = dis_riscv.regname
|
|
+}
|
|
\ No newline at end of file
|
|
|
|
From 23b5c55e1e18a75bb35a1a5ce7cd3b85a84903ad Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 6 Mar 2024 09:50:08 +0800
|
|
Subject: [PATCH 20/22] riscv(misc): add support in Makefile
|
|
|
|
---
|
|
Makefile | 1 +
|
|
src/Makefile | 8 ++++++++
|
|
2 files changed, 9 insertions(+)
|
|
|
|
diff --git a/Makefile b/Makefile
|
|
index d789e9f374..7b7368adde 100644
|
|
--- a/Makefile
|
|
+++ b/Makefile
|
|
@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
|
|
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
|
|
dis_mips64.lua dis_mips64el.lua \
|
|
dis_mips64r6.lua dis_mips64r6el.lua \
|
|
+ dis_riscv.lua dis_riscv64.lua \
|
|
vmdef.lua
|
|
|
|
ifeq (,$(findstring Windows,$(OS)))
|
|
diff --git a/src/Makefile b/src/Makefile
|
|
index 4a56d1e8e5..cdf1d5d279 100644
|
|
--- a/src/Makefile
|
|
+++ b/src/Makefile
|
|
@@ -52,6 +52,7 @@ CCOPT_arm=
|
|
CCOPT_arm64=
|
|
CCOPT_ppc=
|
|
CCOPT_mips=
|
|
+CCOPT_riscv64=
|
|
#
|
|
CCDEBUG=
|
|
# Uncomment the next line to generate debug information:
|
|
@@ -266,6 +267,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
|
|
else
|
|
TARGET_LJARCH= mips
|
|
endif
|
|
+else
|
|
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
|
|
+ TARGET_LJARCH= riscv64
|
|
else
|
|
$(error Unsupported target architecture)
|
|
endif
|
|
@@ -274,6 +278,7 @@ endif
|
|
endif
|
|
endif
|
|
endif
|
|
+endif
|
|
|
|
ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
|
|
TARGET_SYS= PS3
|
|
@@ -471,6 +476,9 @@ ifeq (ppc,$(TARGET_LJARCH))
|
|
DASM_AFLAGS+= -D PPE -D TOC
|
|
endif
|
|
endif
|
|
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
|
|
+ DASM_AFLAGS+= -D RISCV64
|
|
+endif
|
|
endif
|
|
endif
|
|
|
|
|
|
From bedd0cf1e179a0a3370ea3320806237d8a4ec37e Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@proton.me>
|
|
Date: Wed, 21 Aug 2024 16:39:26 +0800
|
|
Subject: [PATCH 21/22] riscv(support,linux): use HWPROBE for ISE detection
|
|
|
|
Current SIGILL handler appears to have weird issues with libluajit on
|
|
some platforms. Considering the 6.6 kernel is becoming more common, switch
|
|
to HWPROBE for better compatibility.
|
|
---
|
|
src/lib_jit.c | 79 +++++++++++++++++++++++++--------------------------
|
|
src/lj_jit.h | 29 +++++++++++++++++++
|
|
2 files changed, 67 insertions(+), 41 deletions(-)
|
|
|
|
diff --git a/src/lib_jit.c b/src/lib_jit.c
|
|
index e97e4d45e8..376b3c85f9 100644
|
|
--- a/src/lib_jit.c
|
|
+++ b/src/lib_jit.c
|
|
@@ -632,23 +632,26 @@ JIT_PARAMDEF(JIT_PARAMINIT)
|
|
#endif
|
|
|
|
#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX
|
|
-#include <setjmp.h>
|
|
-#include <signal.h>
|
|
-static sigjmp_buf sigbuf = {0};
|
|
-static void detect_sigill(int sig)
|
|
-{
|
|
- siglongjmp(sigbuf, 1);
|
|
-}
|
|
+
|
|
+#if LJ_TARGET_LINUX
|
|
+#include <unistd.h>
|
|
+
|
|
+struct riscv_hwprobe hwprobe_requests[] = {
|
|
+ {RISCV_HWPROBE_KEY_IMA_EXT_0}
|
|
+};
|
|
+
|
|
+const uint64_t *hwprobe_ext = &hwprobe_requests[0].value;
|
|
+
|
|
+int hwprobe_ret = 0;
|
|
+#endif
|
|
|
|
static int riscv_compressed()
|
|
{
|
|
#if defined(__riscv_c) || defined(__riscv_compressed)
|
|
/* Don't bother checking for RVC -- would crash before getting here. */
|
|
return 1;
|
|
-#elif defined(__GNUC__)
|
|
- /* c.nop; c.nop; */
|
|
- __asm__(".4byte 0x00010001");
|
|
- return 1;
|
|
+#elif LJ_TARGET_LINUX
|
|
+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
@@ -659,11 +662,8 @@ static int riscv_zba()
|
|
#if defined(__riscv_b) || defined(__riscv_zba)
|
|
/* Don't bother checking for Zba -- would crash before getting here. */
|
|
return 1;
|
|
-#elif defined(__GNUC__)
|
|
- /* Don't bother verifying the result, just check if the instruction exists. */
|
|
- /* add.uw zero, zero, zero */
|
|
- __asm__(".4byte 0x0800003b");
|
|
- return 1;
|
|
+#elif LJ_TARGET_LINUX
|
|
+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
@@ -674,11 +674,8 @@ static int riscv_zbb()
|
|
#if defined(__riscv_b) || defined(__riscv_zbb)
|
|
/* Don't bother checking for Zbb -- would crash before getting here. */
|
|
return 1;
|
|
-#elif defined(__GNUC__)
|
|
- register int t asm ("a0");
|
|
- /* addi a0, zero, 255; sext.b a0, a0; */
|
|
- __asm__("addi a0, zero, 255\n\t.4byte 0x60451513");
|
|
- return t < 0;
|
|
+#elif LJ_TARGET_LINUX
|
|
+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
@@ -689,10 +686,8 @@ static int riscv_zicond()
|
|
#if defined(__riscv_zicond)
|
|
/* Don't bother checking for Zicond -- would crash before getting here. */
|
|
return 1;
|
|
-#elif defined(__GNUC__)
|
|
- /* czero.eqz zero, zero, zero; */
|
|
- __asm__(".4byte 0x0e005033");
|
|
- return 1;
|
|
+#elif LJ_TARGET_LINUX
|
|
+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
@@ -703,6 +698,8 @@ static int riscv_zfa()
|
|
#if defined(__riscv_zfa)
|
|
/* Don't bother checking for Zfa -- would crash before getting here. */
|
|
return 1;
|
|
+#elif LJ_TARGET_LINUX
|
|
+ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 1 : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
@@ -716,23 +713,19 @@ static int riscv_xthead()
|
|
&& defined(__riscv_xtheadmac))
|
|
/* Don't bother checking for XThead -- would crash before getting here. */
|
|
return 1;
|
|
-#elif defined(__GNUC__)
|
|
- register int t asm ("a0");
|
|
- /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */
|
|
- /* Therefore assume XThead* are present if XTheadBb is present. */
|
|
- /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */
|
|
- __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b");
|
|
- return t == -1; /* In case of collision with other vendor extensions. */
|
|
#else
|
|
- return 0;
|
|
+/*
|
|
+** Hardcoded as there's no easy way of detection:
|
|
+** - SIGILL handling has some trouble with libluajit as we speak
|
|
+** - Checking mvendorid looks good, but might not be reliable.
|
|
+*/
|
|
+ return 0;
|
|
#endif
|
|
}
|
|
|
|
static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
|
|
{
|
|
- if (sigsetjmp(sigbuf, 1) == 0) {
|
|
- return func() ? flag : 0;
|
|
- } else return 0;
|
|
+ return func() ? flag : 0;
|
|
}
|
|
#endif
|
|
|
|
@@ -809,17 +802,21 @@ static uint32_t jit_cpudetect(void)
|
|
|
|
#elif LJ_TARGET_RISCV64
|
|
#if LJ_HASJIT
|
|
- /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */
|
|
- struct sigaction old = {0}, act = {0};
|
|
- act.sa_handler = detect_sigill;
|
|
- sigaction(SIGILL, &act, &old);
|
|
+
|
|
+#if LJ_TARGET_LINUX
|
|
+ /* HWPROBE-based detection of RVC, Zba, Zbb, Zicond and Zfa. */
|
|
+ hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests,
|
|
+ sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0,
|
|
+ NULL, 0);
|
|
+
|
|
flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
|
|
flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
|
|
flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
|
|
flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond);
|
|
flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa);
|
|
flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
|
|
- sigaction(SIGILL, &old, NULL);
|
|
+
|
|
+#endif
|
|
|
|
/* Detect V/P? */
|
|
/* V have no hardware available, P not ratified yet. */
|
|
diff --git a/src/lj_jit.h b/src/lj_jit.h
|
|
index e61d99ffa9..124985692d 100644
|
|
--- a/src/lj_jit.h
|
|
+++ b/src/lj_jit.h
|
|
@@ -78,6 +78,35 @@
|
|
|
|
#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead"
|
|
|
|
+#if LJ_TARGET_LINUX
|
|
+#include <sys/syscall.h>
|
|
+
|
|
+#ifndef __NR_riscv_hwprobe
|
|
+#ifndef __NR_arch_specific_syscall
|
|
+#define __NR_arch_specific_syscall 244
|
|
+#endif
|
|
+#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14)
|
|
+#endif
|
|
+
|
|
+struct riscv_hwprobe {
|
|
+ int64_t key;
|
|
+ uint64_t value;
|
|
+};
|
|
+
|
|
+#define RISCV_HWPROBE_KEY_MVENDORID 0
|
|
+#define RISCV_HWPROBE_KEY_MARCHID 1
|
|
+#define RISCV_HWPROBE_KEY_MIMPID 2
|
|
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
|
|
+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
|
|
+
|
|
+#define RISCV_HWPROBE_IMA_C (1 << 1)
|
|
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
|
|
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
|
|
+#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
|
|
+#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
|
|
+
|
|
+#endif
|
|
+
|
|
#else
|
|
|
|
#define JIT_F_CPUSTRING ""
|
|
|
|
From fd1422f59e1e93fbeff55e2598786a25138fe9c2 Mon Sep 17 00:00:00 2001
|
|
From: gns <infiwang@pm.me>
|
|
Date: Thu, 16 Jan 2025 01:02:19 +0800
|
|
Subject: [PATCH 22/22] riscv(interp): strip excessive extended branch (^B+J)
|
|
|
|
---
|
|
src/vm_riscv64.dasc | 104 ++++++++++++++++++++++----------------------
|
|
1 file changed, 52 insertions(+), 52 deletions(-)
|
|
|
|
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
|
|
index 1fee1f7ed4..d4cbe4ebb1 100644
|
|
--- a/src/vm_riscv64.dasc
|
|
+++ b/src/vm_riscv64.dasc
|
|
@@ -553,7 +553,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|
|
|
| // Return from pcall or xpcall fast func.
|
|
| mov_true TMP1
|
|
- | bxeqz TMP0, ->cont_dispatch
|
|
+ | beqz TMP0, ->cont_dispatch
|
|
| ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
|
|
| mv BASE, TMP2 // Restore caller base.
|
|
| // Prepending may overwrite the pcall frame, so do it at the end.
|
|
@@ -564,9 +564,9 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| addiw RD, RD, 8 // RD = (nresults+1)*8.
|
|
| andi TMP0, PC, FRAME_TYPE
|
|
| li CRET1, LUA_YIELD
|
|
- | bxeqz RD, ->vm_unwind_c_eh
|
|
+ | beqz RD, ->vm_unwind_c_eh
|
|
| mv MULTRES, RD
|
|
- | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
|
|
+ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
|
|
|
|
|
|->vm_return:
|
|
| // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
|
|
@@ -574,7 +574,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| andi TMP2, PC, ~FRAME_TYPEP
|
|
| xori TMP0, TMP0, FRAME_C
|
|
| sub TMP2, BASE, TMP2 // TMP2 = previous base.
|
|
- | bxnez TMP0, ->vm_returnp
|
|
+ | bnez TMP0, ->vm_returnp
|
|
|
|
|
| addiw TMP1, RD, -8
|
|
| sd TMP2, L->base
|
|
@@ -743,7 +743,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| andi TMP0, PC, FRAME_TYPE
|
|
| li TISNIL, LJ_TNIL
|
|
| li TISNUM, LJ_TISNUM
|
|
- | bxeqz TMP0, ->BC_RET_Z
|
|
+ | beqz TMP0, ->BC_RET_Z
|
|
| j ->vm_return
|
|
|
|
|
|->vm_pcall: // Setup protected C frame and enter VM.
|
|
@@ -1003,7 +1003,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| // Returns 0/1 or TValue * (metamethod).
|
|
|3:
|
|
| sltiu TMP1, CRET1, 2
|
|
- | bxeqz TMP1, ->vmeta_binop
|
|
+ | beqz TMP1, ->vmeta_binop
|
|
| negw TMP2, CRET1
|
|
|4:
|
|
| lhu RD, OFS_RD(PC)
|
|
@@ -1091,7 +1091,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|
|
| call_intern vmeta_arith, lj_meta_arith
|
|
| // Returns NULL (finished) or TValue * (metamethod).
|
|
- | bxeqz CRET1, ->cont_nop
|
|
+ | beqz CRET1, ->cont_nop
|
|
|
|
|
| // Call metamethod for binary op.
|
|
|->vmeta_binop:
|
|
@@ -1115,7 +1115,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o)
|
|
| // Returns NULL (retry) or TValue * (metamethod base).
|
|
#if LJ_52
|
|
- | bxnez CRET1, ->vmeta_binop // Binop call for compatibility.
|
|
+ | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
|
|
| mv CARG1, MULTRES
|
|
| j ->BC_LEN_Z
|
|
#else
|
|
@@ -1201,7 +1201,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|->ff_ .. name:
|
|
| ld CARG1, 0(BASE)
|
|
| fld FARG1, 0(BASE)
|
|
- | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | beqz NARGS8:RC, ->fff_fallback
|
|
| checknum CARG1, ->fff_fallback
|
|
|.endmacro
|
|
|
|
|
@@ -1210,7 +1210,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| ld CARG1, 0(BASE)
|
|
| sltiu TMP0, NARGS8:RC, 16
|
|
| ld CARG2, 8(BASE)
|
|
- | bxnez TMP0, ->fff_fallback
|
|
+ | bnez TMP0, ->fff_fallback
|
|
| gettp TMP1, CARG1
|
|
| gettp TMP2, CARG2
|
|
| sltiu TMP1, TMP1, LJ_TISNUM
|
|
@@ -1218,7 +1218,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| fld FARG1, 0(BASE)
|
|
| and TMP1, TMP1, TMP2
|
|
| fld FARG2, 8(BASE)
|
|
- | bxeqz TMP1, ->fff_fallback
|
|
+ | beqz TMP1, ->fff_fallback
|
|
|.endmacro
|
|
|
|
|
|// Inlined GC threshold check.
|
|
@@ -1278,7 +1278,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|2:
|
|
| ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
|
|
| li CARG1, LJ_TNIL
|
|
- | bxeqz TAB:RB, ->fff_restv
|
|
+ | beqz TAB:RB, ->fff_restv
|
|
| lw TMP0, TAB:RB->hmask
|
|
| lw TMP1, STR:RC->sid
|
|
| ld NODE:TMP2, TAB:RB->node
|
|
@@ -1300,7 +1300,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| settp CARG1, RB, TMP3
|
|
| j ->fff_restv // Not found, keep default result.
|
|
|5:
|
|
- | bxne CARG1, TISNIL, ->fff_restv
|
|
+ | bne CARG1, TISNIL, ->fff_restv
|
|
| j <4 // Ditto for nil value.
|
|
|
|
|
|6:
|
|
@@ -1326,7 +1326,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| bxnez TMP3, ->fff_fallback
|
|
| andi TMP3, TMP2, LJ_GC_BLACK // isblack(table)
|
|
| sd TAB:CARG2, TAB:TMP1->metatable
|
|
- | bxeqz TMP3, ->fff_restv
|
|
+ | beqz TMP3, ->fff_restv
|
|
| barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
|
|
|
|
|
|.ffunc rawget
|
|
@@ -1361,7 +1361,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| gettp TMP0, CARG1
|
|
| addi TMP1, TMP0, -LJ_TSTR
|
|
| // A __tostring method in the string base metatable is ignored.
|
|
- | bxeqz TMP1, ->fff_restv // String key?
|
|
+ | beqz TMP1, ->fff_restv // String key?
|
|
| // Handle numbers inline, unless a number base metatable is present.
|
|
| ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
|
|
| sltu TMP0, TISNUM, TMP0
|
|
@@ -1391,10 +1391,10 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
|
|
| // Returns 1=found, 0=end, -1=error.
|
|
| li RD, (2+1)*8
|
|
- | bxgtz CRET1, ->fff_res // Found key/value.
|
|
+ | bgtz CRET1, ->fff_res // Found key/value.
|
|
| mv TMP1, CRET1
|
|
| mv CARG1, TISNIL
|
|
- | bxeqz TMP1, ->fff_restv // End of traversal: return nil.
|
|
+ | beqz TMP1, ->fff_restv // End of traversal: return nil.
|
|
| ld CFUNC:RB, FRAME_FUNC(BASE)
|
|
| li RC, 2*8
|
|
| cleartp CFUNC:RB
|
|
@@ -1434,19 +1434,19 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| ld TMP1, 0(TMP3)
|
|
|1:
|
|
| li RD, (0+1)*8
|
|
- | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
|
|
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
|
|
| sd TMP1, -8(BASE)
|
|
| li RD, (2+1)*8
|
|
| j ->fff_res
|
|
|2: // Check for empty hash part first. Otherwise call C function.
|
|
| lw TMP0, TAB:CARG1->hmask
|
|
| li RD, (0+1)*8
|
|
- | bxeqz TMP0, ->fff_res
|
|
+ | beqz TMP0, ->fff_res
|
|
| mv CARG2, TMP2
|
|
| call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key)
|
|
| // Returns cTValue * or NULL.
|
|
| li RD, (0+1)*8
|
|
- | bxeqz CRET1, ->fff_res
|
|
+ | beqz CRET1, ->fff_res
|
|
| ld TMP1, 0(CRET1)
|
|
| j <1
|
|
|
|
|
@@ -1483,7 +1483,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
|
|
| andi TMP3, TMP3, 1
|
|
| addi PC, TMP3, 16+FRAME_PCALL
|
|
- | bxeqz NARGS8:RC, ->vm_call_dispatch
|
|
+ | beqz NARGS8:RC, ->vm_call_dispatch
|
|
|1:
|
|
| add TMP0, BASE, NARGS8:RC
|
|
|2:
|
|
@@ -1540,17 +1540,17 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3
|
|
| and CARG2, CARG2, TMP4
|
|
| xor CARG2, TMP3, CARG2
|
|
- | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD?
|
|
+ | bgtz CARG4, ->fff_fallback // st > LUA_YIELD?
|
|
| xor TMP2, TMP2, CARG3
|
|
| or CARG4, TMP2, TMP0
|
|
- | bxnez TMP1, ->fff_fallback // cframe != 0?
|
|
+ | bnez TMP1, ->fff_fallback // cframe != 0?
|
|
| ld TMP0, L:CARG1->maxstack
|
|
| ld PC, FRAME_PC(BASE)
|
|
- | bxeqz CARG4, ->fff_fallback // base == top && st == 0?
|
|
+ | beqz CARG4, ->fff_fallback // base == top && st == 0?
|
|
| add TMP2, CARG2, NARGS8:RC
|
|
| sd BASE, L->base
|
|
| sd PC, SAVE_PC(sp)
|
|
- | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow?
|
|
+ | bltu TMP0, TMP2, ->fff_fallback // Stack overflow?
|
|
|1:
|
|
|.if resume
|
|
| addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
|
|
@@ -1655,7 +1655,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sd BASE, L->base
|
|
| andi TMP0, TMP0, CFRAME_RESUME
|
|
| sd TMP1, L->top
|
|
- | bxeqz TMP0, ->fff_fallback
|
|
+ | beqz TMP0, ->fff_fallback
|
|
| sd x0, L->cframe
|
|
| sb CRET1, L->status
|
|
| j ->vm_leave_unw
|
|
@@ -1666,14 +1666,14 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|->ff_math_ .. func:
|
|
| ld CARG1, 0(BASE)
|
|
| gettp TMP0, CARG1
|
|
- | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | beqz NARGS8:RC, ->fff_fallback
|
|
| fmv.d.x FARG1, CARG1
|
|
- | bxeq TMP0, TISNUM, ->fff_restv
|
|
+ | beq TMP0, TISNUM, ->fff_restv
|
|
| srli TMP1, CARG1, 52 // Extract exponent (and sign).
|
|
- | bxgeu TMP0, TISNUM, ->fff_fallback
|
|
+ | bgeu TMP0, TISNUM, ->fff_fallback
|
|
| andi TMP1, TMP1, 0x7ff // Extract exponent.
|
|
| slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction
|
|
- | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN?
|
|
+ | beqz TMP2, ->fff_resn // Less than 2^52 / Not NaN?
|
|
| fcvt.l.d TMP3, FARG1, rm
|
|
| fcvt.d.l FTMP1, TMP3
|
|
| fsgnj.d FRET1, FTMP1, FARG1
|
|
@@ -1693,7 +1693,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sub CARG1, TMP1, TMP0
|
|
| slli TMP3, CARG1, 32
|
|
| settp CARG1, TISNUM
|
|
- | bxgez TMP3, ->fff_restv
|
|
+ | bgez TMP3, ->fff_restv
|
|
| lui CARG1, 0x41e00 // 2^31 as a double.
|
|
| slli CARG1, CARG1, 32
|
|
| j ->fff_restv
|
|
@@ -1701,7 +1701,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sltiu TMP2, CARG2, LJ_TISNUM
|
|
| slli CARG1, CARG1, 1
|
|
| srli CARG1, CARG1, 1
|
|
- | bxeqz TMP2, ->fff_fallback // int
|
|
+ | beqz TMP2, ->fff_fallback // int
|
|
|// fallthrough
|
|
|
|
|
|->fff_restv:
|
|
@@ -1757,7 +1757,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| li TMP1, 8
|
|
| ld CARG1, 0(BASE)
|
|
| fld FARG1, 0(BASE)
|
|
- | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
|
|
+ | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
|
|
| checknum CARG1, ->fff_fallback
|
|
| call_extern ff_math_log, log
|
|
| j ->fff_resn
|
|
@@ -1811,7 +1811,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| checkint CARG1, >4
|
|
|1: // Handle integers.
|
|
| ld CARG2, 0(RA)
|
|
- | bxeq RA, RB, ->fff_restv
|
|
+ | beq RA, RB, ->fff_restv
|
|
| sext.w CARG1, CARG1
|
|
| checkint CARG2, >3
|
|
| sext.w CARG2, CARG2
|
|
@@ -1840,7 +1840,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|5: // Handle numbers.
|
|
| ld CARG2, 0(RA)
|
|
| fld FARG2, 0(RA)
|
|
- | bxgeu RA, RB, ->fff_resn
|
|
+ | bgeu RA, RB, ->fff_resn
|
|
| checknum CARG2, >7
|
|
|6:
|
|
|.if ismax
|
|
@@ -1871,7 +1871,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| addi TMP0, TMP0, -LJ_TSTR
|
|
| or TMP1, TMP1, TMP0
|
|
| cleartp STR:CARG1
|
|
- | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument.
|
|
+ | bnez TMP1, ->fff_fallback // Need exactly 1 string argument.
|
|
| lw TMP0, STR:CARG1->len
|
|
| ld PC, FRAME_PC(BASE)
|
|
| snez RD, TMP0
|
|
@@ -1894,7 +1894,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sltu TMP2, TMP2, CARG1 // !(255 < n).
|
|
| or TMP1, TMP1, TMP2
|
|
| li CARG3, 1
|
|
- | bxnez TMP1, ->fff_fallback
|
|
+ | bnez TMP1, ->fff_fallback
|
|
| addi CARG2, sp, TMPD_OFS
|
|
| sb CARG1, TMPD(sp)
|
|
|->fff_newstr:
|
|
@@ -1917,7 +1917,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| ld CARG3, 16(BASE)
|
|
| addi TMP0, NARGS8:RC, -16
|
|
| gettp TMP1, CARG1
|
|
- | bxltz TMP0, ->fff_fallback
|
|
+ | bltz TMP0, ->fff_fallback
|
|
| cleartp STR:CARG1, CARG1
|
|
| li CARG4, -1
|
|
| beqz TMP0, >1
|
|
@@ -1927,7 +1927,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| checkint CARG2, ->fff_fallback
|
|
| addi TMP0, TMP1, -LJ_TSTR
|
|
| sext.w CARG3, CARG2
|
|
- | bxnez TMP0, ->fff_fallback
|
|
+ | bnez TMP0, ->fff_fallback
|
|
| lw CARG2, STR:CARG1->len
|
|
| // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
|
|
| addiw TMP0, CARG2, 1
|
|
@@ -1950,7 +1950,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| sub CARG3, CARG4, CARG3 // len = end - start
|
|
| addi CARG2, CARG2, sizeof(GCstr)-1
|
|
| addiw CARG3, CARG3, 1 // len += 1
|
|
- | bxgez CARG3, ->fff_newstr
|
|
+ | bgez CARG3, ->fff_newstr
|
|
|->fff_emptystr: // Return empty string.
|
|
| li TMP1, LJ_TSTR
|
|
| addi STR:CARG1, GL, offsetof(global_State, strempty)
|
|
@@ -1961,7 +1961,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| .ffunc string_ .. name
|
|
| ffgccheck
|
|
| ld CARG2, 0(BASE)
|
|
- | bxeqz NARGS8:RC, ->fff_fallback
|
|
+ | beqz NARGS8:RC, ->fff_fallback
|
|
| checkstr STR:CARG2, ->fff_fallback
|
|
| addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf)
|
|
| ld TMP0, SBUF:CARG1->b
|
|
@@ -1983,7 +1983,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|
|
|
|->vm_tobit_fb:
|
|
| fld FARG1, 0(BASE)
|
|
- | bxeqz TMP1, ->fff_fallback
|
|
+ | beqz TMP1, ->fff_fallback
|
|
| fadd.d FARG1, FARG1, TOBIT
|
|
| fmv.x.w CRET1, FARG1
|
|
| zext.w CRET1, CRET1
|
|
@@ -2005,7 +2005,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| add TMP3, BASE, NARGS8:RC
|
|
|1:
|
|
| ld TMP1, 0(TMP2)
|
|
- | bxeq TMP2, TMP3, ->fff_resi
|
|
+ | beq TMP2, TMP3, ->fff_resi
|
|
| gettp TMP0, TMP1
|
|
| addi TMP2, TMP2, 8
|
|
| bne TMP0, TISNUM, >2
|
|
@@ -2016,7 +2016,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| fld FARG1, -8(TMP2)
|
|
| sltiu TMP0, TMP0, LJ_TISNUM
|
|
| fadd.d FARG1, FARG1, TOBIT
|
|
- | bxeqz TMP0, ->fff_fallback
|
|
+ | beqz TMP0, ->fff_fallback
|
|
| fmv.x.w TMP1, FARG1
|
|
| zext.w TMP1, TMP1
|
|
| bins CRET1, CRET1, TMP1
|
|
@@ -2064,7 +2064,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|1:
|
|
| gettp TMP0, CARG2
|
|
| zext.w CARG2, CARG2
|
|
- | bxne TMP0, TISNUM, ->fff_fallback
|
|
+ | bne TMP0, TISNUM, ->fff_fallback
|
|
| sext.w CARG1, CARG1
|
|
| shins CRET1, CARG1, CARG2
|
|
| zext.w CRET1, CRET1
|
|
@@ -2085,7 +2085,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|
|1:
|
|
| gettp TMP0, CARG2
|
|
| zext.w CARG2, CARG2
|
|
- | bxne TMP0, TISNUM, ->fff_fallback
|
|
+ | bne TMP0, TISNUM, ->fff_fallback
|
|
| sext.w CARG1, CARG1
|
|
| neg TMP2, CARG2
|
|
| rotinsa TMP1, CARG1, CARG2
|
|
@@ -2116,13 +2116,13 @@ static void build_subroutines(BuildCtx *ctx)
|
|
| // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|
|
| ld BASE, L->base
|
|
| slliw RD, CRET1, 3
|
|
- | bxgtz CRET1, ->fff_res // Returned nresults+1?
|
|
+ | bgtz CRET1, ->fff_res // Returned nresults+1?
|
|
|1: // Returned 0 or -1: retry fast path.
|
|
| ld LFUNC:RB, FRAME_FUNC(BASE)
|
|
| ld TMP0, L->top
|
|
| sub NARGS8:RC, TMP0, BASE
|
|
| cleartp LFUNC:RB
|
|
- | bxnez CRET1, ->vm_call_tail // Returned -1?
|
|
+ | bnez CRET1, ->vm_call_tail // Returned -1?
|
|
| ins_callt // Returned 0: retry fast path.
|
|
|
|
|
|// Reconstruct previous base for vmeta_call during tailcall.
|
|
@@ -4297,7 +4297,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
|
|
|
|->BC_RETV_Z: // Non-standard return case.
|
|
| andi TMP2, TMP1, FRAME_TYPEP
|
|
- | bxnez TMP2, ->vm_return
|
|
+ | bnez TMP2, ->vm_return
|
|
| // Return from vararg function: relocate BASE down.
|
|
| sub BASE, BASE, TMP1
|
|
| ld PC, FRAME_PC(BASE)
|
|
@@ -4550,7 +4550,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| ld TMP2, L->maxstack
|
|
| lbu TMP1, -4+PC2PROTO(numparams)(PC)
|
|
| ld KBASE, -4+PC2PROTO(k)(PC)
|
|
- | bxltu TMP2, RA, ->vm_growstack_l
|
|
+ | bltu TMP2, RA, ->vm_growstack_l
|
|
| slliw TMP1, TMP1, 3 // numparams*8
|
|
|2:
|
|
| bltu NARGS8:RC, TMP1, >3 // Check for missing parameters.
|
|
@@ -4587,7 +4587,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| addi TMP3, RC, 16+FRAME_VARG
|
|
| ld KBASE, -4+PC2PROTO(k)(PC)
|
|
| sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
|
|
- | bxgeu TMP0, TMP2, ->vm_growstack_l
|
|
+ | bgeu TMP0, TMP2, ->vm_growstack_l
|
|
| lbu TMP2, -4+PC2PROTO(numparams)(PC)
|
|
| mv RA, BASE
|
|
| mv RC, TMP1
|
|
@@ -4633,7 +4633,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
| add RC, BASE, NARGS8:RC
|
|
| sd BASE, L->base // base of currently excuting function
|
|
| sd RC, L->top
|
|
- | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack.
|
|
+ | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack.
|
|
| li_vmstate C // li TMP0, ~LJ_VMST_C
|
|
if (op == BC_FUNCCW) {
|
|
| ld CARG2, CFUNC:RB->f
|