From 7deb9367f1bc8b0c9d71fb720920fa5d633924bc Mon Sep 17 00:00:00 2001 From: marvim Date: Mon, 11 Sep 2023 05:15:30 +0000 Subject: [PATCH] deps src: Automatic update --- src/luajit-stamp/download-luajit.cmake | 34 +-- src/luajit-stamp/extract-luajit.cmake | 2 +- src/luajit-stamp/luajit-urlinfo.txt | 4 +- src/luajit/.relver | 2 +- src/luajit/src/Makefile | 8 +- src/luajit/src/host/buildvm_peobj.c | 113 ++++++++- src/luajit/src/host/genversion.lua | 7 +- src/luajit/src/jit/dis_arm64.lua | 42 ++-- src/luajit/src/lj_arch.h | 18 +- src/luajit/src/lj_asm.c | 4 + src/luajit/src/lj_asm_arm.h | 7 +- src/luajit/src/lj_asm_arm64.h | 328 ++++++++++--------------- src/luajit/src/lj_def.h | 24 +- src/luajit/src/lj_emit_arm64.h | 157 ++++++------ src/luajit/src/lj_mcode.c | 12 +- src/luajit/src/lj_opt_fold.c | 5 +- src/luajit/src/lj_record.c | 12 +- src/luajit/src/lj_target.h | 11 +- src/luajit/src/msvcbuild.bat | 16 +- src/luajit/src/vm_arm64.dasc | 3 +- 20 files changed, 438 insertions(+), 371 deletions(-) diff --git a/src/luajit-stamp/download-luajit.cmake b/src/luajit-stamp/download-luajit.cmake index e4a6d59ad..22d40d17f 100644 --- a/src/luajit-stamp/download-luajit.cmake +++ b/src/luajit-stamp/download-luajit.cmake @@ -22,16 +22,16 @@ function(check_file_hash has_hash hash_is_good) set("${has_hash}" TRUE PARENT_SCOPE) message(STATUS "verifying file... - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz'") + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz'") - file("SHA256" "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz" actual_value) + file("SHA256" "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz" actual_value) - if(NOT "${actual_value}" STREQUAL "b518721280390e4cec1af30f6819d86756ce4234d82410a55a4e121855f64e08") + if(NOT "${actual_value}" STREQUAL "76f0a93ee9674aedf8604b6e8246a8a70ebfa487d6592703c2efdf9162c0debc") set("${hash_is_good}" FALSE PARENT_SCOPE) message(STATUS "SHA256 hash of - /home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz + /home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz does not match expected value - expected: 'b518721280390e4cec1af30f6819d86756ce4234d82410a55a4e121855f64e08' + expected: '76f0a93ee9674aedf8604b6e8246a8a70ebfa487d6592703c2efdf9162c0debc' actual: '${actual_value}'") else() set("${hash_is_good}" TRUE PARENT_SCOPE) @@ -71,40 +71,40 @@ function(sleep_before_download attempt) execute_process(COMMAND "${CMAKE_COMMAND}" -E sleep "${sleep_seconds}") endfunction() -if("/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz" STREQUAL "") +if("/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz" STREQUAL "") message(FATAL_ERROR "LOCAL can't be empty") endif() -if("https://github.com/LuaJIT/LuaJIT/archive/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz" STREQUAL "") +if("https://github.com/LuaJIT/LuaJIT/archive/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz" STREQUAL "") message(FATAL_ERROR "REMOTE can't be empty") endif() -if(EXISTS "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz") +if(EXISTS "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz") check_file_hash(has_hash hash_is_good) if(has_hash) if(hash_is_good) message(STATUS "File already exists and hash match (skip download): - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz' - SHA256='b518721280390e4cec1af30f6819d86756ce4234d82410a55a4e121855f64e08'" + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz' + SHA256='76f0a93ee9674aedf8604b6e8246a8a70ebfa487d6592703c2efdf9162c0debc'" ) return() else() message(STATUS "File already exists but hash mismatch. Removing...") - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz") endif() else() message(STATUS "File already exists but no hash specified (use URL_HASH): - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz' + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz' Old file will be removed and new file downloaded from URL." ) - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz") endif() endif() set(retry_number 5) message(STATUS "Downloading... - dst='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz' + dst='/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz' timeout='none' inactivity timeout='none'" ) @@ -115,7 +115,7 @@ foreach(i RANGE ${retry_number}) if(status_code IN_LIST download_retry_codes) sleep_before_download(${i}) endif() - foreach(url https://github.com/LuaJIT/LuaJIT/archive/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz) + foreach(url https://github.com/LuaJIT/LuaJIT/archive/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz) if(NOT url IN_LIST skip_url_list) message(STATUS "Using src='${url}'") @@ -126,7 +126,7 @@ foreach(i RANGE ${retry_number}) file( DOWNLOAD - "${url}" "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz" + "${url}" "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz" # no TIMEOUT # no INACTIVITY_TIMEOUT @@ -143,7 +143,7 @@ foreach(i RANGE ${retry_number}) check_file_hash(has_hash hash_is_good) if(has_hash AND NOT hash_is_good) message(STATUS "Hash mismatch, removing...") - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz") else() message(STATUS "Downloading... done") return() diff --git a/src/luajit-stamp/extract-luajit.cmake b/src/luajit-stamp/extract-luajit.cmake index 6c659f8bf..a438258e0 100644 --- a/src/luajit-stamp/extract-luajit.cmake +++ b/src/luajit-stamp/extract-luajit.cmake @@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.5) # Make file names absolute: # -get_filename_component(filename "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz" ABSOLUTE) +get_filename_component(filename "/home/runner/work/deps/deps/neovim/deps/build/downloads/luajit/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz" ABSOLUTE) get_filename_component(directory "/home/runner/work/deps/deps/neovim/deps/build/src/luajit" ABSOLUTE) message(STATUS "extracting... diff --git a/src/luajit-stamp/luajit-urlinfo.txt b/src/luajit-stamp/luajit-urlinfo.txt index e9c08e23c..82adb8fb3 100644 --- a/src/luajit-stamp/luajit-urlinfo.txt +++ b/src/luajit-stamp/luajit-urlinfo.txt @@ -6,7 +6,7 @@ method=url command=/usr/local/bin/cmake;-P;/home/runner/work/deps/deps/neovim/deps/build/src/luajit-stamp/download-luajit.cmake;COMMAND;/usr/local/bin/cmake;-P;/home/runner/work/deps/deps/neovim/deps/build/src/luajit-stamp/verify-luajit.cmake;COMMAND;/usr/local/bin/cmake;-P;/home/runner/work/deps/deps/neovim/deps/build/src/luajit-stamp/extract-luajit.cmake source_dir=/home/runner/work/deps/deps/neovim/deps/build/src/luajit work_dir=/home/runner/work/deps/deps/neovim/deps/build/src -url(s)=https://github.com/LuaJIT/LuaJIT/archive/41fb94defa8f830ce69a8122b03f6ac3216d392a.tar.gz -hash=SHA256=b518721280390e4cec1af30f6819d86756ce4234d82410a55a4e121855f64e08 +url(s)=https://github.com/LuaJIT/LuaJIT/archive/5a18d4582ff3a22898e49a039c05ed7d5720999d.tar.gz +hash=SHA256=76f0a93ee9674aedf8604b6e8246a8a70ebfa487d6592703c2efdf9162c0debc no_extract= diff --git a/src/luajit/.relver b/src/luajit/.relver index 6fb0ec3ad..f10565fc5 100644 --- a/src/luajit/.relver +++ b/src/luajit/.relver @@ -1 +1 @@ -1693350652 +1694316387 diff --git a/src/luajit/src/Makefile b/src/luajit/src/Makefile index 8c47a16bc..224d21e70 100644 --- a/src/luajit/src/Makefile +++ b/src/luajit/src/Makefile @@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) -TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) TARGET_LJARCH= x64 else @@ -475,7 +475,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc GIT= git -GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +ifeq (Windows,$(HOST_SYS)$(HOST_MSYS)) + GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +else + GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +endif GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ diff --git a/src/luajit/src/host/buildvm_peobj.c b/src/luajit/src/host/buildvm_peobj.c index 5bca6df8c..e3e1026ee 100644 --- a/src/luajit/src/host/buildvm_peobj.c +++ b/src/luajit/src/host/buildvm_peobj.c @@ -9,7 +9,7 @@ #include "buildvm.h" #include "lj_bc.h" -#if LJ_TARGET_X86ORX64 +#if LJ_TARGET_WINDOWS /* Context for PE object emitter. */ static char *strtab; @@ -93,6 +93,17 @@ typedef struct PEsymaux { #define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_OFS 0 #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +#define PEOBJ_PDATA_NRELOC 6 +#define PEOBJ_XDATA_SIZE (8*2+4+6*2) +#elif LJ_TARGET_ARM64 +#define PEOBJ_ARCH_TARGET 0xaa64 +#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */ +#define PEOBJ_RELOC_DIR32 0x01 +#define PEOBJ_RELOC_ADDR32NB 0x02 +#define PEOBJ_RELOC_OFS (-4) +#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +#define PEOBJ_PDATA_NRELOC 4 +#define PEOBJ_XDATA_SIZE (4+24+4 +4+8) #endif /* Section numbers (0-based). */ @@ -100,7 +111,7 @@ enum { PEOBJ_SECT_ABS = -2, PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_TEXT, -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, #elif LJ_TARGET_X86 @@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx) uint32_t sofs; int i, nrsym; union { uint8_t b; uint32_t u; } host_endian; +#ifdef PEOBJ_PDATA_NRELOC + uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; +#endif sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); @@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx) /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); pesect[PEOBJ_SECT_PDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); + sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4); pesect[PEOBJ_SECT_PDATA].relocofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; + sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); pesect[PEOBJ_SECT_XDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ + sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */ pesect[PEOBJ_SECT_XDATA].relocofs = sofs; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ @@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx) */ nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif @@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 { /* Write .pdata section. */ - uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ PEreloc reloc; pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; @@ -308,6 +321,88 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_ARM64 + /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */ + { /* Write .pdata section. */ + uint32_t pdata[4]; + PEreloc reloc; + pdata[0] = 0; + pdata[1] = 0; + pdata[2] = fcofs; + pdata[3] = 4+24+4; + owrite(ctx, &pdata, sizeof(pdata)); + /* Start of .text and start of .xdata. */ + reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + /* Start of vm_ffi_call and start of second part of .xdata. */ + reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } + { /* Write .xdata section. */ + uint32_t u32; + uint8_t *p, uwc[24]; + PEreloc reloc; + +#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2) +#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */ +#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */ +#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r)-19)<< 6) | ((o) >> 3)) +#define CSAVE_REGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \ +} while (0) +#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3)) +#define CSAVE_FREGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \ +} while (0) +#define CSAVE_REGX(r,o) CBE16(0xd400 | (((r) - 19) << 5) | (~(o) >> 3)) +#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */ +#define CODE_NOP 0xe3 +#define CODE_END 0xe4 +#define CEND_ALIGN do { \ + *p++ = CODE_END; \ + while ((p - uwc) & 3) *p++ = CODE_NOP; \ +} while (0) + + /* Unwind codes for .text section with handler. */ + p = uwc; + CALLOC_S(208); /* +1 */ + CSAVE_FPLR(192); /* +1 */ + CADD_FP(192); /* +2 */ + CSAVE_REGS(19, 28, 184); /* +5*2 */ + CSAVE_FREGS(8, 15, 104); /* +4*2 */ + CEND_ALIGN; /* +1 +1 -> 24 */ + + u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 24); + + u32 = 0; /* Handler RVA to be relocated at 4 + 24. */ + owrite(ctx, &u32, 4); + + /* Unwind codes for vm_ffi_call without handler. */ + p = uwc; + CSAVE_FPLR(16); /* +1 */ + CADD_FP(16); /* +2 */ + CSAVE_REGX(19, -24); /* +2 */ + CSAVE_REGX(20, -32); /* +2 */ + CEND_ALIGN; /* +1 +0 -> 8 */ + + u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 8); + + reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } #elif LJ_TARGET_X86 /* Write .sxdata section. */ for (i = 0; i < nrsym; i++) { @@ -339,7 +434,7 @@ void emit_peobj(BuildCtx *ctx) emit_peobj_sym(ctx, ctx->relocsym[i], 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym(ctx, "lj_err_unwind_win", 0, diff --git a/src/luajit/src/host/genversion.lua b/src/luajit/src/host/genversion.lua index 42b5e6fe9..28f7206c5 100644 --- a/src/luajit/src/host/genversion.lua +++ b/src/luajit/src/host/genversion.lua @@ -5,9 +5,10 @@ -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -local FILE_ROLLING_H = "luajit_rolling.h" -local FILE_RELVER_TXT = "luajit_relver.txt" -local FILE_LUAJIT_H = "luajit.h" +local arg = {...} +local FILE_ROLLING_H = arg[1] or "luajit_rolling.h" +local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt" +local FILE_LUAJIT_H = arg[3] or "luajit.h" local function file_read(file) local fp = assert(io.open(file, "rb"), "run from the wrong directory") diff --git a/src/luajit/src/jit/dis_arm64.lua b/src/luajit/src/jit/dis_arm64.lua index b10e2fb12..3d199bf26 100644 --- a/src/luajit/src/jit/dis_arm64.lua +++ b/src/luajit/src/jit/dis_arm64.lua @@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register. [0] = { shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } }, false -- unallocated @@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register. { shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } } } @@ -735,7 +727,7 @@ local map_cond = { "hi", "ls", "ge", "lt", "gt", "le", "al", } -local map_shift = { [0] = "lsl", "lsr", "asr", } +local map_shift = { [0] = "lsl", "lsr", "asr", "ror"} local map_extend = { [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", diff --git a/src/luajit/src/lj_arch.h b/src/luajit/src/lj_arch.h index 6e22b1b2a..026e741f4 100644 --- a/src/luajit/src/lj_arch.h +++ b/src/luajit/src/lj_arch.h @@ -57,7 +57,7 @@ #define LUAJIT_TARGET LUAJIT_ARCH_X64 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #define LUAJIT_TARGET LUAJIT_ARCH_ARM -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(_M_ARM64) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) #define LUAJIT_TARGET LUAJIT_ARCH_PPC @@ -66,7 +66,7 @@ #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else -#error "No support for this architecture (yet)" +#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif #endif @@ -237,7 +237,7 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 @@ -331,6 +331,7 @@ #define LJ_ARCH_NOFFI 1 #elif LJ_ARCH_BITS == 64 #error "No support for PPC64" +#undef LJ_TARGET_PPC #endif #if _ARCH_PWR7 @@ -490,36 +491,45 @@ #elif LJ_TARGET_ARM #if defined(__ARMEB__) #error "No support for big-endian ARM" +#undef LJ_TARGET_ARM #endif #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #error "No support for Cortex-M CPUs" +#undef LJ_TARGET_ARM #endif #if !(__ARM_EABI__ || LJ_TARGET_IOS) #error "Only ARM EABI or iOS 3.0+ ABI is supported" +#undef LJ_TARGET_ARM #endif #elif LJ_TARGET_ARM64 #if defined(_ILP32) #error "No support for ILP32 model on ARM64" +#undef LJ_TARGET_ARM64 #endif #elif LJ_TARGET_PPC #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) #error "No support for little-endian PPC32" +#undef LJ_TARGET_PPC #endif #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) -#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" +#error "No support for PPC/e500, use LuaJIT 2.0" +#undef LJ_TARGET_PPC #endif #elif LJ_TARGET_MIPS32 #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #error "Only o32 ABI supported for MIPS32" +#undef LJ_TARGET_MIPS #endif #if LJ_TARGET_MIPSR6 /* Not that useful, since most available r6 CPUs are 64 bit. */ #error "No support for MIPS32R6" +#undef LJ_TARGET_MIPS #endif #elif LJ_TARGET_MIPS64 #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ #error "Only n64 ABI supported for MIPS64" +#undef LJ_TARGET_MIPS #endif #endif #endif diff --git a/src/luajit/src/lj_asm.c b/src/luajit/src/lj_asm.c index c02a1b9e7..844910ad9 100644 --- a/src/luajit/src/lj_asm.c +++ b/src/luajit/src/lj_asm.c @@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) IRIns *ir = IR(ref); if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || #if LJ_GC64 +#if LJ_TARGET_ARM64 + (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) || +#else (ir->o == IR_KINT && k == ir->i) || +#endif (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && k == (intptr_t)ir_kptr(ir)) diff --git a/src/luajit/src/lj_asm_arm.h b/src/luajit/src/lj_asm_arm.h index b3b1f096b..a003d5cab 100644 --- a/src/luajit/src/lj_asm_arm.h +++ b/src/luajit/src/lj_asm_arm.h @@ -1990,6 +1990,7 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { + int savereg = 0; Reg pbase; uint32_t k; if (irp) { @@ -2000,12 +2001,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot, pbase = rset_pickbot(allow); } else { pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ + savereg = 1; } } else { pbase = RID_BASE; } emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); + if (savereg) + emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ k = emit_isk12(0, (int32_t)(8*topslot)); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, ARMI_CMP^k, RID_TMP); @@ -2017,7 +2020,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (ra_hasspill(irp->s)) emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) + if (savereg) emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_loadi(as, RID_TMP, (i & ~4095)); } else { diff --git a/src/luajit/src/lj_asm_arm64.h b/src/luajit/src/lj_asm_arm64.h index 5e690308c..4dd6b7119 100644 --- a/src/luajit/src/lj_asm_arm64.h +++ b/src/luajit/src/lj_asm_arm64.h @@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc) emit_cond_branch(as, cc, target); } -/* Emit test and branch instruction to exit for guard. */ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +/* Emit test and branch instruction to exit for guard, if in range. */ +static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) { MCode *target = asm_exitstub_addr(as, as->snapno); MCode *p = as->mcp; + ptrdiff_t delta = target - p; if (LJ_UNLIKELY(p == as->invmcp)) { + if (as->orignins > 1023) return 0; /* Delta might end up too large. */ as->loopinv = 1; - *p = A64I_B | A64F_S26(target-p); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; + *p = A64I_B | A64F_S26(delta); + ai ^= 0x01000000u; + target = p-1; + } else if (LJ_UNLIKELY(delta >= 0x1fff)) { + return 0; } emit_tnb(as, ai, r, bit, target); + return 1; } /* Emit compare and branch instruction to exit for guard. */ @@ -211,16 +216,13 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); + int logical = (ai & 0x1f000000) == 0x0a000000; if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); return A64F_M(ir->r); } else if (irref_isk(ref)) { - uint32_t m; int64_t k = get_k64val(as, ref); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); + uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : emit_isk12(k); if (m) return m; } else if (mayfuse(as, ref)) { @@ -232,7 +234,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); IRIns *irl = IR(ir->op1); if (sh == A64SH_LSL && - irl->o == IR_CONV && + irl->o == IR_CONV && !logical && irl->op2 == ((IRT_I64<op1, allow); return A64F_M(m) | A64F_SH(sh, shift); } - } else if (ir->o == IR_CONV && + } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) { + Reg m = ra_alloc1(as, ir->op1, allow); + int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); + return A64F_M(m) | A64F_SH(A64SH_ROR, shift); + } else if (ir->o == IR_CONV && !logical && ir->op2 == ((IRT_I64<op1, allow); return A64F_M(m) | A64F_EX(A64EX_SXTW); @@ -541,8 +547,6 @@ static void asm_retf(ASMState *as, IRIns *ir) as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); @@ -666,25 +670,22 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); + Reg tmp; int32_t ofs = 0; ra_evictset(as, RSET_SCRATCH); - if (destused) { + if (ra_used(ir)) { if (ra_hasspill(ir->s)) { ofs = sps_scale(ir->s); - destused = 0; if (ra_hasreg(ir->r)) { ra_free(as, ir->r); ra_modified(as, ir->r); emit_spload(as, ir, ir->r, ofs); } } else { - dest = ra_dest(as, ir, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_FPR); + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); } } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); asm_guardcnb(as, A64I_CBZ, RID_RET); args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ @@ -775,113 +776,76 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; - Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; + Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t k = 0; uint32_t khash; - MCLabel l_end, l_loop, l_next; + MCLabel l_end, l_loop; rset_clear(allow, tab); - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); + /* Allocate registers outside of the loop. */ + if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) { + key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow); rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } } - - /* Allocate constants early. */ - if (irt_isnum(kt)) { - if (!isk) { - tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - } - } else if (irt_isaddr(kt)) { - if (isk) { - int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); + if (!isk) { + tkey = ra_scratch(as, allow); + rset_clear(allow, tkey); + } else if (irt_isnum(kt)) { + tkey = key; /* Assumes -0.0 is already canonicalized to +0.0. */ + } else { + int64_t kk; + if (irt_isaddr(kt)) { + kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; } else { - scr = ra_scratch(as, allow); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + kk = ~((int64_t)~irt_toitype(kt) << 47); } - rset_clear(allow, scr); - } else { - lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); - type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); + tkey = ra_allock(as, kk, allow); + rset_clear(allow, tkey); } /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); as->invmcp = NULL; - if (merge == IR_NE) + if (merge == IR_NE) { asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); + } else if (destused) { + uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val)); + lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)"); + emit_dn(as, A64I_ADDx^k12, dest, RID_GL); + } /* Follow hash chain until the end. */ l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); + if (destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); /* Type and value comparison. */ if (merge == IR_EQ) asm_guardcc(as, CC_EQ); else emit_cond_branch(as, CC_EQ, l_end); + emit_nm(as, A64I_CMPx^k, tmp, tkey); + if (!destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key)); + *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest; - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); - } - } else if (irt_isaddr(kt)) { - if (isk) { - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + /* Construct tkey as canonicalized or tagged key. */ + if (!isk) { + if (irt_isnum(kt)) { + emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); } else { - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); + type = ra_allock(as, irt_toitype(kt) << 15, allow); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); } - } else { - emit_nm(as, A64I_CMPx, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); } - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { @@ -895,7 +859,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - /* Fetch of str->sid is cheaper than ra_allock. */ emit_dnm(as, A64I_ANDw, dest, dest, tmp); emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); @@ -904,23 +867,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); emit_dnm(as, A64I_SUBw, dest, dest, tmp); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest); emit_dnm(as, A64I_SUBw, tmp, tmp, dest); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); if (irt_isnum(kt)) { + emit_dnm(as, A64I_EORw, tmp, tkey, dest); emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey); + emit_nm(as, A64I_FCMPZd, (key & 31), 0); + emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31)); } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); + emit_dnm(as, A64I_EORw, tmp, key, dest); + emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key); } } } @@ -935,7 +893,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int bigofs = !emit_checkofs(A64I_LDRx, kofs); Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key, idx = node; + Reg idx = node; RegSet allow = rset_exclude(RSET_GPR, node); uint64_t k; lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); @@ -954,9 +912,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } else { k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); } - key = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); - emit_lso(as, A64I_LDRx, key, idx, kofs); + emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs); if (bigofs) emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); } @@ -969,18 +926,16 @@ static void asm_uref(ASMState *as, IRIns *ir) MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, A64I_LDRx, dest, v); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, + asm_guardcnb(as, A64I_CBZ, RID_TMP); + emit_opk(as, A64I_ADDx, dest, dest, (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + emit_lso(as, A64I_LDRB, RID_TMP, dest, + (int32_t)offsetof(GCupval, closed)); } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); } - emit_lso(as, A64I_LDRx, uv, func, + emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } } @@ -1086,7 +1041,7 @@ static void asm_xstore(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir) { - Reg idx, tmp, type; + Reg idx, tmp; int32_t ofs = 0; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || @@ -1105,8 +1060,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } else { tmp = ra_scratch(as, gpr); } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx); rset_clear(gpr, idx); if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31); if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; @@ -1118,8 +1072,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), ra_allock(as, LJ_TISNUM << 15, gpr), tmp); } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } else if (irt_isnil(ir->t)) { emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); } else { @@ -1242,9 +1196,8 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_nm(as, A64I_CMPx, ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } emit_lso(as, A64I_LDRx, tmp, base, ofs); return; @@ -1330,7 +1283,6 @@ static void asm_obar(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; IRRef args[2]; MCLabel l_end; - RegSet allow = RSET_GPR; Reg obj, val, tmp; /* No need for other object barriers (yet). */ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); @@ -1341,14 +1293,13 @@ static void asm_obar(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end); emit_cond_branch(as, CC_EQ, l_end); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); } @@ -1390,12 +1341,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) if (irref_isk(lref)) return 1; /* But swap constants to the right. */ ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64<o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64<t)) { /* IR_MULOV */ asm_guardcc(as, CC_NE); emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest); emit_dnm(as, A64I_SMULL, dest, right, left); } else { emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); @@ -1707,16 +1657,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir) if (asm_swapops(as, blref, brref)) { Reg tmp = blref; blref = brref; brref = tmp; } + bleft = ra_alloc1(as, blref, RSET_GPR); if (irref_isk(brref)) { uint64_t k = get_k64val(as, brref); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) && + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft, + emit_ctz64(k))) return; - } m2 = emit_isk13(k, irt_is64(irl->t)); } - bleft = ra_alloc1(as, blref, RSET_GPR); ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); if (!m2) m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); @@ -1791,37 +1740,28 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { - Reg pbase; uint32_t k; + Reg pbase = RID_BASE; if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lj_assertA(ra_hasreg(pbase), "base reg lost"); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; + pbase = irp->r; + if (!ra_hasreg(pbase)) + pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET); } emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + if (pbase & 0x80) /* Restore temp. register. */ + emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0); k = emit_isk12((8*topslot)); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31)); emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); + if (pbase & 0x40) { + emit_getgl(as, (pbase & 31), jit_base); + if (pbase & 0x80) /* Save temp register. */ + emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0); } + emit_getgl(as, RID_TMP, cur_L); } /* Restore Lua stack from on-trace state. */ @@ -1863,7 +1803,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) /* Marker to prevent patching the GC check exit. */ #define ARM64_NOPATCH_GC_CHECK \ - (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) + (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO)) /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) @@ -1918,46 +1858,40 @@ static void asm_loop_tail_fixup(ASMState *as) /* -- Head of trace ------------------------------------------------------- */ -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - /* Coalesce BASE register for a root trace. */ static void asm_head_root_base(ASMState *as) { - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_movrr(as, ir, r, RID_BASE); + } } /* Coalesce BASE register for a side trace. */ static Reg asm_head_side_base(ASMState *as, IRIns *irp) { - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - return ra_dest(as, ir, RSET_GPR); - } else { - Reg r = irp->r; - lj_assertA(ra_hasreg(r), "base reg lost"); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - return r; + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ + emit_movrr(as, ir, r, irp->r); + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } } + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ diff --git a/src/luajit/src/lj_def.h b/src/luajit/src/lj_def.h index 88bc6336e..1461d3d79 100644 --- a/src/luajit/src/lj_def.h +++ b/src/luajit/src/lj_def.h @@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter; #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) -/* Don't ask ... */ -#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; -} -#else #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) -#endif +#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x)) +#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63)) #if defined(__arm__) static LJ_AINLINE uint32_t lj_bswap(uint32_t x) @@ -265,8 +259,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) #else unsigned char _BitScanForward(unsigned long *, unsigned long); unsigned char _BitScanReverse(unsigned long *, unsigned long); +unsigned char _BitScanForward64(unsigned long *, uint64_t); +unsigned char _BitScanReverse64(unsigned long *, uint64_t); #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward64) +#pragma intrinsic(_BitScanReverse64) static LJ_AINLINE uint32_t lj_ffs(uint32_t x) { @@ -277,6 +275,16 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) { unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; } + +static LJ_AINLINE uint32_t lj_ffs64(uint64_t x) +{ + unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r; +} + +static LJ_AINLINE uint32_t lj_fls64(uint64_t x) +{ + unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r; +} #endif unsigned long _byteswap_ulong(unsigned long); diff --git a/src/luajit/src/lj_emit_arm64.h b/src/luajit/src/lj_emit_arm64.h index 6926c71a8..fef5d9736 100644 --- a/src/luajit/src/lj_emit_arm64.h +++ b/src/luajit/src/lj_emit_arm64.h @@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref) } else { lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, "bad 64 bit const IR op %d", ir->o); - return ir->i; /* Sign-extended. */ + return (uint32_t)ir->i; /* Zero-extended. */ } } @@ -30,39 +30,31 @@ static uint32_t emit_isk12(int64_t n) uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; uint32_t m = n < 0 ? 0x40000000 : 0; if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); + return (uint32_t)(A64I_K12|m|A64F_U12(k)); } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); + return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12)); } return 0; } -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) +#define emit_clz64(n) (lj_fls64(n)^63) +#define emit_ctz64(n) lj_ffs64(n) /* Encode constant in K13 format for logical data processing instructions. */ static uint32_t emit_isk13(uint64_t n, int is64) { - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); + /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */ + int rot, ones, size, immr, imms; + if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n; + if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */ + rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64; + n = lj_ror(n, rot & 63); + ones = emit_ctz64(~n); + size = emit_clz64(n) + ones; + if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */ + immr = -rot & (size - 1); + imms = (-(size << 1) | (ones - 1)) & 63; + return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms); } static uint32_t emit_isfpk64(uint64_t n) @@ -121,6 +113,17 @@ static int emit_checkofs(A64Ins ai, int64_t ofs) } } +static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc) +{ + if (ofs >= 0) { + return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */ + } else if (ofs >= -256) { + return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff); + } else { + return A64F_D(31); /* Will mismatch prev. */ + } +} + static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) { int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; @@ -132,11 +135,9 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) uint32_t prev = *as->mcp & ~A64F_D(31); int ofsm = ofs - (1<>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) { aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) { aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); ofsm = ofs; } else { @@ -158,13 +159,12 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) +#define emit_canremat(ref) ((ref) <= REF_BASE) -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +/* Try to find a one-step delta relative to other consts. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) { RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); - if (lim <= 1) return 0; /* Can't beat that. */ while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); @@ -173,13 +173,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : get_k64val(as, ref); int64_t delta = (int64_t)(k - kx); + if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */ if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); + emit_dm(as, is64|A64I_MOVw, rd, r); return 1; } else { uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta); if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r); return 1; } /* Do other ops or multi-step deltas pay off? Probably not. @@ -192,53 +193,52 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) return 0; /* Failed. */ } -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. */ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); + int zeros = 0, ones = 0, neg, lshift = 0; + int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2; + /* Count non-homogeneous 16 bit fragments. */ + while (--i >= 0) { + uint32_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag != 0); + ones += (frag != 0xffff); } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ + neg = ones < zeros; /* Use MOVN if it pays off. */ + if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */ uint32_t k13 = emit_isk13(u64, is64); if (k13) { emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); return; } - } - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; + if (emit_kdelta(as, rd, u64, is64)) { + return; } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; + } + if (neg) { + u64 = ~u64; + if (!is64) u64 = (uint32_t)u64; + } + if (u64) { + /* Find first/last fragment to be filled. */ + int shift = (63-emit_clz64(u64)) & ~15; + lshift = emit_ctz64(u64) & ~15; + for (; shift > lshift; shift -= 16) { + uint32_t frag = (u64 >> shift) & 0xffff; + if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */ + if (neg) frag ^= 0xffff; /* MOVK requires the original value. */ + emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd); } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); } + /* But MOVN needs an inverted value. */ + emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) | + A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); } /* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) +#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i) /* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) #define glofs(as, k) \ ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) @@ -252,19 +252,20 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) { - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { + Reg base = RID_GL; + int64_t ofs = glofs(as, p); + if (emit_checkofs(ai, ofs)) { + /* GL + offset, might subsequently fuse to LDP/STP. */ + } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { + /* IP + offset is cheaper than allock, but address must be in range. */ emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. */ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); + return; + } else { /* Split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; } + emit_lso(as, ai, r, base, ofs); } /* Load 64 bit IR constant into register. */ diff --git a/src/luajit/src/lj_mcode.c b/src/luajit/src/lj_mcode.c index 947679377..8a4851dd6 100644 --- a/src/luajit/src/lj_mcode.c +++ b/src/luajit/src/lj_mcode.c @@ -29,6 +29,11 @@ #include #endif +#if LJ_TARGET_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include +#endif + #if LJ_TARGET_IOS void sys_icache_invalidate(void *start, size_t len); #endif @@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end) #endif #if LJ_TARGET_X86ORX64 UNUSED(start); UNUSED(end); +#elif LJ_TARGET_WINDOWS + FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start); #elif LJ_TARGET_IOS sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC @@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end) #if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include - #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE @@ -363,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, size_t need) sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; - if ((size_t)need > sizemcode) + if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ if (J->szallmcarea + sizemcode > maxmcode) lj_trace_err(J, LJ_TRERR_MCODEAL); diff --git a/src/luajit/src/lj_opt_fold.c b/src/luajit/src/lj_opt_fold.c index 48effb8ab..d90477f6c 100644 --- a/src/luajit/src/lj_opt_fold.c +++ b/src/luajit/src/lj_opt_fold.c @@ -1972,7 +1972,10 @@ LJFOLD(NE any any) LJFOLDF(comm_equal) { /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */ - if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) + if (fins->op1 == fins->op2 && + (!irt_isnum(fins->t) || + (fleft->o == IR_CONV && /* Converted integers cannot be NaN. */ + (uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8)))) return CONDFOLD(fins->o == IR_EQ); return fold_comm_swap(J); } diff --git a/src/luajit/src/lj_record.c b/src/luajit/src/lj_record.c index 9d0021a60..134fad837 100644 --- a/src/luajit/src/lj_record.c +++ b/src/luajit/src/lj_record.c @@ -1599,10 +1599,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) lj_assertJ(!hasmm, "inconsistent metamethod handling"); if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; - if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ + if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); - else if (tref_isnumber(key) && tref_isk(key) && tvismzero(&ix->keyv)) - key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */ + } else if (tref_isnum(key)) { + if (tref_isk(key)) { + if (tvismzero(&ix->keyv)) + key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */ + } else { + emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */ + } + } xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP diff --git a/src/luajit/src/lj_target.h b/src/luajit/src/lj_target.h index 09d19bd90..e7322c072 100644 --- a/src/luajit/src/lj_target.h +++ b/src/luajit/src/lj_target.h @@ -58,9 +58,13 @@ typedef uint32_t RegSP; #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; #define RSET_BITS 6 +#define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs)) #else typedef uint32_t RegSet; #define RSET_BITS 5 +#define rset_picktop_(rs) ((Reg)lj_fls(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) #endif #define RID2RSET(r) (((RegSet)1) << (r)) @@ -71,13 +75,6 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -#define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63)) -#define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs)) -#else -#define rset_picktop_(rs) ((Reg)lj_fls(rs)) -#define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) -#endif /* -- Register allocation cost -------------------------------------------- */ diff --git a/src/luajit/src/msvcbuild.bat b/src/luajit/src/msvcbuild.bat index f9bf25289..2cfcf26e7 100644 --- a/src/luajit/src/msvcbuild.bat +++ b/src/luajit/src/msvcbuild.bat @@ -34,20 +34,26 @@ if exist minilua.exe.manifest^ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe -@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64 @set LJARCH=x64 @minilua -@if errorlevel 8 goto :X64 +@if errorlevel 8 goto :NO32 @set DASC=vm_x86.dasc -@set DASMFLAGS=-D WIN -D JIT -D FFI +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU @set LJARCH=x86 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 +@goto :DA +:NO32 +@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64 +@set DASC=vm_arm64.dasc +@set LJARCH=arm64 +@goto :DA :X64 -@if "%1" neq "nogc64" goto :GC64 +@if "%1" neq "nogc64" goto :DA @shift @set DASC=vm_x86.dasc @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 -:GC64 +:DA minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD diff --git a/src/luajit/src/vm_arm64.dasc b/src/luajit/src/vm_arm64.dasc index 698b42104..b94a9c0ef 100644 --- a/src/luajit/src/vm_arm64.dasc +++ b/src/luajit/src/vm_arm64.dasc @@ -3816,9 +3816,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if JIT | // RA = base (ignored), RC = traceno | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0. | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2w |.if PAUTH | ldr RA, TRACE:RC->mcauth |.else