From f3d75075ed91137699c6071abe49e2252e794a9c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 18 Aug 2017 12:52:14 +0200 Subject: [PATCH 01/21] Use https for freelists.org links. --- doc/ext_ffi_semantics.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index 899640ce10..ae3c037964 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -844,7 +844,7 @@

Parameterized Types

The main use for parameterized types are libraries implementing abstract data types -(» example), +(example), similar to what can be achieved with C++ template metaprogramming. Another use case are derived types of anonymous structs, which avoids pollution of the global struct namespace. From 6b0824852677cc12570c20a3211fbfe0e4f0ce14 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 28 Aug 2017 10:43:37 +0200 Subject: [PATCH 02/21] x64/LJ_GC64: Fix fallback case of asm_fuseloadk64(). Contributed by Peter Cawley. --- src/lj_asm_x86.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 3e189b1d04..55c02d242e 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -387,6 +387,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); } as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); as->mrm.base = RID_RIP; From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 3 Sep 2017 23:20:53 +0200 Subject: [PATCH 03/21] PPC: Add soft-float support to JIT compiler backend. Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc. --- src/lj_arch.h | 1 - src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 278 insertions(+), 44 deletions(-) diff --git a/src/lj_arch.h b/src/lj_arch.h index 0145a7c020..5962f3afb0 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -273,7 +273,6 @@ #endif #if LJ_ABI_SOFTFP -#define LJ_ARCH_NOJIT 1 /* NYI */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #else #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 6daa861b91..1955429f0b 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, emit_tab(as, pi, rt, left, right); } +#if !LJ_SOFTFP /* Fuse to multiply-add/sub instruction. */ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) { @@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) } return 0; } +#endif /* -- Calls --------------------------------------------------------------- */ @@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; + Reg gpr = REGARG_FIRSTGPR; +#if !LJ_SOFTFP + Reg fpr = REGARG_FIRSTFPR; +#endif if ((void *)ci->func) emit_call(as, (void *)ci->func); for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; if (ref) { IRIns *ir = IR(ref); +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ @@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; } - } else { + } else +#endif + { if (gpr <= REGARG_LASTGPR) { lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ ra_leftov(as, gpr, ref); @@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } checkmclim(as); } +#if !LJ_SOFTFP if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); +#endif } /* Setup result reg/sp for call. Evict scratch regs. */ @@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; +#endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ if (hiop && ra_hasreg((ir+1)->r)) @@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { + if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { /* Use spill slot or temp slots. */ int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; @@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir) /* -- Type conversions ---------------------------------------------------- */ +#if !LJ_SOFTFP static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { RegSet allow = RSET_FPR; @@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fab(as, PPCI_FADD, tmp, left, right); } +#endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); lua_assert(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ +#if LJ_SOFTFP + /* FP conversions are handled by SPLIT. */ + lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lua_assert(irt_type(ir->t) != st); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, left); } } - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); @@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - int32_t ofs; + int32_t ofs = SPOFS_TMP; +#if LJ_SOFTFP + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir+i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); + } + } + ofs = sps_scale(ir->s & ~1); + } else { + Reg rhi = ra_dest(as, ir+1, RSET_GPR); + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); + emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); + } + } +#else RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); +#endif asm_guardcc(as, CC_EQ); emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); } @@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) Reg src = ra_alloc1(as, ref, allow); emit_setgl(as, src, tmptv.gcr); } - type = ra_allock(as, irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, ref+1, allow); + else + type = ra_allock(as, irt_toitype(ir->t), allow); emit_setgl(as, type, tmptv.it); } } @@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg tisnum = RID_NONE, tmpnum = RID_NONE; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); +#if LJ_SOFTFP + if (!isk) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + if (irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + tmpnum = (irkey+1)->r; + ra_noweak(as, tmpnum); + } else { + tmpnum = ra_allocref(as, refkey+1, allow); + } + rset_clear(allow, tmpnum); + } + } +#else if (irt_isnum(kt)) { key = ra_alloc1(as, refkey, RSET_FPR); tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); @@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); } +#endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); @@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) asm_guardcc(as, CC_EQ); else emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { + if (!LJ_SOFTFP && irt_isnum(kt)) { emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); emit_condbranch(as, PPCI_BC, CC_GE, l_next); emit_ab(as, PPCI_CMPLW, tmp1, tisnum); @@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_ab(as, PPCI_CMPW, tmp2, key); emit_condbranch(as, PPCI_BC, CC_NE, l_next); } - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); + if (LJ_SOFTFP && ra_hasreg(tmpnum)) + emit_ab(as, PPCI_CMPW, tmp1, tmpnum); + else + emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); if (!irt_ispri(kt)) emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); } @@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) (((char *)as->mcp-(char *)l_loop) & 0xffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(irkey) : 1; if (khash == 0) { emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); emit_tab(as, PPCI_ADD, dest, dest, tmp1); emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); @@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { + if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { +#if LJ_SOFTFP + emit_asb(as, PPCI_XOR, tmp2, key, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); + emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); +#else int32_t ofs = ra_spill(as, irkey); emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); +#endif } else { emit_asb(as, PPCI_XOR, tmp2, key, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); @@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir) case IRT_U8: return PPCI_LBZ; case IRT_I16: return PPCI_LHA; case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; + case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; default: return PPCI_LWZ; } } @@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir) switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return PPCI_STB; case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; + case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; default: return PPCI_STW; } } @@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); if (irt_isi8(ir->t)) emit_as(as, PPCI_EXTSB, dest, dest); @@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) Reg src = ra_alloc1(as, irb->op1, RSET_GPR); asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg src = ra_alloc1(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } @@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = AHUREF_LSX; + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + ofs = 0; + } if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t)); + if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); @@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) asm_guardcc(as, CC_GE); emit_ab(as, PPCI_CMPLW, type, tisnum); if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { + if (!LJ_SOFTFP && ofs == AHUREF_LSX) { tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, (idx&255)), (idx>>8))); emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); + emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, + ofs+4*LJ_SOFTFP); } } } else { @@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = AHUREF_LSX; if (ir->r == RID_SINK) return; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { src = ra_alloc1(as, ir->op2, RSET_FPR); } else { if (!irt_ispri(ir->t)) { @@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) rset_clear(allow, src); ofs = 0; } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, (ir+1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (ofs == AHUREF_LSX) { emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); emit_slwi(as, RID_TMP, (idx>>8), 3); @@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; + int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); lua_assert(LJ_DUALNUM || !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); +#if LJ_SOFTFP + lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } +#else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { + } else +#endif + if (ra_used(ir)) { lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); dest = ra_scratch(as, RSET_FPR); @@ -994,10 +1097,13 @@ static void asm_sload(ASMState *as, IRIns *ir) if ((ir->op2 & IRSLOAD_TYPECHECK)) { Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); +#if !LJ_SOFTFP type = RID_TMP; +#endif + emit_ab(as, PPCI_CMPLW, type, tisnum); } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); + if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, + base, ofs-(LJ_SOFTFP?0:4)); } else { if ((ir->op2 & IRSLOAD_TYPECHECK)) { asm_guardcc(as, CC_NE); @@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir) /* -- Arithmetic and logic operations ------------------------------------- */ +#if !LJ_SOFTFP static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRIns *ir) else asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } +#endif static void asm_add(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) asm_fparith(as, ir, PPCI_FADD); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); PPCIns pi; @@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir) static void asm_sub(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) asm_fparith(as, ir, PPCI_FSUB); - } else { + } else +#endif + { PPCIns pi = PPCI_SUBF; Reg dest = ra_dest(as, ir, RSET_GPR); Reg left, right; @@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir) static void asm_mul(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fparith(as, ir, PPCI_FMUL); - } else { + } else +#endif + { PPCIns pi = PPCI_MULLW; Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); @@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_neg(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fpunary(as, ir, PPCI_FNEG); - } else { + } else +#endif + { Reg dest, left; PPCIns pi = PPCI_NEG; if (as->flagmcp == as->mcp) { @@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) #define asm_bror(as, ir) lua_assert(0) +#if LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir) +{ + CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; + IRRef args[4]; + MCLabel l_right, l_end; + Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); + Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); + Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); + PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; + righthi = (lefthi >> 8); lefthi &= 255; + rightlo = (leftlo >> 8); leftlo &= 255; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + l_end = emit_label(as); + if (desthi != righthi) emit_mr(as, desthi, righthi); + if (destlo != rightlo) emit_mr(as, destlo, rightlo); + l_right = emit_label(as); + if (l_end != l_right) emit_jmp(as, l_end); + if (desthi != lefthi) emit_mr(as, desthi, lefthi); + if (destlo != leftlo) emit_mr(as, destlo, leftlo); + if (l_right == as->mcp+1) { + cond ^= 4; l_right = l_end; ++as->mcp; + } + emit_condbranch(as, PPCI_BC, cond, l_right); + ra_evictset(as, RSET_SCRATCH); + emit_cmpi(as, RID_RET, 1); + asm_gencall(as, &ci, args); +} +#endif + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); Reg tmp = dest; Reg right, left = ra_alloc2(as, ir, RSET_FPR); @@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) static void asm_comp(ASMState *as, IRIns *ir) { PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guardcc(as, (cc >> 4)); @@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns *ir) #define asm_equal(as, ir) asm_comp(as, ir) +#if LJ_SOFTFP +/* SFP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); + } + ra_evictset(as, drop); + asm_setupresult(as, ir, ci); + switch ((IROp)ir->o) { + case IR_ULT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + case IR_ULE: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 1); + break; + case IR_GE: case IR_GT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 2); + default: + asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + break; + } + asm_gencall(as, ci, args); +} +#endif + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) @@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRIns *ir) /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI +#if LJ_HASFFI || LJ_SOFTFP /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ +#if LJ_HASFFI && !LJ_SOFTFP if (usehi || uselo) asm_conv64(as, ir); return; +#endif } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64(as, ir); +#endif + return; +#if LJ_SOFTFP + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir-1); return; +#endif } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { @@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns *ir) } if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; +#endif +#if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: + if (!uselo) + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; +#endif case IR_CALLN: + case IR_CALLS: case IR_CALLXS: if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ break; +#if LJ_SOFTFP + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: +#endif case IR_CNEWI: /* Nothing to do here. Handled by lo op itself. */ break; @@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { +#if LJ_SOFTFP + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); +#endif } else { Reg type; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); @@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & (SNAP_CONT|SNAP_FRAME))) { if (s == 0) continue; /* Do not overwrite link to previous frame. */ type = ra_allock(as, (int32_t)(*flinks--), allow); +#if LJ_SOFTFP + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); +#endif } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } @@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; } else { if (ngpr > 0) ngpr--; else nslots++; } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); + return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : + REGSP_HINT(RID_RET); } static void asm_setup_target(ASMState *as) From 05fbdf565c700365d22e38f11478101a0d92a23e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 10 Sep 2017 14:05:30 +0200 Subject: [PATCH 04/21] x64/LJ_GC64: Fix type-check-only variant of SLOAD. Thanks to Peter Cawley. --- src/lj_asm_x86.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 55c02d242e..af54dc7f11 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1759,7 +1759,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_i8(as, irt_toitype(t)); emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); emit_shifti(as, XOg_SAR|REX_64, tmp, 47); - emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); #else } else { emit_i8(as, irt_toitype(t)); From bf12f1dafb157008b963f829b57b2472b6993cc8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 18 Sep 2017 09:50:22 +0200 Subject: [PATCH 05/21] MIPS64: Hide internal function. --- src/lj_ccall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 799be48724..25e938cb7e 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -848,7 +848,8 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) return 0; /* Struct is in GPRs. */ } -void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) +static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, + int ft) { if (LJ_ABI_SOFTFP ? ft : ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { From 0c0e7b168ea147866835954267c151ef789f64fb Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 20 Sep 2017 19:39:50 +0200 Subject: [PATCH 06/21] DynASM/x86: Fix potential REL_A overflow. Thanks to Joshua Haberman. --- dynasm/dasm_x86.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 90dc5d1595..f9260b0c6e 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -395,7 +395,8 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; b++; n = (int)(ptrdiff_t)D->globals[-n]; - case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ case DASM_REL_PC: rel_pc: { int shrink = *b++; int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } From b4ed3219a1a98dd9fe7d1e3eeea3b82f5a780948 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 2 Oct 2017 09:22:46 +0200 Subject: [PATCH 07/21] LJ_GC64: Fix ir_khash for non-string GCobj. Contributed by Peter Cawley. --- src/lj_asm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lj_asm.c b/src/lj_asm.c index bed2268efe..d961927bde 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1017,7 +1017,11 @@ static uint32_t ir_khash(IRIns *ir) } else { lua_assert(irt_isgcv(ir->t)); lo = u32ptr(ir_kgc(ir)); +#if LJ_GC64 + hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); +#else hi = lo + HASH_BIAS; +#endif } return hashrot(lo, hi); } From 850f8c59d3d04a9847f21f32a6c36d8269b5b6b1 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 2 Oct 2017 23:10:56 +0200 Subject: [PATCH 08/21] LJ_GC64: Make ASMREF_L references 64 bit. Reported by Yichun Zhang. --- src/lj_asm.c | 1 + src/lj_ir.h | 4 +++- src/lj_opt_sink.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index d961927bde..753fe6bd1d 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2015,6 +2015,7 @@ static void asm_setup_regsp(ASMState *as) ir->prev = REGSP_INIT; if (irt_is64(ir->t) && ir->o != IR_KNULL) { #if LJ_GC64 + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ #else /* Make life easier for backends by putting address of constant in i. */ diff --git a/src/lj_ir.h b/src/lj_ir.h index 34c2785394..8057a7508c 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -377,10 +377,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) #if LJ_GC64 +/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ #define IRT_IS64 \ ((1u<cur.nk); ir < irbase; ir++) { irt_clearmark(ir->t); ir->prev = REGSP_INIT; + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ if (irt_is64(ir->t) && ir->o != IR_KNULL) ir++; } From 9f0caad0e43f97a4613850b3874b851cb1bc301d Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 8 Nov 2017 12:53:05 +0100 Subject: [PATCH 09/21] Fix FOLD rule for strength reduction of widening. Reported by Matthew Burk. --- src/lj_opt_fold.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 3d0e35a6f9..5dc7ae3da9 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1052,7 +1052,7 @@ LJFOLDF(simplify_conv_sext) if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; lua_assert(irt_isint(J->scev.t)); - if (lo && IR(lo)->i + ofs >= 0) { + if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: #if LJ_TARGET_X64 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ From 06cd9fce7df440323647174f1ca4a01281ec8acd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 8 Nov 2017 12:53:48 +0100 Subject: [PATCH 10/21] ARM64: Fix assembly of HREFK. Reported by Jason Teplitz. --- src/lj_asm_arm64.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 8fd92e76fd..cbb186d316 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -869,14 +869,12 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); int32_t kofs = ofs + (int32_t)offsetof(Node, key); int bigofs = !emit_checkofs(A64I_LDRx, ofs); - RegSet allow = RSET_GPR; Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, allow); - Reg key = ra_scratch(as, rset_clear(allow, node)); - Reg idx = node; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); uint64_t k; lua_assert(ofs % sizeof(Node) == 0); - rset_clear(allow, key); if (bigofs) { idx = dest; rset_clear(allow, dest); @@ -892,7 +890,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } else { k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); } - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); + key = ra_scratch(as, allow); + emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); emit_lso(as, A64I_LDRx, key, idx, kofs); if (bigofs) emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); From 99cdfbf6a1e8856f64908072ef10443a7eab14f2 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 8 Nov 2017 12:54:03 +0100 Subject: [PATCH 11/21] MIPS64: Fix register allocation in assembly of HREF. Contributed by James Cowgill. --- src/lj_asm_mips.h | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 1406a873ed..3a4679b855 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -859,6 +859,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; +#if LJ_64 + Reg cmp64 = RID_NONE; +#endif IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); int isk = irref_isk(refkey); @@ -901,6 +904,26 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); +#if LJ_64 + if (LJ_SOFTFP || !irt_isnum(kt)) { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (LJ_SOFTFP && irt_isnum(kt)) { + cmp64 = key; + } else if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + } else { + lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + k = ~((int64_t)~irt_toitype(ir->t) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + } +#endif /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); @@ -943,24 +966,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (LJ_SOFTFP && irt_isnum(kt)) { - emit_branch(as, MIPSI_BEQ, tmp1, key, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { - int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - refk = ra_allock(as, k, allow); - rset_clear(allow, refk); - } - emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); } else { - Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, pri); - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); + emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); } *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); if (!isk && irt_isaddr(kt)) { From 33082a6f4778aa152f6a4a684a7fe79436f1ecb6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Nov 2017 12:53:34 +0100 Subject: [PATCH 12/21] ARM64: Fix xpcall() error case. Thanks to Stefan Pejic. --- src/vm_arm64.dasc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 3eaf37638e..241c58a1a6 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1185,12 +1185,12 @@ static void build_subroutines(BuildCtx *ctx) | subs NARGS8:RC, NARGS8:RC, #16 | blo ->fff_fallback | mov RB, BASE - | add BASE, BASE, #24 | asr ITYPE, CARG2, #47 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 | cmn ITYPE, #-LJ_TFUNC | add PC, TMP0, #24+FRAME_PCALL | bne ->fff_fallback // Traceback must be a function. + | add BASE, BASE, #24 | stp CARG2, CARG1, [RB] // Swap function and traceback. | cbz NARGS8:RC, ->vm_call_dispatch | b <1 From 7dbf0b05f1228c1c719866db5e5f3d58f87f74c8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Nov 2017 12:58:12 +0100 Subject: [PATCH 13/21] Fix saved bytecode encapsulated in ELF objects. Thanks to Dimitry Andric. --- src/jit/bcsave.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index aa677dfc34..c94064e48d 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -275,7 +275,7 @@ typedef struct { o.sect[2].size = fofs(ofs) o.sect[3].type = f32(3) -- .strtab o.sect[3].ofs = fofs(sofs + ofs) - o.sect[3].size = fofs(#symname+1) + o.sect[3].size = fofs(#symname+2) ffi.copy(o.space+ofs+1, symname) ofs = ofs + #symname + 2 o.sect[4].type = f32(1) -- .rodata From d417ded17945b4211608d497d50b509e0274f5e0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 18 Nov 2017 12:23:57 +0100 Subject: [PATCH 14/21] ARM64: Fix xpcall() error case (really). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to François Perrad and Stefan Pejic. --- src/vm_arm64.dasc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 241c58a1a6..c55794a689 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1182,7 +1182,7 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc xpcall | ldp CARG1, CARG2, [BASE] | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #16 + | subs NARGS8:TMP1, NARGS8:RC, #16 | blo ->fff_fallback | mov RB, BASE | asr ITYPE, CARG2, #47 @@ -1190,6 +1190,7 @@ static void build_subroutines(BuildCtx *ctx) | cmn ITYPE, #-LJ_TFUNC | add PC, TMP0, #24+FRAME_PCALL | bne ->fff_fallback // Traceback must be a function. + | mov NARGS8:RC, NARGS8:TMP1 | add BASE, BASE, #24 | stp CARG2, CARG1, [RB] // Swap function and traceback. | cbz NARGS8:RC, ->vm_call_dispatch From ea7071d3c30b6432bfe6f8a9d263e0285cec25e3 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 18 Nov 2017 12:25:35 +0100 Subject: [PATCH 15/21] MIPS64: Fix xpcall() error case. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to François Perrad and Stefan Pejic. --- src/vm_mips64.dasc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 75b38deeee..a78cd25144 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -1399,15 +1399,16 @@ static void build_subroutines(BuildCtx *ctx) |. nop | |.ffunc xpcall - | daddiu NARGS8:RC, NARGS8:RC, -16 + | daddiu NARGS8:TMP0, NARGS8:RC, -16 | ld CARG1, 0(BASE) | ld CARG2, 8(BASE) - | bltz NARGS8:RC, ->fff_fallback + | bltz NARGS8:TMP0, ->fff_fallback |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | gettp AT, CARG2 | daddiu AT, AT, -LJ_TFUNC | bnez AT, ->fff_fallback // Traceback must be a function. |. move TMP2, BASE + | move NARGS8:RC, NARGS8:TMP0 | daddiu BASE, BASE, 24 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT From 58d0dde0a2df49abc991decbabff15230010829a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 14 Jan 2018 13:57:00 +0100 Subject: [PATCH 16/21] Fix IR_BUFPUT assembly. Thanks to Peter Cawley. --- src/lj_asm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index 753fe6bd1d..5f83779e14 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1119,7 +1119,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; IRRef args[3]; IRIns *irs; - int kchar = -1; + int kchar = -129; args[0] = ir->op1; /* SBuf * */ args[1] = ir->op2; /* GCstr * */ irs = IR(ir->op2); @@ -1127,7 +1127,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) if (irs->o == IR_KGC) { GCstr *s = ir_kstr(irs); if (s->len == 1) { /* Optimize put of single-char string constant. */ - kchar = strdata(s)[0]; + kchar = (int8_t)strdata(s)[0]; /* Signed! */ args[1] = ASMREF_TMP1; /* int, truncated to char */ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; } @@ -1154,7 +1154,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); if (args[1] == ASMREF_TMP1) { Reg tmp = ra_releasetmp(as, ASMREF_TMP1); - if (kchar == -1) + if (kchar == -129) asm_tvptr(as, tmp, irs->op1); else ra_allockreg(as, kchar, tmp); From 430d9f8f7ebb779948dbd43944b876b1a3f58551 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 14 Jan 2018 14:11:59 +0100 Subject: [PATCH 17/21] Fix string.format("%c", 0). --- src/lib_string.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/lib_string.c b/src/lib_string.c index d1a60b617f..c1e595c974 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -850,20 +850,21 @@ LJLIB_CF(string_format) } else { /* format item */ char form[MAX_FMTSPEC]; /* to store the format (`%...') */ char buff[MAX_FMTITEM]; /* to store the formatted item */ + int n = 0; if (++arg > top) luaL_argerror(L, arg, lj_obj_typename[0]); strfrmt = scanformat(L, strfrmt, form); switch (*strfrmt++) { case 'c': - sprintf(buff, form, lj_lib_checkint(L, arg)); + n = sprintf(buff, form, lj_lib_checkint(L, arg)); break; case 'd': case 'i': addintlen(form); - sprintf(buff, form, num2intfrm(L, arg)); + n = sprintf(buff, form, num2intfrm(L, arg)); break; case 'o': case 'u': case 'x': case 'X': addintlen(form); - sprintf(buff, form, num2uintfrm(L, arg)); + n = sprintf(buff, form, num2uintfrm(L, arg)); break; case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { TValue tv; @@ -880,10 +881,10 @@ LJLIB_CF(string_format) nbuf[len] = '\0'; for (p = form; *p < 'A' && *p != '.'; p++) ; *p++ = 's'; *p = '\0'; - sprintf(buff, form, nbuf); + n = sprintf(buff, form, nbuf); break; } - sprintf(buff, form, (double)tv.n); + n = sprintf(buff, form, (double)tv.n); break; } case 'q': @@ -902,14 +903,14 @@ LJLIB_CF(string_format) luaL_addvalue(&b); continue; } - sprintf(buff, form, strdata(str)); + n = sprintf(buff, form, strdata(str)); break; } default: lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); break; } - luaL_addlstring(&b, buff, strlen(buff)); + luaL_addlstring(&b, buff, n); } } luaL_pushresult(&b); From 97356f99f190c8be7cf12bcf7efc5132f7e6bd5f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 18 Aug 2017 12:52:14 +0200 Subject: [PATCH 18/21] Use https for freelists.org links. (cherry picked from commit f3d75075ed91137699c6071abe49e2252e794a9c) --- doc/ext_ffi_semantics.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index 800b6b18a0..268ac104f5 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -864,7 +864,7 @@

Parameterized Types

The main use for parameterized types are libraries implementing abstract data types -(» example), +(example), similar to what can be achieved with C++ template metaprogramming. Another use case are derived types of anonymous structs, which avoids pollution of the global struct namespace. From f6c52a3763cf32d50ffdf5feb94836f1c0124913 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 20 Sep 2017 19:42:34 +0200 Subject: [PATCH 19/21] Merge branch 'master' into v2.1 (cherry picked from commit 6a2d8b0b4d49eb5aac600c219e5903420806e56e) --- dynasm/dasm_x86.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index bc636357a6..f598da9afd 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -421,7 +421,8 @@ int dasm_encode(Dst_DECL, void *buffer) } case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; b++; n = (int)(ptrdiff_t)D->globals[-n]; - case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ case DASM_REL_PC: rel_pc: { int shrink = *b++; int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } From 91dbe2369644317c20aa8e11f1e78f58faf712ce Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 8 Nov 2017 12:53:24 +0100 Subject: [PATCH 20/21] Merge branch 'master' into v2.1 (cherry picked from commit 4b17a6a2ff4c93304433e13fc0e55d0a33a10318) --- src/lj_opt_fold.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index a4272cd4ee..d4e9f81694 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1168,7 +1168,7 @@ LJFOLDF(simplify_conv_sext) if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; lua_assert(irt_isint(J->scev.t)); - if (lo && IR(lo)->i + ofs >= 0) { + if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ return LEFTFOLD; From 42bb70b283be7dcacdb55be45f6e94c5c241ed4a Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 17 Jan 2018 14:53:49 +0000 Subject: [PATCH 21/21] lj_ir.h: Fix typo made when resolving merge conflict --- src/lj_ir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_ir.h b/src/lj_ir.h index c8acf1fafd..ee3373544c 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -379,7 +379,7 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define IRT_IS64 \ ((1u<> irt_type(t)) & 1)