diff --git a/changelog/dmd.AArch64.dd b/changelog/dmd.AArch64.dd new file mode 100644 index 000000000000..efb102bf529f --- /dev/null +++ b/changelog/dmd.AArch64.dd @@ -0,0 +1,28 @@ +# Add code generation for AArch64 CPU + +Adds the switch `-arm` which causes dmd to emit AArch64 code. + +This is a work in progress. It is not a functioning compiler. + +Things not implemented: + +* inline assembler +* object file generation for mscoff and mach formats +* Elf object file generation does not use AArch64 fixups +* floating point +* vector operations +* op= operators +* block initialization of structs +* pic/pie +* non-trivial lvalues +* stack arguments +* alloca() +* exception handling +* correct DWARF symbolic debug info +* testing on an actual AArch64 computer +* line coverage reports +* stack stomping code +* local stack frames larger than 64Kb +* variadic arguments +* disassembler is incomplete +* ENDBR64 diff --git a/compiler/src/.dscanner.ini b/compiler/src/.dscanner.ini index 94e2cd82006f..14cc23d9b567 100644 --- a/compiler/src/.dscanner.ini +++ b/compiler/src/.dscanner.ini @@ -127,6 +127,9 @@ unused_variable_check="-dmd.backend.aarray,\ -dmd.backend.cgelem,\ -dmd.backend.cgobj,\ -dmd.backend.cgsched,\ +-dmd.backend.arm.cod1,\ +-dmd.backend.arm.cod2,\ +-dmd.backend.arm.cod3,\ -dmd.backend.arm.disasmarm,\ -dmd.backend.arm.instr,\ -dmd.backend.x86.cgxmm,\ diff --git a/compiler/src/build.d b/compiler/src/build.d index 7293d6cde610..b29f849165e6 100755 --- a/compiler/src/build.d +++ b/compiler/src/build.d @@ -1623,6 +1623,7 @@ auto sourceFiles() x86/cod3.d cv8.d dcgcv.d pdata.d util2.d var.d backconfig.d drtlsym.d dwarfeh.d ptrntab.d dvarstats.d dwarfdbginf.d cgen.d goh.d barray.d cgcse.d elpicpie.d machobj.d elfobj.d mscoffobj.d filespec.d cgobj.d aarray.d x86/disasm86.d arm/disasmarm.d arm/instr.d + arm/cod1.d arm/cod2.d arm/cod3.d arm/cod4.d " ), }; diff --git a/compiler/src/dmd/backend/arm/cod1.d b/compiler/src/dmd/backend/arm/cod1.d new file mode 100644 index 
000000000000..70fb6ad6742b --- /dev/null +++ b/compiler/src/dmd/backend/arm/cod1.d @@ -0,0 +1,2471 @@ +/** + * Code generation 1 + * + * Handles function calls: putting arguments in registers / on the stack, and jumping to the function. + * + * Compiler implementation of the + * $(LINK2 https://www.dlang.org, D programming language). + * + * Copyright: Copyright (C) 1984-1998 by Symantec + * Copyright (C) 2000-2024 by The D Language Foundation, All Rights Reserved + * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) + * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/arm/cod1.d, backend/cod1.d) + * Documentation: https://dlang.org/phobos/dmd_backend_arm_cod1.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/arm/cod1.d + */ + +module dmd.backend.arm.cod1; + +import core.bitop; +import core.stdc.stdio; +import core.stdc.stdlib; +import core.stdc.string; + +import dmd.backend.cc; +import dmd.backend.cdef; +import dmd.backend.code; +import dmd.backend.x86.code_x86; +import dmd.backend.codebuilder; +import dmd.backend.mem; +import dmd.backend.el; +import dmd.backend.global; +import dmd.backend.obj; +import dmd.backend.oper; +import dmd.backend.rtlsym; +import dmd.backend.ty; +import dmd.backend.type; +import dmd.backend.arm.cod3 : COND, genBranch, conditionCode, gentstreg; +import dmd.backend.arm.instr; +import dmd.backend.x86.cod1 : cdisscaledindex, ssindex_array; + +import dmd.backend.cg : segfl, stackfl; + +nothrow: +@safe: + +/************************************ + * Given cs which has the Effective Address encoded into it, + * create a load instruction to reg, and write it to cs.Iop + * Params: + * cs = EA information + * reg = destination register + * szw = number of bytes to write - 4,8 + * szr = number of bytes to read - 1,2,4,8 + */ +void loadFromEA(ref code cs, reg_t reg, uint szw, uint szr) +{ + if (cs.reg != NOREG) + 
{ + if (cs.reg != reg) // do not mov onto itself + cs.Iop = INSTR.mov_register(szw == 8,cs.reg,reg); // MOV reg,cs.reg + cs.IFL1 = FLunde; + } + else if (cs.index != NOREG) + { + // LDRB/LDRH/LDR reg,[cs.base,cs.index,extend S] + if (szr == 1) + cs.Iop = INSTR.ldrb_reg(szw == 8, cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + else if (szr == 2) + cs.Iop = INSTR.ldrh_reg(szw == 8, cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + else + cs.Iop = INSTR.ldr_reg_gen(szw == 8, cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + } + else if (cs.base != NOREG) + { + // LDRB/LDRH/LDR reg,[cs.base, #0] + if (szr == 1) + cs.Iop = INSTR.ldrb_imm(szw == 8, reg, cs.base, 0); + else if (szr == 2) + cs.Iop = INSTR.ldrh_imm(szw == 8, reg, cs.base, 0); + else + cs.Iop = INSTR.ldr_imm_gen(szw == 8, reg, cs.base, 0); + } + else + assert(0); +} +/************************************ + * Given cs which has the Effective Address encoded into it, + * create a store instruction to reg, and write it to cs.Iop + * Params: + * cs = EA information + * reg = source register + * sz = number of bytes to store - 1,2,4,8 + */ +void storeToEA(ref code cs, reg_t reg, uint sz) +{ + if (cs.reg != NOREG) + { + if (cs.reg != reg) // do not mov onto itself + cs.Iop = INSTR.mov_register(sz == 8,reg,cs.reg); // MOV cs.reg,reg + cs.IFL1 = FLunde; + } + else if (cs.index != NOREG) + { + // STRB/STRH/STR reg,[cs.base,cs.index,extend S] + if (sz == 1) + cs.Iop = INSTR.strb_reg(cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + else if (sz == 2) + cs.Iop = INSTR.strh_reg(cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + else + cs.Iop = INSTR.str_reg_gen(sz == 8, cs.index, cs.Sextend & 7, cs.Sextend >> 3, cs.base, reg); + } + else if (cs.base != NOREG) + { + // STRB/STRH/STR reg,[cs.base, #0] + if (sz == 1) + cs.Iop = INSTR.strb_imm(reg, cs.base, 0); + else if (sz == 2) + cs.Iop = INSTR.strh_imm(reg, cs.base, 0); + else + cs.Iop = INSTR.str_imm_gen(sz == 8, reg, 
cs.base, 0); + } + else + assert(0); +} + +/************************** + * Determine if e is a scaled index addressing mode. + * + * size S extend + * 0 0 UXTW *(int*)((char*)p + u); + * 0 0 LSL *(int*)((char*)p + l) + * 0 0 SXTW *(int*)((char*)p + i); + * 0 0 SXTX + * 0 1 UXTW #2 *(int*)((char*)p + u*4) + * 0 1 LSL #2 *(int*)((char*)p + l*4) + * 0 1 SXTW #2 *(int*)((char*)p + i*4) + * 0 1 SXTX #2 + * 1 0 UXTW *(long*)((char*)p + u) + * 1 0 LSL *(long*)((char*)p + l) + * 1 0 SXTW *(long*)((char*)p + i) + * 1 0 SXTX + * 1 1 UXTW #3 *(long*)((char*)p + u*8) + * 1 1 LSL #3 *(long*)((char*)p + l*8) + * 1 1 SXTW #3 *(long*)((char*)p + i*8) + * 1 1 SXTX #3 + * Or: + * SXTB unsigned = FALSE; len = 8; + * SXTH unsigned = FALSE; len = 16; + * SXTW unsigned = FALSE; len = 32; + * SXTX unsigned = FALSE; len = 64; + * UXTB unsigned = TRUE; len = 8; + * UXTH unsigned = TRUE; len = 16; + * UXTW unsigned = TRUE; len = 32; + * UXTX unsigned = TRUE; len = 64; + * References: + * https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct + * https://www.scs.stanford.edu/~zyedidia/arm64/ldr_reg_gen.html + * https://www.scs.stanford.edu/~zyedidia/arm64/shared_pseudocode.html#impl-aarch64.ExtendReg.4 + * Returns: + * 0 not a scaled index addressing mode + * !=0 the scaling shift count + */ + +@trusted +int isscaledindex(tym_t ty, elem *e) +{ + const size = tysize(ty); // size of load + if (size != 4 && size != 8) + return 0; + + uint S; + uint option; + e = *el_scancommas(&e); + if (e.Eoper == OPshl && + !e.Ecount && + e.E2.Eoper == OPconst) + { + const shift = e.E2.Vuns; + if (shift == 2 && size == 4 || + shift == 3 && size == 8) + { + const tym = e.E1.Ety; + const sz = tysize(tym); + const uns = tyuns(tym); + S = 1; + if (sz == 4) + option = uns ? 
Extend.UXTW : Extend.SXTW; + else if (sz == 8) + option = Extend.LSL; + else + return 0; + return (S << 3) | option; + } + return 0; + } + else + { + const tym = e.Ety; + const sz = tysize(tym); + const uns = tyuns(tym); + S = 0; + if (sz == 4) + option = uns ? Extend.UXTW : Extend.SXTW; + else if (sz == 8) + option = Extend.LSL; + else + return 0; + return (S << 3) | option; + } +} + + +// void genEEcode +// uint gensaverestore +// void genstackclean + +/********************************* + * Generate code for a logical expression. + * Input: + * e elem + * jcond + * bit 1 if true then goto jump address if e + * if false then goto jump address if !e + * 2 don't call save87() + * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond + * targ either code or block pointer to destination + */ + +@trusted +void logexp(ref CodeBuilder cdb, elem *e, uint jcond, uint fltarg, code *targ) +{ + //printf("logexp(e = %p, jcond = %d)\n", e, jcond); elem_print(e); + if (tybasic(e.Ety) == TYnoreturn) + { + con_t regconsave = cgstate.regcon; + regm_t retregs = 0; + codelem(cgstate,cdb,e,retregs,0); + regconsave.used |= cgstate.regcon.used; + cgstate.regcon = regconsave; + return; + } + + int no87 = 1; + docommas(cdb, e); // scan down commas + cgstate.stackclean++; + + if (!OTleaf(e.Eoper) && !e.Ecount) // if operator and not common sub + { + switch (e.Eoper) + { + case OPoror: + { + con_t regconsave; + if (jcond & 1) + { + logexp(cdb, e.E1, jcond, fltarg, targ); + regconsave = cgstate.regcon; + logexp(cdb, e.E2, jcond, fltarg, targ); + } + else + { + code *cnop = gen1(null, INSTR.nop); + logexp(cdb, e.E1, jcond | 1, FLcode, cnop); + regconsave = cgstate.regcon; + logexp(cdb, e.E2, jcond, fltarg, targ); + cdb.append(cnop); + } + andregcon(regconsave); + freenode(e); + cgstate.stackclean--; + return; + } + + case OPandand: + { + con_t regconsave; + if (jcond & 1) + { + code *cnop = gen1(null, INSTR.nop); // a dummy target address + logexp(cdb, e.E1, jcond & ~1, FLcode, 
cnop); + regconsave = cgstate.regcon; + logexp(cdb, e.E2, jcond, fltarg, targ); + cdb.append(cnop); + } + else + { + logexp(cdb, e.E1, jcond, fltarg, targ); + regconsave = cgstate.regcon; + logexp(cdb, e.E2, jcond, fltarg, targ); + } + andregcon(regconsave); + freenode(e); + cgstate.stackclean--; + return; + } + + case OPnot: + jcond ^= 1; + goto case OPbool; + + case OPbool: + case OPs8_16: + case OPu8_16: + case OPs16_32: + case OPu16_32: + case OPs32_64: + case OPu32_64: + case OPu32_d: + case OPd_ld: + logexp(cdb, e.E1, jcond, fltarg, targ); + freenode(e); + cgstate.stackclean--; + return; + + case OPcond: + { + code *cnop2 = gen1(null, INSTR.nop); // addresses of start of leaves + code *cnop = gen1(null, INSTR.nop); + logexp(cdb, e.E1, false, FLcode, cnop2); // eval condition + con_t regconold = cgstate.regcon; + logexp(cdb, e.E2.E1, jcond, fltarg, targ); + genBranch(cdb, COND.al, FLcode, cast(block *) cnop); // skip second leaf + + con_t regconsave = cgstate.regcon; + cgstate.regcon = regconold; + + cdb.append(cnop2); + logexp(cdb, e.E2.E2, jcond, fltarg, targ); + andregcon(regconold); + andregcon(regconsave); + freenode(e.E2); + freenode(e); + cdb.append(cnop); + cgstate.stackclean--; + return; + } + + default: + break; + } + } + + /* Special code for signed long compare. + * Not necessary for I64 until we do cents. 
+ */ + static if (0) + if (OTrel2(e.Eoper) && // if < <= >= > + !e.Ecount && + ( (I16 && tybasic(e.E1.Ety) == TYlong && tybasic(e.E2.Ety) == TYlong) || + (I32 && tybasic(e.E1.Ety) == TYllong && tybasic(e.E2.Ety) == TYllong)) + ) + { + longcmp(cdb, e, jcond != 0, fltarg, targ); + cgstate.stackclean--; + return; + } + + regm_t retregs = mPSW; // return result in flags + COND cond = conditionCode(e); // get jump opcode + if (!(jcond & 1)) + cond ^= 1; // toggle jump condition(s) + codelem(cgstate,cdb, e, retregs, true); // evaluate elem + if (no87) + cse_flush(cdb,no87); // flush CSE's to memory + genBranch(cdb, cond, fltarg, cast(block *) targ); // generate jmp instruction + cgstate.stackclean--; +} + +/****************************** + * Routine to aid in setting things up for gen(). + * Look for common subexpression. + * Can handle indirection operators, but not if they are common subs. + * Params: + * cdb = generated code sink + * e = elem where we get some of the data from + * cs = partially filled code to add + * op = opcode + * reg = destination register + * offset = data to be added to Voffset field + * keepmsk = mask of registers we must not destroy + * desmsk = mask of registers destroyed by executing the instruction + * rmx = RM.load/store + */ + +@trusted +void loadea(ref CodeBuilder cdb,elem *e,ref code cs,uint op,reg_t reg,targ_size_t offset, + regm_t keepmsk,regm_t desmsk, RM rmx = RM.rw) +{ + code* c, cg, cd; + + debug + if (debugw) + printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n", + e, &cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk)); + assert(e); + cs.Iflags = 0; + cs.Iop = op; + tym_t tym = e.Ety; + int sz = tysize(tym); + + /* Determine if location we want to get is in a register. If so, */ + /* substitute the register for the EA. */ + /* Note that operators don't go through this. CSE'd operators are */ + /* picked up by comsub(). 
*/ + if (e.Ecount && /* if cse */ + e.Ecount != e.Ecomsub) /* and cse was generated */ + { + assert(OTleaf(e.Eoper)); /* can't handle operands */ + regm_t rm = cgstate.regcon.cse.mval & ~cgstate.regcon.cse.mops & ~cgstate.regcon.mvar; // possible regs + if (sz == REGSIZE * 2) // value is in 2 registers + { + if (offset) + rm &= mMSW; /* only high words */ + else + rm &= mLSW; /* only low words */ + } + for (uint i = 0; rm; i++) + { + if (mask(i) & rm) + { + if (cgstate.regcon.cse.value[i] == e) // if register has elem + { + getregs(cdb, desmsk); + if (i != reg) + cdb.gen1(INSTR.mov_register(sz == 8,cast(reg_t)i,reg)); // MOV reg,i + return; + } + rm &= ~mask(i); + } + } + } + + getlvalue(cdb, cs, e, keepmsk, rmx); + cs.IEV1.Voffset += offset; + + loadFromEA(cs,reg,sz == 8 ? 8 : 4,sz); + + getregs(cdb, desmsk); // save any regs we destroy + cdb.gen(&cs); +} + +// uint getaddrmode +// void setaddrmode +// getlvalue_msw +// getlvalue_lsw + +/****************** + * Compute Effective Address (EA). + * Set in pcs the EA info. The EA addressing modes are: + * 1. reg - (not NOREG) + * 2. [base + offset] - (not NOREG), IFL1, IEV1.Vsymbol and IEV1.Voffset + * 3. 
[base + index*size] - (base and index are not NOREG) + * Generate to cdb any code needed + * Params: + * cdb = sink for any code generated + * pcs = set to addressing mode + * e = the lvalue elem + * keepmsk = mask of registers we must not destroy or use + * rm = RM.store a store operation into the lvalue only + * RM.load a read operation from the lvalue only + * RM.rw load and store + * References: + * addressing modes https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912 + */ + +@trusted +void getlvalue(ref CodeBuilder cdb,ref code pcs,elem *e,regm_t keepmsk,RM rm = RM.rw) +{ + FL fl; + uint opsave; + elem* e1, e11, e12; + bool e1isadd, e1free; + reg_t reg; + tym_t e1ty; + Symbol* s; + + //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); + //elem_print(e); + assert(e); + elem_debug(e); + if (e.Eoper == OPvar || e.Eoper == OPrelconst) + { + s = e.Vsym; + fl = s.Sfl; + if (tyfloating(s.ty())) + objmod.fltused(); + //symbol_print(*s); + } + else + fl = FLoper; + enum BP = 29; + enum SP = 31; + pcs.IFL1 = cast(ubyte)fl; + pcs.Iflags = CFoff; /* only want offsets */ + pcs.reg = NOREG; + pcs.base = NOREG; + pcs.index = NOREG; + pcs.Sextend = 0; + pcs.IEV1.Vsym = null; + pcs.IEV1.Voffset = 0; + + tym_t ty = e.Ety; + uint sz = tysize(ty); + if (tyfloating(ty)) + objmod.fltused(); + if (ty & mTYvolatile) + pcs.Iflags |= CFvolatile; + + void Lptr(){ + if (config.flags3 & CFG3ptrchk) + cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code + } + + //printf("fl: %s\n", fl_str(fl)); + switch (fl) + { + case FLoper: + debug + if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); + + switch (e.Eoper) + { + case OPadd: // this way when we want to do LEA + e1 = e; + e1free = false; + e1isadd = true; + break; + + case OPind: + case OPpostinc: // when doing (*p++ = ...) + case OPpostdec: // when doing (*p-- = ...) 
+ case OPbt: + case OPbtc: + case OPbtr: + case OPbts: + case OPvecfill: + e1 = e.E1; + e1free = true; + e1isadd = e1.Eoper == OPadd; + break; + + default: + printf("function: %s\n", funcsym_p.Sident.ptr); + elem_print(e); + assert(0); + } + e1ty = tybasic(e1.Ety); + if (e1isadd) + { + e12 = e1.E2; + e11 = e1.E1; + } + + /* First see if we can replace *(e+&v) with + * MOV idxreg,e + * EA = [ES:] &v+idxreg + */ + FL f = FLconst; + + /* Is address of `s` relative to RIP ? + */ + static bool relativeToRIP(Symbol* s) + { + if (config.exe == EX_WIN64) + return true; + if (config.flags3 & CFG3pie) + { + if (s.Sfl == FLtlsdata || s.ty() & mTYthread) + { + if (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat) + return false; + } + return true; + } + else + return (config.flags3 & CFG3pic) != 0; + } + + if (0 && e1isadd && + ((e12.Eoper == OPrelconst && + !relativeToRIP(e12.Vsym) + ) || + (e12.Eoper == OPconst && !e1.Ecount && el_signx32(e12))) && + e1.Ecount == e1.Ecomsub && + (!e1.Ecount || (~keepmsk & ALLREGS & mMSW)) && + tysize(e11.Ety) == REGSIZE + ) + { + f = el_fl(e12); + uint t; /* component of r/m field */ + int ss; + int ssi; + + if (e12.Eoper == OPrelconst) + f = el_fl(e12); + /*assert(datafl[f]);*/ /* what if addr of func? */ + /* Any register can be an index register */ + regm_t idxregs = cgstate.allregs & ~keepmsk; + assert(idxregs); + + /* See if e1.E1 can be a scaled index */ + ss = isscaledindex(ty, e11); + if (ss) + { + /* Load index register with result of e11.E1 */ + cdisscaledindex(cdb, e11, idxregs, keepmsk); + reg = findreg(idxregs); + { + t = stackfl[f] ? 
2 : 0; + pcs.Irm = modregrm(t, 0, 4); + pcs.Isib = modregrm(ss, reg & 7, 5); + if (reg & 8) + pcs.Irex |= REX_X; + } + } + else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && + !e11.Ecount && + e11.E2.Eoper == OPconst && + (ssi = ssindex(e11.Eoper, e11.E2.Vuns)) != 0 + ) + { + regm_t scratchm; + + char ssflags = ssindex_array[ssi].ssflags; + if (ssflags & SSFLnobp && stackfl[f]) + goto L6; + + // Load index register with result of e11.E1 + scodelem(cgstate,cdb, e11.E1, idxregs, keepmsk, true); + reg = findreg(idxregs); + + int ss1 = ssindex_array[ssi].ss1; + if (ssflags & SSFLlea) + { + assert(!stackfl[f]); + pcs.Irm = modregrm(2,0,4); + pcs.Isib = modregrm(ss1, reg & 7, reg & 7); + if (reg & 8) + pcs.Irex |= REX_X | REX_B; + } + else + { + int rbase; + + scratchm = ALLREGS & ~keepmsk; + const r = allocreg(cdb, scratchm, TYint); + + if (ssflags & SSFLnobase1) + { + t = 0; + rbase = 5; + } + else + { + t = 0; + rbase = reg; + } + + cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); + if (reg & 8) + code_orrex(cdb.last(), REX_X); + if (rbase & 8) + code_orrex(cdb.last(), REX_B); + code_orrex(cdb.last(), REX_W); + + if (ssflags & SSFLnobase1) + { + cdb.last().IFL1 = FLconst; + cdb.last().IEV1.Vuns = 0; + } + + if (ssflags & SSFLnobase) + { + t = stackfl[f] ? 
2 : 0; + rbase = 5; + } + else + { + t = 2; + rbase = r; + assert(rbase != BP); + } + pcs.Irm = modregrm(t, 0, 4); + pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); + if (r & 8) + pcs.Irex |= REX_X; + if (rbase & 8) + pcs.Irex |= REX_B; + } + freenode(e11.E2); + freenode(e11); + } + else + { + L6: + /* Load index register with result of e11 */ + scodelem(cgstate,cdb, e11, idxregs, keepmsk, true); + setaddrmode(pcs, idxregs); + if (stackfl[f]) /* if we need [EBP] too */ + { + uint idx = pcs.Irm & 7; + if (pcs.Irex & REX_B) + pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; + pcs.Isib = modregrm(0, idx, BP); + pcs.Irm = modregrm(2, 0, 4); + } + } + + if (f == FLpara) + cgstate.refparam = true; + else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) + cgstate.reflocal = true; + else + assert(f != FLreg); + pcs.IFL1 = cast(ubyte)f; + if (f != FLconst) + pcs.IEV1.Vsym = e12.Vsym; + pcs.IEV1.Voffset = e12.Voffset; /* += ??? */ + + /* If e1 is a CSE, we must generate an addressing mode */ + /* but also leave EA in registers so others can use it */ + if (e1.Ecount) + { + regm_t regs = IDXREGS & ~keepmsk; + reg = allocreg(cdb, regs, TYoffset); + + opsave = pcs.Iop; + const flagsave = pcs.Iflags; + ubyte rexsave = pcs.Irex; + pcs.Iop = LEA; + code_newreg(&pcs, reg); + pcs.Iflags &= ~CFopsize; + pcs.Irex |= REX_W; + cdb.gen(&pcs); // LEA reg,EA + cssave(e1,regs,true); + pcs.Iflags = flagsave; + pcs.Irex = rexsave; + pcs.Iop = opsave; + pcs.IFL1 = FLoffset; + pcs.IEV1.Vuns = 0; + setaddrmode(pcs, regs); + } + freenode(e12); + if (e1free) + freenode(e1); + return Lptr(); + } + + regm_t idxregs; + idxregs = cgstate.allregs & ~keepmsk; // only these can be index regs + assert(idxregs); + if ((sz == REGSIZE || sz == 4) && + rm == RM.store) + idxregs |= cgstate.regcon.mvar; + + pcs.IFL1 = FLoffset; + pcs.IEV1.Vuns = 0; + + /* see if we can replace *(e+c) with + * MOV idxreg,e + * EA = c[idxreg] + */ + if (0 && e1isadd && + e12.Eoper == OPconst && + 
(el_signx32(e12)) && + (tysize(e12.Ety) == REGSIZE || (tysize(e12.Ety) == 4)) && + (!e1.Ecount || !e1free) + ) + { + int ss; + + pcs.IEV1.Vuns = e12.Vuns; + freenode(e12); + if (e1free) freenode(e1); + if (e11.Eoper == OPadd && !e11.Ecount && + tysize(e11.Ety) == REGSIZE) + { + e12 = e11.E2; + e11 = e11.E1; + e1 = e1.E1; + e1free = true; + goto L4; + } + if ((ss = isscaledindex(ty, e11)) != 0) + { // (v * scale) + const + cdisscaledindex(cdb, e11, idxregs, keepmsk); + reg = findreg(idxregs); + pcs.Irm = modregrm(0, 0, 4); + pcs.Isib = modregrm(ss, reg & 7, 5); + if (reg & 8) + pcs.Irex |= REX_X; + } + else + { + scodelem(cgstate,cdb, e11, idxregs, keepmsk, true); // load index reg + setaddrmode(pcs, idxregs); + } + return Lptr(); + } + + /* Look for *(v1 + v2) + * EA = [v1][v2] + */ + + if (0 && e1isadd && (!e1.Ecount || !e1free) && + (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) + { + L4: + regm_t idxregs2; + uint base, index; + + // Look for *(v1 + v2 << scale) + int ss = isscaledindex(ty, e12); + if (ss) + { + scodelem(cgstate,cdb, e11, idxregs, keepmsk, true); + idxregs2 = cgstate.allregs & ~(idxregs | keepmsk); + cdisscaledindex(cdb, e12, idxregs2, keepmsk | idxregs); + } + + // Look for *(v1 << scale + v2) + else if ((ss = isscaledindex(ty, e11)) != 0) + { + idxregs2 = idxregs; + cdisscaledindex(cdb, e11, idxregs2, keepmsk); + idxregs = cgstate.allregs & ~(idxregs2 | keepmsk); + scodelem(cgstate,cdb, e12, idxregs, keepmsk | idxregs2, true); + } + // Look for *(((v1 << scale) + c1) + v2) + else if (e11.Eoper == OPadd && !e11.Ecount && + e11.E2.Eoper == OPconst && + (ss = isscaledindex(ty, e11.E1)) != 0 + ) + { + pcs.IEV1.Vuns = e11.E2.Vuns; + idxregs2 = idxregs; + cdisscaledindex(cdb, e11.E1, idxregs2, keepmsk); + idxregs = cgstate.allregs & ~(idxregs2 | keepmsk); + scodelem(cgstate,cdb, e12, idxregs, keepmsk | idxregs2, true); + freenode(e11.E2); + freenode(e11); + } + else + { + scodelem(cgstate,cdb, e11, idxregs, keepmsk, true); + idxregs2 = 
cgstate.allregs & ~(idxregs | keepmsk); + scodelem(cgstate,cdb, e12, idxregs2, keepmsk | idxregs, true); + } + base = findreg(idxregs); + index = findreg(idxregs2); + pcs.Irm = modregrm(2, 0, 4); + pcs.Isib = modregrm(ss, index & 7, base & 7); + if (index & 8) + pcs.Irex |= REX_X; + if (base & 8) + pcs.Irex |= REX_B; + if (e1free) + freenode(e1); + + return Lptr(); + } + + /* give up and replace *e1 with + * MOV idxreg,e + * EA = 0[idxreg] + * pinholeopt() will usually correct the 0, we need it in case + * we have a pointer to a long and need an offset to the second + * word. + */ + assert(e1free); + scodelem(cgstate,cdb, e1, idxregs, keepmsk, true); // load index register + pcs.base = findreg(idxregs); + + return Lptr(); + + case FLdatseg: + assert(0); + + static if (0) + { + pcs.base = BP; + pcs.IEVpointer1 = e.EVpointer; + break; + } + + case FLfltreg: + cgstate.reflocal = true; + pcs.base = BP; + break; + + case FLreg: + goto L2; + + case FLpara: + if (s.Sclass == SC.shadowreg) + goto case FLfast; + Lpara: + cgstate.refparam = true; + pcs.base = BP; + goto L2; + + case FLauto: + case FLfast: + if (regParamInPreg(*s)) + { +//printf("regParamInPreg()\n"); + regm_t pregm = s.Spregm(); + /* See if the parameter is still hanging about in a register, + * and so can we load from that register instead. + */ + if (cgstate.regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) + { + if (/*rm == RM.load &&*/ !cgstate.anyiasm) + { + auto voffset = e.Voffset; + if (sz <= REGSIZE) + { + const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; + if (voffset >= REGSIZE) + voffset -= REGSIZE; + + /* preg could be NOREG if it's a variadic function and we're + * in Win64 shadow regs and we're offsetting to get to the start + * of the variadic args. 
+ */ + if (preg != NOREG && cgstate.regcon.params & mask(preg)) + { + //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); + if (mask(preg) & XMMREGS) + { + /* The following fails with this from std.math on Linux64: + void main() + { + alias T = float; + T x = T.infinity; + T e = T.infinity; + int eptr; + T v = frexp(x, eptr); + assert(isIdentical(e, v)); + } + */ + } + else if (voffset == 0) + { + pcs.reg = preg; + cgstate.regcon.used |= mask(preg); + break; + } + } + } + } + else + cgstate.regcon.params &= ~pregm; + } + } + if (s.Sclass == SC.shadowreg) + goto Lpara; + goto case FLbprel; + + case FLbprel: + cgstate.reflocal = true; + pcs.base = BP; + goto L2; + + case FLextern: + if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) + { + if (config.exe & EX_windos) + { + if (I64) + { // GS:[88] + pcs.Irm = modregrm(0, 0, 4); + pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing + pcs.IFL1 = FLconst; + pcs.IEV1.Vuns = 88; + pcs.Iflags = CFgs; + pcs.Irex |= REX_W; + break; + } + else + { + pcs.Iflags |= CFfs; // add FS: override + } + } + else if (config.exe & (EX_OSX | EX_OSX64)) + { + } + else if (config.exe & EX_posix) + assert(0); + } + goto L3; + + case FLtlsdata: + if (config.exe & EX_posix) + goto L3; + assert(0); + + case FLdata: + case FLudata: + case FLcsdata: + case FLgot: + case FLgotoff: + L3: + pcs.base = BP; + L2: + if (rm != RM.store) // if not store only + s.Sflags |= SFLread; // assume we are doing a read + + if (fl == FLreg) + { + //printf("test: FLreg, %s %d cgstate.regcon.mvar = %s\n", + // s.Sident.ptr, cast(int)e.Voffset, regm_str(cgstate.regcon.mvar)); + if (!(s.Sregm & cgstate.regcon.mvar)) + symbol_print(*s); + assert(s.Sregm & cgstate.regcon.mvar); + + /* Attempting to paint a float as an integer or an integer as a float + * will cause serious problems since the EA is loaded separatedly from + * the opcode. 
The only way to deal with this is to prevent enregistering + * such variables. + */ + if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || + !tyxmmreg(ty) && (s.Sregm & XMMREGS)) + cgreg_unregister(s.Sregm); + + if ( + s.Sclass == SC.regpar || + s.Sclass == SC.parameter) + { cgstate.refparam = true; + cgstate.reflocal = true; // kludge to set up prolog + } + pcs.base = NOREG; + pcs.index = NOREG; + pcs.reg = s.Sreglsw; + if (e.Voffset == REGSIZE && sz == REGSIZE) + pcs.reg = s.Sregmsw; + break; + } + if (config.flags3 & CFG3pic && + (fl == FLtlsdata || s.ty() & mTYthread)) + { + assert(0); + } + pcs.IEV1.Vsym = s; + pcs.IEV1.Voffset = e.Voffset; + //pcs.Sextend = tyToExtend(ty); only need to worry about this if pcs.index is set + if (sz == 1) + { + s.Sflags |= GTbyte; + if (e.Voffset) + { + debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); + s.Sflags &= ~GTregcand; + } + } + else if (sz == 2 && tyxmmreg(s.ty()) && config.fpxmmregs) + { + debug if (debugr) printf("'%s' not XMM reg cand due to short access\n", s.Sident.ptr); + s.Sflags &= ~GTregcand; + } + else if (e.Voffset || sz > tysize(s.Stype.Tty)) + { + debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); + s.Sflags &= ~GTregcand; + } + else if (tyvector(s.Stype.Tty) && sz < tysize(s.Stype.Tty)) + { + // https://issues.dlang.org/show_bug.cgi?id=21673 + // https://issues.dlang.org/show_bug.cgi?id=21676 + // https://issues.dlang.org/show_bug.cgi?id=23009 + // PR: https://github.com/dlang/dmd/pull/13977 + // cannot read or write to partial vector + debug if (debugr) printf("'%s' not reg cand due to vector type\n", s.Sident.ptr); + s.Sflags &= ~GTregcand; + } + + if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) + { + debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); + // Can't successfully mix XMM register variables accessed as integers + s.Sflags &= ~GTregcand; + } + break; + + case FLpseudo: + { + 
        // NOTE(review): this switch tail belongs to a function that begins before
        // this chunk (an EA/lvalue builder filling in `pcs`) — left untouched.
        getregs(cdb, mask(s.Sreglsw));
        pcs.reg = s.Sreglsw;
        break;
    }

    case FLfardata:
    case FLfunc:                                /* reading from code seg */
        if (config.exe & EX_flat)
            goto L3;
        assert(0);

    case FLstack:
        // stack-relative symbol: address is SP + symbol offset
        pcs.base = SP;
        pcs.IEV1.Vsym = s;
        pcs.IEV1.Voffset = e.Voffset;
        break;

    default:
        symbol_print(*s);
        assert(0);
    }
}

// fltregs

/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 * Params:
 *      cdb = code sink
 *      regm = mask of register(s) holding the value to test
 *      tym = type of the value
 *      saveflag = do not clobber the value registers
 */
@trusted
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, bool saveflag)
{
    reg_t scrreg;                      // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];
    assert(regm & cgstate.allregs);
// NOTE(review): the static if (0) blocks below are retained x86 code kept for
// reference while the AArch64 port is in progress; they are compiled out.
static if (0)
{
    assert(regm & (XMMREGS | mBP | ALLREGS));
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }
    if (regm & XMMREGS)
    {
        regm_t xregs = XMMREGS & ~regm;
        const xreg = allocreg(cdb,xregs, TYdouble);
        opcode_t op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        cdb.gen2(op | XORPS, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0));     // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE L1
            genjmp(cdb, JP, FLcode, cast(block *) cnop);  // JP L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }
}
    if (sz <= REGSIZE)
    {
        if (tym == TYfloat)
        {
            // Test a float by shifting out the sign bit; use a scratch
            // register when the original value must be preserved.
            if (saveflag)
            {
                scrregm = cgstate.allregs & ~regm;  // possible scratch regs
                scrreg = allocreg(cdb, scrregm, TYoffset); // allocate scratch reg
                genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                reg = scrreg;
            }
            getregs(cdb, mask(reg));
            cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1
            return;
        }
    }

    // The AArch64 path: compare the register against zero to set the flags
    gentstreg(cdb,reg,sz == 8);         // CMP reg,#0

static if (0)
{
    if (saveflag || tyfv(tym))
    {
    L1:
        scrregm = ALLREGS & ~regm;      // possible scratch regs
        scrreg = allocreg(cdb, scrregm, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg); // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg); // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg); // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg); // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX); // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg)); // SHL scrreg,1
            genorreg(cdb, scrreg, BX); // OR scrreg,BX
            genorreg(cdb, scrreg, CX); // OR scrreg,CX
            genorreg(cdb, scrreg, DX); // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (cgstate.regcon.mvar & mask(reg))    // if register variable
                goto L1;                // don't trash it
            getregs(cdb, mask(reg));    // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));   // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);           // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));       // SHL AX,1
            genorreg(cdb, AX, BX);      // OR AX,BX
            genorreg(cdb, AX, CX);      // OR AX,CX
            genorreg(cdb, AX, DX);      // OR AX,DX
        }
        else
            assert(0);
    }
    code_orflag(cdb.last(),CFpsw);
}
}


/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in outretregs.
 * Params:
 *      cdb = code sink
 *      e = the expression whose result is being moved
 *      retregs = registers currently holding the result
 *      outretregs = desired registers (may include mPSW to ask for the
 *                   condition flags); updated to where the result ends up
 */
@trusted
void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, ref regm_t outretregs)
{
    //printf("arm.fixresult(e = %p, retregs = %s, outretregs = %s)\n",e,regm_str(retregs),regm_str(outretregs));
    if (outretregs == 0) return;           // if don't want result
    assert(e && retregs);                  // need something to work with
    regm_t forccs = outretregs & mPSW;
    //regm_t forregs = outretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    regm_t forregs = outretregs & cgstate.allregs;
    tym_t tym = tybasic(e.Ety);

    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            tym = TYucent;
        }
        else
            // Hack to support cdstreq()
            tym = TYnptr;
    }
    int sz = _tysize[tym];

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        outretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        bool opsflag = false;
        rreg = allocreg(cdb, outretregs, tym);  // allocate return regs
        // NOTE(review): `0 &&` disables this x86/XMM path for the AArch64 port
        if (0 && retregs & XMMREGS)
        {
            reg = findreg(retregs & XMMREGS);
            if (mask(rreg) & XMMREGS)
                genmovreg(cdb, rreg, reg, tym);
            else
            {
                // MOVSD floatreg, XMM?
                cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                // MOV rreg,floatreg
                cdb.genfltreg(0x8B,rreg,0);
                if (sz == 8)
                {
                    if (I32)
                    {
                        rreg = findregmsw(outretregs);
                        cdb.genfltreg(0x8B, rreg,4);
                    }
                    else
                        code_orrex(cdb.last(),REX_W);
                }
            }
        }
/+
        else if (forregs & XMMREGS)
        {
            reg = findreg(retregs & (mBP | ALLREGS));
            switch (sz)
            {
                case 4:
                    cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                    break;

                case 8:
                    if (I32)
                    {
                        cdb.genfltreg(0x89, reg, 0);
                        reg = findregmsw(retregs);
                        cdb.genfltreg(0x89, reg, 4);
                        cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                    }
                    else
                    {
                        cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                        code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                    }
                    break;

                default:
                    assert(false);
            }
        }
+/
        else if (sz > REGSIZE)
        {
            // Two-register value: move both halves into the requested pair
            reg_t msreg = findregmsw(retregs);
            reg_t lsreg = findreglsw(retregs);
            reg_t msrreg = findregmsw(outretregs);
            reg_t lsrreg = findreglsw(outretregs);

            genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
            genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
        }
        else
        {
            assert(!(retregs & XMMREGS));
            assert(!(forregs & XMMREGS));
            reg = findreg(retregs & cgstate.allregs);
            if (sz <= 4)
                genmovreg(cdb, rreg, reg, TYint);  // only move 32 bits, and zero the top 32 bits
            else
                genmovreg(cdb, rreg, reg);    // MOV rreg,reg
        }
        cssave(e,retregs | outretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = outretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        tstresult(cdb, retregs, tym, forregs != 0);
    }
}

// cdfunc
/*******************************
 * Generate code sequence for function call.
+ */ + +@trusted +void cdfunc(ref CGstate cg, ref CodeBuilder cdb, elem* e, ref regm_t pretregs) +{ + //printf("cdfunc()\n"); elem_print(e); + assert(e); + uint numpara = 0; // bytes of parameters + uint stackpushsave = cgstate.stackpush; // so we can compute # of parameters +printf("stackpushsave: %d\n", stackpushsave); + cgstate.stackclean++; + regm_t keepmsk = 0; + int xmmcnt = 0; + tym_t tyf = tybasic(e.E1.Ety); // the function type + + // Easier to deal with parameters as an array: parameters[0..np] + int np = OTbinary(e.Eoper) ? el_nparams(e.E2) : 0; + Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof); + + if (np) + { + int n = 0; + fillParameters(e.E2, parameters, &n); + assert(n == np); + } + + Symbol *sf = null; // symbol of the function being called + if (e.E1.Eoper == OPvar) + sf = e.E1.Vsym; + + /* Assume called function access statics + */ + if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64) && + config.flags3 & CFG3pic) + cgstate.accessedTLS = true; + + /* Special handling for call to __tls_get_addr, we must save registers + * before evaluating the parameter, so that the parameter load and call + * are adjacent. + */ + if (np == 1 && sf) + { + if (sf == tls_get_addr_sym) + getregs(cdb, ~sf.Sregsaved & cg.allregs); // XMMREGS? + } + + uint stackalign = REGSIZE; + if (tyf == TYf16func) + stackalign = 2; + // Figure out which parameters go in registers. 
+ // Compute numpara, the total bytes pushed on the stack + FuncParamRegs fpr = FuncParamRegs_create(tyf); + for (int i = np; --i >= 0;) + { + elem *ep = parameters[i].e; + uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf)); // align on stack boundary + if (config.exe == EX_WIN64) + { + //printf("[%d] size = %u, numpara = %d ep = %p %s\n", i, psize, numpara, ep, tym_str(ep.Ety)); + debug + if (psize > REGSIZE) elem_print(e); + + assert(psize <= REGSIZE); + psize = REGSIZE; + } + //printf("[%d] size = %u, numpara = %d %s\n", i, psize, numpara, tym_str(ep.Ety)); + if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, parameters[i].reg, parameters[i].reg2)) + { + if (config.exe == EX_WIN64) + numpara += REGSIZE; // allocate stack space for it anyway + continue; // goes in register, not stack + } + + // Parameter i goes on the stack + parameters[i].reg = NOREG; + uint alignsize = el_alignsize(ep); + parameters[i].numalign = 0; + if (alignsize > stackalign && + (I64 || (alignsize >= 16 && + (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety)))))) + { + if (alignsize > STACKALIGN) + { + STACKALIGN = alignsize; + cgstate.enforcealign = true; + } + uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1); + parameters[i].numalign = newnumpara - numpara; + numpara = newnumpara; + assert(config.exe != EX_WIN64); + } + numpara += psize; + } + + if (config.exe == EX_WIN64) + { + if (numpara < 4 * REGSIZE) + numpara = 4 * REGSIZE; + } + + //printf("numpara = %d, cgstate.stackpush = %d\n", numpara, stackpush); + assert((numpara & (REGSIZE - 1)) == 0); + assert((cgstate.stackpush & (REGSIZE - 1)) == 0); + + /* Should consider reordering the order of evaluation of the parameters + * so that args that go into registers are evaluated after args that get + * pushed. We can reorder args that are constants or relconst's. + */ + + /* There's only one way to call functions (unlike for x86_64). 
+ * The SP is moved down by the aggregate size of the arguments that go on the + * stack, and the arguments are then moved into the stack. + * The x86_64 ways do not work because BP cannot have negative offsets, and so + * BP at the bottom would be in the way + */ + + /* Check for obsolete operators + */ + foreach (i; 0 .. np) + { + elem* ep = parameters[i].e; + int preg = parameters[i].reg; + //printf("parameter[%d] = %d, np = %d\n", i, preg, np); + if (preg == NOREG) + { + switch (ep.Eoper) + { + case OPstrctor: + case OPstrthis: + case OPstrpar: + case OPnp_fp: + assert(0); + + default: + break; + } + } + } + + /* stack for parameters is allocated all at once - no pushing + * and ensure it is aligned + */ +printf("STACKALIGN: %d\n", STACKALIGN); + uint numalign = -numpara & (STACKALIGN - 1); +printf("numalign: %d numpara: %d\n", numalign, numpara); + cod3_stackadj(cdb, numalign + numpara); + cdb.genadjesp(numalign + numpara); + cgstate.stackpush += numalign + numpara; + stackpushsave += numalign + numpara; + + assert(cgstate.stackpush == stackpushsave); + if (config.exe == EX_WIN64) + { + //printf("np = %d, numpara = %d, cgstate.stackpush = %d\n", np, numpara, stackpush); + assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE)); + + // Allocate stack space for four entries anyway + // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 + } + + CodeBuilder cdbrestore; + cdbrestore.ctor(); + regm_t saved = 0; + targ_size_t funcargtossave = cgstate.funcargtos; + targ_size_t funcargtos = numpara; + //printf("funcargtos1 = %d\n", cast(int)funcargtos); + + /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9 + * float and double parameters go into XMM0..XMM7 + * For variadic functions, count of XMM registers used goes in AL + */ + foreach (i; 0 .. 
np) + { + elem* ep = parameters[i].e; + reg_t preg = parameters[i].reg; + //printf("parameter[%d] = %d, np = %d\n", i, preg, np); + if (preg == NOREG) + { + /* Move parameter on stack, but keep track of registers used + * in the process. If they interfere with keepmsk, we'll have + * to save/restore them. + */ + CodeBuilder cdbsave; + cdbsave.ctor(); + regm_t overlap = cgstate.msavereg & keepmsk; + cgstate.msavereg |= keepmsk; + CodeBuilder cdbparams; + cdbparams.ctor(); + + // Alignment for parameter comes after it was placed on stack + const uint numalignx = parameters[i].numalign; + funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx; + + movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf); + regm_t tosave = keepmsk & ~cgstate.msavereg; + cgstate.msavereg &= ~keepmsk | overlap; + + // tosave is the mask to save and restore + for (reg_t j = 0; tosave; j++) + { + regm_t mi = mask(j); + assert(j <= XMM7); + if (mi & tosave) + { + uint idx; + cgstate.regsave.save(cdbsave, j, idx); + cgstate.regsave.restore(cdbrestore, j, idx); + saved |= mi; + keepmsk &= ~mi; // don't need to keep these for rest of params + tosave &= ~mi; + } + } + + cdb.append(cdbsave); + cdb.append(cdbparams); + } + else + { + // Goes in register preg, not stack + regm_t retregs = mask(preg); + if (retregs & XMMREGS) + ++xmmcnt; + reg_t preg2 = parameters[i].reg2; + reg_t mreg,lreg; + if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat) + { + assert(ep.Eoper != OPstrthis); + if (mask(preg2) & XMMREGS) + ++xmmcnt; + if (tybasic(ep.Ety) == TYcfloat) + { + lreg = ST01; + mreg = NOREG; + } + else if (tyrelax(ep.Ety) == TYcent) + { + lreg = mask(preg ) & mLSW ? cast(reg_t)preg : AX; + mreg = mask(preg2) & mMSW ? 
cast(reg_t)preg2 : DX; + } + else + { + lreg = XMM0; + mreg = XMM1; + } + retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG); + CodeBuilder cdbsave; + cdbsave.ctor(); + if (keepmsk & retregs) + { + regm_t tosave = keepmsk & retregs; + + // tosave is the mask to save and restore + for (reg_t j = 0; tosave; j++) + { + regm_t mi = mask(j); + assert(j <= XMM7); + if (mi & tosave) + { + uint idx; + cgstate.regsave.save(cdbsave, j, idx); + cgstate.regsave.restore(cdbrestore, j, idx); + saved |= mi; + keepmsk &= ~mi; // don't need to keep these for rest of params + tosave &= ~mi; + } + } + } + cdb.append(cdbsave); + + scodelem(cgstate,cdb, ep, retregs, keepmsk, false); + + // Move result [mreg,lreg] into parameter registers from [preg2,preg] + retregs = 0; + if (preg != lreg) + retregs |= mask(preg); + if (preg2 != mreg) + retregs |= mask(preg2); + retregs &= ~mask(NOREG); + getregs(cdb,retregs); + + tym_t ty1 = tybasic(ep.Ety); + tym_t ty2 = ty1; + if (ep.Ety & mTYgprxmm) + { + ty1 = TYllong; + ty2 = TYdouble; + } + else if (ep.Ety & mTYxmmgpr) + { + ty1 = TYdouble; + ty2 = TYllong; + } + else if (ty1 == TYstruct) + { + type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type; + type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type; + if (targ1) + ty1 = targ1.Tty; + if (targ2) + ty2 = targ2.Tty; + } + else if (tyrelax(ty1) == TYcent) + ty1 = ty2 = TYllong; + else if (tybasic(ty1) == TYcdouble) + ty1 = ty2 = TYdouble; + + if (tybasic(ep.Ety) == TYcfloat) + { + assert(I64); + assert(lreg == ST01 && mreg == NOREG); + // spill + pop87(); + pop87(); + cdb.genfltreg(0xD9, 3, tysize(TYfloat)); + genfwait(cdb); + cdb.genfltreg(0xD9, 3, 0); + genfwait(cdb); + // reload + if (config.exe == EX_WIN64) + { + cdb.genfltreg(LOD, preg, 0); + code_orrex(cdb.last(), REX_W); + } + else + { + assert(mask(preg) & XMMREGS); + cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble); + } + } + else foreach (v; 0 .. 
2) + { + if (v ^ (preg != mreg)) + genmovreg(cdb, preg, lreg, ty1); + else + genmovreg(cdb, preg2, mreg, ty2); + } + + retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG); + } + else if (ep.Eoper == OPstrthis) + { + getregs(cdb,retregs); + // LEA preg,np[RSP] + uint delta = cgstate.stackpush - ep.Vuns; // stack delta to parameter + cdb.genc1(LEA, + (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta); + if (I64) + code_orrex(cdb.last(), REX_W); + } + else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0) + { + retregs = 0; + scodelem(cgstate,cdb, ep.E1, retregs, keepmsk, false); + freenode(ep); + } + else + { + scodelem(cgstate,cdb, ep, retregs, keepmsk, false); + } + keepmsk |= retregs; // don't change preg when evaluating func address + } + } + + if (config.exe == EX_WIN64) + { // Allocate stack space for four entries anyway + // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 + { uint sz = 4 * REGSIZE; + cod3_stackadj(cdb, sz); + cdb.genadjesp(sz); + cgstate.stackpush += sz; + } + + /* Variadic functions store XMM parameters into their corresponding GP registers + */ + for (int i = 0; i < np; i++) + { + int preg = parameters[i].reg; + regm_t retregs = mask(preg); + if (retregs & XMMREGS) + { + reg_t reg; + switch (preg) + { + case XMM0: reg = CX; break; + case XMM1: reg = DX; break; + case XMM2: reg = R8; break; + case XMM3: reg = R9; break; + + default: assert(0); + } + getregs(cdb,mask(reg)); + //cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg + } + } + } + + // Restore any register parameters we saved + getregs(cdb,saved); + cdb.append(cdbrestore); + keepmsk |= saved; + + // Variadic functions store the number of XMM registers used in AL + if (config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic && !cg.AArch64) + { + getregs(cdb,mAX); + movregconst(cdb,AX,xmmcnt,1); + keepmsk |= mAX; + } + + //printf("funcargtos: %d cgstate.funcargtos: %d\n", cast(int)funcargtos, 
cast(int)cgstate.funcargtos); + assert(funcargtos == 0 && cgstate.funcargtos == ~0); + cgstate.stackclean--; + + funccall(cdb,e,numpara,numalign,pretregs,keepmsk,false); + cgstate.funcargtos = funcargtossave; +} + +/****************************** + * Call function. All parameters have already been pushed onto the stack. + * Params: + * e = function call + * numpara = size in bytes of all the parameters + * numalign = amount the stack was aligned by before the parameters were pushed + * pretregs = where return value goes + * keepmsk = registers to not change when evaluating the function address + * usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return + */ + +@trusted +private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign, + ref regm_t pretregs,regm_t keepmsk, bool usefuncarg) +{ + //printf("funccall(e = %p, pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(pretregs),numpara,numalign,usefuncarg); + //printf(" from %s\n", funcsym_p.Sident.ptr); + //elem_print(e); + cgstate.calledafunc = 1; + // Determine if we need frame for function prolog/epilog + + if (config.memmodel == Vmodel) + { + if (tyfarfunc(funcsym_p.ty())) + cgstate.needframe = true; + } + + regm_t retregs; + Symbol* s; + + elem* e1 = e.E1; + tym_t tym1 = tybasic(e1.Ety); + char farfunc = tyfarfunc(tym1) || tym1 == TYifunc; + + CodeBuilder cdbe; + cdbe.ctor(); + + if (e1.Eoper == OPvar) + { // Call function directly + + if (!tyfunc(tym1)) + printf("%s\n", tym_str(tym1)); + assert(tyfunc(tym1)); + s = e1.Vsym; + if (s.Sflags & SFLexit) + { } + else if (s != tls_get_addr_sym) + save87(cdb); // assume 8087 regs are all trashed + + // Function calls may throw Errors, unless marked that they don't + if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow)) + funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; + + if (s.Sflags & SFLexit) + { + // Function doesn't return, so don't worry about registers + // it may use + } + else if 
(!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized)) + // so we can replace func at runtime + getregs(cdbe,~fregsaved & cgstate.allregs); // XMMREGS ? + else + getregs(cdbe,~s.Sregsaved & cgstate.allregs); // XMMREGS ? + if (strcmp(s.Sident.ptr, "alloca") == 0) + { + s = getRtlsym(RTLSYM.ALLOCA); + makeitextern(s); + int areg = CX; + if (config.exe == EX_WIN64) + areg = DX; + getregs(cdbe, mask(areg)); + cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0); // LEA areg,&localsize[BP] + if (I64) + code_orrex(cdbe.last(), REX_W); + cgstate.Alloca.size = REGSIZE; + } + if (sytab[s.Sclass] & SCSS) // if function is on stack (!) + { + retregs = cgstate.allregs & ~keepmsk; + s.Sflags &= ~GTregcand; + s.Sflags |= SFLread; + cdrelconst(cgstate,cdbe,e1,retregs); + if (farfunc) + { + const reg = findregmsw(retregs); + const lsreg = findreglsw(retregs); + cgstate.floatreg = true; // use float register + cgstate.reflocal = true; + cdbe.genc1(0x89, // MOV floatreg+2,reg + modregrm(2, reg, BPRM), FLfltreg, REGSIZE); + cdbe.genc1(0x89, // MOV floatreg,lsreg + modregrm(2, lsreg, BPRM), FLfltreg, 0); + if (tym1 == TYifunc) + cdbe.gen1(0x9C); // PUSHF + cdbe.genc1(0xFF, // CALL [floatreg] + modregrm(2, 3, BPRM), FLfltreg, 0); + } + else + { + const reg = findreg(retregs); + cdbe.gen2(0xFF, modregrmx(3, 2, reg)); // CALL reg + if (I64) + code_orrex(cdbe.last(), REX_W); + } + } + else + { + FL fl = FLfunc; + if (!tyfunc(s.ty())) + fl = el_fl(e1); + if (tym1 == TYifunc) + cdbe.gen1(0x9C); // PUSHF + if (config.exe & (EX_windos | EX_OSX | EX_OSX64)) + { + cdbe.gencs(farfunc ? 
0x9A : 0xE8,0,fl,s); // CALL extern + } + else + { + assert(!farfunc); + if (s != tls_get_addr_sym) + { + //printf("call %s\n", s.Sident.ptr); + load_localgot(cdb); + cdbe.gencs1(INSTR.branch_imm(1, 0), 0, fl, s); // CALL extern + } + else if (I64) + { + /* Prepend 66 66 48 so GNU linker has patch room + */ + assert(!farfunc); + cdbe.gen1(0x66); + cdbe.gen1(0x66); + cdbe.gencs(0xE8, 0, fl, s); // CALL extern + cdbe.last().Irex = REX | REX_W; + } + else + cdbe.gencs(0xE8, 0, fl, s); // CALL extern + } + code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff)); + } + } + else + { // Call function via pointer + + // Function calls may throw Errors + funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; + + if (e1.Eoper != OPind) { printf("e1.fl: %s, e1.Eoper: %s\n", fl_str(el_fl(e1)), oper_str(e1.Eoper)); } + assert(e1.Eoper == OPind); + elem *e11 = e1.E1; + tym_t e11ty = tybasic(e11.Ety); + load_localgot(cdb); + if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only + { + if (config.flags3 & CFG3pic) + keepmsk |= mBX; + } + + /* Mask of registers destroyed by the function call + */ + regm_t desmsk = cgstate.allregs & ~fregsaved; // XMMREGS? + //printf("desmsk: %s\n", regm_str(desmsk)); + + // if we can't use loadea() + if (1) + //if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) && + //(e11.Eoper != OPind || e11.Ecount)) + { + retregs = cgstate.allregs & ~keepmsk; + cgstate.stackclean++; + scodelem(cgstate,cdbe,e11,retregs,keepmsk,true); + cgstate.stackclean--; + // Kill registers destroyed by an arbitrary function call + getregs(cdbe,desmsk); + const reg = findreg(retregs); + + cdbe.gen1(INSTR.blr(reg)); // BLR reg + } + else + { + code cs; + cs.Iflags = 0; + cgstate.stackclean++; + loadea(cdbe, e11, cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk); + cgstate.stackclean--; + freenode(e11); + } + s = null; + } + cdb.append(cdbe); + freenode(e1); + + /* See if we will need the frame pointer. 
+ Calculate it here so we can possibly use BP to fix the stack. + */ +static if (0) +{ + if (!cgstate.needframe) + { + // If there is a register available for this basic block + if (config.flags4 & CFG4optimized && (cgstate.allregs & ~cgstate.regcon.used)) + { } + else + { + foreach (s; globsym[]) + { + if (s.Sflags & GTregcand && type_size(s.Stype) != 0) + { + if (config.flags4 & CFG4optimized) + { // If symbol is live in this basic block and + // isn't already in a register + if (s.Srange && vec_testbit(cgstate.dfoidx, s.Srange) && + s.Sfl != FLreg) + { // Then symbol must be allocated on stack + cgstate.needframe = true; + break; + } + } + else + { if (cgstate.mfuncreg == 0) // if no registers left + { cgstate.needframe = true; + break; + } + } + } + } + } + } +} + + reg_t reg1, reg2; + retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2); + + assert(retregs || !pretregs); + + if (!usefuncarg) + { + // If stack needs cleanup + if (s && s.Sflags & SFLexit) + { + if (config.fulltypes && TARGET_WINDOS) + { + // the stack walker evaluates the return address, not a byte of the + // call instruction, so ensure there is an instruction byte after + // the call that still has the same line number information + cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3); + } + /* Function never returns, so don't need to generate stack + * cleanup code. But still need to log the stack cleanup + * as if it did return. 
+ */ + cdb.genadjesp(-(numpara + numalign)); + cgstate.stackpush -= numpara + numalign; + } + else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) && + (!typfunc(tym1) || config.exe == EX_WIN64)) + { + if (tym1 == TYhfunc) + { // Hidden parameter is popped off by the callee + cdb.genadjesp(-REGSIZE); + cgstate.stackpush -= REGSIZE; + if (numpara + numalign > REGSIZE) + genstackclean(cdb, numpara + numalign - REGSIZE, retregs); + } + else + genstackclean(cdb, numpara + numalign, retregs); + } + else + { + cdb.genadjesp(-numpara); // popped off by the callee's 'RET numpara' + cgstate.stackpush -= numpara; + if (numalign) // callee doesn't know about alignment adjustment + genstackclean(cdb,numalign,retregs); + } + } + + /* Special handling for functions that return one part + in XMM0 and the other part in AX + */ + if (pretregs && retregs) + { + if (reg1 == NOREG || reg2 == NOREG) + {} + else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS))) + { + reg_t lreg, mreg; + if (mask(reg1) & XMMREGS) + { + lreg = XMM0; + mreg = XMM1; + } + else + { + lreg = mask(reg1) & mLSW ? reg1 : AX; + mreg = mask(reg2) & mMSW ? reg2 : DX; + } + for (int v = 0; v < 2; v++) + { + if (v ^ (reg2 != lreg)) + genmovreg(cdb,lreg,reg1); + else + genmovreg(cdb,mreg,reg2); + } + retregs = mask(lreg) | mask(mreg); + } + } + + /* Special handling for functions which return complex float in XMM0 or RAX. 
*/ + + if (I64 + && config.exe != EX_WIN64 // broken + && pretregs && tybasic(e.Ety) == TYcfloat) + { + assert(reg2 == NOREG); + // spill + if (config.exe == EX_WIN64) + { + assert(reg1 == AX); + cdb.genfltreg(STO, reg1, 0); + code_orrex(cdb.last(), REX_W); + } + else + { + assert(reg1 == XMM0); + cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble); + } + // reload real + push87(cdb); + cdb.genfltreg(0xD9, 0, 0); + genfwait(cdb); + // reload imaginary + push87(cdb); + cdb.genfltreg(0xD9, 0, tysize(TYfloat)); + genfwait(cdb); + + retregs = mST01; + } + + fixresult(cdb, e, retregs, pretregs); +} + +/*************************** + * Generate code to move argument e on the stack. + */ + +@trusted +private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf) +{ + //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos); + //printf("movParams()\n"); elem_print(e); + assert(e && e.Eoper != OPparam); + + tym_t tym = tybasic(e.Ety); + if (tyfloating(tym)) + objmod.fltused(); + + targ_size_t szb = paramsize(e, tyf); // size before alignment + targ_size_t sz = _align(stackalign, szb); // size after alignment + assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked + assert((sz & (REGSIZE - 1)) == 0); + //printf("szb = %d sz = %d\n", cast(int)szb, cast(int)sz); + + switch (e.Eoper) + { + case OPstrctor: + case OPstrthis: + case OPstrpar: + case OPnp_fp: + assert(0); + + default: + break; + } + regm_t retregs = cgstate.allregs; + if (tyvector(tym) || + config.fpxmmregs && tyxmmreg(tym) && + // If not already in x87 register from function call return + !((e.Eoper == OPcall || e.Eoper == OPucall) && I32)) + { + retregs = XMMREGS; + codelem(cgstate,cdb, e, retregs, false); + const op = xmmstore(tym); + const r = findreg(retregs); + cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz); // MOV funcarg[EBP],r + checkSetVex(cdb.last(),tym); + return; + } + else if 
(tyfloating(tym)) + { + assert(0); + } + scodelem(cgstate,cdb, e, retregs, 0, true); + if (sz <= REGSIZE) + { + reg_t reg = findreg(retregs); + code cs; + cs.reg = NOREG; + cs.base = 31; + cs.index = NOREG; + cs.IFL1 = FLunde; + storeToEA(cs, reg, cast(uint)sz); + cs.Iop = setField(cs.Iop,21,10,funcargtos >> field(cs.Iop,31,30)); + cdb.gen(&cs); + } + else if (sz == REGSIZE * 2) + { + int grex = I64 ? REX_W << 16 : 0; + uint r = findregmsw(retregs); + cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r + r = findreglsw(retregs); + cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r + assert(0); + } + else + assert(0); +} + +/****************************** + * Generate code to load data into registers. + * Called for OPconst and OPvar. + */ +@trusted +void loaddata(ref CodeBuilder cdb, elem* e, ref regm_t outretregs) +{ +static if (1) +{ + reg_t reg; + reg_t nreg; + reg_t sreg; + opcode_t op; + tym_t tym; + code cs; + regm_t flags, forregs, regm; + + debug + { + if (debugw) + printf("loaddata(e = %p,outretregs = %s)\n",e,regm_str(outretregs)); + //elem_print(e); + } + + assert(e); + elem_debug(e); + if (outretregs == 0) + return; + tym = tybasic(e.Ety); + if (tym == TYstruct) + { + cdrelconst(cgstate,cdb,e,outretregs); + return; + } + if (tyfloating(tym)) + { + objmod.fltused(); + if (config.fpxmmregs && + (tym == TYcfloat || tym == TYcdouble) && + (outretregs & (XMMREGS | mPSW)) + ) + { + cloadxmm(cdb, e, outretregs); + return; + } + } + + if (outretregs == mPSW) + { + regm_t retregs = cgstate.allregs; + loaddata(cdb, e, retregs); + fixresult(cdb, e, retregs, outretregs); + return; + } + + /* not for flags only */ + int sz = _tysize[tym]; + cs.Iflags = 0; + flags = outretregs & mPSW; /* save original */ + forregs = outretregs & cgstate.allregs; // XMMREGS ? 
+ //if (outretregs & mSTACK) + //forregs |= DOUBLEREGS; + if (e.Eoper == OPconst) + { + if (tyvector(tym) && forregs & XMMREGS) + { + assert(!flags); + const xreg = allocreg(cdb, forregs, tym); // allocate registers + movxmmconst(cdb, xreg, tym, &e.EV, flags); + fixresult(cdb, e, forregs, outretregs); + return; + } + + targ_size_t value = e.Vint; + if (sz == 8) + value = cast(targ_size_t)e.Vullong; + + if (sz == REGSIZE && reghasvalue(forregs, value, reg)) + forregs = mask(reg); + + regm_t save = cgstate.regcon.immed.mval; + reg = allocreg(cdb, forregs, tym); // allocate registers + cgstate.regcon.immed.mval = save; // allocreg could unnecessarily clear .mval + if (sz <= REGSIZE) + { + if (sz == 1) + flags |= 1; + else if (!I16 && sz == SHORTSIZE && + !(mask(reg) & cgstate.regcon.mvar) && + !(config.flags4 & CFG4speed) + ) + flags |= 2; + if (sz == 8) + flags |= 64; + if (isXMMreg(reg)) + { + movxmmconst(cdb, reg, tym, &e.EV, 0); + flags = 0; + } + else + { + movregconst(cdb, reg, value, flags); + flags = 0; // flags are already set + } + } + else if (sz == 16) + { + movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.Vcent.lo, 64); + movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.Vcent.hi, 64); + } + else + assert(0); + // Flags may already be set + outretregs &= flags | ~mPSW; + fixresult(cdb, e, forregs, outretregs); + return; + } + else + { + // See if we can use register that parameter was passed in + //printf("xyzzy1 %s %d %d\n", e.Vsym.Sident.ptr, cast(int)cgstate.regcon.params, regParamInPreg(e.Vsym)); + if (cgstate.regcon.params && + regParamInPreg(*e.Vsym) && + !cgstate.anyiasm && // may have written to the memory for the parameter + (cgstate.regcon.params & mask(e.Vsym.Spreg) && e.Voffset == 0 || + cgstate.regcon.params & mask(e.Vsym.Spreg2) && e.Voffset == REGSIZE) && + sz <= REGSIZE) // make sure no 'paint' to a larger size happened + { + const reg_t preg = e.Voffset ? 
e.Vsym.Spreg2 : e.Vsym.Spreg; + const regm_t pregm = mask(preg); + + if (!(sz <= 2 && pregm & XMMREGS)) // no SIMD instructions to load 1 or 2 byte quantities + { + if (debugr) + printf("%s.%d is fastpar and using register %s\n", + e.Vsym.Sident.ptr, + cast(int)e.Voffset, + regm_str(pregm)); + + cgstate.mfuncreg &= ~pregm; + cgstate.regcon.used |= pregm; + fixresult(cdb,e,pregm,outretregs); + return; + } + } + + reg = allocreg(cdb, forregs, tym); // allocate registers + + if (sz == 1) + { regm_t nregm; + + debug + if (!(forregs & BYTEREGS)) + { + elem_print(e); + printf("forregs = %s\n", regm_str(forregs)); + } + + opcode_t opmv = 0x8A; // byte MOV + if (config.exe & (EX_OSX | EX_OSX64)) + { + if (movOnly(e)) + opmv = 0x8B; + } + assert(forregs & BYTEREGS); + if (!I16) + { + if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed && + // Workaround for OSX linker bug: + // ld: GOT load reloc does not point to a movq instruction in test42 for x86_64 + !(config.exe & EX_OSX64 && !(sytab[e.Vsym.Sclass] & SCSS)) + ) + { +// opmv = tyuns(tym) ? MOVZXb : MOVSXb; // MOVZX/MOVSX + } + loadea(cdb, e, cs, opmv, reg, 0, 0, 0); // MOV regL,data + } + else + { + nregm = tyuns(tym) ? 
BYTEREGS : cast(regm_t) mAX; + if (outretregs & nregm) + nreg = reg; // already allocated + else + nreg = allocreg(cdb, nregm, tym); + loadea(cdb, e, cs, opmv, nreg, 0, 0, 0); // MOV nregL,data + if (reg != nreg) + { + genmovreg(cdb, reg, nreg); // MOV reg,nreg + cssave(e, mask(nreg), false); + } + } + } + else if (forregs & XMMREGS) + { + // Can't load from registers directly to XMM regs + //e.Vsym.Sflags &= ~GTregcand; + + opcode_t opmv = xmmload(tym, xmmIsAligned(e)); + if (e.Eoper == OPvar) + { + Symbol *s = e.Vsym; + if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS)) + { //opmv = LODD; // MOVD/MOVQ + /* getlvalue() will unwind this and unregister s; could use a better solution */ + } + } + loadea(cdb, e, cs, opmv, reg, 0, 0, 0, RM.load); // MOVSS/MOVSD reg,data + checkSetVex(cdb.last(),tym); + } + else if (sz <= REGSIZE) + { + // LDR reg,[sp,#offset] + // https://www.scs.stanford.edu/~zyedidia/arm64/ldr_imm_gen.html + opcode_t opmv = PSOP.ldr | (29 << 5); + loadea(cdb, e, cs, opmv, reg, 0, 0, 0, RM.load); + } + else if (sz <= 2 * REGSIZE) + { + reg = findregmsw(forregs); + loadea(cdb, e, cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2 + reg = findreglsw(forregs); + loadea(cdb, e, cs, 0x8B, reg, 0, forregs, 0); // MOV reg,data + } + else if (sz >= 8) + { + if ((outretregs & (mSTACK | mPSW)) == mSTACK) + { + // Note that we allocreg(DOUBLEREGS) needlessly + cgstate.stackchanged = 1; + int i = sz - REGSIZE; + do + { + loadea(cdb,e,cs,0xFF,6,i,0,0); // PUSH EA+i + cdb.genadjesp(REGSIZE); + cgstate.stackpush += REGSIZE; + i -= REGSIZE; + } + while (i >= 0); + return; + } + else + { + assert(0); + } + } + else + assert(0); + // Flags may already be set + outretregs &= flags | ~mPSW; + fixresult(cdb, e, forregs, outretregs); + return; + } +} +} diff --git a/compiler/src/dmd/backend/arm/cod2.d b/compiler/src/dmd/backend/arm/cod2.d new file mode 100644 index 000000000000..2e90d20c04b3 --- /dev/null +++ b/compiler/src/dmd/backend/arm/cod2.d @@ -0,0 +1,1550 @@ 
+/** + * Code generation 2 + * + * Includes: + * - math operators (+ - * / %) and functions (abs, cos, sqrt) + * - 'string' functions (strlen, memcpy, memset) + * - pointers (address of / dereference) + * - struct assign, constructor, destructor + * + * Compiler implementation of the + * $(LINK2 https://www.dlang.org, D programming language). + * + * Copyright: Copyright (C) 1984-1998 by Symantec + * Copyright (C) 2000-2024 by The D Language Foundation, All Rights Reserved + * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) + * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/arm/cod2.d, backend/cod2.d) + * Documentation: https://dlang.org/phobos/dmd_backend_arm_cod2.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/arm/cod2.d + */ + +module dmd.backend.arm.cod2; + +import core.stdc.stdio; +import core.stdc.stdlib; +import core.stdc.string; + +import dmd.backend.cc; +import dmd.backend.cdef; +import dmd.backend.code; +import dmd.backend.x86.code_x86; +import dmd.backend.codebuilder; +import dmd.backend.mem; +import dmd.backend.el; +import dmd.backend.global; +import dmd.backend.oper; +import dmd.backend.ty; +import dmd.backend.type; +import dmd.backend.x86.xmm; +import dmd.backend.arm.cod1 : loadFromEA, storeToEA; +import dmd.backend.arm.cod3 : conditionCode, genBranch, gentstreg, movregconst, COND; +import dmd.backend.arm.instr; + +nothrow: +@safe: + +import dmd.backend.cg : segfl, stackfl; + +__gshared int cdcmp_flag; + +import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients; + +/***************************** + * Handle operators which are more or less orthogonal + * OPadd, OPmin, OPand, OPor, OPxor + */ + +@trusted +void cdorth(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdorth(e = %p, pretregs = %s)\n",e,regm_str(pretregs)); + + elem* e1 = e.E1; + elem* e2 = e.E2; + if 
(pretregs == 0) // if don't want result + { + codelem(cg,cdb,e1,pretregs,false); // eval left leaf + pretregs = 0; // in case they got set + codelem(cg,cdb,e2,pretregs,false); + return; + } + + const ty = tybasic(e.Ety); + const ty1 = tybasic(e1.Ety); + const ty2 = tybasic(e2.Ety); + const sz = _tysize[ty]; + + if (tyfloating(ty1)) + { + assert(0); + } + + regm_t posregs = cg.allregs; + + regm_t retregs1 = posregs; + + codelem(cg, cdb, e1, retregs1, false); + regm_t retregs2 = cg.allregs & ~retregs1; + scodelem(cg, cdb, e2, retregs2, retregs1, false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, ty); + + reg_t Rn = findreg(retregs1); + reg_t Rm = findreg(retregs2); + + regm_t PSW = pretregs & mPSW; + switch (e.Eoper) + { + // ADDS/SUBS (extended register) + // http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#addsub_ext + case OPadd: + case OPmin: + { + uint sf = sz == 8; + uint op = e.Eoper == OPadd ? 0 : 1; + uint S = PSW != 0; + uint opt = 0; + uint option = tyToExtend(ty); + uint imm3 = 0; + cdb.gen1(INSTR.addsub_ext(sf, op, S, opt, Rm, option, imm3, Rn, Rd)); + PSW = 0; + pretregs &= ~mPSW; + break; + } + + // Logical (shifted register) + // http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#log_shift + case OPand: + case OPor: + case OPxor: + { + uint sf = sz == 8; + uint opc = e.Eoper == OPand ? 0 : + e.Eoper == OPor ? 
1 : 2; + if (PSW && e.Eoper == OPand) + { + opc = 3; // ANDS (also sets NZCV, which satisfies the pending mPSW request) + pretregs &= ~mPSW; + } + uint shift = 0; + uint N = 0; + uint imm6 = 0; + cdb.gen1(INSTR.log_shift(sf, opc, shift, N, Rm, imm6, Rn, Rd)); + break; + } + + default: + assert(0); + } + + pretregs = retregs | PSW; + fixresult(cdb,e,mask(Rd),pretregs); +} + +/************************************************* + * Map an integral type to the AArch64 extended-register 'option' encoding + * used by ADD/SUB (extended register): sizes 1/2/4 map to UXTB/UXTH/UXTW, + * size 8 to LSL (UXTX); for signed types bit 2 is set, selecting the + * sign-extending variants SXTB/SXTH/SXTW/SXTX. + * Params: + * ty = basic ty (must be integral) + * Returns: + * the Extend encoding for ty's size and signedness + */ +Extend tyToExtend(tym_t ty) +{ + assert(tyintegral(ty)); + Extend extend; + const sz = tysize(ty); + with (Extend) switch (sz) + { + case 1: extend = UXTB; break; + case 2: extend = UXTH; break; + case 4: extend = UXTW; break; + case 8: extend = LSL; break; + default: + assert(0); + } + if (!tyuns(ty)) + extend = cast(Extend)(extend | 4); /* bit 2: unsigned UXT* -> signed SXT* */ + return extend; +} + +/***************************** + * Handle multiply, OPmul + */ + +@trusted +void cdmul(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdmul(e = %p, pretregs = %s)\n",e,regm_str(pretregs)); + + elem* e1 = e.E1; + elem* e2 = e.E2; + if (pretregs == 0) // if don't want result + { + codelem(cg,cdb,e1,pretregs,false); // eval left leaf + pretregs = 0; // in case they got set + codelem(cg,cdb,e2,pretregs,false); + return; + } + + const ty = tybasic(e.Ety); + const ty1 = tybasic(e1.Ety); + const ty2 = tybasic(e2.Ety); + const sz = _tysize[ty]; + + if (tyfloating(ty1)) + { + assert(0); + } + + regm_t posregs = cg.allregs; + + regm_t retregs1 = posregs; + + codelem(cg, cdb, e1, retregs1, false); + regm_t retregs2 = cg.allregs & ~retregs1; + scodelem(cg, cdb, e2, retregs2, retregs1, false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, ty); + + reg_t Rn = findreg(retregs1); + reg_t Rm = findreg(retregs2); + + // 
http://www.scs.stanford.edu/~zyedidia/arm64/mul_madd.html + // madd Rd,Rn,Rm,Rzr + cdb.gen1(INSTR.madd(sz == 8, Rm, 31, Rn, Rd)); + + fixresult(cdb,e,retregs,pretregs); +} + +/***************************** + * Handle OPdiv, OPmod and OPremquo. + * Note that modulo isn't defined for doubles. + */ + +@trusted +void cddiv(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cddiv(e = %p, pretregs = %s)\n",e,regm_str(pretregs)); + + elem* e1 = e.E1; + elem* e2 = e.E2; + if (pretregs == 0) // if don't want result + { + codelem(cg,cdb,e1,pretregs,false); // eval left leaf + pretregs = 0; // in case they got set + codelem(cg,cdb,e2,pretregs,false); + return; + } + + const ty = tybasic(e.Ety); + const ty1 = tybasic(e1.Ety); + const ty2 = tybasic(e2.Ety); + const sz = _tysize[ty]; + const uns = tyuns(ty1) || tyuns(ty2); // 1 if unsigned operation, 0 if not + + if (tyfloating(ty1)) + { + assert(0); + } + + regm_t posregs = cg.allregs; + + regm_t retregs1 = posregs; + + codelem(cg, cdb, e1, retregs1, false); + regm_t retregs2 = cg.allregs & ~retregs1; + scodelem(cg, cdb, e2, retregs2, retregs1, false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) // if no return regs speced (i.e. 
flags only) + retregs = ALLREGS & posregs; // give us some + + reg_t Rdividend = findreg(retregs1); // dividend + reg_t Rdivisor = findreg(retregs2); // divisor + + reg_t Rquo; + reg_t Rmod; + final switch (e.Eoper) + { + case OPdiv: + Rquo = allocreg(cdb, retregs, ty); + break; + case OPmod: + { + regm_t regm = cg.allregs & ~(retregs1 | retregs2); + Rquo = allocreg(cdb, regm, ty); + assert(Rquo != Rdividend && Rquo != Rdivisor); + Rmod = allocreg(cdb, retregs, ty); + break; + } + case OPremquo: + { + regm_t regm = cg.allregs & ~(retregs1 | retregs2); + Rquo = allocreg(cdb, regm, ty); + assert(Rquo != Rdividend && Rquo != Rdivisor); + Rmod = findregmsw(regm); + assert(Rmod != Rquo); + break; + } + } + + // http://www.scs.stanford.edu/~zyedidia/arm64/sdiv.html + // http://www.scs.stanford.edu/~zyedidia/arm64/udiv.html + + bool sf = sz == 8; + + // DIV Rd, Rn, Rm + uint ins = INSTR.sdiv_udiv(sf, uns, Rdivisor, Rdividend, Rquo); + cdb.gen1(ins); + retregs = mask(Rquo); + + if (e.Eoper == OPmod || e.Eoper == OPremquo) + { + uint ins2 = INSTR.msub(sf, Rdivisor, Rdividend, Rquo, Rmod); + cdb.gen1(ins2); + if (e.Eoper == OPmod) + retregs = mask(Rmod); + else + // MSW = Modulo, LSW = Quotient + retregs = mask(Rquo) | mask(Rmod); + } + + fixresult(cdb,e,retregs,pretregs); +} + +/************************ + * Complement operator + */ + +@trusted +void cdcom(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdcom()\n"); + //elem_print(e); + if (pretregs == 0) + { + codelem(cgstate,cdb,e.E1,pretregs,false); + return; + } + const tyml = tybasic(e.E1.Ety); + const sz = _tysize[tyml]; + if (tyfloating(tyml)) + { + assert(0); + } + + const posregs = cgstate.allregs; + regm_t retregs1 = posregs; + codelem(cgstate,cdb,e.E1,retregs1,false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, 
tyml); + + const Rm = findreg(retregs1); + + /* MVN Rd, Rm{, shift #amount } + * https://www.scs.stanford.edu/~zyedidia/arm64/mvn_orn_log_shift.html + */ + uint sf = sz == 8; + cdb.gen1(INSTR.log_shift(sf, 1, 0, 1, Rm, 0, 31, Rd)); /* ORN Rd,xzr,Rm == MVN */ + + pretregs &= ~mPSW; /* NOTE(review): MVN (ORN) does not set NZCV, so flags are NOT actually set here — confirm a pending mPSW request is still honored (e.g. by fixresult) */ + fixresult(cdb,e,retregs,pretregs); +} + +/************************ + * Bswap operator + */ + +@trusted +void cdbswap(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdbswap()\n"); + //elem_print(e); + if (pretregs == 0) + { + codelem(cgstate,cdb,e.E1,pretregs,false); + return; + } + const tyml = tybasic(e.E1.Ety); + const sz = _tysize[tyml]; + if (tyfloating(tyml)) + { + assert(0); + } + + const posregs = cgstate.allregs; + regm_t retregs1 = posregs; + codelem(cgstate,cdb,e.E1,retregs1,false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, tyml); + + const Rn = findreg(retregs1); + + /* REV16/REV32/REV64 Rd,Rn + * https://www.scs.stanford.edu/~zyedidia/arm64/rev16_int.html + * https://www.scs.stanford.edu/~zyedidia/arm64/rev32_int.html + * https://www.scs.stanford.edu/~zyedidia/arm64/rev64_rev.html + */ + uint sf = sz >= 4; /* sf=1 selects the REV32/REV64 encodings */ + uint S = 0; + uint opcode2 = 0; + /* NOTE(review): sz==1 would fall into the REV64 case — presumably OPbswap is never given a 1-byte operand; confirm */ + uint opcode = sz == 2 ? 1 : sz == 4 ? 2 : 3; /* 1=REV16, 2=REV32, 3=REV64 */ + cdb.gen1(INSTR.dp_1src(sf, S, opcode2, opcode, Rn, Rd)); + fixresult(cdb,e,retregs,pretregs); +} + +/************************* + * ?: operator + */ + +@trusted +void cdcond(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdcond(e = %p, pretregs = %s)\n",e,regm_str(pretregs)); + /* e1 ? 
e21 : e22 + */ + elem *e1 = e.E1; + elem *e2 = e.E2; + elem *e21 = e2.E1; + elem *e22 = e2.E2; + regm_t psw = pretregs & mPSW; /* save PSW bit */ + const op1 = e1.Eoper; + uint sz1 = tysize(e1.Ety); + COND jop = conditionCode(e1); + + COND jop1 = conditionCode(e21); + COND jop2 = conditionCode(e22); + + docommas(cdb,e1); + cgstate.stackclean++; + + if (0 && !OTrel(op1) && e1 == e21 && + sz1 <= REGSIZE && !tyfloating(e1.Ety)) + { // Recognize (e ? e : f) + + code *cnop1 = gen1(null, INSTR.nop); + regm_t retregs = pretregs | mPSW; + codelem(cgstate,cdb,e1,retregs,false); + + cse_flush(cdb,1); // flush CSEs to memory + genBranch(cdb,jop,FLcode,cast(block *)cnop1); + freenode(e21); + + const regconsave = cgstate.regcon; + const stackpushsave = cgstate.stackpush; + + retregs |= psw; + if (retregs & (mBP | ALLREGS)) + cgstate.regimmed_set(findreg(retregs),0); + codelem(cgstate,cdb,e22,retregs,false); + + andregcon(regconsave); + assert(stackpushsave == cgstate.stackpush); + + pretregs = retregs; + freenode(e2); + cdb.append(cnop1); + cgstate.stackclean--; + return; + } + + uint sz2; + if (0 && OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE && + !e1.Ecount && + (jop == COND.cs || jop == COND.cc) && + (sz2 = tysize(e2.Ety)) <= REGSIZE && + e21.Eoper == OPconst && + e22.Eoper == OPconst + ) + { + uint sz = tysize(e.Ety); + uint rex = (I64 && sz == 8) ? 
REX_W : 0; + uint grex = rex << 16; + + regm_t retregs; + targ_size_t v1,v2; + + if (sz2 != 1 || I64) + { + retregs = pretregs & (ALLREGS | mBP); + if (!retregs) + retregs = ALLREGS; + } + else + { + retregs = pretregs & BYTEREGS; + if (!retregs) + retregs = BYTEREGS; + } + + cg.cmp_flag = 1 | rex; + v1 = cast(targ_size_t)e21.Vllong; + v2 = cast(targ_size_t)e22.Vllong; + if (jop == JNC) + { v1 = v2; + v2 = cast(targ_size_t)e21.Vllong; + } + + opcode_t opcode = 0x81; + switch (sz2) + { case 1: opcode--; + v1 = cast(byte) v1; + v2 = cast(byte) v2; + break; + + case 2: v1 = cast(short) v1; + v2 = cast(short) v2; + break; + + case 4: v1 = cast(int) v1; + v2 = cast(int) v2; + break; + default: + break; + } + + if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1) + { + // only zero-extension from 32-bits is available for 'or' + } + else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2) + { + // only sign-extension from 32-bits is available for 'and' + } + else + { + codelem(cgstate,cdb,e1,retregs,false); + const reg = findreg(retregs); + + if (v1 == 0 && v2 == ~cast(targ_size_t)0) + { + cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg)); // NOT reg + if (I64 && sz2 == REGSIZE) + code_orrex(cdb.last(), REX_W); + if (I64 && sz2 == 1 && reg >= 4) + code_orrex(cdb.last(), REX); + } + else + { + v1 -= v2; + cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1); // AND reg,v1-v2 + if (I64 && sz2 == 1 && reg >= 4) + code_orrex(cdb.last(), REX); + if (v2 == 1 && !I64) + cdb.gen1(0x40 + reg); // INC reg + else if (v2 == -1L && !I64) + cdb.gen1(0x48 + reg); // DEC reg + else + { cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2); // ADD reg,v2 + if (I64 && sz2 == 1 && reg >= 4) + code_orrex(cdb.last(), REX); + } + } + + freenode(e21); + freenode(e22); + freenode(e2); + + fixresult(cdb,e,retregs,pretregs); + cgstate.stackclean--; + return; + } + } + + if (0 && op1 != OPcond && op1 != OPandand && op1 != OPoror && + op1 != OPnot && op1 != OPbool && + e21.Eoper == OPconst 
&& + sz1 <= REGSIZE && + pretregs & (mBP | ALLREGS) && + tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety)) + { // Recognize (e ? c : f) + + code *cnop1 = gen1(null, INSTR.nop); + regm_t retregs = mPSW; + jop = conditionCode(e1); // get jmp condition + codelem(cgstate,cdb,e1,retregs,false); + + // Set the register with e21 without affecting the flags + retregs = pretregs & (ALLREGS | mBP); + if (retregs & ~cgstate.regcon.mvar) + retregs &= ~cgstate.regcon.mvar; // don't disturb register variables + // NOTE: see my email (sign extension bug? possible fix, some questions + const reg = regwithvalue(cdb,retregs,cast(targ_size_t)e21.Vllong, + tysize(e21.Ety) == 8 ? 64|8 : 8); + retregs = mask(reg); + + cse_flush(cdb,1); // flush CSE's to memory + genBranch(cdb,jop,FLcode,cast(block *)cnop1); + freenode(e21); + + const regconsave = cgstate.regcon; + const stackpushsave = cgstate.stackpush; + + codelem(cgstate,cdb,e22,retregs,false); + + andregcon(regconsave); + assert(stackpushsave == cgstate.stackpush); + + freenode(e2); + cdb.append(cnop1); + fixresult(cdb,e,retregs,pretregs); + cgstate.stackclean--; + return; + } + + code *cnop1 = gen1(null, INSTR.nop); + code *cnop2 = gen1(null, INSTR.nop); // dummy target addresses + logexp(cdb,e1,false,FLcode,cnop1); // evaluate condition + const regconold = cgstate.regcon; + const stackpushold = cgstate.stackpush; + regm_t retregs = pretregs; + CodeBuilder cdb1; + cdb1.ctor(); + if (psw && jop1 != COND.ne) + { + retregs &= ~mPSW; + if (!retregs) + retregs = ALLREGS; + codelem(cgstate,cdb1,e21,retregs,false); + fixresult(cdb1,e21,retregs,pretregs); + } + else + codelem(cgstate,cdb1,e21,retregs,false); + + if (CPP && e2.Eoper == OPcolon2) + { + code cs; + + // This is necessary so that any cleanup code on one branch + // is redone on the other branch. 
+ cs.Iop = PSOP.mark2; + cs.Iflags = 0; + cs.Irex = 0; + cdb.gen(&cs); + cdb.append(cdb1); + cs.Iop = PSOP.release2; + cdb.gen(&cs); + } + else + cdb.append(cdb1); + + const regconsave = cgstate.regcon; + cgstate.regcon = cast()regconold; + + const stackpushsave = cgstate.stackpush; + cgstate.stackpush = stackpushold; + + retregs |= psw; // PSW bit may have been trashed + pretregs |= psw; + CodeBuilder cdb2; + cdb2.ctor(); + if (psw && jop2 != COND.ne) + { + retregs &= ~mPSW; + if (!retregs) + retregs = ALLREGS; + codelem(cgstate,cdb2,e22,retregs,false); + fixresult(cdb2,e22,retregs,pretregs); + } + else + codelem(cgstate,cdb2,e22,retregs,false); // use same regs as E1 + pretregs = retregs | psw; + andregcon(regconold); + andregcon(regconsave); + assert(cgstate.stackpush == stackpushsave); + freenode(e2); + genBranch(cdb,COND.al,FLcode,cast(block *) cnop2); + cdb.append(cnop1); + cdb.append(cdb2); + cdb.append(cnop2); + + cgstate.stackclean--; +} + +// cdcomma + +/********************************* + * Do && and || operators. + * Generate: + * (evaluate e1 and e2, if true goto cnop1) + * cnop3: NOP + * cg: [save reg code] ;if we must preserve reg + * CLR reg ;false result (set Z also) + * JMP cnop2 + * + * cnop1: NOP ;if e1 evaluates to true + * [save reg code] ;preserve reg + * + * MOV reg,1 ;true result + * or + * CLR reg ;if return result in flags + * INC reg + * + * cnop2: NOP ;mark end of code + */ + +@trusted +void cdloglog(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + /* We can trip the assert with the following: + * if ( (b<=a) ? (c=a ) + * We'll generate ugly code for it, but it's too obscure a case + * to expend much effort on it. + * assert(pretregs != mPSW); + */ + + //printf("cdloglog() pretregs: %s\n", regm_str(pretregs)); + cgstate.stackclean++; + code *cnop1 = gen1(null, INSTR.nop); + CodeBuilder cdb1; + cdb1.ctor(); + cdb1.append(cnop1); + code *cnop3 = gen1(null, INSTR.nop); + elem *e2 = e.E2; + (e.Eoper == OPoror) + ? 
logexp(cdb,e.E1,1,FLcode,cnop1) + : logexp(cdb,e.E1,0,FLcode,cnop3); + con_t regconsave = cgstate.regcon; + uint stackpushsave = cgstate.stackpush; + if (pretregs == 0) // if don't want result + { + int noreturn = !el_returns(e2); + codelem(cgstate,cdb,e2,pretregs,false); + if (noreturn) + { + regconsave.used |= cgstate.regcon.used; + cgstate.regcon = regconsave; + } + else + andregcon(regconsave); + assert(cgstate.stackpush == stackpushsave); + cdb.append(cnop3); + cdb.append(cdb1); // eval code, throw away result + cgstate.stackclean--; + return; + } + + if (tybasic(e2.Ety) == TYnoreturn) + { + regm_t retregs2 = 0; + codelem(cgstate,cdb, e2, retregs2, false); + regconsave.used |= cgstate.regcon.used; + cgstate.regcon = regconsave; + assert(cgstate.stackpush == stackpushsave); + + regm_t retregs = pretregs & (ALLREGS | mBP); + if (!retregs) + retregs = ALLREGS; // if mPSW only + + const reg = allocreg(cdb1,retregs,TYint); // allocate reg for result + movregconst(cdb1,reg,e.Eoper == OPoror,pretregs & mPSW); + cgstate.regcon.immed.mval &= ~mask(reg); // mark reg as unavail + pretregs = retregs; + + cdb.append(cnop3); + cdb.append(cdb1); // eval code, throw away result + cgstate.stackclean--; + return; + } + + code *cnop2 = gen1(null, INSTR.nop); + uint sz = tysize(e.Ety); + if (tybasic(e2.Ety) == TYbool && + sz == tysize(e2.Ety) && + !(pretregs & mPSW) && + e2.Eoper == OPcall) + { + codelem(cgstate,cdb,e2,pretregs,false); + + andregcon(regconsave); + + // stack depth should not change when evaluating E2 + assert(cgstate.stackpush == stackpushsave); + + assert(sz <= 4); // result better be int + regm_t retregs = pretregs & cgstate.allregs; + const reg = allocreg(cdb1,retregs,TYint); // allocate reg for result + movregconst(cdb1,reg,e.Eoper == OPoror,0); // reg = 1 + cgstate.regcon.immed.mval &= ~mask(reg); // mark reg as unavail + pretregs = retregs; + if (e.Eoper == OPoror) + { + cdb.append(cnop3); + genBranch(cdb,COND.al,FLcode,cast(block *) cnop2); // JMP cnop2 + 
cdb.append(cdb1); + cdb.append(cnop2); + } + else + { + genBranch(cdb,COND.al,FLcode,cast(block *) cnop2); // JMP cnop2 + cdb.append(cnop3); + cdb.append(cdb1); + cdb.append(cnop2); + } + cgstate.stackclean--; + return; + } + + logexp(cdb,e2,1,FLcode,cnop1); + andregcon(regconsave); + + // stack depth should not change when evaluating E2 + assert(cgstate.stackpush == stackpushsave); + + assert(sz <= 4); // result better be int + regm_t retregs = pretregs & (ALLREGS | mBP); + if (!retregs) + retregs = ALLREGS; // if mPSW only + CodeBuilder cdbcg; + cdbcg.ctor(); + const reg = allocreg(cdbcg,retregs,TYint); // allocate reg for result + code *cd = cdbcg.finish(); + for (code *c1 = cd; c1; c1 = code_next(c1)) // for each instruction + cdb1.gen(c1); // duplicate it + CodeBuilder cdbcg2; + cdbcg2.ctor(); + movregconst(cdbcg2,reg,0,pretregs & mPSW); // MOV reg,0 + cgstate.regcon.immed.mval &= ~mask(reg); // mark reg as unavail + genBranch(cdbcg2,COND.al,FLcode,cast(block *) cnop2); // JMP cnop2 + movregconst(cdb1,reg,1,pretregs & mPSW); // reg = 1 + cgstate.regcon.immed.mval &= ~mask(reg); // mark reg as unavail + pretregs = retregs; + cdb.append(cnop3); + cdb.append(cd); + cdb.append(cdbcg2); + cdb.append(cdb1); + cdb.append(cnop2); + cgstate.stackclean--; +} + + + +/********************* + * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror). 
+ */ + +@trusted +void cdshift(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdshift()\n"); + + elem* e1 = e.E1; + elem* e2 = e.E2; + if (pretregs == 0) // if don't want result + { + codelem(cgstate,cdb,e1,pretregs,false); // eval left leaf + pretregs = 0; // in case they got set + codelem(cgstate,cdb,e.E2,pretregs,false); + return; + } + + tym_t tyml = tybasic(e1.Ety); + int sz = _tysize[tyml]; + assert(!tyfloating(tyml)); + + regm_t posregs = cg.allregs; + regm_t retregs1 = posregs; + codelem(cg, cdb, e1, retregs1, false); + regm_t retregs2 = cg.allregs & ~retregs1; + scodelem(cg, cdb, e2, retregs2, retregs1, false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, tyml); + + reg_t Rn = findreg(retregs1); + reg_t Rm = findreg(retregs2); + + /* https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#dp_2src + * https://www.scs.stanford.edu/~zyedidia/arm64/lsl_lslv.html + * https://www.scs.stanford.edu/~zyedidia/arm64/lsr_lsrv.html + * https://www.scs.stanford.edu/~zyedidia/arm64/asr_asrv.html + * https://www.scs.stanford.edu/~zyedidia/arm64/ror_rorv.html + */ + + uint sf = sz == 8; + uint S = 0; + uint opcode; + switch (e.Eoper) + { + case OPshl: opcode = 0x8; break; + case OPshr: opcode = 0x9; break; + case OPashr: opcode = 0xA; break; + case OPror: opcode = 0xB; break; + case OProl: assert(0); // should have rewritten (a rol b) as (a ror -b) + default: + assert(0); + } + cdb.gen1(INSTR.dp_2src(sf, S, Rm, opcode, Rn, Rd)); + + fixresult(cdb,e,retregs,pretregs); +} + +/*************************** + * Perform a 'star' reference (indirection). 
+ */ + +@trusted +void cdind(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdind()\n"); + //elem_print(e); + if (pretregs == 0) + { + codelem(cgstate,cdb,e.E1,pretregs,false); + return; + } + const tym = tybasic(e.Ety); + const sz = _tysize[tym]; + if (tyfloating(tym)) + { + assert(0); + } + + const tym1 = tybasic(e.E1.Ety); + const sz1 = _tysize[tym1]; + const uns = tyuns(tym1) != 0; + + const posregs = cgstate.allregs; + regm_t retregs1 = posregs; + codelem(cgstate,cdb,e.E1,retregs1,false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rt = allocreg(cdb, retregs, tym); + + const Rn = findreg(retregs1); + + uint size; + uint VR = 0; + uint opc; + + uint decode(uint to, uint from, bool uns) { return to * 4 * 2 + from * 2 + uns; } + + switch (decode(4, sz, uns)) + { + /* + int = *byte ldrsb w0,[x1] 39C00020 + int = *ubyte ldrb w0,[x1] 39400020 + int = *short ldrsh w0,[x1] 79C00020 + int = *ushort ldrh w0,[x1] 79400020 + int = *int ldr w0,[x1] B9400020 + int = *uint ldr w0,[x1] B9400020 + + long = *byte ldrsb x0,[x1] 39800020 + long = *ubyte ldrb x0,[x1] 39400020 + long = *short ldrsh x0,[x1] 79800020 + long = *ushort ldrh x0,[x1] 79400020 + long = *int ldrsw x0,[x1] B9800020 + long = *uint ldr x0,[x1] B9400020 + long = *long ldr x0,[x1] B9400020 + */ + + case decode(4, 1, 0): size = 0; opc = 3; break; // ldrsb + case decode(4, 1, 1): size = 0; opc = 1; break; // ldrb + case decode(4, 2, 0): size = 1; opc = 2; break; // ldrsh + case decode(4, 2, 1): size = 1; opc = 1; break; // ldrh + case decode(4, 4, 0): + case decode(4, 4, 1): size = 2; opc = 1; break; // ldr 32 + + case decode(8, 1, 0): size = 0; opc = 2; break; // ldrsb + case decode(8, 1, 1): size = 0; opc = 1; break; // ldrb + case decode(8, 2, 0): size = 1; opc = 2; break; // ldrsh + case decode(8, 2, 1): size = 1; opc = 1; break; // ldrh 
+ case decode(8, 4, 0): size = 2; opc = 2; break; // ldrsw + case decode(8, 4, 1): size = 2; opc = 1; break; // ldr 32 + + case decode(8, 8, 0): + case decode(8, 8, 1): size = 3; opc = 1; break; // ldr 64 + default: + printf("%d %d %d\n", sz, sz1, uns); + assert(0); + } + + uint imm12 = 0; + uint ins = (size << 30) | + (7 << 27) | + (VR << 26) | + (1 << 24) | + (opc << 22) | + (imm12 << 10) | + (Rn << 5) | + Rt; + cdb.gen1(ins); + + fixresult(cdb,e,retregs,pretregs); +} + + +// cdrelconst +/********************************* + * Load the offset portion of the address represented by e into + * reg. + */ + +@trusted +void getoffset(ref CGstate cg, ref CodeBuilder cdb,elem *e,reg_t reg) +{ + enum log = false; + if (log) printf("getoffset(e = %p, reg = %s)\n", e, regm_str(mask(reg))); + code cs = void; + cs.Iflags = 0; + ubyte rex = 0; + cs.Irex = rex; + assert(e.Eoper == OPvar || e.Eoper == OPrelconst); + auto fl = el_fl(e); + //printf("fl: %s\n", fl_str(fl)); + //symbol_print(*e.Vsym); + switch (fl) + { + case FLdatseg: + cs.IEV1.Vpointer = e.Vpointer; + goto L3; + + case FLtlsdata: + if (config.exe & EX_posix) + { + if (log) printf("posix threaded\n"); + uint ins = INSTR.systemmove(1,INSTR.tpidr_el0,reg); // MRS reg,tpidr_el0 + cdb.gen1(ins); + + ins = INSTR.addsub_imm(1,0,0,1,0,reg,reg); // ADD reg,reg,#0,lsl #12 + cdb.gencs1(ins,0,fl,e.Vsym); + + ins = INSTR.addsub_imm(1,0,0,0,0,reg,reg); // ADD reg,reg,#0 + cdb.gencs1(ins,0,fl,e.Vsym); + cdb.last.Iflags |= CFadd; + return; + } + assert(0); +static if (0) +{ + if (config.exe & EX_posix) + { + if (config.flags3 & CFG3pic) + { + if (I64) + { + /* Generate: + * LEA DI,s@TLSGD[RIP] + */ + //assert(reg == DI); + code css = void; + css.Irex = REX | REX_W; + css.Iop = LEA; + css.Irm = modregrm(0,reg,5); + if (reg & 8) + css.Irex |= REX_R; + css.Iflags = CFopsize; + css.IFL1 = cast(ubyte)fl; + css.IEV1.Vsym = e.Vsym; + css.IEV1.Voffset = e.Voffset; + cdb.gen(&css); + } + else + { + /* Generate: + * LEA EAX,s@TLSGD[1*EBX+0] 
+ */ + assert(reg == AX); + load_localgot(cdb); + code css = void; + css.Iflags = 0; + css.Iop = LEA; // LEA + css.Irex = 0; + css.Irm = modregrm(0,AX,4); + css.Isib = modregrm(0,BX,5); + css.IFL1 = cast(ubyte)fl; + css.IEV1.Vsym = e.Vsym; + css.IEV1.Voffset = e.Voffset; + cdb.gen(&css); + } + return; + } + /* Generate: + * MOV reg,GS:[00000000] + * ADD reg, offset s@TLS_LE + * for locals, and for globals: + * MOV reg,GS:[00000000] + * ADD reg, s@TLS_IE + * note different fixup + */ + int stack = 0; + if (reg == STACK) + { regm_t retregs = ALLREGS; + + const regx = allocreg(cdb,retregs,TYoffset); + reg = findreg(retregs); + stack = 1; + } + + code css = void; + css.Irex = rex; + css.Iop = 0x8B; + css.Irm = modregrm(0, 0, BPRM); + code_newreg(&css, reg); + css.Iflags = CFgs; + css.IFL1 = FLconst; + css.IEV1.Vuns = 0; + cdb.gen(&css); // MOV reg,GS:[00000000] + + if (e.Vsym.Sclass == SC.static_ || e.Vsym.Sclass == SC.locstat) + { // ADD reg, offset s + cs.Irex = rex; + cs.Iop = 0x81; + cs.Irm = modregrm(3,0,reg & 7); + if (reg & 8) + cs.Irex |= REX_B; + cs.Iflags = CFoff; + cs.IFL1 = cast(ubyte)fl; + cs.IEV1.Vsym = e.Vsym; + cs.IEV1.Voffset = e.Voffset; + } + else + { // ADD reg, s + cs.Irex = rex; + cs.Iop = 0x03; + cs.Irm = modregrm(0,0,BPRM); + code_newreg(&cs, reg); + cs.Iflags = CFoff; + cs.IFL1 = cast(ubyte)fl; + cs.IEV1.Vsym = e.Vsym; + cs.IEV1.Voffset = e.Voffset; + } + cdb.gen(&cs); // ADD reg, xxxx + + if (stack) + { + cdb.genpush(reg); // PUSH reg + cdb.genadjesp(REGSIZE); + cgstate.stackchanged = 1; + } + } + else if (config.exe & EX_windos) + { + if (I64) + { + Lwin64: + assert(reg != STACK); + cs.IEV1.Vsym = e.Vsym; + cs.IEV1.Voffset = e.Voffset; + cs.Iop = 0xB8 + (reg & 7); // MOV Ereg,offset s + if (reg & 8) + cs.Irex |= REX_B; + cs.Iflags = CFoff; // want offset only + cs.IFL1 = cast(ubyte)fl; + cdb.gen(&cs); + break; + } + goto L4; + } + else + { + goto L4; + } +} + + case FLfunc: + fl = FLextern; /* don't want PC relative addresses */ + goto L4; + 
+ case FLextern: + if (config.exe & EX_posix && e.Vsym.ty() & mTYthread) + { + if (log) printf("posix extern threaded\n"); + regm_t scratch = ALLREGS & ~mask(reg); + reg_t r = allocreg(cdb, scratch, TYoffset); + uint ins = INSTR.systemmove(1,INSTR.tpidr_el0,r); // MRS r,tpidr_el0 + cdb.gen1(ins); + + ins = INSTR.adr(1,0,reg); // ADRP reg,0 + cdb.gencs1(ins,0,fl,e.Vsym); + + ins = INSTR.ldr_imm_gen(1,reg,reg,0); // LDR reg,[reg] + cdb.gencs1(ins,0,fl,e.Vsym); + cdb.last.Iflags |= CFadd; + + cdb.gen1(INSTR.addsub_shift(1,0,0,0,reg,0,r,reg)); // ADD reg,r,reg + return; + } +// if (config.exe & EX_WIN64 && e.Vsym.ty() & mTYthread) +// goto Lwin64; + goto L4; + + case FLdata: + case FLudata: + case FLgot: + case FLgotoff: + case FLcsdata: + L4: + cs.IEV1.Vsym = e.Vsym; + cs.IEV1.Voffset = e.Voffset; + L3: + if (reg == STACK) + { cgstate.stackchanged = 1; + cs.Iop = 0x68; /* PUSH immed16 */ + cdb.genadjesp(REGSIZE); + } + else + { + uint ins = INSTR.adr(1,0,reg); // ADRP reg,0 + cdb.gencs1(ins,0,fl,e.Vsym); + + cs.Iop = INSTR.addsub_imm(1,0,0,0,0,reg,reg); // ADD reg,reg,#0 + cs.Iflags |= CFadd; + } + //cs.Iflags = CFoff; /* want offset only */ + cs.IFL1 = cast(ubyte)fl; + cdb.gen(&cs); + break; + + case FLreg: + /* Allow this since the tree optimizer puts & in front of */ + /* register doubles. 
*/ + goto L2; + case FLauto: + case FLfast: + case FLbprel: + case FLfltreg: + cgstate.reflocal = true; + goto L2; + case FLpara: + cgstate.refparam = true; + L2: + if (reg == STACK) + { regm_t retregs = ALLREGS; + + const regx = allocreg(cdb,retregs,TYoffset); + reg = findreg(retregs); + loadea(cdb,e,cs,LEA,reg,0,0,0); // LEA reg,EA + if (I64) + code_orrex(cdb.last(), REX_W); + cdb.genpush(reg); // PUSH reg + cdb.genadjesp(REGSIZE); + cgstate.stackchanged = 1; + } + else + { + loadea(cdb,e,cs,LEA,reg,0,0,0); // LEA reg,EA + if (I64) + code_orrex(cdb.last(), REX_W); + } + break; + + default: + debug + { + elem_print(e); + printf("e.fl = %s\n", fl_str(el_fl(e))); + } + assert(0); + } +} + +/****************** + * OPneg, not OPsqrt OPsin OPcos OPrint + */ + +@trusted +void cdneg(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs) +{ + //printf("cdneg()\n"); + //elem_print(e); + if (pretregs == 0) + { + codelem(cgstate,cdb,e.E1,pretregs,false); + return; + } + const tyml = tybasic(e.E1.Ety); + const sz = _tysize[tyml]; + if (tyfloating(tyml)) + { + assert(0); + } + + const posregs = cgstate.allregs; + regm_t retregs1 = posregs; + codelem(cgstate,cdb,e.E1,retregs1,false); + + regm_t retregs = pretregs & cg.allregs; + if (retregs == 0) /* if no return regs speced */ + /* (like if wanted flags only) */ + retregs = ALLREGS & posregs; // give us some + reg_t Rd = allocreg(cdb, retregs, tyml); + + const Rm = findreg(retregs1); + + /* NEG https://www.scs.stanford.edu/~zyedidia/arm64/neg_sub_addsub_shift.html + * NEGS https://www.scs.stanford.edu/~zyedidia/arm64/negs_subs_addsub_shift.html + */ + + uint sf = sz == 8; + uint op = 1; + uint S = (pretregs & mPSW) != 0; + uint opt = 0; + uint shift = 0; + uint imm6 = 0; + uint ins = (sf << 31) | + (op << 30) | + (S << 29) | + (0xB << 24) | + (shift << 22) | + (0 << 21) | + (Rm << 16) | + (imm6 << 10) | + (0x1F << 5) | + Rd; + cdb.gen1(ins); + + pretregs &= ~mPSW; // flags already set + 
fixresult(cdb,e,retregs,pretregs);
}

/******************
 * Absolute value operator, OPabs
 * Generates CMP Rn,#0 followed by CNEG Rd,Rn,lt (negate only if negative).
 */

@trusted
void cdabs(ref CGstate cg, ref CodeBuilder cdb,elem *e, ref regm_t pretregs)
{
    //printf("cdabs(e = %p, pretregs = %s)\n", e, regm_str(pretregs));
    if (pretregs == 0)
    {
        codelem(cgstate,cdb,e.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.E1.Ety);
    const sz = _tysize[tyml];
    if (tyfloating(tyml))
    {
        assert(0);                          // floating point not implemented yet
    }

    const posregs = cgstate.allregs;
    regm_t retregs1 = posregs;
    codelem(cgstate,cdb,e.E1,retregs1,false);

    regm_t retregs = pretregs & cg.allregs;
    if (retregs == 0)                       /* if no return regs speced      */
                                            /* (like if wanted flags only)   */
        retregs = ALLREGS & posregs;        // give us some
    reg_t Rd = allocreg(cdb, retregs, tyml);

    const Rn = findreg(retregs1);

    /* CMP https://www.scs.stanford.edu/~zyedidia/arm64/cmp_subs_addsub_imm.html
     * CMP Rn,0
     */
    uint sf = sz == 8;
    uint op = 1;
    uint S = 1;
    uint sh = 0;
    uint imm12 = 0;
    uint ins = (sf << 31) |
               (op << 30) |
               (S << 29) |
               (0x22 << 23) |
               (sh << 22) |
               (imm12 << 10) |
               (Rn << 5) |
               0x1F;
    cdb.gen1(ins);

    /* CNEG https://www.scs.stanford.edu/~zyedidia/arm64/cneg_csneg.html
     * CNEG Rd,Rn,lt
     */
    op = 1;
    S = 0;
    uint Rm = Rn;
    uint cond = 0xA ^ 1;                    // LT
    uint o2 = 1;
    ins = (sf << 31) |
          (op << 30) |
          (S << 29) |
          (0xD4 << 21) |
          (Rm << 16) |
          (cond << 12) |
          (0 << 11) |
          (o2 << 10) |
          (Rn << 5) |
          Rd;
    cdb.gen1(ins);

    pretregs &= ~mPSW;                      // flags already set (by the CMP above)
    fixresult(cdb,e,retregs,pretregs);
}

/**************************
 * Post increment and post decrement.
 * OPpostinc, OPpostdec
 * Loads the old value (the expression result), then ADDs/SUBs the constant
 * e2 and stores the updated value back into the lvalue.
 */
@trusted
void cdpost(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(pretregs));
    code cs = void;
    const op = e.Eoper;                     // OPxxxx
    if (pretregs == 0)                      // if nothing to return
    {
        cdaddass(cgstate,cdb,e,pretregs);   // value unused: degenerate to += / -=
        return;
    }
    const tym_t tyml = tybasic(e.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.E2;

    if (0 && tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }
    }
    if (0 && tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    assert(e2.Eoper == OPconst);
    regm_t possregs = cgstate.allregs;
    getlvalue(cdb,cs,e.E1,0);
    freenode(e.E1);
    /* BUGFIX: was `if (cs.reg && ...)` — cs.reg's "no register" sentinel is
     * NOREG (see loadFromEA's `cs.reg != NOREG` test), which is nonzero, and
     * register 0 (x0) is a valid register; truthiness tested the wrong thing.
     */
    if (cs.reg != NOREG && pretregs == mPSW)
    {
        // Lvalue lives in a register and only flags are wanted:
        // test the old value, then update the register in place.
        gentstreg(cdb,cs.reg,sz == 8);      // CMP cs.reg,#0

        // If lvalue is a register variable, we must mark it as modified
        getregs(cdb,cs.reg);

        const n = e2.Vint;
        /* BUGFIX: was `OPpostinc ? 0 : 1` which tests the (nonzero) enum
         * constant itself and is therefore always 0, emitting ADD for
         * post-decrement too.  Test the node's operator instead. */
        uint opx = op == OPpostinc ? 0 : 1;
        uint ins = INSTR.addsub_imm(sz == 8,opx,0,0,n,cs.reg,cs.reg); // ADD/SUB cs.reg,cs.reg,n
        cdb.gen1(ins);
        freenode(e2);
        return;
    }
    else if (sz <= REGSIZE)
    {
        regm_t idxregs = mask(cs.base) | mask(cs.index); // mask of index regs used
        regm_t retregs = possregs & ~idxregs & pretregs;
        if (retregs == 0)
            retregs = possregs & ~idxregs;

        const reg = allocreg(cdb,retregs,TYint);

        loadFromEA(cs,reg,sz == 8 ? 8 : 4,sz);

        cdb.gen(&cs);                       // MOV reg,EA

        if (pretregs & mPSW)
        {
            gentstreg(cdb,reg,sz == 8);     // CMP reg,#0
            pretregs &= ~mPSW;
        }

        /* If lvalue is a register variable, we must mark it as modified */
        getregs(cdb,reg);

        const n = e2.Vint;
        /* BUGFIX: same always-ADD defect as above; see first occurrence. */
        uint opx = op == OPpostinc ? 0 : 1;
        /* NOTE(review): this site passes S=1 (ADDS/SUBS) where the branch
         * above passes S=0 — setting flags here clobbers the NZCV produced
         * by the gentstreg test just emitted; confirm S should be 0.
         * NOTE(review): the add targets cs.reg while storeToEA stores `reg`
         * (the old value) — for a memory EA cs.reg is NOREG; confirm the
         * intended register here. Left as in the original. */
        uint ins = INSTR.addsub_imm(sz == 8,opx,1,0,n,cs.reg,cs.reg); // ADD/SUB cs.reg,cs.reg,n
        cdb.gen1(ins);

        storeToEA(cs,reg,sz);
        cdb.gen(&cs);                       // MOV EA,reg

        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (0 && sz == 2 * REGSIZE)
    {
        /+ regm_t retregs = cgstate.allregs & ~idxregs & pretregs;
        if ((retregs & mLSW) == 0)
            retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
            retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        const reg = allocreg(cdb,retregs,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB */
        getlvalue_lsw(cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);
        getlvalue_msw(cs);
        cs.IEV2.Vlong = 0;
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB */
        cs.IEV2.Vlong = cast(targ_long)(e2.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA,0
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
        +/
    }
    else
    {
        assert(0);
    }
}

// cddctor
// cdddtor

/*****************************************
 * Halt execution: emit HLT #0 for OPhalt.
 */

@trusted
void cdhalt(ref CGstate cg, ref CodeBuilder cdb,elem *e,ref regm_t pretregs)
{
    assert(pretregs == 0);

    // https://www.scs.stanford.edu/~zyedidia/arm64/hlt.html
    uint imm16 = 0;
    uint ins = (0xD4 << 24) |
               (2 << 21) |
               (imm16 << 5) |
               (0 << 1) |
               0;                           // HLT #0 == 0xD4400000
    cdb.gen1(ins);
} diff --git a/compiler/src/dmd/backend/arm/cod3.d new file mode 100644 index 000000000000..4b3f3a723bb1 --- /dev/null +++ b/compiler/src/dmd/backend/arm/cod3.d @@ -0,0 +1,1455 @@ +/** + * Code generation 3 + * + * 
Includes: + * - generating a function prolog (pushing return address, loading paramters) + * - generating a function epilog (restoring registers, returning) + * - generation / peephole optimizations of jump / branch instructions + * + * Compiler implementation of the + * $(LINK2 https://www.dlang.org, D programming language). + * + * Copyright: Copyright (C) 1994-1998 by Symantec + * Copyright (C) 2000-2024 by The D Language Foundation, All Rights Reserved + * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) + * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/arm/cod3.d, backend/cod3.d) + * Documentation: https://dlang.org/phobos/dmd_backend_arm_cod3.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/arm/cod3.d + */ + +module dmd.backend.arm.cod3; + +import core.bitop; +import core.stdc.stdio; +import core.stdc.stdlib; +import core.stdc.string; + +import dmd.backend.barray; +import dmd.backend.cc; +import dmd.backend.cdef; +import dmd.backend.cgcse; +import dmd.backend.code; +import dmd.backend.x86.cgcod : disassemble; +import dmd.backend.x86.code_x86; +import dmd.backend.x86.cod3; +import dmd.backend.codebuilder; +import dmd.backend.dlist; +import dmd.backend.dvec; +import dmd.backend.melf; +import dmd.backend.mem; +import dmd.backend.el; +import dmd.backend.global; +import dmd.backend.obj; +import dmd.backend.oper; +import dmd.backend.rtlsym; +import dmd.backend.symtab; +import dmd.backend.ty; +import dmd.backend.type; +import dmd.backend.x86.xmm; + +import dmd.backend.arm.instr; + +nothrow: +@safe: + + +/************************************************* + * Generate code to save `reg` in `regsave` stack area. 
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

@trusted
void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (!regsave.alignment)
        regsave.alignment = REGSIZE;
    idx = regsave.idx;
    regsave.idx += REGSIZE;

    // STR reg, [BP, #idx]
    code cs;
    cs.reg = reg;
    cs.base = cgstate.BP;
    cs.index = NOREG;
    cs.IFL1 = FLregsave;        // offset fixed up later by assignaddrc()
    cs.Iop = INSTR.str_imm_gen(1,reg,cs.base,idx);
    cdb.gen(&cs);

    cgstate.reflocal = true;
    if (regsave.idx > regsave.top)
        regsave.top = regsave.idx;              // keep high water mark
}

/*******************************
 * Restore `reg` from `regsave` area.
 * Complement REGSAVE_save().
 */

@trusted
void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    // LDR reg,[BP, #idx]
    code cs;
    cs.reg = reg;
    cs.base = cgstate.BP;
    cs.index = NOREG;
    cs.IFL1 = FLregsave;        // offset fixed up later by assignaddrc()
    cs.Iop = INSTR.ldr_imm_gen(1,reg,cs.base,idx);
    cdb.gen(&cs);
}


// Returns: true if ins is an AArch64 conditional branch (B.cond) instruction
// https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
bool isBranch(uint ins) { return (ins & ((0xFF << 24) | (1 << 4))) == ((0x54 << 24) | (0 << 4)); }

enum MARS = true;

// outswitab

/* AArch64 condition codes
 * (values match the 4-bit cond field of B.cond / CSxx instructions)
 */
enum COND : ubyte
{
    eq,ne,cs,cc,mi,pl,vs,vc,
    hi,ls,ge,lt,gt,le,al,nv,
}

/*****************************
 * Equivalent to x86/cod3/jmpopcode
 * Returns: a condition code relevant to the elem for a JMP true.
 */
@trusted
COND conditionCode(elem *e)
{
    //printf("conditionCode()\n"); elem_print(e);

    assert(e);
    // Skip over comma expressions and simple assignments;
    // the right operand determines the condition.
    while (e.Eoper == OPcomma ||
           /* The OTleaf(e.E1.Eoper) is to line up with the case in cdeq() where    */
           /* we decide if mPSW is passed on when evaluating E2 or not.             */
           (e.Eoper == OPeq && OTleaf(e.E1.Eoper)))
    {
        e = e.E2;                       /* right operand determines it */
    }

    OPER op = e.Eoper;
    tym_t tymx = tybasic(e.Ety);

    if (e.Ecount != e.Ecomsub)          // comsubs just get Z bit set
    {
        return COND.ne;
    }
    if (!OTrel(op))                     // not relational operator
    {
        // Strip value-preserving unsigned widening conversions
        if (op == OPu32_64) { e = e.E1; op = e.Eoper; }
        if (op == OPu16_32) { e = e.E1; op = e.Eoper; }
        if (op == OPu8_16) op = e.E1.Eoper;
        // Bit-test operators set the Carry flag; everything else tests non-zero
        return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? COND.cs : COND.ne;
    }

    int zero;                           // true if comparing against constant 0
    if (e.E2.Eoper == OPconst)
        zero = !boolres(e.E2);
    else
        zero = 0;

    const tym = e.E1.Ety;
    int i;                              // 0: signed comparison, 1: unsigned
    if (tyfloating(tym))
    {
        i = 0;
    }
    else if (tyuns(tym) || tyuns(e.E2.Ety))
        i = 1;
    else if (tyintegral(tym) || typtr(tym))
        i = 0;
    else
    {
        debug
        elem_print(e);
        printf("%s\n", tym_str(tym));
        assert(0);
    }

    COND jp;
    with (COND)
    {
        // [signed/unsigned][compare-to-zero][operator]
        immutable COND[6][2][2] jops =
        [   /* <=  >   <   >=  ==  !=    <=0 >0  <0  >=0 ==0 !=0 */
           [ [ le, gt, lt, ge, eq, ne], [ le, gt, mi, pl, eq, ne] ], /* signed   */
           [ [ ls, hi, cc, cs, eq, ne], [ ls, ne, nv, al, eq, ne] ], /* uint     */
        ];

        jp = jops[i][zero][op - OPle];          /* table starts with OPle */
    }

    /* Try to rewrite uint comparisons so they rely on just the Carry flag
     */
    if (i == 1 && (jp == COND.hi || jp == COND.ls) &&
        (e.E2.Eoper != OPconst && e.E2.Eoper != OPrelconst))
    {
        jp = (jp == COND.hi) ? COND.cs : COND.cc;
    }

    //debug printf("%s i %d zero %d op x%x jp x%x\n",oper_str(op),i,zero,op,jp);
    return jp;
}


// cod3_ptrchk
// cod3_useBP
// cse_simple
// gen_storecse
// gen_testcse
// gen_loadcse
// cdframeptr
// cdgot
// load_localgot
// obj_namestring
// genregs
// gentstreg
// genpush
// genpop
// genmovreg
// genmulimm
// genshift
// movregconst

/**************
 * Generate a jump instruction.
+ */ + +@trusted +void genBranch(ref CodeBuilder cdb,COND cond,uint fltarg,block *targ) +{ + code cs; + cs.Iop = ((0x54 << 24) | cond); + cs.Iflags = 0; + cs.IFL1 = cast(ubyte)fltarg; // FLblock (or FLcode) + cs.IEV1.Vblock = targ; // target block (or code) + if (fltarg == FLcode) + (cast(code *)targ).Iflags |= CFtarg; + cdb.gen(&cs); +} + +// prolog_ifunc +// prolog_ifunc2 +// prolog_16bit_windows_farfunc +// prolog_frame +// prolog_stackalign +// prolog_frameadj +// prolog_frameadj2 +// prolog_setupalloca +// prolog_saveregs +// epilog_restoreregs +// prolog_genvarargs +// prolog_genva_start +// prolog_gen_win64_varargs +// prolog_namedArgs +// prolog_loadparams + +/******************************* + * Generate and return function epilog. + * Params: + * b = block that returns + * Output: + * cgstate.retsize Size of function epilog + */ + +@trusted +void epilog(block *b) +{ + enum log = false; + if (log) printf("arm.epilog()\n"); + code *cpopds; + reg_t reg; + reg_t regx; // register that's not a return reg + regm_t topop,regm; + targ_size_t xlocalsize = localsize; + + CodeBuilder cdbx; cdbx.ctor(); + tym_t tyf = funcsym_p.ty(); + tym_t tym = tybasic(tyf); + bool farfunc = tyfarfunc(tym) != 0; + if (!(b.Bflags & BFL.epilog)) // if no epilog code + goto Lret; // just generate RET + regx = (b.BC == BCret) ? AX : CX; + + cgstate.retsize = 0; + + if (tyf & mTYnaked) // if no prolog/epilog + return; + + if (config.flags & CFGtrace && + (!(config.flags4 & CFG4allcomdat) || + funcsym_p.Sclass == SC.comdat || + funcsym_p.Sclass == SC.global || + (config.flags2 & CFG2comdat && SymInline(funcsym_p)) + ) + ) + { + Symbol *s = getRtlsym(farfunc ? RTLSYM.TRACE_EPI_F : RTLSYM.TRACE_EPI_N); + makeitextern(s); + cdbx.gencs(I16 ? 
0x9A : CALL,0,FLfunc,s); // CALLF _trace + code_orflag(cdbx.last(),CFoff | CFselfrel); + useregs((ALLREGS | mBP | mES) & ~s.Sregsaved); + } + + if (cgstate.usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS)) + { + nteh_epilog(cdbx); + } + + cpopds = null; + + /* Pop all the general purpose registers saved on the stack + * by the prolog code. Remember to do them in the reverse + * order they were pushed. + */ + topop = fregsaved & ~cgstate.mfuncreg; +// epilog_restoreregs(cdbx, topop); // implement + + if (cgstate.usednteh & NTEHjmonitor) + { + regm_t retregs = 0; + if (b.BC == BCretexp) + retregs = regmask(b.Belem.Ety, tym); + nteh_monitor_epilog(cdbx,retregs); + xlocalsize += 8; + } + + if (cgstate.needframe || (xlocalsize && cgstate.hasframe)) + { + if (log) printf("epilog: needframe %d xlocalsize x%x hasframe %d\n", cgstate.needframe, cast(int)xlocalsize, cgstate.hasframe); + assert(cgstate.hasframe); + if (xlocalsize || cgstate.enforcealign) + { + if (config.flags2 & CFG2stomp) + { /* MOV ECX,0xBEAF + * L1: + * MOV [ESP],ECX + * ADD ESP,4 + * CMP EBP,ESP + * JNE L1 + * POP EBP + */ + /* Value should be: + * 1. != 0 (code checks for null pointers) + * 2. be odd (to mess up alignment) + * 3. fall in first 64K (likely marked as inaccessible) + * 4. be a value that stands out in the debugger + */ + assert(I32 || I64); + targ_size_t value = 0x0000BEAF; + reg_t regcx = CX; + cgstate.mfuncreg &= ~mask(regcx); + uint grex = I64 ? 
REX_W << 16 : 0; + cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value); // MOV regcx,value + cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx + code *c1 = cdbx.last(); + cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE); // ADD ESP,REGSIZE + genregs(cdbx,0x39,SP,BP); // CMP EBP,ESP + if (I64) + code_orrex(cdbx.last(),REX_W); + genjmp(cdbx,JNE,FLcode,cast(block *)c1); // JNE L1 + // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779) + cdbx.last().Iflags &= ~CFjmp16; + cdbx.gen1(0x58 + BP); // POP BP + } + else if (config.exe == EX_WIN64) + { // See https://msdn.microsoft.com/en-us/library/tawsa7cb%28v=vs.100%29.aspx + // LEA RSP,0[RBP] + cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0); + cdbx.gen1(0x58 + BP); // POP RBP + } + else + { + if (log) printf("epilog: mov sp,bp\n"); + cdbx.gen1(INSTR.ldstpair_post(2, 0, 1, cast(uint)(16 + localsize) / 8, 30, 31, 29)); // LDP x29,x30,[sp],#16 + localsize + } + } + else + { + if (log) printf("epilog: LDP\n"); + cdbx.gen1(INSTR.ldstpair_post(2, 0, 1, 16 / 8, 30, 31, 29)); // LDP x29,x30,[sp],#16 + } + } + else if (xlocalsize == REGSIZE) + { + if (log) printf("epilog: REGSIZE\n"); + cgstate.mfuncreg &= ~mask(regx); + cdbx.gen1(0x58 + regx); // POP regx + } + else if (xlocalsize) + { + if (log) printf("epilog: xlocalsize %d\n", cast(int)xlocalsize); + cod3_stackadj(cdbx, cast(int)-xlocalsize); + } + + if (b.BC == BCret || b.BC == BCretexp) + { +Lret: + if (log) printf("epilog: Lret\n"); + opcode_t op = INSTR.ret; + if (!typfunc(tym) || // if caller cleans the stack + config.exe == EX_WIN64 || + cgstate.Para.offset == 0) // or nothing pushed on the stack anyway + { + if (log) printf("epilog: RET\n"); + cdbx.gen1(INSTR.ret); // RET + } + else + { // Stack is always aligned on register size boundary + cgstate.Para.offset = (cgstate.Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); + if (cgstate.Para.offset >= 0x10000) + { + /* + POP REG + ADD ESP, 
Para.offset + JMP REG + */ + cdbx.gen1(0x58+regx); + cdbx.genc2(0x81, modregrm(3,0,SP), cgstate.Para.offset); + if (I64) + code_orrex(cdbx.last(), REX_W); + cdbx.genc2(0xFF, modregrm(3,4,regx), 0); + if (I64) + code_orrex(cdbx.last(), REX_W); + } + else + cdbx.genc2(op,0,cgstate.Para.offset); // RET Para.offset + } + } + + // If last instruction in ce is ADD SP,imm, and first instruction + // in c sets SP, we can dump the ADD. + CodeBuilder cdb; cdb.ctor(); + cdb.append(b.Bcode); + code *cr = cdb.last(); + code *c = cdbx.peek(); + + //pinholeopt(c, null); + cgstate.retsize += calcblksize(c); // compute size of function epilog + cdb.append(cdbx); + b.Bcode = cdb.finish(); +} + +// cod3_spoff +// gen_spill_reg +// cod3_thunk +// makeitextern + +/******************************* + * Replace JMPs in Bgotocode with JMP SHORTs whereever possible. + * This routine depends on FLcode jumps to only be forward + * referenced. + * BFL.jmpoptdone is set to true if nothing more can be done + * with this block. + * Input: + * flag !=0 means don't have correct Boffsets yet + * Returns: + * number of bytes saved + */ + +@trusted +int branch(block *bl,int flag) +{ + int bytesaved; + code* c,cn,ct; + targ_size_t offset,disp; + targ_size_t csize; + + if (!flag) + bl.Bflags |= BFL.jmpoptdone; // assume this will be all + c = bl.Bcode; + if (!c) + return 0; + //for (code* cx = c; cx; cx = code_next(cx)) printf("branch cx.Iop = x%08x\n", cx.Iop); + bytesaved = 0; + offset = bl.Boffset; /* offset of start of block */ + while (1) + { + csize = calccodsize(c); + cn = code_next(c); + uint op = c.Iop; + if (isBranch(op)) + { + L1: + switch (c.IFL1) + { + case FLblock: + if (flag) // no offsets yet, don't optimize + goto L3; + disp = c.IEV1.Vblock.Boffset - offset - csize; + + /* If this is a forward branch, and there is an aligned + * block intervening, it is possible that shrinking + * the jump instruction will cause it to be out of + * range of the target. 
This happens if the alignment + * prevents the target block from moving correspondingly + * closer. + */ + if (disp >= (1 << 22) - 5 && c.IEV1.Vblock.Boffset > offset) + { /* Look for intervening alignment + */ + for (block *b = bl.Bnext; b; b = b.Bnext) + { + if (b.Balign) + { + bl.Bflags = cast(BFL)(bl.Bflags & ~cast(uint)BFL.jmpoptdone); // some JMPs left + goto L3; + } + if (b == c.IEV1.Vblock) + break; + } + } + + break; + + case FLcode: + { + code *cr; + + disp = 0; + + ct = c.IEV1.Vcode; /* target of branch */ + assert(ct.Iflags & (CFtarg | CFtarg2)); + for (cr = cn; cr; cr = code_next(cr)) + { + if (cr == ct) + break; + disp += calccodsize(cr); + } + + if (!cr) + { // Didn't find it in forward search. Try backwards jump + int s = 0; + disp = 0; + for (cr = bl.Bcode; cr != cn; cr = code_next(cr)) + { + assert(cr != null); // must have found it + if (cr == ct) + s = 1; + if (s) + disp += calccodsize(cr); + } + } + + if (config.flags4 & CFG4optimized && !flag) + { + /* Propagate branch forward past junk */ + while (1) + { + if (ct.Iop == INSTR.nop || + ct.Iop == PSOP.linnum) + { + ct = code_next(ct); + if (!ct) + goto L2; + } + else + { + c.IEV1.Vcode = ct; + ct.Iflags |= CFtarg; + break; + } + } + + /* And eliminate jmps to jmps */ + if (isBranch(ct.Iop) && + ((op & 0x0F) == (ct.Iop & 0xF) || (ct.Iop & 0xF) == COND.al)) + { + c.IFL1 = ct.IFL1; + c.IEV1.Vcode = ct.IEV1.Vcode; + /*printf("eliminating branch\n");*/ + goto L1; + } + L2: + { } + } + } + break; + + default: + goto L3; + } + + if (disp == 0) // bra to next instruction + { + bytesaved += csize; + c.Iop = INSTR.nop; // del branch instruction + c.IEV1.Vcode = null; + c = cn; + if (!c) + break; + continue; + } + else if (0 && cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) && + cast(targ_size_t)cast(targ_schar)disp == disp) + { + if (op == JMP) + { + c.Iop = JMPS; // JMP SHORT + bytesaved += I16 ? 
1 : 3; + } + else // else Jcond + { + c.Iflags &= ~CFjmp16; // a branch is ok + bytesaved += I16 ? 3 : 4; + + // Replace a cond jump around a call to a function that + // never returns with a cond jump to that function. + if (config.flags4 & CFG4optimized && + config.target_cpu >= TARGET_80386 && + disp == (I16 ? 3 : 5) && + cn && + cn.Iop == CALL && + cn.IFL1 == FLfunc && + cn.IEV1.Vsym.Sflags & SFLexit && + !(cn.Iflags & (CFtarg | CFtarg2)) + ) + { + cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81); + c.Iop = INSTR.nop; + c.IEV1.Vcode = null; + bytesaved++; + + // If nobody else points to ct, we can remove the CFtarg + if (flag && ct) + { + code *cx; + for (cx = bl.Bcode; 1; cx = code_next(cx)) + { + if (!cx) + { + ct.Iflags &= ~CFtarg; + break; + } + if (cx.IEV1.Vcode == ct) + break; + } + } + } + } + csize = calccodsize(c); + } + else + bl.Bflags = cast(BFL)(bl.Bflags & ~cast(uint)BFL.jmpoptdone); // some JMPs left + } +L3: + if (cn) + { + offset += csize; + c = cn; + } + else + break; + } + //printf("bytesaved = x%x\n",bytesaved); + return bytesaved; +} + + +/******************************* + * Set flags for register contents + * Params: + * cdb = code sink + * reg = register to test + * sf = true for 64 bits + */ +void gentstreg(ref CodeBuilder cdb, reg_t reg, uint sf) +{ + // CMP reg,#0 + cdb.gen1(INSTR.cmp_imm(sf, 0, 0, reg)); + code_orflag(cdb.last(),CFpsw); +} + + +/************************** + * Generate a MOV to,from register instruction. + * Smart enough to dump redundant register moves, and segment + * register moves. + */ + +@trusted +void genmovreg(ref CodeBuilder cdb, reg_t to, reg_t from, tym_t ty = TYMAX) +{ + // Only handles integer regs at the moment + uint sf = ty == TYMAX || _tysize[ty] == 8; + cdb.gen1(INSTR.mov_register(sf, from, to)); +} + + +/****************************** + * Move constant value into reg. + * Take advantage of existing values in registers. 
+ * If flags & mPSW + * set flags based on result + * Else if flags & 8 + * do not disturb flags + * Else + * don't care about flags + * If flags & 1 then byte move + * If flags & 2 then short move (for I32 and I64) + * If flags & 4 then don't disturb unused portion of register + * If flags & 16 then reg is a byte register AL..BH + * If flags & 64 (0x40) then 64 bit move (I64 only) + * Params: + * cdb = code sink for generated output + * reg = target register + * value = value to move into register + * flags = options + */ + +@trusted +void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags) +{ + if (!(flags & 64)) + value &= 0xFFFF_FFFF; + //printf("movregconst(reg=%s, value= %lld (%llx), flags=%llx)\n", regm_str(mask(reg)), value, value, flags); + assert(!(flags & (4 | 16))); + + regm_t regm = cgstate.regcon.immed.mval & mask(reg); + targ_size_t regv = cgstate.regcon.immed.value[reg]; + + // If we already have the right value in the right register + if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64)) + { + if (flags & mPSW) + gentstreg(cdb,reg,(flags & 64) != 0); + return; + } + else if (flags & 64 && regm && regv == value) + { // Look at the full 64 bits + if (flags & mPSW) + gentstreg(cdb,reg,(flags & 64) != 0); + return; + } + else + { + + // See if another register has the right value + reg_t r = 0; + for (regm_t mreg = cgstate.regcon.immed.mval; mreg; mreg >>= 1) + { + if (mreg & 1 && cgstate.regcon.immed.value[r] == value) + { + genmovreg(cdb,reg,r); + goto done; + } + r++; + } + + uint sf = (flags & 64) != 0; + uint opc = 2; // MOVZ + uint hw = 0; + uint imm16 = value & 0xFFFF; + reg_t Rd = reg; + ulong value2 = value; + + // Look for shortcuts using ORR + // Either ORR for the whole thing, + // or ORR to OR set the high 32 bits same as the low 32 + // (not implemented) + + // Look for shortcuts using MOVN + if (sf) + { + if ((value & 0xFFFF_FFFF_FFFF_0000) == 0xFFFF_FFFF_FFFF_0000) + { + imm16 = ~imm16 & 0xFFFF; 
+ opc = 0; // MOVN + value2 = 0; + } + else if ((value & 0xFFFF_FFFF_0000_FFFF) == 0xFFFF_FFFF_0000_FFFF) + { + imm16 = ~(value >> 16) & 0xFFFF; + opc = 0; // MOVN + value2 = 0; + hw = 1; + } + else if ((value & 0xFFFF_0000_FFFF_FFFF) == 0xFFFF_0000_FFFF_FFFF) + { + imm16 = ~(value >> 32) & 0xFFFF; + opc = 0; // MOVN + value2 = 0; + hw = 2; + } + else if ((value & 0x0000_FFFF_FFFF_FFFF) == 0x0000_FFFF_FFFF_FFFF) + { + imm16 = ~(value >> 48) & 0xFFFF; + opc = 0; // MOVN + value2 = 0; + hw = 3; + } + } + else + { + if ((value & 0xFFFF_0000) == 0xFFFF_0000) + { + imm16 = ~imm16 & 0xFFFF; + opc = 0; // MOVN + value2 = 0; + } + else if ((value & 0x0000_FFFF) == 0x0000_FFFF) + { + imm16 = ~(value >> 16) & 0xFFFF; + opc = 0; // MOVN + value2 = 0; + hw = 1; + } + } + + if ((value2 >> (hw * 16)) & 0xFFFF_FFFF_FFFF_0000) + { + // Check for ORR one instruction solution + uint N, immr, imms; + if (orr_solution(value2, N, immr, imms)) + { + // MOV Rd,#imm + // http://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_log_imm.html + cdb.gen1(INSTR.log_imm(sf, 1, N, immr, imms, 31, Rd)); + goto done; + } + } + + while (1) + { + if (imm16 || value2 == 0) + { + cdb.gen1(INSTR.movewide(sf, opc, hw, imm16, Rd)); + opc = 3; // MOVK + } + value2 >>= 16; + if (!value2) + break; + imm16 = value2 & 0xFFFF; + ++hw; + } + } +done: + if (flags & mPSW) + gentstreg(cdb,reg,(flags & 64) != 0); +printf("set reg %d to %lld\n", reg, value); + cgstate.regimmed_set(reg,value); +} + +/********************************** + * See if we can do MOV (bitmask, immediate) out of value. + * Params: + * value = value to set register to + * N = N field + * immr = immr field + * imms = imms field + * Returns: + * true if we can do it, and set N, immr, imms + * References: + * . http://www.scs.stanford.edu/~zyedidia/arm64/mov_orr_log_imm.html + * . https://devblogs.microsoft.com/oldnewthing/20220802-00/?p=106927 + * . https://dinfuehr.github.io/blog/encoding-of-immediate-values-on-aarch64/ + * . 
https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a + */ +bool orr_solution(ulong value, out uint N, out uint immr, out uint imms) +{ + return false; +} + +/******************************************** + * Replace symbolic references with values + */ +@trusted +void assignaddrc(code *c) +{ + int sn; + Symbol *s; + ubyte rm; + uint sectionOff; + ulong offset; + reg_t Rn, Rt; + uint base = cgstate.EBPtoESP; + + for (; c; c = code_next(c)) + { + debug + { + if (0) + { printf("assignaddrc()\n"); + code_print(c); + } + if (code_next(c) && code_next(code_next(c)) == c) + assert(0); + } + + if ((c.Iop & PSOP.mask) == PSOP.root) + { + switch (c.Iop & PSOP.operator) + { + case PSOP.adjesp: + //printf("adjusting EBPtoESP (%d) by %ld\n",cgstate.EBPtoESP,cast(long)c.IEV1.Vint); + cgstate.EBPtoESP += c.IEV1.Vint; + c.Iop = INSTR.nop; + continue; + + case PSOP.fixesp: + //printf("fix ESP\n"); + if (cgstate.hasframe) + { + // LEA ESP,-EBPtoESP[EBP] + c.Iop = LEA; + if (c.Irm & 8) + c.Irex |= REX_R; + c.Irm = modregrm(2,SP,BP); + c.Iflags = CFoff; + c.IFL1 = FLconst; + c.IEV1.Vuns = -cgstate.EBPtoESP; + if (cgstate.enforcealign) + { + // AND ESP, -STACKALIGN + code *cn = code_calloc(); + cn.Iop = 0x81; + cn.Irm = modregrm(3, 4, SP); + cn.Iflags = CFoff; + cn.IFL2 = FLconst; + cn.IEV2.Vsize_t = -STACKALIGN; + if (I64) + c.Irex |= REX_W; + cn.next = c.next; + c.next = cn; + } + } + continue; + + case PSOP.frameptr: + // Convert to load of frame pointer + // c.Irm is the register to use + if (cgstate.hasframe && !cgstate.enforcealign) + { // MOV reg,EBP + c.Iop = 0x89; + if (c.Irm & 8) + c.Irex |= REX_B; + c.Irm = modregrm(3,BP,c.Irm & 7); + } + else + { // LEA reg,EBPtoESP[ESP] + c.Iop = LEA; + if (c.Irm & 8) + c.Irex |= REX_R; + c.Irm = modregrm(2,c.Irm & 7,4); + c.Isib = modregrm(0,4,SP); + c.Iflags = CFoff; + c.IFL1 = FLconst; + c.IEV1.Vuns = cgstate.EBPtoESP; + } + continue; + + case PSOP.ldr: + //printf("assignaddr: ldr\n"); + break; + + default: + continue; + } + 
} + + s = c.IEV1.Vsym; + uint sz = 8; + uint ins = c.Iop; +// if (c.IFL1 != FLunde) + { + printf("FL: %s ", fl_str(c.IFL1)); + disassemble(ins); + } + switch (c.IFL1) + { + case FLdata: + if (config.objfmt == OBJ_OMF && s.Sclass != SC.comdat && s.Sclass != SC.extern_) + { + c.IEV1.Vseg = s.Sseg; + c.IEV1.Vpointer += s.Soffset; + c.IFL1 = FLdatseg; + } + else + c.IFL1 = FLextern; + break; + + case FLudata: + if (config.objfmt == OBJ_OMF) + { + c.IEV1.Vseg = s.Sseg; + c.IEV1.Vpointer += s.Soffset; + c.IFL1 = FLdatseg; + } + else + c.IFL1 = FLextern; + break; + + case FLtlsdata: + if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) + c.IFL1 = FLextern; + break; + + case FLdatseg: + //c.IEV1.Vseg = DATA; + break; + + case FLfardata: + case FLcsdata: + case FLpseudo: + break; + + case FLstack: // for EE + //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n", + //s.Soffset,cgstate.EBPtoESP,base,c.IEV1.Vpointer); + c.IEV1.Vpointer += s.Soffset + cgstate.EBPtoESP - base - cgstate.EEStack.offset; + c.IFL1 = FLconst; + assert(0); //break; + + case FLreg: + if (Symbol_Sisdead(*s, cgstate.anyiasm)) + { + c.Iop = INSTR.nop; // remove references to it + break; + } + assert(field(ins,29,27) == 7 && field(ins,25,24) == 1); + Rt = cast(reg_t)field(ins,4,0); + Rn = s.Sreglsw; + //assert(!c.Voffset); // fix later + c.Iop = INSTR.mov_register(sz > 4, Rn, Rt); + c.IFL1 = FLconst; + break; + + case FLfast: + //printf("Fast.size: %d\n", cast(int)cgstate.Fast.size); + sectionOff = cast(uint)cgstate.Fast.size; + goto L1; + + case FLauto: + sectionOff = cast(uint)cgstate.Auto.size; + goto L1; + + case FLpara: + sectionOff = cast(uint)cgstate.Para.size - cgstate.BPoff; // cancel out add of BPoff + goto L1; + + L1: + if (Symbol_Sisdead(*s, cgstate.anyiasm)) + { + c.Iop = INSTR.nop; // remove references to it + break; + } + static if (1) + { + symbol_print(*s); + printf("c: %p, x%08x\n", c, c.Iop); + printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d, 
Voffset = %d\n", + s.Sident.ptr, cast(int)s.Soffset, cast(int)cgstate.Para.size, cast(int)cgstate.BPoff, + cast(int)cgstate.EBPtoESP, cast(int)c.IEV1.Voffset); + } + if (s.Sflags & SFLunambig) + c.Iflags |= CFunambig; + offset = c.IEV1.Voffset + s.Soffset + sectionOff + cgstate.BPoff; + sz = tysize(s.ty()); + goto L2; + + case FLfltreg: + offset = c.IEV1.Vpointer + cgstate.Foff + cgstate.BPoff; + c.Iflags |= CFunambig; + goto L2; + + case FLallocatmp: + offset = c.IEV1.Vpointer + cgstate.Alloca.offset + cgstate.BPoff; + assert(0); //goto L2; + + case FLfuncarg: + offset = c.IEV1.Vpointer + cgstate.funcarg.offset + cgstate.BPoff; + goto L2; + + case FLbprel: // at fixed offset from frame pointer (nteh only) + offset = c.IEV1.Vpointer + s.Soffset; + c.IFL1 = FLconst; + goto L2; + + case FLcs: // common subexpressions + sn = c.IEV1.Vuns; + if (!CSE.loaded(sn)) // if never loaded + { + c.Iop = INSTR.nop; + break; + } + offset = CSE.offset(sn) + cgstate.CSoff + cgstate.BPoff; + c.Iflags |= CFunambig; + goto L2; + + case FLregsave: + c.Iflags |= CFunambig; + offset = cgstate.regsave.off + cgstate.BPoff; + + L2: + offset = cast(int)offset; // sign extend + // Load/store register (unsigned immediate) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_pos + assert(field(ins,29,27) == 7); + uint opc = field(ins,23,22); + uint shift = field(ins,31,30); // 0:1 1:2 2:4 3:8 shift for imm12 + uint op24 = field(ins,25,24); +printf("offset: %lld localsize: %lld REGSIZE*2: %d\n", offset, localsize, REGSIZE*2); + if (cgstate.hasframe) + offset += REGSIZE * 2; + offset += localsize; + if (op24 == 1) + { + uint imm12 = field(ins,21,10); // unsigned 12 bits + offset += imm12 << shift; // add in imm + assert((offset & ((1 << shift) - 1)) == 0); // no misaligned access + imm12 = cast(uint)(offset >> shift); + assert(imm12 < 0x1000); + ins = setField(ins,21,10,imm12); + } + else if (op24 == 0) + { + if (opc == 2 && shift == 0) + shift = 4; + uint imm9 = field(ins,20,12); 
// signed 9 bits + imm9 += 0x100; // bias to being unsigned + offset += imm9 << shift; // add in imm9 + assert((offset & ((1 << shift) - 1)) == 0); // no misaligned access + imm9 = cast(uint)(offset >> shift); + assert(imm9 < 0x200); + imm9 = (imm9 - 0x100) & 0x1FF; + ins = setField(ins,20,12,imm9); + } + else + assert(0); + + Rn = cast(reg_t)field(ins,9,5); + Rt = cast(reg_t)field(ins,4,0); + if (!cgstate.hasframe || (cgstate.enforcealign && c.IFL1 != FLpara)) + { /* Convert to SP relative address instead of BP */ + assert(Rn == 29); // BP + offset += cgstate.EBPtoESP; // add difference in offset + ins = setField(ins,9,5,31); // set Rn to SP + } + c.Iop = ins; + + static if (0) + printf("is64(%d) offset(%d) = Fast.size(%d) + BPoff(%d) + EBPtoESP(%d)\n", + is64,imm12,cast(int)cgstate.Fast.size,cast(int)cgstate.BPoff,cast(int)cgstate.EBPtoESP); + + break; + + case FLndp: // no 87 FPU + assert(0); + + case FLoffset: + c.IFL1 = FLconst; + break; + + case FLlocalsize: // used by inline assembler + c.IEV1.Vpointer += localsize; + assert(0); // no inline assembler yet + + case FLconst: + case FLextern: + case FLfunc: + case FLcode: + case FLunde: + break; + + default: + printf("FL: %s\n", fl_str(c.IFL1)); + assert(0); + } + } +} + +/************************** + * Compute jump addresses for FLcode. + * Note: only works for forward referenced code. + * only direct jumps and branches are detected. + * LOOP instructions only work for backward refs. + */ +@trusted +void jmpaddr(code *c) +{ + code* ci,cn,ctarg,cstart; + targ_size_t ad; + + //printf("jmpaddr()\n"); + cstart = c; /* remember start of code */ + while (c) + { + const op = c.Iop; + if (isBranch(op)) // or CALL? 
+ { + ci = code_next(c); + ctarg = c.IEV1.Vcode; /* target code */ + ad = 4; /* IP displacement */ + while (ci && ci != ctarg) + { + ad += calccodsize(ci); + ci = code_next(ci); + } + if (!ci) + goto Lbackjmp; // couldn't find it + c.Iop |= cast(uint)(ad >> 2) << 5; + c.IFL1 = FLunde; + } + if (op == LOOP && c.IFL1 == FLcode) /* backwards refs */ + { + Lbackjmp: + ctarg = c.IEV1.Vcode; + for (ci = cstart; ci != ctarg; ci = code_next(ci)) + if (!ci || ci == c) + assert(0); + ad = 4; /* - IP displacement */ + while (ci != c) + { + assert(ci); + ad += calccodsize(ci); + ci = code_next(ci); + } + c.Iop = cast(uint)(-(ad >> 2)) << 5; + c.IFL1 = FLunde; + } + c = code_next(c); + } +} + +/******************************* + * Calculate bl.Bsize. + */ + +uint calcblksize(code *c) +{ + uint size = 0; + for (; c; c = code_next(c)) + size += 4; + //printf("calcblksize(c = x%x) = %d\n", c, size); + return size; +} + +/***************************** + * Calculate and return code size of a code. + * Note that NOPs are sometimes used as markers, but are + * never output. LINNUMs are never output. + * Note: This routine must be fast. Profiling shows it is significant. + */ + +@trusted +uint calccodsize(code *c) +{ + if (c.Iop == INSTR.nop) + return 0; + return 4; +} + + +/************************** + * Convert instructions to object code and write them to objmod. 
+ * Params: + * seg = code segment to write to, code starts at Offset(seg) + * c = list of instructions to write + * disasmBuf = if not null, then also write object code here + * framehandleroffset = offset of C++ frame handler + * Returns: + * offset of end of code emitted + */ + +@trusted +uint codout(int seg, code *c, Barray!ubyte* disasmBuf, ref targ_size_t framehandleroffset) +{ + code *cn; + uint flags; + + debug + if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg)); + + MiniCodeBuf ggen = void; + ggen.index = 0; + ggen.offset = cast(uint)Offset(seg); + ggen.seg = seg; + ggen.framehandleroffset = framehandleroffset; + ggen.disasmBuf = disasmBuf; + + for (; c; c = code_next(c)) + { + debug + { + if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); } + uint startOffset = ggen.getOffset(); + } + + opcode_t op = c.Iop; + //printf("codout: %08x\n", op); + if ((op & PSOP.mask) == PSOP.root) + { + switch (op) + { case PSOP.linnum: + /* put out line number stuff */ + objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset()); + break; + case PSOP.adjesp: + //printf("adjust ESP %ld\n", cast(long)c.IEV1.Vint); + break; + + default: + break; + } + + assert(calccodsize(c) == 0); + continue; + } + + switch (op) + { + case INSTR.nop: /* don't send them out */ + debug + assert(calccodsize(c) == 0); + continue; + + case ASM: + if (op != ASM) + break; + ggen.flush(); + if (c.Iflags == CFaddrsize) // kludge for DA inline asm + { + //do32bit(ggen, FLblockoff,c.IEV1,0,0); + assert(0); + } + else + { + ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes); + } + debug + assert(calccodsize(c) == c.IEV1.len); + + continue; + + default: + break; + } + flags = c.Iflags; + + // See if we need to flush (don't have room for largest code sequence) + if (ggen.available() < 4) + ggen.flush(); + + //printf("op: %08x\n", op); + //if ((op & 0xFC00_0000) == 0x9400_0000) // BL