ARM64: Use stlur for volatile stores (dotnet#91553)
Co-authored-by: Bruce Forstall <brucefo@microsoft.com>
EgorBo and BruceForstall authored Sep 8, 2023
1 parent a974719 commit f3c7893
Showing 3 changed files with 137 additions and 66 deletions.
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegen.h
@@ -1235,6 +1235,10 @@
 #else
     instruction genGetInsForOper(genTreeOps oper, var_types type);
 #endif
+    instruction genGetVolatileLdStIns(instruction currentIns,
+                                      regNumber targetReg,
+                                      GenTreeIndir* indir,
+                                      bool* needsBarrier);
     bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
     GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd);
     regNumber getCallIndirectionCellReg(GenTreeCall* call);
18 changes: 3 additions & 15 deletions src/coreclr/jit/codegenarm64.cpp
@@ -4207,22 +4207,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)

     if (tree->IsVolatile())
     {
-        bool addrIsInReg = addr->isUsedFromReg();
-        bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
+        bool needsBarrier = true;
+        ins = genGetVolatileLdStIns(ins, dataReg, tree, &needsBarrier);

-        if ((ins == INS_strb) && addrIsInReg)
-        {
-            ins = INS_stlrb;
-        }
-        else if ((ins == INS_strh) && addrIsInReg && addrIsAligned)
-        {
-            ins = INS_stlrh;
-        }
-        else if ((ins == INS_str) && genIsValidIntReg(dataReg) && addrIsInReg && addrIsAligned)
-        {
-            ins = INS_stlr;
-        }
-        else
+        if (needsBarrier)
         {
             // issue a full memory barrier before a volatile StInd
             // Note: We cannot issue store barrier ishst because it is a weaker barrier.
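The store-side selection that this hunk moves out of genCodeForStoreInd boils down to: prefer a release-store form (stlrb/stlrh/stlr) when the address sits in a register and the access is aligned (byte stores are always fine), otherwise keep the plain store and order it with a full barrier. Below is a minimal standalone sketch of that decision; the enum, struct, and function names are illustrative stand-ins rather than JIT types, and the data-register class check is omitted for brevity.

// Simplified model: pick a release-store form when the address is a plain register
// (and the access is aligned for anything wider than a byte); otherwise keep the
// ordinary store and request an explicit full barrier (dmb ish) before it.
#include <cstdio>

enum class StoreIns { Strb, Strh, Str, Stlrb, Stlrh, Stlr };

struct StoreSelection
{
    StoreIns ins;
    bool     needsBarrier; // true => emit a full memory barrier before the store
};

static StoreSelection SelectVolatileStore(StoreIns ins, bool addrIsInReg, bool addrIsAligned)
{
    if (addrIsInReg && (ins == StoreIns::Strb))
        return {StoreIns::Stlrb, false}; // byte stores have no alignment concern
    if (addrIsInReg && addrIsAligned && (ins == StoreIns::Strh))
        return {StoreIns::Stlrh, false};
    if (addrIsInReg && addrIsAligned && (ins == StoreIns::Str))
        return {StoreIns::Stlr, false};
    return {ins, true}; // keep the plain store; ordering must come from a barrier
}

int main()
{
    StoreSelection s = SelectVolatileStore(StoreIns::Str, /*addrIsInReg*/ true, /*addrIsAligned*/ true);
    std::printf("stlr chosen, needsBarrier=%d\n", s.needsBarrier ? 1 : 0); // prints 0
    return 0;
}

The new genGetVolatileLdStIns helper added in codegenarmarch.cpp below extends this same shape with the RCPC and RCPC2 cases.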
181 changes: 130 additions & 51 deletions src/coreclr/jit/codegenarmarch.cpp
@@ -1728,6 +1728,134 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
     genProduceReg(node);
 }

+//------------------------------------------------------------------------
+// genGetVolatileLdStIns: Determine the most efficient instruction to perform a
+//    volatile load or store and whether an explicit barrier is required or not.
+//
+// Arguments:
+//    currentIns   - the current instruction to perform load/store
+//    targetReg    - the target register
+//    indir        - the indirection node representing the volatile load/store
+//    needsBarrier - OUT parameter. Set to true if an explicit memory barrier is required.
+//
+// Return Value:
+//    instruction to perform the volatile load/store with.
+//
+instruction CodeGen::genGetVolatileLdStIns(instruction currentIns,
+                                           regNumber targetReg,
+                                           GenTreeIndir* indir,
+                                           bool* needsBarrier)
+{
+    assert(indir->IsVolatile());
+
+    if (!genIsValidIntReg(targetReg))
+    {
+        // We don't have special instructions to perform volatile loads/stores for non-integer registers.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    assert(!varTypeIsFloating(indir));
+    assert(!varTypeIsSIMD(indir));
+
+#ifdef TARGET_ARM64
+    *needsBarrier = false;
+
+    if (indir->IsUnaligned() && (currentIns != INS_ldrb) && (currentIns != INS_strb))
+    {
+        // We have to use a normal load/store + explicit memory barrier
+        // to avoid unaligned access exceptions.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    const bool addrIsInReg = indir->Addr()->isUsedFromReg();
+
+    // With RCPC2 (arm64 v8.4+) we can work with simple addressing modes like [reg + simm9]
+    const bool shouldUseRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2) && !addrIsInReg &&
+                                indir->Addr()->OperIs(GT_LEA) && !indir->HasIndex() && (indir->Scale() == 1) &&
+                                emitter::emitIns_valid_imm_for_unscaled_ldst_offset(indir->Offset());
+
+    if (shouldUseRcpc2)
+    {
+        assert(!addrIsInReg);
+        switch (currentIns)
+        {
+            // Loads
+
+            case INS_ldrb:
+                return INS_ldapurb;
+
+            case INS_ldrh:
+                return INS_ldapurh;
+
+            case INS_ldr:
+                return INS_ldapur;
+
+            // Stores
+
+            case INS_strb:
+                return INS_stlurb;
+
+            case INS_strh:
+                return INS_stlurh;
+
+            case INS_str:
+                return INS_stlur;
+
+            default:
+                *needsBarrier = true;
+                return currentIns;
+        }
+    }
+
+    // Only RCPC2 (arm64 v8.4+) allows us to deal with contained addresses.
+    // In other cases we'll have to emit a normal load/store + explicit memory barrier.
+    // It means that lower should generally avoid generating contained addresses for volatile loads/stores
+    // so we can use cheaper instructions.
+    if (!addrIsInReg)
+    {
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    // RCPC1 (arm64 v8.3+) offers a bit more relaxed memory ordering than ldar. Which is sufficient for
+    // .NET memory model's requirements, see https://github.com/dotnet/runtime/issues/67374
+    const bool hasRcpc1 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
+    switch (currentIns)
+    {
+        // Loads
+
+        case INS_ldrb:
+            return hasRcpc1 ? INS_ldaprb : INS_ldarb;
+
+        case INS_ldrh:
+            return hasRcpc1 ? INS_ldaprh : INS_ldarh;
+
+        case INS_ldr:
+            return hasRcpc1 ? INS_ldapr : INS_ldar;
+
+        // Stores
+
+        case INS_strb:
+            return INS_stlrb;
+
+        case INS_strh:
+            return INS_stlrh;
+
+        case INS_str:
+            return INS_stlr;
+
+        default:
+            *needsBarrier = true;
+            return currentIns;
+    }
+#else  // TARGET_ARM64
+    *needsBarrier = true;
+    return currentIns;
+#endif // !TARGET_ARM64
+}
+
 //------------------------------------------------------------------------
 // genCodeForIndir: Produce code for a GT_IND node.
 //
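The RCPC2 path above only fires for a contained [base + offset] address whose offset fits the unscaled, signed 9-bit immediate that the ldapur*/stlur* forms encode. Here is a minimal sketch of that range check, assuming emitter::emitIns_valid_imm_for_unscaled_ldst_offset enforces the simm9 range (the JIT helper's exact behavior is not shown in this diff; the standalone function below is illustrative only).

#include <cassert>
#include <cstdint>

// simm9: the unscaled load/store offset encoding used by ldapur*/stlur*.
static bool FitsUnscaledLdStOffset(int64_t offset)
{
    return (offset >= -256) && (offset <= 255);
}

int main()
{
    assert(FitsUnscaledLdStOffset(248));   // fits: e.g. stlur w1, [x0, #248]
    assert(!FitsUnscaledLdStOffset(256));  // out of range: plain store + full barrier instead
    assert(!FitsUnscaledLdStOffset(-512)); // out of range
    return 0;
}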
@@ -1755,58 +1883,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)

     bool emitBarrier = false;

-    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+    if (tree->IsVolatile())
     {
-#ifdef TARGET_ARM64
-        bool addrIsInReg = tree->Addr()->isUsedFromReg();
-        bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
-
-        // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid
-        // full memory barriers if mixed with STLR
-        bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
-
-        // On arm64-v8.4+ we can use ldapur* instructions with acquire/release semantics to
-        // avoid full memory barriers if address is contained and unscaled
-        bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2);
-
-        bool handledWithLdapur = false;
-        if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && !tree->HasIndex() && (tree->Scale() == 1) &&
-            emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset()))
-        {
-            if (ins == INS_ldrb)
-            {
-                ins = INS_ldapurb;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldrh) && addrIsAligned)
-            {
-                ins = INS_ldapurh;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldr) && addrIsAligned && genIsValidIntReg(targetReg))
-            {
-                ins = INS_ldapur;
-                handledWithLdapur = true;
-            }
-        }
-
-        if ((ins == INS_ldrb) && addrIsInReg)
-        {
-            ins = hasRcpc ? INS_ldaprb : INS_ldarb;
-        }
-        else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
-        {
-            ins = hasRcpc ? INS_ldaprh : INS_ldarh;
-        }
-        else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
-        {
-            ins = hasRcpc ? INS_ldapr : INS_ldar;
-        }
-        else if (!handledWithLdapur)
-#endif // TARGET_ARM64
-        {
-            emitBarrier = true;
-        }
+        ins = genGetVolatileLdStIns(ins, targetReg, tree, &emitBarrier);
     }

     GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree);
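The load-side logic deleted above follows the same tiers that the shared helper now implements: an address already in a register gets an acquire load (ldar*, or the more relaxed ldapr* when RCPC is available), a contained [base + simm9] address gets ldapur* when RCPC2 is available, and anything else keeps the plain load plus an explicit full barrier. A compact sketch of those tiers with illustrative stand-ins (alignment and register-class checks omitted):

#include <cstdio>

enum class LoadPick { Ldar, Ldapr, Ldapur, LdrPlusBarrier };

// Simplified tiering for a volatile integer load, given the matching plain "ldr" form.
static LoadPick PickVolatileLoad(bool addrIsInReg, bool offsetFitsSimm9, bool hasRcpc, bool hasRcpc2)
{
    if (addrIsInReg)
        return hasRcpc ? LoadPick::Ldapr : LoadPick::Ldar;
    if (hasRcpc2 && offsetFitsSimm9)
        return LoadPick::Ldapur;
    return LoadPick::LdrPlusBarrier; // plain load, explicit memory barrier required
}

int main()
{
    // Baseline armv8.0, address in a register: acquire load, no barrier.
    std::printf("%d\n", static_cast<int>(PickVolatileLoad(true, false, false, false)));  // 0 (Ldar)
    // armv8.4 (RCPC2), contained [reg + #16] address: ldapur, no barrier.
    std::printf("%d\n", static_cast<int>(PickVolatileLoad(false, true, true, true)));    // 2 (Ldapur)
    // Contained address without RCPC2: fall back to plain load + barrier.
    std::printf("%d\n", static_cast<int>(PickVolatileLoad(false, true, false, false)));  // 3 (LdrPlusBarrier)
    return 0;
}

As the deleted comments note, these acquire/release forms pair with stlr-style stores, which is what lets the JIT drop the standalone dmb barriers in these paths.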
