Skip to content

Commit cc4ebd7

Browse files
committed
Lower GetElement on arm64 to the correct access sequence
1 parent b59816c commit cc4ebd7

File tree

3 files changed

+82
-133
lines changed

3 files changed

+82
-133
lines changed

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+11-94
Original file line numberDiff line numberDiff line change
@@ -1649,6 +1649,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
16491649
case NI_Vector128_GetElement:
16501650
{
16511651
assert(intrin.numOperands == 2);
1652+
assert(!intrin.op1->isContained());
1653+
1654+
assert(intrin.op2->OperIsConst());
1655+
assert(intrin.op2->isContained());
16521656

16531657
var_types simdType = Compiler::getSIMDTypeForSize(node->GetSimdSize());
16541658

@@ -1658,109 +1662,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
16581662
simdType = TYP_SIMD16;
16591663
}
16601664

1661-
if (!intrin.op2->OperIsConst())
1662-
{
1663-
assert(!intrin.op2->isContained());
1664-
1665-
emitAttr baseTypeSize = emitTypeSize(intrin.baseType);
1666-
unsigned baseTypeScale = genLog2(EA_SIZE_IN_BYTES(baseTypeSize));
1667-
1668-
regNumber baseReg;
1669-
regNumber indexReg = op2Reg;
1670-
1671-
// Optimize the case of op1 is in memory and trying to access i'th element.
1672-
if (!intrin.op1->isUsedFromReg())
1673-
{
1674-
assert(intrin.op1->isContained());
1675-
1676-
if (intrin.op1->OperIsLocal())
1677-
{
1678-
unsigned varNum = intrin.op1->AsLclVarCommon()->GetLclNum();
1679-
baseReg = internalRegisters.Extract(node);
1680-
1681-
// Load the address of varNum
1682-
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, varNum, 0);
1683-
}
1684-
else
1685-
{
1686-
// Require GT_IND addr to be not contained.
1687-
assert(intrin.op1->OperIs(GT_IND));
1688-
1689-
GenTree* addr = intrin.op1->AsIndir()->Addr();
1690-
assert(!addr->isContained());
1691-
baseReg = addr->GetRegNum();
1692-
}
1693-
}
1694-
else
1695-
{
1696-
unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
1697-
noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
1698-
1699-
baseReg = internalRegisters.Extract(node);
1700-
1701-
// Load the address of simdInitTempVarNum
1702-
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, baseReg, simdInitTempVarNum, 0);
1703-
1704-
// Store the vector to simdInitTempVarNum
1705-
GetEmitter()->emitIns_R_R(INS_str, emitTypeSize(simdType), op1Reg, baseReg);
1706-
}
1707-
1708-
assert(genIsValidIntReg(indexReg));
1709-
assert(genIsValidIntReg(baseReg));
1710-
assert(baseReg != indexReg);
1665+
ssize_t ival = intrin.op2->AsIntCon()->IconValue();
17111666

1712-
// Load item at baseReg[index]
1713-
GetEmitter()->emitIns_R_R_R_Ext(ins_Load(intrin.baseType), baseTypeSize, targetReg, baseReg,
1714-
indexReg, INS_OPTS_LSL, baseTypeScale);
1715-
}
1716-
else if (!GetEmitter()->isValidVectorIndex(emitTypeSize(simdType), emitTypeSize(intrin.baseType),
1717-
intrin.op2->AsIntCon()->IconValue()))
1667+
if (!GetEmitter()->isValidVectorIndex(emitTypeSize(simdType), emitTypeSize(intrin.baseType), ival))
17181668
{
17191669
// We only need to generate code for the get if the index is valid
17201670
// If the index is invalid, previously generated for the range check will throw
1671+
break;
17211672
}
1722-
else if (!intrin.op1->isUsedFromReg())
1723-
{
1724-
assert(intrin.op1->isContained());
1725-
assert(intrin.op2->IsCnsIntOrI());
1726-
1727-
int offset = (int)intrin.op2->AsIntCon()->IconValue() * genTypeSize(intrin.baseType);
1728-
instruction ins = ins_Load(intrin.baseType);
1729-
1730-
assert(!intrin.op1->isUsedFromReg());
1731-
1732-
if (intrin.op1->OperIsLocal())
1733-
{
1734-
unsigned varNum = intrin.op1->AsLclVarCommon()->GetLclNum();
1735-
GetEmitter()->emitIns_R_S(ins, emitActualTypeSize(intrin.baseType), targetReg, varNum, offset);
1736-
}
1737-
else
1738-
{
1739-
assert(intrin.op1->OperIs(GT_IND));
1740-
1741-
GenTree* addr = intrin.op1->AsIndir()->Addr();
1742-
assert(!addr->isContained());
1743-
regNumber baseReg = addr->GetRegNum();
17441673

1745-
// ldr targetReg, [baseReg, #offset]
1746-
GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(intrin.baseType), targetReg, baseReg,
1747-
offset);
1748-
}
1749-
}
1750-
else
1674+
if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg) && (indexValue == 0)))
17511675
{
1752-
assert(intrin.op2->IsCnsIntOrI());
1753-
ssize_t indexValue = intrin.op2->AsIntCon()->IconValue();
1754-
17551676
// no-op if vector is float/double, targetReg == op1Reg and fetching for 0th index.
1756-
if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg) && (indexValue == 0)))
1757-
{
1758-
break;
1759-
}
1760-
1761-
GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, indexValue,
1762-
INS_OPTS_NONE);
1677+
break;
17631678
}
1679+
1680+
GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, ival, INS_OPTS_NONE);
17641681
break;
17651682
}
17661683

src/coreclr/jit/lowerarmarch.cpp

+70-17
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,72 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
12541254
return LowerHWIntrinsicDot(node);
12551255
}
12561256

1257+
case NI_Vector64_GetElement:
1258+
case NI_Vector128_GetElement:
1259+
{
1260+
GenTree* op1 = node->Op(1);
1261+
GenTree* op2 = node->Op(2);
1262+
1263+
unsigned simdSize = node->GetSimdSize();
1264+
var_types simdBaseType = node->GetSimdBaseType();
1265+
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
1266+
unsigned scale = genTypeSize(simdBaseType);
1267+
unsigned lclNum = BAD_VAR_NUM;
1268+
1269+
if (op1->IsLocal())
1270+
{
1271+
lclNum = op1->AsLclVarCommon()->GetLclNum();
1272+
}
1273+
else if (!op2->OperIsConst())
1274+
{
1275+
compiler->getSIMDInitTempVarNum(simdType);
1276+
lclNum = comp->lvaSIMDInitTempVarNum;
1277+
1278+
GenTree* storeLclVar = comp->gtNewStoreLclVarNode(lclNum, op1);
1279+
BlockRange().InsertBefore(node, storeLclVar);
1280+
LowerNode(storeLclVar);
1281+
1282+
op1 = nullptr;
1283+
}
1284+
1285+
if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))
1286+
{
1287+
if (lclNum != BAD_VAR_NUM)
1288+
{
1289+
if (op1 != nullptr)
1290+
{
1291+
BlockRange().Remove(op1);
1292+
}
1293+
1294+
op1 = comp->gtNewLclVarAddrNode(lclNum, simdBaseType);
1295+
BlockRange().InsertBefore(node, op1);
1296+
LowerNode(op1);
1297+
}
1298+
assert(op1->isMemoryOp());
1299+
1300+
GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(simdBaseType, op1, op2, scale, 0);
1301+
BlockRange().InsertBefore(node, addr);
1302+
LowerNode(addr);
1303+
1304+
GenTreeIndir* indir = comp->gtNewIndir(simdType, addr);
1305+
BlockRange().InsertBefore(node, indir);
1306+
1307+
LIR::Use use;
1308+
if (BlockRange().TryGetUse(node, &use))
1309+
{
1310+
use.ReplaceWith(indir);
1311+
}
1312+
else
1313+
{
1314+
indir->SetUnusedValue();
1315+
}
1316+
1317+
BlockRange().Remove(node);
1318+
return LowerNode(indir);
1319+
}
1320+
break;
1321+
}
1322+
12571323
case NI_Vector64_op_Equality:
12581324
case NI_Vector128_op_Equality:
12591325
{
@@ -3310,24 +3376,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
33103376
case NI_Vector64_GetElement:
33113377
case NI_Vector128_GetElement:
33123378
{
3313-
assert(varTypeIsIntegral(intrin.op2));
3379+
assert(!IsContainableMemoryOp(intrin.op1) || !IsSafeToContainMem(node, intrin.op1);
3380+
assert(intrin.op2->OperIsConst());
33143381

3315-
if (intrin.op2->IsCnsIntOrI())
3316-
{
3317-
MakeSrcContained(node, intrin.op2);
3318-
}
3319-
3320-
// TODO: Codegen isn't currently handling this correctly
3321-
//
3322-
// if (IsContainableMemoryOp(intrin.op1) && IsSafeToContainMem(node, intrin.op1))
3323-
// {
3324-
// MakeSrcContained(node, intrin.op1);
3325-
//
3326-
// if (intrin.op1->OperIs(GT_IND))
3327-
// {
3328-
// intrin.op1->AsIndir()->Addr()->ClearContained();
3329-
// }
3330-
// }
3382+
// Loading a constant index from register
3383+
MakeSrcContained(node, intrin.op2);
33313384
break;
33323385
}
33333386

src/coreclr/jit/lsraarm64.cpp

+1-22
Original file line numberDiff line numberDiff line change
@@ -1917,7 +1917,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19171917
srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(1));
19181918
}
19191919
}
1920-
19211920
else if (intrin.op2 != nullptr)
19221921
{
19231922
// RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
@@ -1928,28 +1927,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
19281927
bool forceOp2DelayFree = false;
19291928
SingleTypeRegSet lowVectorCandidates = RBM_NONE;
19301929
size_t lowVectorOperandNum = 0;
1931-
if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement))
1932-
{
1933-
if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal()))
1934-
{
1935-
// If the index is not a constant and the object is not contained or is a local
1936-
// we will need a general purpose register to calculate the address
1937-
// internal register must not clobber input index
1938-
// TODO-Cleanup: An internal register will never clobber a source; this code actually
1939-
// ensures that the index (op2) doesn't interfere with the target.
1940-
buildInternalIntRegisterDefForNode(intrinsicTree);
1941-
forceOp2DelayFree = true;
1942-
}
19431930

1944-
if (!intrin.op2->IsCnsIntOrI() && !intrin.op1->isContained())
1945-
{
1946-
// If the index is not a constant or op1 is in register,
1947-
// we will use the SIMD temp location to store the vector.
1948-
var_types requiredSimdTempType = (intrin.id == NI_Vector64_GetElement) ? TYP_SIMD8 : TYP_SIMD16;
1949-
compiler->getSIMDInitTempVarNum(requiredSimdTempType);
1950-
}
1951-
}
1952-
else if (HWIntrinsicInfo::IsLowVectorOperation(intrin.id))
1931+
if (HWIntrinsicInfo::IsLowVectorOperation(intrin.id))
19531932
{
19541933
getLowVectorOperandAndCandidates(intrin, &lowVectorOperandNum, &lowVectorCandidates);
19551934
}

0 commit comments

Comments (0)