Arm64: Implement LoadAndInsertScalar APIs #93197

Merged · 48 commits · Oct 11, 2023

Commits
785b198
Add APIs for LoadVector*x2
kunalspathak Sep 28, 2023
670c61e
Add implementation for LoadVector*x2
kunalspathak Sep 29, 2023
a396f9b
Add APIs for LoadVector*x3
kunalspathak Sep 29, 2023
da986ba
Add implementation for LoadVector*x3
kunalspathak Sep 29, 2023
ccba48e
Add APIs for LoadVector*x4
kunalspathak Sep 29, 2023
87ec4c0
Add implementation for LoadVector*x4
kunalspathak Sep 29, 2023
6fed5cf
Add test cases for LoadVectorx2, LoadVectorx3, LoadVectorx4
kunalspathak Sep 30, 2023
66d893c
Merge remote-tracking branch 'origin/main' into ld2
kunalspathak Oct 1, 2023
b5dca03
minor rename
kunalspathak Oct 1, 2023
92fb279
REVERT: Add Debug.Assert(false) to make sure test runs
kunalspathak Oct 2, 2023
8c8a186
Retain gtOtherReg rather than making it an array
kunalspathak Oct 2, 2023
9582068
Add APIs for LoadAndReplicateToVector64x* and LoadAndReplicateToVecto…
kunalspathak Oct 2, 2023
2bea97f
Revert "REVERT: Add Debug.Assert(false) to make sure test runs"
kunalspathak Oct 2, 2023
6a0a426
fix the test template
kunalspathak Oct 2, 2023
d1c38bc
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
873df44
Implement LoadAndReplicateToVector* APIs
kunalspathak Oct 2, 2023
7cf45d6
Add test coverage for LoadAndReplicateToVector* APIs
kunalspathak Oct 2, 2023
1e7629f
fix the LoadVectorx4 template
kunalspathak Oct 2, 2023
6929501
address review comment
kunalspathak Oct 2, 2023
ad60b30
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
fb80174
Add APIs for LoadAndInsertScalar()
kunalspathak Oct 2, 2023
6b89465
fix one more error in LoadVectorx4Test.template
kunalspathak Oct 2, 2023
90b1041
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
910a64b
Add APIs for LoadAndInsertScalar()
kunalspathak Oct 3, 2023
8f62949
Fix the API definition
kunalspathak Oct 5, 2023
12a75a2
wip: Implementation
kunalspathak Oct 5, 2023
13b1ecb
feedback by Bruce
kunalspathak Oct 5, 2023
b52a029
Rename the test case name
kunalspathak Oct 5, 2023
b49ebcd
Disable test for mono
kunalspathak Oct 5, 2023
4c78408
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 5, 2023
2c3540e
Fix the errors to make it work
kunalspathak Oct 5, 2023
fd2946c
Merge remote-tracking branch 'origin/main' into loadandreplicate
kunalspathak Oct 5, 2023
6c0da62
fix merge conflicts
kunalspathak Oct 5, 2023
629cf96
fix the typo in test case
kunalspathak Oct 5, 2023
f7c966b
Merge branch 'loadandreplicate' into loadandinsertscalar
kunalspathak Oct 6, 2023
19d4ae3
Merge remote-tracking branch 'origin/main' into loadandinsertscalar
kunalspathak Oct 6, 2023
a0eb7cd
code cleanup
kunalspathak Oct 6, 2023
ba21188
fix the importing of normal LoadAndInsertScalar
kunalspathak Oct 6, 2023
0d8f668
Fix some more importing and lsra
kunalspathak Oct 6, 2023
c2bdb82
fix the lsra issues
kunalspathak Oct 6, 2023
690dc69
Add test for LoadAndInsertScalarx2
kunalspathak Oct 9, 2023
3617009
Add test cases for LoadAndInsertScalarx2 and LoadAndInsertScalarx3
kunalspathak Oct 9, 2023
480fbc3
jit format
kunalspathak Oct 9, 2023
3aadc2f
fix bug
kunalspathak Oct 9, 2023
ce4a5e0
fix test build errors
kunalspathak Oct 9, 2023
20ae72e
fix the test errors
kunalspathak Oct 10, 2023
513c909
fix typos in x3 and x4
kunalspathak Oct 10, 2023
8288942
address feedback from Bruce
kunalspathak Oct 11, 2023
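
For orientation before the diff: the new AdvSimd.LoadAndInsertScalar overloads take a tuple of 2, 3, or 4 vectors, a lane index, and an address, and map to the Arm64 LD2/LD3/LD4 single-lane structured loads. The same operation is exposed to C/C++ through arm_neon.h; a minimal sketch of the two-vector case (illustrative only, not part of this PR; requires an AArch64 compiler):

#include <arm_neon.h>

// One LD2 { vA.h, vB.h }[1], [addr]: loads two consecutive int16 values from
// addr and inserts one into lane 1 of each of the two input vectors.
int16x4x2_t LoadAndInsertLane1(int16x4x2_t values, const int16_t* addr)
{
    return vld2_lane_s16(addr, values, 1);
}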
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
@@ -2814,6 +2814,7 @@ class Compiler

#ifdef TARGET_ARM64
GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount);
GenTreeFieldList* gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd);
#endif
#endif // FEATURE_HW_INTRINSICS

50 changes: 50 additions & 0 deletions src/coreclr/jit/gentree.cpp
@@ -25046,6 +25046,43 @@ GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fi
}
return fieldList;
}

//------------------------------------------------------------------------
// gtConvertParamOpToFieldList: Convert an operand that represents a tuple of structs into a
// field list, where each field represents one struct of the tuple.
//
// Arguments:
// op -- Operand to convert.
// fieldCount -- Number of fields or rows present.
// clsHnd -- Class handle of the tuple.
//
// Return Value:
// The GenTreeFieldList node.
//
GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd)
{
LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar());
unsigned lclNum = lvaGetLclNum(opVarDsc);
unsigned fieldSize = opVarDsc->lvSize() / fieldCount;
GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
int offset = 0;
unsigned sizeBytes = 0;
CORINFO_CLASS_HANDLE structType;

for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++)
{
CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(clsHnd, fieldId);
JitType2PreciseVarType(info.compCompHnd->getFieldType(fieldHandle, &structType));
getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
var_types simdType = getSIMDTypeForSize(sizeBytes);

GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, simdType, offset);
fieldList->AddField(this, fldNode, offset, simdType);

offset += fieldSize;
}
return fieldList;
}
#endif // TARGET_ARM64

GenTree* Compiler::gtNewSimdWithLowerNode(
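
gtConvertParamOpToFieldList above slices a tuple-of-vectors local into one GT_LCL_FLD per field, at a stride of lvSize() / fieldCount bytes. A minimal standalone model of that offset arithmetic (illustrative, not JIT code):

#include <cstdio>

int main()
{
    unsigned lclSize    = 24; // assumed for illustration: (Vector64<int>, Vector64<int>, Vector64<int>)
    unsigned fieldCount = 3;
    unsigned fieldSize  = lclSize / fieldCount; // 8 bytes per Vector64 field

    for (unsigned fieldId = 0, offset = 0; fieldId < fieldCount; fieldId++, offset += fieldSize)
    {
        printf("field %u -> GT_LCL_FLD at offset %u\n", fieldId, offset);
    }
    return 0;
}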
@@ -25195,6 +25232,13 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const

#ifdef TARGET_ARM64
case NI_AdvSimd_LoadAndInsertScalar:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:

addr = Op(3);
break;
#endif // TARGET_ARM64
@@ -25555,6 +25599,7 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
return compiler->typGetBlkLayout(16);

@@ -25564,17 +25609,22 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
return compiler->typGetBlkLayout(32);

case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
return compiler->typGetBlkLayout(24);

case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return compiler->typGetBlkLayout(48);

case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
return compiler->typGetBlkLayout(64);
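
The block sizes in this switch are simply fieldCount times the per-vector size (Vector64 = 8 bytes, Vector128 = 16 bytes). A hedged one-line model of the arithmetic, not JIT code:

// 2x8 = 16, 3x8 = 24, 4x8 = 32, 2x16 = 32, 3x16 = 48, 4x16 = 64
unsigned MultiVectorBlockSize(unsigned fieldCount, unsigned vectorSizeBytes)
{
    return fieldCount * vectorSizeBytes;
}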

50 changes: 48 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -1071,9 +1071,53 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
// We don't have generic multireg APIs
assert(sizeBytes == 0);
}

#ifdef TARGET_ARM64
else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
{
CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);

if (retFieldType == TYP_STRUCT)
{
CORINFO_CLASS_HANDLE structType;
unsigned int sizeBytes = 0;

// LoadAndInsertScalar variant that returns a tuple of 2, 3, or 4 vectors
assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
assert(fieldCount > 1);
CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
CorInfoType fieldType = info.compCompHnd->getFieldType(fieldHandle, &structType);
simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
switch (fieldCount)
{
case 2:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
break;
case 3:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
break;
case 4:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
break;
default:
assert("unsupported");
}
}
else
{
assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
assert(isSupportedBaseType(intrinsic, simdBaseJitType));
retType = getSIMDTypeForSize(sizeBytes);
}
}
#endif
else
{
// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
@@ -1130,7 +1174,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

#ifdef TARGET_ARM64
if ((intrinsic == NI_AdvSimd_Insert) || (intrinsic == NI_AdvSimd_InsertScalar) ||
(intrinsic == NI_AdvSimd_LoadAndInsertScalar))
((intrinsic >= NI_AdvSimd_LoadAndInsertScalar) && (intrinsic <= NI_AdvSimd_LoadAndInsertScalarVector64x4)) ||
((intrinsic >= NI_AdvSimd_Arm64_LoadAndInsertScalar) &&
(intrinsic <= NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4)))
{
assert(sig->numArgs == 3);
immOp = impStackTop(1).val;
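
The importer change above selects the concrete intrinsic from two facts about the struct return type: the instance-field count picks the x2/x3/x4 variant, and the field's SIMD size picks Vector64 (8 bytes) versus Vector128 (16 bytes). A standalone restatement of that mapping (illustrative, not the JIT's code):

const char* SelectLoadAndInsertScalar(unsigned fieldCount, unsigned sizeBytes)
{
    switch (fieldCount)
    {
        case 2: return (sizeBytes == 8) ? "LoadAndInsertScalarVector64x2" : "LoadAndInsertScalarVector128x2";
        case 3: return (sizeBytes == 8) ? "LoadAndInsertScalarVector64x3" : "LoadAndInsertScalarVector128x3";
        case 4: return (sizeBytes == 8) ? "LoadAndInsertScalarVector64x4" : "LoadAndInsertScalarVector128x4";
        default: return "unsupported";
    }
}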
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
@@ -770,18 +770,24 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
return 2;

case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return 3;

case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
return 4;
57 changes: 57 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -253,6 +253,12 @@ void HWIntrinsicInfo::lookupImmBounds(
case NI_AdvSimd_Insert:
case NI_AdvSimd_InsertScalar:
case NI_AdvSimd_LoadAndInsertScalar:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
case NI_AdvSimd_Arm64_InsertSelectedScalar:
@@ -1916,6 +1922,57 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
break;
}
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
{
assert(sig->numArgs == 3);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
op3 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = impPopStack().val;

if (op3->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op3->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op3 = op3->gtGetOp1();
}
}

assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
assert(op1->TypeGet() == TYP_STRUCT);

info.compNeedsConsecutiveRegisters = true;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);

if (!op1->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("LoadAndInsertScalar temp tree"));

impStoreTemp(tmp, op1, CHECK_SPILL_NONE);
op1 = gtNewLclvNode(tmp, argType);
}

op1 = gtConvertParamOpToFieldList(op1, fieldCount, argClass);
op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
break;
}
case NI_AdvSimd_VectorTableLookup:
case NI_AdvSimd_Arm64_VectorTableLookup:
{
46 changes: 46 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -728,6 +728,52 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
break;

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
{
assert(isRMW);
unsigned fieldIdx = 0;
op2Reg = intrin.op2->GetRegNum();
op3Reg = intrin.op3->GetRegNum();
assert(intrin.op1->OperIsFieldList());

GenTreeFieldList* fieldList = intrin.op1->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op1Reg = firstField->GetRegNum();

regNumber targetFieldReg = REG_NA;
regNumber op1FieldReg = REG_NA;

for (GenTreeFieldList::Use& use : fieldList->Uses())
{
GenTree* fieldNode = use.GetNode();

targetFieldReg = node->GetRegByIndex(fieldIdx);
op1FieldReg = fieldNode->GetRegNum();

if (targetFieldReg != op1FieldReg)
{
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(fieldNode), targetFieldReg, op1FieldReg,
/* canSkip */ true);
}
fieldIdx++;
}

HWIntrinsicImmOpHelper helper(this, intrin.op2, node);

for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const int elementIndex = helper.ImmValue();

GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op3Reg, elementIndex);
}

break;
}
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
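
A closing note on the codegen hunk: the LD2/LD3/LD4 lane forms are read-modify-write and require their destination vectors in consecutive registers, which is why the importer sets compNeedsConsecutiveRegisters and the codegen first moves each incoming field register into the node's consecutive target registers before emitting a single lane-wise load. A standalone model of that register discipline (illustrative only, with made-up register numbering):

#include <cstdio>

// Mirrors the mov loop above: copy each source field register into the
// matching consecutive target register, skipping no-op moves (the emitter's
// /* canSkip */ true), then one ldN instruction writes all of them at once.
void SetUpConsecutiveRegs(const int* srcRegs, const int* dstRegs, int fieldCount, int lane)
{
    for (int i = 0; i < fieldCount; i++)
    {
        if (srcRegs[i] != dstRegs[i])
        {
            printf("mov  v%d.16b, v%d.16b\n", dstRegs[i], srcRegs[i]);
        }
    }
    printf("ld%d  { v%d.h - v%d.h }[%d], [x0]\n", fieldCount, dstRegs[0], dstRegs[fieldCount - 1], lane);
}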