Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3471,6 +3471,30 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

// Special import for Sve2.AddSaturate (two operands, non-void return).
// Besides creating the intrinsic node, this records the base JIT type of the second
// argument as the node's auxiliary type, so later phases can tell whether the two
// operands share the same element type.
case NI_Sve2_AddSaturate:
{
assert(sig->numArgs == 2);
assert(retType != TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;

// argClass is passed by address to each getArgType call and is then handed to
// getBaseJitTypeOfSIMDType, so each query must be followed by its lookup before the
// next query reuses the same variable.
// NOTE(review): argType1 and argType2 are not read again in this block.
var_types argType1 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
CorInfoType op1BaseJitType = getBaseJitTypeOfSIMDType(argClass);
var_types argType2 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass);
// The first argument's base type is expected to equal simdBaseType (assert-only check).
assert(JitType2PreciseVarType(op1BaseJitType) == simdBaseType);

// The stack is popped in reverse argument order: op2 first, then op1.
op2 = impPopStack().val;
op1 = impPopStack().val;

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize);
// NOTE(review): simdBaseType is already passed to gtNewSimdHWIntrinsicNode above;
// confirm this explicit SetSimdBaseType call is still required.
retNode->AsHWIntrinsic()->SetSimdBaseType(simdBaseType);
// Keep the second operand's base type on the node as its auxiliary type.
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(op2BaseJitType);
break;
}

default:
{
return nullptr;
Expand Down
39 changes: 38 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
bool hasImmShift = (intrinEmbMask.category == HW_Category_ShiftLeftByImmediate ||
intrinEmbMask.category == HW_Category_ShiftRightByImmediate) &&
HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id);
bool hasOptionalEmbMask = HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id);

insOpts embOpt = opt;
switch (intrinEmbMask.id)
Expand Down Expand Up @@ -722,6 +723,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
assert(intrin.op3->IsVectorZero());
break;

// Instruction selection for AddSaturate: compare the base type and the auxiliary type
// read from op2's intrinsic node. When they differ, insEmbMask is replaced with
// USQADD (unsigned base type) or SUQADD (otherwise) and the optional-mask path is
// disabled; when they match, insEmbMask is left unchanged and the optional-mask path
// is enabled.
case NI_Sve2_AddSaturate:
{
var_types baseType = op2->AsHWIntrinsic()->GetSimdBaseType();
var_types auxType = op2->AsHWIntrinsic()->GetAuxiliaryType();
if (baseType != auxType)
{
// Mixed-type form: the instruction is chosen from the base type alone.
insEmbMask = (varTypeIsUnsigned(baseType)) ? INS_sve_usqadd : INS_sve_suqadd;
// SUQADD and USQADD must be predicated.
hasOptionalEmbMask = false;
}
else
{
// SQADD and UQADD can be unpredicated.
hasOptionalEmbMask = true;
}
break;
}

case NI_Sve2_ConvertToSingleOdd:
case NI_Sve2_ConvertToSingleOddRoundToOdd:
embOpt = INS_OPTS_D_TO_S;
Expand Down Expand Up @@ -788,6 +807,24 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
break;

// Emission for AddSaturate. Two shapes are produced:
//  - an unpredicated three-register instruction when the optional-mask path is enabled
//    and op1 is an all-true mask for the embedded operation's base type;
//  - otherwise a MOVPRFX of embMaskOp1Reg into targetReg (with maskReg), followed by
//    emitInsHelper on targetReg, maskReg and embMaskOp2Reg.
case NI_Sve2_AddSaturate:
// Either targetReg already is op1Reg, or targetReg must not be embMaskOp2Reg unless
// both embedded operands are the same local variable.
// NOTE(review): presumably this guards the MOVPRFX below, which writes targetReg before
// embMaskOp2Reg is consumed - confirm against the other cases in this switch.
assert((targetReg == op1Reg) || (targetReg != embMaskOp2Reg) ||
genIsSameLocalVar(intrinEmbMask.op1, intrinEmbMask.op2));

if (hasOptionalEmbMask && intrin.op1->IsTrueMask(intrinEmbMask.baseType))
{
// Use unpredicated SQADD/UQADD if the mask is all-true.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
embMaskOp2Reg, embOpt, sopt);
}
else
{
// Predicated form: first move the first embedded operand into targetReg, then
// let emitInsHelper emit the instruction with the second embedded operand.
GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, maskReg,
embMaskOp1Reg, opt);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
}
break;

case NI_Sve2_AddPairwise:
case NI_Sve2_MaxNumberPairwise:
case NI_Sve2_MaxPairwise:
Expand Down Expand Up @@ -862,7 +899,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
else if (falseReg != embMaskOp1Reg)
{
// At the point, targetReg != embMaskOp1Reg != falseReg
if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id))
if (hasOptionalEmbMask)
{
// If the embedded instruction supports optional mask operation, use the "unpredicated"
// version of the instruction, followed by "sel" to select the active lanes.
Expand Down
4 changes: 1 addition & 3 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,8 @@ HARDWARE_INTRINSIC(Sve2, AddPairwiseWideningAndAdd,
HARDWARE_INTRINSIC(Sve2, AddRotateComplex, -1, 3, {INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_sve_cadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingEven, -1, 2, {INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_sve_raddhnb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AddRoundedHighNarrowingOdd, -1, 3, {INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_sve_raddhnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddSaturate, -1, -1, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddSaturateRotateComplex, -1, 3, {INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_sve_sqcadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddSaturateWithSignedAddend, -1, -1, {INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_sve_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddSaturateWithUnsignedAddend, -1, -1, {INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_sve_suqadd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, AddWideningEven, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_sve_saddwb, INS_sve_uaddwb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve2, AddWideningEvenOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_sve_saddlbt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
HARDWARE_INTRINSIC(Sve2, AddWideningOdd, -1, 2, {INS_invalid, INS_invalid, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_sve_saddwt, INS_sve_uaddwt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
Expand Down
Loading
Loading