Skip to content

Commit

Permalink
Add xarch blsi (#66193)
Browse files Browse the repository at this point in the history
* implement blsi

* add bmi intrinsics test projects

* add using System for Console.
  • Loading branch information
Wraith2 authored Mar 15, 2022
1 parent 436b97c commit 6bf873a
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA
// BMI1
// Instruction table rows for the BMI1 group. Each INST3 row carries the instruction id, its
// mnemonic string, an update mode, three encoding slots and a set of flag/attribute bits.
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
// NOTE(review): the two blsi rows below are the removed and added sides of a one-line change
// (this hunk is reported as "1 addition & 1 deletion"); only the second row, which lists the
// EFLAGS effects, is meant to exist in the resulting header.
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Extract Lowest Set Isolated Bit
// NOTE(review): blsmsk and bextr list no EFLAGS effects here, unlike andn/blsi/blsr - confirm
// against the instruction reference whether that is intentional.
INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_Flags_IsDstDstSrcAVXInstruction) // Get Mask Up to Lowest Set Bit
INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_IsDstDstSrcAVXInstruction) // Reset Lowest Set Bit
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ class Lowering final : public Phase
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
#elif defined(TARGET_ARM64)
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
Expand Down
84 changes: 84 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,12 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
{
return replacementNode->gtNext;
}

replacementNode = TryLowerAndOpToExtractLowestSetBit(binOp);
if (replacementNode != nullptr)
{
return replacementNode->gtNext;
}
}
#endif

Expand Down Expand Up @@ -3823,6 +3829,84 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode)
return blsrNode;
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerAndOpToExtractLowestSetBit: Lowers a tree AND(X, NEG(X)) to
// HWIntrinsic::ExtractLowestSetBit
//
// Arguments:
//    andNode - GT_AND node of integral type
//
// Return Value:
//    Returns the replacement node if one is created else nullptr indicating no replacement
//
// Notes:
//    The pattern is only recognized when both occurrences of X are GT_LCL_VAR nodes that refer
//    to the same local number; the NEG may be either operand of the AND.
//    Performs containment checks on the replacement node if one is created
//
GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode)
{
    // Find which operand (if any) is the negation; the other one is the plain value.
    GenTree* opNode  = nullptr;
    GenTree* negNode = nullptr;
    if (andNode->gtGetOp1()->OperIs(GT_NEG))
    {
        negNode = andNode->gtGetOp1();
        opNode  = andNode->gtGetOp2();
    }
    else if (andNode->gtGetOp2()->OperIs(GT_NEG))
    {
        negNode = andNode->gtGetOp2();
        opNode  = andNode->gtGetOp1();
    }

    if (opNode == nullptr)
    {
        return nullptr;
    }

    // Both sides must read the very same local, otherwise this is not AND(X, NEG(X)).
    GenTree* negOp = negNode->AsUnOp()->gtGetOp1();
    if (!negOp->OperIs(GT_LCL_VAR) || !opNode->OperIs(GT_LCL_VAR) ||
        (negOp->AsLclVar()->GetLclNum() != opNode->AsLclVar()->GetLclNum()))
    {
        return nullptr;
    }

    NamedIntrinsic intrinsic;
    if (andNode->TypeIs(TYP_LONG))
    {
        // A 64-bit operand needs the X64 flavour of the intrinsic. Do not fall back to the
        // 32-bit intrinsic id for a TYP_LONG node when the X64 instruction set is unavailable.
        if (!comp->compOpportunisticallyDependsOn(InstructionSet_BMI1_X64))
        {
            return nullptr;
        }

        intrinsic = NamedIntrinsic::NI_BMI1_X64_ExtractLowestSetBit;
    }
    else if (comp->compOpportunisticallyDependsOn(InstructionSet_BMI1))
    {
        intrinsic = NamedIntrinsic::NI_BMI1_ExtractLowestSetBit;
    }
    else
    {
        return nullptr;
    }

    LIR::Use use;
    if (!BlockRange().TryGetUse(andNode, &use))
    {
        return nullptr;
    }

    // The intrinsic consumes the non-negated operand directly.
    GenTreeHWIntrinsic* blsiNode = comp->gtNewScalarHWIntrinsicNode(andNode->TypeGet(), opNode, intrinsic);

    JITDUMP("Lower: optimize AND(X, NEG(X))\n");
    DISPNODE(andNode);
    JITDUMP("to:\n");
    DISPNODE(blsiNode);

    use.ReplaceWith(blsiNode);

    // Splice the new node in where the AND was, then drop the AND, the NEG and the second
    // read of the local, none of which are referenced any more.
    BlockRange().InsertBefore(andNode, blsiNode);
    BlockRange().Remove(andNode);
    BlockRange().Remove(negNode);
    BlockRange().Remove(negOp);

    ContainCheckHWIntrinsic(blsiNode);

    return blsiNode;
}

//----------------------------------------------------------------------------------------------
// Lowering::TryLowerAndOpToAndNot: Lowers a tree AND(X, NOT(Y)) to HWIntrinsic::AndNot
//
Expand Down
63 changes: 63 additions & 0 deletions src/tests/JIT/Intrinsics/BMI1Intrinsics.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System;
using System.Runtime.CompilerServices;

namespace BMI1Intrinsics
{
    /// <summary>
    /// Checks that the plain C# expressions corresponding to the BMI1 andn, blsi, blsr and
    /// blsmsk operations produce the expected values.
    /// </summary>
    internal class Program
    {
        // Exit code; stays at 100 on success and is incremented once per failed check.
        private static int _errorCode = 100;

        /// <summary>
        /// Runs every helper over a table of inputs and compares against precomputed results.
        /// </summary>
        /// <param name="args">Unused.</param>
        /// <returns>100 when every check passes, otherwise 100 plus the number of failures.</returns>
        static int Main(string[] args)
        {
            // BMI1 expressions are folded to hwintrinsics that return identical results.

            var values = new (uint input1, uint input2, uint andnExpected, uint blsiExpected, uint blsrExpected, uint blsmskExpected)[] {
                (0,                       0,          0,          0,          0,          0xFFFFFFFF),
                (1,                       0,          1,          1,          0,          1),
                (uint.MaxValue / 2,       0,          0x7FFFFFFF, 1,          0x7FFFFFFE, 1),
                ((uint.MaxValue / 2) - 1, 0,          0x7FFFFFFE, 2,          0x7FFFFFFC, 3),
                ((uint.MaxValue / 2) + 1, 0,          0x80000000, 0x80000000, 0,          0xFFFFFFFF),
                (uint.MaxValue - 1,       0,          0xFFFFFFFE, 2,          0xFFFFFFFC, 3),
                (uint.MaxValue,           0,          0xFFFFFFFF, 1,          0xFFFFFFFE, 1),
                (0xAAAAAAAA,              0xAAAAAAAA, 0,          2,          0xAAAAAAA8, 3),
                (0xAAAAAAAA,              0x55555555, 0xAAAAAAAA, 2,          0xAAAAAAA8, 3),
            };

            foreach (var value in values)
            {
                Test(value.input1, AndNot(value.input1, value.input2), value.andnExpected, nameof(AndNot));
                Test(value.input1, ExtractLowestSetIsolatedBit(value.input1), value.blsiExpected, nameof(ExtractLowestSetIsolatedBit));
                Test(value.input1, ResetLowestSetBit(value.input1), value.blsrExpected, nameof(ResetLowestSetBit));
                Test(value.input1, GetMaskUpToLowestSetBit(value.input1), value.blsmskExpected, nameof(GetMaskUpToLowestSetBit));
            }

            return _errorCode;
        }

        /// <summary>Returns the bits of <paramref name="x"/> that are not set in <paramref name="y"/>.</summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static uint AndNot(uint x, uint y) => x & (~y); // bmi1 andn

        /// <summary>Isolates the lowest set bit of <paramref name="x"/>; 0 when no bit is set.</summary>
        // Unary minus on a uint is evaluated as long, hence the cast back to uint.
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static uint ExtractLowestSetIsolatedBit(uint x) => (uint)(x & (-x)); // bmi1 blsi

        /// <summary>Clears the lowest set bit of <paramref name="x"/>.</summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static uint ResetLowestSetBit(uint x) => x & (x - 1); // bmi1 blsr

        /// <summary>
        /// Returns a mask with every bit up to and including the lowest set bit of
        /// <paramref name="x"/> set; all ones when <paramref name="x"/> is 0.
        /// </summary>
        // blsmsk is x ^ (x - 1); x ^ (-x) would yield the bitwise complement of that mask.
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static uint GetMaskUpToLowestSetBit(uint x) => x ^ (x - 1); // bmi1 blsmsk

        /// <summary>
        /// Compares <paramref name="output"/> with <paramref name="expected"/>; on mismatch
        /// prints a report and bumps the process exit code.
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        private static void Test(uint input, uint output, uint expected, string callerName)
        {
            if (output != expected)
            {
                Console.WriteLine($"{callerName} failed.");
                Console.WriteLine($"Input:    {input:X}");
                Console.WriteLine($"Output:   {output:X}");
                Console.WriteLine($"Expected: {expected:X}");

                _errorCode++;
            }
        }
    }
}
10 changes: 10 additions & 0 deletions src/tests/JIT/Intrinsics/BMI1Intrinsics_ro.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Builds BMI1Intrinsics.cs as an executable with optimizations enabled and no debug symbols. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="BMI1Intrinsics.cs" />
</ItemGroup>
</Project>
10 changes: 10 additions & 0 deletions src/tests/JIT/Intrinsics/BMI1Intrinsicss_r.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Builds BMI1Intrinsics.cs as an executable with no debug symbols; Optimize is left empty, i.e. not explicitly enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<DebugType>None</DebugType>
<Optimize />
</PropertyGroup>
<ItemGroup>
<Compile Include="BMI1Intrinsics.cs" />
</ItemGroup>
</Project>

0 comments on commit 6bf873a

Please sign in to comment.