-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Fixing up some hwintrinsic flags for the x86 intrinsics. #16114
Conversation
src/jit/hwintrinsiclistxarch.h
Outdated
HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag) | ||
HARDWARE_INTRINSIC(SSE_Add, "Add", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) | ||
HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative) | ||
HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the scalar intrinsics are not commutative, as they take their upper bits from the first operand.
HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment) | ||
HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) | ||
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment) | ||
HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) | ||
HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the scalar intrinsics should have CopyUpperBits
, since the upper bits come from the first operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although RyuJIT codegen already has the CopyUpperBits
semantic for these 2-op intrinsics, it is better to explicitly indicate it here. Thanks!
HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) | ||
HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) | ||
HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) | ||
HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ConvertToInt32 and ConvertToInt64 do support containment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As well as the WithTruncation
forms of the same instructions and Divide
src/jit/hwintrinsiclistxarch.h
Outdated
HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative) | ||
HARDWARE_INTRINSIC(SSE_Or, "Or", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) | ||
HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) | ||
HARDWARE_INTRINSIC(SSE_Or, "Or", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or
is commutative
FYI. @4creators, Edit: Nevermind, the SSE2 PR does look to be correct for all the cases (most of this impacted scalar intrinsics anyways, which aren't implemented by said PR 😄). |
Rebased to resolve a merge conflict |
test Windows_NT x64 Checked jitincompletehwintrinsic test Windows_NT x86 Checked jitincompletehwintrinsic test Ubuntu x64 Checked jitincompletehwintrinsic |
Ubuntu failures are unrelated: https://github.com/dotnet/coreclr/issues/16134 |
This should be ready for review. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM Thanks!
Just found one mistake! |
Fixed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…lags Fixing up some hwintrinsic flags for the x86 intrinsics. Commit migrated from dotnet/coreclr@0fbb86e
FYI. @fiigii, @CarolEidt.