@@ -10892,6 +10892,8 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
         // 2. ((v1 ^ AllBitsSet) & v2) to VectorXxx.AndNot(v2, v1)
         // 3. (v1 & ~v2) to VectorXxx.AndNot(v1, v2)
         // 4. (v1 & (v2 ^ AllBitsSet)) to VectorXxx.AndNot(v1, v2)
+        // 5. (v1 & ~NegativeZero) to VectorXxx.Negate(v1)
+        // 6. (~NegativeZero & v2) to VectorXxx.Negate(v2)
         case GT_AND:
         {
             GenTree* op1 = node->Op(1);
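For reference on the two new patterns: NegativeZero is the per-lane -0.0 constant, whose only set bit is the sign bit, so its bitwise complement is 0x7FFFFFFF for float and 0x7FFFFFFFFFFFFFFF for double. A small standalone sketch (plain C++, not JIT code) deriving the constants that the new checks compare against:

```cpp
// Standalone sketch: derive the bit patterns of ~(-0.0) that the new code
// matches for TYP_FLOAT and TYP_DOUBLE broadcast constants.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    float  negZeroF = -0.0f;
    double negZeroD = -0.0;

    uint32_t bitsF;
    uint64_t bitsD;
    std::memcpy(&bitsF, &negZeroF, sizeof(bitsF)); // 0x80000000
    std::memcpy(&bitsD, &negZeroD, sizeof(bitsD)); // 0x8000000000000000

    // Complementing the sign-bit-only pattern yields the masks checked above.
    std::printf("float:  0x%08X\n", ~bitsF);                            // 0x7FFFFFFF
    std::printf("double: 0x%016llX\n", (unsigned long long)~bitsD);     // 0x7FFFFFFFFFFFFFFF
    return 0;
}
```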
@@ -10933,37 +10935,92 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
                     }
                 }
             }
-
-            if ((op == GT_NONE) && op2->OperIsHWIntrinsic())
+            else if (varTypeIsFloating(simdBaseType) && op1->IsVectorConst())
             {
-                GenTreeHWIntrinsic* hw     = op2->AsHWIntrinsic();
-                genTreeOps          hwOper = hw->HWOperGet();
+                GenTreeVecCon* vecCon = op1->AsVecCon();

-                if (hwOper == GT_NOT)
+                if (vecCon->IsBroadcast(retType, simdBaseType))
                 {
-                    // op1 & ~op2
-                    lhs = op1;
-                    rhs = hw->Op(1);
-                    op  = GT_AND_NOT;
+                    if (simdBaseType == TYP_FLOAT)
+                    {
+                        if (vecCon->gtSimdVal.u32[0] == 0x7FFFFFFF)
+                        {
+                            lhs = op2;
+                            op  = GT_NEG;
+                        }
+                    }
+                    else
+                    {
+                        assert(simdBaseType == TYP_DOUBLE);
+
+                        if (vecCon->gtSimdVal.u64[0] == 0x7FFFFFFFFFFFFFFF)
+                        {
+                            lhs = op2;
+                            op  = GT_NEG;
+                        }
+                    }
                 }
-                else if (hwOper == GT_XOR)
+            }
+
+            if (op == GT_NONE)
+            {
+                if (op2->OperIsHWIntrinsic())
                 {
-                    GenTree* hwOp1 = hw->Op(1);
-                    GenTree* hwOp2 = hw->Op(2);
+                    GenTreeHWIntrinsic* hw     = op2->AsHWIntrinsic();
+                    genTreeOps          hwOper = hw->HWOperGet();

-                    if (hwOp1->IsVectorAllBitsSet())
+                    if (hwOper == GT_NOT)
                     {
-                        // op1 & (AllBitsSet ^ op2)
+                        // op1 & ~op2
                         lhs = op1;
-                        rhs = hwOp2;
+                        rhs = hw->Op(1);
                         op  = GT_AND_NOT;
                     }
-                    else if (hwOp2->IsVectorAllBitsSet())
+                    else if (hwOper == GT_XOR)
                     {
-                        // op1 & (op2 ^ AllBitsSet)
-                        lhs = op1;
-                        rhs = hwOp1;
-                        op  = GT_AND_NOT;
+                        GenTree* hwOp1 = hw->Op(1);
+                        GenTree* hwOp2 = hw->Op(2);
+
+                        if (hwOp1->IsVectorAllBitsSet())
+                        {
+                            // op1 & (AllBitsSet ^ op2)
+                            lhs = op1;
+                            rhs = hwOp2;
+                            op  = GT_AND_NOT;
+                        }
+                        else if (hwOp2->IsVectorAllBitsSet())
+                        {
+                            // op1 & (op2 ^ AllBitsSet)
+                            lhs = op1;
+                            rhs = hwOp1;
+                            op  = GT_AND_NOT;
+                        }
+                    }
+                }
+                else if (varTypeIsFloating(simdBaseType) && op2->IsVectorConst())
+                {
+                    GenTreeVecCon* vecCon = op2->AsVecCon();
+
+                    if (vecCon->IsBroadcast(retType, simdBaseType))
+                    {
+                        if (simdBaseType == TYP_FLOAT)
+                        {
+                            if (vecCon->gtSimdVal.u32[0] == 0x7FFFFFFF)
+                            {
+                                lhs = op1;
+                                op  = GT_NEG;
+                            }
+                        }
+                        else
+                        {
+                            assert(simdBaseType == TYP_DOUBLE);
+
+                            if (vecCon->gtSimdVal.u64[0] == 0x7FFFFFFFFFFFFFFF)
+                            {
+                                lhs = op1;
+                                op  = GT_NEG;
+                            }
+                        }
                     }
                 }
             }
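The rewrite only fires when the constant operand is a broadcast, i.e. every lane holds the same value, which is why inspecting lane 0 of gtSimdVal afterwards is sufficient. A simplified standalone model of that test (plain C++ with ordinary arrays, not the JIT's GenTreeVecCon/IsBroadcast API):

```cpp
// Simplified model of the broadcast + mask test: all lanes must be identical,
// and the shared lane value must equal ~bits(-0.0f) == 0x7FFFFFFF.
#include <cstddef>
#include <cstdint>

static bool AllLanesEqual(const uint32_t* lanes, std::size_t count)
{
    for (std::size_t i = 1; i < count; i++)
    {
        if (lanes[i] != lanes[0])
        {
            return false;
        }
    }
    return true;
}

static bool IsNegativeZeroComplementMask(const uint32_t* lanes, std::size_t count)
{
    // Once the constant is known to be a broadcast, lane 0 represents every lane.
    return AllLanesEqual(lanes, count) && (lanes[0] == 0x7FFFFFFFu);
}

int main()
{
    uint32_t vec[4] = {0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu};
    return IsNegativeZeroComplementMask(vec, 4) ? 0 : 1;
}
```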
@@ -10973,6 +11030,17 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
                 break;
             }

+            if (rhs == nullptr)
+            {
+                // No need to filter side effects since we only have 1 operand
+                GenTree* resNode = gtNewSimdUnOpNode(op, retType, lhs, simdBaseJitType, simdSize);
+
+                DEBUG_DESTROY_NODE(node);
+                INDEBUG(resNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+
+                return resNode;
+            }
+
             // Filter out side effecting cases for several reasons:
             // 1. gtNewSimdBinOpNode may swap operand order.
             // 2. The code above will swap operand order.
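On the new early-return: with a single operand there is no second operand to order against, so the side-effect filtering below is unnecessary, whereas gtNewSimdBinOpNode (per the existing comment) may evaluate operands in a different order than the original tree. A minimal illustration, unrelated to the JIT, of why reordering side-effecting operands is observable:

```cpp
// Minimal illustration: when operands have side effects, evaluating them in a
// different order produces different values, so such trees must not be reordered.
#include <cstdio>

static int counter = 0;

static int next(int base)
{
    counter++;            // visible side effect
    return base + counter;
}

int main()
{
    // Left-to-right evaluation.
    int a = next(10);
    int b = next(20);
    std::printf("%d %d\n", a, b); // 11 22

    counter = 0;

    // Right-to-left (what a reordering transform would amount to).
    int b2 = next(20);
    int a2 = next(10);
    std::printf("%d %d\n", a2, b2); // 12 21
    return 0;
}
```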