[release/9.0] Ensure that constant folding of bitwise operations for float/double is bitwise (#106830)

github-actions[bot] · tannergooding · jeffschwMSFT · web-flow · commit 0ee150e25e32 · 2024-08-23T12:36:29.000-07:00
* Ensure that constant folding of bitwise operations for float/double is bitwise

* Ensure that the new header-only methods are marked `inline` to avoid duplicate definitions

* Apply formatting patch

---------

Co-authored-by: Tanner Gooding <tagoo@outlook.com>
Co-authored-by: Jeff Schwartz <jeffschw@microsoft.com>
diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h
@@ -363,6 +363,11 @@ typedef simd64_t simd_t;
 typedef simd16_t simd_t;
 #endif
 
+inline bool IsUnaryBitwiseOperation(genTreeOps oper)
+{
+    return (oper == GT_LZCNT) || (oper == GT_NOT);
+}
+
 template <typename TBase>
 TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
 {
@@ -404,27 +409,35 @@ TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
 template <>
 inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
 {
-    if (oper == GT_NEG)
+    switch (oper)
     {
-        return -arg0;
-    }
+        case GT_NEG:
+        {
+            return -arg0;
+        }
 
-    uint32_t arg0Bits   = BitOperations::SingleToUInt32Bits(arg0);
-    uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
-    return BitOperations::UInt32BitsToSingle(resultBits);
+        default:
+        {
+            unreached();
+        }
+    }
 }
 
 template <>
 inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
 {
-    if (oper == GT_NEG)
+    switch (oper)
     {
-        return -arg0;
-    }
+        case GT_NEG:
+        {
+            return -arg0;
+        }
 
-    uint64_t arg0Bits   = BitOperations::DoubleToUInt64Bits(arg0);
-    uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
-    return BitOperations::UInt64BitsToDouble(resultBits);
+        default:
+        {
+            unreached();
+        }
+    }
 }
 
 template <typename TBase>
@@ -600,13 +613,37 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     {
         case TYP_FLOAT:
         {
-            EvaluateUnarySimd<TSimd, float>(oper, scalar, result, arg0);
+            // Some operations are bitwise and we want to ensure inputs like
+            // sNaN are preserved rather than being converted to a qNaN when
+            // the CPU encounters them. So we check for and handle that early
+            // prior to extracting the element out of the vector value.
+
+            if (IsUnaryBitwiseOperation(oper))
+            {
+                EvaluateUnarySimd<TSimd, int32_t>(oper, scalar, result, arg0);
+            }
+            else
+            {
+                EvaluateUnarySimd<TSimd, float>(oper, scalar, result, arg0);
+            }
             break;
         }
 
         case TYP_DOUBLE:
         {
-            EvaluateUnarySimd<TSimd, double>(oper, scalar, result, arg0);
+            // Some operations are bitwise and we want to ensure inputs like
+            // sNaN are preserved rather than being converted to a qNaN when
+            // the CPU encounters them. So we check for and handle that early
+            // prior to extracting the element out of the vector value.
+
+            if (IsUnaryBitwiseOperation(oper))
+            {
+                EvaluateUnarySimd<TSimd, int64_t>(oper, scalar, result, arg0);
+            }
+            else
+            {
+                EvaluateUnarySimd<TSimd, double>(oper, scalar, result, arg0);
+            }
             break;
         }
 
@@ -665,6 +702,12 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     }
 }
 
+inline bool IsBinaryBitwiseOperation(genTreeOps oper)
+{
+    return (oper == GT_AND) || (oper == GT_AND_NOT) || (oper == GT_LSH) || (oper == GT_OR) || (oper == GT_ROL) ||
+           (oper == GT_ROR) || (oper == GT_RSH) || (oper == GT_RSZ) || (oper == GT_XOR);
+}
+
 template <typename TBase>
 TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
 {
@@ -902,11 +945,7 @@ inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0,
 
         default:
         {
-            uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
-            uint32_t arg1Bits = BitOperations::SingleToUInt32Bits(arg1);
-
-            uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
-            return BitOperations::UInt32BitsToSingle(resultBits);
+            unreached();
         }
     }
 }
@@ -948,11 +987,7 @@ inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double ar
 
         default:
         {
-            uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
-            uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1);
-
-            uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
-            return BitOperations::UInt64BitsToDouble(resultBits);
+            unreached();
         }
     }
 }
@@ -1188,13 +1223,37 @@ void EvaluateBinarySimd(
     {
         case TYP_FLOAT:
         {
-            EvaluateBinarySimd<TSimd, float>(oper, scalar, result, arg0, arg1);
+            // Some operations are bitwise and we want to ensure inputs like
+            // sNaN are preserved rather than being converted to a qNaN when
+            // the CPU encounters them. So we check for and handle that early
+            // prior to extracting the element out of the vector value.
+
+            if (IsBinaryBitwiseOperation(oper))
+            {
+                EvaluateBinarySimd<TSimd, int32_t>(oper, scalar, result, arg0, arg1);
+            }
+            else
+            {
+                EvaluateBinarySimd<TSimd, float>(oper, scalar, result, arg0, arg1);
+            }
             break;
         }
 
         case TYP_DOUBLE:
         {
-            EvaluateBinarySimd<TSimd, double>(oper, scalar, result, arg0, arg1);
+            // Some operations are bitwise and we want to ensure inputs like
+            // sNaN are preserved rather than being converted to a qNaN when
+            // the CPU encounters them. So we check for and handle that early
+            // prior to extracting the element out of the vector value.
+
+            if (IsBinaryBitwiseOperation(oper))
+            {
+                EvaluateBinarySimd<TSimd, int64_t>(oper, scalar, result, arg0, arg1);
+            }
+            else
+            {
+                EvaluateBinarySimd<TSimd, double>(oper, scalar, result, arg0, arg1);
+            }
             break;
         }
 
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106610/Runtime_106610.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106610/Runtime_106610.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using Xunit;
+
+// Generated by Fuzzlyn v2.2 on 2024-08-17 17:40:06
+// Run on X86 Windows
+// Seed: 1343518557351353159-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi2,x86fma,x86lzcnt,x86pclmulqdq,x86popcnt,x86sse,x86sse2,x86sse3,x86sse41,x86sse42,x86ssse3,x86x86base
+// Reduced from 171.2 KiB to 0.6 KiB in 00:06:37
+// Debug: Outputs <4292870144, 0, 0, 0, 0, 0, 0, 0>
+// Release: Outputs <0, 0, 0, 0, 0, 0, 0, 0>
+
+public class C1
+{
+    public Vector256<float> F5;
+
+    public C1(Vector256<float> f5)
+    {
+        F5 = f5;
+    }
+}
+
+public class Runtime_106610
+{
+    [Fact]
+    public static void TestEntryPoint()
+    {
+        if (Avx512DQ.VL.IsSupported)
+        {
+            var vr4 = Vector256.Create<float>(0);
+            var vr5 = Vector256.CreateScalar(1f);
+            var vr6 = Vector256.CreateScalar(-10f);
+            var vr7 = Avx.Or(vr5, vr6);
+            C1 vr8 = new C1(Avx512DQ.VL.Range(vr4, vr7, 0));
+            Assert.Equal(Vector256.CreateScalar<uint>(4292870144), vr8.F5.AsUInt32());
+        }
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106610/Runtime_106610.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106610/Runtime_106610.csproj
@@ -0,0 +1,8 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>