From 21715414447e99e3960e6f5625d292b4aaa3e47d Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Wed, 15 Feb 2023 09:24:32 -0800
Subject: [PATCH 1/3] Adding more SIMD constant folding support

---
 src/coreclr/jit/simd.h       |  172 +++++-
 src/coreclr/jit/valuenum.cpp | 1027 +++++++++++++++++++++++++++++++---
 src/coreclr/jit/valuenum.h   |   12 +-
 3 files changed, 1117 insertions(+), 94 deletions(-)
diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h
index 526c032480708..f2207cb92ee44 100644
--- a/src/coreclr/jit/simd.h
+++ b/src/coreclr/jit/simd.h
@@ -149,6 +149,39 @@ struct simd32_t
     }
 };
 
+template <typename TBase> // Bitwise unary ops on one element; EvaluateUnaryScalar's default case forwards here.
+TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
+{
+    switch (oper)
+    {
+        case GT_NOT:
+        {
+            return ~arg0; // bitwise complement of the element
+        }
+
+        default:
+        {
+            unreached(); // GT_NOT is the only oper this helper handles
+        }
+    }
+}
+
+template <> // float: the op is applied to the 32-bit pattern of arg0, not to its numeric value
+inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
+{
+    uint32_t arg0Bits   = *reinterpret_cast<uint32_t*>(&arg0); // NOTE(review): pointer-cast type pun; confirm OK
+    uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits); // reuse the uint32_t instantiation
+    return *reinterpret_cast<float*>(&resultBits); // hand the resulting bits back as a float
+}
+
+template <> // double: the op is applied to the 64-bit pattern of arg0, not to its numeric value
+inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
+{
+    uint64_t arg0Bits   = *reinterpret_cast<uint64_t*>(&arg0); // NOTE(review): pointer-cast type pun; confirm OK
+    uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits); // reuse the uint64_t instantiation
+    return *reinterpret_cast<double*>(&resultBits); // hand the resulting bits back as a double
+}
+
 template <typename TBase>
 TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
 {
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
 
         default:
         {
-            unreached();
+            return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
         }
     }
 }
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     }
 }
 
+template <typename TBase> // Right shift of arg0 by arg1; zero-filling when TBase is an unsigned type.
+TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
+{
+    return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1)); // shift count is masked to [0, bit-width of TBase - 1]
+}
+
+template <> // int8_t: shift the uint8_t bit pattern instead, so no sign bits are shifted in
+inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
+{
+    uint8_t arg0Bits = static_cast<uint8_t>(arg0);
+    uint8_t arg1Bits = static_cast<uint8_t>(arg1);
+
+    uint8_t resultBits = EvaluateBinaryScalarRSZ<uint8_t>(arg0Bits, arg1Bits); // unsigned instantiation does the shift
+    return static_cast<int8_t>(resultBits);
+}
+
+template <> // int16_t: shift the uint16_t bit pattern instead, so no sign bits are shifted in
+inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
+{
+    uint16_t arg0Bits = static_cast<uint16_t>(arg0);
+    uint16_t arg1Bits = static_cast<uint16_t>(arg1);
+
+    uint16_t resultBits = EvaluateBinaryScalarRSZ<uint16_t>(arg0Bits, arg1Bits); // unsigned instantiation does the shift
+    return static_cast<int16_t>(resultBits);
+}
+
+template <> // int32_t: shift the uint32_t bit pattern instead, so no sign bits are shifted in
+inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
+{
+    uint32_t arg0Bits = static_cast<uint32_t>(arg0);
+    uint32_t arg1Bits = static_cast<uint32_t>(arg1);
+
+    uint32_t resultBits = EvaluateBinaryScalarRSZ<uint32_t>(arg0Bits, arg1Bits); // unsigned instantiation does the shift
+    return static_cast<int32_t>(resultBits);
+}
+
+template <> // int64_t: shift the uint64_t bit pattern instead, so no sign bits are shifted in
+inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
+{
+    uint64_t arg0Bits = static_cast<uint64_t>(arg0);
+    uint64_t arg1Bits = static_cast<uint64_t>(arg1);
+
+    uint64_t resultBits = EvaluateBinaryScalarRSZ<uint64_t>(arg0Bits, arg1Bits); // unsigned instantiation does the shift
+    return static_cast<int64_t>(resultBits);
+}
+
+template <typename TBase> // Bitwise and shift binary ops on one element; EvaluateBinaryScalar's default forwards here.
+TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
+{
+    switch (oper)
+    {
+        case GT_AND:
+        {
+            return arg0 & arg1;
+        }
+
+        case GT_AND_NOT:
+        {
+            return arg0 & ~arg1; // only the second operand is complemented
+        }
+
+        case GT_LSH: // NOTE(review): counts >= the element width wrap here; confirm that matches the folded op
+        {
+            return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1)); // shift count is masked to the element width
+        }
+
+        case GT_OR:
+        {
+            return arg0 | arg1;
+        }
+
+        case GT_RSH: // NOTE(review): relies on >> sign-filling for signed TBase; presumably the arithmetic shift
+        {
+            return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1)); // shift count is masked to the element width
+        }
+
+        case GT_RSZ:
+        {
+            return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1); // signed types go through their unsigned bit pattern
+        }
+
+        case GT_XOR:
+        {
+            return arg0 ^ arg1;
+        }
+
+        default:
+        {
+            unreached(); // any oper not listed above is not handled by this helper
+        }
+    }
+}
+
+template <> // float: the op is applied to the 32-bit patterns of the operands, not to their numeric values
+inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
+{
+    uint32_t arg0Bits = *reinterpret_cast<uint32_t*>(&arg0); // NOTE(review): pointer-cast type pun; confirm OK
+    uint32_t arg1Bits = *reinterpret_cast<uint32_t*>(&arg1);
+
+    uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits); // uint32_t instantiation
+    return *reinterpret_cast<float*>(&resultBits); // hand the resulting bits back as a float
+}
+
+template <> // double: the op is applied to the 64-bit patterns of the operands, not to their numeric values
+inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
+{
+    uint64_t arg0Bits = *reinterpret_cast<uint64_t*>(&arg0); // NOTE(review): pointer-cast type pun; confirm OK
+    uint64_t arg1Bits = *reinterpret_cast<uint64_t*>(&arg1);
+
+    uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits); // uint64_t instantiation
+    return *reinterpret_cast<double*>(&resultBits); // hand the resulting bits back as a double
+}
+
 template <typename TBase>
 TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
 {
@@ -278,6 +424,16 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
             return arg0 + arg1;
         }
 
+        case GT_DIV:
+        {
+            return arg0 / arg1;
+        }
+
+        case GT_MUL:
+        {
+            return arg0 * arg1;
+        }
+
         case GT_SUB:
         {
             return arg0 - arg1;
@@ -285,7 +441,7 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
 
         default:
         {
-            unreached();
+            return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
         }
     }
 }
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
     }
 }
 
+template <typename TSimd, typename TBase> // Copies arg0 into every TBase-sized element slot of *result.
+void BroadcastConstantToSimd(TSimd* result, TBase arg0)
+{
+    uint32_t count = sizeof(TSimd) / sizeof(TBase); // whole elements only; any remainder bytes are not written
+
+    for (uint32_t i = 0; i < count; i++)
+    {
+        // Equivalent to `result[i] = arg0`, performed with memcpy through the u8 view of the vector
+        memcpy(&result->u8[i * sizeof(TBase)], &arg0, sizeof(TBase));
+    }
+}
+
 #ifdef FEATURE_SIMD
 
 #ifdef TARGET_XARCH
diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp
index c43bb47a36f8d..29603b28414e3 100644
--- a/src/coreclr/jit/valuenum.cpp
+++ b/src/coreclr/jit/valuenum.cpp
@@ -1999,11 +1999,195 @@ ValueNum ValueNumStore::VNOneForType(var_types typ)
             return VNForDoubleCon(1.0);
 
         default:
+        {
+            assert(!varTypeIsSIMD(typ));
+            return NoVN;
+        }
+    }
+}
+
+ValueNum ValueNumStore::VNAllBitsForType(var_types typ) // VN of the all-bits-set constant of typ, or NoVN
+{
+    switch (typ)
+    {
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            return VNForIntCon(0xFFFFFFFF); // all 32 bits set
+        }
+
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+            return VNForLongCon(0xFFFFFFFFFFFFFFFF); // all 64 bits set
+        }
+
+#ifdef FEATURE_SIMD
+        case TYP_SIMD8:
+        {
+            simd8_t cnsVal;
+
+            cnsVal.u32[0] = 0xFFFFFFFF; // every 32-bit element is written explicitly
+            cnsVal.u32[1] = 0xFFFFFFFF;
+
+            return VNForSimd8Con(cnsVal);
+        }
+
+        case TYP_SIMD12:
+        {
+            simd12_t cnsVal;
+
+            cnsVal.u32[0] = 0xFFFFFFFF; // three 32-bit elements for the 12-byte vector
+            cnsVal.u32[1] = 0xFFFFFFFF;
+            cnsVal.u32[2] = 0xFFFFFFFF;
+
+            return VNForSimd12Con(cnsVal);
+        }
+
+        case TYP_SIMD16:
+        {
+            simd16_t cnsVal;
+
+            cnsVal.u32[0] = 0xFFFFFFFF; // four 32-bit elements
+            cnsVal.u32[1] = 0xFFFFFFFF;
+            cnsVal.u32[2] = 0xFFFFFFFF;
+            cnsVal.u32[3] = 0xFFFFFFFF;
+
+            return VNForSimd16Con(cnsVal);
+        }
+
+        case TYP_SIMD32:
+        {
+            simd32_t cnsVal;
+
+            cnsVal.u32[0] = 0xFFFFFFFF; // eight 32-bit elements, written in two groups of four
+            cnsVal.u32[1] = 0xFFFFFFFF;
+            cnsVal.u32[2] = 0xFFFFFFFF;
+            cnsVal.u32[3] = 0xFFFFFFFF;
+
+            cnsVal.u32[4] = 0xFFFFFFFF;
+            cnsVal.u32[5] = 0xFFFFFFFF;
+            cnsVal.u32[6] = 0xFFFFFFFF;
+            cnsVal.u32[7] = 0xFFFFFFFF;
+
+            return VNForSimd32Con(cnsVal);
+        }
+#endif // FEATURE_SIMD
+
+        default: // every type not listed above has no all-bits constant here
+        {
             return NoVN;
+        }
     }
 }
 
 #ifdef FEATURE_SIMD
+ValueNum ValueNumStore::VNOneForSimdType(var_types simdType, var_types simdBaseType) // VN of a vector of ones
+{
+    assert(varTypeIsSIMD(simdType));
+
+    simd32_t simd32Val = {}; // zeroed scratch vector; only the leading simdSize bytes are filled below
+    int      simdSize  = genTypeSize(simdType);
+
+    switch (simdBaseType) // write the value 1 into each element, using the element width of the base type
+    {
+        case TYP_BYTE:
+        case TYP_UBYTE:
+        {
+            for (int i = 0; i < simdSize; i++)
+            {
+                simd32Val.u8[i] = 1;
+            }
+            break;
+        }
+
+        case TYP_SHORT:
+        case TYP_USHORT:
+        {
+            for (int i = 0; i < (simdSize / 2); i++)
+            {
+                simd32Val.u16[i] = 1;
+            }
+            break;
+        }
+
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            for (int i = 0; i < (simdSize / 4); i++)
+            {
+                simd32Val.u32[i] = 1;
+            }
+            break;
+        }
+
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+            for (int i = 0; i < (simdSize / 8); i++)
+            {
+                simd32Val.u64[i] = 1;
+            }
+            break;
+        }
+
+        case TYP_FLOAT:
+        {
+            for (int i = 0; i < (simdSize / 4); i++)
+            {
+                simd32Val.f32[i] = 1.0f;
+            }
+            break;
+        }
+
+        case TYP_DOUBLE:
+        {
+            for (int i = 0; i < (simdSize / 8); i++)
+            {
+                simd32Val.f64[i] = 1.0;
+            }
+            break;
+        }
+
+        default:
+        {
+            unreached(); // no other base type is handled
+        }
+    }
+
+    switch (simdType) // hand back the leading part of the scratch vector that matches simdType
+    {
+        case TYP_SIMD8:
+        {
+            return VNForSimd8Con(simd32Val.v64[0]);
+        }
+
+        case TYP_SIMD12:
+        {
+            assert(simdBaseType == TYP_FLOAT); // float is the only base type expected for TYP_SIMD12
+
+            simd12_t simd12Val;
+            memcpy(&simd12Val, &simd32Val.f32, sizeof(simd12_t)); // copy only the first sizeof(simd12_t) bytes
+            return VNForSimd12Con(simd12Val);
+        }
+
+        case TYP_SIMD16:
+        {
+            return VNForSimd16Con(simd32Val.v128[0]);
+        }
+
+        case TYP_SIMD32:
+        {
+            return VNForSimd32Con(simd32Val);
+        }
+
+        default:
+        {
+            unreached(); // simdType was asserted to be a SIMD type; no other SIMD type is handled
+        }
+    }
+}
+
 ValueNum ValueNumStore::VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType)
 {
     ValueNum baseTypeVN = VNForIntCon(INT32(simdBaseJitType));
@@ -4262,6 +4446,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
         {
             // (0 * x) == 0
             // (x * 0) == 0
+            // This identity does not apply for floating-point (when x == -0.0, NaN, +Inf, -Inf)
             ValueNum ZeroVN = VNZeroForType(typ);
             if (arg0VN == ZeroVN)
             {
@@ -4271,18 +4456,19 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
             {
                 return ZeroVN;
             }
+        }
 
-            // (x * 1) == x
-            // (1 * x) == x
-            ValueNum OneVN = VNOneForType(typ);
-            if (arg0VN == OneVN)
-            {
-                return arg1VN;
-            }
-            else if (arg1VN == OneVN)
-            {
-                return arg0VN;
-            }
+        // (x * 1) == x
+        // (1 * x) == x
+        // This is safe for all floats since we do not fault for sNaN
+        ValueNum OneVN = VNOneForType(typ);
+        if (arg0VN == OneVN)
+        {
+            return arg1VN;
+        }
+        else if (arg1VN == OneVN)
+        {
+            return arg0VN;
         }
 
         return NoVN;
@@ -4293,6 +4479,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
     {
         ValueNum ZeroVN;
         ValueNum OneVN;
+        ValueNum AllBitsVN;
 
         switch (genTreeOps(func))
         {
@@ -4310,53 +4497,124 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
 
             case GT_DIV:
             case GT_UDIV:
+            {
                 // (x / 1) == x
-                // This identity does not apply for floating point
-                //
-                if (!varTypeIsFloating(typ))
+                // This is safe for all floats since we do not fault for sNaN
+                OneVN = VNOneForType(typ);
+
+                if (arg1VN == OneVN)
                 {
-                    OneVN = VNOneForType(typ);
-                    if (arg1VN == OneVN)
-                    {
-                        resultVN = arg0VN;
-                    }
+                    resultVN = arg0VN;
                 }
                 break;
+            }
 
             case GT_OR:
+            {
+                // (0 | x) == x
+                // (x | 0) == x
+                ZeroVN = VNZeroForType(typ);
+                if (arg0VN == ZeroVN)
+                {
+                    resultVN = arg1VN;
+                    break;
+                }
+                else if (arg1VN == ZeroVN)
+                {
+                    resultVN = arg0VN;
+                    break;
+                }
+
+                // (x | ~0) == ~0
+                // (~0 | x) == ~0
+                AllBitsVN = VNAllBitsForType(typ);
+                if (arg0VN == AllBitsVN)
+                {
+                    resultVN = AllBitsVN;
+                    break;
+                }
+                else if (arg1VN == AllBitsVN)
+                {
+                    resultVN = AllBitsVN;
+                    break;
+                }
+
+                // x | x == x
+                if (arg0VN == arg1VN)
+                {
+                    resultVN = arg0VN;
+                }
+                break;
+            }
+
             case GT_XOR:
-                // (0 | x) == x,  (0 ^ x) == x
-                // (x | 0) == x,  (x ^ 0) == x
+            {
+                // (0 ^ x) == x
+                // (x ^ 0) == x
                 ZeroVN = VNZeroForType(typ);
                 if (arg0VN == ZeroVN)
                 {
                     resultVN = arg1VN;
+                    break;
                 }
                 else if (arg1VN == ZeroVN)
                 {
                     resultVN = arg0VN;
+                    break;
+                }
+
+                // x ^ x == 0
+                if (arg0VN == arg1VN)
+                {
+                    resultVN = ZeroVN;
                 }
                 break;
+            }
 
             case GT_AND:
+            {
                 // (x & 0) == 0
                 // (0 & x) == 0
                 ZeroVN = VNZeroForType(typ);
                 if (arg0VN == ZeroVN)
                 {
                     resultVN = ZeroVN;
+                    break;
                 }
                 else if (arg1VN == ZeroVN)
                 {
                     resultVN = ZeroVN;
+                    break;
+                }
+
+                // (x & ~0) == x
+                // (~0 & x) == x
+                AllBitsVN = VNAllBitsForType(typ);
+                if (arg0VN == AllBitsVN)
+                {
+                    resultVN = arg1VN;
+                    break;
+                }
+                else if (arg1VN == AllBitsVN)
+                {
+                    resultVN = arg0VN;
+                    break;
+                }
+
+                // x & x == x
+                if (arg0VN == arg1VN)
+                {
+                    resultVN = arg0VN;
                 }
                 break;
+            }
 
             case GT_LSH:
             case GT_RSH:
             case GT_RSZ:
             case GT_ROL:
             case GT_ROR:
+            {
                 // (x << 0)  == x
                 // (x >> 0)  == x
                 // (x rol 0) == x
@@ -4366,6 +4624,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
                 {
                     resultVN = arg0VN;
                 }
+
                 // (0 << x)  == 0
                 // (0 >> x)  == 0
                 // (0 rol x) == 0
@@ -4375,6 +4634,7 @@ ValueNum ValueNumStore::EvalUsingMathIdentity(var_types typ, VNFunc func, ValueN
                     resultVN = ZeroVN;
                 }
                 break;
+            }
 
             case GT_EQ:
                 // (null == non-null) == false
@@ -6012,37 +6272,60 @@ void ValueNumStore::SetVNIsCheckedBound(ValueNum vn)
 }
 
 #ifdef FEATURE_HW_INTRINSICS
-ValueNum EvaluateUnarySimd(
-    ValueNumStore* vns, genTreeOps oper, bool scalar, var_types simdType, var_types baseType, ValueNum arg0VN)
+template <typename TSimd>
+TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum argVN)
 {
-    switch (simdType)
+    assert(vns->IsVNConstant(argVN));
+    assert(!varTypeIsSIMD(vns->TypeOfVN(argVN)));
+
+    TSimd result = {};
+
+    switch (baseType)
     {
-        case TYP_SIMD8:
+        case TYP_FLOAT:
         {
-            simd8_t result = {};
-            EvaluateUnarySimd<simd8_t>(oper, scalar, baseType, &result, vns->GetConstantSimd8(arg0VN));
-            return vns->VNForSimd8Con(result);
+            float arg = vns->GetConstantSingle(argVN);
+            BroadcastConstantToSimd<TSimd, float>(&result, arg);
+            break;
         }
 
-        case TYP_SIMD12:
+        case TYP_DOUBLE:
         {
-            simd12_t result = {};
-            EvaluateUnarySimd<simd12_t>(oper, scalar, baseType, &result, vns->GetConstantSimd12(arg0VN));
-            return vns->VNForSimd12Con(result);
+            double arg = vns->GetConstantDouble(argVN);
+            BroadcastConstantToSimd<TSimd, double>(&result, arg);
+            break;
         }
 
-        case TYP_SIMD16:
+        case TYP_BYTE:
+        case TYP_UBYTE:
         {
-            simd16_t result = {};
-            EvaluateUnarySimd<simd16_t>(oper, scalar, baseType, &result, vns->GetConstantSimd16(arg0VN));
-            return vns->VNForSimd16Con(result);
+            uint8_t arg = static_cast<uint8_t>(vns->GetConstantInt32(argVN));
+            BroadcastConstantToSimd<TSimd, uint8_t>(&result, arg);
+            break;
         }
 
-        case TYP_SIMD32:
+        case TYP_SHORT:
+        case TYP_USHORT:
         {
-            simd32_t result = {};
-            EvaluateUnarySimd<simd32_t>(oper, scalar, baseType, &result, vns->GetConstantSimd32(arg0VN));
-            return vns->VNForSimd32Con(result);
+            uint16_t arg = static_cast<uint16_t>(vns->GetConstantInt32(argVN));
+            BroadcastConstantToSimd<TSimd, uint16_t>(&result, arg);
+            break;
+        }
+
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            uint32_t arg = static_cast<uint32_t>(vns->GetConstantInt32(argVN));
+            BroadcastConstantToSimd<TSimd, uint32_t>(&result, arg);
+            break;
+        }
+
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+            uint64_t arg = static_cast<uint64_t>(vns->GetConstantInt64(argVN));
+            BroadcastConstantToSimd<TSimd, uint64_t>(&result, arg);
+            break;
         }
 
         default:
@@ -6050,47 +6333,153 @@ ValueNum EvaluateUnarySimd(
             unreached();
         }
     }
+
+    return result;
 }
 
-ValueNum EvaluateBinarySimd(ValueNumStore* vns,
-                            genTreeOps     oper,
-                            bool           scalar,
-                            var_types      simdType,
-                            var_types      baseType,
-                            ValueNum       arg0VN,
-                            ValueNum       arg1VN)
+simd8_t GetConstantSimd8(ValueNumStore* vns, var_types baseType, ValueNum argVN) // constant argVN as a simd8_t
+{
+    assert(vns->IsVNConstant(argVN));
+
+    if (vns->TypeOfVN(argVN) == TYP_SIMD8) // already a SIMD8 constant: return it unchanged
+    {
+        return vns->GetConstantSimd8(argVN);
+    }
+
+    return BroadcastConstantToSimd<simd8_t>(vns, baseType, argVN); // otherwise replicate the constant per baseType
+}
+
+simd12_t GetConstantSimd12(ValueNumStore* vns, var_types baseType, ValueNum argVN) // constant argVN as a simd12_t
+{
+    assert(vns->IsVNConstant(argVN));
+
+    if (vns->TypeOfVN(argVN) == TYP_SIMD12) // already a SIMD12 constant: return it unchanged
+    {
+        return vns->GetConstantSimd12(argVN);
+    }
+
+    return BroadcastConstantToSimd<simd12_t>(vns, baseType, argVN); // otherwise replicate the constant per baseType
+}
+
+simd16_t GetConstantSimd16(ValueNumStore* vns, var_types baseType, ValueNum argVN) // constant argVN as a simd16_t
+{
+    assert(vns->IsVNConstant(argVN));
+
+    if (vns->TypeOfVN(argVN) == TYP_SIMD16) // already a SIMD16 constant: return it unchanged
+    {
+        return vns->GetConstantSimd16(argVN);
+    }
+
+    return BroadcastConstantToSimd<simd16_t>(vns, baseType, argVN); // otherwise replicate the constant per baseType
+}
+
+simd32_t GetConstantSimd32(ValueNumStore* vns, var_types baseType, ValueNum argVN) // constant argVN as a simd32_t
+{
+    assert(vns->IsVNConstant(argVN));
+
+    if (vns->TypeOfVN(argVN) == TYP_SIMD32) // already a SIMD32 constant: return it unchanged
+    {
+        return vns->GetConstantSimd32(argVN);
+    }
+
+    return BroadcastConstantToSimd<simd32_t>(vns, baseType, argVN); // otherwise replicate the constant per baseType
+}
+
+ValueNum EvaluateUnarySimd(
+    ValueNumStore* vns, genTreeOps oper, bool scalar, var_types simdType, var_types baseType, ValueNum arg0VN)
+{ // Folds a unary SIMD op over a constant operand and returns the VN of the resulting simdType constant.
+    switch (simdType)
+    {
+        case TYP_SIMD8:
+        {
+            simd8_t arg0 = GetConstantSimd8(vns, baseType, arg0VN); // broadcast when arg0VN is not TYP_SIMD8
+
+            simd8_t result = {};
+            EvaluateUnarySimd<simd8_t>(oper, scalar, baseType, &result, arg0);
+            return vns->VNForSimd8Con(result);
+        }
+
+        case TYP_SIMD12:
+        {
+            simd12_t arg0 = GetConstantSimd12(vns, baseType, arg0VN); // broadcast when arg0VN is not TYP_SIMD12
+
+            simd12_t result = {};
+            EvaluateUnarySimd<simd12_t>(oper, scalar, baseType, &result, arg0);
+            return vns->VNForSimd12Con(result);
+        }
+
+        case TYP_SIMD16:
+        {
+            simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN); // broadcast when arg0VN is not TYP_SIMD16
+
+            simd16_t result = {};
+            EvaluateUnarySimd<simd16_t>(oper, scalar, baseType, &result, arg0);
+            return vns->VNForSimd16Con(result);
+        }
+
+        case TYP_SIMD32:
+        {
+            simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN); // broadcast when arg0VN is not TYP_SIMD32
+
+            simd32_t result = {};
+            EvaluateUnarySimd<simd32_t>(oper, scalar, baseType, &result, arg0);
+            return vns->VNForSimd32Con(result);
+        }
+
+        default:
+        {
+            unreached(); // only the four SIMD types above are handled
+        }
+    }
+}
+
+ValueNum EvaluateBinarySimd(ValueNumStore* vns,
+                            genTreeOps     oper,
+                            bool           scalar,
+                            var_types      simdType,
+                            var_types      baseType,
+                            ValueNum       arg0VN,
+                            ValueNum       arg1VN)
 {
     switch (simdType)
     {
         case TYP_SIMD8:
         {
+            simd8_t arg0 = GetConstantSimd8(vns, baseType, arg0VN);
+            simd8_t arg1 = GetConstantSimd8(vns, baseType, arg1VN);
+
             simd8_t result = {};
-            EvaluateBinarySimd<simd8_t>(oper, scalar, baseType, &result, vns->GetConstantSimd8(arg0VN),
-                                        vns->GetConstantSimd8(arg1VN));
+            EvaluateBinarySimd<simd8_t>(oper, scalar, baseType, &result, arg0, arg1);
             return vns->VNForSimd8Con(result);
         }
 
         case TYP_SIMD12:
         {
+            simd12_t arg0 = GetConstantSimd12(vns, baseType, arg0VN);
+            simd12_t arg1 = GetConstantSimd12(vns, baseType, arg1VN);
+
             simd12_t result = {};
-            EvaluateBinarySimd<simd12_t>(oper, scalar, baseType, &result, vns->GetConstantSimd12(arg0VN),
-                                         vns->GetConstantSimd12(arg1VN));
+            EvaluateBinarySimd<simd12_t>(oper, scalar, baseType, &result, arg0, arg1);
             return vns->VNForSimd12Con(result);
         }
 
         case TYP_SIMD16:
         {
+            simd16_t arg0 = GetConstantSimd16(vns, baseType, arg0VN);
+            simd16_t arg1 = GetConstantSimd16(vns, baseType, arg1VN);
+
             simd16_t result = {};
-            EvaluateBinarySimd<simd16_t>(oper, scalar, baseType, &result, vns->GetConstantSimd16(arg0VN),
-                                         vns->GetConstantSimd16(arg1VN));
+            EvaluateBinarySimd<simd16_t>(oper, scalar, baseType, &result, arg0, arg1);
             return vns->VNForSimd16Con(result);
         }
 
         case TYP_SIMD32:
         {
+            simd32_t arg0 = GetConstantSimd32(vns, baseType, arg0VN);
+            simd32_t arg1 = GetConstantSimd32(vns, baseType, arg1VN);
+
             simd32_t result = {};
-            EvaluateBinarySimd<simd32_t>(oper, scalar, baseType, &result, vns->GetConstantSimd32(arg0VN),
-                                         vns->GetConstantSimd32(arg1VN));
+            EvaluateBinarySimd<simd32_t>(oper, scalar, baseType, &result, arg0, arg1);
             return vns->VNForSimd32Con(result);
         }
 
@@ -6173,6 +6562,37 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd ar
     }
 }
 
+ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types baseType, ValueNum arg0VN, int arg1)
+{ // Forwards to the templated overload chosen by the type of arg0VN; `type` is not read in this body.
+    switch (vns->TypeOfVN(arg0VN)) // dispatch on the type of the vector VN itself
+    {
+        case TYP_SIMD8:
+        {
+            return EvaluateSimdGetElement<simd8_t>(vns, baseType, vns->GetConstantSimd8(arg0VN), arg1);
+        }
+
+        case TYP_SIMD12:
+        {
+            return EvaluateSimdGetElement<simd12_t>(vns, baseType, vns->GetConstantSimd12(arg0VN), arg1);
+        }
+
+        case TYP_SIMD16:
+        {
+            return EvaluateSimdGetElement<simd16_t>(vns, baseType, vns->GetConstantSimd16(arg0VN), arg1);
+        }
+
+        case TYP_SIMD32:
+        {
+            return EvaluateSimdGetElement<simd32_t>(vns, baseType, vns->GetConstantSimd32(arg0VN), arg1);
+        }
+
+        default:
+        {
+            unreached(); // arg0VN is expected to have one of the four SIMD types above
+        }
+    }
+}
+
 ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types      type,
                                                 var_types      baseType,
                                                 NamedIntrinsic ni,
@@ -6253,6 +6673,11 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types      type,
             {
                 return EvaluateUnarySimd(this, GT_NEG, /* scalar */ true, type, baseType, arg0VN);
             }
+
+            case NI_AdvSimd_Not:
+            {
+                return EvaluateUnarySimd(this, GT_NOT, /* scalar */ false, type, baseType, arg0VN);
+            }
 #endif // TARGET_ARM64
 
 #if defined(TARGET_XARCH)
@@ -6430,46 +6855,161 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types      type,
             }
 
 #ifdef TARGET_ARM64
-            case NI_Vector64_GetElement:
+            case NI_AdvSimd_And:
+#else
+            case NI_SSE_And:
+            case NI_SSE2_And:
+            case NI_AVX_And:
+            case NI_AVX2_And:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_AND, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_BitwiseClear:
+            {
+                return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+#else
+            case NI_SSE_AndNot:
+            case NI_SSE2_AndNot:
+            case NI_AVX_AndNot:
+            case NI_AVX2_AndNot:
+            {
+                // xarch does: ~arg0VN & arg1VN
+                return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg1VN, arg0VN);
+            }
+#endif
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Arm64_Divide:
+#else
+            case NI_SSE_Divide:
+            case NI_SSE2_Divide:
+            case NI_AVX_Divide:
 #endif
+            {
+                return EvaluateBinarySimd(this, GT_DIV, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_DivideScalar:
+#else
+            case NI_SSE_DivideScalar:
+            case NI_SSE2_DivideScalar:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_DIV, /* scalar */ true, type, baseType, arg0VN, arg1VN);
+            }
+
             case NI_Vector128_GetElement:
-#ifdef TARGET_XARCH
+#ifdef TARGET_ARM64
+            case NI_Vector64_GetElement:
+#else
             case NI_Vector256_GetElement:
 #endif
             {
-                switch (TypeOfVN(arg0VN))
-                {
-                    case TYP_SIMD8:
-                    {
-                        return EvaluateSimdGetElement<simd8_t>(this, baseType, GetConstantSimd8(arg0VN),
-                                                               GetConstantInt32(arg1VN));
-                    }
+                return EvaluateSimdGetElement(this, type, baseType, arg0VN, GetConstantInt32(arg1VN));
+            }
 
-                    case TYP_SIMD12:
-                    {
-                        return EvaluateSimdGetElement<simd12_t>(this, baseType, GetConstantSimd12(arg0VN),
-                                                                GetConstantInt32(arg1VN));
-                    }
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_MultiplyByScalar:
+            case NI_AdvSimd_Arm64_MultiplyByScalar:
+            {
+                // MultiplyByScalar takes a vector as the second operand but only utilizes element 0
+                // We need to extract it and then functionally broadcast it up for the evaluation to
+                // work as expected.
 
-                    case TYP_SIMD16:
-                    {
-                        return EvaluateSimdGetElement<simd16_t>(this, baseType, GetConstantSimd16(arg0VN),
-                                                                GetConstantInt32(arg1VN));
-                    }
+                arg1VN = EvaluateSimdGetElement(this, type, baseType, arg1VN, 0);
+                FALLTHROUGH;
+            }
+#endif
 
-                    case TYP_SIMD32:
-                    {
-                        return EvaluateSimdGetElement<simd32_t>(this, baseType, GetConstantSimd32(arg0VN),
-                                                                GetConstantInt32(arg1VN));
-                    }
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Multiply:
+            case NI_AdvSimd_Arm64_Multiply:
+#else
+            case NI_SSE_Multiply:
+            case NI_SSE2_Multiply:
+            case NI_SSE2_MultiplyLow:
+            case NI_SSE41_MultiplyLow:
+            case NI_AVX_Multiply:
+            case NI_AVX2_MultiplyLow:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
 
-                    default:
-                    {
-                        unreached();
-                    }
-                }
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_MultiplyScalar:
+#else
+            case NI_SSE_MultiplyScalar:
+            case NI_SSE2_MultiplyScalar:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_MUL, /* scalar */ true, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Or:
+#else
+            case NI_SSE_Or:
+            case NI_SSE2_Or:
+            case NI_AVX_Or:
+            case NI_AVX2_Or:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_OR, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_ShiftLeftLogical:
+#else
+            case NI_SSE2_ShiftLeftLogical:
+            case NI_AVX2_ShiftLeftLogical:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_LSH, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_ShiftRightArithmetic:
+#else
+            case NI_SSE2_ShiftRightArithmetic:
+            case NI_AVX2_ShiftRightArithmetic:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_RSH, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_ShiftRightLogical:
+#else
+            case NI_SSE2_ShiftRightLogical:
+            case NI_AVX2_ShiftRightLogical:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ false, type, baseType, arg0VN, arg1VN);
             }
 
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_ShiftLeftLogicalScalar:
+            {
+                return EvaluateBinarySimd(this, GT_LSH, /* scalar */ true, type, baseType, arg0VN, arg1VN);
+            }
+
+            case NI_AdvSimd_ShiftRightArithmeticScalar:
+            {
+                return EvaluateBinarySimd(this, GT_RSH, /* scalar */ true, type, baseType, arg0VN, arg1VN);
+            }
+
+            case NI_AdvSimd_ShiftRightLogicalScalar:
+            {
+                return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ true, type, baseType, arg0VN, arg1VN);
+            }
+#endif // TARGET_ARM64
+
 #ifdef TARGET_ARM64
             case NI_AdvSimd_Subtract:
             case NI_AdvSimd_Arm64_Subtract:
@@ -6493,6 +7033,18 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types      type,
                 return EvaluateBinarySimd(this, GT_SUB, /* scalar */ true, type, baseType, arg0VN, arg1VN);
             }
 
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Xor:
+#else
+            case NI_SSE_Xor:
+            case NI_SSE2_Xor:
+            case NI_AVX_Xor:
+            case NI_AVX2_Xor:
+#endif
+            {
+                return EvaluateBinarySimd(this, GT_XOR, /* scalar */ false, type, baseType, arg0VN, arg1VN);
+            }
+
             default:
                 break;
         }
@@ -6511,14 +7063,213 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types      type,
             case NI_AVX2_Add:
 #endif
             {
-                // Handle `x + 0` and `0 + x`
+                if (varTypeIsFloating(baseType))
+                {
+                    // Not safe for floating-point when x == -0.0
+                    break;
+                }
+
+                // Handle `x + 0 == x` and `0 + x == x`
+                ValueNum zeroVN = VNZeroForType(type);
+
+                if (cnsVN == zeroVN)
+                {
+                    return argVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_And:
+#else
+            case NI_SSE_And:
+            case NI_SSE2_And:
+            case NI_AVX_And:
+            case NI_AVX2_And:
+#endif
+            {
+                // Handle `x & 0 == 0` and `0 & x == 0`
+                ValueNum zeroVN = VNZeroForType(type);
+
+                if (cnsVN == zeroVN)
+                {
+                    return zeroVN;
+                }
+
+                // Handle `x & ~0 == x` and `~0 & x == x`
+                ValueNum allBitsVN = VNAllBitsForType(type);
+
+                if (cnsVN == allBitsVN)
+                {
+                    return argVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_BitwiseClear:
+#else
+            case NI_SSE_AndNot:
+            case NI_SSE2_AndNot:
+            case NI_AVX_AndNot:
+            case NI_AVX2_AndNot:
+#endif // closed before the body so the shared body below is compiled for arm64 as well
+            {
+#ifdef TARGET_ARM64
+                if (cnsVN == arg0VN)
+                {
+                    // arm64 preserves the args, so we can only handle `x & ~cns`
+                    break;
+                }
+#else
+                if (cnsVN == arg1VN)
+                {
+                    // xarch swaps the args, so we can only handle `~cns & x`
+                    break;
+                }
+#endif
+
+                // Handle `x & ~0 == x` (the negated constant is zero, so the mask is all ones)
+                ValueNum zeroVN = VNZeroForType(type);
+
+                if (cnsVN == zeroVN)
+                {
+                    return argVN;
+                }
+
+                // Handle `x & ~AllBitsSet == 0` (the negated constant is all ones, so the mask is zero)
+                ValueNum allBitsVN = VNAllBitsForType(type);
+
+                if (cnsVN == allBitsVN)
+                {
+                    return zeroVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Arm64_Divide:
+#else
+            case NI_SSE_Divide:
+            case NI_SSE2_Divide:
+            case NI_AVX_Divide:
+#endif
+            {
+                // Handle `x / 1 == x`.
+                // This is safe for all floats since we do not fault for sNaN
+                ValueNum oneVN;
+
+                if (varTypeIsSIMD(TypeOfVN(arg1VN)))
+                {
+                    oneVN = VNOneForSimdType(type, baseType);
+                }
+                else
+                {
+                    oneVN = VNOneForType(baseType);
+                }
+
+                if (arg1VN == oneVN)
+                {
+                    return arg0VN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Multiply:
+            case NI_AdvSimd_MultiplyByScalar:
+            case NI_AdvSimd_Arm64_Multiply:
+            case NI_AdvSimd_Arm64_MultiplyByScalar:
+#else
+            case NI_SSE_Multiply:
+            case NI_SSE2_Multiply:
+            case NI_SSE2_MultiplyLow:
+            case NI_SSE41_MultiplyLow:
+            case NI_AVX_Multiply:
+            case NI_AVX2_MultiplyLow:
+#endif
+            {
+                if (!varTypeIsFloating(baseType))
+                {
+                    // Handle `x * 0 == 0` and `0 * x == 0`
+                    // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf
+                    ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN));
+
+                    if (cnsVN == zeroVN)
+                    {
+                        return zeroVN;
+                    }
+                }
 
+                // Handle `x * 1 == x` and `1 * x == x`
+                // This is safe for all floats since we do not fault for sNaN
+                ValueNum oneVN;
+
+                if (varTypeIsSIMD(TypeOfVN(cnsVN)))
+                {
+                    oneVN = VNOneForSimdType(type, baseType);
+                }
+                else
+                {
+                    oneVN = VNOneForType(baseType);
+                }
+
+                if (cnsVN == oneVN)
+                {
+                    return argVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Or:
+#else
+            case NI_SSE_Or:
+            case NI_SSE2_Or:
+            case NI_AVX_Or:
+            case NI_AVX2_Or:
+#endif
+            {
+                // Handle `x | 0 == x` and `0 | x == x`
                 ValueNum zeroVN = VNZeroForType(type);
 
                 if (cnsVN == zeroVN)
                 {
                     return argVN;
                 }
+
+                // Handle `x | ~0 == ~0` and `~0 | x == ~0`
+                ValueNum allBitsVN = VNAllBitsForType(type);
+
+                if (cnsVN == allBitsVN)
+                {
+                    return allBitsVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_ShiftLeftLogical:
+            case NI_AdvSimd_ShiftRightArithmetic:
+            case NI_AdvSimd_ShiftRightLogical:
+#else
+            case NI_SSE2_ShiftLeftLogical:
+            case NI_SSE2_ShiftRightArithmetic:
+            case NI_SSE2_ShiftRightLogical:
+            case NI_AVX2_ShiftLeftLogical:
+            case NI_AVX2_ShiftRightArithmetic:
+            case NI_AVX2_ShiftRightLogical:
+#endif
+            {
+                // Handle `x <<  0 == x` and `0 <<  x == 0`
+                // Handle `x >>  0 == x` and `0 >>  x == 0`
+                // Handle `x >>> 0 == x` and `0 >>> x == 0`
+                ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN));
+
+                if (cnsVN == zeroVN)
+                {
+                    return (cnsVN == arg1VN) ? argVN : zeroVN;
+                }
                 break;
             }
 
@@ -6532,14 +7283,32 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types      type,
             case NI_AVX2_Subtract:
 #endif
             {
-                // Handle `x - 0`
-
-                if (cnsVN != arg1VN)
+                if (varTypeIsFloating(baseType))
                 {
-                    // This is `0 - x` which is `NEG(x)`
+                    // Not safe for floating-point when x == -0.0
                     break;
                 }
 
+                // Handle `x - 0 == x`
+                ValueNum zeroVN = VNZeroForType(type);
+
+                if (arg1VN == zeroVN)
+                {
+                    return argVN;
+                }
+                break;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Xor:
+#else
+            case NI_SSE_Xor:
+            case NI_SSE2_Xor:
+            case NI_AVX_Xor:
+            case NI_AVX2_Xor:
+#endif
+            {
+                // Handle `x ^ 0 == x` and `0 ^ x == x`
                 ValueNum zeroVN = VNZeroForType(type);
 
                 if (cnsVN == zeroVN)
@@ -6553,6 +7322,86 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types      type,
                 break;
         }
     }
+    else if (arg0VN == arg1VN)
+    {
+        switch (ni)
+        {
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_And:
+#else
+            case NI_SSE_And:
+            case NI_SSE2_And:
+            case NI_AVX_And:
+            case NI_AVX2_And:
+#endif
+            {
+                // Handle `x & x == x`
+                return arg0VN;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_BitwiseClear:
+#else
+            case NI_SSE_AndNot:
+            case NI_SSE2_AndNot:
+            case NI_AVX_AndNot:
+            case NI_AVX2_AndNot:
+#endif // closed before the body so arm64 does not fall through into the `x | x` case
+            {
+                // Handle `x & ~x == 0`
+                return VNZeroForType(type);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Or:
+#else
+            case NI_SSE_Or:
+            case NI_SSE2_Or:
+            case NI_AVX_Or:
+            case NI_AVX2_Or:
+#endif
+            {
+                // Handle `x | x == x`
+                return arg0VN;
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Subtract:
+            case NI_AdvSimd_Arm64_Subtract:
+#else
+            case NI_SSE_Subtract:
+            case NI_SSE2_Subtract:
+            case NI_AVX_Subtract:
+            case NI_AVX2_Subtract:
+#endif
+            {
+                if (varTypeIsFloating(baseType))
+                {
+                    // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf
+                    break;
+                }
+
+                // Handle `x - x == 0`
+                return VNZeroForType(type);
+            }
+
+#ifdef TARGET_ARM64
+            case NI_AdvSimd_Xor:
+#else
+            case NI_SSE_Xor:
+            case NI_SSE2_Xor:
+            case NI_AVX_Xor:
+            case NI_AVX2_Xor:
+#endif
+            {
+                // Handle `x ^ x == 0` (the result is zero, not x)
+                return VNZeroForType(type);
+            }
+
+            default:
+                break;
+        }
+    }
 
     if (encodeResultType)
     {
diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h
index 5a122070ffbbf..b3dcc252a1332 100644
--- a/src/coreclr/jit/valuenum.h
+++ b/src/coreclr/jit/valuenum.h
@@ -341,6 +341,7 @@ class ValueNumStore
     template <typename T>
     static bool IsIntZero(T v);
 
+public:
     // Given an constant value number return its value.
     int GetConstantInt32(ValueNum argVN);
     INT64 GetConstantInt64(ValueNum argVN);
@@ -348,15 +349,13 @@ class ValueNumStore
     float GetConstantSingle(ValueNum argVN);
 
 #if defined(FEATURE_SIMD)
-public:
     simd8_t GetConstantSimd8(ValueNum argVN);
     simd12_t GetConstantSimd12(ValueNum argVN);
     simd16_t GetConstantSimd16(ValueNum argVN);
     simd32_t GetConstantSimd32(ValueNum argVN);
-
-private:
 #endif // FEATURE_SIMD
 
+private:
     // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
     // Assumes that "vnf" is a operator of the appropriate arity (unary for the first, binary for the second).
     // Assume that "CanEvalForConstantArgs(vnf)" is true.
@@ -521,7 +520,14 @@ class ValueNumStore
     // It returns NoVN for a "typ" that has no one value, such as TYP_REF.
     ValueNum VNOneForType(var_types typ);
 
+    // Returns the value number for AllBitsSet of the given "typ".
+    // It has an unreached() for a "typ" that has no all bits set value, such as TYP_VOID.
+    ValueNum VNAllBitsForType(var_types typ);
+
 #ifdef FEATURE_SIMD
+    // Returns the value number for one of the given "simdType" and "simdBaseType".
+    ValueNum VNOneForSimdType(var_types simdType, var_types simdBaseType);
+
     // A helper function for constructing VNF_SimdType VNs.
     ValueNum VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType);
 #endif // FEATURE_SIMD

From e80c008acbb47f9c5e765a837b02770df90c29a6 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Feb 2023 12:06:24 -0800
Subject: [PATCH 2/3] Adding tests for the new SIMD constant folding paths

---
 .../ConstantFolding/SimdConstantFoldings.cs   | 504 +++++++++++++++++-
 1 file changed, 498 insertions(+), 6 deletions(-)

diff --git a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs
index 56aaec012146b..8ba3b863e2dee 100644
--- a/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs
+++ b/src/tests/JIT/HardwareIntrinsics/General/ConstantFolding/SimdConstantFoldings.cs
@@ -152,15 +152,15 @@ public static void SubtractTests()
         );
 
         Assert.Equal(
-            Vector128.Create((sbyte)(+0), -3, +0, +0, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00, +00),
-            Vector128.Create((sbyte)(+1), -2, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16)
-          - Vector128.Create((sbyte)(+1), +1, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16)
+            Vector128.Create((sbyte)(+0), -3, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+          - Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
         );
 
         Assert.Equal(
-            Vector128.Create((short)(+0), -3, +0, +0, +00, +00, +00, +00),
-            Vector128.Create((short)(+1), -2, +3, -4, +05, -06, +07, -08)
-          - Vector128.Create((short)(+1), +1, +3, -4, +05, -06, +07, -08)
+            Vector128.Create((short)(+0), -3, +0, +0, +0, +0, +0, +0),
+            Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8)
+          - Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8)
         );
 
         Assert.Equal(
@@ -241,4 +241,496 @@ public static void GetElementTests()
             Vector128.Create((double)(+1), -2).GetElement(1)
         );
     }
+
+    [Fact]
+    public static void NotTests() // Checks operator ~ on literal Vector128 operands, one assert per element type.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0xFE), 0x01, 0xFC, 0x03, 0xFA, 0x05, 0xF8, 0x07, 0xF6, 0x09, 0xF4, 0x0B, 0xF2, 0x0D, 0xF0, 0x0F),
+           ~Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0xFFFE), 0x0001, 0xFFFC, 0x0003, 0xFFFA, 0x0005, 0xFFF8, 0x0007),
+           ~Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0xFFFF_FFFE), 0x0000_0001, 0xFFFF_FFFC, 0x0000_0003),
+           ~Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0xFFFF_FFFF_FFFF_FFFE), 0x0000_0000_0000_0001),
+           ~Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE)
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(-2), +1, -4, +3, -6, +5, -8, +7, -10, +9, -12, +11, -14, +13, -16, +15),
+           ~Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(-2), +1, -4, +3, -6, +5, -8, +7),
+           ~Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(-2), +1, -4, +3),
+           ~Vector128.Create((int)(+1), -2, +3, -4)
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(-2), +1),
+           ~Vector128.Create((long)(+1), -2)
+        );
+
+        Assert.Equal( // expected floats are the complemented IEEE-754 bits: ~0x3F800000 (1.0f) == 0xC07FFFFF (-3.9999998f)
+            Vector128.Create((float)(-3.9999998f), +1.9999999f, -1.4999999f, +0.99999994f),
+           ~Vector128.Create((float)(+1),          -2,          +3,          -4)
+        );
+
+        Assert.Equal( // expected doubles likewise: ~0x3FF0000000000000 (1.0) == 0xC00FFFFFFFFFFFFF (-3.9999999999999996)
+            Vector128.Create((double)(-3.9999999999999996), +1.9999999999999998),
+           ~Vector128.Create((double)(+1),                  -2)
+        );
+    }
+
+    [Fact]
+    public static void AndTests() // Checks operator & on literal Vector128 operands; the operands differ only in lane 1.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x01), 0x00, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+          & Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0001), 0x0000, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+          & Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0001), 0x0000_0000, 0x0000_0003, 0xFFFF_FFFC),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC)
+          & Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0000),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE)
+          & Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001)
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+1), +0, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+          & Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+1), +0, +3, -4, +5, -6, +7, -8),
+            Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8)
+          & Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+1), +0, +3, -4),
+            Vector128.Create((int)(+1), -2, +3, -4)
+          & Vector128.Create((int)(+1), +1, +3, -4)
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+1), +0),
+            Vector128.Create((long)(+1), -2)
+          & Vector128.Create((long)(+1), +1)
+        );
+
+        Assert.Equal( // lane 1 is the AND of the raw bits: 0xC0000000 (-2f) & 0x3F800000 (1f) == 0x00000000 (+0f)
+            Vector128.Create((float)(+1), +0, +3, -4),
+            Vector128.Create((float)(+1), -2, +3, -4)
+          & Vector128.Create((float)(+1), +1, +3, -4)
+        );
+
+        Assert.Equal( // lane 1 likewise: 0xC000000000000000 (-2) & 0x3FF0000000000000 (1) == 0 (+0)
+            Vector128.Create((double)(+1), +0),
+            Vector128.Create((double)(+1), -2)
+          & Vector128.Create((double)(+1), +1)
+        );
+    }
+
+    [Fact]
+    public static void AndNotTests() // Checks Vector128.AndNot on literal operands; expected values equal `left & ~right`.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x00), 0xFE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+            Vector128.AndNot(
+                Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0),
+                Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0000), 0xFFFE, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000),
+            Vector128.AndNot(
+                Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8),
+                Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0000), 0xFFFF_FFFE, 0x0000_0000, 0x0000_0000),
+            Vector128.AndNot(
+                Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC),
+                Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0000), 0xFFFF_FFFF_FFFF_FFFE),
+            Vector128.AndNot(
+                Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE),
+                Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+0), -2, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00),
+            Vector128.AndNot(
+                Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16),
+                Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+0), -2, +0, +0, +0, +0, +0, +0),
+            Vector128.AndNot(
+                Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8),
+                Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+0), -2, +0, +0),
+            Vector128.AndNot(
+                Vector128.Create((int)(+1), -2, +3, -4),
+                Vector128.Create((int)(+1), +1, +3, -4)
+            )
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+0), -2),
+            Vector128.AndNot(
+                Vector128.Create((long)(+1), -2),
+                Vector128.Create((long)(+1), +1)
+            )
+        );
+
+        Assert.Equal( // lane 1 on raw bits: 0xC0000000 (-2f) & ~0x3F800000 (1f) == 0xC0000000 (-2f)
+            Vector128.Create((float)(+0), -2, +0, +0),
+            Vector128.AndNot(
+                Vector128.Create((float)(+1), -2, +3, -4),
+                Vector128.Create((float)(+1), +1, +3, -4)
+            )
+        );
+
+        Assert.Equal( // lane 1 likewise: 0xC000000000000000 (-2) & ~0x3FF0000000000000 (1) == 0xC000000000000000 (-2)
+            Vector128.Create((double)(+0), -2),
+            Vector128.AndNot(
+                Vector128.Create((double)(+1), -2),
+                Vector128.Create((double)(+1), +1)
+            )
+        );
+    }
+
+    [Fact]
+    public static void LeftShiftTests() // Checks operator << by 1 on literal Vector128 operands, one assert per element type.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x02), 0xFC, 0x06, 0xF8, 0x0A, 0xF4, 0x0E, 0xF0, 0x12, 0xEC, 0x16, 0xE8, 0x1A, 0xE4, 0x1E, 0xE0),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0002), 0xFFFC, 0x0006, 0xFFF8, 0x000A, 0xFFF4, 0x000E, 0xFFF0),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0002), 0xFFFF_FFFC, 0x0000_0006, 0xFFFF_FFF8),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0002), 0xFFFF_FFFF_FFFF_FFFC),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+2), -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +05, -06, +07, -08, +09, -10, +11, -12, +13, -14, +15, -16) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+2), -4, +6, -8, +10, -12, +14, -16),
+            Vector128.Create((short)(+1), -2, +3, -4, +05, -06, +07, -08) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+2), -4, +6, -8),
+            Vector128.Create((int)(+1), -2, +3, -4) << 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+2), -4),
+            Vector128.Create((long)(+1), -2) << 1
+        );
+
+        Assert.Equal( // expected floats are the shifted raw bits: 0x3F800000 (1f) << 1 == 0x7F000000 (2^127); 0xC0000000 (-2f) << 1 == 0x80000000 (-0f)
+            Vector128.Create((float)(+1.7014118E+38f), -0.0f, -1.1754944E-38f, -2.3509887E-38f),
+            Vector128.Create((float)(+1),              -2,    +3,              -4) << 1
+        );
+
+        Assert.Equal( // expected doubles likewise: 0x3FF0000000000000 (1) << 1 == 0x7FE0000000000000 (2^1023)
+            Vector128.Create((double)(+8.98846567431158E+307), -0.0),
+            Vector128.Create((double)(+1),                     -2) << 1
+        );
+    }
+
+    [Fact]
+    public static void OrTests() // Checks operator | on literal Vector128 operands; the operands differ only in lane 1.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x01), 0xFF, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+          | Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0001), 0xFFFF, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+          | Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFF, 0x0000_0003, 0xFFFF_FFFC),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC)
+          | Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFF),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE)
+          | Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001)
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+1), -1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+          | Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+1), -1, +3, -4, +5, -6, +7, -8),
+            Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8)
+          | Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+1), -1, +3, -4),
+            Vector128.Create((int)(+1), -2, +3, -4)
+          | Vector128.Create((int)(+1), +1, +3, -4)
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+1), -1),
+            Vector128.Create((long)(+1), -2)
+          | Vector128.Create((long)(+1), +1)
+        );
+
+        Assert.Equal( // lane 1 is the OR of the raw bits: 0xC0000000 (-2f) | 0x3F800000 (1f) == 0xFF800000 (-Infinity)
+            Vector128.Create((float)(+1), +float.NegativeInfinity, +3, -4),
+            Vector128.Create((float)(+1), -2,                      +3, -4)
+          | Vector128.Create((float)(+1), +1,                      +3, -4)
+        );
+
+        Assert.Equal( // lane 1 likewise: 0xC000000000000000 (-2) | 0x3FF0000000000000 (1) == 0xFFF0000000000000 (-Infinity)
+            Vector128.Create((double)(+1), +double.NegativeInfinity),
+            Vector128.Create((double)(+1), -2)
+          | Vector128.Create((double)(+1), +1)
+        );
+    }
+
+    [Fact]
+    public static void RightShiftTests() // Checks operator >> by 1 on literal Vector128 operands, one assert per element type.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x00), 0x7F, 0x01, 0x7E, 0x02, 0x7D, 0x03, 0x7C, 0x04, 0x7B, 0x05, 0x7A, 0x06, 0x79, 0x07, 0x78),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0000), 0x7FFF, 0x0001, 0x7FFE, 0x0002, 0x7FFD, 0x0003, 0x7FFC),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0000), 0x7FFF_FFFF, 0x0000_0001, 0x7FFF_FFFE),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0000), 0x7FFF_FFFF_FFFF_FFFF),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+0), -1, +1, -2, +2, -3, +3, -4, +4, -05, +05, -06, +06, -07, +07, -08),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+0), -1, +1, -2, +2, -3, +3, -4),
+            Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+0), -1, +1, -2),
+            Vector128.Create((int)(+1), -2, +3, -4) >> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+0), -1),
+            Vector128.Create((long)(+1), -2) >> 1
+        );
+
+        Assert.Equal( // expected floats are the raw bits shifted with the sign bit replicated: 0xC0000000 (-2f) >> 1 == 0xE0000000 (-2^65)
+            Vector128.Create((float)(+8.131516E-20f), -3.689349E+19f, +1.3552527E-19f, -5.5340232E+19f),
+            Vector128.Create((float)(+1),             -2,             +3,              -4) >> 1
+        );
+
+        Assert.Equal( // expected doubles likewise: 0xC000000000000000 (-2) >> 1 == 0xE000000000000000 (-2^513)
+            Vector128.Create((double)(+1.118751109680031E-154), -2.6815615859885194E+154),
+            Vector128.Create((double)(+1),                      -2) >> 1
+        );
+    }
+
+    [Fact]
+    public static void UnsignedRightShiftTests() // Checks operator >>> by 1 on literal Vector128 operands; signed lanes become non-negative.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x00), 0x7F, 0x01, 0x7E, 0x02, 0x7D, 0x03, 0x7C, 0x04, 0x7B, 0x05, 0x7A, 0x06, 0x79, 0x07, 0x78),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0000), 0x7FFF, 0x0001, 0x7FFE, 0x0002, 0x7FFD, 0x0003, 0x7FFC),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0000), 0x7FFF_FFFF, 0x0000_0001, 0x7FFF_FFFE),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0000), 0x7FFF_FFFF_FFFF_FFFF),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+0), +127, +1, +126, +2, +125, +3, +124, +4, +123, +05, +122, +06, +121, +07, +120),
+            Vector128.Create((sbyte)(+1), -002, +3, -004, +5, -006, +7, -008, +9, -010, +11, -012, +13, -014, +15, -016) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+0), +32767, +1, +32766, +2, +32765, +3, +32764),
+            Vector128.Create((short)(+1), -00002, +3, -00004, +5, -00006, +7, -00008) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+0), +2147483647, +1, +2147483646),
+            Vector128.Create((int)(+1), -0000000002, +3, -0000000004) >>> 1
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+0), +9223372036854775807),
+            Vector128.Create((long)(+1), -0000000000000000002) >>> 1
+        );
+
+        Assert.Equal( // expected floats are the raw bits shifted with zero fill: 0xC0000000 (-2f) >>> 1 == 0x60000000 (+2^65)
+            Vector128.Create((float)(+8.131516E-20f), +3.689349E+19f, +1.3552527E-19f, +5.5340232E+19f),
+            Vector128.Create((float)(+1),             -2,             +3,              -4) >>> 1
+        );
+
+        Assert.Equal( // expected doubles likewise: 0xC000000000000000 (-2) >>> 1 == 0x6000000000000000 (+2^513)
+            Vector128.Create((double)(+1.118751109680031E-154), +2.6815615859885194E+154),
+            Vector128.Create((double)(+1),                      -2) >>> 1
+        );
+    }
+
+    [Fact]
+    public static void XorTests() // Checks operator ^ on literal Vector128 operands; the operands differ only in lane 1.
+    {
+        Assert.Equal(
+            Vector128.Create((byte)(0x00), 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+            Vector128.Create((byte)(0x01), 0xFE, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+          ^ Vector128.Create((byte)(0x01), 0x01, 0x03, 0xFC, 0x05, 0xFA, 0x07, 0xF8, 0x09, 0xF6, 0x0B, 0xF4, 0x0D, 0xF2, 0x0F, 0xF0)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ushort)(0x0000), 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000),
+            Vector128.Create((ushort)(0x0001), 0xFFFE, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+          ^ Vector128.Create((ushort)(0x0001), 0x0001, 0x0003, 0xFFFC, 0x0005, 0xFFFA, 0x0007, 0xFFF8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((uint)(0x0000_0000), 0xFFFF_FFFF, 0x0000_0000, 0x0000_0000),
+            Vector128.Create((uint)(0x0000_0001), 0xFFFF_FFFE, 0x0000_0003, 0xFFFF_FFFC)
+          ^ Vector128.Create((uint)(0x0000_0001), 0x0000_0001, 0x0000_0003, 0xFFFF_FFFC)
+        );
+
+        Assert.Equal(
+            Vector128.Create((ulong)(0x0000_0000_0000_0000), 0xFFFF_FFFF_FFFF_FFFF),
+            Vector128.Create((ulong)(0x0000_0000_0000_0001), 0xFFFF_FFFF_FFFF_FFFE)
+          ^ Vector128.Create((ulong)(0x0000_0000_0000_0001), 0x0000_0000_0000_0001)
+        );
+
+        Assert.Equal(
+            Vector128.Create((sbyte)(+0), -1, +0, +0, +0, +0, +0, +0, +0, +00, +00, +00, +00, +00, +00, +00),
+            Vector128.Create((sbyte)(+1), -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+          ^ Vector128.Create((sbyte)(+1), +1, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16)
+        );
+
+        Assert.Equal(
+            Vector128.Create((short)(+0), -1, +0, +0, +0, +0, +0, +0),
+            Vector128.Create((short)(+1), -2, +3, -4, +5, -6, +7, -8)
+          ^ Vector128.Create((short)(+1), +1, +3, -4, +5, -6, +7, -8)
+        );
+
+        Assert.Equal(
+            Vector128.Create((int)(+0), -1, +0, +0),
+            Vector128.Create((int)(+1), -2, +3, -4)
+          ^ Vector128.Create((int)(+1), +1, +3, -4)
+        );
+
+        Assert.Equal(
+            Vector128.Create((long)(+0), -1),
+            Vector128.Create((long)(+1), -2)
+          ^ Vector128.Create((long)(+1), +1)
+        );
+
+        Assert.Equal( // lane 1 is the XOR of the raw bits: 0xC0000000 (-2f) ^ 0x3F800000 (1f) == 0xFF800000 (-Infinity)
+            Vector128.Create((float)(+0), +float.NegativeInfinity, +0, +0),
+            Vector128.Create((float)(+1), -2,                      +3, -4)
+          ^ Vector128.Create((float)(+1), +1,                      +3, -4)
+        );
+
+        Assert.Equal( // lane 1 likewise: 0xC000000000000000 (-2) ^ 0x3FF0000000000000 (1) == 0xFFF0000000000000 (-Infinity)
+            Vector128.Create((double)(+0), +double.NegativeInfinity),
+            Vector128.Create((double)(+1), -2)
+          ^ Vector128.Create((double)(+1), +1)
+        );
+    }
 }

From 2f9c0384ad0d7dcd2e6d002ec46a4db1973e581e Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Thu, 23 Feb 2023 05:04:52 -0800
Subject: [PATCH 3/3] Ensure bitcasting float/double is using well-defined
 behavior

---
 src/coreclr/jit/simd.h    | 20 ++++++------
 src/coreclr/jit/utils.cpp | 64 +++++++++++++++++++++++++++++++++++++++
 src/coreclr/jit/utils.h   |  8 +++++
 3 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h
index f2207cb92ee44..c1a0dd629ed5f 100644
--- a/src/coreclr/jit/simd.h
+++ b/src/coreclr/jit/simd.h
@@ -169,17 +169,17 @@ TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
 template <>
 inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
 {
-    uint32_t arg0Bits   = *reinterpret_cast<uint32_t*>(&arg0);
+    uint32_t arg0Bits   = BitOperations::SingleToUInt32Bits(arg0);
     uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
-    return *reinterpret_cast<float*>(&resultBits);
+    return BitOperations::UInt32BitsToSingle(resultBits);
 }
 
 template <>
 inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
 {
-    uint64_t arg0Bits   = *reinterpret_cast<uint64_t*>(&arg0);
+    uint64_t arg0Bits   = BitOperations::DoubleToUInt64Bits(arg0);
     uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
-    return *reinterpret_cast<double*>(&resultBits);
+    return BitOperations::UInt64BitsToDouble(resultBits);
 }
 
 template <typename TBase>
@@ -397,21 +397,21 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
 template <>
 inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
 {
-    uint32_t arg0Bits = *reinterpret_cast<uint32_t*>(&arg0);
-    uint32_t arg1Bits = *reinterpret_cast<uint32_t*>(&arg1);
+    uint32_t arg0Bits = BitOperations::SingleToUInt32Bits(arg0);
+    uint32_t arg1Bits = BitOperations::SingleToUInt32Bits(arg1);
 
     uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
-    return *reinterpret_cast<float*>(&resultBits);
+    return BitOperations::UInt32BitsToSingle(resultBits);
 }
 
 template <>
 inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
 {
-    uint64_t arg0Bits = *reinterpret_cast<uint64_t*>(&arg0);
-    uint64_t arg1Bits = *reinterpret_cast<uint64_t*>(&arg1);
+    uint64_t arg0Bits = BitOperations::DoubleToUInt64Bits(arg0);
+    uint64_t arg1Bits = BitOperations::DoubleToUInt64Bits(arg1);
 
     uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
-    return *reinterpret_cast<double*>(&resultBits);
+    return BitOperations::UInt64BitsToDouble(resultBits);
 }
 
 template <typename TBase>
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index bf5181be76a0a..93bcf7435c853 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -2645,6 +2645,22 @@ uint32_t BitOperations::BitScanReverse(uint64_t value)
 #endif
 }
 
+//------------------------------------------------------------------------
+// BitOperations::DoubleToUInt64Bits: Gets the underlying bits for a double-precision floating-point value.
+//
+// Arguments:
+//    value - The number to convert
+//
+// Return Value:
+//    The underlying bits for value.
+//
+uint64_t BitOperations::DoubleToUInt64Bits(double value)
+{
+    uint64_t result;
+    memcpy(&result, &value, sizeof(double));
+    return result;
+}
+
 //------------------------------------------------------------------------
 // BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask.
 //
@@ -2932,6 +2948,22 @@ uint64_t BitOperations::RotateRight(uint64_t value, uint32_t offset)
     return (value >> (offset & 0x3F)) | (value << ((64 - offset) & 0x3F));
 }
 
+//------------------------------------------------------------------------
+// BitOperations::SingleToUInt32Bits: Gets the underlying bits for a single-precision floating-point value.
+//
+// Arguments:
+//    value - The number to convert
+//
+// Return Value:
+//    The underlying bits for value.
+//
+uint32_t BitOperations::SingleToUInt32Bits(float value)
+{
+    uint32_t result;
+    memcpy(&result, &value, sizeof(float));
+    return result;
+}
+
 //------------------------------------------------------------------------
 // BitOperations::TrailingZeroCount: Count the number of trailing zero bits in an integer value.
 //
@@ -2980,6 +3012,38 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value)
 #endif
 }
 
+//------------------------------------------------------------------------
+// BitOperations::UInt32BitsToSingle: Gets the single-precision floating-point value with the given underlying bits.
+//
+// Arguments:
+//    value - The underlying bit value.
+//
+// Return Value:
+//    The single-precision floating-point value whose bit pattern is value.
+//
+float BitOperations::UInt32BitsToSingle(uint32_t value)
+{
+    float result;
+    memcpy(&result, &value, sizeof(uint32_t));
+    return result;
+}
+
+//------------------------------------------------------------------------
+// BitOperations::UInt64BitsToDouble: Gets the double-precision floating-point value with the given underlying bits.
+//
+// Arguments:
+//    value - The underlying bit value.
+//
+// Return Value:
+//    The double-precision floating-point value whose bit pattern is value.
+//
+double BitOperations::UInt64BitsToDouble(uint64_t value)
+{
+    double result;
+    memcpy(&result, &value, sizeof(uint64_t));
+    return result;
+}
+
 namespace MagicDivide
 {
 template <int TableBase = 0, int TableSize, typename Magic>
diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h
index 6f661eef4d15c..d78000bfb7d26 100644
--- a/src/coreclr/jit/utils.h
+++ b/src/coreclr/jit/utils.h
@@ -751,6 +751,8 @@ class BitOperations
 
     static uint32_t BitScanReverse(uint64_t value);
 
+    static uint64_t DoubleToUInt64Bits(double value);
+
     static uint32_t LeadingZeroCount(uint32_t value);
 
     static uint32_t LeadingZeroCount(uint64_t value);
@@ -775,9 +777,15 @@ class BitOperations
 
     static uint64_t RotateRight(uint64_t value, uint32_t offset);
 
+    static uint32_t SingleToUInt32Bits(float value);
+
     static uint32_t TrailingZeroCount(uint32_t value);
 
     static uint32_t TrailingZeroCount(uint64_t value);
+
+    static float UInt32BitsToSingle(uint32_t value);
+
+    static double UInt64BitsToDouble(uint64_t value);
 };
 
 // The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but