diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 56d8a1bca6c1f..49a2cdaa9b34b 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1403,7 +1403,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed case NI_Vector128_Create: case NI_Vector128_CreateScalar: case NI_Vector128_CreateScalarUnsafe: - case NI_VectorT_CreateBroadcast: #if defined(TARGET_XARCH) case NI_BMI1_TrailingZeroCount: case NI_BMI1_X64_TrailingZeroCount: @@ -1647,8 +1646,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed #endif // TARGET_ARM64 case NI_Vector128_get_AllBitsSet: case NI_Vector128_get_One: - case NI_VectorT_get_AllBitsSet: - case NI_VectorT_get_One: #if defined(TARGET_XARCH) case NI_Vector256_get_AllBitsSet: case NI_Vector256_get_One: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 949d611519e4c..000def1e8f1eb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25510,20 +25510,48 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(IsBaselineVector512IsaSupportedDebugOnly()); GenTree* op1Dup = fgMakeMultiUse(&op1); - op1 = gtNewSimdGetUpperNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); - op1Dup = gtNewSimdGetLowerNode(TYP_SIMD32, op1Dup, simdBaseJitType, simdSize); - simdSize = simdSize / 2; - op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD32, op1, op1Dup, simdBaseJitType, simdSize); + + op1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); + op1Dup = gtNewSimdGetUpperNode(TYP_SIMD32, op1Dup, simdBaseJitType, simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + // We need to ensure deterministic results which requires + // consistently adding values together. Since many operations + // end up operating on 128-bit lanes, we break the sum the same way.
+ + op1 = gtNewSimdSumNode(type, op1, simdBaseJitType, 32); + op1Dup = gtNewSimdSumNode(type, op1Dup, simdBaseJitType, 32); + + return gtNewOperNode(GT_ADD, type, op1, op1Dup); + } + + simdSize = 32; + op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD32, op1, op1Dup, simdBaseJitType, 32); } if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); GenTree* op1Dup = fgMakeMultiUse(&op1); - op1 = gtNewSimdGetUpperNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); - op1Dup = gtNewSimdGetLowerNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); - simdSize = simdSize / 2; - op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Dup, simdBaseJitType, simdSize); + + op1 = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); + op1Dup = gtNewSimdGetUpperNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + // We need to ensure deterministic results which requires + // consistently adding values together. Since many operations + // end up operating on 128-bit lanes, we break the sum the same way.
+ + op1 = gtNewSimdSumNode(type, op1, simdBaseJitType, 16); + op1Dup = gtNewSimdSumNode(type, op1Dup, simdBaseJitType, 16); + + return gtNewOperNode(GT_ADD, type, op1, op1Dup); + } + + simdSize = 16; + op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Dup, simdBaseJitType, 16); } assert(simdSize == 16); @@ -25534,6 +25562,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); + if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); @@ -25571,6 +25600,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); + if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 095f31246d0c6..b4eb52ab53410 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -534,39 +534,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Vector64_Abs: - case NI_Vector128_Abs: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_Addition: - case NI_Vector128_op_Addition: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_AndNot: - case NI_Vector128_AndNot: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case 
NI_Vector128_AsVector: { assert(!sig->hasThis()); @@ -682,30 +649,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_BitwiseAnd: - case NI_Vector128_op_BitwiseAnd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_BitwiseOr: - case NI_Vector128_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Ceiling: case NI_Vector128_Ceiling: { @@ -1062,32 +1005,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Division: - case NI_Vector128_op_Division: - { - assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { - // We can't trivially handle division for integral types using SIMD - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Dot: case NI_Vector128_Dot: { @@ -1653,32 +1570,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Multiply: - case NI_Vector128_op_Multiply: - { - assert(sig->numArgs == 2); - - if (varTypeIsLong(simdBaseType)) - { - // TODO-ARM64-CQ: We should support long/ulong multiplication. 
- break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_MultiplyAddEstimate: case NI_Vector128_MultiplyAddEstimate: { @@ -1725,15 +1616,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_OnesComplement: - case NI_Vector128_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_op_Inequality: case NI_Vector128_op_Inequality: { @@ -1747,55 +1629,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_Subtraction: - case NI_Vector128_op_Subtraction: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_LeftShift: - case NI_Vector128_op_LeftShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_RightShift: - case NI_Vector128_op_RightShift: - { - assert(sig->numArgs == 2); - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? 
GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector64_op_UnsignedRightShift: - case NI_Vector128_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector64_Shuffle: case NI_Vector128_Shuffle: { @@ -2205,18 +2038,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector64_op_ExclusiveOr: - case NI_Vector128_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_AdvSimd_LoadVector64x2AndUnzip: case NI_AdvSimd_LoadVector64x3AndUnzip: case NI_AdvSimd_LoadVector64x4AndUnzip: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index d3c880f38e0fd..69e1abf8f1eae 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -15,8 +15,6 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector64 Intrinsics -HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -62,18 +60,10 @@ HARDWARE_INTRINSIC(Vector64, Max, HARDWARE_INTRINSIC(Vector64, Min, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, 
false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_RightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector64, op_Subtraction, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_UnaryNegation, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -95,8 +85,6 @@ HARDWARE_INTRINSIC(Vector64, WithElement, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) @@ -148,19 +136,11 @@ HARDWARE_INTRINSIC(Vector128, Max, HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, 
false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h 
b/src/coreclr/jit/hwintrinsiclistxarch.h index 578e50f76821c..3f508d9b2d0be 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -29,8 +29,6 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) @@ -80,18 +78,10 @@ HARDWARE_INTRINSIC(Vector128, Max, HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -113,8 +103,6 @@ HARDWARE_INTRINSIC(Vector128, WithElement, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector256 Intrinsics -HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, false, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) @@ -164,18 +152,10 @@ HARDWARE_INTRINSIC(Vector256, Max, HARDWARE_INTRINSIC(Vector256, Min, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) 
-HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) @@ -198,8 +178,6 @@ HARDWARE_INTRINSIC(Vector256, WithUpper, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector512 Intrinsics -HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) @@ -249,18 +227,10 @@ HARDWARE_INTRINSIC(Vector512, Max, HARDWARE_INTRINSIC(Vector512, Min, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 917c0c9d95641..9bf7ab94a62db 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -996,54 +996,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_Vector128_Abs: - case NI_Vector256_Abs: - case NI_Vector512_Abs: - { - assert(sig->numArgs == 1); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || varTypeIsUnsigned(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op1 = impSIMDPopStack(); - retNode = 
gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_Addition: - case NI_Vector256_op_Addition: - case NI_Vector512_op_Addition: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_AndNot: - case NI_Vector256_AndNot: - case NI_Vector512_AndNot: - { - assert(sig->numArgs == 2); - - impSpillSideEffect(true, - verCurrentState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_AsVector: { assert(sig->numArgs == 1); @@ -1276,32 +1228,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_BitwiseAnd: - case NI_Vector256_op_BitwiseAnd: - case NI_Vector512_op_BitwiseAnd: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - - case NI_Vector128_op_BitwiseOr: - case NI_Vector256_op_BitwiseOr: - case NI_Vector512_op_BitwiseOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_Ceiling: case NI_Vector256_Ceiling: case NI_Vector512_Ceiling: @@ -1842,33 +1768,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Division: - case NI_Vector256_op_Division: - case NI_Vector512_op_Division: - { - assert(sig->numArgs == 2); - - if (!varTypeIsFloating(simdBaseType)) - { - // 
We can't trivially handle division for integral types using SIMD - break; - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_Dot: case NI_Vector256_Dot: { @@ -2540,51 +2439,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Multiply: - case NI_Vector256_op_Multiply: - case NI_Vector512_op_Multiply: - { - assert(sig->numArgs == 2); - - if ((simdSize == 32) && !varTypeIsFloating(simdBaseType) && - !compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 - break; - } - - assert(simdSize != 64 || IsBaselineVector512IsaSupportedDebugOnly()); - - if (varTypeIsLong(simdBaseType)) - { - if (simdSize != 64 && !compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - break; - } - // else if simdSize == 64 then above assert would check if baseline isa supported - -#if defined(TARGET_X86) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - break; -#endif // TARGET_X86 - } - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); - - 
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass); - - retNode = gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_MultiplyAddEstimate: case NI_Vector256_MultiplyAddEstimate: case NI_Vector512_MultiplyAddEstimate: @@ -2647,16 +2501,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_OnesComplement: - case NI_Vector256_op_OnesComplement: - case NI_Vector512_op_OnesComplement: - { - assert(sig->numArgs == 1); - op1 = impSIMDPopStack(); - retNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - break; - } - case NI_Vector128_op_Inequality: case NI_Vector256_op_Inequality: { @@ -2692,94 +2536,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_Subtraction: - case NI_Vector256_op_Subtraction: - case NI_Vector512_op_Subtraction: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || - compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_LeftShift: - case NI_Vector256_op_LeftShift: - case NI_Vector512_op_LeftShift: - { - assert(sig->numArgs == 2); - - if (varTypeIsByte(simdBaseType)) - { - // byte and sbyte would require more work to support - break; - } - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_RightShift: - case NI_Vector256_op_RightShift: - case NI_Vector512_op_RightShift: - { - assert(sig->numArgs == 2); - - if (varTypeIsByte(simdBaseType)) - { - // byte and 
sbyte would require more work to support - break; - } - - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - // long, ulong, and double would require more work to support - break; - } - } - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; - - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - - case NI_Vector128_op_UnsignedRightShift: - case NI_Vector256_op_UnsignedRightShift: - case NI_Vector512_op_UnsignedRightShift: - { - assert(sig->numArgs == 2); - - if ((simdSize != 32) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - } - break; - } - case NI_Vector128_Shuffle: case NI_Vector256_Shuffle: case NI_Vector512_Shuffle: @@ -3219,19 +2975,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Vector128_op_ExclusiveOr: - case NI_Vector256_op_ExclusiveOr: - case NI_Vector512_op_ExclusiveOr: - { - assert(sig->numArgs == 2); - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - - retNode = gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - break; - } - case NI_X86Base_Pause: case NI_X86Serialize_Serialize: { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index c7ae69f4ca1e2..2b88668399a1a 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -514,37 +514,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_VectorT_ConvertToInt32Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - break; 
- } - - case NI_VectorT_ConvertToInt64Native: - case NI_VectorT_ConvertToUInt32Native: - case NI_VectorT_ConvertToUInt64Native: - { - if (BlockNonDeterministicIntrinsics(mustExpand)) - { - return nullptr; - } - -#if defined(TARGET_XARCH) - if (!IsBaselineVector512IsaSupportedOpportunistically()) - { - return nullptr; - } -#endif // TARGET_XARCH - - break; - } - case NI_Vector2_MultiplyAddEstimate: case NI_Vector3_MultiplyAddEstimate: - case NI_VectorT_MultiplyAddEstimate: { if (BlockNonDeterministicIntrinsics(mustExpand)) { @@ -554,149 +525,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #if defined(TARGET_XARCH) - case NI_VectorT_ConvertToDouble: - { - if (IsBaselineVector512IsaSupportedOpportunistically()) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToInt32: - { - if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToInt64: - case NI_VectorT_ConvertToUInt32: - case NI_VectorT_ConvertToUInt64: - { - if (IsBaselineVector512IsaSupportedOpportunistically()) - { - break; - } - return nullptr; - } - - case NI_VectorT_ConvertToSingle: - { - if ((simdBaseType == TYP_INT) || - (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically())) - { - break; - } - return nullptr; - } -#endif // TARGET_XARCH - -#if defined(TARGET_X86) - case NI_VectorT_CreateBroadcast: - { - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->IsIntegralConst()) - { - // TODO-XARCH-CQ: It may be beneficial to emit the movq - // instruction, which takes a 64-bit memory address and - // works on 32-bit x86 systems. 
- return nullptr; - } - break; - } -#endif // TARGET_X86 - - case NI_VectorT_CreateSequence: - { - if (varTypeIsLong(simdBaseType) && !impStackTop(0).val->OperIsConst()) - { -#if defined(TARGET_XARCH) - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - return nullptr; - } -#endif // TARGET_XARCH - -#if defined(TARGET_X86) || defined(TARGET_ARM64) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - // TODO-ARM64-CQ: We should support long/ulong multiplication. - return nullptr; -#endif // TARGET_X86 || TARGET_ARM64 - } - break; - } - -#if defined(TARGET_XARCH) - case NI_VectorT_GetElement: - { - op2 = impStackTop(0).val; - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - case TYP_LONG: - case TYP_ULONG: - { - bool useToScalar = op2->IsIntegralConst(0); - -#if defined(TARGET_X86) - useToScalar &= !varTypeIsLong(simdBaseType); -#endif // TARGET_X86 - - if (!useToScalar && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // Using software fallback if simdBaseType is not supported by hardware - return nullptr; - } - break; - } - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - { - // short/ushort/float/double is supported by SSE2 - break; - } - - default: - { - unreached(); - } - } - break; - } -#endif // TARGET_XARCH - -#if defined(TARGET_XARCH) - case NI_VectorT_Dot: - { - if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // TODO-XARCH-CQ: We can support 32-bit integers if we updating multiplication - // to be lowered rather than imported as the relevant operations. 
- return nullptr; - } - } - else - { - assert(varTypeIsShort(simdBaseType) || varTypeIsFloating(simdBaseType)); - } - break; - } - case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { assert(sig->numArgs == 3); GenTree* indexOp = impStackTop(1).val; @@ -761,24 +591,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, #endif // TARGET_XARCH #if defined(TARGET_ARM64) - case NI_VectorT_LoadAligned: - case NI_VectorT_LoadAlignedNonTemporal: - case NI_VectorT_StoreAligned: - case NI_VectorT_StoreAlignedNonTemporal: - { - if (opts.OptimizationDisabled()) - { - // ARM64 doesn't have aligned loads/stores, but aligned simd ops are only validated - // to be aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - return nullptr; - } - break; - } - case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { assert(numArgs == 3); GenTree* indexOp = impStackTop(1).val; @@ -803,21 +617,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #endif -#if defined(TARGET_XARCH) - case NI_VectorT_Floor: - case NI_VectorT_Ceiling: - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - return nullptr; - } - break; - } -#endif // TARGET_XARCH - case NI_Vector2_FusedMultiplyAdd: case NI_Vector3_FusedMultiplyAdd: - case NI_VectorT_FusedMultiplyAdd: { bool isFmaAccelerated = false; @@ -834,41 +635,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } -#if defined(TARGET_XARCH) - case NI_VectorT_op_Multiply: - { - if (varTypeIsLong(simdBaseType)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong multiplication - return nullptr; - } - -#if defined(TARGET_X86) - // TODO-XARCH-CQ: We need to support 64-bit CreateBroadcast - return nullptr; -#endif // TARGET_X86 - } - break; - } -#endif // TARGET_XARCH - 
-#if defined(TARGET_XARCH) - case NI_VectorT_op_RightShift: - { - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - // TODO-XARCH-CQ: We should support long/ulong arithmetic shift - return nullptr; - } - } - break; - } -#endif // TARGET_XARCH - default: { // Most intrinsics have some path that works even if only SSE2/AdvSimd is available @@ -881,38 +647,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (numArgs) { - case 0: - { - assert(newobjThis == nullptr); - - switch (intrinsic) - { - case NI_VectorT_get_AllBitsSet: - { - return gtNewAllBitsSetConNode(retType); - } - - case NI_VectorT_get_Indices: - { - assert(sig->numArgs == 0); - return gtNewSimdGetIndicesNode(retType, simdBaseJitType, simdSize); - } - - case NI_VectorT_get_One: - { - return gtNewOneConNode(retType, simdBaseType); - } - - default: - { - // Some platforms warn about unhandled switch cases - // We handle it more generally via the assert and nullptr return below. - break; - } - } - break; - } - case 1: { assert(newobjThis == nullptr); @@ -925,238 +659,22 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { case NI_Vector2_Abs: case NI_Vector3_Abs: - case NI_VectorT_Abs: { return gtNewSimdAbsNode(retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_Ceiling: - { - return gtNewSimdCeilNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_Floor: - { - return gtNewSimdFloorNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadUnsafe: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadAligned: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadAlignedNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadAlignedNonTemporal: - { - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op1 = op1->gtGetOp1(); - } - - return gtNewSimdLoadNonTemporalNode(retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_op_UnaryNegation: case NI_Vector3_op_UnaryNegation: - case NI_VectorT_op_UnaryNegation: { return gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_op_OnesComplement: - { - return gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Sqrt: case NI_Vector3_Sqrt: - case NI_VectorT_Sqrt: { return gtNewSimdSqrtNode(retType, op1, simdBaseJitType, simdSize); } - case NI_VectorT_Sum: - { - return gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_ToScalar: - { -#if defined(TARGET_X86) - if (varTypeIsLong(simdBaseType)) - { - op2 = gtNewIconNode(0); - return gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize); - } -#endif // TARGET_X86 - - return gtNewSimdToScalarNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_WidenLower: - { - return gtNewSimdWidenLowerNode(retType, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_WidenUpper: - { - return gtNewSimdWidenUpperNode(retType, op1, simdBaseJitType, simdSize); - } - -#if defined(TARGET_XARCH) - case NI_VectorT_ConvertToDouble: - { - assert(sig->numArgs == 1); - assert(varTypeIsLong(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - if 
(simdSize == 64) - { - intrinsic = NI_AVX512DQ_ConvertToVector512Double; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX512DQ_VL_ConvertToVector256Double; - } - else - { - assert(simdSize == 16); - intrinsic = NI_AVX512DQ_VL_ConvertToVector128Double; - } - return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToSingle: - { - assert(varTypeIsInt(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - if (simdBaseType == TYP_INT) - { - switch (simdSize) - { - case 16: - intrinsic = NI_SSE2_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - else if (simdBaseType == TYP_UINT) - { - switch (simdSize) - { - case 16: - intrinsic = NI_AVX512F_VL_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX512F_VL_ConvertToVector256Single; - break; - case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } -#elif defined(TARGET_ARM64) - case NI_VectorT_ConvertToDouble: - { - assert((simdBaseType == TYP_LONG) || (simdBaseType == TYP_ULONG)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_Arm64_ConvertToDouble, simdBaseJitType, - simdSize); - } - - case NI_VectorT_ConvertToSingle: - { - assert((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)); - return gtNewSimdHWIntrinsicNode(retType, op1, NI_AdvSimd_ConvertToSingle, simdBaseJitType, - simdSize); - } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - case NI_VectorT_ConvertToInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); - } - - case 
NI_VectorT_ConvertToInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt32: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt32Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_FLOAT); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt64: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); - } - - case NI_VectorT_ConvertToUInt64Native: - { - assert(sig->numArgs == 1); - assert(simdBaseType == TYP_DOUBLE); - return gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); - } - default: { // Some platforms warn about unhandled switch cases @@ -1201,29 +719,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { case NI_Vector2_op_Addition: case NI_Vector3_op_Addition: - case NI_VectorT_op_Addition: { return gtNewSimdBinOpNode(GT_ADD, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_AndNot: - { - return gtNewSimdBinOpNode(GT_AND_NOT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_BitwiseAnd: - { - return gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, 
simdSize); - } - - case NI_VectorT_op_BitwiseOr: - { - return gtNewSimdBinOpNode(GT_OR, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_CreateBroadcast: case NI_Vector3_CreateBroadcast: - case NI_VectorT_CreateBroadcast: { assert(retType == TYP_VOID); @@ -1232,223 +733,57 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } - case NI_VectorT_CreateSequence: - { - return gtNewSimdCreateSequenceNode(simdType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Division: case NI_Vector3_op_Division: - case NI_VectorT_op_Division: { return gtNewSimdBinOpNode(GT_DIV, retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_Dot: case NI_Vector3_Dot: - case NI_VectorT_Dot: { op1 = gtNewSimdDotProdNode(simdType, op1, op2, simdBaseJitType, simdSize); return gtNewSimdGetElementNode(retType, op1, gtNewIconNode(0), simdBaseJitType, simdSize); } - case NI_VectorT_Equals: - { - return gtNewSimdCmpOpNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Equality: case NI_Vector3_op_Equality: - case NI_VectorT_op_Equality: { return gtNewSimdCmpOpAllNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_EqualsAny: - { - return gtNewSimdCmpOpAnyNode(GT_EQ, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_ExclusiveOr: - { - return gtNewSimdBinOpNode(GT_XOR, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_GetElement: case NI_Vector3_GetElement: - case NI_VectorT_GetElement: { return gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_GreaterThan: - { - return gtNewSimdCmpOpNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanAll: - { - return gtNewSimdCmpOpAllNode(GT_GT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanAny: - { - return gtNewSimdCmpOpAnyNode(GT_GT, retType, op1, op2, simdBaseJitType, 
simdSize); - } - - case NI_VectorT_GreaterThanOrEqual: - { - return gtNewSimdCmpOpNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanOrEqualAll: - { - return gtNewSimdCmpOpAllNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_GreaterThanOrEqualAny: - { - return gtNewSimdCmpOpAnyNode(GT_GE, retType, op1, op2, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Inequality: case NI_Vector3_op_Inequality: - case NI_VectorT_op_Inequality: { return gtNewSimdCmpOpAnyNode(GT_NE, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_LessThan: - { - return gtNewSimdCmpOpNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanAll: - { - return gtNewSimdCmpOpAllNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanAny: - { - return gtNewSimdCmpOpAnyNode(GT_LT, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqual: - { - return gtNewSimdCmpOpNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqualAll: - { - return gtNewSimdCmpOpAllNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LessThanOrEqualAny: - { - return gtNewSimdCmpOpAnyNode(GT_LE, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_LoadUnsafeIndex: - { - GenTree* tmp; - - if (op1->OperIs(GT_CAST) && op1->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op1 = op1->gtGetOp1(); - } - - tmp = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet()); - op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, tmp); - op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2); - - return gtNewSimdLoadNode(retType, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Max: case NI_Vector3_Max: - case NI_VectorT_Max: { return gtNewSimdMaxNode(retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_Min: case NI_Vector3_Min: - case NI_VectorT_Min: { return gtNewSimdMinNode(retType, op1, op2, simdBaseJitType, simdSize); } case NI_Vector2_op_Multiply: case NI_Vector3_op_Multiply: - case NI_VectorT_op_Multiply: { return gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize); } - case NI_VectorT_Narrow: - { - return gtNewSimdNarrowNode(retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_LeftShift: - { - return gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_RightShift: - { - genTreeOps op = varTypeIsUnsigned(simdBaseType) ? GT_RSZ : GT_RSH; - return gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_op_UnsignedRightShift: - { - return gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreUnsafe: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreNode(op2, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreAligned: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreAlignedNode(op2, op1, simdBaseJitType, simdSize); - } - - case NI_VectorT_StoreAlignedNonTemporal: - { - assert(retType == TYP_VOID); - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. - op2 = op2->gtGetOp1(); - } - - return gtNewSimdStoreNonTemporalNode(op2, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_op_Subtraction: case NI_Vector3_op_Subtraction: - case NI_VectorT_op_Subtraction: { return gtNewSimdBinOpNode(GT_SUB, retType, op1, op2, simdBaseJitType, simdSize); } @@ -1504,14 +839,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { - case NI_VectorT_ConditionalSelect: - { - return gtNewSimdCndSelNode(retType, op1, op2, op3, simdBaseJitType, simdSize); - } - case NI_Vector2_FusedMultiplyAdd: case NI_Vector3_FusedMultiplyAdd: - case NI_VectorT_FusedMultiplyAdd: { return gtNewSimdFmaNode(retType, op1, op2, op3, simdBaseJitType, simdSize); } @@ -1558,7 +887,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_Vector2_MultiplyAddEstimate: case NI_Vector3_MultiplyAddEstimate: - case NI_VectorT_MultiplyAddEstimate: { bool isFmaAccelerated = false; @@ -1577,24 +905,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return gtNewSimdBinOpNode(GT_ADD, retType, mulNode, op3, simdBaseJitType, simdSize); } - case NI_VectorT_StoreUnsafeIndex: - { - assert(retType == TYP_VOID); - GenTree* tmp; - - if (op2->OperIs(GT_CAST) && op2->gtGetOp1()->TypeIs(TYP_BYREF)) - { - // If what we have is a BYREF, that's what we really want, so throw away the cast. 
- op2 = op2->gtGetOp1(); - } - - tmp = gtNewIconNode(genTypeSize(simdBaseType), op3->TypeGet()); - op3 = gtNewOperNode(GT_MUL, op3->TypeGet(), op3, tmp); - op2 = gtNewOperNode(GT_ADD, op2->TypeGet(), op2, op3); - - return gtNewSimdStoreNode(op2, op1, simdBaseJitType, simdSize); - } - case NI_Vector2_Create: { assert(retType == TYP_VOID); @@ -1678,7 +988,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case NI_Vector2_WithElement: case NI_Vector3_WithElement: - case NI_VectorT_WithElement: { return gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize); } @@ -1779,6 +1088,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } break; } + + default: + { + break; + } } if (copyBlkDst != nullptr) diff --git a/src/coreclr/jit/simdashwintrinsiclistarm64.h b/src/coreclr/jit/simdashwintrinsiclistarm64.h index 3f73df38f13dd..b10b15d347220 100644 --- a/src/coreclr/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/jit/simdashwintrinsiclistarm64.h @@ -83,81 +83,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA ID Name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// Vector Intrinsics -SIMD_AS_HWINTRINSIC_ID(VectorT, Abs, 1, {NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, AndNot, 2, {NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Ceiling, 
NI_VectorT_Ceiling}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConditionalSelect, 3, {NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToDouble, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToDouble, NI_VectorT_ConvertToDouble, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToSingle, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToSingle, NI_VectorT_ConvertToSingle, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, CreateBroadcast, ".ctor", 2, {NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, CreateSequence, 2, {NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Dot, 2, {NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Equals, 2, {NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC_ID(VectorT, EqualsAny, 2, {NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Floor, NI_VectorT_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, FusedMultiplyAdd, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_FusedMultiplyAdd, NI_VectorT_FusedMultiplyAdd}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1 | SimdAsHWIntrinsicFlag::SpillSideEffectsOp2) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_AllBitsSet, 0, {NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_Indices, 0, {NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_One, 0, {NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GetElement, 2, {NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, 
NI_VectorT_GetElement, NI_VectorT_GetElement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThan, 2, {NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAll, 2, {NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAny, 2, {NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqual, 2, {NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAll, 2, {NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAny, 2, 
{NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThan, 2, {NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAll, 2, {NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAny, 2, {NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqual, 2, {NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAll, 2, {NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, 
NI_VectorT_LessThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAny, 2, {NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAligned, 1, {NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAlignedNonTemporal, 1, {NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadUnsafe, 1, {NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_NM(VectorT, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, 
Max, 2, {NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Min, 2, {NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, MultiplyAddEstimate, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_MultiplyAddEstimate, NI_VectorT_MultiplyAddEstimate}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Narrow, 2, {NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Addition, 2, {NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseAnd, 2, {NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseOr, 2, {NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Division, 
2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_op_Division, NI_VectorT_op_Division}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Equality, 2, {NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_ExclusiveOr, 2, {NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Inequality, 2, {NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_LeftShift, 2, {NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Multiply, 2, {NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_OnesComplement, 1, {NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, 
NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_RightShift, 2, {NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Subtraction, 2, {NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnaryNegation, 1, {NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnsignedRightShift, 2, {NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Sqrt, NI_VectorT_Sqrt}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAligned, 2, 
{NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAlignedNonTemporal, 2, {NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreUnsafe, 2, {NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_NM(VectorT, StoreUnsafeIndex, "StoreUnsafe", 3, {NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Sum, 1, {NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ToScalar, 1, {NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenLower, 1, {NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenUpper, 1, {NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WithElement, 3, {NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement}, SimdAsHWIntrinsicFlag::None) - #undef SIMD_AS_HWINTRINSIC_NM #undef SIMD_AS_HWINTRINSIC_ID diff --git a/src/coreclr/jit/simdashwintrinsiclistxarch.h b/src/coreclr/jit/simdashwintrinsiclistxarch.h index f119a4e051d59..a70045bbd8f7f 100644 --- a/src/coreclr/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/jit/simdashwintrinsiclistxarch.h @@ -83,81 +83,6 @@ SIMD_AS_HWINTRINSIC_ID(Vector3, op_UnaryNegation, SIMD_AS_HWINTRINSIC_NM(Vector3, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC_ID(Vector3, WithElement, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_WithElement, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA ID Name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// Vector Intrinsics -SIMD_AS_HWINTRINSIC_ID(VectorT, Abs, 1, {NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs, NI_VectorT_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, AndNot, 2, {NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot, NI_VectorT_AndNot}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Ceiling, NI_VectorT_Ceiling}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConditionalSelect, 3, {NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect, NI_VectorT_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToDouble, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToDouble, NI_VectorT_ConvertToDouble, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToSingle, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToSingle, NI_VectorT_ConvertToSingle, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt32Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt32Native, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ConvertToUInt64Native, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_ConvertToUInt64Native}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, CreateBroadcast, ".ctor", 2, {NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast, NI_VectorT_CreateBroadcast}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, CreateSequence, 2, {NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence, NI_VectorT_CreateSequence}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Dot, 2, {NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_VectorT_Dot, NI_Illegal, NI_Illegal, NI_VectorT_Dot, NI_VectorT_Dot}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Equals, 2, {NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals, NI_VectorT_Equals}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC_ID(VectorT, EqualsAny, 2, {NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny, NI_VectorT_EqualsAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Floor, NI_VectorT_Floor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, FusedMultiplyAdd, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_FusedMultiplyAdd, NI_VectorT_FusedMultiplyAdd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_AllBitsSet, 0, {NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet, NI_VectorT_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_Indices, 0, {NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices, NI_VectorT_get_Indices}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, get_One, 0, {NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One, NI_VectorT_get_One}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GetElement, 2, {NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement, NI_VectorT_GetElement}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThan, 2, {NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan, NI_VectorT_GreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAll, 2, {NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll, NI_VectorT_GreaterThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanAny, 2, {NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny, NI_VectorT_GreaterThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqual, 2, {NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual, NI_VectorT_GreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAll, 2, {NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll, NI_VectorT_GreaterThanOrEqualAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, GreaterThanOrEqualAny, 2, {NI_VectorT_GreaterThanOrEqualAny, 
NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny, NI_VectorT_GreaterThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThan, 2, {NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan, NI_VectorT_LessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAll, 2, {NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll, NI_VectorT_LessThanAll}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanAny, 2, {NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny, NI_VectorT_LessThanAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqual, 2, {NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual, NI_VectorT_LessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAll, 2, {NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll, NI_VectorT_LessThanOrEqualAll}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LessThanOrEqualAny, 2, {NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny, NI_VectorT_LessThanOrEqualAny}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAligned, 1, {NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned, NI_VectorT_LoadAligned}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadAlignedNonTemporal, 1, {NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal, NI_VectorT_LoadAlignedNonTemporal}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, LoadUnsafe, 1, {NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe, NI_VectorT_LoadUnsafe}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_NM(VectorT, LoadUnsafeIndex, "LoadUnsafe", 2, {NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex, NI_VectorT_LoadUnsafeIndex}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, Max, 2, {NI_VectorT_Max, 
NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max, NI_VectorT_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Min, 2, {NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min, NI_VectorT_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, MultiplyAddEstimate, 3, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_MultiplyAddEstimate, NI_VectorT_MultiplyAddEstimate}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, Narrow, 2, {NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow, NI_VectorT_Narrow}, SimdAsHWIntrinsicFlag::KeepBaseTypeFromRet) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Addition, 2, {NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition, NI_VectorT_op_Addition}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseAnd, 2, {NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd, NI_VectorT_op_BitwiseAnd}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_BitwiseOr, 2, {NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr, NI_VectorT_op_BitwiseOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Division, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_op_Division, NI_VectorT_op_Division}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Equality, 2, {NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality, NI_VectorT_op_Equality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_ExclusiveOr, 2, {NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr, NI_VectorT_op_ExclusiveOr}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Inequality, 2, {NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality, NI_VectorT_op_Inequality}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_LeftShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift, NI_VectorT_op_LeftShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply, NI_Illegal, NI_Illegal, NI_VectorT_op_Multiply, NI_VectorT_op_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_OnesComplement, 2, {NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, 
NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement, NI_VectorT_op_OnesComplement}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_RightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift, NI_VectorT_op_RightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_Subtraction, 2, {NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction, NI_VectorT_op_Subtraction}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnaryNegation, 1, {NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation, NI_VectorT_op_UnaryNegation}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, op_UnsignedRightShift, 2, {NI_Illegal, NI_Illegal, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift, NI_VectorT_op_UnsignedRightShift}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_NM(VectorT, Sqrt, "SquareRoot", 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_VectorT_Sqrt, NI_VectorT_Sqrt}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAligned, 2, {NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, 
NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned, NI_VectorT_StoreAligned}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreAlignedNonTemporal, 2, {NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal, NI_VectorT_StoreAlignedNonTemporal}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, StoreUnsafe, -1, {NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe, NI_VectorT_StoreUnsafe}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_NM(VectorT, StoreUnsafeIndex, "StoreUnsafe", 3, {NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex, NI_VectorT_StoreUnsafeIndex}, SimdAsHWIntrinsicFlag::SpillSideEffectsOp1) -SIMD_AS_HWINTRINSIC_ID(VectorT, Sum, 1, {NI_Illegal, NI_Illegal, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_VectorT_Sum, NI_Illegal, NI_Illegal, NI_VectorT_Sum, NI_VectorT_Sum}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, ToScalar, 1, {NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar, NI_VectorT_ToScalar}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenLower, 1, {NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, 
NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower, NI_VectorT_WidenLower}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WidenUpper, 1, {NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper, NI_VectorT_WidenUpper}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC_ID(VectorT, WithElement, 3, {NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement, NI_VectorT_WithElement}, SimdAsHWIntrinsicFlag::None) - #undef SIMD_AS_HWINTRINSIC_NM #undef SIMD_AS_HWINTRINSIC_ID diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 392cd58611e6e..f230a7299dc2c 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -180,13 +180,13 @@ ValueNumFuncDef(SimdType, 2, false, false, false, false) // A value number func #define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, argCount, ((flag) & HW_Flag_Commutative) >> 0, false, false, extra) // All of the HARDWARE_INTRINSICS for x86/x64 #include "hwintrinsiclistxarch.h" -#define VNF_HWI_FIRST VNF_HWI_Vector128_Abs +#define VNF_HWI_FIRST VNF_HWI_Vector128_AsVector #elif defined (TARGET_ARM64) #define HARDWARE_INTRINSIC(isa, name, size, argCount, extra, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, argCount, ((flag) & HW_Flag_Commutative) >> 0, false, false, extra) // All of the HARDWARE_INTRINSICS for arm64 #include "hwintrinsiclistarm64.h" -#define VNF_HWI_FIRST VNF_HWI_Vector64_Abs +#define VNF_HWI_FIRST VNF_HWI_Vector64_Ceiling #elif defined 
(TARGET_ARM) // No Hardware Intrinsics on ARM32 diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index 41961bbb54c79..2111de2ab68b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -4,8 +4,8 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using static Interop; + +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') namespace System.Numerics { @@ -32,25 +32,18 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Abs(Vector value) { - if ((typeof(T) == typeof(byte)) - || (typeof(T) == typeof(ushort)) - || (typeof(T) == typeof(uint)) - || (typeof(T) == typeof(ulong)) - || (typeof(T) == typeof(nuint))) + if (sizeof(Vector) == 64) + { + return Vector512.Abs(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - return value; + return Vector256.Abs(value.AsVector256()).AsVector(); } else { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) - { - T element = Scalar.Abs(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); - } - - return result; + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Abs(value.AsVector128()).AsVector(); } } @@ -68,7 +61,23 @@ public static Vector Abs(Vector value) /// The type of the elements in the vector. /// The bitwise-and of and the ones-complement of . 
[Intrinsic] - public static Vector AndNot(Vector left, Vector right) => left & ~right; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector AndNot(Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.AndNot(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.AndNot(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.AndNot(left.AsVector128(), right.AsVector128()).AsVector(); + } + } /// Reinterprets a as a new . /// The type of the input vector. @@ -215,15 +224,19 @@ public static Vector As(this Vector vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Ceiling(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = Scalar.Ceiling(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Ceiling(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Ceiling(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Ceiling(value.AsVector128()).AsVector(); } - - return result; } /// Computes the ceiling of each element in a vector. 
@@ -234,15 +247,19 @@ public static Vector Ceiling(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Ceiling(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = Scalar.Ceiling(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Ceiling(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Ceiling(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Ceiling(value.AsVector128()).AsVector(); } - - return result; } /// Conditionally selects a value from two vectors on a bitwise basis. @@ -253,7 +270,22 @@ public static Vector Ceiling(Vector value) /// A vector whose bits come from or based on the value of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector ConditionalSelect(Vector condition, Vector left, Vector right) => (left & condition) | (right & ~condition); + public static Vector ConditionalSelect(Vector condition, Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.ConditionalSelect(condition.AsVector512(), left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConditionalSelect(condition.AsVector256(), left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConditionalSelect(condition.AsVector128(), left.AsVector128(), right.AsVector128()).AsVector(); + } + } /// Conditionally selects a value from two vectors on a bitwise basis. /// The mask that is used to select a value from or . 
@@ -278,14 +310,17 @@ public static Vector Ceiling(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToDouble(value.AsVector128()).AsVector(); } } @@ -298,14 +333,17 @@ public static Vector ConvertToDouble(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToDouble(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToDouble(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToDouble(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToDouble(value.AsVector128()).AsVector(); } } @@ -314,85 +352,110 @@ public static Vector ConvertToDouble(Vector value) /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt32(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int element = float.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt32(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt32(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt32(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. /// The vector to convert. /// The converted vector. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt32Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int element = float.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt32Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt32Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt32Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using saturation on overflow. /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt64(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long element = double.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt64(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt64(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt64(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. /// The vector to convert. /// The converted vector. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToInt64Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long element = double.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToInt64Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToInt64Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToInt64Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToSingle(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - float element = value.GetElementUnsafe(i); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToSingle(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToSingle(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToSingle(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a . @@ -403,14 +466,17 @@ public static Vector ConvertToSingle(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToSingle(Vector value) { - if (Avx2.IsSupported) + if (sizeof(Vector) == 64) + { + return Vector512.ConvertToSingle(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) { - Debug.Assert(Vector.Count == Vector256.Count); return Vector256.ConvertToSingle(value.AsVector256()).AsVector(); } else { - Debug.Assert(Vector.Count == Vector128.Count); + Debug.Assert(sizeof(Vector) == 16); return Vector128.ConvertToSingle(value.AsVector128()).AsVector(); } } @@ -420,17 +486,22 @@ public static Vector ConvertToSingle(Vector value) /// The converted vector. 
[Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt32(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint element = float.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt32(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt32(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt32(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. @@ -438,17 +509,22 @@ public static Vector ConvertToUInt32(Vector value) /// The converted vector. [Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt32Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint element = float.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt32Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt32Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt32Native(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using saturation on overflow. @@ -456,17 +532,22 @@ public static Vector ConvertToUInt32Native(Vector value) /// The converted vector. 
[Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt64(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong element = double.ConvertToInteger(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt64(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt64(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt64(value.AsVector128()).AsVector(); } - - return result; } /// Converts a to a using platform specific behavior on overflow. @@ -474,17 +555,22 @@ public static Vector ConvertToUInt64(Vector value) /// The converted vector. [Intrinsic] [CLSCompliant(false)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector ConvertToUInt64Native(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong element = double.ConvertToIntegerNative(value.GetElementUnsafe(i)); - result.SetElementUnsafe(i, element); + return Vector512.ConvertToUInt64Native(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.ConvertToUInt64Native(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.ConvertToUInt64Native(value.AsVector128()).AsVector(); } - - return result; } /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. @@ -494,7 +580,22 @@ public static Vector ConvertToUInt64Native(Vector value) /// A new instance with the first element initialized to and each subsequent element initialized to the the value of the previous element plus . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector CreateSequence(T start, T step) => (Vector.Indices * step) + new Vector(start); + public static Vector CreateSequence(T start, T step) + { + if (sizeof(Vector) == 64) + { + return Vector512.CreateSequence(start, step).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.CreateSequence(start, step).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.CreateSequence(start, step).AsVector(); + } + } /// Divides two vectors to compute their quotient. /// The vector that will be divided by . @@ -518,7 +619,23 @@ public static Vector ConvertToUInt64Native(Vector value) /// The type of the elements in the vector. /// The dot product of and . [Intrinsic] - public static T Dot(Vector left, Vector right) => Sum(left * right); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static T Dot(Vector left, Vector right) + { + if (sizeof(Vector) == 64) + { + return Vector512.Dot(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Dot(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Dot(left.AsVector128(), right.AsVector128()); + } + } /// Compares two vectors to determine if they are equal on a per-element basis. /// The vector to compare with . @@ -529,15 +646,19 @@ public static Vector ConvertToUInt64Native(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Equals(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? 
Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.Equals(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Equals(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Equals(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine if they are equal on a per-element basis. @@ -585,15 +706,19 @@ public static Vector Equals(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool EqualsAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.EqualsAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.EqualsAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.EqualsAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Computes the floor of each element in a vector. 
@@ -604,15 +729,19 @@ public static bool EqualsAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Floor(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = Scalar.Floor(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Floor(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Floor(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Floor(value.AsVector128()).AsVector(); } - - return result; } /// Computes the floor of each element in a vector. @@ -623,15 +752,19 @@ public static Vector Floor(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Floor(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = Scalar.Floor(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Floor(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Floor(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Floor(value.AsVector128()).AsVector(); } - - return result; } /// Computes ( * ) + , rounded as one ternary operation. 
@@ -647,15 +780,19 @@ public static Vector Floor(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector FusedMultiplyAdd(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double value = double.FusedMultiplyAdd(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return Vector512.FusedMultiplyAdd(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.FusedMultiplyAdd(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.FusedMultiplyAdd(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Computes ( * ) + , rounded as one ternary operation. @@ -671,15 +808,19 @@ public static Vector FusedMultiplyAdd(Vector left, Vector FusedMultiplyAdd(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float value = float.FusedMultiplyAdd(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return Vector512.FusedMultiplyAdd(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.FusedMultiplyAdd(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.FusedMultiplyAdd(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Gets the element at the specified index. 
@@ -693,12 +834,19 @@ public static Vector FusedMultiplyAdd(Vector left, Vector r [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T GetElement(this Vector vector, int index) { - if ((uint)(index) >= (uint)(Vector.Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); + return vector.AsVector512().GetElement(index); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().GetElement(index); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().GetElement(index); } - - return vector.GetElementUnsafe(index); } /// Compares two vectors to determine which is greater on a per-element basis. @@ -710,15 +858,19 @@ public static T GetElement(this Vector vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector GreaterThan(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.GreaterThan(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThan(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThan(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is greater on a per-element basis. 
@@ -758,15 +910,19 @@ public static Vector GreaterThan(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.GreaterThanAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are greater. @@ -778,15 +934,19 @@ public static bool GreaterThanAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.GreaterThanAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is greater or equal on a per-element basis. 
@@ -798,15 +958,19 @@ public static bool GreaterThanAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector GreaterThanOrEqual(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.GreaterThanOrEqual(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqual(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqual(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is greater or equal on a per-element basis. @@ -846,15 +1010,19 @@ public static Vector GreaterThanOrEqual(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanOrEqualAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.GreaterThanOrEqualAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqualAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqualAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are greater or equal. 
@@ -866,15 +1034,19 @@ public static bool GreaterThanOrEqualAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool GreaterThanOrEqualAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.GreaterThanOrEqualAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.GreaterThanOrEqualAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.GreaterThanOrEqualAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is less on a per-element basis. @@ -886,15 +1058,19 @@ public static bool GreaterThanOrEqualAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LessThan(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.LessThan(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThan(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThan(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is less on a per-element basis. 
@@ -934,15 +1110,19 @@ public static Vector LessThan(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.LessThanAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are less. @@ -954,15 +1134,19 @@ public static bool LessThanAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.LessThanAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanAny(left.AsVector128(), right.AsVector128()); } - - return false; } /// Compares two vectors to determine which is less or equal on a per-element basis. 
@@ -974,15 +1158,19 @@ public static bool LessThanAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LessThanOrEqual(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar.AllBitsSet : default!; - result.SetElementUnsafe(index, value); + return Vector512.LessThanOrEqual(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqual(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqual(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Compares two vectors to determine which is less or equal on a per-element basis. @@ -1022,15 +1210,19 @@ public static Vector LessThanOrEqual(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanOrEqualAll(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return Vector512.LessThanOrEqualAll(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqualAll(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqualAll(left.AsVector128(), right.AsVector128()); } - - return true; } /// Compares two vectors to determine if any elements are less or equal. 
@@ -1042,18 +1234,21 @@ public static bool LessThanOrEqualAll(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool LessThanOrEqualAny(Vector left, Vector right) { - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - if (Scalar.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return true; - } + return Vector512.LessThanOrEqualAny(left.AsVector512(), right.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LessThanOrEqualAny(left.AsVector256(), right.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LessThanOrEqualAny(left.AsVector128(), right.AsVector128()); } - - return false; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1061,7 +1256,7 @@ public static bool LessThanOrEqualAny(Vector left, Vector right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static Vector Load(T* source) => LoadUnsafe(ref *source); + public static Vector Load(T* source) => LoadUnsafe(in *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. 
@@ -1073,14 +1268,19 @@ public static bool LessThanOrEqualAny(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadAligned(T* source) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (((nuint)(source) % Alignment) != 0) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowAccessViolationException(); + return Vector512.LoadAligned(source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadAligned(source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadAligned(source).AsVector(); } - - return *(Vector*)source; } /// Loads a vector from the given aligned source. @@ -1091,8 +1291,22 @@ public static Vector LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static Vector LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector LoadAlignedNonTemporal(T* source) + { + if (sizeof(Vector) == 64) + { + return Vector512.LoadAlignedNonTemporal(source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadAlignedNonTemporal(source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadAlignedNonTemporal(source).AsVector(); + } + } /// Loads a vector from the given source. /// The type of the elements in the vector. 
@@ -1103,9 +1317,19 @@ public static Vector LoadAligned(T* source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadUnsafe(ref readonly T source) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref readonly byte address = ref Unsafe.As(ref Unsafe.AsRef(in source)); - return Unsafe.ReadUnaligned>(in address); + if (sizeof(Vector) == 64) + { + return Vector512.LoadUnsafe(in source).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadUnsafe(in source).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadUnsafe(in source).AsVector(); + } } /// Loads a vector from the given source and element offset. @@ -1119,9 +1343,19 @@ public static Vector LoadUnsafe(ref readonly T source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector LoadUnsafe(ref readonly T source, nuint elementOffset) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref readonly byte address = ref Unsafe.As(ref Unsafe.Add(ref Unsafe.AsRef(in source), (nint)elementOffset)); - return Unsafe.ReadUnaligned>(in address); + if (sizeof(Vector) == 64) + { + return Vector512.LoadUnsafe(in source, elementOffset).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.LoadUnsafe(in source, elementOffset).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.LoadUnsafe(in source, elementOffset).AsVector(); + } } /// Computes the maximum of two vectors on a per-element basis. @@ -1133,15 +1367,19 @@ public static Vector LoadUnsafe(ref readonly T source, nuint elementOffset [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Max(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? 
left.GetElementUnsafe(index) : right.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return Vector512.Max(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Max(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Max(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Computes the minimum of two vectors on a per-element basis. @@ -1153,15 +1391,19 @@ public static Vector Max(Vector left, Vector right) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Min(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? left.GetElementUnsafe(index) : right.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return Vector512.Min(left.AsVector512(), right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Min(left.AsVector256(), right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Min(left.AsVector128(), right.AsVector128()).AsVector(); } - - return result; } /// Multiplies two vectors to compute their element-wise product. @@ -1186,22 +1428,26 @@ public static Vector Min(Vector left, Vector right) /// The type of the elements in the vector. /// The product of and . 
[Intrinsic] - public static Vector Multiply(T left, Vector right) => left * right; + public static Vector Multiply(T left, Vector right) => right * left; /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector MultiplyAddEstimate(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - double element = double.MultiplyAddEstimate(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.MultiplyAddEstimate(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.MultiplyAddEstimate(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.MultiplyAddEstimate(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// @@ -1209,15 +1455,19 @@ public static Vector MultiplyAddEstimate(Vector left, Vector MultiplyAddEstimate(Vector left, Vector right, Vector addend) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - float element = float.MultiplyAddEstimate(left.GetElementUnsafe(index), right.GetElementUnsafe(index), addend.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.MultiplyAddEstimate(left.AsVector512(), right.AsVector512(), addend.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.MultiplyAddEstimate(left.AsVector256(), right.AsVector256(), addend.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.MultiplyAddEstimate(left.AsVector128(), right.AsVector128(), addend.AsVector128()).AsVector(); } - - return result; } /// Narrows 
two instances into one . @@ -1228,21 +1478,19 @@ public static Vector MultiplyAddEstimate(Vector left, Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - float value = (float)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - float value = (float)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1254,21 +1502,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - sbyte value = (sbyte)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - sbyte value = (sbyte)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1279,21 +1525,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = (short)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - short value = (short)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1304,21 +1548,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = (int)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - int value = (int)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1330,21 +1572,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - byte value = (byte)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - byte value = (byte)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . @@ -1356,21 +1596,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = (ushort)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - ushort value = (ushort)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Narrows two instances into one . 
@@ -1382,21 +1620,19 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector Narrow(Vector low, Vector high) { - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = (uint)low.GetElementUnsafe(i); - result.SetElementUnsafe(i, value); + return Vector512.Narrow(low.AsVector512(), high.AsVector512()).AsVector(); } - - for (int i = Vector.Count; i < Vector.Count; i++) + else if (sizeof(Vector) == 32) { - uint value = (uint)high.GetElementUnsafe(i - Vector.Count); - result.SetElementUnsafe(i, value); + return Vector256.Narrow(low.AsVector256(), high.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Narrow(low.AsVector128(), high.AsVector128()).AsVector(); } - - return result; } /// Computes the unary negation of a vector. @@ -1607,18 +1843,21 @@ public static Vector Narrow(Vector low, Vector high) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector SquareRoot(Vector value) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.Sqrt(value.GetElementUnsafe(index)); - result.SetElementUnsafe(index, element); + return Vector512.Sqrt(value.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Sqrt(value.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Sqrt(value.AsVector128()).AsVector(); } - - return result; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. 
@@ -1638,14 +1877,19 @@ public static Vector SquareRoot(Vector value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreAligned(this Vector source, T* destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (((nuint)destination % Alignment) != 0) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowAccessViolationException(); + source.AsVector512().StoreAligned(destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreAligned(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreAligned(destination); } - - *(Vector*)destination = source; } /// Stores a vector at the given aligned destination. @@ -1656,8 +1900,22 @@ public static void StoreAligned(this Vector source, T* destination) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static void StoreAlignedNonTemporal(this Vector source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector source, T* destination) + { + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreAlignedNonTemporal(destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreAlignedNonTemporal(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreAlignedNonTemporal(destination); + } + } /// Stores a vector at the given destination. /// The type of the elements in the vector. 
@@ -1668,9 +1926,19 @@ public static void StoreAligned(this Vector source, T* destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreUnsafe(this Vector source, ref T destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - ref byte address = ref Unsafe.As(ref destination); - Unsafe.WriteUnaligned(ref address, source); + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreUnsafe(ref destination); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreUnsafe(ref destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreUnsafe(ref destination); + } } /// Stores a vector at the given destination. @@ -1684,9 +1952,19 @@ public static void StoreUnsafe(this Vector source, ref T destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void StoreUnsafe(this Vector source, ref T destination, nuint elementOffset) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - destination = ref Unsafe.Add(ref destination, (nint)elementOffset); - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination), source); + if (sizeof(Vector) == 64) + { + source.AsVector512().StoreUnsafe(ref destination, elementOffset); + } + else if (sizeof(Vector) == 32) + { + source.AsVector256().StoreUnsafe(ref destination, elementOffset); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + source.AsVector128().StoreUnsafe(ref destination, elementOffset); + } } /// Subtracts two vectors to compute their difference. 
@@ -1704,14 +1982,19 @@ public static void StoreUnsafe(this Vector source, ref T destination, nuin [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Sum(Vector value) { - T sum = default!; - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - sum = Scalar.Add(sum, value.GetElementUnsafe(index)); + return Vector512.Sum(value.AsVector512()); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Sum(value.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Sum(value.AsVector128()); } - - return sum; } /// Converts the given vector to a scalar containing the value of the first element. @@ -1720,10 +2003,22 @@ public static T Sum(Vector value) /// A scalar containing the value of the first element. /// The type of () is not supported. [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T ToScalar(this Vector vector) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - return vector.GetElementUnsafe(0); + if (sizeof(Vector) == 64) + { + return vector.AsVector512().ToScalar(); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().ToScalar(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().ToScalar(); + } } /// Widens a into two . 
@@ -1815,15 +2110,19 @@ public static void Widen(Vector source, out Vector low, out Vector< [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1833,15 +2132,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1851,15 +2154,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1870,15 +2177,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1888,15 +2199,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - double value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . @@ -1907,15 +2222,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the lower half of a into a . 
@@ -1926,15 +2245,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenLower(Vector source) { - Unsafe.SkipInit(out Vector lower); - - for (int i = 0; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong value = source.GetElementUnsafe(i); - lower.SetElementUnsafe(i, value); + return Vector512.WidenLower(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenLower(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenLower(source.AsVector128()).AsVector(); } - - return lower; } /// Widens the upper half of a into a . @@ -1945,15 +2268,19 @@ public static Vector WidenLower(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ushort value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -1963,15 +2290,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - int value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -1981,15 +2312,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - long value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -2000,15 +2335,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - short value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -2018,15 +2357,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - double value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . 
@@ -2037,15 +2380,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - uint value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Widens the upper half of a into a . @@ -2056,15 +2403,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WidenUpper(Vector source) { - Unsafe.SkipInit(out Vector upper); - - for (int i = Vector.Count; i < Vector.Count; i++) + if (sizeof(Vector) == 64) { - ulong value = source.GetElementUnsafe(i); - upper.SetElementUnsafe(i - Vector.Count, value); + return Vector512.WidenUpper(source.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.WidenUpper(source.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.WidenUpper(source.AsVector128()).AsVector(); } - - return upper; } /// Creates a new with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the given vector. 
@@ -2078,14 +2429,19 @@ public static Vector WidenUpper(Vector source) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector WithElement(this Vector vector, int index, T value) { - if ((uint)(index) >= (uint)(Vector.Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index); + return vector.AsVector512().WithElement(index, value).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return vector.AsVector256().WithElement(index, value).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return vector.AsVector128().WithElement(index, value).AsVector(); } - - Vector result = vector; - result.SetElementUnsafe(index, value); - return result; } /// Computes the exclusive-or of two vectors. @@ -2095,21 +2451,5 @@ public static Vector WithElement(this Vector vector, int index, T value /// The exclusive-or of and . [Intrinsic] public static Vector Xor(Vector left, Vector right) => left ^ right; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static T GetElementUnsafe(in this Vector vector, int index) - { - Debug.Assert((index >= 0) && (index < Vector.Count)); - ref T address = ref Unsafe.As, T>(ref Unsafe.AsRef(in vector)); - return Unsafe.Add(ref address, index); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void SetElementUnsafe(in this Vector vector, int index, T value) - { - Debug.Assert((index >= 0) && (index < Vector.Count)); - ref T address = ref Unsafe.As, T>(ref Unsafe.AsRef(in vector)); - Unsafe.Add(ref address, index) = value; - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 4009387a4f796..4d1c62cf4876e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -5,9 +5,9 @@ using System.Diagnostics.CodeAnalysis; 
using System.Globalization; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Text; + +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') namespace System.Numerics { @@ -28,7 +28,7 @@ namespace System.Numerics [Intrinsic] [DebuggerDisplay("{DisplayString,nq}")] [DebuggerTypeProxy(typeof(VectorDebugView<>))] - public readonly struct Vector : IEquatable>, IFormattable + public readonly unsafe struct Vector : IEquatable>, IFormattable { // These fields exist to ensure the alignment is 8, rather than 1. internal readonly ulong _00; @@ -38,13 +38,21 @@ namespace System.Numerics /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T value) { - Unsafe.SkipInit(out this); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - this.SetElementUnsafe(index, value); + this = Vector512.Create(value).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(value).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(value).AsVector(); } } @@ -56,14 +64,19 @@ public Vector(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T[] values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (values.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); + this = Vector512.Create(values).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[0])); } /// Creates a new from 
a given array. @@ -75,14 +88,19 @@ public Vector(T[] values) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(T[] values, int index) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if ((index < 0) || ((values.Length - index) < Count)) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException(); + this = Vector512.Create(values, index).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values, index).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values, index).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref values[index])); } /// Creates a new from a given readonly span. @@ -92,14 +110,19 @@ public Vector(T[] values, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector(ReadOnlySpan values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (values.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); + this = Vector512.Create(values).AsVector(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector(); } - - this = Unsafe.ReadUnaligned>(ref Unsafe.As(ref MemoryMarshal.GetReference(values))); } /// Creates a new from a given readonly span. @@ -107,17 +130,21 @@ public Vector(ReadOnlySpan values) /// A new with its elements set to the first sizeof() elements from . /// The length of is less than sizeof(). 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe Vector(ReadOnlySpan values) + public Vector(ReadOnlySpan values) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (values.Length < Vector.Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values); + this = Vector512.Create(values).AsVector().As(); + } + else if (sizeof(Vector) == 32) + { + this = Vector256.Create(values).AsVector().As(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this = Vector128.Create(values).AsVector().As(); } - - this = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(values)); } /// Creates a new from a given span. @@ -136,19 +163,28 @@ public static Vector AllBitsSet get => new Vector(Scalar.AllBitsSet); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets the number of that are in a . /// The type of the current instance () is not supported. - public static unsafe int Count + public static int Count { [Intrinsic] get { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - return sizeof(Vector) / sizeof(T); + if (sizeof(Vector) == 64) + { + return Vector512.Count; + } + else if (sizeof(Vector) == 32) + { + return Vector256.Count; + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Count; + } } } -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets a new with the elements set to their index. /// The type of the vector () is not supported. 
@@ -158,15 +194,19 @@ public static Vector Indices [MethodImpl(MethodImplOptions.AggressiveInlining)] get { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - for (int i = 0; i < Count; i++) + if (sizeof(Vector) == 64) { - result.SetElementUnsafe(i, Scalar.Convert(i)); + return Vector512.Indices.AsVector(); + } + else if (sizeof(Vector) == 32) + { + return Vector256.Indices.AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return Vector128.Indices.AsVector(); } - - return result; } } @@ -231,15 +271,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator +(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() + right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() + right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() + right.AsVector128()).AsVector(); } - - return result; } /// Computes the bitwise-and of two vectors. 
@@ -250,19 +294,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator &(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) & vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() & right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() & right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() & right.AsVector128()).AsVector(); } - - return result.As(); } /// Computes the bitwise-or of two vectors. @@ -273,19 +317,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator |(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) | vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() | right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() | right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() | right.AsVector128()).AsVector(); } - - return result.As(); } /// Divides two vectors to compute their quotient. 
@@ -296,15 +340,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator /(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() / right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() / right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() / right.AsVector128()).AsVector(); } - - return result; } /// Divides a vector by a scalar to compute the per-element quotient. @@ -313,18 +361,7 @@ public T this[int index] /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector operator /(Vector left, T right) - { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Divide(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector operator /(Vector left, T right) => left / new Vector(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . 
@@ -334,14 +371,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector left, Vector right) { - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - if (!Scalar.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index))) - { - return false; - } + return left.AsVector512() == right.AsVector512(); + } + else if (sizeof(Vector) == 32) + { + return left.AsVector256() == right.AsVector256(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return left.AsVector128() == right.AsVector128(); } - return true; } /// Computes the exclusive-or of two vectors. @@ -352,19 +394,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ^(Vector left, Vector right) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vleft = left.As(); - Vector vright = right.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong value = vleft.GetElementUnsafe(index) ^ vright.GetElementUnsafe(index); - result.SetElementUnsafe(index, value); + return (left.AsVector512() ^ right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() ^ right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() ^ right.AsVector128()).AsVector(); } - - return result.As(); } /// Reinterprets a as a new . 
@@ -471,15 +513,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator <<(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() << shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() << shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() << shiftCount).AsVector(); } - - return result; } /// Multiplies two vectors to compute their element-wise product. @@ -490,15 +536,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator *(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() * right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() * right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() * right.AsVector128()).AsVector(); } - - return result; } /// Multiplies a vector by a scalar to compute their product. @@ -513,7 +563,7 @@ public T this[int index] /// The vector to multiply with . /// The product of and . [Intrinsic] - public static Vector operator *(T factor, Vector value) => value * factor; + public static Vector operator *(T factor, Vector value) => value * new Vector(factor); /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. 
@@ -522,18 +572,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - Unsafe.SkipInit(out Vector result); - - Vector vector = value.As(); - - for (int index = 0; index < Vector.Count; index++) + if (sizeof(Vector) == 64) { - ulong element = ~vector.GetElementUnsafe(index); - result.SetElementUnsafe(index, element); + return (~(value.AsVector512())).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (~(value.AsVector256())).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (~(value.AsVector128())).AsVector(); } - - return result.As(); } /// Shifts (signed) each element of a vector right by the specified amount. @@ -544,15 +595,19 @@ public T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator >>(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() >> shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() >> shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() >> shiftCount).AsVector(); } - - return result; } /// Subtracts two vectors to compute their difference. 
@@ -563,15 +618,19 @@ public static Vector operator >>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator -(Vector left, Vector right) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + return (left.AsVector512() - right.AsVector512()).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (left.AsVector256() - right.AsVector256()).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (left.AsVector128() - right.AsVector128()).AsVector(); } - - return result; } /// Computes the unary negation of a vector. @@ -599,15 +658,19 @@ public static Vector operator >>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator >>>(Vector value, int shiftCount) { - Unsafe.SkipInit(out Vector result); - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T element = Scalar.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + return (value.AsVector512() >>> shiftCount).AsVector(); + } + else if (sizeof(Vector) == 32) + { + return (value.AsVector256() >>> shiftCount).AsVector(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return (value.AsVector128() >>> shiftCount).AsVector(); } - - return result; } /// Copies a to a given array. 
@@ -617,14 +680,19 @@ public static Vector operator >>>(Vector value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(T[] destination) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[0]), this); } /// Copies a to a given array starting at the specified index. @@ -636,35 +704,40 @@ public void CopyTo(T[] destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(T[] destination, int startIndex) { - // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons - - if ((uint)startIndex >= (uint)destination.Length) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowStartIndexArgumentOutOfRange_ArgumentOutOfRange_IndexMustBeLess(); + this.AsVector512().CopyTo(destination, startIndex); } - - if ((destination.Length - startIndex) < Count) + else if (sizeof(Vector) == 32) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector256().CopyTo(destination, startIndex); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination, startIndex); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref destination[startIndex]), this); } /// Copies a to a given span. /// The span to which the current instance is copied. /// The length of is less than sizeof(). 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe void CopyTo(Span destination) + public void CopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (destination.Length < Vector.Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().As().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().As().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().As().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); } /// Copies a to a given span. @@ -673,12 +746,19 @@ public unsafe void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public void CopyTo(Span destination) { - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - ThrowHelper.ThrowArgumentException_DestinationTooShort(); + this.AsVector512().CopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + this.AsVector256().CopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + this.AsVector128().CopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); } /// Returns a boolean indicating whether the given Object is equal to this vector instance. 
@@ -692,34 +772,18 @@ public void CopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Equals(Vector other) { - // This function needs to account for floating-point equality around NaN - // and so must behave equivalently to the underlying float/double.Equals - - if (Vector.IsHardwareAccelerated) + if (sizeof(Vector) == 64) { - if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float))) - { - Vector result = Vector.Equals(this, other) | ~(Vector.Equals(this, this) | Vector.Equals(other, other)); - return result.As() == Vector.AllBitsSet; - } - else - { - return this == other; - } + return this.AsVector512().Equals(other.AsVector512()); } - - return SoftwareFallback(in this, other); - - static bool SoftwareFallback(in Vector self, Vector other) + else if (sizeof(Vector) == 32) { - for (int index = 0; index < Count; index++) - { - if (!Scalar.ObjectEquals(self.GetElementUnsafe(index), other.GetElementUnsafe(index))) - { - return false; - } - } - return true; + return this.AsVector256().Equals(other.AsVector256()); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().Equals(other.AsVector128()); } } @@ -727,15 +791,19 @@ static bool SoftwareFallback(in Vector self, Vector other) /// The hash code. public override int GetHashCode() { - HashCode hashCode = default; - - for (int index = 0; index < Count; index++) + if (sizeof(Vector) == 64) { - T value = this.GetElementUnsafe(index); - hashCode.Add(value); + return this.AsVector512().GetHashCode(); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().GetHashCode(); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().GetHashCode(); } - - return hashCode.ToHashCode(); } /// Returns a String representing this vector. @@ -753,40 +821,40 @@ public override int GetHashCode() /// The string representation. public string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? 
formatProvider) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - var sb = new ValueStringBuilder(stackalloc char[64]); - string separator = NumberFormatInfo.GetInstance(formatProvider).NumberGroupSeparator; - - sb.Append('<'); - sb.Append(((IFormattable)this.GetElementUnsafe(0)).ToString(format, formatProvider)); - - for (int i = 1; i < Count; i++) + if (sizeof(Vector) == 64) { - sb.Append(separator); - sb.Append(' '); - sb.Append(((IFormattable)this.GetElementUnsafe(i)).ToString(format, formatProvider)); + return this.AsVector512().ToString(format, formatProvider); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().ToString(format, formatProvider); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().ToString(format, formatProvider); } - sb.Append('>'); - - return sb.ToString(); } /// Tries to copy a to a given span. /// The span to which the current instance is copied. /// true if the current instance was successfully copied to ; otherwise, false if the length of is less than sizeof(). [MethodImpl(MethodImplOptions.AggressiveInlining)] - public unsafe bool TryCopyTo(Span destination) + public bool TryCopyTo(Span destination) { - ThrowHelper.ThrowForUnsupportedNumericsVectorBaseType(); - - if (destination.Length < Vector.Count) + if (sizeof(Vector) == 64) { - return false; + return this.AsVector512().As().TryCopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().As().TryCopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().As().TryCopyTo(destination); } - - Unsafe.WriteUnaligned(ref MemoryMarshal.GetReference(destination), this); - return true; } /// Tries to copy a to a given span. 
@@ -795,13 +863,19 @@ public unsafe bool TryCopyTo(Span destination) [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryCopyTo(Span destination) { - if (destination.Length < Count) + if (sizeof(Vector) == 64) { - return false; + return this.AsVector512().TryCopyTo(destination); + } + else if (sizeof(Vector) == 32) + { + return this.AsVector256().TryCopyTo(destination); + } + else + { + Debug.Assert(sizeof(Vector) == 16); + return this.AsVector128().TryCopyTo(destination); } - - Unsafe.WriteUnaligned(ref Unsafe.As(ref MemoryMarshal.GetReference(destination)), this); - return true; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 7f83b8c09d79e..cce0407615b60 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -9,6 +9,8 @@ using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -30,7 +32,7 @@ namespace System.Runtime.Intrinsics // the internal inlining limits of the JIT. /// Provides a collection of static methods for creating, manipulating, and otherwise operating on 128-bit vectors. 
- public static class Vector128 + public static unsafe class Vector128 { internal const int Size = 16; @@ -58,10 +60,149 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Abs(Vector128 vector) { - return Create( - Vector64.Abs(vector._lower), - Vector64.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(vector); + } + else if (Sse.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Abs(vector.AsSingle()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Abs(vector.AsInt32()).As(); + } + else if (AdvSimd.Arm64.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return AdvSimd.Arm64.Abs(vector.AsDouble()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Arm64.Abs(vector.AsInt64()).As(); + } + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Abs(vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Abs(vector.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return PackedSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) 
== 2) + { + return PackedSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Abs(vector.AsInt64()).As(); + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return Sse.AndNot(Vector128.Create(-0.0f), vector.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.AndNot(Vector128.Create(-0.0), vector.AsDouble()).As(); + } + else if (Sse3.IsSupported) + { + if (sizeof(T) == 1) + { + return Ssse3.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Ssse3.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Ssse3.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.Abs(vector.AsInt64()).As(); + } + } + } + + if (sizeof(T) is 1 or 2 or 4 or 8) + { + return ConditionalSelect(LessThan(vector, Vector128.Zero), Vector128.Zero - vector, vector); + } + } + return SoftwareImpl(vector); + } + + static Vector128 SoftwareImpl(Vector128 vector) + { + return Create( + Vector64.Abs(vector._lower), + Vector64.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -80,13 +221,7 @@ public static Vector128 Abs(Vector128 vector) /// The bitwise-and of and the ones-complement of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 AndNot(Vector128 left, Vector128 right) - { - return Create( - Vector64.AndNot(left._lower, right._lower), - Vector64.AndNot(left._upper, right._upper) - ); - } + public static Vector128 AndNot(Vector128 left, Vector128 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. @@ -377,10 +512,26 @@ public static Vector AsVector(this Vector128 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector128 Ceiling(Vector128 vector) { - return Create( - Vector64.Ceiling(vector._lower), - Vector64.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector64.Ceiling(vector._lower), + Vector64.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -406,20 +557,14 @@ internal static Vector128 Ceiling(Vector128 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 ConditionalSelect(Vector128 condition, Vector128 left, Vector128 right) - { - return Create( - Vector64.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector64.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector128 ConditionalSelect(Vector128 condition, Vector128 left, Vector128 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToDouble(Vector128 vector) + public static Vector128 ConvertToDouble(Vector128 vector) { if (Sse2.IsSupported) { @@ -460,7 +605,7 @@ public static unsafe Vector128 ConvertToDouble(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToDouble(Vector128 vector) + public static Vector128 ConvertToDouble(Vector128 vector) { if (Sse2.IsSupported) { @@ -500,7 +645,7 @@ public static unsafe Vector128 ConvertToDouble(Vector128 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt32(Vector128 vector) + public static Vector128 ConvertToInt32(Vector128 vector) { return Create( Vector64.ConvertToInt32(vector._lower), @@ -513,7 +658,7 @@ public static unsafe Vector128 ConvertToInt32(Vector128 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt32Native(Vector128 vector) + public static Vector128 ConvertToInt32Native(Vector128 vector) { return Create( Vector64.ConvertToInt32Native(vector._lower), @@ -526,7 +671,7 @@ public static unsafe Vector128 ConvertToInt32Native(Vector128 vector /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt64(Vector128 vector) + public static Vector128 ConvertToInt64(Vector128 vector) { return Create( Vector64.ConvertToInt64(vector._lower), @@ -539,7 +684,7 @@ public static unsafe Vector128 ConvertToInt64(Vector128 vector) /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToInt64Native(Vector128 vector) + public static Vector128 ConvertToInt64Native(Vector128 vector) { return Create( Vector64.ConvertToInt64Native(vector._lower), @@ -552,7 +697,7 @@ public static unsafe Vector128 ConvertToInt64Native(Vector128 vect /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToSingle(Vector128 vector) + public static Vector128 ConvertToSingle(Vector128 vector) { return Create( Vector64.ConvertToSingle(vector._lower), @@ -566,7 +711,7 @@ public static unsafe Vector128 ConvertToSingle(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToSingle(Vector128 vector) + public static Vector128 ConvertToSingle(Vector128 vector) { if (Sse2.IsSupported) { @@ -623,7 +768,7 @@ static Vector128 SoftwareFallback(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt32(Vector128 vector) + public static Vector128 ConvertToUInt32(Vector128 vector) { return Create( Vector64.ConvertToUInt32(vector._lower), @@ -637,7 +782,7 @@ public static unsafe Vector128 ConvertToUInt32(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt32Native(Vector128 vector) + public static Vector128 ConvertToUInt32Native(Vector128 vector) { return Create( Vector64.ConvertToUInt32Native(vector._lower), @@ -651,7 +796,7 @@ public static unsafe Vector128 ConvertToUInt32Native(Vector128 vect [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt64(Vector128 vector) + public static Vector128 ConvertToUInt64(Vector128 vector) { return Create( 
Vector64.ConvertToUInt64(vector._lower), @@ -665,7 +810,7 @@ public static unsafe Vector128 ConvertToUInt64(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ConvertToUInt64Native(Vector128 vector) + public static Vector128 ConvertToUInt64Native(Vector128 vector) { return Create( Vector64.ConvertToUInt64Native(vector._lower), @@ -703,7 +848,7 @@ public static void CopyTo(this Vector128 vector, T[] destination) /// The type of and () is not supported. /// is null. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CopyTo(this Vector128 vector, T[] destination, int startIndex) + public static void CopyTo(this Vector128 vector, T[] destination, int startIndex) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons @@ -743,7 +888,7 @@ public static void CopyTo(this Vector128 vector, Span destination) /// A new with all elements initialized to . /// The type of () is not supported. [Intrinsic] - public static unsafe Vector128 Create(T value) + public static Vector128 Create(T value) { Vector64 vector = Vector64.Create(value); return Create(vector, vector); @@ -754,48 +899,48 @@ public static unsafe Vector128 Create(T value) /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi8 [Intrinsic] - public static unsafe Vector128 Create(byte value) => Create(value); + public static Vector128 Create(byte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
/// On x86, this method corresponds to __m128d _mm_set1_pd [Intrinsic] - public static unsafe Vector128 Create(double value) => Create(value); + public static Vector128 Create(double value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi16 [Intrinsic] - public static unsafe Vector128 Create(short value) => Create(value); + public static Vector128 Create(short value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi32 [Intrinsic] - public static unsafe Vector128 Create(int value) => Create(value); + public static Vector128 Create(int value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128i _mm_set1_epi64x [Intrinsic] - public static unsafe Vector128 Create(long value) => Create(value); + public static Vector128 Create(long value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector128 Create(nint value) => Create(value); + public static Vector128 Create(nint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(nuint value) => Create(value); + public static Vector128 Create(nuint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -803,14 +948,14 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi8 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(sbyte value) => Create(value); + public static Vector128 Create(sbyte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . /// On x86, this method corresponds to __m128 _mm_set1_ps [Intrinsic] - public static unsafe Vector128 Create(float value) => Create(value); + public static Vector128 Create(float value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -818,7 +963,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi16 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(ushort value) => Create(value); + public static Vector128 Create(ushort value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -826,7 +971,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi32 [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(uint value) => Create(value); + public static Vector128 Create(uint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. 
@@ -834,7 +979,7 @@ public static unsafe Vector128 Create(T value) /// On x86, this method corresponds to __m128i _mm_set1_epi64x [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Create(ulong value) => Create(value); + public static Vector128 Create(ulong value) => Create(value); /// Creates a new from a given array. /// The type of the elements in the vector. @@ -915,7 +1060,7 @@ public static Vector128 Create(ReadOnlySpan values) /// On x86, this method corresponds to __m128i _mm_setr_epi8 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) + public static Vector128 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15) { return Create( Vector64.Create(e0, e1, e2, e3, e4, e5, e6, e7), @@ -930,7 +1075,7 @@ public static unsafe Vector128 Create(byte e0, byte e1, byte e2, byte e3, /// On x86, this method corresponds to __m128d _mm_setr_pd [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(double e0, double e1) + public static Vector128 Create(double e0, double e1) { return Create( Vector64.Create(e0), @@ -951,7 +1096,7 @@ public static unsafe Vector128 Create(double e0, double e1) /// On x86, this method corresponds to __m128i _mm_setr_epi16 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) + public static Vector128 Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) { return Create( Vector64.Create(e0, e1, e2, e3), @@ -968,7 +1113,7 @@ public static unsafe Vector128 Create(short e0, short e1, short e2, short /// On x86, this method 
corresponds to __m128i _mm_setr_epi32 [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) + public static Vector128 Create(int e0, int e1, int e2, int e3) { return Create( Vector64.Create(e0, e1), @@ -983,7 +1128,7 @@ public static unsafe Vector128 Create(int e0, int e1, int e2, int e3) /// On x86, this method corresponds to __m128i _mm_setr_epi64x [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(long e0, long e1) + public static Vector128 Create(long e0, long e1) { return Create( Vector64.Create(e0), @@ -1013,7 +1158,7 @@ public static unsafe Vector128 Create(long e0, long e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) + public static Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15) { return Create( Vector64.Create(e0, e1, e2, e3, e4, e5, e6, e7), @@ -1030,7 +1175,7 @@ public static unsafe Vector128 Create(sbyte e0, sbyte e1, sbyte e2, sbyte /// On x86, this method corresponds to __m128 _mm_setr_ps [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(float e0, float e1, float e2, float e3) + public static Vector128 Create(float e0, float e1, float e2, float e3) { return Create( Vector64.Create(e0, e1), @@ -1052,7 +1197,7 @@ public static unsafe Vector128 Create(float e0, float e1, float e2, float [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7) 
+ public static Vector128 Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7) { return Create( Vector64.Create(e0, e1, e2, e3), @@ -1070,7 +1215,7 @@ public static unsafe Vector128 Create(ushort e0, ushort e1, ushort e2, u [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) + public static Vector128 Create(uint e0, uint e1, uint e2, uint e3) { return Create( Vector64.Create(e0, e1), @@ -1086,7 +1231,7 @@ public static unsafe Vector128 Create(uint e0, uint e1, uint e2, uint e3) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Create(ulong e0, ulong e1) + public static Vector128 Create(ulong e0, ulong e1) { return Create( Vector64.Create(e0), @@ -1123,65 +1268,65 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . 
- public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// On x86, this method corresponds to __m128i _mm_setr_epi64 /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. 
/// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. @@ -1189,14 +1334,14 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// On x86, this method corresponds to __m128i _mm_setr_epi64 /// A new initialized from and . [CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance from two instances. /// The value that the lower 64-bits will be initialized to. /// The value that the upper 64-bits will be initialized to. /// A new initialized from and . 
[CLSCompliant(false)] - public static unsafe Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); + public static Vector128 Create(Vector64 lower, Vector64 upper) => Create(lower, upper); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The type of the elements in the vector. @@ -1204,84 +1349,84 @@ public static Vector128 Create(Vector64 lower, Vector64 upper) /// A new instance with the first element initialized to and the remaining elements initialized to zero. /// The type of () is not supported. [Intrinsic] - public static unsafe Vector128 CreateScalar(T value) => Vector64.CreateScalar(value).ToVector128(); + public static Vector128 CreateScalar(T value) => Vector64.CreateScalar(value).ToVector128(); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(byte value) => CreateScalar(value); + public static Vector128 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(double value) => CreateScalar(value); + public static Vector128 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(short value) => CreateScalar(value); + public static Vector128 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(int value) => CreateScalar(value); + public static Vector128 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(long value) => CreateScalar(value); + public static Vector128 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(nint value) => CreateScalar(value); + public static Vector128 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(nuint value) => CreateScalar(value); + public static Vector128 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(sbyte value) => CreateScalar(value); + public static Vector128 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector128 CreateScalar(float value) => CreateScalar(value); + public static Vector128 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(ushort value) => CreateScalar(value); + public static Vector128 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(uint value) => CreateScalar(value); + public static Vector128 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalar(ulong value) => CreateScalar(value); + public static Vector128 CreateScalar(ulong value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The type of the elements in the vector. @@ -1306,78 +1451,78 @@ public static Vector128 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector128 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. 
/// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); + public static Vector128 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. /// The type of the elements in the vector. @@ -1413,16 +1558,7 @@ public static Vector128 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector128 left, Vector128 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. - - T result = Vector64.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector64.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector128 left, Vector128 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. 
@@ -1519,10 +1655,26 @@ public static uint ExtractMostSignificantBits(this Vector128 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector128 Floor(Vector128 vector) { - return Create( - Vector64.Floor(vector._lower), - Vector64.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector64.Floor(vector._lower), + Vector64.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. @@ -1782,7 +1934,6 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) || Vector64.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1790,7 +1941,7 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 Load(T* source) => LoadUnsafe(ref *source); + public static Vector128 Load(T* source) => LoadUnsafe(ref *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. 
@@ -1800,7 +1951,7 @@ public static bool LessThanOrEqualAny(Vector128 left, Vector128 right) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 LoadAligned(T* source) + public static Vector128 LoadAligned(T* source) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -1820,8 +1971,7 @@ public static unsafe Vector128 LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector128 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector128 LoadAlignedNonTemporal(T* source) => LoadAligned(source); /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1989,7 +2139,7 @@ public static Vector128 Min(Vector128 left, Vector128 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector128 Multiply(T left, Vector128 right) => left * right; + public static Vector128 Multiply(T left, Vector128 right) => right * left; /// [Intrinsic] @@ -2019,7 +2169,7 @@ public static Vector128 MultiplyAddEstimate(Vector128 left, Vector /// A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2034,7 +2184,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128< [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2048,7 +2198,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2062,7 +2212,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2077,7 +2227,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2092,7 +2242,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2107,7 +2257,7 @@ public static unsafe Vector128 Narrow(Vector128 lower, Vector128 Narrow(Vector128 lower, Vector128 upper) + public static Vector128 Narrow(Vector128 lower, Vector128 upper) { return Create( Vector64.Narrow(lower._lower, lower._upper), @@ -2629,7 +2779,6 @@ public static Vector128 Sqrt(Vector128 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2637,7 +2786,7 @@ public static Vector128 Sqrt(Vector128 vector) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void Store(this Vector128 source, T* destination) => source.StoreUnsafe(ref *destination); + public static void Store(this Vector128 source, T* destination) => source.StoreUnsafe(ref *destination); /// Stores a vector at the given aligned destination. /// The type of the elements in the vector. 
@@ -2647,7 +2796,7 @@ public static Vector128 Sqrt(Vector128 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void StoreAligned(this Vector128 source, T* destination) + public static void StoreAligned(this Vector128 source, T* destination) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -2667,8 +2816,7 @@ public static unsafe void StoreAligned(this Vector128 source, T* destinati /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void StoreAlignedNonTemporal(this Vector128 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector128 source, T* destination) => source.StoreAligned(destination); /// /// Stores to lower 64 bits of to memory destination of [] @@ -2735,14 +2883,13 @@ public static void StoreUnsafe(this Vector128 source, ref T destination, n [MethodImpl(MethodImplOptions.AggressiveInlining)] public static T Sum(Vector128 vector) { - T sum = default!; - - for (int index = 0; index < Vector128.Count; index++) - { - sum = Scalar.Add(sum, vector.GetElementUnsafe(index)); - } + // Doing this as Sum(lower) + Sum(upper) is important for floating-point determinism + // This is because the underlying dpps instruction on x86/x64 will do this equivalently + // and otherwise the software vs accelerated implementations may differ in returned result. - return sum; + T result = Vector64.Sum(vector._lower); + result = Scalar.Add(result, Vector64.Sum(vector._upper)); + return result; } /// Converts the given vector to a scalar containing the value of the first element. @@ -2780,7 +2927,7 @@ public static Vector256 ToVector256(this Vector128 vector) /// The type of () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector256 ToVector256Unsafe(this Vector128 vector) + public static Vector256 ToVector256Unsafe(this Vector128 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -2815,46 +2962,46 @@ public static bool TryCopyTo(this Vector128 vector, Span destination) /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . 
[CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector128 Lower, Vector128 Upper) Widen(Vector128 source) => (WidenLower(source), WidenUpper(source)); /// Widens the lower half of a into a . /// The vector whose elements are to be widened. @@ -2877,7 +3024,7 @@ public static Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2892,7 +3039,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2908,7 +3055,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2923,7 +3070,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2939,7 +3086,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2955,7 +3102,7 @@ public static unsafe Vector128 WidenLower(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenLower(Vector128 source) + public static Vector128 WidenLower(Vector128 source) { Vector64 lower = source._lower; @@ -2986,7 +3133,7 @@ public static Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3001,7 +3148,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3017,7 +3164,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3032,7 +3179,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3048,7 +3195,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; @@ -3064,7 +3211,7 @@ public static unsafe Vector128 WidenUpper(Vector128 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 WidenUpper(Vector128 source) + public static Vector128 WidenUpper(Vector128 source) { Vector64 upper = source._upper; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index c9f1b6e3f21ef..7b0f847fd37bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -7,8 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -22,7 +27,6 @@ namespace System.Runtime.Intrinsics // This ensures we get good codegen for the "fast-path" and allows the JIT to // determine inline profitability of the other paths as it would normally. 
- /// Represents a 128-bit vector of a specified numeric type that is suitable for low-level optimization of parallel algorithms. /// The type of the elements in the vector. [Intrinsic] @@ -39,11 +43,7 @@ namespace System.Runtime.Intrinsics public static Vector128 AllBitsSet { [Intrinsic] - get - { - Vector64 vector = Vector64.AllBitsSet; - return Vector128.Create(vector, vector); - } + get => Vector128.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -51,7 +51,11 @@ public static Vector128 AllBitsSet public static int Count { [Intrinsic] - get => Vector64.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector128.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. @@ -102,11 +106,7 @@ public static bool IsSupported public static Vector128 One { [Intrinsic] - get - { - Vector64 vector = Vector64.One; - return Vector128.Create(vector, vector); - } + get => Vector128.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. 
@@ -139,10 +139,128 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator +(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Add(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return 
PackedSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. @@ -154,10 +272,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator &(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.And(left.AsDouble(), 
right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.And(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. @@ -169,10 +389,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator |(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Or(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -184,10 +506,76 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator /(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (AdvSimd.Arm64.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Arm64.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.Arm64.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Divide(left.AsDouble(), right.AsDouble()).As(); + } + } + 
return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -196,13 +584,7 @@ public static Vector128 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator /(Vector128 left, T right) - { - return Vector128.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector128 operator /(Vector128 left, T right) => left / Vector128.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -226,10 +608,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator ^(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (sizeof(T) == 1) + { + return PackedSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Xor(left.AsDouble(), 
right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. @@ -248,10 +732,98 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator <<(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector128.Create((sbyte)(shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector128.Create((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector128.Create(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogical(value.AsUInt64(), Vector128.Create(shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftLeft(value.AsByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftLeft(value.AsUInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftLeft(value.AsUInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.ShiftLeft(value.AsUInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector128 tmp = Sse2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Sse2.And(tmp, Vector128.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -263,10 +835,190 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator *(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Multiply(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. 
+ } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return PackedSimd.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + (Vector128 al, Vector128 ah) = Vector128.Widen(left.AsByte()); + (Vector128 bl, Vector128 bh) = Vector128.Widen(right.AsByte()); + + Vector128 rl = PackedSimd.Multiply(al, bl); + Vector128 rh = PackedSimd.Multiply(ah, bh); + + return Vector128.Narrow(rl, rh).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Multiply(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Sse41))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx512DQ.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + if (Avx2.IsSupported) + { + Vector256 a = Avx2.ConvertToVector256Int16(left.AsByte()); + 
Vector256 b = Avx2.ConvertToVector256Int16(right.AsByte()); + + Vector256 r = Avx2.MultiplyLow(a, b); + + if (Avx512BW.VL.IsSupported) + { + return Avx512BW.VL.ConvertToVector128Byte(r).As(); + } + else + { + r = Avx2.And(r, Vector256.Create(0x00FF)); + return Avx2.Permute4x64(Avx2.PackUnsignedSaturate(r, r).AsUInt64(), 0b11_01_10_00).GetLower().As(); + } + } + else + { + (Vector128 al, Vector128 ah) = Vector128.Widen(left.AsByte()); + (Vector128 bl, Vector128 bh) = Vector128.Widen(right.AsByte()); + + Vector128 rl = Sse2.MultiplyLow(al, bl); + Vector128 rh = Sse2.MultiplyLow(ah, bh); + + return Vector128.Narrow(rl, rh).As(); + } + } + else if (sizeof(T) == 2) + { + return Sse2.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + if (Sse41.IsSupported) + { + return Sse41.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else + { + Vector128 al = left.AsUInt32(); + Vector128 bl = right.AsUInt32(); + + Vector128 rl = Sse2.Multiply(al, bl).AsUInt32(); + rl = Sse2.Shuffle(rl, 0b00_00_10_00); + + Vector128 ah = Sse2.ShiftRightLogical128BitLane(al, 4); + Vector128 bh = Sse2.ShiftRightLogical128BitLane(bl, 4); + + Vector128 rh = Sse2.Multiply(ah, bh).AsUInt32(); + rh = Sse2.Shuffle(rh, 0b00_00_10_00); + + return Sse2.UnpackLow(rl, rh).As(); + } + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.VL.IsSupported) + { + return Avx512DQ.VL.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + else + { + // TODO-XARCH-CQ: We should support long/ulong multiplication. + } + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -276,13 +1028,7 @@ public static Vector128 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator *(Vector128 left, T right) - { - return Vector128.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector128 operator *(Vector128 left, T right) => left * Vector128.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -291,7 +1037,7 @@ public static Vector128 Zero /// The type of the vector () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator *(T left, Vector128 right) => right * left; + public static Vector128 operator *(T left, Vector128 right) => Vector128.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -301,10 +1047,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator ~(Vector128 vector) { - return Vector128.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(vector); + } + else if (Sse.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 vector) + { + if (sizeof(T) == 1) + { + return AdvSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } + + static Vector128 SoftwareImpl(Vector128 vector) + { + return Vector128.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 vector) + { + if (sizeof(T) == 1) + { + return PackedSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 vector) + { + if (typeof(T) == typeof(float)) + { + return Sse.Xor(vector.AsSingle(), Vector128.AllBitsSet).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Xor(vector.AsDouble(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Xor(vector.AsByte(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Xor(vector.AsUInt16(), Vector128.AllBitsSet).As(); + } + 
else if (sizeof(T) == 4) + { + return Sse2.Xor(vector.AsUInt32(), Vector128.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Xor(vector.AsUInt64(), Vector128.AllBitsSet).As(); + } + } + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. @@ -315,10 +1163,112 @@ public static Vector128 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator >>(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftArithmetic(value.AsSByte(), Vector128.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftArithmetic(value.AsInt16(), Vector128.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftArithmetic(value.AsInt32(), Vector128.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftArithmetic(value.AsInt64(), Vector128.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + 
[CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftRightArithmetic(value.AsSByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.ShiftRightArithmetic(value.AsInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else + { + // TODO-XARCH-CQ: We should support double/long arithmetic shift. + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -330,10 +1280,128 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator -(Vector128 left, Vector128 right) { - return Vector128.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(left, right); + } + else if (Sse.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + } + else if (sizeof(T) == 1) + { + return AdvSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector128 SoftwareImpl(Vector128 left, Vector128 right) + { + return Vector128.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return PackedSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return 
PackedSimd.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return PackedSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return PackedSimd.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + [CompExactlyDependsOn(typeof(Sse))] + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 left, Vector128 right) + { + if (typeof(T) == typeof(float)) + { + return Sse.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (Sse2.IsSupported) + { + if (typeof(T) == typeof(double)) + { + return Sse2.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return Sse2.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -341,14 +1409,7 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 operator -(Vector128 vector) - { - return Vector128.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector128 operator -(Vector128 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. 
@@ -369,10 +1430,98 @@ public static Vector128 operator >>(Vector128 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 operator >>>(Vector128 value, int shiftCount) { - return Vector128.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + else if (PackedSimd.IsSupported) + { + return WasmImpl(value, shiftCount); + } + else if (Sse2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 ArmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector128.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector128.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector128.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogical(value.AsUInt64(), Vector128.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); + } + + static Vector128 SoftwareImpl(Vector128 value, int shiftCount) + { + return Vector128.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 WasmImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + return PackedSimd.ShiftRightLogical(value.AsByte(), shiftCount & 0x7).As(); + } + else if (sizeof(T) == 2) + { + return PackedSimd.ShiftRightLogical(value.AsUInt16(), shiftCount & 0xF).As(); + } + else if (sizeof(T) == 4) + { + return PackedSimd.ShiftRightLogical(value.AsUInt32(), shiftCount & 0x1F).As(); + } + else if (sizeof(T) == 
8) + { + return PackedSimd.ShiftRightLogical(value.AsUInt64(), shiftCount & 0x3F).As(); + } + return SoftwareImpl(value, shiftCount); + } + + [CompExactlyDependsOn(typeof(Sse2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 XarchImpl(Vector128 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector128 tmp = Sse2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Sse2.And(tmp, Vector128.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Sse2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Sse2.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Sse2.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. @@ -439,7 +1588,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? 
formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); @@ -474,16 +1623,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Abs(Vector128 vector) => Vector128.Abs(vector); /// - static Vector128 ISimdVector, T>.Add(Vector128 left, Vector128 right) => Vector128.Add(left, right); + static Vector128 ISimdVector, T>.Add(Vector128 left, Vector128 right) => left + right; /// static Vector128 ISimdVector, T>.AndNot(Vector128 left, Vector128 right) => Vector128.AndNot(left, right); /// - static Vector128 ISimdVector, T>.BitwiseAnd(Vector128 left, Vector128 right) => Vector128.BitwiseAnd(left, right); + static Vector128 ISimdVector, T>.BitwiseAnd(Vector128 left, Vector128 right) => left & right; /// - static Vector128 ISimdVector, T>.BitwiseOr(Vector128 left, Vector128 right) => Vector128.BitwiseOr(left, right); + static Vector128 ISimdVector, T>.BitwiseOr(Vector128 left, Vector128 right) => left | right; /// static Vector128 ISimdVector, T>.Ceiling(Vector128 vector) => Vector128.Ceiling(vector); @@ -519,10 +1668,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector128.CreateScalarUnsafe(value); /// - static Vector128 ISimdVector, T>.Divide(Vector128 left, Vector128 right) => Vector128.Divide(left, right); + static Vector128 ISimdVector, T>.Divide(Vector128 left, Vector128 right) => left / right; /// - static Vector128 ISimdVector, T>.Divide(Vector128 left, T right) => Vector128.Divide(left, right); + static Vector128 ISimdVector, T>.Divide(Vector128 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector128 left, Vector128 right) => Vector128.Dot(left, right); @@ -531,7 +1680,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Equals(Vector128 left, Vector128 right) => Vector128.Equals(left, 
right); /// - static bool ISimdVector, T>.EqualsAll(Vector128 left, Vector128 right) => Vector128.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector128 left, Vector128 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector128 left, Vector128 right) => Vector128.EqualsAny(left, right); @@ -540,7 +1689,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Floor(Vector128 vector) => Vector128.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector128 vector, int index) => Vector128.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector128 vector, int index) => vector.GetElement(index); /// static Vector128 ISimdVector, T>.GreaterThan(Vector128 left, Vector128 right) => Vector128.GreaterThan(left, right); @@ -578,7 +1727,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector128 left, Vector128 right) => Vector128.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector128 ISimdVector, T>.Load(T* source) => Vector128.Load(source); @@ -587,7 +1735,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector128 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector128.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector128 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector128.LoadUnsafe(in source); @@ -602,63 +1749,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector128 ISimdVector, T>.Min(Vector128 left, Vector128 right) => Vector128.Min(left, right); /// - static Vector128 ISimdVector, T>.Multiply(Vector128 left, 
Vector128 right) => Vector128.Multiply(left, right); + static Vector128 ISimdVector, T>.Multiply(Vector128 left, Vector128 right) => left * right; /// - static Vector128 ISimdVector, T>.Multiply(Vector128 left, T right) => Vector128.Multiply(left, right); + static Vector128 ISimdVector, T>.Multiply(Vector128 left, T right) => left * right; /// - static Vector128 ISimdVector, T>.Negate(Vector128 vector) => Vector128.Negate(vector); + static Vector128 ISimdVector, T>.Negate(Vector128 vector) => -vector; /// - static Vector128 ISimdVector, T>.OnesComplement(Vector128 vector) => Vector128.OnesComplement(vector); + static Vector128 ISimdVector, T>.OnesComplement(Vector128 vector) => ~vector; /// - static Vector128 ISimdVector, T>.ShiftLeft(Vector128 vector, int shiftCount) => Vector128.ShiftLeft(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftLeft(Vector128 vector, int shiftCount) => vector << shiftCount; /// - static Vector128 ISimdVector, T>.ShiftRightArithmetic(Vector128 vector, int shiftCount) => Vector128.ShiftRightArithmetic(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftRightArithmetic(Vector128 vector, int shiftCount) => vector >> shiftCount; /// - static Vector128 ISimdVector, T>.ShiftRightLogical(Vector128 vector, int shiftCount) => Vector128.ShiftRightLogical(vector, shiftCount); + static Vector128 ISimdVector, T>.ShiftRightLogical(Vector128 vector, int shiftCount) => vector >>> shiftCount; /// static Vector128 ISimdVector, T>.Sqrt(Vector128 vector) => Vector128.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector128 source, T* destination) => Vector128.Store(source, destination); + static void ISimdVector, T>.Store(Vector128 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector128 source, T* destination) => Vector128.StoreAligned(source, 
destination); + static void ISimdVector, T>.StoreAligned(Vector128 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector128 source, T* destination) => Vector128.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector128 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination) => Vector128.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination, nuint elementOffset) => Vector128.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector128 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector128 ISimdVector, T>.Subtract(Vector128 left, Vector128 right) => Vector128.Subtract(left, right); + static Vector128 ISimdVector, T>.Subtract(Vector128 left, Vector128 right) => left - right; /// static T ISimdVector, T>.Sum(Vector128 vector) => Vector128.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector128 vector) => Vector128.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector128 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector128 vector, Span destination) => Vector128.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector128 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector128 ISimdVector, T>.WithElement(Vector128 vector, int index, T value) => Vector128.WithElement(vector, index, value); + static Vector128 ISimdVector, 
T>.WithElement(Vector128 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector128 ISimdVector, T>.Xor(Vector128 left, Vector128 right) => Vector128.Xor(left, right); + static Vector128 ISimdVector, T>.Xor(Vector128 left, Vector128 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index e8f16472b3695..34abfc4c8984f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -6,8 +6,11 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -62,10 +65,70 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Abs(Vector256 vector) { - return Create( - Vector128.Abs(vector._lower), - Vector128.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (Avx.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 vector) + { + if (typeof(T) == typeof(float)) + { + return Avx.AndNot(Vector256.Create(-0.0f), 
vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.AndNot(Vector256.Create(-0.0), vector.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.Abs(vector.AsInt64()).As(); + } + else + { + return ConditionalSelect(LessThan(vector, Vector256.Zero), Vector256.Zero - vector, vector); + } + } + } + return SoftwareImpl(vector); + } + + static Vector256 SoftwareImpl(Vector256 vector) + { + return Create( + Vector128.Abs(vector._lower), + Vector128.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -85,13 +148,7 @@ public static Vector256 Abs(Vector256 vector) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 AndNot(Vector256 left, Vector256 right) - { - return Create( - Vector128.AndNot(left._lower, right._lower), - Vector128.AndNot(left._upper, right._upper) - ); - } + public static Vector256 AndNot(Vector256 left, Vector256 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. 
@@ -272,10 +329,26 @@ public static Vector AsVector(this Vector256 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector256 Ceiling(Vector256 vector) { - return Create( - Vector128.Ceiling(vector._lower), - Vector128.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector128.Ceiling(vector._lower), + Vector128.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -301,13 +374,7 @@ internal static Vector256 Ceiling(Vector256 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConditionalSelect(Vector256 condition, Vector256 left, Vector256 right) - { - return Create( - Vector128.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector128.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector256 ConditionalSelect(Vector256 condition, Vector256 left, Vector256 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. @@ -1353,16 +1420,7 @@ public static Vector256 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector256 left, Vector256 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. 
- - T result = Vector128.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector128.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector256 left, Vector256 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. @@ -1459,10 +1517,26 @@ public static uint ExtractMostSignificantBits(this Vector256 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector256 Floor(Vector256 vector) { - return Create( - Vector128.Floor(vector._lower), - Vector128.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector128.Floor(vector._lower), + Vector128.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. @@ -1720,7 +1794,6 @@ public static bool LessThanOrEqualAny(Vector256 left, Vector256 right) || Vector128.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1759,7 +1832,6 @@ public static Vector256 LoadAligned(T* source) [Intrinsic] [CLSCompliant(false)] public static Vector256 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. 
@@ -1927,7 +1999,7 @@ public static Vector256 Min(Vector256 left, Vector256 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector256 Multiply(T left, Vector256 right) => left * right; + public static Vector256 Multiply(T left, Vector256 right) => right * left; /// [Intrinsic] @@ -2067,13 +2139,7 @@ public static Vector256 Narrow(Vector256 lower, Vector256 up /// A vector whose elements are the ones-complement of the corresponding elements in . /// The type of () is not supported. [Intrinsic] - public static Vector256 OnesComplement(Vector256 vector) - { - return Create( - Vector128.OnesComplement(vector._lower), - Vector128.OnesComplement(vector._upper) - ); - } + public static Vector256 OnesComplement(Vector256 vector) => ~vector; /// Shifts each element of a vector left by the specified amount. /// The vector whose elements are to be shifted. @@ -2541,7 +2607,6 @@ public static Vector256 Sqrt(Vector256 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2580,7 +2645,6 @@ public static void StoreAligned(this Vector256 source, T* destination) [Intrinsic] [CLSCompliant(false)] public static void StoreAlignedNonTemporal(this Vector256 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. @@ -2672,7 +2736,7 @@ public static Vector512 ToVector512(this Vector256 vector) /// A new with the lower 256-bits set to the value of and the upper 256-bits left uninitialized. /// The type of () is not supported. 
[Intrinsic] - public static unsafe Vector512 ToVector512Unsafe(this Vector256 vector) + public static Vector512 ToVector512Unsafe(this Vector256 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index 36b704682d320..46d09538cb432 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -7,8 +7,13 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,11 +43,7 @@ namespace System.Runtime.Intrinsics public static Vector256 AllBitsSet { [Intrinsic] - get - { - Vector128 vector = Vector128.AllBitsSet; - return Vector256.Create(vector, vector); - } + get => Vector256.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -50,7 +51,11 @@ public static Vector256 AllBitsSet public static int Count { [Intrinsic] - get => Vector128.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector256.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. @@ -101,11 +106,7 @@ public static bool IsSupported public static Vector256 One { [Intrinsic] - get - { - Vector128 vector = Vector128.One; - return Vector256.Create(vector, vector); - } + get => Vector256.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. 
@@ -138,10 +139,54 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator +(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. @@ -153,10 +198,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator &(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.And(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.And(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. @@ -168,10 +261,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator |(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Or(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -183,10 +324,34 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator /(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -195,13 +360,7 @@ public static Vector256 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator /(Vector256 left, T right) - { - return Vector256.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector256 operator /(Vector256 left, T right) => left / Vector256.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -225,10 +384,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator ^(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Xor(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. 
@@ -247,10 +454,44 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator <<(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector256 tmp = Avx2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx2.And(tmp, Vector256.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -262,10 +503,81 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator *(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx512DQ.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + if (Avx512BW.VL.IsSupported) + { + Vector512 a = Avx512BW.ConvertToVector512UInt16(left.AsByte()); + Vector512 b = Avx512BW.ConvertToVector512UInt16(right.AsByte()); + + Vector512 r = Avx512BW.MultiplyLow(a, b); + + return Avx512BW.ConvertToVector256Byte(r).As(); + } + else + { + (Vector256 al, Vector256 ah) = Vector256.Widen(left.AsByte()); + (Vector256 bl, Vector256 bh) = Vector256.Widen(right.AsByte()); + + Vector256 rl = Avx2.MultiplyLow(al, bl); + Vector256 rh = Avx2.MultiplyLow(ah, bh); + + return Vector256.Narrow(rl, rh).As(); + } + } + else if (sizeof(T) == 2) + { + return Avx2.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.VL.IsSupported) + { + return Avx512DQ.VL.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + else 
+ { + // TODO-XARCH-CQ: We should support long/ulong multiplication. + } + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -275,13 +587,7 @@ public static Vector256 Zero /// The type of the vector () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator *(Vector256 left, T right) - { - return Vector256.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector256 operator *(Vector256 left, T right) => left * Vector256.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -289,7 +595,7 @@ public static Vector256 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector256 operator *(T left, Vector256 right) => right * left; + public static Vector256 operator *(T left, Vector256 right) => Vector256.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -299,10 +605,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator ~(Vector256 vector) { - return Vector256.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + static Vector256 SoftwareImpl(Vector256 vector) + { + return Vector256.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 vector) + { + if (typeof(T) == typeof(float)) + { + return Avx.Xor(vector.AsSingle(), Vector256.AllBitsSet).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Xor(vector.AsDouble(), Vector256.AllBitsSet).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Xor(vector.AsByte(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Xor(vector.AsUInt16(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Xor(vector.AsUInt32(), Vector256.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Xor(vector.AsUInt64(), Vector256.AllBitsSet).As(); + } + } + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. 
@@ -313,10 +667,58 @@ public static Vector256 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator >>(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512F.VL.IsSupported) + { + return Avx512F.VL.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else + { + // TODO-XARCH-CQ: We should support double/long arithmetic shift. + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -328,10 +730,54 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator -(Vector256 left, Vector256 right) { - return Vector256.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (Avx.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector256 SoftwareImpl(Vector256 left, Vector256 right) + { + return Vector256.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 left, Vector256 right) + { + if (typeof(T) == typeof(float)) + { + return Avx.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (Avx2.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx2.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -339,14 +785,7 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 operator -(Vector256 vector) - { - return Vector256.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector256 operator -(Vector256 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. @@ -367,10 +806,44 @@ public static Vector256 operator >>(Vector256 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 operator >>>(Vector256 value, int shiftCount) { - return Vector256.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (Avx2.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector256 SoftwareImpl(Vector256 value, int shiftCount) + { + return Vector256.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx2))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector256 XarchImpl(Vector256 value, int shiftCount) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector256 tmp = Avx2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx2.And(tmp, Vector256.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx2.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return Avx2.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx2.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. @@ -428,7 +901,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. 
public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType(); @@ -463,16 +936,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Abs(Vector256 vector) => Vector256.Abs(vector); /// - static Vector256 ISimdVector, T>.Add(Vector256 left, Vector256 right) => Vector256.Add(left, right); + static Vector256 ISimdVector, T>.Add(Vector256 left, Vector256 right) => left + right; /// static Vector256 ISimdVector, T>.AndNot(Vector256 left, Vector256 right) => Vector256.AndNot(left, right); /// - static Vector256 ISimdVector, T>.BitwiseAnd(Vector256 left, Vector256 right) => Vector256.BitwiseAnd(left, right); + static Vector256 ISimdVector, T>.BitwiseAnd(Vector256 left, Vector256 right) => left & right; /// - static Vector256 ISimdVector, T>.BitwiseOr(Vector256 left, Vector256 right) => Vector256.BitwiseOr(left, right); + static Vector256 ISimdVector, T>.BitwiseOr(Vector256 left, Vector256 right) => left | right; /// static Vector256 ISimdVector, T>.Ceiling(Vector256 vector) => Vector256.Ceiling(vector); @@ -508,10 +981,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector256.CreateScalarUnsafe(value); /// - static Vector256 ISimdVector, T>.Divide(Vector256 left, Vector256 right) => Vector256.Divide(left, right); + static Vector256 ISimdVector, T>.Divide(Vector256 left, Vector256 right) => left / right; /// - static Vector256 ISimdVector, T>.Divide(Vector256 left, T right) => Vector256.Divide(left, right); + static Vector256 ISimdVector, T>.Divide(Vector256 left, T right) => 
left / right; /// static T ISimdVector, T>.Dot(Vector256 left, Vector256 right) => Vector256.Dot(left, right); @@ -520,7 +993,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Equals(Vector256 left, Vector256 right) => Vector256.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector256 left, Vector256 right) => Vector256.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector256 left, Vector256 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector256 left, Vector256 right) => Vector256.EqualsAny(left, right); @@ -529,7 +1002,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Floor(Vector256 vector) => Vector256.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector256 vector, int index) => Vector256.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector256 vector, int index) => vector.GetElement(index); /// static Vector256 ISimdVector, T>.GreaterThan(Vector256 left, Vector256 right) => Vector256.GreaterThan(left, right); @@ -567,7 +1040,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector256 left, Vector256 right) => Vector256.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector256 ISimdVector, T>.Load(T* source) => Vector256.Load(source); @@ -576,7 +1048,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector256 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector256.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector256 ISimdVector, T>.LoadUnsafe(ref readonly T 
source) => Vector256.LoadUnsafe(in source); @@ -591,63 +1062,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector256 ISimdVector, T>.Min(Vector256 left, Vector256 right) => Vector256.Min(left, right); /// - static Vector256 ISimdVector, T>.Multiply(Vector256 left, Vector256 right) => Vector256.Multiply(left, right); + static Vector256 ISimdVector, T>.Multiply(Vector256 left, Vector256 right) => left * right; /// - static Vector256 ISimdVector, T>.Multiply(Vector256 left, T right) => Vector256.Multiply(left, right); + static Vector256 ISimdVector, T>.Multiply(Vector256 left, T right) => left * right; /// - static Vector256 ISimdVector, T>.Negate(Vector256 vector) => Vector256.Negate(vector); + static Vector256 ISimdVector, T>.Negate(Vector256 vector) => -vector; /// - static Vector256 ISimdVector, T>.OnesComplement(Vector256 vector) => Vector256.OnesComplement(vector); + static Vector256 ISimdVector, T>.OnesComplement(Vector256 vector) => ~vector; /// - static Vector256 ISimdVector, T>.ShiftLeft(Vector256 vector, int shiftCount) => Vector256.ShiftLeft(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftLeft(Vector256 vector, int shiftCount) => vector << shiftCount; /// - static Vector256 ISimdVector, T>.ShiftRightArithmetic(Vector256 vector, int shiftCount) => Vector256.ShiftRightArithmetic(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftRightArithmetic(Vector256 vector, int shiftCount) => vector >> shiftCount; /// - static Vector256 ISimdVector, T>.ShiftRightLogical(Vector256 vector, int shiftCount) => Vector256.ShiftRightLogical(vector, shiftCount); + static Vector256 ISimdVector, T>.ShiftRightLogical(Vector256 vector, int shiftCount) => vector >>> shiftCount; /// static Vector256 ISimdVector, T>.Sqrt(Vector256 vector) => Vector256.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void 
ISimdVector, T>.Store(Vector256 source, T* destination) => Vector256.Store(source, destination); + static void ISimdVector, T>.Store(Vector256 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector256 source, T* destination) => Vector256.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector256 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector256 source, T* destination) => Vector256.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector256 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination) => Vector256.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination, nuint elementOffset) => Vector256.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector256 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector256 ISimdVector, T>.Subtract(Vector256 left, Vector256 right) => Vector256.Subtract(left, right); + static Vector256 ISimdVector, T>.Subtract(Vector256 left, Vector256 right) => left - right; /// static T ISimdVector, T>.Sum(Vector256 vector) => Vector256.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector256 vector) => Vector256.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector256 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector256 vector, Span destination) => Vector256.TryCopyTo(vector, 
destination); + static bool ISimdVector, T>.TryCopyTo(Vector256 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector256 ISimdVector, T>.WithElement(Vector256 vector, int index, T value) => Vector256.WithElement(vector, index, value); + static Vector256 ISimdVector, T>.WithElement(Vector256 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector256 ISimdVector, T>.Xor(Vector256 left, Vector256 right) => Vector256.Xor(left, right); + static Vector256 ISimdVector, T>.Xor(Vector256 left, Vector256 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 8d44bf2899841..7fb23ba1d3b9f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -8,6 +8,8 @@ using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -62,10 +64,69 @@ public static bool IsHardwareAccelerated [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Abs(Vector512 vector) { - return Create( - Vector256.Abs(vector._lower), - Vector256.Abs(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return vector; + } + else if (Avx512F.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 vector) + { + if (typeof(T) == typeof(float)) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.AndNot(Vector512.Create(-0.0f), vector.AsSingle()).As(); + } + } + else if (typeof(T) == typeof(double)) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.AndNot(Vector512.Create(-0.0), vector.AsDouble()).As(); + } + } + else if (sizeof(T) == 4) + { + return Avx512F.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Abs(vector.AsInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Abs(vector.AsInt16()).As(); + } + } + return SoftwareImpl(vector); + } + + static Vector512 SoftwareImpl(Vector512 vector) + { + return Create( + Vector256.Abs(vector._lower), + Vector256.Abs(vector._upper) + ); + } } /// Adds two vectors to compute their sum. @@ -85,13 +146,7 @@ public static Vector512 Abs(Vector512 vector) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 AndNot(Vector512 left, Vector512 right) - { - return Create( - Vector256.AndNot(left._lower, right._lower), - Vector256.AndNot(left._upper, right._upper) - ); - } + public static Vector512 AndNot(Vector512 left, Vector512 right) => left & ~right; /// Reinterprets a as a new . /// The type of the elements in the input vector. 
@@ -272,10 +327,26 @@ public static Vector AsVector(this Vector512 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector512 Ceiling(Vector512 vector) { - return Create( - Vector256.Ceiling(vector._lower), - Vector256.Ceiling(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector256.Ceiling(vector._lower), + Vector256.Ceiling(vector._upper) + ); + } } /// Computes the ceiling of each element in a vector. @@ -301,13 +372,7 @@ internal static Vector512 Ceiling(Vector512 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConditionalSelect(Vector512 condition, Vector512 left, Vector512 right) - { - return Create( - Vector256.ConditionalSelect(condition._lower, left._lower, right._lower), - Vector256.ConditionalSelect(condition._upper, left._upper, right._upper) - ); - } + public static Vector512 ConditionalSelect(Vector512 condition, Vector512 left, Vector512 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. @@ -1390,13 +1455,7 @@ public static Vector512 CreateScalarUnsafe(T value) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 Divide(Vector512 left, Vector512 right) - { - return Create( - Vector256.Divide(left._lower, right._lower), - Vector256.Divide(left._upper, right._upper) - ); - } + public static Vector512 Divide(Vector512 left, Vector512 right) => left / right; /// Divides a vector by a scalar to compute the per-element quotient. 
/// The vector that will be divided by . @@ -1414,16 +1473,7 @@ public static Vector512 Divide(Vector512 left, Vector512 right) /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static T Dot(Vector512 left, Vector512 right) - { - // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism - // This is because the underlying dpps instruction on x86/x64 will do this equivalently - // and otherwise the software vs accelerated implementations may differ in returned result. - - T result = Vector256.Dot(left._lower, right._lower); - result = Scalar.Add(result, Vector256.Dot(left._upper, right._upper)); - return result; - } + public static T Dot(Vector512 left, Vector512 right) => Sum(left * right); /// Compares two vectors to determine if they are equal on a per-element basis. /// The type of the elements in the vector. @@ -1520,10 +1570,26 @@ public static ulong ExtractMostSignificantBits(this Vector512 vector) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static Vector512 Floor(Vector512 vector) { - return Create( - Vector256.Floor(vector._lower), - Vector256.Floor(vector._upper) - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(short)) + || (typeof(T) == typeof(int)) + || (typeof(T) == typeof(long)) + || (typeof(T) == typeof(nint)) + || (typeof(T) == typeof(nuint)) + || (typeof(T) == typeof(sbyte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong))) + { + return vector; + } + else + { + return Create( + Vector256.Floor(vector._lower), + Vector256.Floor(vector._upper) + ); + } } /// Computes the floor of each element in a vector. 
@@ -1781,7 +1847,6 @@ public static bool LessThanOrEqualAny(Vector512 left, Vector512 right) || Vector256.LessThanOrEqualAny(left._upper, right._upper); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. @@ -1820,7 +1885,6 @@ public static Vector512 LoadAligned(T* source) [Intrinsic] [CLSCompliant(false)] public static Vector512 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1988,7 +2052,7 @@ public static Vector512 Min(Vector512 left, Vector512 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector512 Multiply(T left, Vector512 right) => left * right; + public static Vector512 Multiply(T left, Vector512 right) => right * left; /// [Intrinsic] @@ -2127,13 +2191,7 @@ public static Vector512 Narrow(Vector512 lower, Vector512 up /// A vector whose elements are the ones-complement of the corresponding elements in . /// The type of () is not supported. [Intrinsic] - public static Vector512 OnesComplement(Vector512 vector) - { - return Create( - Vector256.OnesComplement(vector._lower), - Vector256.OnesComplement(vector._upper) - ); - } + public static Vector512 OnesComplement(Vector512 vector) => ~vector; /// Shifts each element of a vector left by the specified amount. /// The vector whose elements are to be shifted. @@ -2601,7 +2659,6 @@ public static Vector512 Sqrt(Vector512 vector) ); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. 
/// The type of the elements in the vector. /// The vector that will be stored. @@ -2640,7 +2697,6 @@ public static void StoreAligned(this Vector512 source, T* destination) [Intrinsic] [CLSCompliant(false)] public static void StoreAlignedNonTemporal(this Vector512 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs index 235871dbe6069..9e117e67eeb47 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs @@ -7,8 +7,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,11 +41,7 @@ namespace System.Runtime.Intrinsics public static Vector512 AllBitsSet { [Intrinsic] - get - { - Vector256 vector = Vector256.AllBitsSet; - return Vector512.Create(vector, vector); - } + get => Vector512.Create(Scalar.AllBitsSet); } /// Gets the number of that are in a . @@ -50,7 +49,11 @@ public static Vector512 AllBitsSet public static int Count { [Intrinsic] - get => Vector256.Count * 2; + get + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType(); + return Vector512.Size / sizeof(T); + } } /// Gets a new with the elements set to their index. 
@@ -101,11 +104,7 @@ public static bool IsSupported public static Vector512 One { [Intrinsic] - get - { - Vector256 vector = Vector256.One; - return Vector512.Create(vector, vector); - } + get => Vector512.Create(Scalar.One); } /// Gets a new with all elements initialized to zero. @@ -138,10 +137,54 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator +(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower + right._lower, - left._upper + right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower + right._lower, + left._upper + right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Add(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Add(left.AsUInt64(), right.AsUInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the bitwise-and of two vectors. 
@@ -153,10 +196,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator &(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower & right._lower, - left._upper & right._upper - ); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower & right._lower, + left._upper & right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.And(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.And(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.And(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Computes the bitwise-or of two vectors. 
@@ -168,10 +261,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator |(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower | right._lower, - left._upper | right._upper - ); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower | right._lower, + left._upper | right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.Or(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Or(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Divides two vectors to compute their quotient. 
@@ -183,10 +326,34 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator /(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower / right._lower, - left._upper / right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower / right._lower, + left._upper / right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Divide(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Divide(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -195,13 +362,7 @@ public static Vector512 Zero /// The quotient of divided by . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator /(Vector512 left, T right) - { - return Vector512.Create( - left._lower / right, - left._upper / right - ); - } + public static Vector512 operator /(Vector512 left, T right) => left / Vector512.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -225,10 +386,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator ^(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower ^ right._lower, - left._upper ^ right._upper - ); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower ^ right._lower, + left._upper ^ right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (Avx512DQ.IsSupported) + { + if ((typeof(T) == typeof(float))) + { + return Avx512DQ.Xor(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Xor(left.AsDouble(), right.AsDouble()).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + + return SoftwareImpl(left, right); + } } /// Compares two vectors to determine if any elements are not equal. 
@@ -247,10 +458,48 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator <<(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower << shiftCount, - value._upper << shiftCount - ); + if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower << shiftCount, + value._upper << shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftLeftLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftLeftLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector512 tmp = Avx512BW.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx512F.And(tmp, Vector512.Create((ushort)(0xFF << maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftLeftLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -262,10 +511,64 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator *(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower * right._lower, - left._upper * right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower * right._lower, + left._upper * right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Multiply(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.MultiplyLow(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (Avx512DQ.IsSupported) + { + return Avx512DQ.MultiplyLow(left.AsUInt64(), right.AsUInt64()).As(); + } + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + (Vector512 al, Vector512 ah) = Vector512.Widen(left.AsByte()); + (Vector512 bl, Vector512 bh) = Vector512.Widen(right.AsByte()); + + Vector512 rl = Avx512BW.MultiplyLow(al, bl); + Vector512 rh = Avx512BW.MultiplyLow(ah, bh); + + return Vector512.Narrow(rl, rh).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.MultiplyLow(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Multiplies a vector by a scalar to compute their product. @@ -275,13 +578,7 @@ public static Vector512 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator *(Vector512 left, T right) - { - return Vector512.Create( - left._lower * right, - left._upper * right - ); - } + public static Vector512 operator *(Vector512 left, T right) => left * Vector512.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -289,7 +586,7 @@ public static Vector512 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector512 operator *(T left, Vector512 right) => right * left; + public static Vector512 operator *(T left, Vector512 right) => Vector512.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -299,10 +596,60 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator ~(Vector512 vector) { - return Vector512.Create( - ~vector._lower, - ~vector._upper - ); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (Avx512F.IsSupported) + { + return XarchImpl(vector); + } + return SoftwareImpl(vector); + + static Vector512 SoftwareImpl(Vector512 vector) + { + return Vector512.Create( + ~vector._lower, + ~vector._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512DQ))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 vector) + { + if (Avx512DQ.IsSupported) + { + if (typeof(T) == typeof(float)) + { + return Avx512DQ.Xor(vector.AsSingle(), Vector512.AllBitsSet).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512DQ.Xor(vector.AsDouble(), Vector512.AllBitsSet).As(); + } + } + + if (sizeof(T) == 1) + { + return Avx512F.Xor(vector.AsByte(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 2) + { + return Avx512F.Xor(vector.AsUInt16(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Xor(vector.AsUInt32(), Vector512.AllBitsSet).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Xor(vector.AsUInt64(), Vector512.AllBitsSet).As(); + } + + return SoftwareImpl(vector); + } } /// Shifts (signed) each element of a vector right by the specified amount. 
@@ -313,10 +660,54 @@ public static Vector512 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator >>(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower >> shiftCount, - value._upper >> shiftCount - ); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower >> shiftCount, + value._upper >> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftRightArithmetic(value.AsInt32(), Vector128.CreateScalar(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftRightArithmetic(value.AsInt64(), Vector128.CreateScalar(shiftCount & 0x3F)).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + // TODO-XARCH-CQ: We should support sbyte arithmetic shift. + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftRightArithmetic(value.AsInt16(), Vector128.CreateScalar((short)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Subtracts two vectors to compute their difference. 
@@ -328,10 +719,54 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator -(Vector512 left, Vector512 right) { - return Vector512.Create( - left._lower - right._lower, - left._upper - right._upper - ); + if (Avx512F.IsSupported) + { + return XarchImpl(left, right); + } + return SoftwareImpl(left, right); + + static Vector512 SoftwareImpl(Vector512 left, Vector512 right) + { + return Vector512.Create( + left._lower - right._lower, + left._upper - right._upper + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 left, Vector512 right) + { + if (typeof(T) == typeof(float)) + { + return Avx512F.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return Avx512F.Subtract(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 4) + { + return Avx512F.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.Subtract(left.AsUInt64(), right.AsUInt64()).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + return Avx512BW.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + } + return SoftwareImpl(left, right); + } } /// Computes the unary negation of a vector. @@ -339,14 +774,7 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) /// A vector whose elements are the unary negation of the corresponding elements in . /// The type of the vector () is not supported. 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 operator -(Vector512 vector) - { - return Vector512.Create( - -vector._lower, - -vector._upper - ); - } + public static Vector512 operator -(Vector512 vector) => Zero - vector; /// Returns a given vector unchanged. /// The vector. @@ -367,10 +795,48 @@ public static Vector512 operator >>(Vector512 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 operator >>>(Vector512 value, int shiftCount) { - return Vector512.Create( - value._lower >>> shiftCount, - value._upper >>> shiftCount - ); + if (Avx512F.IsSupported) + { + return XarchImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); + + static Vector512 SoftwareImpl(Vector512 value, int shiftCount) + { + return Vector512.Create( + value._lower >>> shiftCount, + value._upper >>> shiftCount + ); + } + + [CompExactlyDependsOn(typeof(Avx512F))] + [CompExactlyDependsOn(typeof(Avx512BW))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector512 XarchImpl(Vector512 value, int shiftCount) + { + if (sizeof(T) == 4) + { + return Avx512F.ShiftRightLogical(value.AsUInt32(), Vector128.CreateScalar((uint)(shiftCount & 0x1F))).As(); + } + else if (sizeof(T) == 8) + { + return Avx512F.ShiftRightLogical(value.AsUInt64(), Vector128.CreateScalar((uint)(shiftCount & 0x3F))).As(); + } + else if (Avx512BW.IsSupported) + { + if (sizeof(T) == 1) + { + byte maskedShiftCount = (byte)(shiftCount & 0x7); + Vector512 tmp = Avx512BW.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar(maskedShiftCount)); + return Avx512F.And(tmp, Vector512.Create((ushort)(0xFF >>> maskedShiftCount))).As(); + } + else if (sizeof(T) == 2) + { + return Avx512BW.ShiftRightLogical(value.AsUInt16(), Vector128.CreateScalar((ushort)(shiftCount & 0xF))).As(); + } + } + return SoftwareImpl(value, shiftCount); + } } /// Determines whether the specified object is equal to the current instance. 
@@ -428,7 +894,7 @@ public override int GetHashCode() /// The type of the vector () is not supported. public override string ToString() => ToString("G", CultureInfo.InvariantCulture); - private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) + internal string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector512BaseType(); @@ -463,16 +929,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Abs(Vector512 vector) => Vector512.Abs(vector); /// - static Vector512 ISimdVector, T>.Add(Vector512 left, Vector512 right) => Vector512.Add(left, right); + static Vector512 ISimdVector, T>.Add(Vector512 left, Vector512 right) => left + right; /// static Vector512 ISimdVector, T>.AndNot(Vector512 left, Vector512 right) => Vector512.AndNot(left, right); /// - static Vector512 ISimdVector, T>.BitwiseAnd(Vector512 left, Vector512 right) => Vector512.BitwiseAnd(left, right); + static Vector512 ISimdVector, T>.BitwiseAnd(Vector512 left, Vector512 right) => left & right; /// - static Vector512 ISimdVector, T>.BitwiseOr(Vector512 left, Vector512 right) => Vector512.BitwiseOr(left, right); + static Vector512 ISimdVector, T>.BitwiseOr(Vector512 left, Vector512 right) => left | right; /// static Vector512 ISimdVector, T>.Ceiling(Vector512 vector) => Vector512.Ceiling(vector); @@ -508,10 +974,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector512.CreateScalarUnsafe(value); /// - static Vector512 ISimdVector, T>.Divide(Vector512 left, Vector512 right) => Vector512.Divide(left, right); + static Vector512 ISimdVector, T>.Divide(Vector512 left, Vector512 right) => left / right; /// - static Vector512 ISimdVector, T>.Divide(Vector512 left, T right) 
=> Vector512.Divide(left, right); + static Vector512 ISimdVector, T>.Divide(Vector512 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector512 left, Vector512 right) => Vector512.Dot(left, right); @@ -520,7 +986,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Equals(Vector512 left, Vector512 right) => Vector512.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector512 left, Vector512 right) => Vector512.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector512 left, Vector512 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector512 left, Vector512 right) => Vector512.EqualsAny(left, right); @@ -529,7 +995,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Floor(Vector512 vector) => Vector512.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector512 vector, int index) => Vector512.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector512 vector, int index) => vector.GetElement(index); /// static Vector512 ISimdVector, T>.GreaterThan(Vector512 left, Vector512 right) => Vector512.GreaterThan(left, right); @@ -567,7 +1033,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector512 left, Vector512 right) => Vector512.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector512 ISimdVector, T>.Load(T* source) => Vector512.Load(source); @@ -576,7 +1041,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector512 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector512.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or 
declares a pointer to a managed type ('T') /// static Vector512 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector512.LoadUnsafe(in source); @@ -591,63 +1055,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector512 ISimdVector, T>.Min(Vector512 left, Vector512 right) => Vector512.Min(left, right); /// - static Vector512 ISimdVector, T>.Multiply(Vector512 left, Vector512 right) => Vector512.Multiply(left, right); + static Vector512 ISimdVector, T>.Multiply(Vector512 left, Vector512 right) => left * right; /// - static Vector512 ISimdVector, T>.Multiply(Vector512 left, T right) => Vector512.Multiply(left, right); + static Vector512 ISimdVector, T>.Multiply(Vector512 left, T right) => left * right; /// - static Vector512 ISimdVector, T>.Negate(Vector512 vector) => Vector512.Negate(vector); + static Vector512 ISimdVector, T>.Negate(Vector512 vector) => -vector; /// - static Vector512 ISimdVector, T>.OnesComplement(Vector512 vector) => Vector512.OnesComplement(vector); + static Vector512 ISimdVector, T>.OnesComplement(Vector512 vector) => ~vector; /// - static Vector512 ISimdVector, T>.ShiftLeft(Vector512 vector, int shiftCount) => Vector512.ShiftLeft(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftLeft(Vector512 vector, int shiftCount) => vector << shiftCount; /// - static Vector512 ISimdVector, T>.ShiftRightArithmetic(Vector512 vector, int shiftCount) => Vector512.ShiftRightArithmetic(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftRightArithmetic(Vector512 vector, int shiftCount) => vector >> shiftCount; /// - static Vector512 ISimdVector, T>.ShiftRightLogical(Vector512 vector, int shiftCount) => Vector512.ShiftRightLogical(vector, shiftCount); + static Vector512 ISimdVector, T>.ShiftRightLogical(Vector512 vector, int shiftCount) => vector >>> shiftCount; /// static Vector512 ISimdVector, T>.Sqrt(Vector512 vector) => Vector512.Sqrt(vector); -#pragma warning disable CS8500 // This takes 
the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector512 source, T* destination) => Vector512.Store(source, destination); + static void ISimdVector, T>.Store(Vector512 source, T* destination) => source.Store(destination); /// - static void ISimdVector, T>.StoreAligned(Vector512 source, T* destination) => Vector512.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector512 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector512 source, T* destination) => Vector512.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector512 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination) => Vector512.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination, nuint elementOffset) => Vector512.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector512 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector512 ISimdVector, T>.Subtract(Vector512 left, Vector512 right) => Vector512.Subtract(left, right); + static Vector512 ISimdVector, T>.Subtract(Vector512 left, Vector512 right) => left - right; /// static T ISimdVector, T>.Sum(Vector512 vector) => Vector512.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector512 vector) => Vector512.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector512 vector) => vector.ToScalar(); /// - static bool 
ISimdVector, T>.TryCopyTo(Vector512 vector, Span destination) => Vector512.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector512 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector512 ISimdVector, T>.WithElement(Vector512 vector, int index, T value) => Vector512.WithElement(vector, index, value); + static Vector512 ISimdVector, T>.WithElement(Vector512 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector512 ISimdVector, T>.Xor(Vector512 left, Vector512 right) => Vector512.Xor(left, right); + static Vector512 ISimdVector, T>.Xor(Vector512 left, Vector512 right) => left ^ right; // // New Surface Area diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 743aad0a6503f..908d28f9ef153 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -9,10 +9,12 @@ using System.Runtime.Intrinsics.X86; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { /// Provides a collection of static methods for creating, manipulating, and otherwise operating on 64-bit vectors. 
- public static class Vector64 + public static unsafe class Vector64 { internal const int Size = 8; @@ -44,7 +46,48 @@ public static Vector64 Abs(Vector64 vector) { return vector; } - else + else if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 vector) + { + if (typeof(T) == typeof(float)) + { + return AdvSimd.Abs(vector.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.AbsScalar(vector.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Abs(vector.AsSByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Abs(vector.AsInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Abs(vector.AsInt32()).As(); + } + else if (sizeof(T) == 8) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.AbsScalar(vector.AsInt64()).As(); + } + } + return SoftwareImpl(vector); + } + + static Vector64 SoftwareImpl(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -272,14 +315,14 @@ internal static Vector64 Ceiling(Vector64 vector) /// The type of , , and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 ConditionalSelect(Vector64 condition, Vector64 left, Vector64 right) => (left & condition) | (right & ~condition); + public static Vector64 ConditionalSelect(Vector64 condition, Vector64 left, Vector64 right) => (left & condition) | AndNot(right, condition); /// Converts a to a . /// The vector to convert. /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToDouble(Vector64 vector) + public static Vector64 ConvertToDouble(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -298,7 +341,7 @@ public static unsafe Vector64 ConvertToDouble(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToDouble(Vector64 vector) + public static Vector64 ConvertToDouble(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -316,7 +359,7 @@ public static unsafe Vector64 ConvertToDouble(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt32(Vector64 vector) + public static Vector64 ConvertToInt32(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -334,7 +377,7 @@ public static unsafe Vector64 ConvertToInt32(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt32Native(Vector64 vector) + public static Vector64 ConvertToInt32Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -352,7 +395,7 @@ public static unsafe Vector64 ConvertToInt32Native(Vector64 vector) /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt64(Vector64 vector) + public static Vector64 ConvertToInt64(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -370,7 +413,7 @@ public static unsafe Vector64 ConvertToInt64(Vector64 vector) /// The converted vector. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToInt64Native(Vector64 vector) + public static Vector64 ConvertToInt64Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -388,7 +431,7 @@ public static unsafe Vector64 ConvertToInt64Native(Vector64 vector /// The converted vector. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToSingle(Vector64 vector) + public static Vector64 ConvertToSingle(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -407,7 +450,7 @@ public static unsafe Vector64 ConvertToSingle(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToSingle(Vector64 vector) + public static Vector64 ConvertToSingle(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -426,7 +469,7 @@ public static unsafe Vector64 ConvertToSingle(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt32(Vector64 vector) + public static Vector64 ConvertToUInt32(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -445,7 +488,7 @@ public static unsafe Vector64 ConvertToUInt32(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt32Native(Vector64 vector) + public static Vector64 ConvertToUInt32Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -464,7 +507,7 @@ public static unsafe Vector64 ConvertToUInt32Native(Vector64 vector [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt64(Vector64 vector) + public static Vector64 ConvertToUInt64(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -483,7 +526,7 @@ public static unsafe Vector64 ConvertToUInt64(Vector64 vector) 
[Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 ConvertToUInt64Native(Vector64 vector) + public static Vector64 ConvertToUInt64Native(Vector64 vector) { Unsafe.SkipInit(out Vector64 result); @@ -526,7 +569,7 @@ public static void CopyTo(this Vector64 vector, T[] destination) /// The type of and () is not supported. /// is null. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CopyTo(this Vector64 vector, T[] destination, int startIndex) + public static void CopyTo(this Vector64 vector, T[] destination, int startIndex) { // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons @@ -566,7 +609,7 @@ public static void CopyTo(this Vector64 vector, Span destination) /// A new with all elements initialized to . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(T value) + public static Vector64 Create(T value) { Unsafe.SkipInit(out Vector64 result); @@ -583,46 +626,46 @@ public static unsafe Vector64 Create(T value) /// On x86, this method corresponds to __m64 _mm_set1_pi8 /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(byte value) => Create(value); + public static Vector64 Create(byte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(double value) => Create(value); + public static Vector64 Create(double value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi16 /// A new with all elements initialized to . 
[Intrinsic] - public static unsafe Vector64 Create(short value) => Create(value); + public static Vector64 Create(short value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// On x86, this method corresponds to __m64 _mm_set1_pi32 /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(int value) => Create(value); + public static Vector64 Create(int value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(long value) => Create(value); + public static Vector64 Create(long value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(nint value) => Create(value); + public static Vector64 Create(nint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(nuint value) => Create(value); + public static Vector64 Create(nuint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -630,13 +673,13 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(sbyte value) => Create(value); + public static Vector64 Create(sbyte value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] - public static unsafe Vector64 Create(float value) => Create(value); + public static Vector64 Create(float value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -644,7 +687,7 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(ushort value) => Create(value); + public static Vector64 Create(ushort value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. @@ -652,14 +695,14 @@ public static unsafe Vector64 Create(T value) /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(uint value) => Create(value); + public static Vector64 Create(uint value) => Create(value); /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Create(ulong value) => Create(value); + public static Vector64 Create(ulong value) => Create(value); /// Creates a new from a given array. /// The type of the elements in the vector. @@ -732,7 +775,7 @@ public static Vector64 Create(ReadOnlySpan values) /// A new with each element initialized to corresponding specified value. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) + public static Vector64 Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -755,7 +798,7 @@ public static unsafe Vector64 Create(byte e0, byte e1, byte e2, byte e3, b /// A new with each element initialized to corresponding specified value. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(short e0, short e1, short e2, short e3) + public static Vector64 Create(short e0, short e1, short e2, short e3) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -772,7 +815,7 @@ public static unsafe Vector64 Create(short e0, short e1, short e2, short /// A new with each element initialized to corresponding specified value. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(int e0, int e1) + public static Vector64 Create(int e0, int e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -794,7 +837,7 @@ public static unsafe Vector64 Create(int e0, int e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) + public static Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -814,7 +857,7 @@ public static unsafe Vector64 Create(sbyte e0, sbyte e1, sbyte e2, sbyte /// A new with each element initialized to corresponding specified value. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(float e0, float e1) + public static Vector64 Create(float e0, float e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -832,7 +875,7 @@ public static unsafe Vector64 Create(float e0, float e1) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) + public static Vector64 Create(ushort e0, ushort e1, ushort e2, ushort e3) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -850,7 +893,7 @@ public static unsafe Vector64 Create(ushort e0, ushort e1, ushort e2, us [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Create(uint e0, uint e1) + public static Vector64 Create(uint e0, uint e1) { Unsafe.SkipInit(out Vector64 result); result.SetElementUnsafe(0, e0); @@ -865,7 +908,7 @@ public static unsafe Vector64 Create(uint e0, uint e1) /// The type of () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 CreateScalar(T value) + public static Vector64 CreateScalar(T value) { Vector64 result = Vector64.Zero; result.SetElementUnsafe(0, value); @@ -876,78 +919,78 @@ public static unsafe Vector64 CreateScalar(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(byte value) => CreateScalar(value); + public static Vector64 CreateScalar(byte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(double value) => CreateScalar(value); + public static Vector64 CreateScalar(double value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(short value) => CreateScalar(value); + public static Vector64 CreateScalar(short value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(int value) => CreateScalar(value); + public static Vector64 CreateScalar(int value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(long value) => CreateScalar(value); + public static Vector64 CreateScalar(long value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] - public static unsafe Vector64 CreateScalar(nint value) => CreateScalar(value); + public static Vector64 CreateScalar(nint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(nuint value) => CreateScalar(value); + public static Vector64 CreateScalar(nuint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(sbyte value) => CreateScalar(value); + public static Vector64 CreateScalar(sbyte value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] - public static unsafe Vector64 CreateScalar(float value) => CreateScalar(value); + public static Vector64 CreateScalar(float value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
[Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(ushort value) => CreateScalar(value); + public static Vector64 CreateScalar(ushort value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(uint value) => CreateScalar(value); + public static Vector64 CreateScalar(uint value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalar(ulong value) => CreateScalar(value); + public static Vector64 CreateScalar(ulong value) => CreateScalar(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The type of the elements in the vector. @@ -972,78 +1015,78 @@ public static Vector64 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(byte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(double value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(short value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(int value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(long value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(nint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(nuint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] - public static unsafe Vector64 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(float value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. 
/// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(ushort value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(uint value) => CreateScalarUnsafe(value); /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements left uninitialized. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); + public static Vector64 CreateScalarUnsafe(ulong value) => CreateScalarUnsafe(value); /// Creates a new instance where the elements begin at a specified value and which are spaced apart according to another specified value. /// The type of the elements in the vector. @@ -1565,7 +1608,6 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) return false; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Loads a vector from the given source. /// The type of the elements in the vector. /// The source from which the vector will be loaded. 
@@ -1573,7 +1615,7 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 Load(T* source) => LoadUnsafe(ref *source); + public static Vector64 Load(T* source) => LoadUnsafe(ref *source); /// Loads a vector from the given aligned source. /// The type of the elements in the vector. @@ -1583,7 +1625,7 @@ public static bool LessThanOrEqualAny(Vector64 left, Vector64 right) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 LoadAligned(T* source) + public static Vector64 LoadAligned(T* source) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -1603,8 +1645,7 @@ public static unsafe Vector64 LoadAligned(T* source) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe Vector64 LoadAlignedNonTemporal(T* source) => LoadAligned(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static Vector64 LoadAlignedNonTemporal(T* source) => LoadAligned(source); /// Loads a vector from the given source. /// The type of the elements in the vector. @@ -1795,7 +1836,7 @@ public static Vector64 Min(Vector64 left, Vector64 right) /// The product of and . /// The type of and () is not supported. [Intrinsic] - public static Vector64 Multiply(T left, Vector64 right) => left * right; + public static Vector64 Multiply(T left, Vector64 right) => right * left; /// Computes an estimate of ( * ) + . /// The vector to be multiplied with . @@ -1851,7 +1892,7 @@ public static Vector64 MultiplyAddEstimate(Vector64 left, Vector64 /// A containing elements narrowed from and . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1877,7 +1918,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1902,7 +1943,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1927,7 +1968,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 u /// A containing elements narrowed from and . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1953,7 +1994,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 u [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -1979,7 +2020,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); @@ -2005,7 +2046,7 @@ public static unsafe Vector64 Narrow(Vector64 lower, Vector64 Narrow(Vector64 lower, Vector64 upper) + public static Vector64 Narrow(Vector64 lower, Vector64 upper) { Unsafe.SkipInit(out Vector64 result); 
@@ -2438,7 +2479,6 @@ public static Vector64 Sqrt(Vector64 vector) return result; } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Stores a vector at the given destination. /// The type of the elements in the vector. /// The vector that will be stored. @@ -2446,7 +2486,7 @@ public static Vector64 Sqrt(Vector64 vector) /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void Store(this Vector64 source, T* destination) => source.StoreUnsafe(ref *destination); + public static void Store(this Vector64 source, T* destination) => source.StoreUnsafe(ref *destination); /// Stores a vector at the given aligned destination. /// The type of the elements in the vector. @@ -2456,7 +2496,7 @@ public static Vector64 Sqrt(Vector64 vector) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void StoreAligned(this Vector64 source, T* destination) + public static void StoreAligned(this Vector64 source, T* destination) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -2476,8 +2516,7 @@ public static unsafe void StoreAligned(this Vector64 source, T* destinatio /// The type of () is not supported. [Intrinsic] [CLSCompliant(false)] - public static unsafe void StoreAlignedNonTemporal(this Vector64 source, T* destination) => source.StoreAligned(destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + public static void StoreAlignedNonTemporal(this Vector64 source, T* destination) => source.StoreAligned(destination); /// Stores a vector at the given destination. /// The type of the elements in the vector. @@ -2572,7 +2611,7 @@ public static Vector128 ToVector128(this Vector64 vector) /// The type of () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector128 ToVector128Unsafe(this Vector64 vector) + public static Vector128 ToVector128Unsafe(this Vector64 vector) { ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); @@ -2607,46 +2646,46 @@ public static bool TryCopyTo(this Vector64 vector, Span destination) /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . 
[CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens a into two . /// The vector whose elements are to be widened. /// A pair of vectors that contain the widened lower and upper halves of . [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); + public static (Vector64 Lower, Vector64 Upper) Widen(Vector64 source) => (WidenLower(source), WidenUpper(source)); /// Widens the lower half of a into a . /// The vector whose elements are to be widened. @@ -2672,7 +2711,7 @@ public static Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2690,7 +2729,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2709,7 +2748,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2727,7 +2766,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) /// A vector that contain the widened lower half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2746,7 +2785,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2765,7 +2804,7 @@ public static unsafe Vector64 WidenLower(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenLower(Vector64 source) + public static Vector64 WidenLower(Vector64 source) { Unsafe.SkipInit(out Vector64 lower); @@ -2802,7 +2841,7 @@ public static Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2820,7 +2859,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2839,7 +2878,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2857,7 +2896,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) /// A vector that contain the widened upper half of . 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2876,7 +2915,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); @@ -2895,7 +2934,7 @@ public static unsafe Vector64 WidenUpper(Vector64 source) [Intrinsic] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector64 WidenUpper(Vector64 source) + public static Vector64 WidenUpper(Vector64 source) { Unsafe.SkipInit(out Vector64 upper); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs index 23730bb5b0090..bd87e547eb712 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs @@ -7,8 +7,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; using System.Text; +#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + namespace System.Runtime.Intrinsics { // We mark certain methods with AggressiveInlining to ensure that the JIT will @@ -38,22 +41,12 @@ namespace System.Runtime.Intrinsics public static Vector64 AllBitsSet { [Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get - { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = ulong.MaxValue; - - return result; 
- } + get => Vector64.Create(Scalar.AllBitsSet); } -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets the number of that are in a . /// The type of the vector () is not supported. - public static unsafe int Count + public static int Count { [Intrinsic] get @@ -62,7 +55,6 @@ public static unsafe int Count return Vector64.Size / sizeof(T); } } -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// Gets a new with the elements set to their index. /// The type of the vector () is not supported. @@ -145,15 +137,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator +(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Add(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.AddScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Add(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Add(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Add(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.AddScalar(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 
left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Computes the bitwise-and of two vectors. @@ -165,12 +197,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator &(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_BitwiseAnd is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.And(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.And(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.And(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.And(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 & right._00; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 & right._00; - return result; + return result; + } } /// Computes the bitwise-or of two vectors. 
@@ -182,12 +250,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator |(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_BitwiseOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Or(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Or(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Or(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Or(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } + + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 | right._00; + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 | right._00; - return result; + return result; + } } /// Divides two vectors to compute their quotient. 
@@ -199,15 +303,43 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator /(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.Divide(left.AsSingle(), right.AsSingle()).As(); + } + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.DivideScalar(left.AsDouble(), right.AsDouble()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Divides a vector by a scalar to compute the per-element quotient. @@ -215,19 +347,7 @@ public static Vector64 Zero /// The scalar that will divide . /// The quotient of divided by . 
[Intrinsic] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 operator /(Vector64 left, T right) - { - Unsafe.SkipInit(out Vector64 result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Divide(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector64 operator /(Vector64 left, T right) => left / Vector64.Create(right); /// Compares two vectors to determine if all elements are equal. /// The vector to compare with . @@ -257,12 +377,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator ^(Vector64 left, Vector64 right) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_ExclusiveOr is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. + + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) + { + if (sizeof(T) == 1) + { + return AdvSimd.Xor(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Xor(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Xor(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Xor(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = left._00 ^ right._00; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); - return result; + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = left._00 ^ right._00; + + 
return result; + } } /// Compares two vectors to determine if any elements are not equal. @@ -281,15 +437,47 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator <<(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsByte(), Vector64.Create((sbyte)(shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsUInt16(), Vector64.Create((short)(shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsUInt32(), Vector64.Create(shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogicalScalar(value.AsUInt64(), Vector64.Create(shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 value, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T element = Scalar.ShiftLeft(value.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, element); + } + + return result; + } } /// Multiplies two vectors to compute their element-wise product. 
@@ -301,15 +489,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator *(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Multiply(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.MultiplyScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Multiply(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Multiply(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Multiply(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Multiplies a vector by a scalar to compute their product. @@ -319,18 +547,7 @@ public static Vector64 Zero /// The type of the vector () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector64 operator *(Vector64 left, T right) - { - Unsafe.SkipInit(out Vector64 result); - - for (int index = 0; index < Count; index++) - { - T value = Scalar.Multiply(left.GetElementUnsafe(index), right); - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector64 operator *(Vector64 left, T right) => left * Vector64.Create(right); /// Multiplies a vector by a scalar to compute their product. /// The scalar to multiply with . @@ -338,7 +555,7 @@ public static Vector64 Zero /// The product of and . /// The type of the vector () is not supported. [Intrinsic] - public static Vector64 operator *(T left, Vector64 right) => right * left; + public static Vector64 operator *(T left, Vector64 right) => Vector64.Create(left) * right; /// Computes the ones-complement of a vector. /// The vector whose ones-complement is to be computed. @@ -348,12 +565,48 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator ~(Vector64 vector) { - ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + // While op_OnesComplement is technically size independent, there are + // some opportunistic lightup optimizations that can kick in depending + // on the size of T. One such example is embedded masking. 
+ + if (AdvSimd.IsSupported) + { + return ArmImpl(vector); + } + return SoftwareImpl(vector); + + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 vector) + { + if (sizeof(T) == 1) + { + return AdvSimd.Not(vector.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Not(vector.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.Not(vector.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.Not(vector.AsUInt64()).As(); + } + return SoftwareImpl(vector); + } - Unsafe.SkipInit(out Vector64 result); - Unsafe.AsRef(in result._00) = ~vector._00; + static Vector64 SoftwareImpl(Vector64 vector) + { + ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType(); + + Unsafe.SkipInit(out Vector64 result); + Unsafe.AsRef(in result._00) = ~vector._00; - return result; + return result; + } } /// Shifts (signed) each element of a vector right by the specified amount. @@ -364,15 +617,55 @@ public static Vector64 Zero [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator >>(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if ((typeof(T) == typeof(byte)) + || (typeof(T) == typeof(ushort)) + || (typeof(T) == typeof(uint)) + || (typeof(T) == typeof(ulong)) + || (typeof(T) == typeof(nuint))) + { + return value >>> shiftCount; + } + else if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftArithmetic(value.AsSByte(), Vector64.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else 
if (sizeof(T) == 2) + { + return AdvSimd.ShiftArithmetic(value.AsInt16(), Vector64.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftArithmetic(value.AsInt32(), Vector64.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftArithmeticScalar(value.AsInt64(), Vector64.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 left, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.ShiftRightArithmetic(left.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Subtracts two vectors to compute their difference. @@ -384,15 +677,55 @@ public static Vector64 operator >>(Vector64 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator -(Vector64 left, Vector64 right) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(left, right); + } + return SoftwareImpl(left, right); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 left, Vector64 right) { - T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); - result.SetElementUnsafe(index, value); + if (typeof(T) == typeof(float)) + { + return AdvSimd.Subtract(left.AsSingle(), right.AsSingle()).As(); + } + else if (typeof(T) == typeof(double)) + { + return AdvSimd.SubtractScalar(left.AsDouble(), right.AsDouble()).As(); + } + else if (sizeof(T) == 1) + { + return AdvSimd.Subtract(left.AsByte(), right.AsByte()).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.Subtract(left.AsUInt16(), right.AsUInt16()).As(); + } + else if (sizeof(T) == 4) + { + return 
AdvSimd.Subtract(left.AsUInt32(), right.AsUInt32()).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.SubtractScalar(left.AsUInt64(), right.AsUInt64()).As(); + } + return SoftwareImpl(left, right); } - return result; + static Vector64 SoftwareImpl(Vector64 left, Vector64 right) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Count; index++) + { + T value = Scalar.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Computes the unary negation of a vector. @@ -421,15 +754,47 @@ public static Vector64 operator >>(Vector64 value, int shiftCount) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 operator >>>(Vector64 value, int shiftCount) { - Unsafe.SkipInit(out Vector64 result); + if (AdvSimd.IsSupported) + { + return ArmImpl(value, shiftCount); + } + return SoftwareImpl(value, shiftCount); - for (int index = 0; index < Count; index++) + [CompExactlyDependsOn(typeof(AdvSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector64 ArmImpl(Vector64 value, int shiftCount) { - T element = Scalar.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount); - result.SetElementUnsafe(index, element); + if (sizeof(T) == 1) + { + return AdvSimd.ShiftLogical(value.AsSByte(), Vector64.Create((sbyte)(-shiftCount & 0x7))).As(); + } + else if (sizeof(T) == 2) + { + return AdvSimd.ShiftLogical(value.AsInt16(), Vector64.Create((short)(-shiftCount & 0xF))).As(); + } + else if (sizeof(T) == 4) + { + return AdvSimd.ShiftLogical(value.AsInt32(), Vector64.Create(-shiftCount & 0x1F)).As(); + } + else if (sizeof(T) == 8) + { + return AdvSimd.ShiftLogicalScalar(value.AsInt64(), Vector64.Create(-shiftCount & 0x3F)).As(); + } + return SoftwareImpl(value, shiftCount); } - return result; + static Vector64 SoftwareImpl(Vector64 left, int shiftCount) + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < 
Count; index++) + { + T value = Scalar.ShiftRightLogical(left.GetElementUnsafe(index), shiftCount); + result.SetElementUnsafe(index, value); + } + + return result; + } } /// Determines whether the specified object is equal to the current instance. @@ -531,16 +896,16 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Abs(Vector64 vector) => Vector64.Abs(vector); /// - static Vector64 ISimdVector, T>.Add(Vector64 left, Vector64 right) => Vector64.Add(left, right); + static Vector64 ISimdVector, T>.Add(Vector64 left, Vector64 right) => left + right; /// static Vector64 ISimdVector, T>.AndNot(Vector64 left, Vector64 right) => Vector64.AndNot(left, right); /// - static Vector64 ISimdVector, T>.BitwiseAnd(Vector64 left, Vector64 right) => Vector64.BitwiseAnd(left, right); + static Vector64 ISimdVector, T>.BitwiseAnd(Vector64 left, Vector64 right) => left & right; /// - static Vector64 ISimdVector, T>.BitwiseOr(Vector64 left, Vector64 right) => Vector64.BitwiseOr(left, right); + static Vector64 ISimdVector, T>.BitwiseOr(Vector64 left, Vector64 right) => left | right; /// static Vector64 ISimdVector, T>.Ceiling(Vector64 vector) => Vector64.Ceiling(vector); @@ -576,10 +941,10 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.CreateScalarUnsafe(T value) => Vector64.CreateScalarUnsafe(value); /// - static Vector64 ISimdVector, T>.Divide(Vector64 left, Vector64 right) => Vector64.Divide(left, right); + static Vector64 ISimdVector, T>.Divide(Vector64 left, Vector64 right) => left / right; /// - static Vector64 ISimdVector, T>.Divide(Vector64 left, T right) => Vector64.Divide(left, right); + static Vector64 ISimdVector, T>.Divide(Vector64 left, T right) => left / right; /// static T ISimdVector, T>.Dot(Vector64 left, Vector64 right) => Vector64.Dot(left, right); @@ -588,7 +953,7 @@ private string 
ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Equals(Vector64 left, Vector64 right) => Vector64.Equals(left, right); /// - static bool ISimdVector, T>.EqualsAll(Vector64 left, Vector64 right) => Vector64.EqualsAll(left, right); + static bool ISimdVector, T>.EqualsAll(Vector64 left, Vector64 right) => left == right; /// static bool ISimdVector, T>.EqualsAny(Vector64 left, Vector64 right) => Vector64.EqualsAny(left, right); @@ -597,7 +962,7 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, T>.Floor(Vector64 vector) => Vector64.Floor(vector); /// - static T ISimdVector, T>.GetElement(Vector64 vector, int index) => Vector64.GetElement(vector, index); + static T ISimdVector, T>.GetElement(Vector64 vector, int index) => vector.GetElement(index); /// static Vector64 ISimdVector, T>.GreaterThan(Vector64 left, Vector64 right) => Vector64.GreaterThan(left, right); @@ -635,7 +1000,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static bool ISimdVector, T>.LessThanOrEqualAny(Vector64 left, Vector64 right) => Vector64.LessThanOrEqualAny(left, right); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector64 ISimdVector, T>.Load(T* source) => Vector64.Load(source); @@ -644,7 +1008,6 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri /// static Vector64 ISimdVector, T>.LoadAlignedNonTemporal(T* source) => Vector64.LoadAlignedNonTemporal(source); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// static Vector64 ISimdVector, T>.LoadUnsafe(ref readonly T source) => Vector64.LoadUnsafe(in source); @@ -659,63 +1022,61 @@ private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] stri static Vector64 ISimdVector, 
T>.Min(Vector64 left, Vector64 right) => Vector64.Min(left, right); /// - static Vector64 ISimdVector, T>.Multiply(Vector64 left, Vector64 right) => Vector64.Multiply(left, right); + static Vector64 ISimdVector, T>.Multiply(Vector64 left, Vector64 right) => left * right; /// - static Vector64 ISimdVector, T>.Multiply(Vector64 left, T right) => Vector64.Multiply(left, right); + static Vector64 ISimdVector, T>.Multiply(Vector64 left, T right) => left * right; /// - static Vector64 ISimdVector, T>.Negate(Vector64 vector) => Vector64.Negate(vector); + static Vector64 ISimdVector, T>.Negate(Vector64 vector) => -vector; /// - static Vector64 ISimdVector, T>.OnesComplement(Vector64 vector) => Vector64.OnesComplement(vector); + static Vector64 ISimdVector, T>.OnesComplement(Vector64 vector) => ~vector; /// - static Vector64 ISimdVector, T>.ShiftLeft(Vector64 vector, int shiftCount) => Vector64.ShiftLeft(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftLeft(Vector64 vector, int shiftCount) => vector << shiftCount; /// - static Vector64 ISimdVector, T>.ShiftRightArithmetic(Vector64 vector, int shiftCount) => Vector64.ShiftRightArithmetic(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftRightArithmetic(Vector64 vector, int shiftCount) => vector >> shiftCount; /// - static Vector64 ISimdVector, T>.ShiftRightLogical(Vector64 vector, int shiftCount) => Vector64.ShiftRightLogical(vector, shiftCount); + static Vector64 ISimdVector, T>.ShiftRightLogical(Vector64 vector, int shiftCount) => vector >>> shiftCount; /// static Vector64 ISimdVector, T>.Sqrt(Vector64 vector) => Vector64.Sqrt(vector); -#pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') /// - static void ISimdVector, T>.Store(Vector64 source, T* destination) => Vector64.Store(source, destination); + static void ISimdVector, T>.Store(Vector64 source, T* destination) => source.Store(destination); /// - static void ISimdVector, 
T>.StoreAligned(Vector64 source, T* destination) => Vector64.StoreAligned(source, destination); + static void ISimdVector, T>.StoreAligned(Vector64 source, T* destination) => source.StoreAligned(destination); /// - static void ISimdVector, T>.StoreAlignedNonTemporal(Vector64 source, T* destination) => Vector64.StoreAlignedNonTemporal(source, destination); -#pragma warning restore CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type ('T') + static void ISimdVector, T>.StoreAlignedNonTemporal(Vector64 source, T* destination) => source.StoreAlignedNonTemporal(destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination) => Vector64.StoreUnsafe(vector, ref destination); + static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination) => vector.StoreUnsafe(ref destination); /// - static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination, nuint elementOffset) => Vector64.StoreUnsafe(vector, ref destination, elementOffset); + static void ISimdVector, T>.StoreUnsafe(Vector64 vector, ref T destination, nuint elementOffset) => vector.StoreUnsafe(ref destination, elementOffset); /// - static Vector64 ISimdVector, T>.Subtract(Vector64 left, Vector64 right) => Vector64.Subtract(left, right); + static Vector64 ISimdVector, T>.Subtract(Vector64 left, Vector64 right) => left - right; /// static T ISimdVector, T>.Sum(Vector64 vector) => Vector64.Sum(vector); /// - static T ISimdVector, T>.ToScalar(Vector64 vector) => Vector64.ToScalar(vector); + static T ISimdVector, T>.ToScalar(Vector64 vector) => vector.ToScalar(); /// - static bool ISimdVector, T>.TryCopyTo(Vector64 vector, Span destination) => Vector64.TryCopyTo(vector, destination); + static bool ISimdVector, T>.TryCopyTo(Vector64 vector, Span destination) => vector.TryCopyTo(destination); /// - static Vector64 ISimdVector, T>.WithElement(Vector64 vector, int index, T value) => Vector64.WithElement(vector, index, value); + 
static Vector64 ISimdVector, T>.WithElement(Vector64 vector, int index, T value) => vector.WithElement(index, value); /// - static Vector64 ISimdVector, T>.Xor(Vector64 left, Vector64 right) => Vector64.Xor(left, right); + static Vector64 ISimdVector, T>.Xor(Vector64 left, Vector64 right) => left ^ right; // // New Surface Area