From e1a5ac91397faca2cb2c29aaedbb2933a8c398eb Mon Sep 17 00:00:00 2001
From: "Aman Khalid (from Dev Box)"
Date: Mon, 15 Jul 2024 12:37:32 -0400
Subject: [PATCH 01/10] Add AddRotateComplex

---
 src/coreclr/jit/codegenarm64test.cpp | 8 +-
 src/coreclr/jit/emitarm64sve.cpp | 4 +-
 src/coreclr/jit/hwintrinsicarm64.cpp | 5 +
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 206 ++++++++++--------
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 +
 src/coreclr/jit/lowerarmarch.cpp | 1 +
 .../Arm/Sve.PlatformNotSupported.cs | 20 ++
 .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 20 ++
 .../ref/System.Runtime.Intrinsics.cs | 3 +
 .../GenerateHWIntrinsicTests_Arm.cs | 10 +-
 .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 38 ++++
 .../_SveImmBinaryOpTestTemplate.template | 39 +++-
 12 files changed, 246 insertions(+), 109 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp
index 0406bc4a6e19a..588c968846c9f 100644
--- a/src/coreclr/jit/codegenarm64test.cpp
+++ b/src/coreclr/jit/codegenarm64test.cpp
@@ -8391,13 +8391,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
                                 INS_OPTS_SCALABLE_D); // ST1B {.D }, , [, .D]

     // IF_SVE_GP_3A
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 90,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 0,
                                 INS_OPTS_SCALABLE_H); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_H); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_S); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_D); // FCADD ., /M, ., .,

     // IF_SVE_GT_4A
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index 589cc1b29bae6..22cb131827e48 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -4410,7 +4410,6 @@ void emitter::emitInsSve_R_R_R(instruction ins,
 /*****************************************************************************
 *
 * Add a SVE instruction referencing three registers and a constant.
- * Do not call this directly. Use 'emitIns_R_R_R_I' instead.
 */

 void emitter::emitInsSve_R_R_R_I(instruction ins,
@@ -5577,7 +5576,7 @@ void emitter::emitInsSve_R_R_R_I(instruction ins,
            assert(isLowPredicateRegister(reg2));
            assert(isVectorRegister(reg3));
            assert(isScalableVectorSize(size));
-           imm = emitEncodeRotationImm90_or_270(imm);
+           assert(emitIsValidEncodedRotationImm90_or_270(imm));
            fmt = IF_SVE_GP_3A;
            break;

@@ -5860,7 +5859,6 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,
 /*****************************************************************************
 *
 * Add a SVE instruction referencing four registers.
- * Do not call this directly. Use 'emitIns_R_R_R_R' instead.
*/ void emitter::emitInsSve_R_R_R_R(instruction ins, diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a3fed94ee71a8..d1e33b3bc00ab 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -504,6 +504,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = (int)SVE_PRFOP_CONST15; break; + case NI_Sve_AddRotateComplex: + immLowerBound = 0; + immUpperBound = 1; + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bb9e340d03d37..5bd8685cd28ad 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -706,102 +706,105 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 3: { assert(instrIsRMW); - assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); - assert(falseReg != embMaskOp3Reg); - - // For FMA, the operation we are trying to perform is: - // result = op1 + (op2 * op3) - // - // There are two instructions that can be used depending on which operand's register, - // optionally, will store the final result. - // - // 1. If the result is stored in the operand that was used as an "addend" in the operation, - // then we use `FMLA` format: - // reg1 = reg1 + (reg2 * reg3) - // - // 2. If the result is stored in the operand that was used as a "multiplicand" in the operation, - // then we use `FMAD` format: - // reg1 = (reg1 * reg2) + reg3 - // - // Check if the result's register is same as that of one of the operand's register and accordingly - // pick the appropriate format. Suppose `targetReg` holds the result, then we have following cases: - // - // Case# 1: Result is stored in the operand that held the "addend" - // targetReg == reg1 - // - // We generate the FMLA instruction format and no further changes are needed. - // - // Case# 2: Result is stored in the operand `op2` that held the "multiplicand" - // targetReg == reg2 - // - // So we basically have an operation: - // reg2 = reg1 + (reg2 * reg3) - // - // Since, the result will be stored in the "multiplicand", we pick format `FMAD`. - // Then, we rearrange the operands to ensure that the operation is done correctly. - // reg2 = reg1 + (reg2 * reg3) // to start with - // reg2 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 - // reg1 = reg3 + (reg1 * reg2) // swap reg1 <--> reg2 - // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format - // - // Case# 3: Result is stored in the operand `op3` that held the "multiplier" - // targetReg == reg3 - // - // So we basically have an operation: - // reg3 = reg1 + (reg2 * reg3) - // Since, the result will be stored in the "multiplier", we again pick format `FMAD`. - // Then, we rearrange the operands to ensure that the operation is done correctly. - // reg3 = reg1 + (reg2 * reg3) // to start with - // reg1 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 - // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format - - bool useAddend = true; - if (targetReg == embMaskOp2Reg) - { - // Case# 2 - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp3Reg); - std::swap(embMaskOp1Reg, embMaskOp2Reg); - } - else if (targetReg == embMaskOp3Reg) - { - // Case# 3 - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp3Reg); - } - else - { - // Case# 1 - } - switch (intrinEmbMask.id) + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { - case NI_Sve_FusedMultiplyAdd: - insEmbMask = useAddend ? 
INS_sve_fmla : INS_sve_fmad; - break; + assert(falseReg != embMaskOp3Reg); + // For FMA, the operation we are trying to perform is: + // result = op1 + (op2 * op3) + // + // There are two instructions that can be used depending on which operand's register, + // optionally, will store the final result. + // + // 1. If the result is stored in the operand that was used as an "addend" in the operation, + // then we use `FMLA` format: + // reg1 = reg1 + (reg2 * reg3) + // + // 2. If the result is stored in the operand that was used as a "multiplicand" in the operation, + // then we use `FMAD` format: + // reg1 = (reg1 * reg2) + reg3 + // + // Check if the result's register is same as that of one of the operand's register and + // accordingly pick the appropriate format. Suppose `targetReg` holds the result, then we have + // following cases: + // + // Case# 1: Result is stored in the operand that held the "addend" + // targetReg == reg1 + // + // We generate the FMLA instruction format and no further changes are needed. + // + // Case# 2: Result is stored in the operand `op2` that held the "multiplicand" + // targetReg == reg2 + // + // So we basically have an operation: + // reg2 = reg1 + (reg2 * reg3) + // + // Since, the result will be stored in the "multiplicand", we pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg2 = reg1 + (reg2 * reg3) // to start with + // reg2 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = reg3 + (reg1 * reg2) // swap reg1 <--> reg2 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + // + // Case# 3: Result is stored in the operand `op3` that held the "multiplier" + // targetReg == reg3 + // + // So we basically have an operation: + // reg3 = reg1 + (reg2 * reg3) + // Since, the result will be stored in the "multiplier", we again pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg3 = reg1 + (reg2 * reg3) // to start with + // reg1 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + + bool useAddend = true; + if (targetReg == embMaskOp2Reg) + { + // Case# 2 + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + std::swap(embMaskOp1Reg, embMaskOp2Reg); + } + else if (targetReg == embMaskOp3Reg) + { + // Case# 3 + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + } + else + { + // Case# 1 + } - case NI_Sve_FusedMultiplyAddNegated: - insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; - break; + switch (intrinEmbMask.id) + { + case NI_Sve_FusedMultiplyAdd: + insEmbMask = useAddend ? INS_sve_fmla : INS_sve_fmad; + break; - case NI_Sve_FusedMultiplySubtract: - insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; - break; + case NI_Sve_FusedMultiplyAddNegated: + insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; + break; - case NI_Sve_FusedMultiplySubtractNegated: - insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; - break; + case NI_Sve_FusedMultiplySubtract: + insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; + break; - case NI_Sve_MultiplyAdd: - insEmbMask = useAddend ? INS_sve_mla : INS_sve_mad; - break; + case NI_Sve_FusedMultiplySubtractNegated: + insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; + break; - case NI_Sve_MultiplySubtract: - insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; - break; + case NI_Sve_MultiplyAdd: + insEmbMask = useAddend ? 
INS_sve_mla : INS_sve_mad; + break; - default: - unreached(); + case NI_Sve_MultiplySubtract: + insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; + break; + + default: + unreached(); + } } if (intrin.op3->IsVectorZero()) @@ -811,7 +814,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != embMaskOp2Reg); assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt); } else { @@ -851,8 +855,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // into the targetReg using `sel`. Since this is RMW, the active lanes should // have the value from embMaskOp1Reg - GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, - falseReg, opt); + GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, + embMaskOp1Reg, falseReg, opt); } } else if (targetReg != embMaskOp1Reg) @@ -860,14 +864,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // If target already contains the values of `falseReg`, just merge the lanes from // `embMaskOp1Reg`, again because this is RMW semantics. - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); } } // Finally, perform the desired operation. - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); + if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + helper.ImmValue(), opt); + } + } + else + { + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); + } break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index ab78d2f0f1af6..59cc14902d5d1 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -24,6 +24,7 @@ HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThanOrEqual, HARDWARE_INTRINSIC(Sve, AbsoluteDifference, -1, -1, false, {INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_fabd, INS_sve_fabd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Add, -1, -1, false, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, AddAcross, -1, 1, true, {INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_faddv, INS_sve_faddv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, true, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, And, -1, -1, false, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b04e97844f1f2..2ec3137cb573d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3434,6 +3434,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_PrefetchInt32: case NI_Sve_PrefetchInt64: case NI_Sve_ExtractVector: + case NI_Sve_AddRotateComplex: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op3)); if (intrin.op3->IsCnsIntOrI()) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index bfb5314bb97a7..19be11cd6c79e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -347,6 +347,26 @@ internal Arm64() { } /// public static unsafe Vector AddAcross(Vector value) { throw new PlatformNotSupportedException(); } + /// Complex add with rotate + + /// + /// svfloat64_t svcadd[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svcadd[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new 
PlatformNotSupportedException(); } + /// AddSaturate : Saturating add /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index a0b8086b67992..2b678fbdeb41c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -377,6 +377,26 @@ internal Arm64() { } /// public static unsafe Vector AddAcross(Vector value) => AddAcross(value); + /// Complex add with rotate + + /// + /// svfloat64_t svcadd[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) => AddRotateComplex(left, right, rotation); + + /// + /// svfloat32_t svcadd[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) => AddRotateComplex(left, right, rotation); + /// AddSaturate : Saturating add /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 1afa8c2c7f750..3bd94b13edb07 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4364,6 +4364,9 @@ internal Arm64() { } public static System.Numerics.Vector AddAcross(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector AddAcross(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector AddRotateComplex(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw null; } + public static System.Numerics.Vector AddRotateComplex(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw null; } + public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 6e548bd1926db..3076b270cad15 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ 
b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -280,6 +280,7 @@ ("_SveBinaryOpDifferentTypesTestTemplate.template", "SveVecBinOpDifferentTypesTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveBinaryMaskOpTestTemplate.template", "SveMaskVecBinOpConvertTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), + ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveImmUnaryOpTestTemplate.template", "SveVecImmUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveTernOpTestTemplate.template", "SveVecTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveTernOpTestTemplate.template", "SveVecTernOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), @@ -3113,6 +3114,11 @@ ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_AddAcross_ulong_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateReduceOpResult"] = "Helpers.AddAcrossWidening(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_AddAcross_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", 
["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateReduceOpResult"] = "Helpers.AddAcross(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_float_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_double_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = 
"(sbyte)TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(short)TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), @@ -3850,8 +3856,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", 
["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyExtended_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyExtended", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = 
"TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.MultiplyExtended(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyExtended(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyExtended_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyExtended", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.MultiplyExtended(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyExtended(left[i], right[i])"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 95619d36dfe3b..0efc53fc9e7b2 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -6030,6 +6030,25 @@ private static ulong Pairwise(Func pairOp, ulong[] op1, ulo public static float AddPairwise(float[] op1, float[] op2, int i) => Pairwise(Add, op1, op2, i); + public static float[] AddRotateComplex(float[] op1, float[] op2, byte rot) + { + for (int i = 0; i < op1.Length; i += 2) + { + if (rot == 0) + { + op1[i] -= op2[i + 1]; + op1[i + 1] += op2[i]; + } + else + { + op1[i] += op2[i + 1]; + op1[i + 1] -= op2[i]; + } + } + + return op1; + } + public static float Max(float op1, float op2) => Math.Max(op1, op2); public static float MaxPairwise(float[] op1, int i) => Pairwise(Max, op1, i); @@ -6080,6 +6099,25 @@ private static float Pairwise(Func pairOp, float[] op1, flo public static double AddPairwise(double[] op1, double[] op2, int i) => Pairwise(Add, op1, op2, i); + public static double[] AddRotateComplex(double[] op1, double[] op2, byte rot) + { + for (int i = 0; i < op1.Length; i += 2) + { + if (rot == 0) + { + op1[i] -= op2[i + 1]; + op1[i + 1] += op2[i]; + } + else + { + op1[i] += op2[i + 1]; + op1[i + 1] -= op2[i]; + } + } + + return op1; + } + public static double Max(double op1, double op2) => Math.Max(op1, op2); public static double MaxPairwise(double[] op1, int i) => Pairwise(Max, op1, i); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template index 33d9c59e8dde1..27e0221bc83eb 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template @@ -9,6 +9,7 @@ ******************************************************************************/ using System; +using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -62,6 +63,9 @@ namespace JIT.HardwareIntrinsics.Arm // Validates executing the test inside conditional, with op3 as zero test.ConditionalSelect_ZeroOp(); + + // Validates basic functionality fails with an invalid imm, using Unsafe.ReadUnaligned + test.RunBasicScenario_UnsafeRead_InvalidImm(); } else { @@ -142,7 +146,7 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref 
_data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); return testStruct; @@ -170,7 +174,7 @@ namespace JIT.HardwareIntrinsics.Arm private {Op1VectorType}<{Op1BaseType}> _mask; private {Op1VectorType}<{Op1BaseType}> _fld1; private {Op1VectorType}<{Op1BaseType}> _fld2; - private {Op1VectorType}<{Op1BaseType}> _falseFld; + private {Op1VectorType}<{Op1BaseType}> _falseFld; private DataTable _dataTable; @@ -178,16 +182,16 @@ namespace JIT.HardwareIntrinsics.Arm { Succeeded = true; - for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueMask} % 2); } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } _dataTable = new DataTable(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); } @@ -209,6 +213,31 @@ namespace JIT.HardwareIntrinsics.Arm ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); } + public void RunBasicScenario_UnsafeRead_InvalidImm() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead_InvalidImm)); + + bool succeeded = false; + try + { + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + {InvalidImm} + ); + Console.WriteLine(result); + } + catch (ArgumentOutOfRangeException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + public void RunBasicScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); From d6a0afe01dad956327fa9b68054e5144a7d9ca63 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 17:08:59 -0400 Subject: [PATCH 02/10] Add MultiplyAddRotateComplex; fix template --- src/coreclr/jit/codegenarm64test.cpp | 6 +-- src/coreclr/jit/emitarm64sve.cpp | 6 +-- src/coreclr/jit/hwintrinsicarm64.cpp | 5 +++ 
src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 41 +++++++++++++++++-- src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 1 + src/coreclr/jit/lsraarm64.cpp | 16 +++++++- .../Arm/Sve.PlatformNotSupported.cs | 20 +++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 20 +++++++++ .../ref/System.Runtime.Intrinsics.cs | 3 ++ .../GenerateHWIntrinsicTests_Arm.cs | 18 ++++++-- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 40 ++++++++++++++++++ .../Shared/_SveImmTernOpTestTemplate.template | 32 ++++++++++++++- 13 files changed, 192 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 588c968846c9f..2bc3da3b9d7f8 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -8403,11 +8403,11 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_GT_4A theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P1, REG_V3, REG_V4, 0, INS_OPTS_SCALABLE_H); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_P2, REG_V1, REG_V5, 90, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_P2, REG_V1, REG_V5, 1, INS_OPTS_SCALABLE_S); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 180, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 2, INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 270, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 3, INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., // IF_SVE_GI_4A diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 22cb131827e48..6fb6723eb6381 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -6989,7 +6989,7 @@ void emitter::emitInsSve_R_R_R_R_I(instruction ins, assert(isVectorRegister(reg3)); assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); - imm = emitEncodeRotationImm0_to_270(imm); + assert(emitIsValidEncodedRotationImm0_to_270(imm)); fmt = IF_SVE_GT_4A; break; @@ -9796,7 +9796,7 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins, /*static*/ bool emitter::emitIsValidEncodedRotationImm90_or_270(ssize_t imm) { - return (imm == 0) || (imm == 1); + return isValidUimm<1>(imm); } /************************************************************************ @@ -9865,7 +9865,7 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins, /*static*/ bool emitter::emitIsValidEncodedRotationImm0_to_270(ssize_t imm) { - return (imm >= 0) && (imm <= 3); + return isValidUimm<2>(imm); } /***************************************************************************** diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index d1e33b3bc00ab..2d494b65fd945 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -509,6 +509,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = 1; break; + case NI_Sve_MultiplyAddRotateComplex: + immLowerBound = 0; + immUpperBound = 3; + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5bd8685cd28ad..595d95520d183 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -484,6 +484,12 @@ void 
CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinEmbMask.numOperands) { + case 4: + assert(intrinEmbMask.op4 != nullptr); + assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + FALLTHROUGH; + case 3: assert(intrinEmbMask.op3 != nullptr); embMaskOp3Reg = intrinEmbMask.op3->GetRegNum(); @@ -704,12 +710,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } case 3: + assert(!HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id) || (falseReg != embMaskOp3Reg)); + FALLTHROUGH; + + case 4: { assert(instrIsRMW); if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { - assert(falseReg != embMaskOp3Reg); // For FMA, the operation we are trying to perform is: // result = op1 + (op2 * op3) // @@ -802,6 +811,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; break; + case NI_Sve_MultiplyAddRotateComplex: + assert(useAddend); + break; + default: unreached(); } @@ -870,7 +883,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } // Finally, perform the desired operation. - if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id); + + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + { + if (embHasImmediateOperand) + { + assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + embMaskOp2Reg, embMaskOp3Reg, helper.ImmValue(), + opt); + } + } + else + { + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); + } + } + else if (embHasImmediateOperand) { HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) @@ -881,8 +915,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); + unreached(); } break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 59cc14902d5d1..506fb10e7a7be 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -187,6 +187,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, 
MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 2ec3137cb573d..c28aaf3064d0b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3638,6 +3638,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_FusedMultiplyAddBySelectedScalar: case NI_Sve_FusedMultiplySubtractBySelectedScalar: + case NI_Sve_MultiplyAddRotateComplex: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op4)); if (intrin.op4->IsCnsIntOrI()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d1fb48fc16d93..7a0ee39cc99f4 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1875,8 +1875,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); assert(embOp2Node->isRMWHWIntrinsic(compiler)); - assert(numArgs == 3); + assert((numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); LIR::Use use; GenTree* user = nullptr; @@ -1915,6 +1916,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); + + if (embHasImmediateOperand) + { + assert(numArgs == 4); + srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); + if (!embOp2Node->Op(4)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + } + srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); } else @@ -1926,7 +1938,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // that encodes the immediate if (intrinEmb.id == NI_Sve_ShiftRightArithmeticForDivide) { - assert(embOp2Node->GetOperandCount() == 2); + assert(numArgs == 2); if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) { buildInternalIntRegisterDefForNode(embOp2Node); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 
19be11cd6c79e..2d5ee333213c4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -5737,6 +5737,26 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// Complex multiply-add with rotate + + /// + /// svfloat64_t svcmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svcmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw new PlatformNotSupportedException(); } + /// MultiplyBySelectedScalar : Multiply /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 2b678fbdeb41c..21b62f267546b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -5793,6 +5793,26 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + /// Complex multiply-add with rotate + + /// + /// svfloat64_t svcmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) => MultiplyAddRotateComplex(addend, left, right, rotation); + + /// + /// svfloat32_t svcmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation 
+ /// svfloat32_t svcmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) => MultiplyAddRotateComplex(addend, left, right, rotation); + /// MultiplyBySelectedScalar : Multiply /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 3bd94b13edb07..7225bf11491a6 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -5181,6 +5181,9 @@ internal Arm64() { } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAddRotateComplex(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } + public static System.Numerics.Vector MultiplyAddRotateComplex(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } + public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 3076b270cad15..bd2b2f2a5be2a 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -286,6 +286,7 @@ ("_SveTernOpTestTemplate.template", "SveVecTernOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveTernOpFirstArgTestTemplate.template", "SveVecTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), + ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpVecTest.template", new Dictionary { ["TemplateName"] = 
"Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveTernOpMaskedOpTestTemplate.template", "SveVecTernOpMaskedTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmTernOpFirstArgTestTemplate.template", "SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveScalarTernOpTestTemplate.template", "SveScalarTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleScalarOpTest_ValidationLogic }), @@ -3370,8 +3371,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = 
"TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", 
["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3379,8 +3380,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = 
"Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = 
"BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3856,6 +3857,15 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != 
result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "2", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "3", ["InvalidImm"] = "4", ["ValidateVectorResult"] = 
"!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "2", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = 
"Helpers.getMaskDouble()", ["Imm"] = "3", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 0efc53fc9e7b2..57b5891c265d9 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -5234,6 +5234,26 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) public static float MinNumberPairwise(float[] op1, float[] op2, int i) => Pairwise(MinNumber, op1, op2, i); + public static float[] MultiplyAddRotateComplex(float[] op1, float[] op2, float[] op3, byte imm) + { + for (int i = 0; i < op1.Length; i += 2) + { + (float ans1, float ans2) = imm switch + { + 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), + 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), + 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), + 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + _ => (0.0f, 0.0f) + }; + + op1[i] = ans1; + op1[i + 1] = ans2; + } + + return op1; + } + public static float MultiplyExtended(float op1, float op2) { bool inf1 = float.IsInfinity(op1); @@ -5384,6 +5404,26 @@ public static float FPExponentialAccelerator(uint op1) public static double MinNumberPairwise(double[] op1, double[] op2, int i) => 
Pairwise(MinNumber, op1, op2, i); + public static double[] MultiplyAddRotateComplex(double[] op1, double[] op2, double[] op3, byte imm) + { + for (int i = 0; i < op1.Length; i += 2) + { + (double ans1, double ans2) = imm switch + { + 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), + 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), + 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), + 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + _ => (0.0, 0.0) + }; + + op1[i] = ans1; + op1[i + 1] = ans2; + } + + return op1; + } + public static double MultiplyExtended(double op1, double op2) { bool inf1 = double.IsInfinity(op1); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template index c1bcca1eb32c6..9ce3a334f7d7a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template @@ -9,6 +9,7 @@ ******************************************************************************/ using System; +using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -65,6 +66,9 @@ namespace JIT.HardwareIntrinsics.Arm // Validates executing the test inside conditional, with op3 as zero test.ConditionalSelect_ZeroOp(); + + // Validates basic functionality fails with an invalid imm, using Unsafe.ReadUnaligned + test.RunBasicScenario_UnsafeRead_InvalidImm(); } else { @@ -194,7 +198,7 @@ namespace JIT.HardwareIntrinsics.Arm { Succeeded = true; - for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueMask} % 2); } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); @@ -229,6 +233,32 @@ namespace JIT.HardwareIntrinsics.Arm ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); } + public void RunBasicScenario_UnsafeRead_InvalidImm() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead_InvalidImm)); + + bool succeeded = false; + try + { + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + {InvalidImm} + ); + Console.WriteLine(result); + } + catch (ArgumentOutOfRangeException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + public void RunBasicScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); From aecfb4f4460e3765dc6c71605202ba5e02916302 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 
17:32:32 -0400 Subject: [PATCH 03/10] Fix unit tests --- src/coreclr/jit/codegenarm64test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2bc3da3b9d7f8..79a5f82754c8c 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6096,6 +6096,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16, INS_OPTS_SCALABLE_S); // UQINCW .S{, {, MUL #}} +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2A theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # @@ -6105,6 +6106,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2B theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, From f1841cad6bff54bb48fc180930c78483d2347211 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 17:34:34 -0400 Subject: [PATCH 04/10] Update trig tests --- .../GenerateHWIntrinsicTests_Arm.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index e443fa2713722..2abaf62dcd680 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -4155,14 +4155,14 @@ ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_Sqrt_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Sqrt", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_Sqrt_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Sqrt", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "0", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", 
["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "2", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_4", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "4", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_6", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "6", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "1", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "3", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_5", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "5", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_7", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "7", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "2", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_4", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "4", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_6", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "6", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "3", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_5", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "5", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_7", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "7", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), ("SveVecTernOpMaskedTest.template", new Dictionary { ["TestName"] = "Sve_Splice_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Splice", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)", ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpMaskedTest.template", new Dictionary { ["TestName"] = "Sve_Splice_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Splice", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)", ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), From e5ccd78c4cbb1324292f1cbb6762fd2d6475328e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 18:30:33 -0400 Subject: [PATCH 05/10] style --- src/coreclr/jit/codegenarm64test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 79a5f82754c8c..e136de5a07595 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6106,7 +6106,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # -#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2B theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, From 9aa87551f2d48b2b5dac18e108327d08be1144cf Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 19:40:43 -0400 Subject: [PATCH 06/10] Remote FMA intrin flag --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 37 ++++++++------------- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 37 +++++++++++++++------ 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 026b9084f7f7d..5b0639223b751 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -486,7 +486,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { case 4: assert(intrinEmbMask.op4 != nullptr); - assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); FALLTHROUGH; @@ -709,16 +708,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case 3: - assert(!HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id) || (falseReg != embMaskOp3Reg)); - FALLTHROUGH; + case 3: case 4: { 
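                        // Both the three-operand FMA-style intrinsics and the four-operand
                        // MultiplyAddRotateComplex (FCMLA, which carries its rotation as op4)
                        // are RMW, so they share this merged handling: the code below takes the
                        // immediate path when the embedded intrinsic has an immediate operand and
                        // otherwise falls back to the four-register FMA path.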
assert(instrIsRMW); if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { + assert(falseReg != embMaskOp3Reg); // For FMA, the operation we are trying to perform is: // result = op1 + (op2 * op3) // @@ -811,10 +809,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; break; - case NI_Sve_MultiplyAddRotateComplex: - assert(useAddend); - break; - default: unreached(); } @@ -883,11 +877,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } // Finally, perform the desired operation. - const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id); - - if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) { - if (embHasImmediateOperand) + if (intrinEmbMask.numOperands == 4) { assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); @@ -900,22 +892,19 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); - } - } - else if (embHasImmediateOperand) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - helper.ImmValue(), opt); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + embMaskOp2Reg, helper.ImmValue(), opt); + } } } else { - unreached(); + assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); } break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index dedda44081bcf..c7df23557516e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -187,7 +187,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_FmaIntrinsic) +HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0c0c339694a15..f542cf7357912 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1876,9 +1876,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { - const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); assert(embOp2Node->isRMWHWIntrinsic(compiler)); - assert((numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); + assert(numArgs == 3); LIR::Use use; GenTree* user = nullptr; @@ -1918,7 +1917,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); - if (embHasImmediateOperand) { assert(numArgs == 4); srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); @@ -1932,18 +1930,35 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3)); + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); + assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); - // Special handling for ShiftRightArithmeticForDivide: + // Special handling for embedded intrinsics with immediates: // We might need an additional register to hold branch targets into the switch table // that encodes the immediate - if (intrinEmb.id == NI_Sve_ShiftRightArithmeticForDivide) + switch (intrinEmb.id) { - assert(numArgs == 2); - if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) - { - buildInternalIntRegisterDefForNode(embOp2Node); - } + case NI_Sve_ShiftRightArithmeticForDivide: + assert(embHasImmediateOperand); + assert(numArgs == 2); + if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + + case NI_Sve_MultiplyAddRotateComplex: + assert(embHasImmediateOperand); + assert(numArgs == 4); + if 
(!embOp2Node->Op(4)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + + default: + assert(!embHasImmediateOperand); + break; } tgtPrefUse = BuildUse(embOp2Node->Op(1)); From c8bce7af045ff2684589a1eb897787cde995ddf5 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 20:07:28 -0400 Subject: [PATCH 07/10] Tweak helpers --- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 7bee3741f5cc6..6e6dd313561fa 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -5238,17 +5238,19 @@ public static float[] MultiplyAddRotateComplex(float[] op1, float[] op2, float[] { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; (float ans1, float ans2) = imm switch { - 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), - 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), - 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), - 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + 0 => (FusedMultiplyAdd(op1[real], op2[real], op3[real]), FusedMultiplyAdd(op1[img], op2[real], op3[img])), + 1 => (FusedMultiplySubtract(op1[real], op2[img], op3[img]), FusedMultiplyAdd(op1[img], op2[img], op3[i])), + 2 => (FusedMultiplySubtract(op1[real], op2[real], op3[real]), FusedMultiplySubtract(op1[img], op2[real], op3[img])), + 3 => (FusedMultiplyAdd(op1[real], op2[img], op3[img]), FusedMultiplySubtract(op1[img], op2[img], op3[real])), _ => (0.0f, 0.0f) }; - op1[i] = ans1; - op1[i + 1] = ans2; + op1[real] = ans1; + op1[img] = ans2; } return op1; @@ -5435,17 +5437,19 @@ public static double[] MultiplyAddRotateComplex(double[] op1, double[] op2, doub { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; (double ans1, double ans2) = imm switch { - 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), - 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), - 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), - 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + 0 => (FusedMultiplyAdd(op1[real], op2[real], op3[real]), FusedMultiplyAdd(op1[img], op2[real], op3[img])), + 1 => (FusedMultiplySubtract(op1[real], op2[img], op3[img]), FusedMultiplyAdd(op1[img], op2[img], op3[i])), + 2 => (FusedMultiplySubtract(op1[real], op2[real], op3[real]), FusedMultiplySubtract(op1[img], op2[real], op3[img])), + 3 => (FusedMultiplyAdd(op1[real], op2[img], op3[img]), FusedMultiplySubtract(op1[img], op2[img], op3[real])), _ => (0.0, 0.0) }; - op1[i] = ans1; - op1[i + 1] = ans2; + op1[real] = ans1; + op1[img] = ans2; } return op1; From 3443b5d0bac6a90258f2763cf27043981990e362 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 20:13:53 -0400 Subject: [PATCH 08/10] Fix LinearScan::BuildHWIntrinsic --- src/coreclr/jit/lsraarm64.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 
deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f542cf7357912..5fbcc0b468f00 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1916,16 +1916,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); - - { - assert(numArgs == 4); - srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); - if (!embOp2Node->Op(4)->isContainedIntOrIImmed()) - { - buildInternalIntRegisterDefForNode(embOp2Node); - } - } - srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); } else @@ -1947,6 +1937,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } break; + case NI_Sve_AddRotateComplex: + assert(embHasImmediateOperand); + assert(numArgs == 3); + if (!embOp2Node->Op(3)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + case NI_Sve_MultiplyAddRotateComplex: assert(embHasImmediateOperand); assert(numArgs == 4); From cafe4a5c1625dcb366474a95b57f532d12112615 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 21:05:25 -0400 Subject: [PATCH 09/10] Fix helpers --- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 6e6dd313561fa..54ddade391f93 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -6132,15 +6132,18 @@ public static float[] AddRotateComplex(float[] op1, float[] op2, byte rot) { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; + if (rot == 0) { - op1[i] -= op2[i + 1]; - op1[i + 1] += op2[i]; + op1[real] -= op2[img]; + op1[img] += op2[real]; } else { - op1[i] += op2[i + 1]; - op1[i + 1] -= op2[i]; + op1[real] += op2[img]; + op1[img] -= op2[real]; } } @@ -6201,15 +6204,18 @@ public static double[] AddRotateComplex(double[] op1, double[] op2, byte rot) { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; + if (rot == 0) { - op1[i] -= op2[i + 1]; - op1[i + 1] += op2[i]; + op1[real] -= op2[img]; + op1[img] += op2[real]; } else { - op1[i] += op2[i + 1]; - op1[i + 1] -= op2[i]; + op1[real] += op2[img]; + op1[img] -= op2[real]; } } From de2ae6d7c870f36884f15d81ea6910b004631c2e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 21:24:20 -0400 Subject: [PATCH 10/10] JIT feedback --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 171 ++++++++++---------- 1 file changed, 90 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5b0639223b751..17f929fef2bb7 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -480,13 +480,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) regNumber embMaskOp1Reg = REG_NA; regNumber embMaskOp2Reg = REG_NA; regNumber embMaskOp3Reg = REG_NA; + regNumber embMaskOp4Reg = REG_NA; regNumber falseReg = op3Reg; switch (intrinEmbMask.numOperands) { case 4: assert(intrinEmbMask.op4 != nullptr); - assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + embMaskOp4Reg = intrinEmbMask.op4->GetRegNum(); FALLTHROUGH; case 3: @@ -508,6 +509,70 @@ void 
CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) unreached(); } + // Shared code for setting up embedded mask arg for intrinsics with 3+ operands + auto emitEmbeddedMaskSetup = [&] { + if (intrin.op3->IsVectorZero()) + { + // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the + // destination using /Z. + + assert(targetReg != embMaskOp2Reg); + assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + } + else + { + // Below are the considerations we need to handle: + // + // targetReg == falseReg && targetReg == embMaskOp1Reg + // fmla Zd, P/m, Zn, Zm + // + // targetReg == falseReg && targetReg != embMaskOp1Reg + // movprfx target, P/m, embMaskOp1Reg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg == embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg != embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // Note that, we just check if the targetReg/falseReg or targetReg/embMaskOp1Reg + // coincides or not. + + if (targetReg != falseReg) + { + if (falseReg == embMaskOp1Reg) + { + // If falseReg value and embMaskOp1Reg value are same, then just mov the value + // to the target. + + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, embMaskOp1Reg, + /* canSkip */ true); + } + else + { + // If falseReg value is not present in targetReg yet, move the inactive lanes + // into the targetReg using `sel`. Since this is RMW, the active lanes should + // have the value from embMaskOp1Reg + + GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, + falseReg, opt); + } + } + else if (targetReg != embMaskOp1Reg) + { + // If target already contains the values of `falseReg`, just merge the lanes from + // `embMaskOp1Reg`, again because this is RMW semantics. + + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); + } + } + }; + switch (intrinEmbMask.numOperands) { case 1: @@ -710,7 +775,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } case 3: - case 4: { assert(instrIsRMW); @@ -814,90 +878,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } } - if (intrin.op3->IsVectorZero()) - { - // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the - // destination using /Z. 
- - assert(targetReg != embMaskOp2Reg); - assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); - GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt); - } - else - { - // Below are the considerations we need to handle: - // - // targetReg == falseReg && targetReg == embMaskOp1Reg - // fmla Zd, P/m, Zn, Zm - // - // targetReg == falseReg && targetReg != embMaskOp1Reg - // movprfx target, P/m, embMaskOp1Reg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // targetReg != falseReg && targetReg == embMaskOp1Reg - // sel target, P/m, embMaskOp1Reg, falseReg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // targetReg != falseReg && targetReg != embMaskOp1Reg - // sel target, P/m, embMaskOp1Reg, falseReg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // Note that, we just check if the targetReg/falseReg or targetReg/embMaskOp1Reg - // coincides or not. - - if (targetReg != falseReg) - { - if (falseReg == embMaskOp1Reg) - { - // If falseReg value and embMaskOp1Reg value are same, then just mov the value - // to the target. - - GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, embMaskOp1Reg, - /* canSkip */ true); - } - else - { - // If falseReg value is not present in targetReg yet, move the inactive lanes - // into the targetReg using `sel`. Since this is RMW, the active lanes should - // have the value from embMaskOp1Reg - - GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, - embMaskOp1Reg, falseReg, opt); - } - } - else if (targetReg != embMaskOp1Reg) - { - // If target already contains the values of `falseReg`, just merge the lanes from - // `embMaskOp1Reg`, again because this is RMW semantics. - - GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); - } - } + emitEmbeddedMaskSetup(); // Finally, perform the desired operation. 
if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) { - if (intrinEmbMask.numOperands == 4) - { - assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - embMaskOp2Reg, embMaskOp3Reg, helper.ImmValue(), - opt); - } - } - else + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - embMaskOp2Reg, helper.ImmValue(), opt); - } + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + helper.ImmValue(), opt); } } else @@ -909,6 +899,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + + case 4: + { + assert(instrIsRMW); + assert(intrinEmbMask.op4->isContained() == (embMaskOp4Reg == REG_NA)); + assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + + emitEmbeddedMaskSetup(); + + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, helper.ImmValue(), opt); + } + + break; + } + default: unreached(); }
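
Note on the rotation semantics (illustrative only, not part of the change): the AddRotateComplex helpers above treat each even/odd lane pair as one complex number, with rot selecting one of the two rotations FCADD supports (rot == 0 adds op2 rotated by 90 degrees, rot == 1 adds op2 rotated by 270 degrees). Below is a minimal scalar sketch of the per-pair arithmetic those helpers implement; the type and method names are hypothetical and exist purely for illustration:

    // Illustrative sketch only: mirrors the per-pair logic of the AddRotateComplex
    // test helper above. Names are hypothetical; this is not part of the product change.
    internal static class AddRotateComplexSketch
    {
        // rot == 0: add op2 rotated by 90 degrees; any other value: rotated by 270 degrees,
        // matching the if/else in the helper.
        public static (float Real, float Img) PerPair((float Real, float Img) op1,
                                                      (float Real, float Img) op2,
                                                      byte rot)
        {
            return rot == 0
                ? (op1.Real - op2.Img, op1.Img + op2.Real)
                : (op1.Real + op2.Img, op1.Img - op2.Real);
        }
    }

Applied across a vector, this is exactly the loop in the helper: each (op1[i], op1[i + 1]) pair is updated from the corresponding pair of op2.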
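Similarly, a per-pair sketch of the MultiplyAddRotateComplex helper, assuming FusedMultiplyAdd(a, b, c) in Helpers.cs computes a + (b * c) and FusedMultiplySubtract(a, b, c) computes a - (b * c); under that assumption the immediate 0..3 rotates the product by 0, 90, 180, or 270 degrees before accumulating. Again, the class and method below are hypothetical and only illustrate the arithmetic:

    // Illustrative sketch only: per-pair behavior of the MultiplyAddRotateComplex
    // test helper above, under the FusedMultiplyAdd/FusedMultiplySubtract assumption
    // stated in the note. Not part of the product change.
    internal static class MultiplyAddRotateComplexSketch
    {
        public static (double Real, double Img) PerPair((double Real, double Img) acc,
                                                        (double Real, double Img) left,
                                                        (double Real, double Img) right,
                                                        byte rot)
        {
            return rot switch
            {
                0 => (acc.Real + left.Real * right.Real, acc.Img + left.Real * right.Img), // 0 degrees
                1 => (acc.Real - left.Img * right.Img,   acc.Img + left.Img * right.Real), // 90 degrees
                2 => (acc.Real - left.Real * right.Real, acc.Img - left.Real * right.Img), // 180 degrees
                3 => (acc.Real + left.Img * right.Img,   acc.Img - left.Img * right.Real), // 270 degrees
                _ => (0.0, 0.0)
            };
        }
    }

Issuing the operation twice, with rotations 0 and 90, accumulates a full complex product. When the rotation argument is not a contained constant at JIT time, the codegen falls back to the immediate switch-table expansion via HWIntrinsicImmOpHelper, which is why the lsraarm64.cpp changes reserve an internal integer register for those cases.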