Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64/Sve: Implement divide/multiply/subtract Math APIs #101578

Merged
merged 11 commits into from
Apr 29, 2024
27 changes: 23 additions & 4 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
@@ -511,11 +511,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

if (falseReg != embMaskOp1Reg)
{
// None of targetReg, embMaskOp1Reg and falseReg are same. In such case, use the
// "unpredicated" version of the instruction and then use "sel" to select the active lanes.
// At this point, targetReg != embMaskOp1Reg != falseReg
if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id))
{
// If the embedded instruction supports optional mask operation, use the "unpredicated"
// version of the instruction, followed by "sel" to select the active lanes.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
embMaskOp2Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED);
}
else
{
// If the instruction only has a "predicated" version, first move "embMaskOp1Reg"
// into targetReg (using movprfx). Next, perform the predicated operation on targetReg.
// Finally, use "sel" to select the active lanes based on the mask, and set the
// inactive lanes to falseReg.

assert(targetReg != embMaskOp2Reg);
assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id));

GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
opt);
}

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg, embMaskOp2Reg,
opt, INS_SCALABLE_OPTS_UNPREDICATED);
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED);
break;
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit,
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
@@ -67,6 +68,8 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)

HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Original file line number Diff line number Diff line change
@@ -552,6 +552,32 @@ internal Arm64() { }
public static unsafe Vector<byte> CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); }


/// Divide : Divide

/// <summary>
/// svfloat32_t svdiv[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FDIV Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svdiv[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FDIV Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svdiv[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Divide(Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svdiv[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FDIV Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svdiv[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FDIV Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svdiv[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Divide(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// LoadVector : Unextended load

/// <summary>
@@ -840,6 +866,273 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt32ZeroExtendToUInt64(Vector<ulong> mask, uint* address) { throw new PlatformNotSupportedException(); }

/// Multiply : Multiply

/// <summary>
/// svint8_t svmul[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svmul[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MUL Ztied2.B, Pg/M, Ztied2.B, Zop1.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svmul[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// MOVPRFX Zresult.B, Pg/Z, Zop2.B; MUL Zresult.B, Pg/M, Zresult.B, Zop1.B
/// </summary>
public static unsafe Vector<sbyte> Multiply(Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svmul[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svmul[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MUL Ztied2.H, Pg/M, Ztied2.H, Zop1.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svmul[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// MOVPRFX Zresult.H, Pg/Z, Zop2.H; MUL Zresult.H, Pg/M, Zresult.H, Zop1.H
/// </summary>
public static unsafe Vector<short> Multiply(Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svmul[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svmul[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svmul[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; MUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<int> Multiply(Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svmul[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svmul[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svmul[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; MUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<long> Multiply(Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint8_t svmul[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svmul[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MUL Ztied2.B, Pg/M, Ztied2.B, Zop1.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svmul[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// MOVPRFX Zresult.B, Pg/Z, Zop2.B; MUL Zresult.B, Pg/M, Zresult.B, Zop1.B
/// </summary>
public static unsafe Vector<byte> Multiply(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svmul[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svmul[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MUL Ztied2.H, Pg/M, Ztied2.H, Zop1.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svmul[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// MOVPRFX Zresult.H, Pg/Z, Zop2.H; MUL Zresult.H, Pg/M, Zresult.H, Zop1.H
/// </summary>
public static unsafe Vector<ushort> Multiply(Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svmul[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svmul[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svmul[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; MUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<uint> Multiply(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svmul[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svmul[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svmul[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; MUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<ulong> Multiply(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svmul[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FMUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svmul[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FMUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// FMUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// FMUL Zresult.S, Zop1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svmul[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; FMUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<float> Multiply(Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svmul[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FMUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svmul[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FMUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// FMUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// FMUL Zresult.D, Zop1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svmul[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; FMUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<double> Multiply(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// Subtract : Subtract

/// <summary>
/// svint8_t svsub[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svsub[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// SUB Zresult.B, Zop1.B, Zop2.B
/// svint8_t svsub[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> Subtract(Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svsub[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svsub[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// SUB Zresult.H, Zop1.H, Zop2.H
/// svint16_t svsub[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// </summary>
public static unsafe Vector<short> Subtract(Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svsub[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svsub[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// SUB Zresult.S, Zop1.S, Zop2.S
/// svint32_t svsub[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<int> Subtract(Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svsub[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svsub[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// SUB Zresult.D, Zop1.D, Zop2.D
/// svint64_t svsub[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<long> Subtract(Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint8_t svsub[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svsub[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// SUB Zresult.B, Zop1.B, Zop2.B
/// svuint8_t svsub[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> Subtract(Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svsub[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svsub[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// SUB Zresult.H, Zop1.H, Zop2.H
/// svuint16_t svsub[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> Subtract(Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svsub[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svsub[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// SUB Zresult.S, Zop1.S, Zop2.S
/// svuint32_t svsub[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> Subtract(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svsub[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svsub[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// SUB Zresult.D, Zop1.D, Zop2.D
/// svuint64_t svsub[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> Subtract(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svsub[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FSUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svsub[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FSUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// FSUB Zresult.S, Zop1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svsub[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Subtract(Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svsub[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FSUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svsub[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FSUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// FSUB Zresult.D, Zop1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svsub[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Subtract(Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// UnzipEven : Concatenate even elements from two inputs

Original file line number Diff line number Diff line change
@@ -608,6 +608,32 @@ internal Arm64() { }
public static unsafe Vector<byte> CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) => CreateWhileLessThanOrEqualMask8Bit(left, right);


/// Divide : Divide

/// <summary>
/// svfloat32_t svdiv[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FDIV Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svdiv[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FDIV Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svdiv[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FDIV Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Divide(Vector<float> left, Vector<float> right) => Divide(left, right);

/// <summary>
/// svfloat64_t svdiv[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FDIV Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svdiv[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FDIV Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svdiv[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FDIV Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Divide(Vector<double> left, Vector<double> right) => Divide(left, right);

/// LoadVector : Unextended load

/// <summary>
@@ -897,6 +923,272 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorUInt32ZeroExtendToUInt64(Vector<ulong> mask, uint* address) => LoadVectorUInt32ZeroExtendToUInt64(mask, address);


/// Multiply : Multiply

/// <summary>
/// svint8_t svmul[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svmul[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MUL Ztied2.B, Pg/M, Ztied2.B, Zop1.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svmul[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// MOVPRFX Zresult.B, Pg/Z, Zop2.B; MUL Zresult.B, Pg/M, Zresult.B, Zop1.B
/// </summary>
public static unsafe Vector<sbyte> Multiply(Vector<sbyte> left, Vector<sbyte> right) => Multiply(left, right);

/// <summary>
/// svint16_t svmul[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svmul[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MUL Ztied2.H, Pg/M, Ztied2.H, Zop1.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svmul[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// MOVPRFX Zresult.H, Pg/Z, Zop2.H; MUL Zresult.H, Pg/M, Zresult.H, Zop1.H
/// </summary>
public static unsafe Vector<short> Multiply(Vector<short> left, Vector<short> right) => Multiply(left, right);

/// <summary>
/// svint32_t svmul[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svmul[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svmul[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; MUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<int> Multiply(Vector<int> left, Vector<int> right) => Multiply(left, right);

/// <summary>
/// svint64_t svmul[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svmul[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svmul[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; MUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<long> Multiply(Vector<long> left, Vector<long> right) => Multiply(left, right);

/// <summary>
/// svuint8_t svmul[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svmul[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MUL Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MUL Ztied2.B, Pg/M, Ztied2.B, Zop1.B
/// MOVPRFX Zresult, Zop1; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svmul[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; MUL Zresult.B, Pg/M, Zresult.B, Zop2.B
/// MOVPRFX Zresult.B, Pg/Z, Zop2.B; MUL Zresult.B, Pg/M, Zresult.B, Zop1.B
/// </summary>
public static unsafe Vector<byte> Multiply(Vector<byte> left, Vector<byte> right) => Multiply(left, right);

/// <summary>
/// svuint16_t svmul[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svmul[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MUL Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MUL Ztied2.H, Pg/M, Ztied2.H, Zop1.H
/// MOVPRFX Zresult, Zop1; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svmul[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; MUL Zresult.H, Pg/M, Zresult.H, Zop2.H
/// MOVPRFX Zresult.H, Pg/Z, Zop2.H; MUL Zresult.H, Pg/M, Zresult.H, Zop1.H
/// </summary>
public static unsafe Vector<ushort> Multiply(Vector<ushort> left, Vector<ushort> right) => Multiply(left, right);

/// <summary>
/// svuint32_t svmul[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svmul[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// MOVPRFX Zresult, Zop1; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svmul[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; MUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; MUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<uint> Multiply(Vector<uint> left, Vector<uint> right) => Multiply(left, right);

/// <summary>
/// svuint64_t svmul[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svmul[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// MOVPRFX Zresult, Zop1; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svmul[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; MUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; MUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<ulong> Multiply(Vector<ulong> left, Vector<ulong> right) => Multiply(left, right);

/// <summary>
/// svfloat32_t svmul[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FMUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svmul[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FMUL Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// FMUL Ztied2.S, Pg/M, Ztied2.S, Zop1.S
/// FMUL Zresult.S, Zop1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svmul[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FMUL Zresult.S, Pg/M, Zresult.S, Zop2.S
/// MOVPRFX Zresult.S, Pg/Z, Zop2.S; FMUL Zresult.S, Pg/M, Zresult.S, Zop1.S
/// </summary>
public static unsafe Vector<float> Multiply(Vector<float> left, Vector<float> right) => Multiply(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Multiply overload for double-precision floating-point elements.
/// The lines below list the matching ACLE svmul[_f64] variants (_m, _x, _z), each followed by its FMUL instruction forms (.D elements).
/// svfloat64_t svmul[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FMUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svmul[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FMUL Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// FMUL Ztied2.D, Pg/M, Ztied2.D, Zop1.D
/// FMUL Zresult.D, Zop1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svmul[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FMUL Zresult.D, Pg/M, Zresult.D, Zop2.D
/// MOVPRFX Zresult.D, Pg/Z, Zop2.D; FMUL Zresult.D, Pg/M, Zresult.D, Zop1.D
/// </summary>
public static unsafe Vector<double> Multiply(Vector<double> left, Vector<double> right) => Multiply(left, right);

/// Subtract : Subtract

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 8-bit signed elements.
/// The lines below list the matching ACLE svsub[_s8] variants (_m, _x, _z), each followed by its SUB instruction forms (.B elements).
/// svint8_t svsub[_s8]_m(svbool_t pg, svint8_t op1, svint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svint8_t svsub[_s8]_x(svbool_t pg, svint8_t op1, svint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// SUB Zresult.B, Zop1.B, Zop2.B
/// svint8_t svsub[_s8]_z(svbool_t pg, svint8_t op1, svint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> Subtract(Vector<sbyte> left, Vector<sbyte> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 16-bit signed elements.
/// The lines below list the matching ACLE svsub[_s16] variants (_m, _x, _z), each followed by its SUB instruction forms (.H elements).
/// svint16_t svsub[_s16]_m(svbool_t pg, svint16_t op1, svint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svint16_t svsub[_s16]_x(svbool_t pg, svint16_t op1, svint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// SUB Zresult.H, Zop1.H, Zop2.H
/// svint16_t svsub[_s16]_z(svbool_t pg, svint16_t op1, svint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// </summary>
public static unsafe Vector<short> Subtract(Vector<short> left, Vector<short> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 32-bit signed elements.
/// The lines below list the matching ACLE svsub[_s32] variants (_m, _x, _z), each followed by its SUB instruction forms (.S elements).
/// svint32_t svsub[_s32]_m(svbool_t pg, svint32_t op1, svint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svint32_t svsub[_s32]_x(svbool_t pg, svint32_t op1, svint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// SUB Zresult.S, Zop1.S, Zop2.S
/// svint32_t svsub[_s32]_z(svbool_t pg, svint32_t op1, svint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<int> Subtract(Vector<int> left, Vector<int> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 64-bit signed elements.
/// The lines below list the matching ACLE svsub[_s64] variants (_m, _x, _z), each followed by its SUB instruction forms (.D elements).
/// svint64_t svsub[_s64]_m(svbool_t pg, svint64_t op1, svint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svint64_t svsub[_s64]_x(svbool_t pg, svint64_t op1, svint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// SUB Zresult.D, Zop1.D, Zop2.D
/// svint64_t svsub[_s64]_z(svbool_t pg, svint64_t op1, svint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<long> Subtract(Vector<long> left, Vector<long> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 8-bit unsigned elements.
/// The lines below list the matching ACLE svsub[_u8] variants (_m, _x, _z), each followed by its SUB instruction forms (.B elements).
/// svuint8_t svsub[_u8]_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// MOVPRFX Zresult, Zop1; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// svuint8_t svsub[_u8]_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SUB Ztied1.B, Pg/M, Ztied1.B, Zop2.B
/// SUB Zresult.B, Zop1.B, Zop2.B
/// svuint8_t svsub[_u8]_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
/// MOVPRFX Zresult.B, Pg/Z, Zop1.B; SUB Zresult.B, Pg/M, Zresult.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> Subtract(Vector<byte> left, Vector<byte> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 16-bit unsigned elements.
/// The lines below list the matching ACLE svsub[_u16] variants (_m, _x, _z), each followed by its SUB instruction forms (.H elements).
/// svuint16_t svsub[_u16]_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// MOVPRFX Zresult, Zop1; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// svuint16_t svsub[_u16]_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SUB Ztied1.H, Pg/M, Ztied1.H, Zop2.H
/// SUB Zresult.H, Zop1.H, Zop2.H
/// svuint16_t svsub[_u16]_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
/// MOVPRFX Zresult.H, Pg/Z, Zop1.H; SUB Zresult.H, Pg/M, Zresult.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> Subtract(Vector<ushort> left, Vector<ushort> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 32-bit unsigned elements.
/// The lines below list the matching ACLE svsub[_u32] variants (_m, _x, _z), each followed by its SUB instruction forms (.S elements).
/// svuint32_t svsub[_u32]_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svuint32_t svsub[_u32]_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// SUB Zresult.S, Zop1.S, Zop2.S
/// svuint32_t svsub[_u32]_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; SUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> Subtract(Vector<uint> left, Vector<uint> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for 64-bit unsigned elements.
/// The lines below list the matching ACLE svsub[_u64] variants (_m, _x, _z), each followed by its SUB instruction forms (.D elements).
/// svuint64_t svsub[_u64]_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svuint64_t svsub[_u64]_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// SUB Zresult.D, Zop1.D, Zop2.D
/// svuint64_t svsub[_u64]_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; SUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> Subtract(Vector<ulong> left, Vector<ulong> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for single-precision floating-point elements.
/// The lines below list the matching ACLE svsub[_f32] variants (_m, _x, _z), each followed by its FSUB instruction forms (.S elements).
/// svfloat32_t svsub[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FSUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svsub[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// FSUB Ztied1.S, Pg/M, Ztied1.S, Zop2.S
/// FSUB Zresult.S, Zop1.S, Zop2.S
/// MOVPRFX Zresult, Zop1; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// svfloat32_t svsub[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// MOVPRFX Zresult.S, Pg/Z, Zop1.S; FSUB Zresult.S, Pg/M, Zresult.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Subtract(Vector<float> left, Vector<float> right) => Subtract(left, right);

// NOTE(review): the body calls this same overload with the same arguments; presumably the JIT
// substitutes the SVE expansion (intrinsic pattern) rather than recursing - confirm on the enclosing type.
/// <summary>
/// Subtract overload for double-precision floating-point elements.
/// The lines below list the matching ACLE svsub[_f64] variants (_m, _x, _z), each followed by its FSUB instruction forms (.D elements).
/// svfloat64_t svsub[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FSUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svsub[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// FSUB Ztied1.D, Pg/M, Ztied1.D, Zop2.D
/// FSUB Zresult.D, Zop1.D, Zop2.D
/// MOVPRFX Zresult, Zop1; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// svfloat64_t svsub[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// MOVPRFX Zresult.D, Pg/Z, Zop1.D; FSUB Zresult.D, Pg/M, Zresult.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Subtract(Vector<double> left, Vector<double> right) => Subtract(left, right);

/// UnzipEven : Concatenate even elements from two inputs

/// <summary>
Original file line number Diff line number Diff line change
@@ -4243,6 +4243,8 @@ internal Arm64() { }
// CreateWhileLessThanOrEqualMask8Bit overloads for (long, long), (uint, uint) and (ulong, ulong) bounds, each returning Vector<byte>.
// Signature-only declarations: every body is just `throw null` (presumably reference-assembly stubs).
public static System.Numerics.Vector<byte> CreateWhileLessThanOrEqualMask8Bit(long left, long right) { throw null; }
public static System.Numerics.Vector<byte> CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) { throw null; }
public static System.Numerics.Vector<byte> CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw null; }
// Divide overloads; only Vector<float> and Vector<double> are declared in this span.
// Signature-only declarations: every body is just `throw null` (presumably reference-assembly stubs).
public static System.Numerics.Vector<float> Divide(System.Numerics.Vector<float> left, System.Numerics.Vector<float> right) { throw null; }
public static System.Numerics.Vector<double> Divide(System.Numerics.Vector<double> left, System.Numerics.Vector<double> right) { throw null; }
// LoadVector overloads taking a mask vector and a raw element pointer of the same element type (hence `unsafe`).
// Signature-only declarations: every body is just `throw null` (presumably reference-assembly stubs).
public static unsafe System.Numerics.Vector<sbyte> LoadVector(System.Numerics.Vector<sbyte> mask, sbyte* address) { throw null; }
public static unsafe System.Numerics.Vector<short> LoadVector(System.Numerics.Vector<short> mask, short* address) { throw null; }
public static unsafe System.Numerics.Vector<int> LoadVector(System.Numerics.Vector<int> mask, int* address) { throw null; }
@@ -4277,7 +4279,26 @@ internal Arm64() { }
// Takes a Vector<ulong> mask and a ushort* address, returns Vector<ulong>. Signature-only declaration: the body is just `throw null`.
public static unsafe System.Numerics.Vector<ulong> LoadVectorUInt16ZeroExtendToUInt64(System.Numerics.Vector<ulong> mask, ushort* address) { throw null; }
// Takes a Vector<long> mask and a uint* address, returns Vector<long>. Signature-only declaration: the body is just `throw null`.
public static unsafe System.Numerics.Vector<long> LoadVectorUInt32ZeroExtendToInt64(System.Numerics.Vector<long> mask, uint* address) { throw null; }
// Takes a Vector<ulong> mask and a uint* address, returns Vector<ulong>. Signature-only declaration: the body is just `throw null`.
public static unsafe System.Numerics.Vector<ulong> LoadVectorUInt32ZeroExtendToUInt64(System.Numerics.Vector<ulong> mask, uint* address) { throw null; }

// Multiply and Subtract overloads for all ten element types (sbyte..ulong, float, double).
// Signature-only declarations: every body is just `throw null`. Multiply is listed before Subtract
// so the members follow the same alphabetical order as the neighbouring declarations.
public static System.Numerics.Vector<sbyte> Multiply(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
public static System.Numerics.Vector<short> Multiply(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
public static System.Numerics.Vector<int> Multiply(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }
public static System.Numerics.Vector<long> Multiply(System.Numerics.Vector<long> left, System.Numerics.Vector<long> right) { throw null; }
public static System.Numerics.Vector<byte> Multiply(System.Numerics.Vector<byte> left, System.Numerics.Vector<byte> right) { throw null; }
public static System.Numerics.Vector<ushort> Multiply(System.Numerics.Vector<ushort> left, System.Numerics.Vector<ushort> right) { throw null; }
public static System.Numerics.Vector<uint> Multiply(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }
public static System.Numerics.Vector<ulong> Multiply(System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<float> Multiply(System.Numerics.Vector<float> left, System.Numerics.Vector<float> right) { throw null; }
public static System.Numerics.Vector<double> Multiply(System.Numerics.Vector<double> left, System.Numerics.Vector<double> right) { throw null; }
public static System.Numerics.Vector<sbyte> Subtract(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
public static System.Numerics.Vector<short> Subtract(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
public static System.Numerics.Vector<int> Subtract(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }
public static System.Numerics.Vector<long> Subtract(System.Numerics.Vector<long> left, System.Numerics.Vector<long> right) { throw null; }
public static System.Numerics.Vector<byte> Subtract(System.Numerics.Vector<byte> left, System.Numerics.Vector<byte> right) { throw null; }
public static System.Numerics.Vector<ushort> Subtract(System.Numerics.Vector<ushort> left, System.Numerics.Vector<ushort> right) { throw null; }
public static System.Numerics.Vector<uint> Subtract(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }
public static System.Numerics.Vector<ulong> Subtract(System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<float> Subtract(System.Numerics.Vector<float> left, System.Numerics.Vector<float> right) { throw null; }
public static System.Numerics.Vector<double> Subtract(System.Numerics.Vector<double> left, System.Numerics.Vector<double> right) { throw null; }
// UnzipEven overloads; only the sbyte, short and int element types are visible in this span.
// Signature-only declarations: every body is just `throw null` (presumably reference-assembly stubs).
public static System.Numerics.Vector<sbyte> UnzipEven(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
public static System.Numerics.Vector<short> UnzipEven(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
public static System.Numerics.Vector<int> UnzipEven(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }

Large diffs are not rendered by default.