-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[API Proposal]: Arm64: FEAT_SVE: stores #94011
Comments
Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics
Issue Details
namespace System.Runtime.Intrinsics.Arm
/// VectorT Summary
/// Condensed ("Vector<T>") view of the proposed FEAT_SVE store API.
/// This is API-review shorthand, not compilable C#: `T` is not a declared type parameter;
/// each "T: ..." line lists the element types that the declaration below it stands for.
/// The trailing "// ST1W or ..." comments name the instruction(s) annotated for that entry.
/// The pointer parameter is written `@base` because `base` is a reserved C# keyword; the
/// parameter's name is still `base`. Tuple-typed parameters are named `data`.
/// NOTE(review): the "Full API" listing further down this page uses different method names,
/// takes an explicit `mask` parameter, and lists the `vnum` overloads as rejected. This
/// summary appears not to have been regenerated to match - confirm before relying on it.
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: stores
{
    /// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
    public static unsafe void StoreVector(T* @base, Vector<T> data); // ST1W or ST1D or ST1B or ST1H
    /// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
    public static unsafe void StoreVector(T* @base, long vnum, Vector<T> data); // ST1W or ST1D or ST1B or ST1H
    public static unsafe void StoreVector(long* @base, long vnum, Vector<long> data);
    /// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
    public static unsafe void StoreVectorNonTemporal(T* @base, Vector<T> data); // STNT1W or STNT1D or STNT1B or STNT1H
    /// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
    public static unsafe void StoreVectorNonTemporal(T* @base, long vnum, Vector<T> data); // STNT1W or STNT1D or STNT1B or STNT1H
    public static unsafe void StoreVectorNonTemporal(long* @base, long vnum, Vector<long> data);
    /// T: int, long
    public static unsafe void StoreVectorTruncate16(short* @base, Vector<T> data); // ST1H
    /// T: uint, ulong
    public static unsafe void StoreVectorTruncate16(ushort* @base, Vector<T> data); // ST1H
    /// T: int, long
    public static unsafe void StoreVectorTruncate16(short* @base, long vnum, Vector<T> data); // ST1H
    /// T: uint, ulong
    public static unsafe void StoreVectorTruncate16(ushort* @base, long vnum, Vector<T> data); // ST1H
    public static unsafe void StoreVectorTruncate32(int* @base, Vector<long> data);
    public static unsafe void StoreVectorTruncate32(uint* @base, Vector<ulong> data);
    public static unsafe void StoreVectorTruncate32(int* @base, long vnum, Vector<long> data);
    public static unsafe void StoreVectorTruncate32(uint* @base, long vnum, Vector<ulong> data);
    /// T: short, int, long
    public static unsafe void StoreVectorTruncate8(sbyte* @base, Vector<T> data); // ST1B
    /// T: ushort, uint, ulong
    public static unsafe void StoreVectorTruncate8(byte* @base, Vector<T> data); // ST1B
    /// T: short, int, long
    public static unsafe void StoreVectorTruncate8(sbyte* @base, long vnum, Vector<T> data); // ST1B
    /// T: ushort, uint, ulong
    public static unsafe void StoreVectorTruncate8(byte* @base, long vnum, Vector<T> data); // ST1B
    /// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx2(T* @base, (Vector<T> data1, Vector<T> data2) data); // ST2W or ST2D or ST2B or ST2H
    /// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx2(T* @base, long vnum, (Vector<T> data1, Vector<T> data2) data); // ST2W or ST2D or ST2B or ST2H
    public static unsafe void StoreVectorx2(long* @base, long vnum, (Vector<long> data1, Vector<long> data2) data);
    /// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx3(T* @base, (Vector<T> data1, Vector<T> data2, Vector<T> data3) data); // ST3W or ST3D or ST3B or ST3H
    /// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx3(T* @base, long vnum, (Vector<T> data1, Vector<T> data2, Vector<T> data3) data); // ST3W or ST3D or ST3B or ST3H
    public static unsafe void StoreVectorx3(long* @base, long vnum, (Vector<long> data1, Vector<long> data2, Vector<long> data3) data);
    /// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx4(T* @base, (Vector<T> data1, Vector<T> data2, Vector<T> data3, Vector<T> data4) data); // ST4W or ST4D or ST4B or ST4H
    /// T: float, double, sbyte, short, int, byte, ushort, uint, ulong
    public static unsafe void StoreVectorx4(T* @base, long vnum, (Vector<T> data1, Vector<T> data2, Vector<T> data3, Vector<T> data4) data); // ST4W or ST4D or ST4B or ST4H
    public static unsafe void StoreVectorx4(long* @base, long vnum, (Vector<long> data1, Vector<long> data2, Vector<long> data3, Vector<long> data4) data);
    /// total method signatures: 27
}
|
/// Full API
/// Full listing of the proposed FEAT_SVE store API, one overload per element type.
/// Each declaration is preceded by a comment giving the ACLE C intrinsic signature it is
/// paired with and the quoted instruction form(s) annotated for it. Every overload takes a
/// `mask` vector (in the position of the ACLE `svbool_t pg` argument), a destination
/// pointer, and the data to store.
/// The pointer parameter is written `@base` because `base` is a reserved C# keyword; the
/// parameter's name is still `base`, matching the ACLE argument. Tuple-typed parameters
/// are named `data`, again after the ACLE argument.
/// These are declaration-only signatures in API-review form (no bodies).
public abstract partial class Sve : AdvSimd /// Feature: FEAT_SVE Category: stores
{
    /// Store : Non-truncating store
    /// void svst1[_f32](svbool_t pg, float32_t *base, svfloat32_t data) : "ST1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "ST1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<float> mask, float* @base, Vector<float> data);
    /// void svst1[_f64](svbool_t pg, float64_t *base, svfloat64_t data) : "ST1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "ST1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<double> mask, double* @base, Vector<double> data);
    /// void svst1[_s8](svbool_t pg, int8_t *base, svint8_t data) : "ST1B Zdata.B, Pg, [Xarray, Xindex]" or "ST1B Zdata.B, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<sbyte> mask, sbyte* @base, Vector<sbyte> data);
    /// void svst1[_s16](svbool_t pg, int16_t *base, svint16_t data) : "ST1H Zdata.H, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<short> mask, short* @base, Vector<short> data);
    /// void svst1[_s32](svbool_t pg, int32_t *base, svint32_t data) : "ST1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "ST1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<int> mask, int* @base, Vector<int> data);
    /// void svst1[_s64](svbool_t pg, int64_t *base, svint64_t data) : "ST1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "ST1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<long> mask, long* @base, Vector<long> data);
    /// void svst1[_u8](svbool_t pg, uint8_t *base, svuint8_t data) : "ST1B Zdata.B, Pg, [Xarray, Xindex]" or "ST1B Zdata.B, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<byte> mask, byte* @base, Vector<byte> data);
    /// void svst1[_u16](svbool_t pg, uint16_t *base, svuint16_t data) : "ST1H Zdata.H, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<ushort> mask, ushort* @base, Vector<ushort> data);
    /// void svst1[_u32](svbool_t pg, uint32_t *base, svuint32_t data) : "ST1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "ST1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<uint> mask, uint* @base, Vector<uint> data);
    /// void svst1[_u64](svbool_t pg, uint64_t *base, svuint64_t data) : "ST1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "ST1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Store(Vector<ulong> mask, ulong* @base, Vector<ulong> data);
    /// StoreInt16NarrowToSByte : Truncate to 8 bits and store
    /// void svst1b[_s16](svbool_t pg, int8_t *base, svint16_t data) : "ST1B Zdata.H, Pg, [Xarray, Xindex]" or "ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt16NarrowToSByte(Vector<short> mask, sbyte* @base, Vector<short> data);
    /// StoreInt32NarrowToInt16 : Truncate to 16 bits and store
    /// void svst1h[_s32](svbool_t pg, int16_t *base, svint32_t data) : "ST1H Zdata.S, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt32NarrowToInt16(Vector<int> mask, short* @base, Vector<int> data);
    /// StoreInt32NarrowToSByte : Truncate to 8 bits and store
    /// void svst1b[_s32](svbool_t pg, int8_t *base, svint32_t data) : "ST1B Zdata.S, Pg, [Xarray, Xindex]" or "ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt32NarrowToSByte(Vector<int> mask, sbyte* @base, Vector<int> data);
    /// StoreInt64NarrowToInt16 : Truncate to 16 bits and store
    /// void svst1h[_s64](svbool_t pg, int16_t *base, svint64_t data) : "ST1H Zdata.D, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt64NarrowToInt16(Vector<long> mask, short* @base, Vector<long> data);
    /// StoreInt64NarrowToInt32 : Truncate to 32 bits and store
    /// void svst1w[_s64](svbool_t pg, int32_t *base, svint64_t data) : "ST1W Zdata.D, Pg, [Xarray, Xindex, LSL #2]" or "ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt64NarrowToInt32(Vector<long> mask, int* @base, Vector<long> data);
    /// StoreInt64NarrowToSByte : Truncate to 8 bits and store
    /// void svst1b[_s64](svbool_t pg, int8_t *base, svint64_t data) : "ST1B Zdata.D, Pg, [Xarray, Xindex]" or "ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreInt64NarrowToSByte(Vector<long> mask, sbyte* @base, Vector<long> data);
    /// StoreNonTemporal : Non-truncating store, non-temporal
    /// void svstnt1[_f32](svbool_t pg, float32_t *base, svfloat32_t data) : "STNT1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<float> mask, float* @base, Vector<float> data);
    /// void svstnt1[_f64](svbool_t pg, float64_t *base, svfloat64_t data) : "STNT1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<double> mask, double* @base, Vector<double> data);
    /// void svstnt1[_s8](svbool_t pg, int8_t *base, svint8_t data) : "STNT1B Zdata.B, Pg, [Xarray, Xindex]" or "STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<sbyte> mask, sbyte* @base, Vector<sbyte> data);
    /// void svstnt1[_s16](svbool_t pg, int16_t *base, svint16_t data) : "STNT1H Zdata.H, Pg, [Xarray, Xindex, LSL #1]" or "STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<short> mask, short* @base, Vector<short> data);
    /// void svstnt1[_s32](svbool_t pg, int32_t *base, svint32_t data) : "STNT1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<int> mask, int* @base, Vector<int> data);
    /// void svstnt1[_s64](svbool_t pg, int64_t *base, svint64_t data) : "STNT1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<long> mask, long* @base, Vector<long> data);
    /// void svstnt1[_u8](svbool_t pg, uint8_t *base, svuint8_t data) : "STNT1B Zdata.B, Pg, [Xarray, Xindex]" or "STNT1B Zdata.B, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<byte> mask, byte* @base, Vector<byte> data);
    /// void svstnt1[_u16](svbool_t pg, uint16_t *base, svuint16_t data) : "STNT1H Zdata.H, Pg, [Xarray, Xindex, LSL #1]" or "STNT1H Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<ushort> mask, ushort* @base, Vector<ushort> data);
    /// void svstnt1[_u32](svbool_t pg, uint32_t *base, svuint32_t data) : "STNT1W Zdata.S, Pg, [Xarray, Xindex, LSL #2]" or "STNT1W Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<uint> mask, uint* @base, Vector<uint> data);
    /// void svstnt1[_u64](svbool_t pg, uint64_t *base, svuint64_t data) : "STNT1D Zdata.D, Pg, [Xarray, Xindex, LSL #3]" or "STNT1D Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreNonTemporal(Vector<ulong> mask, ulong* @base, Vector<ulong> data);
    /// StoreUInt16NarrowToByte : Truncate to 8 bits and store
    /// void svst1b[_u16](svbool_t pg, uint8_t *base, svuint16_t data) : "ST1B Zdata.H, Pg, [Xarray, Xindex]" or "ST1B Zdata.H, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt16NarrowToByte(Vector<ushort> mask, byte* @base, Vector<ushort> data);
    /// StoreUInt32NarrowToByte : Truncate to 8 bits and store
    /// void svst1b[_u32](svbool_t pg, uint8_t *base, svuint32_t data) : "ST1B Zdata.S, Pg, [Xarray, Xindex]" or "ST1B Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt32NarrowToByte(Vector<uint> mask, byte* @base, Vector<uint> data);
    /// StoreUInt32NarrowToUInt16 : Truncate to 16 bits and store
    /// void svst1h[_u32](svbool_t pg, uint16_t *base, svuint32_t data) : "ST1H Zdata.S, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.S, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt32NarrowToUInt16(Vector<uint> mask, ushort* @base, Vector<uint> data);
    /// StoreUInt64NarrowToByte : Truncate to 8 bits and store
    /// void svst1b[_u64](svbool_t pg, uint8_t *base, svuint64_t data) : "ST1B Zdata.D, Pg, [Xarray, Xindex]" or "ST1B Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt64NarrowToByte(Vector<ulong> mask, byte* @base, Vector<ulong> data);
    /// StoreUInt64NarrowToUInt16 : Truncate to 16 bits and store
    /// void svst1h[_u64](svbool_t pg, uint16_t *base, svuint64_t data) : "ST1H Zdata.D, Pg, [Xarray, Xindex, LSL #1]" or "ST1H Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt64NarrowToUInt16(Vector<ulong> mask, ushort* @base, Vector<ulong> data);
    /// StoreUInt64NarrowToUInt32 : Truncate to 32 bits and store
    /// void svst1w[_u64](svbool_t pg, uint32_t *base, svuint64_t data) : "ST1W Zdata.D, Pg, [Xarray, Xindex, LSL #2]" or "ST1W Zdata.D, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void StoreUInt64NarrowToUInt32(Vector<ulong> mask, uint* @base, Vector<ulong> data);
    /// Storex2 : Store two vectors into two-element tuples
    /// void svst2[_f32](svbool_t pg, float32_t *base, svfloat32x2_t data) : "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<float> mask, float* @base, (Vector<float> data1, Vector<float> data2) data);
    /// void svst2[_f64](svbool_t pg, float64_t *base, svfloat64x2_t data) : "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<double> mask, double* @base, (Vector<double> data1, Vector<double> data2) data);
    /// void svst2[_s8](svbool_t pg, int8_t *base, svint8x2_t data) : "ST2B {Zdata0.B, Zdata1.B}, Pg, [Xarray, Xindex]" or "ST2B {Zdata0.B, Zdata1.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<sbyte> mask, sbyte* @base, (Vector<sbyte> data1, Vector<sbyte> data2) data);
    /// void svst2[_s16](svbool_t pg, int16_t *base, svint16x2_t data) : "ST2H {Zdata0.H, Zdata1.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST2H {Zdata0.H, Zdata1.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<short> mask, short* @base, (Vector<short> data1, Vector<short> data2) data);
    /// void svst2[_s32](svbool_t pg, int32_t *base, svint32x2_t data) : "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<int> mask, int* @base, (Vector<int> data1, Vector<int> data2) data);
    /// void svst2[_s64](svbool_t pg, int64_t *base, svint64x2_t data) : "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<long> mask, long* @base, (Vector<long> data1, Vector<long> data2) data);
    /// void svst2[_u8](svbool_t pg, uint8_t *base, svuint8x2_t data) : "ST2B {Zdata0.B, Zdata1.B}, Pg, [Xarray, Xindex]" or "ST2B {Zdata0.B, Zdata1.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<byte> mask, byte* @base, (Vector<byte> data1, Vector<byte> data2) data);
    /// void svst2[_u16](svbool_t pg, uint16_t *base, svuint16x2_t data) : "ST2H {Zdata0.H, Zdata1.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST2H {Zdata0.H, Zdata1.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<ushort> mask, ushort* @base, (Vector<ushort> data1, Vector<ushort> data2) data);
    /// void svst2[_u32](svbool_t pg, uint32_t *base, svuint32x2_t data) : "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST2W {Zdata0.S, Zdata1.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<uint> mask, uint* @base, (Vector<uint> data1, Vector<uint> data2) data);
    /// void svst2[_u64](svbool_t pg, uint64_t *base, svuint64x2_t data) : "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST2D {Zdata0.D, Zdata1.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex2(Vector<ulong> mask, ulong* @base, (Vector<ulong> data1, Vector<ulong> data2) data);
    /// Storex3 : Store three vectors into three-element tuples
    /// void svst3[_f32](svbool_t pg, float32_t *base, svfloat32x3_t data) : "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<float> mask, float* @base, (Vector<float> data1, Vector<float> data2, Vector<float> data3) data);
    /// void svst3[_f64](svbool_t pg, float64_t *base, svfloat64x3_t data) : "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<double> mask, double* @base, (Vector<double> data1, Vector<double> data2, Vector<double> data3) data);
    /// void svst3[_s8](svbool_t pg, int8_t *base, svint8x3_t data) : "ST3B {Zdata0.B - Zdata2.B}, Pg, [Xarray, Xindex]" or "ST3B {Zdata0.B - Zdata2.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<sbyte> mask, sbyte* @base, (Vector<sbyte> data1, Vector<sbyte> data2, Vector<sbyte> data3) data);
    /// void svst3[_s16](svbool_t pg, int16_t *base, svint16x3_t data) : "ST3H {Zdata0.H - Zdata2.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST3H {Zdata0.H - Zdata2.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<short> mask, short* @base, (Vector<short> data1, Vector<short> data2, Vector<short> data3) data);
    /// void svst3[_s32](svbool_t pg, int32_t *base, svint32x3_t data) : "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<int> mask, int* @base, (Vector<int> data1, Vector<int> data2, Vector<int> data3) data);
    /// void svst3[_s64](svbool_t pg, int64_t *base, svint64x3_t data) : "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<long> mask, long* @base, (Vector<long> data1, Vector<long> data2, Vector<long> data3) data);
    /// void svst3[_u8](svbool_t pg, uint8_t *base, svuint8x3_t data) : "ST3B {Zdata0.B - Zdata2.B}, Pg, [Xarray, Xindex]" or "ST3B {Zdata0.B - Zdata2.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<byte> mask, byte* @base, (Vector<byte> data1, Vector<byte> data2, Vector<byte> data3) data);
    /// void svst3[_u16](svbool_t pg, uint16_t *base, svuint16x3_t data) : "ST3H {Zdata0.H - Zdata2.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST3H {Zdata0.H - Zdata2.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<ushort> mask, ushort* @base, (Vector<ushort> data1, Vector<ushort> data2, Vector<ushort> data3) data);
    /// void svst3[_u32](svbool_t pg, uint32_t *base, svuint32x3_t data) : "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST3W {Zdata0.S - Zdata2.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<uint> mask, uint* @base, (Vector<uint> data1, Vector<uint> data2, Vector<uint> data3) data);
    /// void svst3[_u64](svbool_t pg, uint64_t *base, svuint64x3_t data) : "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST3D {Zdata0.D - Zdata2.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex3(Vector<ulong> mask, ulong* @base, (Vector<ulong> data1, Vector<ulong> data2, Vector<ulong> data3) data);
    /// Storex4 : Store four vectors into four-element tuples
    /// void svst4[_f32](svbool_t pg, float32_t *base, svfloat32x4_t data) : "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<float> mask, float* @base, (Vector<float> data1, Vector<float> data2, Vector<float> data3, Vector<float> data4) data);
    /// void svst4[_f64](svbool_t pg, float64_t *base, svfloat64x4_t data) : "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<double> mask, double* @base, (Vector<double> data1, Vector<double> data2, Vector<double> data3, Vector<double> data4) data);
    /// void svst4[_s8](svbool_t pg, int8_t *base, svint8x4_t data) : "ST4B {Zdata0.B - Zdata3.B}, Pg, [Xarray, Xindex]" or "ST4B {Zdata0.B - Zdata3.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<sbyte> mask, sbyte* @base, (Vector<sbyte> data1, Vector<sbyte> data2, Vector<sbyte> data3, Vector<sbyte> data4) data);
    /// void svst4[_s16](svbool_t pg, int16_t *base, svint16x4_t data) : "ST4H {Zdata0.H - Zdata3.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST4H {Zdata0.H - Zdata3.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<short> mask, short* @base, (Vector<short> data1, Vector<short> data2, Vector<short> data3, Vector<short> data4) data);
    /// void svst4[_s32](svbool_t pg, int32_t *base, svint32x4_t data) : "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<int> mask, int* @base, (Vector<int> data1, Vector<int> data2, Vector<int> data3, Vector<int> data4) data);
    /// void svst4[_s64](svbool_t pg, int64_t *base, svint64x4_t data) : "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<long> mask, long* @base, (Vector<long> data1, Vector<long> data2, Vector<long> data3, Vector<long> data4) data);
    /// void svst4[_u8](svbool_t pg, uint8_t *base, svuint8x4_t data) : "ST4B {Zdata0.B - Zdata3.B}, Pg, [Xarray, Xindex]" or "ST4B {Zdata0.B - Zdata3.B}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<byte> mask, byte* @base, (Vector<byte> data1, Vector<byte> data2, Vector<byte> data3, Vector<byte> data4) data);
    /// void svst4[_u16](svbool_t pg, uint16_t *base, svuint16x4_t data) : "ST4H {Zdata0.H - Zdata3.H}, Pg, [Xarray, Xindex, LSL #1]" or "ST4H {Zdata0.H - Zdata3.H}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<ushort> mask, ushort* @base, (Vector<ushort> data1, Vector<ushort> data2, Vector<ushort> data3, Vector<ushort> data4) data);
    /// void svst4[_u32](svbool_t pg, uint32_t *base, svuint32x4_t data) : "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xarray, Xindex, LSL #2]" or "ST4W {Zdata0.S - Zdata3.S}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<uint> mask, uint* @base, (Vector<uint> data1, Vector<uint> data2, Vector<uint> data3, Vector<uint> data4) data);
    /// void svst4[_u64](svbool_t pg, uint64_t *base, svuint64x4_t data) : "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xarray, Xindex, LSL #3]" or "ST4D {Zdata0.D - Zdata3.D}, Pg, [Xbase, #0, MUL VL]"
    public static unsafe void Storex4(Vector<ulong> mask, ulong* @base, (Vector<ulong> data1, Vector<ulong> data2, Vector<ulong> data3, Vector<ulong> data4) data);
    /// total method signatures: 62
    /// total method names: 17
}
|
/// Rejected:
/// public static unsafe void Store(Vector<float> mask, float* base, long vnum, Vector<float> data); // svst1_vnum[_f32]
/// public static unsafe void Store(Vector<double> mask, double* base, long vnum, Vector<double> data); // svst1_vnum[_f64]
/// public static unsafe void Store(Vector<sbyte> mask, sbyte* base, long vnum, Vector<sbyte> data); // svst1_vnum[_s8]
/// public static unsafe void Store(Vector<short> mask, short* base, long vnum, Vector<short> data); // svst1_vnum[_s16]
/// public static unsafe void Store(Vector<int> mask, int* base, long vnum, Vector<int> data); // svst1_vnum[_s32]
/// public static unsafe void Store(Vector<long> mask, long* base, long vnum, Vector<long> data); // svst1_vnum[_s64]
/// public static unsafe void Store(Vector<byte> mask, byte* base, long vnum, Vector<byte> data); // svst1_vnum[_u8]
/// public static unsafe void Store(Vector<ushort> mask, ushort* base, long vnum, Vector<ushort> data); // svst1_vnum[_u16]
/// public static unsafe void Store(Vector<uint> mask, uint* base, long vnum, Vector<uint> data); // svst1_vnum[_u32]
/// public static unsafe void Store(Vector<ulong> mask, ulong* base, long vnum, Vector<ulong> data); // svst1_vnum[_u64]
/// public static unsafe void StoreInt16NarrowToSByte(Vector<short> mask, sbyte* base, long vnum, Vector<short> data); // svst1b_vnum[_s16]
/// public static unsafe void StoreInt32NarrowToInt16(Vector<int> mask, short* base, long vnum, Vector<int> data); // svst1h_vnum[_s32]
/// public static unsafe void StoreInt32NarrowToSByte(Vector<int> mask, sbyte* base, long vnum, Vector<int> data); // svst1b_vnum[_s32]
/// public static unsafe void StoreInt64NarrowToInt16(Vector<long> mask, short* base, long vnum, Vector<long> data); // svst1h_vnum[_s64]
/// public static unsafe void StoreInt64NarrowToInt32(Vector<long> mask, int* base, long vnum, Vector<long> data); // svst1w_vnum[_s64]
/// public static unsafe void StoreInt64NarrowToSByte(Vector<long> mask, sbyte* base, long vnum, Vector<long> data); // svst1b_vnum[_s64]
/// public static unsafe void StoreNonTemporal(Vector<float> mask, float* base, long vnum, Vector<float> data); // svstnt1_vnum[_f32]
/// public static unsafe void StoreNonTemporal(Vector<double> mask, double* base, long vnum, Vector<double> data); // svstnt1_vnum[_f64]
/// public static unsafe void StoreNonTemporal(Vector<sbyte> mask, sbyte* base, long vnum, Vector<sbyte> data); // svstnt1_vnum[_s8]
/// public static unsafe void StoreNonTemporal(Vector<short> mask, short* base, long vnum, Vector<short> data); // svstnt1_vnum[_s16]
/// public static unsafe void StoreNonTemporal(Vector<int> mask, int* base, long vnum, Vector<int> data); // svstnt1_vnum[_s32]
/// public static unsafe void StoreNonTemporal(Vector<long> mask, long* base, long vnum, Vector<long> data); // svstnt1_vnum[_s64]
/// public static unsafe void StoreNonTemporal(Vector<byte> mask, byte* base, long vnum, Vector<byte> data); // svstnt1_vnum[_u8]
/// public static unsafe void StoreNonTemporal(Vector<ushort> mask, ushort* base, long vnum, Vector<ushort> data); // svstnt1_vnum[_u16]
/// public static unsafe void StoreNonTemporal(Vector<uint> mask, uint* base, long vnum, Vector<uint> data); // svstnt1_vnum[_u32]
/// public static unsafe void StoreNonTemporal(Vector<ulong> mask, ulong* base, long vnum, Vector<ulong> data); // svstnt1_vnum[_u64]
/// public static unsafe void StoreUInt16NarrowToByte(Vector<ushort> mask, byte* base, long vnum, Vector<ushort> data); // svst1b_vnum[_u16]
/// public static unsafe void StoreUInt32NarrowToByte(Vector<uint> mask, byte* base, long vnum, Vector<uint> data); // svst1b_vnum[_u32]
/// public static unsafe void StoreUInt32NarrowToUInt16(Vector<uint> mask, ushort* base, long vnum, Vector<uint> data); // svst1h_vnum[_u32]
/// public static unsafe void StoreUInt64NarrowToByte(Vector<ulong> mask, byte* base, long vnum, Vector<ulong> data); // svst1b_vnum[_u64]
/// public static unsafe void StoreUInt64NarrowToUInt16(Vector<ulong> mask, ushort* base, long vnum, Vector<ulong> data); // svst1h_vnum[_u64]
/// public static unsafe void StoreUInt64NarrowToUInt32(Vector<ulong> mask, uint* base, long vnum, Vector<ulong> data); // svst1w_vnum[_u64]
/// public static unsafe void Storex2(Vector<float> mask, float* base, long vnum, (Vector<float> data1, Vector<float> data2)); // svst2_vnum[_f32]
/// public static unsafe void Storex2(Vector<double> mask, double* base, long vnum, (Vector<double> data1, Vector<double> data2)); // svst2_vnum[_f64]
/// public static unsafe void Storex2(Vector<sbyte> mask, sbyte* base, long vnum, (Vector<sbyte> data1, Vector<sbyte> data2)); // svst2_vnum[_s8]
/// public static unsafe void Storex2(Vector<short> mask, short* base, long vnum, (Vector<short> data1, Vector<short> data2)); // svst2_vnum[_s16]
/// public static unsafe void Storex2(Vector<int> mask, int* base, long vnum, (Vector<int> data1, Vector<int> data2)); // svst2_vnum[_s32]
/// public static unsafe void Storex2(Vector<long> mask, long* base, long vnum, (Vector<long> data1, Vector<long> data2)); // svst2_vnum[_s64]
/// public static unsafe void Storex2(Vector<byte> mask, byte* base, long vnum, (Vector<byte> data1, Vector<byte> data2)); // svst2_vnum[_u8]
/// public static unsafe void Storex2(Vector<ushort> mask, ushort* base, long vnum, (Vector<ushort> data1, Vector<ushort> data2)); // svst2_vnum[_u16]
/// public static unsafe void Storex2(Vector<uint> mask, uint* base, long vnum, (Vector<uint> data1, Vector<uint> data2)); // svst2_vnum[_u32]
/// public static unsafe void Storex2(Vector<ulong> mask, ulong* base, long vnum, (Vector<ulong> data1, Vector<ulong> data2)); // svst2_vnum[_u64]
/// public static unsafe void Storex3(Vector<float> mask, float* base, long vnum, (Vector<float> data1, Vector<float> data2, Vector<float> data3)); // svst3_vnum[_f32]
/// public static unsafe void Storex3(Vector<double> mask, double* base, long vnum, (Vector<double> data1, Vector<double> data2, Vector<double> data3)); // svst3_vnum[_f64]
/// public static unsafe void Storex3(Vector<sbyte> mask, sbyte* base, long vnum, (Vector<sbyte> data1, Vector<sbyte> data2, Vector<sbyte> data3)); // svst3_vnum[_s8]
/// public static unsafe void Storex3(Vector<short> mask, short* base, long vnum, (Vector<short> data1, Vector<short> data2, Vector<short> data3)); // svst3_vnum[_s16]
/// public static unsafe void Storex3(Vector<int> mask, int* base, long vnum, (Vector<int> data1, Vector<int> data2, Vector<int> data3)); // svst3_vnum[_s32]
/// public static unsafe void Storex3(Vector<long> mask, long* base, long vnum, (Vector<long> data1, Vector<long> data2, Vector<long> data3)); // svst3_vnum[_s64]
/// public static unsafe void Storex3(Vector<byte> mask, byte* base, long vnum, (Vector<byte> data1, Vector<byte> data2, Vector<byte> data3)); // svst3_vnum[_u8]
/// public static unsafe void Storex3(Vector<ushort> mask, ushort* base, long vnum, (Vector<ushort> data1, Vector<ushort> data2, Vector<ushort> data3)); // svst3_vnum[_u16]
/// public static unsafe void Storex3(Vector<uint> mask, uint* base, long vnum, (Vector<uint> data1, Vector<uint> data2, Vector<uint> data3)); // svst3_vnum[_u32]
/// public static unsafe void Storex3(Vector<ulong> mask, ulong* base, long vnum, (Vector<ulong> data1, Vector<ulong> data2, Vector<ulong> data3)); // svst3_vnum[_u64]
/// public static unsafe void Storex4(Vector<float> mask, float* base, long vnum, (Vector<float> data1, Vector<float> data2, Vector<float> data3, Vector<float> data4)); // svst4_vnum[_f32]
/// public static unsafe void Storex4(Vector<double> mask, double* base, long vnum, (Vector<double> data1, Vector<double> data2, Vector<double> data3, Vector<double> data4)); // svst4_vnum[_f64]
/// public static unsafe void Storex4(Vector<sbyte> mask, sbyte* base, long vnum, (Vector<sbyte> data1, Vector<sbyte> data2, Vector<sbyte> data3, Vector<sbyte> data4)); // svst4_vnum[_s8]
/// public static unsafe void Storex4(Vector<short> mask, short* base, long vnum, (Vector<short> data1, Vector<short> data2, Vector<short> data3, Vector<short> data4)); // svst4_vnum[_s16]
/// public static unsafe void Storex4(Vector<int> mask, int* base, long vnum, (Vector<int> data1, Vector<int> data2, Vector<int> data3, Vector<int> data4)); // svst4_vnum[_s32]
/// public static unsafe void Storex4(Vector<long> mask, long* base, long vnum, (Vector<long> data1, Vector<long> data2, Vector<long> data3, Vector<long> data4)); // svst4_vnum[_s64]
/// public static unsafe void Storex4(Vector<byte> mask, byte* base, long vnum, (Vector<byte> data1, Vector<byte> data2, Vector<byte> data3, Vector<byte> data4)); // svst4_vnum[_u8]
/// public static unsafe void Storex4(Vector<ushort> mask, ushort* base, long vnum, (Vector<ushort> data1, Vector<ushort> data2, Vector<ushort> data3, Vector<ushort> data4)); // svst4_vnum[_u16]
/// public static unsafe void Storex4(Vector<uint> mask, uint* base, long vnum, (Vector<uint> data1, Vector<uint> data2, Vector<uint> data3, Vector<uint> data4)); // svst4_vnum[_u32]
/// public static unsafe void Storex4(Vector<ulong> mask, ulong* base, long vnum, (Vector<ulong> data1, Vector<ulong> data2, Vector<ulong> data3, Vector<ulong> data4)); // svst4_vnum[_u64]
/// Total Rejected: 62
/// Total ACLE covered across API: 124 |
This contributes to #93095. It covers instructions in FEAT_SVE related to stores. Note there are more store methods in scatter stores. This list was auto-generated from the C ACLE for SVE, and is in three parts: The methods list reduced down to Vector versions. All possible variants of T are given above the method. Many of the C functions include predicate argument(s), of type svbool_t as the first argument. These are missing from the C# method. It is expected that the Jit will create predicates where required, or combine with uses of conditionalSelect(). For more discussion see #88140 comment. |
Unlike the However, |
This issue has been marked |
Updated to As far as I can tell, Also, I feel that the type at the end (the destination type) isn't clear as to whether the destination type is signed or not. |
That seems fine. We only need to differentiate by behavior or return type. If the behavior is identical and the delimiter would only be the input type (which can simply be an overload), then dropping the delimiter is fine.
We'd use |
All stores are predicated ("Inactive elements are not written to memory"). How did you see this working with the use of conditionalSelect? |
Predicated stores/loads likely need an explicit overload that takes a mask since the operation happens to memory and the There are potentially patterns that could be recognized, given the memory ordering rules. But exposing a couple extra overloads for this important concept should be fine. Something like Regular |
That would be fine. Warning though, it's going to bloat the API. Instead of writing all of them out, I might just add a note to the top of the API block. |
This is because every I wonder if we could compensate for this by avoiding the
Most notably, we could just expose We could then expose On another note, I see these APIs explicitly take The way most of the ISAs are exposed today we have: public abstract class IsaName
{
// True only if the hardware supports IsaName.
public static bool IsSupported { get; }
// Methods supported by both Arm32 and Arm64 live directly on the ISA class.
public static void MethodA();
// Nested class for the Arm32-only part of the ISA.
public abstract class Arm32
{
// True only if the hardware supports IsaName and we're 32-bit.
public static bool IsSupported { get; }
// Methods supported by only Arm32.
public static void MethodB();
}
// Nested class for the Arm64-only part of the ISA.
public abstract class Arm64
{
// True only if the hardware supports IsaName and we're 64-bit.
public static bool IsSupported { get; }
// Methods supported by only Arm64.
public static void MethodC();
}
} If there is zero chance of SVE ever being supported on Arm32, then we could potentially consider an exception to this normal design layout. But if that (or some other size) might be a consideration in the future, we may want to slightly tweak it to take |
Yup. I just totalled what we had already.
That should work. I'm assuming the pattern recognition is generic enough so that it can be used mostly as is without much refactoring? Alternatively, the user can use a generic Happy with either.
SVE is Arm64 only. It would take far too much silicon/power to be viable for 32bit. Closest there is for Arm32 is MVE aka Helium on M class. |
That sounds reasonable to me as well. They already have a "convenience" API in the form of
👍. We'll probably end up discussing this a bit in API review and whether we want to be "consistent" and have it only under |
Updated:
|
Also updated the scatter,load,gather,firstfault APIs in the same way. |
For this API and all the other SVE1 APIs that haven't been reviewed yet I've updated the API proposals. These updates take into account the changes made in the other APIs. My updated scripts have automatically applied them to the entries in these APIs. E.g.: any addressing modes with a |
namespace System.Runtime.Intrinsics.Arm;
/// VectorT Summary
/// Proposed API surface for the FEAT_SVE "stores" category.
/// Signatures are written generically: the `/// T:` line above each method lists the element types it is expanded to.
/// Every method takes the predicate (`mask`) as its first argument; per the discussion in this issue, inactive elements are not written to memory.
/// The trailing `//` comment on each method names the SVE instruction(s) it maps to.
public abstract partial class Sve : AdvSimd /// Feature: FEAT_SVE Category: stores
{
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe void Store(Vector<T> mask, T* address, Vector<T> data); // ST1W or ST1D or ST1B or ST1H
/// The bracketed pairs below are [T, T2]: T is the vector element type, T2 is the narrower element type of the destination pointer.
/// T: [short, sbyte], [int, short], [int, sbyte], [long, short], [long, int], [long, sbyte]
/// T: [ushort, byte], [uint, ushort], [uint, byte], [ulong, ushort], [ulong, uint], [ulong, byte]
public static unsafe void StoreNarrowing(Vector<T> mask, T2* address, Vector<T> data); // ST1B or ST1H or ST1W
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe void StoreNonTemporal(Vector<T> mask, T* address, Vector<T> data); // STNT1W or STNT1D or STNT1B or STNT1H
/// Overload taking a tuple of two vectors.
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe void Store(Vector<T> mask, T* address, (Vector<T> Value1, Vector<T> Value2) data); // ST2W or ST2D or ST2B or ST2H
/// Overload taking a tuple of three vectors.
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe void Store(Vector<T> mask, T* address, (Vector<T> Value1, Vector<T> Value2, Vector<T> Value3) data); // ST3W or ST3D or ST3B or ST3H
/// Overload taking a tuple of four vectors.
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe void Store(Vector<T> mask, T* address, (Vector<T> Value1, Vector<T> Value2, Vector<T> Value3, Vector<T> Value4) data); // ST4W or ST4D or ST4B or ST4H
/// total method signatures: 17
} |
That means for the one arg version, eg:
This will only store the first 32bits from the input vector. How does the user store a full vector to memory (using Suggestion:
|
Ignore the previous comment, it was wrong. |
The text was updated successfully, but these errors were encountered: