-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Closed
Closed
Copy link
Labels
api-approvedAPI was approved in API review, it can be implementedAPI was approved in API review, it can be implementedarch-arm64area-System.Runtime.Intrinsicsarm-sveWork related to arm64 SVE/SVE2 supportWork related to arm64 SVE/SVE2 supportin-prThere is an active PR which will close this issue when it is mergedThere is an active PR which will close this issue when it is merged
Milestone
Description
Background and motivation
`LoadVectorNonFaulting()` currently does not take a mask argument. The reasoning was that the load is guaranteed not to fault, so a masked load could be expressed as a select over an unmasked load:
LoadVectorNonFaulting(mask, addr) === ConditionalSelect(mask, LoadVectorNonFaulting(addr), zero)
However, this is not entirely correct.
If a lane that would have been inactive (false in the mask) is on a faulting memory address, then that lane in the FFR register is cleared, and successive loads to that lane that use the FFR become unknown/unpredictable.
See the pseudo code at the bottom of LDNF1B
I recommend adding a mask argument to all non-faulting SVE APIs.
API Proposal
Using the same T syntax as other SVE proposals.
These are all changes to existing API methods: each one differs from its existing counterpart by taking an additional mask argument.
namespace System.Runtime.Intrinsics.Arm;
/// <summary>
/// Proposed signatures for the SVE non-faulting load methods, each taking a
/// <c>mask</c> vector as its first argument followed by the source <c>address</c>.
/// </summary>
/// <remarks>
/// Declarations only: this is an API sketch, so no method has a body.
/// In every signature the <c>mask</c> vector type is the same as the returned vector type.
/// The trailing comment on each declaration names the instruction(s) it is associated with.
/// NOTE(review): how inactive mask lanes are populated in the result is not shown here - confirm
/// against the instruction reference before documenting it on the final API.
/// </remarks>
public partial class Sve
{
// Overloads reading through a byte* and named ...ZeroExtendTo<result element type>.
public static unsafe Vector<short> LoadVectorByteNonFaultingZeroExtendToInt16(Vector<short> mask, byte* address); // LDNF1B
public static unsafe Vector<int> LoadVectorByteNonFaultingZeroExtendToInt32(Vector<int> mask, byte* address); // LDNF1B
public static unsafe Vector<long> LoadVectorByteNonFaultingZeroExtendToInt64(Vector<long> mask, byte* address); // LDNF1B
public static unsafe Vector<ushort> LoadVectorByteNonFaultingZeroExtendToUInt16(Vector<ushort> mask, byte* address); // LDNF1B
public static unsafe Vector<uint> LoadVectorByteNonFaultingZeroExtendToUInt32(Vector<uint> mask, byte* address); // LDNF1B
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(Vector<ulong> mask, byte* address); // LDNF1B
// Overloads reading through a short* and named ...SignExtendTo<result element type>.
public static unsafe Vector<int> LoadVectorInt16NonFaultingSignExtendToInt32(Vector<int> mask, short* address); // LDNF1SH
public static unsafe Vector<long> LoadVectorInt16NonFaultingSignExtendToInt64(Vector<long> mask, short* address); // LDNF1SH
public static unsafe Vector<uint> LoadVectorInt16NonFaultingSignExtendToUInt32(Vector<uint> mask, short* address); // LDNF1SH
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingSignExtendToUInt64(Vector<ulong> mask, short* address); // LDNF1SH
// Overloads reading through an int* and named ...SignExtendTo<result element type>.
public static unsafe Vector<long> LoadVectorInt32NonFaultingSignExtendToInt64(Vector<long> mask, int* address); // LDNF1SW
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingSignExtendToUInt64(Vector<ulong> mask, int* address); // LDNF1SW
// Same-width load. T below is a placeholder, not a declared generic parameter: the line
// stands for one overload per listed element type.
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(Vector<T> mask, T* address); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
// Overloads reading through an sbyte* and named ...SignExtendTo<result element type>.
public static unsafe Vector<short> LoadVectorSByteNonFaultingSignExtendToInt16(Vector<short> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<int> LoadVectorSByteNonFaultingSignExtendToInt32(Vector<int> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<long> LoadVectorSByteNonFaultingSignExtendToInt64(Vector<long> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ushort> LoadVectorSByteNonFaultingSignExtendToUInt16(Vector<ushort> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<uint> LoadVectorSByteNonFaultingSignExtendToUInt32(Vector<uint> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ulong> LoadVectorSByteNonFaultingSignExtendToUInt64(Vector<ulong> mask, sbyte* address); // LDNF1SB
// Overloads reading through a ushort* and named ...ZeroExtendTo<result element type>.
public static unsafe Vector<int> LoadVectorUInt16NonFaultingZeroExtendToInt32(Vector<int> mask, ushort* address); // LDNF1H
public static unsafe Vector<long> LoadVectorUInt16NonFaultingZeroExtendToInt64(Vector<long> mask, ushort* address); // LDNF1H
public static unsafe Vector<uint> LoadVectorUInt16NonFaultingZeroExtendToUInt32(Vector<uint> mask, ushort* address); // LDNF1H
public static unsafe Vector<ulong> LoadVectorUInt16NonFaultingZeroExtendToUInt64(Vector<ulong> mask, ushort* address); // LDNF1H
// Overloads reading through a uint* and named ...ZeroExtendTo<result element type>.
public static unsafe Vector<long> LoadVectorUInt32NonFaultingZeroExtendToInt64(Vector<long> mask, uint* address); // LDNF1W
public static unsafe Vector<ulong> LoadVectorUInt32NonFaultingZeroExtendToUInt64(Vector<ulong> mask, uint* address); // LDNF1W
}
Metadata
Metadata
Assignees
Labels
api-approvedAPI was approved in API review, it can be implementedAPI was approved in API review, it can be implementedarch-arm64area-System.Runtime.Intrinsicsarm-sveWork related to arm64 SVE/SVE2 supportWork related to arm64 SVE/SVE2 supportin-prThere is an active PR which will close this issue when it is mergedThere is an active PR which will close this issue when it is merged