Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64: Implement LoadAndInsertScalar APIs #93197

Merged
merged 48 commits into from
Oct 11, 2023
Merged
Changes from 3 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
785b198
Add APIs for LoadVector*x2
kunalspathak Sep 28, 2023
670c61e
Add implementation for LoadVector*x2
kunalspathak Sep 29, 2023
a396f9b
Add APIs for LoadVector*x3
kunalspathak Sep 29, 2023
da986ba
Add implementation for LoadVector*x3
kunalspathak Sep 29, 2023
ccba48e
Add APIs for LoadVector*x4
kunalspathak Sep 29, 2023
87ec4c0
Add implementation for LoadVector*x4
kunalspathak Sep 29, 2023
6fed5cf
Add test cases for LoadVectorx2, LoadVectorx3, LoadVectorx4
kunalspathak Sep 30, 2023
66d893c
Merge remote-tracking branch 'origin/main' into ld2
kunalspathak Oct 1, 2023
b5dca03
minor rename
kunalspathak Oct 1, 2023
92fb279
REVERT: Add Debug.Assert(false) to make sure test runs
kunalspathak Oct 2, 2023
8c8a186
Retain gtOtherReg rather than making it an array
kunalspathak Oct 2, 2023
9582068
Add APIs for LoadAndReplicateToVector64x* and LoadAndReplicateToVector128x*
kunalspathak Oct 2, 2023
2bea97f
Revert "REVERT: Add Debug.Assert(false) to make sure test runs"
kunalspathak Oct 2, 2023
6a0a426
fix the test template
kunalspathak Oct 2, 2023
d1c38bc
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
873df44
Implement LoadAndReplicateToVector* APIs
kunalspathak Oct 2, 2023
7cf45d6
Add test coverage for LoadAndReplicateToVector* APIs
kunalspathak Oct 2, 2023
1e7629f
fix the LoadVectorx4 template
kunalspathak Oct 2, 2023
6929501
address review comment
kunalspathak Oct 2, 2023
ad60b30
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
fb80174
Add APIs for LoadAndInsertScalar()
kunalspathak Oct 2, 2023
6b89465
fix one more error in LoadVectorx4Test.template
kunalspathak Oct 2, 2023
90b1041
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 2, 2023
910a64b
Add APIs for LoadAndInsertScalar()
kunalspathak Oct 3, 2023
8f62949
Fix the API definition
kunalspathak Oct 5, 2023
12a75a2
wip: Implementation
kunalspathak Oct 5, 2023
13b1ecb
feedback by Bruce
kunalspathak Oct 5, 2023
b52a029
Rename the test case name
kunalspathak Oct 5, 2023
b49ebcd
Disable test for mono
kunalspathak Oct 5, 2023
4c78408
Merge branch 'ld2-3-4' into ld2r-ld3r-ld4r
kunalspathak Oct 5, 2023
2c3540e
Fix the errors to make it work
kunalspathak Oct 5, 2023
fd2946c
Merge remote-tracking branch 'origin/main' into loadandreplicate
kunalspathak Oct 5, 2023
6c0da62
fix merge conflicts
kunalspathak Oct 5, 2023
629cf96
fix the typo in test case
kunalspathak Oct 5, 2023
f7c966b
Merge branch 'loadandreplicate' into loadandinsertscalar
kunalspathak Oct 6, 2023
19d4ae3
Merge remote-tracking branch 'origin/main' into loadandinsertscalar
kunalspathak Oct 6, 2023
a0eb7cd
code cleanup
kunalspathak Oct 6, 2023
ba21188
fix the importing of normal LoadAndInsertScalar
kunalspathak Oct 6, 2023
0d8f668
Fix some more importing and lsra
kunalspathak Oct 6, 2023
c2bdb82
fix the lsra issues
kunalspathak Oct 6, 2023
690dc69
Add test for LoadAndInsertScalarx2
kunalspathak Oct 9, 2023
3617009
Add test cases for LoadAndInsertScalarx2 and LoadAndInsertScalarx3
kunalspathak Oct 9, 2023
480fbc3
jit format
kunalspathak Oct 9, 2023
3aadc2f
fix bug
kunalspathak Oct 9, 2023
ce4a5e0
fix test build errors
kunalspathak Oct 9, 2023
20ae72e
fix the test errors
kunalspathak Oct 10, 2023
513c909
fix typos in x3 and x4
kunalspathak Oct 10, 2023
8288942
address feedback from Bruce
kunalspathak Oct 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
@@ -6146,7 +6146,7 @@ struct GenTreeJitIntrinsic : public GenTreeMultiOp
return;
}
#endif
- // should only be used to get otherReg
+ // should only be used to set otherReg
assert(idx == 1);
gtOtherReg = (regNumberSmall)reg;
}
Original file line number Diff line number Diff line change
@@ -145,7 +145,7 @@ namespace JIT.HardwareIntrinsics.Arm
private static readonly int LargestVectorSize = {LargestVectorSize};

private static readonly int OpElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType});
- private static readonly int DestElementCount = OpElementCount * 3;
+ private static readonly int DestElementCount = OpElementCount * 4;

private static {Op1BaseType}[] _data = new {Op1BaseType}[DestElementCount];

@@ -257,7 +257,7 @@ namespace JIT.HardwareIntrinsics.Arm
{Op1BaseType}[] outArray3 = new {Op1BaseType}[OpElementCount];
{Op1BaseType}[] outArray4 = new {Op1BaseType}[OpElementCount];

- Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(input), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() * 3);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(input), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() * 4);
Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray1[0]), ref Unsafe.AsRef<byte>(result1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray2[0]), ref Unsafe.AsRef<byte>(result2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray3[0]), ref Unsafe.AsRef<byte>(result3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
@@ -274,7 +274,7 @@ namespace JIT.HardwareIntrinsics.Arm
{Op1BaseType}[] outArray3 = new {Op1BaseType}[OpElementCount];
{Op1BaseType}[] outArray4 = new {Op1BaseType}[OpElementCount];

- Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(input), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() * 3);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(input), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() * 4);
Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray1[0]), result1);
Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray2[0]), result2);
Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref outArray3[0]), result3);