Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#5036 A64 scatter/gather, part 11: First-fault loads #6776

Merged
merged 1 commit into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,34 @@ Hello, world!
Basic counts tool results:
Total counts:
#ifdef __ARM_FEATURE_SVE2
772 total \(fetched\) instructions
286 total unique \(fetched\) instructions
997 total \(fetched\) instructions
361 total unique \(fetched\) instructions
#else
733 total \(fetched\) instructions
271 total unique \(fetched\) instructions
958 total \(fetched\) instructions
346 total unique \(fetched\) instructions
#endif
0 total non-fetched instructions
0 total prefetches
#ifdef __ARM_FEATURE_SVE2
#if (__ARM_FEATURE_SVE_BITS == 128)
1248 total data loads
1547 total data loads
873 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2234 total data loads
2757 total data loads
1615 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4206 total data loads
5177 total data loads
3099 total data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#else
#if (__ARM_FEATURE_SVE_BITS == 128)
1227 total data loads
1526 total data loads
861 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2199 total data loads
2722 total data loads
1595 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4143 total data loads
5114 total data loads
3063 total data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#endif /* __ARM_FEATURE_SVE2 */
Expand All @@ -41,34 +41,34 @@ Total counts:
.*
Thread .* counts:
#ifdef __ARM_FEATURE_SVE2
772 \(fetched\) instructions
286 unique \(fetched\) instructions
997 \(fetched\) instructions
361 unique \(fetched\) instructions
#else
733 \(fetched\) instructions
271 unique \(fetched\) instructions
958 \(fetched\) instructions
346 unique \(fetched\) instructions
#endif
0 non-fetched instructions
0 prefetches
#ifdef __ARM_FEATURE_SVE2
#if (__ARM_FEATURE_SVE_BITS == 128)
1248 data loads
1547 data loads
873 data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2234 data loads
2757 data loads
1615 data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4206 data loads
5177 data loads
3099 data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#else
#if (__ARM_FEATURE_SVE_BITS == 128)
1227 data loads
1526 data loads
861 data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2199 data loads
2722 data loads
1595 data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4143 data loads
5114 data loads
3063 data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#endif /* __ARM_FEATURE_SVE2 */
Expand Down
149 changes: 120 additions & 29 deletions clients/drcachesim/tests/allasm_scattergather_aarch64.asm

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@ Hello, world!
Basic counts tool results:
Total counts:
#ifdef __ARM_FEATURE_SVE2
772 total \(fetched\) instructions
286 total unique \(fetched\) instructions
997 total \(fetched\) instructions
361 total unique \(fetched\) instructions
#else
733 total \(fetched\) instructions
271 total unique \(fetched\) instructions
958 total \(fetched\) instructions
346 total unique \(fetched\) instructions
#endif
0 total non-fetched instructions
0 total prefetches
#ifdef __ARM_FEATURE_SVE2
#if (__ARM_FEATURE_SVE_BITS == 128)
1248 total data loads
1547 total data loads
873 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2234 total data loads
2757 total data loads
1615 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4206 total data loads
5177 total data loads
3099 total data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#else
#if (__ARM_FEATURE_SVE_BITS == 128)
1227 total data loads
1526 total data loads
861 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2199 total data loads
2722 total data loads
1595 total data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4143 total data loads
5114 total data loads
3063 total data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#endif /* __ARM_FEATURE_SVE2 */
Expand All @@ -40,35 +40,35 @@ Total counts:
.*
Thread .* counts:
#ifdef __ARM_FEATURE_SVE2
772 \(fetched\) instructions
286 unique \(fetched\) instructions
997 \(fetched\) instructions
361 unique \(fetched\) instructions
#else
733 \(fetched\) instructions
271 unique \(fetched\) instructions
958 \(fetched\) instructions
346 unique \(fetched\) instructions
#endif

0 non-fetched instructions
0 prefetches
#ifdef __ARM_FEATURE_SVE2
#if (__ARM_FEATURE_SVE_BITS == 128)
1248 data loads
1547 data loads
873 data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2234 data loads
2757 data loads
1615 data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4206 data loads
5177 data loads
3099 data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#else
#if (__ARM_FEATURE_SVE_BITS == 128)
1227 data loads
1526 data loads
861 data stores
#elif (__ARM_FEATURE_SVE_BITS == 256)
2199 data loads
2722 data loads
1595 data stores
#elif (__ARM_FEATURE_SVE_BITS == 512)
4143 data loads
5114 data loads
3063 data stores
#endif /* __ARM_FEATURE_SVE_BITS */
#endif /* __ARM_FEATURE_SVE2 */
Expand Down
90 changes: 90 additions & 0 deletions clients/drcachesim/tests/scattergather-aarch64.templatex
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,65 @@ ld1d scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ld1d scalar\+vector 64bit scaled offset: PASS
ld1d scalar\+vector 64bit unscaled offset: PASS
ld1d scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1b scalar\+vector 32bit unscaled offset uxtw: PASS
ldff1b scalar\+vector 32bit unscaled offset sxtw: PASS
ldff1b scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1b scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1b scalar\+vector 64bit unscaled offset: PASS
ldff1b scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1sb scalar\+vector 32bit unscaled offset uxtw: PASS
ldff1sb scalar\+vector 32bit unscaled offset sxtw: PASS
ldff1sb scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1sb scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1sb scalar\+vector 64bit unscaled offset: PASS
ldff1sb scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1h scalar\+vector 32bit scaled offset uxtw: PASS
ldff1h scalar\+vector 32bit scaled offset sxtw: PASS
ldff1h scalar\+vector 32bit unpacked scaled offset uxtw: PASS
ldff1h scalar\+vector 32bit unpacked scaled offset sxtw: PASS
ldff1h scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1h scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1h scalar\+vector 32bit unscaled offset uxtw: PASS
ldff1h scalar\+vector 32bit unscaled offset sxtw: PASS
ldff1h scalar\+vector 64bit scaled offset: PASS
ldff1h scalar\+vector 64bit unscaled offset: PASS
ldff1h scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1sh scalar\+vector 32bit scaled offset uxtw: PASS
ldff1sh scalar\+vector 32bit scaled offset sxtw: PASS
ldff1sh scalar\+vector 32bit unpacked scaled offset uxtw: PASS
ldff1sh scalar\+vector 32bit unpacked scaled offset sxtw: PASS
ldff1sh scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1sh scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1sh scalar\+vector 32bit unscaled offset uxtw: PASS
ldff1sh scalar\+vector 32bit unscaled offset sxtw: PASS
ldff1sh scalar\+vector 64bit scaled offset: PASS
ldff1sh scalar\+vector 64bit unscaled offset: PASS
ldff1sh scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1w scalar\+vector 32bit scaled offset uxtw: PASS
ldff1w scalar\+vector 32bit scaled offset sxtw: PASS
ldff1w scalar\+vector 32bit unpacked scaled offset uxtw: PASS
ldff1w scalar\+vector 32bit unpacked scaled offset sxtw: PASS
ldff1w scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1w scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1w scalar\+vector 32bit unscaled offset uxtw: PASS
ldff1w scalar\+vector 32bit unscaled offset sxtw: PASS
ldff1w scalar\+vector 64bit scaled offset: PASS
ldff1w scalar\+vector 64bit unscaled offset: PASS
ldff1w scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1sw scalar\+vector 32bit unpacked scaled offset uxtw: PASS
ldff1sw scalar\+vector 32bit unpacked scaled offset sxtw: PASS
ldff1sw scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1sw scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1sw scalar\+vector 64bit scaled offset: PASS
ldff1sw scalar\+vector 64bit unscaled offset: PASS
ldff1sw scalar\+vector 64bit unscaled offset Zt==Zm: PASS
ldff1d scalar\+vector 32bit unpacked scaled offset uxtw: PASS
ldff1d scalar\+vector 32bit unpacked scaled offset sxtw: PASS
ldff1d scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
ldff1d scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
ldff1d scalar\+vector 64bit scaled offset: PASS
ldff1d scalar\+vector 64bit unscaled offset: PASS
ldff1d scalar\+vector 64bit unscaled offset Zt==Zm: PASS
st1b scalar\+vector 32bit unpacked unscaled offset uxtw: PASS
st1b scalar\+vector 32bit unpacked unscaled offset sxtw: PASS
st1b scalar\+vector 32bit unscaled offset uxtw: PASS
Expand Down Expand Up @@ -112,6 +171,21 @@ ld1sw vector\+immediate 64bit element \(max index\): PASS
ld1d vector\+immediate 64bit element: PASS
ld1d vector\+immediate 64bit element \(max index\): PASS
ld1d vector\+immediate 64bit element Zt==Zn: PASS
ldff1b vector\+immediate 64bit element: PASS
ldff1b vector\+immediate 64bit element \(max index\): PASS
ldff1sb vector\+immediate 64bit element: PASS
ldff1sb vector\+immediate 64bit element \(max index\): PASS
ldff1h vector\+immediate 64bit element: PASS
ldff1h vector\+immediate 64bit element \(max index\): PASS
ldff1sh vector\+immediate 64bit element: PASS
ldff1sh vector\+immediate 64bit element \(max index\): PASS
ldff1w vector\+immediate 64bit element: PASS
ldff1w vector\+immediate 64bit element \(max index\): PASS
ldff1sw vector\+immediate 64bit element: PASS
ldff1sw vector\+immediate 64bit element \(max index\): PASS
ldff1d vector\+immediate 64bit element: PASS
ldff1d vector\+immediate 64bit element \(max index\): PASS
ldff1d vector\+immediate 64bit element Zt==Zn: PASS
st1b vector\+immediate 64bit element: PASS
st1b vector\+immediate 64bit element \(max index\): PASS
st1b vector\+immediate 64bit element \(repeated base\): PASS
Expand Down Expand Up @@ -148,6 +222,22 @@ ld1rqb scalar\+scalar: PASS
ld1rqh scalar\+scalar: PASS
ld1rqw scalar\+scalar: PASS
ld1rqd scalar\+scalar: PASS
ldff1b scalar\+scalar 8bit element: PASS
ldff1b scalar\+scalar 16bit element: PASS
ldff1b scalar\+scalar 32bit element: PASS
ldff1b scalar\+scalar 64bit element: PASS
ldff1sb scalar\+scalar 16bit element: PASS
ldff1sb scalar\+scalar 32bit element: PASS
ldff1sb scalar\+scalar 64bit element: PASS
ldff1h scalar\+scalar 16bit element: PASS
ldff1h scalar\+scalar 32bit element: PASS
ldff1h scalar\+scalar 64bit element: PASS
ldff1sh scalar\+scalar 32bit element: PASS
ldff1sh scalar\+scalar 64bit element: PASS
ldff1w scalar\+scalar 32bit element: PASS
ldff1w scalar\+scalar 64bit element: PASS
ldff1sw scalar\+scalar: PASS
ldff1d scalar\+scalar: PASS
ld2b scalar\+scalar: PASS
ld2h scalar\+scalar: PASS
ld2w scalar\+scalar: PASS
Expand Down
1 change: 0 additions & 1 deletion ext/drx/drx.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
#endif

#if defined(X86) || defined(AARCH64)
/* TODO i#5036: Complete AArch64 support. */
# define PLATFORM_SUPPORTS_SCATTER_GATHER
#endif

Expand Down
Loading
Loading