Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 1341b75

Browse files
committedAug 9, 2023
Document movnt needs sfence
For every intrinsic that may generate any of the MOVNT family of instructions, specify it must be followed by `_mm_sfence`. Also, ask people to not think too hard about what actually happens with write-combining memory buffers. They probably don't want to know, and in terms of the Rust abstract machine, we aren't actually entirely sure yet.
1 parent 195e56f commit 1341b75

File tree

5 files changed

+133
-0
lines changed

5 files changed

+133
-0
lines changed
 

‎crates/core_arch/src/x86/avx.rs

+33
Original file line numberDiff line numberDiff line change
@@ -1683,6 +1683,17 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
16831683
/// aligned memory location. To minimize caching, the data is flagged as
16841684
/// non-temporal (unlikely to be used again soon)
16851685
///
1686+
/// # Safety
1687+
///
1688+
/// After using this intrinsic, but before any atomic operations occur, a call
1689+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1690+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1691+
///
1692+
/// Reading and writing to the memory stored-to by any other means, after any
1693+
/// nontemporal store has been used to write to that memory, but before the
1694+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
1695+
/// stalls and yet-unspecified program behavior.
1696+
///
16861697
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
16871698
#[inline]
16881699
#[target_feature(enable = "avx")]
@@ -1696,6 +1707,17 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
16961707
/// to a 32-byte aligned memory location. To minimize caching, the data is
16971708
/// flagged as non-temporal (unlikely to be used again soon).
16981709
///
1710+
/// # Safety
1711+
///
1712+
/// After using this intrinsic, but before any atomic operations occur, a call
1713+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1714+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1715+
///
1716+
/// Reading and writing to the memory stored-to by any other means, after any
1717+
/// nontemporal store has been used to write to that memory, but before the
1718+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
1719+
/// stalls and yet-unspecified program behavior.
1720+
///
16991721
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
17001722
#[inline]
17011723
#[target_feature(enable = "avx")]
@@ -1711,6 +1733,17 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
17111733
/// caching, the data is flagged as non-temporal (unlikely to be used again
17121734
/// soon).
17131735
///
1736+
/// # Safety
1737+
///
1738+
/// After using this intrinsic, but before any atomic operations occur, a call
1739+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1740+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1741+
///
1742+
/// Reading and writing to the memory stored-to by any other means, after any
1743+
/// nontemporal store has been used to write to that memory, but before the
1744+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
1745+
/// stalls and yet-unspecified program behavior.
1746+
///
17141747
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
17151748
#[inline]
17161749
#[target_feature(enable = "avx")]

‎crates/core_arch/src/x86/avx512f.rs

+33
Original file line numberDiff line numberDiff line change
@@ -26144,6 +26144,17 @@ pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) ->
2614426144

2614526145
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2614626146
///
26147+
/// # Safety
26148+
///
26149+
/// After using this intrinsic, but before any atomic operations occur, a call
26150+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26151+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26152+
///
26153+
/// Reading and writing to the memory stored-to by any other means, after any
26154+
/// nontemporal store has been used to write to that memory, but before the
26155+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
26156+
/// stalls and yet-unspecified program behavior.
26157+
///
2614726158
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
2614826159
#[inline]
2614926160
#[target_feature(enable = "avx512f")]
@@ -26155,6 +26166,17 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
2615526166

2615626167
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2615726168
///
26169+
/// # Safety
26170+
///
26171+
/// After using this intrinsic, but before any atomic operations occur, a call
26172+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26173+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26174+
///
26175+
/// Reading and writing to the memory stored-to by any other means, after any
26176+
/// nontemporal store has been used to write to that memory, but before the
26177+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
26178+
/// stalls and yet-unspecified program behavior.
26179+
///
2615826180
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
2615926181
#[inline]
2616026182
#[target_feature(enable = "avx512f")]
@@ -26166,6 +26188,17 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
2616626188

2616726189
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
2616826190
///
26191+
/// # Safety
26192+
///
26193+
/// After using this intrinsic, but before any atomic operations occur, a call
26194+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
26195+
/// usage of this intrinsic must always end in `_mm_sfence()`.
26196+
///
26197+
/// Reading and writing to the memory stored-to by any other means, after any
26198+
/// nontemporal store has been used to write to that memory, but before the
26199+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
26200+
/// stalls and yet-unspecified program behavior.
26201+
///
2616926202
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
2617026203
#[inline]
2617126204
#[target_feature(enable = "avx512f")]

‎crates/core_arch/src/x86/sse2.rs

+33
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,17 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
12761276
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
12771277
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
12781278
/// used again soon).
1279+
///
1280+
/// # Safety
1281+
///
1282+
/// After using this intrinsic, but before any atomic operations occur, a call
1283+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1284+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1285+
///
1286+
/// Reading and writing to the memory stored-to by any other means, after any
1287+
/// nontemporal store has been used to write to that memory, but before the
1288+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
1289+
/// stalls and yet-unspecified program behavior.
12791290
///
12801291
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
12811292
#[inline]
@@ -1289,6 +1300,17 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
12891300
/// Stores a 32-bit integer value in the specified memory location.
12901301
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
12911302
/// used again soon).
1303+
///
1304+
/// # Safety
1305+
///
1306+
/// After using this intrinsic, but before any atomic operations occur, a call
1307+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
1308+
/// usage of this intrinsic must always end in `_mm_sfence()`.
1309+
///
1310+
/// Reading and writing to the memory stored-to by any other means, after any
1311+
/// nontemporal store has been used to write to that memory, but before the
1312+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
1313+
/// stalls and yet-unspecified program behavior.
12921314
///
12931315
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
12941316
#[inline]
@@ -2468,6 +2490,17 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
24682490
/// aligned memory location.
24692491
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
24702492
/// used again soon).
2493+
///
2494+
/// # Safety
2495+
///
2496+
/// After using this intrinsic, but before any atomic operations occur, a call
2497+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
2498+
/// usage of this intrinsic must always end in `_mm_sfence()`.
2499+
///
2500+
/// Reading and writing to the memory stored-to by any other means, after any
2501+
/// nontemporal store has been used to write to that memory, but before the
2502+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
2503+
/// stalls and yet-unspecified program behavior.
24712504
///
24722505
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
24732506
#[inline]

‎crates/core_arch/src/x86/sse4a.rs

+24
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
6262
/// Non-temporal store of `a.0` into `p`.
6363
///
6464
/// Writes 64-bit data to a memory location without polluting the caches.
65+
///
66+
/// # Safety
67+
///
68+
/// After using this intrinsic, but before any atomic operations occur, a call
69+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
70+
/// usage of this intrinsic must always end in `_mm_sfence()`.
71+
///
72+
/// Reading and writing to the memory stored-to by any other means, after any
73+
/// nontemporal store has been used to write to that memory, but before the
74+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
75+
/// stalls and yet-unspecified program behavior.
76+
///
6577
#[inline]
6678
#[target_feature(enable = "sse4a")]
6779
#[cfg_attr(test, assert_instr(movntsd))]
@@ -73,6 +85,18 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
7385
/// Non-temporal store of `a.0` into `p`.
7486
///
7587
/// Writes 32-bit data to a memory location without polluting the caches.
88+
///
89+
/// # Safety
90+
///
91+
/// After using this intrinsic, but before any atomic operations occur, a call
92+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
93+
/// usage of this intrinsic must always end in `_mm_sfence()`.
94+
///
95+
/// Reading and writing to the memory stored-to by any other means, after any
96+
/// nontemporal store has been used to write to that memory, but before the
97+
/// use of `_mm_sfence()`, is discouraged. Such reads can lead to pipeline
98+
/// stalls and yet-unspecified program behavior.
99+
///
76100
#[inline]
77101
#[target_feature(enable = "sse4a")]
78102
#[cfg_attr(test, assert_instr(movntss))]

‎crates/core_arch/src/x86_64/sse2.rs

+10
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
6666
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
6767
/// used again soon).
6868
///
69+
/// # Safety
70+
///
71+
/// After using this intrinsic, but before any atomic operations occur, a call
72+
/// to `_mm_sfence()` must be performed. A safe function that includes unsafe
73+
/// usage of this intrinsic must always end in `_mm_sfence()`.
74+
///
75+
/// Reading and writing to the memory stored-to by any other means, after any
76+
/// nontemporal store has been used to write to that memory, is discouraged.
77+
/// Doing so can lead to pipeline stalls and yet-unspecified program behavior.
78+
///
6979
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si64)
7080
#[inline]
7181
#[target_feature(enable = "sse2")]

0 commit comments

Comments
 (0)