-
Notifications
You must be signed in to change notification settings - Fork 215
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement LLVM's elementwise unordered atomic memory intrinsics #311
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,10 @@ type c_int = i16; | |
#[cfg(not(target_pointer_width = "16"))] | ||
type c_int = i32; | ||
|
||
use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div, unchecked_div};
use core::mem; | ||
use core::ops::{BitOr, Shl}; | ||
|
||
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] | ||
pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { | ||
let mut i = 0; | ||
|
@@ -58,3 +62,105 @@ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { | |
} | ||
0 | ||
} | ||
|
||
fn memcpy_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) { | ||
unsafe { | ||
let n = unchecked_div(bytes, mem::size_of::<T>()); | ||
let mut i = 0; | ||
while i < n { | ||
atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); | ||
i += 1; | ||
} | ||
} | ||
} | ||
|
||
fn memmove_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) { | ||
unsafe { | ||
let n = unchecked_div(bytes, mem::size_of::<T>()); | ||
if src < dest as *const T { | ||
// copy from end | ||
let mut i = n; | ||
while i != 0 { | ||
i -= 1; | ||
atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); | ||
} | ||
} else { | ||
// copy from beginning | ||
let mut i = 0; | ||
while i < n { | ||
atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); | ||
i += 1; | ||
} | ||
} | ||
} | ||
} | ||
|
||
fn memset_element_unordered_atomic<T>(s: *mut T, c: u8, bytes: usize) | ||
where | ||
T: Copy + From<u8> + Shl<u32, Output = T> + BitOr<T, Output = T>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks odd, seems worth adding a comment explaining how exactly the value What is the reason this does not match the normal There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The difference is that we must atomically store an entire Fortunately since this is not exported we don't need to worry too much about someone passing something gratuitously weird. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the spec says that e.g. the setting happens in 4-byte-atomic chunks? LLVM is allowed to transform adjacent subsequent atomic operations into a single one. Basically what you are doing here is realizing that optimization by hand.
Fair. Could you add a comment explaining that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, writing each complete
Right, but we need to guarantee this happens for correctness, so relying on an optimization wouldn't be appropriate. |
||
{ | ||
unsafe { | ||
let n = unchecked_div(bytes, mem::size_of::<T>()); | ||
let mut x = T::from(c); | ||
let mut i = 1; | ||
while i < mem::size_of::<T>() { | ||
x = x << 8 | T::from(c); | ||
i += 1; | ||
} | ||
let mut i = 0; | ||
while i < n { | ||
atomic_store_unordered(s.add(i), x); | ||
i += 1; | ||
} | ||
} | ||
} | ||
|
||
// Entry points for LLVM's element-wise unordered-atomic memory intrinsics.
// The `_N` suffix is the element size in bytes (1/2/4/8/16) and selects the
// element type; `bytes` is the total length, which LLVM guarantees is a
// multiple of the element size. The `intrinsics!` macro (defined elsewhere
// in this crate) handles the export/mangling attributes for each symbol.
intrinsics! {
    pub extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () {
        memcpy_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () {
        memcpy_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () {
        memcpy_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () {
        memcpy_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () {
        memcpy_element_unordered_atomic(dest, src, bytes);
    }

    pub extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () {
        memmove_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () {
        memmove_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () {
        memmove_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () {
        memmove_element_unordered_atomic(dest, src, bytes);
    }
    pub extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () {
        memmove_element_unordered_atomic(dest, src, bytes);
    }

    // For memset, `c` is always a single byte regardless of element size;
    // the helper broadcasts it across the whole element.
    pub extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () {
        memset_element_unordered_atomic(s, c, bytes);
    }
    pub extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () {
        memset_element_unordered_atomic(s, c, bytes);
    }
    pub extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () {
        memset_element_unordered_atomic(s, c, bytes);
    }
    pub extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () {
        memset_element_unordered_atomic(s, c, bytes);
    }
    pub extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () {
        memset_element_unordered_atomic(s, c, bytes);
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we assume that `bytes` is a multiple of `size_of::<T>()`? If not, what happens when there is rounding going on?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this guaranteed by LLVM:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like you could use https://doc.rust-lang.org/nightly/std/intrinsics/fn.exact_div.html then. Also IMO this is a precondition worth stating in a comment here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ooh, didn't see that one. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Resolved in #312.)