Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simd_bitmask intrinsic: add a non-power-of-2 multi-byte example #126171

Merged
merged 2 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1121,8 +1121,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
if name == sym::simd_select_bitmask {
let (len, _) = require_simd!(arg_tys[1], SimdArgument);

let expected_int_bits = (len.max(8) - 1).next_power_of_two();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why the old code did -1 here. next_power_of_two is already idempotent; when the input is a power of 2 it is returned unchanged.

let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
let expected_int_bits = len.max(8).next_power_of_two();
let expected_bytes = len.div_ceil(8);

let mask_ty = arg_tys[0];
let mask = match mask_ty.kind() {
Expand Down Expand Up @@ -1379,17 +1379,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}

if name == sym::simd_bitmask {
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
// vector mask and returns the most significant bit (MSB) of each lane in the form
// of either:
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and
// returns one bit for each lane (which must all be `0` or `!0`) in the form of either:
// * an unsigned integer
// * an array of `u8`
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
//
// The bit order of the result depends on the byte endianness, LSB-first for little
// endian and MSB-first for big endian.
let expected_int_bits = in_len.max(8);
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
let expected_int_bits = in_len.max(8).next_power_of_two();
let expected_bytes = in_len.div_ceil(8);

// Integer vector <i{in_bitwidth} x in_len>:
let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
Expand All @@ -1409,7 +1408,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
}),
};

// Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
// LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
// `pmovmskb` and similar on x86.
let shift_indices =
vec![
bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
Expand Down
19 changes: 13 additions & 6 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ extern "rust-intrinsic" {
/// `T` must be an integer vector.
///
/// `U` must be either the smallest unsigned integer with at least as many bits as the length
/// of `T`, or the smallest array of `u8` with as many bits as the length of `T`.
/// of `T`, or the smallest array of `u8` with at least as many bits as the length of `T`.
///
/// Each element is truncated to a single bit and packed into the result.
///
Expand All @@ -475,12 +475,19 @@ extern "rust-intrinsic" {
/// * On little endian, the least significant bit corresponds to the first vector element.
/// * On big endian, the least significant bit corresponds to the last vector element.
///
/// For example, `[!0, 0, !0, !0]` packs to `0b1101` on little endian and `0b1011` on big
/// endian.
/// For example, `[!0, 0, !0, !0]` packs to
/// - `0b1101u8` or `[0b1101]` on little endian, and
/// - `0b1011u8` or `[0b1011]` on big endian.
///
/// To consider a larger example, `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs
/// to `[0b00000001, 0b01000011]` or `0b0100001100000001` on little endian, and `[0b10000000,
/// 0b11000010]` or `0b1000000011000010` on big endian.
/// To consider a larger example,
/// `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs to
/// - `0b0100001100000001u16` or `[0b00000001, 0b01000011]` on little endian, and
/// - `0b1000000011000010u16` or `[0b10000000, 0b11000010]` on big endian.
///
/// And finally, a non-power-of-2 example with multiple bytes:
/// `[!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]` packs to
/// - `0b0101001011u16` or `[0b01001011, 0b01]` on little endian, and
/// - `0b1101001010u16` or `[0b11, 0b01001010]` on big endian.
///
/// # Safety
/// `x` must contain only `0` and `!0`.
Expand Down
90 changes: 90 additions & 0 deletions tests/ui/simd/simd-bitmask-notpow2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//@run-pass
// SEGFAULTS on LLVM 17. This should be merged into `simd-bitmask` once we require LLVM 18.
//@ min-llvm-version: 18
// FIXME: broken codegen on big-endian (https://github.com/rust-lang/rust/issues/127205)
//@ ignore-endian-big
#![feature(repr_simd, intrinsics)]

extern "rust-intrinsic" {
fn simd_bitmask<T, U>(v: T) -> U;
fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U;
}

fn main() {
// Non-power-of-2 multi-byte mask.
#[repr(simd, packed)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, PartialEq)]
struct i32x10([i32; 10]);
impl i32x10 {
fn splat(x: i32) -> Self {
Self([x; 10])
}
}
unsafe {
let mask = i32x10([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]);
let mask_bits = if cfg!(target_endian = "little") { 0b0101001011 } else { 0b1101001010 };
let mask_bytes =
if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] };

let bitmask1: u16 = simd_bitmask(mask);
let bitmask2: [u8; 2] = simd_bitmask(mask);
assert_eq!(bitmask1, mask_bits);
assert_eq!(bitmask2, mask_bytes);

let selected1 = simd_select_bitmask::<u16, _>(
mask_bits,
i32x10::splat(!0), // yes
i32x10::splat(0), // no
);
let selected2 = simd_select_bitmask::<[u8; 2], _>(
mask_bytes,
i32x10::splat(!0), // yes
i32x10::splat(0), // no
);
assert_eq!(selected1, mask);
assert_eq!(selected2, mask);
}

// Test for a mask where the next multiple of 8 is not a power of two.
#[repr(simd, packed)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Debug, PartialEq)]
struct i32x20([i32; 20]);
impl i32x20 {
fn splat(x: i32) -> Self {
Self([x; 20])
}
}
unsafe {
let mask = i32x20([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0, 0, 0, 0, !0, !0, !0, !0, !0, !0, !0]);
let mask_bits = if cfg!(target_endian = "little") {
0b11111110000101001011
} else {
0b11010010100001111111
};
let mask_bytes = if cfg!(target_endian = "little") {
[0b01001011, 0b11100001, 0b1111]
} else {
[0b1101, 0b00101000, 0b01111111]
};

let bitmask1: u32 = simd_bitmask(mask);
let bitmask2: [u8; 3] = simd_bitmask(mask);
assert_eq!(bitmask1, mask_bits);
assert_eq!(bitmask2, mask_bytes);

let selected1 = simd_select_bitmask::<u32, _>(
mask_bits,
i32x20::splat(!0), // yes
i32x20::splat(0), // no
);
let selected2 = simd_select_bitmask::<[u8; 3], _>(
mask_bytes,
i32x20::splat(!0), // yes
i32x20::splat(0), // no
);
assert_eq!(selected1, mask);
assert_eq!(selected2, mask);
}
}
45 changes: 33 additions & 12 deletions tests/ui/simd/simd-bitmask.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//@run-pass
//@ignore-endian-big behavior of simd_select_bitmask is endian-specific
#![feature(repr_simd, intrinsics)]

extern "rust-intrinsic" {
Expand All @@ -17,36 +16,58 @@ fn main() {
let i: u8 = simd_bitmask(v);
let a: [u8; 1] = simd_bitmask(v);

assert_eq!(i, 0b0101);
assert_eq!(a, [0b0101]);
if cfg!(target_endian = "little") {
assert_eq!(i, 0b0101);
assert_eq!(a, [0b0101]);
} else {
assert_eq!(i, 0b1010);
assert_eq!(a, [0b1010]);
}

let v = Simd::<i8, 16>([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]);
let i: u16 = simd_bitmask(v);
let a: [u8; 2] = simd_bitmask(v);

assert_eq!(i, 0b0101000000001100);
assert_eq!(a, [0b1100, 0b01010000]);
if cfg!(target_endian = "little") {
assert_eq!(i, 0b0101000000001100);
assert_eq!(a, [0b00001100, 0b01010000]);
} else {
assert_eq!(i, 0b0011000000001010);
assert_eq!(a, [0b00110000, 0b00001010]);
}
}

unsafe {
let a = Simd::<i32, 8>([0, 1, 2, 3, 4, 5, 6, 7]);
let b = Simd::<i32, 8>([8, 9, 10, 11, 12, 13, 14, 15]);
let e = [0, 9, 2, 11, 12, 13, 14, 15];
let a = Simd::<i32, 4>([0, 1, 2, 3]);
let b = Simd::<i32, 4>([8, 9, 10, 11]);
let e = [0, 9, 2, 11];

let r = simd_select_bitmask(0b0101u8, a, b);
let mask = if cfg!(target_endian = "little") { 0b0101u8 } else { 0b1010u8 };
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let r = simd_select_bitmask([0b0101u8], a, b);
let mask = if cfg!(target_endian = "little") { [0b0101u8] } else { [0b1010u8] };
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let a = Simd::<i32, 16>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
let b = Simd::<i32, 16>([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]);
let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31];

let r = simd_select_bitmask(0b0101000000001100u16, a, b);
let mask = if cfg!(target_endian = "little") {
0b0101000000001100u16
} else {
0b0011000000001010u16
};
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);

let r = simd_select_bitmask([0b1100u8, 0b01010000u8], a, b);
let mask = if cfg!(target_endian = "little") {
[0b00001100u8, 0b01010000u8]
} else {
[0b00110000u8, 0b00001010u8]
};
let r = simd_select_bitmask(mask, a, b);
assert_eq!(r.0, e);
}
}
Loading