Skip to content

Commit 86d445e

Browse files
Support vec zero-alloc optimization for tuples and byte arrays
* Implement the IsZero trait for tuples of up to 8 IsZero elements; * Implement IsZero for u8/i8, which also makes it available for arrays of them; * Add more codegen tests for this optimization; * Lower the array-size threshold for the IsZero impl on arrays because the checks fail to be inlined for larger arrays
1 parent b4151a4 commit 86d445e

File tree

3 files changed

+170
-29
lines changed

3 files changed

+170
-29
lines changed

library/alloc/src/vec/is_zero.rs

+32-4
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ macro_rules! impl_is_zero {
1717
};
1818
}
1919

20+
impl_is_zero!(i8, |x| x == 0); // Needed so that arrays and tuples of i8 implement IsZero.
2021
impl_is_zero!(i16, |x| x == 0);
2122
impl_is_zero!(i32, |x| x == 0);
2223
impl_is_zero!(i64, |x| x == 0);
2324
impl_is_zero!(i128, |x| x == 0);
2425
impl_is_zero!(isize, |x| x == 0);
2526

27+
impl_is_zero!(u8, |x| x == 0); // Needed so that arrays and tuples of u8 implement IsZero.
2628
impl_is_zero!(u16, |x| x == 0);
2729
impl_is_zero!(u32, |x| x == 0);
2830
impl_is_zero!(u64, |x| x == 0);
@@ -54,15 +56,41 @@ unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
5456
fn is_zero(&self) -> bool {
5557
// Because this is generated as a runtime check, it's not obvious that
5658
// it's worth doing if the array is really long. The threshold here
57-
// is largely arbitrary, but was picked because as of 2022-05-01 LLVM
58-
// can const-fold the check in `vec![[0; 32]; n]` but not in
59-
// `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b
59+
// is largely arbitrary, but was picked because as of 2022-07-01 LLVM
60+
// fails to const-fold the check in `vec![[1; 32]; n]`.
61+
// See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
6062
// Feel free to tweak if you have better evidence.
6163

62-
N <= 32 && self.iter().all(IsZero::is_zero)
64+
N <= 16 && self.iter().all(IsZero::is_zero)
6365
}
6466
}
6567

68+
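// This is a recursive macro: each expansion implements `IsZero` for one tuple arity, then recurses on the remaining type parameters.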
69+
macro_rules! impl_for_tuples {
70+
// Base case: stops the recursion.
71+
() => {
72+
// There is no point in implementing `IsZero` for the empty tuple because it is a ZST.
73+
};
74+
($first_arg:ident $(,$rest:ident)*) => {
75+
unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){
76+
#[inline]
77+
fn is_zero(&self) -> bool{
78+
// Destructure the tuple into references to each of its elements.
79+
// Rust allows local variables to reuse the names of generic parameters, because types and values live in separate namespaces.
80+
#[allow(non_snake_case)]
81+
let ($first_arg, $($rest,)*) = self;
82+
83+
$first_arg.is_zero()
84+
$( && $rest.is_zero() )*
85+
}
86+
}
87+
88+
impl_for_tuples!($($rest),*);
89+
}
90+
}
91+
92+
impl_for_tuples!(A, B, C, D, E, F, G, H);
93+
6694
// `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null.
6795
// For fat pointers, the bytes that would be the pointer metadata in the `Some`
6896
// variant are padding in the `None` variant, so ignoring them and

library/alloc/src/vec/spec_from_elem.rs

+14-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
use core::ptr;
2+
13
use crate::alloc::Allocator;
24
use crate::raw_vec::RawVec;
3-
use core::ptr::{self};
45

56
use super::{ExtendElement, IsZero, Vec};
67

@@ -17,6 +18,18 @@ impl<T: Clone> SpecFromElem for T {
1718
}
1819
}
1920

21+
impl<T: Clone + IsZero> SpecFromElem for T {
22+
#[inline]
23+
default fn from_elem<A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<T, A> {
24+
if elem.is_zero() {
25+
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
26+
}
27+
let mut v = Vec::with_capacity_in(n, alloc);
28+
v.extend_with(n, ExtendElement(elem));
29+
v
30+
}
31+
}
32+
2033
impl SpecFromElem for i8 {
2134
#[inline]
2235
fn from_elem<A: Allocator>(elem: i8, n: usize, alloc: A) -> Vec<i8, A> {
@@ -46,15 +59,3 @@ impl SpecFromElem for u8 {
4659
}
4760
}
4861
}
49-
50-
impl<T: Clone + IsZero> SpecFromElem for T {
51-
#[inline]
52-
fn from_elem<A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<T, A> {
53-
if elem.is_zero() {
54-
return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
55-
}
56-
let mut v = Vec::with_capacity_in(n, alloc);
57-
v.extend_with(n, ExtendElement(elem));
58-
v
59-
}
60-
}

src/test/codegen/vec-calloc.rs

+124-12
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,141 @@
44

55
#![crate_type = "lib"]
66

7+
// CHECK-LABEL: @vec_zero_bytes
8+
#[no_mangle]
9+
pub fn vec_zero_bytes(n: usize) -> Vec<u8> {
10+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
11+
// CHECK-NOT: call {{.*}}reserve
12+
// CHECK-NOT: call {{.*}}__rust_alloc(
13+
// CHECK-NOT: call {{.*}}llvm.memset
14+
15+
// CHECK: call {{.*}}__rust_alloc_zeroed(
16+
17+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
18+
// CHECK-NOT: call {{.*}}reserve
19+
// CHECK-NOT: call {{.*}}__rust_alloc(
20+
// CHECK-NOT: call {{.*}}llvm.memset
21+
22+
// CHECK: ret void
23+
vec![0; n]
24+
}
25+
26+
// CHECK-LABEL: @vec_one_bytes
27+
#[no_mangle]
28+
pub fn vec_one_bytes(n: usize) -> Vec<u8> {
29+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
30+
// CHECK-NOT: call {{.*}}reserve
31+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
32+
33+
// CHECK: call {{.*}}__rust_alloc(
34+
// CHECK: call {{.*}}llvm.memset
35+
36+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
37+
// CHECK-NOT: call {{.*}}reserve
38+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
39+
40+
// CHECK: ret void
41+
vec![1; n]
42+
}
43+
744
// CHECK-LABEL: @vec_zero_scalar
845
#[no_mangle]
946
pub fn vec_zero_scalar(n: usize) -> Vec<i32> {
10-
// CHECK-NOT: __rust_alloc(
11-
// CHECK: __rust_alloc_zeroed(
12-
// CHECK-NOT: __rust_alloc(
47+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
48+
// CHECK-NOT: call {{.*}}reserve
49+
// CHECK-NOT: call {{.*}}__rust_alloc(
50+
51+
// CHECK: call {{.*}}__rust_alloc_zeroed(
52+
53+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
54+
// CHECK-NOT: call {{.*}}reserve
55+
// CHECK-NOT: call {{.*}}__rust_alloc(
56+
57+
// CHECK: ret void
1358
vec![0; n]
1459
}
1560

61+
// CHECK-LABEL: @vec_one_scalar
62+
#[no_mangle]
63+
pub fn vec_one_scalar(n: usize) -> Vec<i32> {
64+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
65+
// CHECK-NOT: call {{.*}}reserve
66+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
67+
68+
// CHECK: call {{.*}}__rust_alloc(
69+
70+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
71+
// CHECK-NOT: call {{.*}}reserve
72+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
73+
74+
// CHECK: ret void
75+
vec![1; n]
76+
}
77+
1678
// CHECK-LABEL: @vec_zero_rgb48
1779
#[no_mangle]
1880
pub fn vec_zero_rgb48(n: usize) -> Vec<[u16; 3]> {
19-
// CHECK-NOT: __rust_alloc(
20-
// CHECK: __rust_alloc_zeroed(
21-
// CHECK-NOT: __rust_alloc(
81+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
82+
// CHECK-NOT: call {{.*}}reserve
83+
// CHECK-NOT: call {{.*}}__rust_alloc(
84+
85+
// CHECK: call {{.*}}__rust_alloc_zeroed(
86+
87+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
88+
// CHECK-NOT: call {{.*}}reserve
89+
// CHECK-NOT: call {{.*}}__rust_alloc(
90+
91+
// CHECK: ret void
2292
vec![[0, 0, 0]; n]
2393
}
2494

25-
// CHECK-LABEL: @vec_zero_array_32
95+
// CHECK-LABEL: @vec_zero_array_16
96+
#[no_mangle]
97+
pub fn vec_zero_array_16(n: usize) -> Vec<[i64; 16]> {
98+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
99+
// CHECK-NOT: call {{.*}}reserve
100+
// CHECK-NOT: call {{.*}}__rust_alloc(
101+
102+
// CHECK: call {{.*}}__rust_alloc_zeroed(
103+
104+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
105+
// CHECK-NOT: call {{.*}}reserve
106+
// CHECK-NOT: call {{.*}}__rust_alloc(
107+
108+
// CHECK: ret void
109+
vec![[0_i64; 16]; n]
110+
}
111+
112+
// CHECK-LABEL: @vec_zero_tuple
113+
#[no_mangle]
114+
pub fn vec_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
115+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
116+
// CHECK-NOT: call {{.*}}reserve
117+
// CHECK-NOT: call {{.*}}__rust_alloc(
118+
119+
// CHECK: call {{.*}}__rust_alloc_zeroed(
120+
121+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
122+
// CHECK-NOT: call {{.*}}reserve
123+
// CHECK-NOT: call {{.*}}__rust_alloc(
124+
125+
// CHECK: ret void
126+
vec![(0, 0, '\0'); n]
127+
}
128+
129+
// CHECK-LABEL: @vec_non_zero_tuple
26130
#[no_mangle]
27-
pub fn vec_zero_array_32(n: usize) -> Vec<[i64; 32]> {
28-
// CHECK-NOT: __rust_alloc(
29-
// CHECK: __rust_alloc_zeroed(
30-
// CHECK-NOT: __rust_alloc(
31-
vec![[0_i64; 32]; n]
131+
pub fn vec_non_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
132+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
133+
// CHECK-NOT: call {{.*}}reserve
134+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
135+
136+
// CHECK: call {{.*}}__rust_alloc(
137+
138+
// CHECK-NOT: call {{.*}}alloc::vec::from_elem
139+
// CHECK-NOT: call {{.*}}reserve
140+
// CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
141+
142+
// CHECK: ret void
143+
vec![(0, 0, 'A'); n]
32144
}

0 commit comments

Comments
 (0)