Skip to content

Commit b6d6d25

Browse files
committed
Auto merge of #126444 - tesuji:gvn-const-arrays, r=<try>
gvn: Promote/propagate const local array A rewrite of #125916, which used the `PromoteTemps` pass. This allows promoting constant local arrays as anonymous constants. As a result, in codegen for a local array, rustc outputs `llvm.memcpy` (which is easy for LLVM to optimize) instead of a series of `store`s on the stack (a.k.a. in-place initialization). This makes rustc on par with clang on this specific case. See #73825 or [zulip][opsem] for more info. [Here is a simple micro benchmark][bench] that shows the performance differences between promoting arrays or not. [Prior discussions on zulip][opsem]. This patch [saves about 600 KB][perf] (~0.5%) of `librustc_driver.so`. ![image](https://github.com/rust-lang/rust/assets/15225902/0e37559c-f5d9-4cdf-b7e3-a2956fd17bc1) Fix #73825 r? cjgillot ### Unresolved questions - [ ] Should we ignore nested arrays? I think that promoting nested arrays is bloating codegen. - [ ] Should stack_threshold be at least 32 bytes? Like the benchmark showed. If yes, the test should be updated to make arrays larger than 32 bytes. - [x] ~Is it concerning that `call(move _1)` is now `call(const [array])`?~ It reverted back to `call(move _1)` [opsem]: https://rust-lang.zulipchat.com/#narrow/stream/136281-t-opsem/topic/Could.20const.20read-only.20arrays.20be.20const.20promoted.3F [bench]: rust-lang/rust-clippy#12854 (comment) [perf]: https://perf.rust-lang.org/compare.html?start=f9515fdd5aa132e27d9b580a35b27f4b453251c1&end=7e160d4b55bb5a27be0696f45db247ccc2e166d9&stat=size%3Alinked_artifact&tab=artifact-size
2 parents 88fa119 + c15eb60 commit b6d6d25

10 files changed

+266
-16
lines changed

compiler/rustc_mir_transform/src/gvn.rs

+24-12
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ fn propagate_ssa<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
138138
let value = state.simplify_rvalue(rvalue, location);
139139
// FIXME(#112651) `rvalue` may have a subtype to `local`. We can only mark `local` as
140140
// reusable if we have an exact type match.
141-
if state.local_decls[local].ty != rvalue.ty(state.local_decls, tcx) {
141+
if state.local_decls[local].ty != rvalue.ty(state.local_decls, state.tcx) {
142142
return;
143143
}
144144
value
@@ -382,7 +382,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
382382
let ty = match kind {
383383
AggregateTy::Array => {
384384
assert!(fields.len() > 0);
385-
Ty::new_array(self.tcx, fields[0].layout.ty, fields.len() as u64)
385+
let field_ty = fields[0].layout.ty;
386+
Ty::new_array(self.tcx, field_ty, fields.len() as u64)
386387
}
387388
AggregateTy::Tuple => {
388389
Ty::new_tup_from_iter(self.tcx, fields.iter().map(|f| f.layout.ty))
@@ -406,7 +407,9 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
406407
};
407408
let ptr_imm = Immediate::new_pointer_with_meta(data, meta, &self.ecx);
408409
ImmTy::from_immediate(ptr_imm, ty).into()
409-
} else if matches!(ty.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
410+
} else if matches!(kind, AggregateTy::Array)
411+
|| matches!(ty.abi, Abi::Scalar(..) | Abi::ScalarPair(..))
412+
{
410413
let dest = self.ecx.allocate(ty, MemoryKind::Stack).ok()?;
411414
let variant_dest = if let Some(variant) = variant {
412415
self.ecx.project_downcast(&dest, variant).ok()?
@@ -418,9 +421,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
418421
self.ecx.copy_op(op, &field_dest).ok()?;
419422
}
420423
self.ecx.write_discriminant(variant.unwrap_or(FIRST_VARIANT), &dest).ok()?;
421-
self.ecx
422-
.alloc_mark_immutable(dest.ptr().provenance.unwrap().alloc_id())
423-
.ok()?;
424+
let dest = dest.map_provenance(|prov| prov.as_immutable());
424425
dest.into()
425426
} else {
426427
return None;
@@ -704,7 +705,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
704705
place.projection = self.tcx.mk_place_elems(&projection);
705706
}
706707

707-
trace!(?place);
708+
trace!(after_place = ?place);
708709
}
709710

710711
/// Represent the *value* which would be read from `place`, and point `place` to a preexisting
@@ -884,7 +885,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
884885
}
885886

886887
let (mut ty, variant_index) = match *kind {
887-
AggregateKind::Array(..) => {
888+
AggregateKind::Array(_) => {
888889
assert!(!field_ops.is_empty());
889890
(AggregateTy::Array, FIRST_VARIANT)
890891
}
@@ -1347,6 +1348,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
13471348
}
13481349
}
13491350

1351+
#[instrument(level = "trace", skip(ecx), ret)]
13501352
fn op_to_prop_const<'tcx>(
13511353
ecx: &mut InterpCx<'tcx, DummyMachine>,
13521354
op: &OpTy<'tcx>,
@@ -1361,8 +1363,11 @@ fn op_to_prop_const<'tcx>(
13611363
return Some(ConstValue::ZeroSized);
13621364
}
13631365

1364-
// Do not synthetize too large constants. Codegen will just memcpy them, which we'd like to avoid.
1365-
if !matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
1366+
// Do not synthesize too large constants, except constant arrays.
1367+
// For arrays, codegen will just memcpy them, but LLVM will optimize out those unneeded memcpys.
1368+
// For others, we'd prefer in-place initialization over memcpying them.
1369+
if !(op.layout.ty.is_array() || matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)))
1370+
{
13661371
return None;
13671372
}
13681373

@@ -1433,6 +1438,7 @@ impl<'tcx> VnState<'_, 'tcx> {
14331438
}
14341439

14351440
/// If `index` is a `Value::Constant`, return the `Constant` to be put in the MIR.
1441+
#[instrument(level = "trace", skip(self, index), ret)]
14361442
fn try_as_constant(&mut self, index: VnIndex) -> Option<ConstOperand<'tcx>> {
14371443
// This was already constant in MIR, do not change it.
14381444
if let Value::Constant { value, disambiguator: _ } = *self.get(index)
@@ -1444,8 +1450,13 @@ impl<'tcx> VnState<'_, 'tcx> {
14441450
}
14451451

14461452
let op = self.evaluated[index].as_ref()?;
1447-
if op.layout.is_unsized() {
1448-
// Do not attempt to propagate unsized locals.
1453+
1454+
// Ignore promoted arrays. Promoted arrays are already placed in `.rodata`.
1455+
// Which is what we try to achieve by running GVN on constant local arrays.
1456+
if let Either::Left(mplace) = op.as_mplace_or_imm()
1457+
&& mplace.layout.ty.is_array()
1458+
&& let Value::Projection(_index, ProjectionElem::Deref) = self.get(index)
1459+
{
14491460
return None;
14501461
}
14511462

@@ -1484,6 +1495,7 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
14841495
self.simplify_operand(operand, location);
14851496
}
14861497

1498+
#[instrument(level = "trace", skip(self, stmt))]
14871499
fn visit_statement(&mut self, stmt: &mut Statement<'tcx>, location: Location) {
14881500
if let StatementKind::Assign(box (ref mut lhs, ref mut rvalue)) = stmt.kind {
14891501
self.simplify_place_projection(lhs, location);

compiler/rustc_mir_transform/src/ssa.rs

+1
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ impl SsaLocals {
147147
})
148148
}
149149

150+
#[instrument(level = "trace", skip_all)]
150151
pub fn for_each_assignment_mut<'tcx>(
151152
&self,
152153
basic_blocks: &mut IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// issue: <https://github.com/rust-lang/rust/issues/73825>
2+
//@ compile-flags: -C opt-level=1
3+
#![crate_type = "lib"]
4+
5+
// CHECK-LABEL: @foo
6+
// CHECK-NEXT: start:
7+
// CHECK-NEXT: %_3 = and i64 %x, 63
8+
// CHECK-NEXT: %0 = getelementptr inbounds [64 x i32], ptr @0, i64 0, i64 %_3
9+
// CHECK-NEXT: %_0 = load i32, ptr %0, align 4
10+
// CHECK-NEXT: ret i32 %_0
11+
#[no_mangle]
12+
#[rustfmt::skip]
13+
pub fn foo(x: usize) -> i32 {
14+
let base: [i32; 64] = [
15+
67, 754, 860, 559, 368, 870, 548, 972,
16+
141, 731, 351, 664, 32, 4, 996, 741,
17+
203, 292, 237, 480, 151, 940, 777, 540,
18+
143, 587, 747, 65, 152, 517, 882, 880,
19+
712, 595, 370, 901, 237, 53, 789, 785,
20+
912, 650, 896, 367, 316, 392, 62, 473,
21+
675, 691, 281, 192, 445, 970, 225, 425,
22+
628, 324, 322, 206, 912, 867, 462, 92
23+
];
24+
base[x % 64]
25+
}
26+
27+
// This checks whether LLVM de-duplicates the `promoted` array and the `base` array.
28+
// Because in MIR, `&[..]` is already promoted by promote pass. GVN keeps promoting
29+
// `*&[..]` to `const [..]` again.
30+
//
31+
// CHECK-LABEL: @deduplicability
32+
// CHECK-NEXT: start:
33+
// CHECK-NEXT: %_3 = and i64 %x, 63
34+
// CHECK-NEXT: %0 = getelementptr inbounds [64 x i32], ptr @0, i64 0, i64 %_3
35+
// CHECK-NEXT: %_0 = load i32, ptr %0, align 4
36+
// CHECK-NEXT: ret i32 %_0
37+
#[no_mangle]
38+
#[rustfmt::skip]
39+
pub fn deduplicability(x: usize) -> i32 {
40+
let promoted = *&[
41+
67i32, 754, 860, 559, 368, 870, 548, 972,
42+
141, 731, 351, 664, 32, 4, 996, 741,
43+
203, 292, 237, 480, 151, 940, 777, 540,
44+
143, 587, 747, 65, 152, 517, 882, 880,
45+
712, 595, 370, 901, 237, 53, 789, 785,
46+
912, 650, 896, 367, 316, 392, 62, 473,
47+
675, 691, 281, 192, 445, 970, 225, 425,
48+
628, 324, 322, 206, 912, 867, 462, 92
49+
];
50+
promoted[x % 64]
51+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
- // MIR for `main` before GVN
2+
+ // MIR for `main` after GVN
3+
4+
fn main() -> () {
5+
let mut _0: ();
6+
let _1: [i32; 5];
7+
let mut _4: [i32; 5];
8+
let mut _5: [i32; 5];
9+
let mut _7: &[i32; 5];
10+
let _8: [i32; 5];
11+
let _9: ();
12+
let mut _10: [u32; 5];
13+
let mut _12: [f32; 8];
14+
let _13: [[i32; 3]; 3];
15+
let mut _14: [i32; 3];
16+
let mut _15: [i32; 3];
17+
let mut _16: [i32; 3];
18+
scope 1 {
19+
debug _arr => _1;
20+
let _2: [i32; 5];
21+
scope 2 {
22+
debug _duplicated_arr => _2;
23+
let _3: [[i32; 5]; 2];
24+
scope 3 {
25+
debug _foo => _3;
26+
let _6: [i32; 5];
27+
let mut _17: &[i32; 5];
28+
scope 4 {
29+
debug _darr => _6;
30+
let _11: F32x8;
31+
scope 5 {
32+
debug _f => _11;
33+
}
34+
}
35+
}
36+
}
37+
}
38+
39+
bb0: {
40+
StorageLive(_1);
41+
- _1 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32];
42+
+ _1 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
43+
StorageLive(_2);
44+
- _2 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32];
45+
+ _2 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
46+
StorageLive(_3);
47+
StorageLive(_4);
48+
- _4 = [const 178_i32, const 9_i32, const 4_i32, const 56_i32, const 221_i32];
49+
+ _4 = const [178_i32, 9_i32, 4_i32, 56_i32, 221_i32];
50+
StorageLive(_5);
51+
- _5 = [const 193_i32, const 164_i32, const 194_i32, const 197_i32, const 6_i32];
52+
- _3 = [move _4, move _5];
53+
+ _5 = const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32];
54+
+ _3 = const [[178_i32, 9_i32, 4_i32, 56_i32, 221_i32], [193_i32, 164_i32, 194_i32, 197_i32, 6_i32]];
55+
StorageDead(_5);
56+
StorageDead(_4);
57+
StorageLive(_6);
58+
StorageLive(_7);
59+
_17 = const main::promoted[0];
60+
_7 = &(*_17);
61+
- _6 = (*_7);
62+
+ _6 = (*_17);
63+
StorageDead(_7);
64+
StorageLive(_9);
65+
StorageLive(_10);
66+
- _10 = [const 31_u32, const 96_u32, const 173_u32, const 50_u32, const 1_u32];
67+
- _9 = consume(move _10) -> [return: bb1, unwind continue];
68+
+ _10 = const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32];
69+
+ _9 = consume(const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32]) -> [return: bb1, unwind continue];
70+
}
71+
72+
bb1: {
73+
StorageDead(_10);
74+
StorageDead(_9);
75+
StorageLive(_11);
76+
StorageLive(_12);
77+
- _12 = [const 1f32, const 2f32, const 3f32, const 1f32, const 1f32, const 1f32, const 1f32, const 42f32];
78+
- _11 = F32x8(move _12);
79+
+ _12 = const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32];
80+
+ _11 = F32x8(const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32]);
81+
StorageDead(_12);
82+
StorageLive(_13);
83+
StorageLive(_14);
84+
- _14 = [const 1_i32, const 0_i32, const 0_i32];
85+
+ _14 = const [1_i32, 0_i32, 0_i32];
86+
StorageLive(_15);
87+
- _15 = [const 0_i32, const 1_i32, const 0_i32];
88+
+ _15 = const [0_i32, 1_i32, 0_i32];
89+
StorageLive(_16);
90+
- _16 = [const 0_i32, const 0_i32, const 1_i32];
91+
- _13 = [move _14, move _15, move _16];
92+
+ _16 = const [0_i32, 0_i32, 1_i32];
93+
+ _13 = const [[1_i32, 0_i32, 0_i32], [0_i32, 1_i32, 0_i32], [0_i32, 0_i32, 1_i32]];
94+
StorageDead(_16);
95+
StorageDead(_15);
96+
StorageDead(_14);
97+
StorageDead(_13);
98+
_0 = const ();
99+
StorageDead(_11);
100+
StorageDead(_6);
101+
StorageDead(_3);
102+
StorageDead(_2);
103+
StorageDead(_1);
104+
return;
105+
}
106+
}
107+
+
108+
+ ALLOC0 (size: 36, align: 4) { .. }
109+
+
110+
+ ALLOC1 (size: 12, align: 4) { .. }
111+
+
112+
+ ALLOC2 (size: 12, align: 4) { .. }
113+
+
114+
+ ALLOC3 (size: 12, align: 4) { .. }
115+
+
116+
+ ALLOC4 (size: 32, align: 4) { .. }
117+
+
118+
+ ALLOC5 (size: 20, align: 4) { .. }
119+
+
120+
+ ALLOC6 (size: 40, align: 4) { .. }
121+
+
122+
+ ALLOC7 (size: 20, align: 4) { .. }
123+
+
124+
+ ALLOC8 (size: 20, align: 4) { .. }
125+
+
126+
+ ALLOC9 (size: 20, align: 4) { .. }
127+

tests/mir-opt/const_array_locals.rs

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//@ test-mir-pass: GVN
2+
//@ compile-flags: -Zdump-mir-exclude-alloc-bytes
3+
#![feature(repr_simd)]
4+
5+
#[repr(simd)]
6+
struct F32x8([f32; 8]);
7+
8+
// EMIT_MIR const_array_locals.main.GVN.diff
9+
// CHECK-LABEL: fn main(
10+
// CHECK: debug _arr => [[_arr:_[0-9]+]];
11+
// CHECK: debug _duplicated_arr => [[_duplicated_arr:_[0-9]+]];
12+
// CHECK: debug _foo => [[_foo:_[0-9]+]];
13+
// CHECK: debug _darr => [[_darr:_[0-9]+]];
14+
// CHECK: debug _f => [[_f:_[0-9]+]];
15+
pub fn main() {
16+
// CHECK: [[_arr]] = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
17+
let _arr = [255, 105, 15, 39, 62];
18+
// CHECK: [[_duplicated_arr]] = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32];
19+
let _duplicated_arr = [255, 105, 15, 39, 62];
20+
// CHECK: [[subarray1:_[0-9]+]] = const [178_i32, 9_i32, 4_i32, 56_i32, 221_i32];
21+
// CHECK: [[subarray2:_[0-9]+]] = const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32];
22+
// CHECK{LITERAL}: const [[178_i32, 9_i32, 4_i32, 56_i32, 221_i32], [193_i32, 164_i32, 194_i32, 197_i32, 6_i32]];
23+
let _foo = [[178, 9, 4, 56, 221], [193, 164, 194, 197, 6]];
24+
// CHECK: [[PROMOTED:_[0-9]+]] = const main::promoted[0];
25+
// CHECK: [[_darr]] = (*[[PROMOTED]]);
26+
let _darr = *&[254, 42, 15, 39, 62];
27+
28+
// CHECK: [[ARG:_[0-9]+]] = const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32];
29+
// CHECK: consume(const [31_u32, 96_u32, 173_u32, 50_u32, 1_u32])
30+
consume([31, 96, 173, 50, 1]);
31+
32+
// CHECK: [[OP:_[0-9]+]] = const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32];
33+
// CHECK: [[_f]] = F32x8(const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32]);
34+
let _f = F32x8([1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 42.0]);
35+
36+
// ice with small arrays
37+
// CHECK: [[A:_[0-9]+]] = const [1_i32, 0_i32, 0_i32];
38+
// CHECK: [[B:_[0-9]+]] = const [0_i32, 1_i32, 0_i32];
39+
// CHECK: [[C:_[0-9]+]] = const [0_i32, 0_i32, 1_i32];
40+
// CHECK{LITERAL}: const [[1_i32, 0_i32, 0_i32], [0_i32, 1_i32, 0_i32], [0_i32, 0_i32, 1_i32]];
41+
[[1, 0, 0], [0, 1, 0], [0, 0, 1]]; // 2D array
42+
}
43+
44+
fn consume(_arr: [u32; 5]) {
45+
unimplemented!()
46+
}

tests/mir-opt/const_prop/array_index.main.GVN.32bit.panic-unwind.diff

+4-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
bb0: {
1616
StorageLive(_1);
1717
StorageLive(_2);
18-
_2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
18+
- _2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
19+
+ _2 = const [0_u32, 1_u32, 2_u32, 3_u32];
1920
StorageLive(_3);
2021
_3 = const 2_usize;
2122
- _4 = Len(_2);
@@ -36,4 +37,6 @@
3637
return;
3738
}
3839
}
40+
+
41+
+ ALLOC0 (size: 16, align: 4) { .. }
3942

tests/mir-opt/const_prop/array_index.main.GVN.64bit.panic-unwind.diff

+4-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
bb0: {
1616
StorageLive(_1);
1717
StorageLive(_2);
18-
_2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
18+
- _2 = [const 0_u32, const 1_u32, const 2_u32, const 3_u32];
19+
+ _2 = const [0_u32, 1_u32, 2_u32, 3_u32];
1920
StorageLive(_3);
2021
_3 = const 2_usize;
2122
- _4 = Len(_2);
@@ -36,4 +37,6 @@
3637
return;
3738
}
3839
}
40+
+
41+
+ ALLOC0 (size: 16, align: 4) { .. }
3942

tests/mir-opt/const_prop/array_index.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//@ test-mir-pass: GVN
2+
//@ compile-flags: -Zdump-mir-exclude-alloc-bytes
23
// EMIT_MIR_FOR_EACH_PANIC_STRATEGY
34
// EMIT_MIR_FOR_EACH_BIT_WIDTH
45

0 commit comments

Comments
 (0)