Skip to content

Commit 848e0a2

Browse files
committed
Auto merge of #61922 - tmandry:moar-generator-optimization, r=matthewjasper
Don't store locals that have been moved from in generators This avoids reserving storage in generators for locals that are moved out of (and not re-initialized) prior to yield points. Fixes #59123. This adds a new dataflow analysis, `RequiresStorage`, to determine whether the storage of a local can be destroyed without being observed by the program. The rules are: 1. StorageLive(x) => mark x live 2. StorageDead(x) => mark x dead 3. If a local is moved from, _and has never had its address taken_, mark it dead 4. If (any part of) a local is initialized, mark it live' This is used to determine whether to save a local in the generator object at all, as well as which locals can be overlapped in the generator layout. Here's the size in bytes of all testcases included in the change, before and after the change: async fn test |Size before |Size after -----------------|------------|---------- single | 1028 | 1028 single_with_noop | 2056 | 1032 joined | 5132 | 3084 joined_with_noop | 8208 | 3084 generator test |Size before |Size after ----------------------------|------------|---------- move_before_yield | 1028 | 1028 move_before_yield_with_noop | 2056 | 1032 overlap_move_points | 3080 | 2056 ## Future work Note that there is a possible extension to this optimization, which modifies rule 3 to read: "If a local is moved from, _**and either has never had its address taken, or is Freeze and has never been mutably borrowed**_, mark it dead." This was discussed at length in #59123 and then #61849. Because this would cause some behavior to be UB which was not UB before, it's a step that needs to be taken carefully. A more immediate priority for me is inlining `std::mem::size_of_val(&x)` so it becomes apparent that the address of `x` is not taken. This way, using `size_of_val` to look at the size of your inner futures does not affect the size of your outer future. cc @cramertj @eddyb @Matthias247 @nikomatsakis @RalfJung @Zoxc
2 parents ef064d2 + a68e2c7 commit 848e0a2

File tree

6 files changed

+444
-43
lines changed

6 files changed

+444
-43
lines changed

src/librustc_mir/dataflow/at_location.rs

+21-11
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::dataflow::{BitDenotation, DataflowResults, GenKillSet};
88
use crate::dataflow::move_paths::{HasMoveData, MovePathIndex};
99

1010
use std::iter;
11+
use std::borrow::Borrow;
1112

1213
/// A trait for "cartesian products" of multiple FlowAtLocation.
1314
///
@@ -60,18 +61,20 @@ pub trait FlowsAtLocation {
6061
/// (e.g., via `reconstruct_statement_effect` and
6162
/// `reconstruct_terminator_effect`; don't forget to call
6263
/// `apply_local_effect`).
63-
pub struct FlowAtLocation<'tcx, BD>
64+
pub struct FlowAtLocation<'tcx, BD, DR = DataflowResults<'tcx, BD>>
6465
where
6566
BD: BitDenotation<'tcx>,
67+
DR: Borrow<DataflowResults<'tcx, BD>>,
6668
{
67-
base_results: DataflowResults<'tcx, BD>,
69+
base_results: DR,
6870
curr_state: BitSet<BD::Idx>,
6971
stmt_trans: GenKillSet<BD::Idx>,
7072
}
7173

72-
impl<'tcx, BD> FlowAtLocation<'tcx, BD>
74+
impl<'tcx, BD, DR> FlowAtLocation<'tcx, BD, DR>
7375
where
7476
BD: BitDenotation<'tcx>,
77+
DR: Borrow<DataflowResults<'tcx, BD>>,
7578
{
7679
/// Iterate over each bit set in the current state.
7780
pub fn each_state_bit<F>(&self, f: F)
@@ -91,8 +94,8 @@ where
9194
self.stmt_trans.gen_set.iter().for_each(f)
9295
}
9396

94-
pub fn new(results: DataflowResults<'tcx, BD>) -> Self {
95-
let bits_per_block = results.sets().bits_per_block();
97+
pub fn new(results: DR) -> Self {
98+
let bits_per_block = results.borrow().sets().bits_per_block();
9699
let curr_state = BitSet::new_empty(bits_per_block);
97100
let stmt_trans = GenKillSet::from_elem(HybridBitSet::new_empty(bits_per_block));
98101
FlowAtLocation {
@@ -104,7 +107,7 @@ where
104107

105108
/// Access the underlying operator.
106109
pub fn operator(&self) -> &BD {
107-
self.base_results.operator()
110+
self.base_results.borrow().operator()
108111
}
109112

110113
pub fn contains(&self, x: BD::Idx) -> bool {
@@ -134,39 +137,45 @@ where
134137
}
135138
}
136139

137-
impl<'tcx, BD> FlowsAtLocation for FlowAtLocation<'tcx, BD>
138-
where BD: BitDenotation<'tcx>
140+
impl<'tcx, BD, DR> FlowsAtLocation for FlowAtLocation<'tcx, BD, DR>
141+
where
142+
BD: BitDenotation<'tcx>,
143+
DR: Borrow<DataflowResults<'tcx, BD>>,
139144
{
140145
fn reset_to_entry_of(&mut self, bb: BasicBlock) {
141-
self.curr_state.overwrite(self.base_results.sets().entry_set_for(bb.index()));
146+
self.curr_state.overwrite(self.base_results.borrow().sets().entry_set_for(bb.index()));
142147
}
143148

144149
fn reset_to_exit_of(&mut self, bb: BasicBlock) {
145150
self.reset_to_entry_of(bb);
146-
let trans = self.base_results.sets().trans_for(bb.index());
151+
let trans = self.base_results.borrow().sets().trans_for(bb.index());
147152
trans.apply(&mut self.curr_state)
148153
}
149154

150155
fn reconstruct_statement_effect(&mut self, loc: Location) {
151156
self.stmt_trans.clear();
152157
self.base_results
158+
.borrow()
153159
.operator()
154160
.before_statement_effect(&mut self.stmt_trans, loc);
155161
self.stmt_trans.apply(&mut self.curr_state);
156162

157163
self.base_results
164+
.borrow()
158165
.operator()
159166
.statement_effect(&mut self.stmt_trans, loc);
160167
}
161168

162169
fn reconstruct_terminator_effect(&mut self, loc: Location) {
163170
self.stmt_trans.clear();
164171
self.base_results
172+
.borrow()
165173
.operator()
166174
.before_terminator_effect(&mut self.stmt_trans, loc);
167175
self.stmt_trans.apply(&mut self.curr_state);
168176

169177
self.base_results
178+
.borrow()
170179
.operator()
171180
.terminator_effect(&mut self.stmt_trans, loc);
172181
}
@@ -177,9 +186,10 @@ impl<'tcx, BD> FlowsAtLocation for FlowAtLocation<'tcx, BD>
177186
}
178187

179188

180-
impl<'tcx, T> FlowAtLocation<'tcx, T>
189+
impl<'tcx, T, DR> FlowAtLocation<'tcx, T, DR>
181190
where
182191
T: HasMoveData<'tcx> + BitDenotation<'tcx, Idx = MovePathIndex>,
192+
DR: Borrow<DataflowResults<'tcx, T>>,
183193
{
184194
pub fn has_any_child_of(&self, mpi: T::Idx) -> Option<T::Idx> {
185195
// We process `mpi` before the loop below, for two reasons:

src/librustc_mir/dataflow/impls/storage_liveness.rs

+129-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
pub use super::*;
22

33
use rustc::mir::*;
4+
use rustc::mir::visit::{
5+
PlaceContext, Visitor, NonMutatingUseContext,
6+
};
7+
use std::cell::RefCell;
48
use crate::dataflow::BitDenotation;
9+
use crate::dataflow::HaveBeenBorrowedLocals;
10+
use crate::dataflow::{DataflowResults, DataflowResultsCursor, DataflowResultsRefCursor};
511

612
#[derive(Copy, Clone)]
713
pub struct MaybeStorageLive<'a, 'tcx> {
@@ -27,7 +33,9 @@ impl<'a, 'tcx> BitDenotation<'tcx> for MaybeStorageLive<'a, 'tcx> {
2733
}
2834

2935
fn start_block_effect(&self, _on_entry: &mut BitSet<Local>) {
30-
// Nothing is live on function entry
36+
// Nothing is live on function entry (generators only have a self
37+
// argument, and we don't care about that)
38+
assert_eq!(1, self.body.arg_count);
3139
}
3240

3341
fn statement_effect(&self,
@@ -63,3 +71,123 @@ impl<'a, 'tcx> BottomValue for MaybeStorageLive<'a, 'tcx> {
6371
/// bottom = dead
6472
const BOTTOM_VALUE: bool = false;
6573
}
74+
75+
/// Dataflow analysis that determines whether each local requires storage at a
76+
/// given location; i.e. whether its storage can go away without being observed.
77+
pub struct RequiresStorage<'mir, 'tcx> {
78+
body: &'mir Body<'tcx>,
79+
borrowed_locals:
80+
RefCell<DataflowResultsRefCursor<'mir, 'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>>,
81+
}
82+
83+
impl<'mir, 'tcx: 'mir> RequiresStorage<'mir, 'tcx> {
84+
pub fn new(
85+
body: &'mir Body<'tcx>,
86+
borrowed_locals: &'mir DataflowResults<'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>,
87+
) -> Self {
88+
RequiresStorage {
89+
body,
90+
borrowed_locals: RefCell::new(DataflowResultsCursor::new(borrowed_locals, body)),
91+
}
92+
}
93+
94+
pub fn body(&self) -> &Body<'tcx> {
95+
self.body
96+
}
97+
}
98+
99+
impl<'mir, 'tcx> BitDenotation<'tcx> for RequiresStorage<'mir, 'tcx> {
100+
type Idx = Local;
101+
fn name() -> &'static str { "requires_storage" }
102+
fn bits_per_block(&self) -> usize {
103+
self.body.local_decls.len()
104+
}
105+
106+
fn start_block_effect(&self, _sets: &mut BitSet<Local>) {
107+
// Nothing is live on function entry (generators only have a self
108+
// argument, and we don't care about that)
109+
assert_eq!(1, self.body.arg_count);
110+
}
111+
112+
fn statement_effect(&self,
113+
sets: &mut GenKillSet<Local>,
114+
loc: Location) {
115+
self.check_for_move(sets, loc);
116+
self.check_for_borrow(sets, loc);
117+
118+
let stmt = &self.body[loc.block].statements[loc.statement_index];
119+
match stmt.kind {
120+
StatementKind::StorageLive(l) => sets.gen(l),
121+
StatementKind::StorageDead(l) => sets.kill(l),
122+
StatementKind::Assign(ref place, _)
123+
| StatementKind::SetDiscriminant { ref place, .. } => {
124+
place.base_local().map(|l| sets.gen(l));
125+
}
126+
StatementKind::InlineAsm(box InlineAsm { ref outputs, .. }) => {
127+
for p in &**outputs {
128+
p.base_local().map(|l| sets.gen(l));
129+
}
130+
}
131+
_ => (),
132+
}
133+
}
134+
135+
fn terminator_effect(&self,
136+
sets: &mut GenKillSet<Local>,
137+
loc: Location) {
138+
self.check_for_move(sets, loc);
139+
self.check_for_borrow(sets, loc);
140+
}
141+
142+
fn propagate_call_return(
143+
&self,
144+
in_out: &mut BitSet<Local>,
145+
_call_bb: mir::BasicBlock,
146+
_dest_bb: mir::BasicBlock,
147+
dest_place: &mir::Place<'tcx>,
148+
) {
149+
dest_place.base_local().map(|l| in_out.insert(l));
150+
}
151+
}
152+
153+
impl<'mir, 'tcx> RequiresStorage<'mir, 'tcx> {
154+
/// Kill locals that are fully moved and have not been borrowed.
155+
fn check_for_move(&self, sets: &mut GenKillSet<Local>, loc: Location) {
156+
let mut visitor = MoveVisitor {
157+
sets,
158+
borrowed_locals: &self.borrowed_locals,
159+
};
160+
visitor.visit_location(self.body, loc);
161+
}
162+
163+
/// Gen locals that are newly borrowed. This includes borrowing any part of
164+
/// a local (we rely on this behavior of `HaveBeenBorrowedLocals`).
165+
fn check_for_borrow(&self, sets: &mut GenKillSet<Local>, loc: Location) {
166+
let mut borrowed_locals = self.borrowed_locals.borrow_mut();
167+
borrowed_locals.seek(loc);
168+
borrowed_locals.each_gen_bit(|l| sets.gen(l));
169+
}
170+
}
171+
172+
impl<'mir, 'tcx> BottomValue for RequiresStorage<'mir, 'tcx> {
173+
/// bottom = dead
174+
const BOTTOM_VALUE: bool = false;
175+
}
176+
177+
struct MoveVisitor<'a, 'mir, 'tcx> {
178+
borrowed_locals:
179+
&'a RefCell<DataflowResultsRefCursor<'mir, 'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>>,
180+
sets: &'a mut GenKillSet<Local>,
181+
}
182+
183+
impl<'a, 'mir: 'a, 'tcx> Visitor<'tcx> for MoveVisitor<'a, 'mir, 'tcx> {
184+
fn visit_local(&mut self, local: &Local, context: PlaceContext, loc: Location) {
185+
if PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) == context {
186+
let mut borrowed_locals = self.borrowed_locals.borrow_mut();
187+
borrowed_locals.seek(loc);
188+
if !borrowed_locals.contains(*local) {
189+
self.sets.kill(*local);
190+
}
191+
}
192+
}
193+
}

src/librustc_mir/dataflow/mod.rs

+94-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::io;
1717
use std::path::PathBuf;
1818
use std::usize;
1919

20-
pub use self::impls::{MaybeStorageLive};
20+
pub use self::impls::{MaybeStorageLive, RequiresStorage};
2121
pub use self::impls::{MaybeInitializedPlaces, MaybeUninitializedPlaces};
2222
pub use self::impls::DefinitelyInitializedPlaces;
2323
pub use self::impls::EverInitializedPlaces;
@@ -360,6 +360,99 @@ pub(crate) trait DataflowResultsConsumer<'a, 'tcx: 'a> {
360360
fn body(&self) -> &'a Body<'tcx>;
361361
}
362362

363+
/// Allows iterating dataflow results in a flexible and reasonably fast way.
364+
pub struct DataflowResultsCursor<'mir, 'tcx, BD, DR = DataflowResults<'tcx, BD>>
365+
where
366+
BD: BitDenotation<'tcx>,
367+
DR: Borrow<DataflowResults<'tcx, BD>>,
368+
{
369+
flow_state: FlowAtLocation<'tcx, BD, DR>,
370+
371+
// The statement (or terminator) whose effect has been reconstructed in
372+
// flow_state.
373+
curr_loc: Option<Location>,
374+
375+
body: &'mir Body<'tcx>,
376+
}
377+
378+
pub type DataflowResultsRefCursor<'mir, 'tcx, BD> =
379+
DataflowResultsCursor<'mir, 'tcx, BD, &'mir DataflowResults<'tcx, BD>>;
380+
381+
impl<'mir, 'tcx, BD, DR> DataflowResultsCursor<'mir, 'tcx, BD, DR>
382+
where
383+
BD: BitDenotation<'tcx>,
384+
DR: Borrow<DataflowResults<'tcx, BD>>,
385+
{
386+
pub fn new(result: DR, body: &'mir Body<'tcx>) -> Self {
387+
DataflowResultsCursor {
388+
flow_state: FlowAtLocation::new(result),
389+
curr_loc: None,
390+
body,
391+
}
392+
}
393+
394+
/// Seek to the given location in MIR. This method is fast if you are
395+
/// traversing your MIR statements in order.
396+
///
397+
/// After calling `seek`, the current state will reflect all effects up to
398+
/// and including the `before_statement_effect` of the statement at location
399+
/// `loc`. The `statement_effect` of the statement at `loc` will be
400+
/// available as the current effect (see e.g. `each_gen_bit`).
401+
///
402+
/// If `loc.statement_index` equals the number of statements in the block,
403+
/// we will reconstruct the terminator effect in the same way as described
404+
/// above.
405+
pub fn seek(&mut self, loc: Location) {
406+
if self.curr_loc.map(|cur| loc == cur).unwrap_or(false) {
407+
return;
408+
}
409+
410+
let start_index;
411+
let should_reset = match self.curr_loc {
412+
None => true,
413+
Some(cur)
414+
if loc.block != cur.block || loc.statement_index < cur.statement_index => true,
415+
_ => false,
416+
};
417+
if should_reset {
418+
self.flow_state.reset_to_entry_of(loc.block);
419+
start_index = 0;
420+
} else {
421+
let curr_loc = self.curr_loc.unwrap();
422+
start_index = curr_loc.statement_index;
423+
// Apply the effect from the last seek to the current state.
424+
self.flow_state.apply_local_effect(curr_loc);
425+
}
426+
427+
for stmt in start_index..loc.statement_index {
428+
let mut stmt_loc = loc;
429+
stmt_loc.statement_index = stmt;
430+
self.flow_state.reconstruct_statement_effect(stmt_loc);
431+
self.flow_state.apply_local_effect(stmt_loc);
432+
}
433+
434+
if loc.statement_index == self.body[loc.block].statements.len() {
435+
self.flow_state.reconstruct_terminator_effect(loc);
436+
} else {
437+
self.flow_state.reconstruct_statement_effect(loc);
438+
}
439+
self.curr_loc = Some(loc);
440+
}
441+
442+
/// Return whether the current state contains bit `x`.
443+
pub fn contains(&self, x: BD::Idx) -> bool {
444+
self.flow_state.contains(x)
445+
}
446+
447+
/// Iterate over each `gen` bit in the current effect (invoke `seek` first).
448+
pub fn each_gen_bit<F>(&self, f: F)
449+
where
450+
F: FnMut(BD::Idx),
451+
{
452+
self.flow_state.each_gen_bit(f)
453+
}
454+
}
455+
363456
pub fn state_for_location<'tcx, T: BitDenotation<'tcx>>(loc: Location,
364457
analysis: &T,
365458
result: &DataflowResults<'tcx, T>,

0 commit comments

Comments
 (0)