Skip to content

Commit

Permalink
Auto merge of #61922 - tmandry:moar-generator-optimization, r=matthew…
Browse files Browse the repository at this point in the history
…jasper

Don't store locals that have been moved from in generators

This avoids reserving storage in generators for locals that are moved
out of (and not re-initialized) prior to yield points. Fixes #59123.

This adds a new dataflow analysis, `RequiresStorage`, to determine whether the storage of a local can be destroyed without being observed by the program. The rules are:

1. StorageLive(x) => mark x live
2. StorageDead(x) => mark x dead
3. If a local is moved from, _and has never had its address taken_, mark it dead
4. If (any part of) a local is initialized, mark it live'

This is used to determine whether to save a local in the generator object at all, as well as which locals can be overlapped in the generator layout.

Here's the size in bytes of all testcases included in the change, before and after the change:

async fn test    |Size before |Size after
-----------------|------------|----------
single           | 1028       | 1028
single_with_noop | 2056       | 1032
joined           | 5132       | 3084
joined_with_noop | 8208       | 3084

generator test              |Size before |Size after
----------------------------|------------|----------
move_before_yield           | 1028       | 1028
move_before_yield_with_noop | 2056       | 1032
overlap_move_points         | 3080       | 2056

## Future work

Note that there is a possible extension to this optimization, which modifies rule 3 to read: "If a local is moved from, _**and either has never had its address taken, or is Freeze and has never been mutably borrowed**_, mark it dead." This was discussed at length in #59123 and then #61849. Because this would cause some behavior to be UB which was not UB before, it's a step that needs to be taken carefully.

A more immediate priority for me is inlining `std::mem::size_of_val(&x)` so it becomes apparent that the address of `x` is not taken. This way, using `size_of_val` to look at the size of your inner futures does not affect the size of your outer future.

cc @cramertj @eddyb @Matthias247 @nikomatsakis @RalfJung @Zoxc
  • Loading branch information
bors committed Jul 2, 2019
2 parents ef064d2 + a68e2c7 commit 848e0a2
Show file tree
Hide file tree
Showing 6 changed files with 444 additions and 43 deletions.
32 changes: 21 additions & 11 deletions src/librustc_mir/dataflow/at_location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::dataflow::{BitDenotation, DataflowResults, GenKillSet};
use crate::dataflow::move_paths::{HasMoveData, MovePathIndex};

use std::iter;
use std::borrow::Borrow;

/// A trait for "cartesian products" of multiple FlowAtLocation.
///
Expand Down Expand Up @@ -60,18 +61,20 @@ pub trait FlowsAtLocation {
/// (e.g., via `reconstruct_statement_effect` and
/// `reconstruct_terminator_effect`; don't forget to call
/// `apply_local_effect`).
pub struct FlowAtLocation<'tcx, BD>
pub struct FlowAtLocation<'tcx, BD, DR = DataflowResults<'tcx, BD>>
where
BD: BitDenotation<'tcx>,
DR: Borrow<DataflowResults<'tcx, BD>>,
{
base_results: DataflowResults<'tcx, BD>,
base_results: DR,
curr_state: BitSet<BD::Idx>,
stmt_trans: GenKillSet<BD::Idx>,
}

impl<'tcx, BD> FlowAtLocation<'tcx, BD>
impl<'tcx, BD, DR> FlowAtLocation<'tcx, BD, DR>
where
BD: BitDenotation<'tcx>,
DR: Borrow<DataflowResults<'tcx, BD>>,
{
/// Iterate over each bit set in the current state.
pub fn each_state_bit<F>(&self, f: F)
Expand All @@ -91,8 +94,8 @@ where
self.stmt_trans.gen_set.iter().for_each(f)
}

pub fn new(results: DataflowResults<'tcx, BD>) -> Self {
let bits_per_block = results.sets().bits_per_block();
pub fn new(results: DR) -> Self {
let bits_per_block = results.borrow().sets().bits_per_block();
let curr_state = BitSet::new_empty(bits_per_block);
let stmt_trans = GenKillSet::from_elem(HybridBitSet::new_empty(bits_per_block));
FlowAtLocation {
Expand All @@ -104,7 +107,7 @@ where

/// Access the underlying operator.
pub fn operator(&self) -> &BD {
self.base_results.operator()
self.base_results.borrow().operator()
}

pub fn contains(&self, x: BD::Idx) -> bool {
Expand Down Expand Up @@ -134,39 +137,45 @@ where
}
}

impl<'tcx, BD> FlowsAtLocation for FlowAtLocation<'tcx, BD>
where BD: BitDenotation<'tcx>
impl<'tcx, BD, DR> FlowsAtLocation for FlowAtLocation<'tcx, BD, DR>
where
BD: BitDenotation<'tcx>,
DR: Borrow<DataflowResults<'tcx, BD>>,
{
fn reset_to_entry_of(&mut self, bb: BasicBlock) {
self.curr_state.overwrite(self.base_results.sets().entry_set_for(bb.index()));
self.curr_state.overwrite(self.base_results.borrow().sets().entry_set_for(bb.index()));
}

fn reset_to_exit_of(&mut self, bb: BasicBlock) {
self.reset_to_entry_of(bb);
let trans = self.base_results.sets().trans_for(bb.index());
let trans = self.base_results.borrow().sets().trans_for(bb.index());
trans.apply(&mut self.curr_state)
}

fn reconstruct_statement_effect(&mut self, loc: Location) {
self.stmt_trans.clear();
self.base_results
.borrow()
.operator()
.before_statement_effect(&mut self.stmt_trans, loc);
self.stmt_trans.apply(&mut self.curr_state);

self.base_results
.borrow()
.operator()
.statement_effect(&mut self.stmt_trans, loc);
}

fn reconstruct_terminator_effect(&mut self, loc: Location) {
self.stmt_trans.clear();
self.base_results
.borrow()
.operator()
.before_terminator_effect(&mut self.stmt_trans, loc);
self.stmt_trans.apply(&mut self.curr_state);

self.base_results
.borrow()
.operator()
.terminator_effect(&mut self.stmt_trans, loc);
}
Expand All @@ -177,9 +186,10 @@ impl<'tcx, BD> FlowsAtLocation for FlowAtLocation<'tcx, BD>
}


impl<'tcx, T> FlowAtLocation<'tcx, T>
impl<'tcx, T, DR> FlowAtLocation<'tcx, T, DR>
where
T: HasMoveData<'tcx> + BitDenotation<'tcx, Idx = MovePathIndex>,
DR: Borrow<DataflowResults<'tcx, T>>,
{
pub fn has_any_child_of(&self, mpi: T::Idx) -> Option<T::Idx> {
// We process `mpi` before the loop below, for two reasons:
Expand Down
130 changes: 129 additions & 1 deletion src/librustc_mir/dataflow/impls/storage_liveness.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
pub use super::*;

use rustc::mir::*;
use rustc::mir::visit::{
PlaceContext, Visitor, NonMutatingUseContext,
};
use std::cell::RefCell;
use crate::dataflow::BitDenotation;
use crate::dataflow::HaveBeenBorrowedLocals;
use crate::dataflow::{DataflowResults, DataflowResultsCursor, DataflowResultsRefCursor};

#[derive(Copy, Clone)]
pub struct MaybeStorageLive<'a, 'tcx> {
Expand All @@ -27,7 +33,9 @@ impl<'a, 'tcx> BitDenotation<'tcx> for MaybeStorageLive<'a, 'tcx> {
}

fn start_block_effect(&self, _on_entry: &mut BitSet<Local>) {
// Nothing is live on function entry
// Nothing is live on function entry (generators only have a self
// argument, and we don't care about that)
assert_eq!(1, self.body.arg_count);
}

fn statement_effect(&self,
Expand Down Expand Up @@ -63,3 +71,123 @@ impl<'a, 'tcx> BottomValue for MaybeStorageLive<'a, 'tcx> {
/// bottom = dead
const BOTTOM_VALUE: bool = false;
}

/// Dataflow analysis that determines whether each local requires storage at a
/// given location; i.e. whether its storage can go away without being observed.
pub struct RequiresStorage<'mir, 'tcx> {
body: &'mir Body<'tcx>,
borrowed_locals:
RefCell<DataflowResultsRefCursor<'mir, 'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>>,
}

impl<'mir, 'tcx: 'mir> RequiresStorage<'mir, 'tcx> {
pub fn new(
body: &'mir Body<'tcx>,
borrowed_locals: &'mir DataflowResults<'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>,
) -> Self {
RequiresStorage {
body,
borrowed_locals: RefCell::new(DataflowResultsCursor::new(borrowed_locals, body)),
}
}

pub fn body(&self) -> &Body<'tcx> {
self.body
}
}

impl<'mir, 'tcx> BitDenotation<'tcx> for RequiresStorage<'mir, 'tcx> {
type Idx = Local;
fn name() -> &'static str { "requires_storage" }
fn bits_per_block(&self) -> usize {
self.body.local_decls.len()
}

fn start_block_effect(&self, _sets: &mut BitSet<Local>) {
// Nothing is live on function entry (generators only have a self
// argument, and we don't care about that)
assert_eq!(1, self.body.arg_count);
}

fn statement_effect(&self,
sets: &mut GenKillSet<Local>,
loc: Location) {
self.check_for_move(sets, loc);
self.check_for_borrow(sets, loc);

let stmt = &self.body[loc.block].statements[loc.statement_index];
match stmt.kind {
StatementKind::StorageLive(l) => sets.gen(l),
StatementKind::StorageDead(l) => sets.kill(l),
StatementKind::Assign(ref place, _)
| StatementKind::SetDiscriminant { ref place, .. } => {
place.base_local().map(|l| sets.gen(l));
}
StatementKind::InlineAsm(box InlineAsm { ref outputs, .. }) => {
for p in &**outputs {
p.base_local().map(|l| sets.gen(l));
}
}
_ => (),
}
}

fn terminator_effect(&self,
sets: &mut GenKillSet<Local>,
loc: Location) {
self.check_for_move(sets, loc);
self.check_for_borrow(sets, loc);
}

fn propagate_call_return(
&self,
in_out: &mut BitSet<Local>,
_call_bb: mir::BasicBlock,
_dest_bb: mir::BasicBlock,
dest_place: &mir::Place<'tcx>,
) {
dest_place.base_local().map(|l| in_out.insert(l));
}
}

impl<'mir, 'tcx> RequiresStorage<'mir, 'tcx> {
/// Kill locals that are fully moved and have not been borrowed.
fn check_for_move(&self, sets: &mut GenKillSet<Local>, loc: Location) {
let mut visitor = MoveVisitor {
sets,
borrowed_locals: &self.borrowed_locals,
};
visitor.visit_location(self.body, loc);
}

/// Gen locals that are newly borrowed. This includes borrowing any part of
/// a local (we rely on this behavior of `HaveBeenBorrowedLocals`).
fn check_for_borrow(&self, sets: &mut GenKillSet<Local>, loc: Location) {
let mut borrowed_locals = self.borrowed_locals.borrow_mut();
borrowed_locals.seek(loc);
borrowed_locals.each_gen_bit(|l| sets.gen(l));
}
}

impl<'mir, 'tcx> BottomValue for RequiresStorage<'mir, 'tcx> {
/// bottom = dead
const BOTTOM_VALUE: bool = false;
}

struct MoveVisitor<'a, 'mir, 'tcx> {
borrowed_locals:
&'a RefCell<DataflowResultsRefCursor<'mir, 'tcx, HaveBeenBorrowedLocals<'mir, 'tcx>>>,
sets: &'a mut GenKillSet<Local>,
}

impl<'a, 'mir: 'a, 'tcx> Visitor<'tcx> for MoveVisitor<'a, 'mir, 'tcx> {
fn visit_local(&mut self, local: &Local, context: PlaceContext, loc: Location) {
if PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) == context {
let mut borrowed_locals = self.borrowed_locals.borrow_mut();
borrowed_locals.seek(loc);
if !borrowed_locals.contains(*local) {
self.sets.kill(*local);
}
}
}
}
95 changes: 94 additions & 1 deletion src/librustc_mir/dataflow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::io;
use std::path::PathBuf;
use std::usize;

pub use self::impls::{MaybeStorageLive};
pub use self::impls::{MaybeStorageLive, RequiresStorage};
pub use self::impls::{MaybeInitializedPlaces, MaybeUninitializedPlaces};
pub use self::impls::DefinitelyInitializedPlaces;
pub use self::impls::EverInitializedPlaces;
Expand Down Expand Up @@ -360,6 +360,99 @@ pub(crate) trait DataflowResultsConsumer<'a, 'tcx: 'a> {
fn body(&self) -> &'a Body<'tcx>;
}

/// Allows iterating dataflow results in a flexible and reasonably fast way.
pub struct DataflowResultsCursor<'mir, 'tcx, BD, DR = DataflowResults<'tcx, BD>>
where
BD: BitDenotation<'tcx>,
DR: Borrow<DataflowResults<'tcx, BD>>,
{
flow_state: FlowAtLocation<'tcx, BD, DR>,

// The statement (or terminator) whose effect has been reconstructed in
// flow_state.
curr_loc: Option<Location>,

body: &'mir Body<'tcx>,
}

pub type DataflowResultsRefCursor<'mir, 'tcx, BD> =
DataflowResultsCursor<'mir, 'tcx, BD, &'mir DataflowResults<'tcx, BD>>;

impl<'mir, 'tcx, BD, DR> DataflowResultsCursor<'mir, 'tcx, BD, DR>
where
BD: BitDenotation<'tcx>,
DR: Borrow<DataflowResults<'tcx, BD>>,
{
pub fn new(result: DR, body: &'mir Body<'tcx>) -> Self {
DataflowResultsCursor {
flow_state: FlowAtLocation::new(result),
curr_loc: None,
body,
}
}

/// Seek to the given location in MIR. This method is fast if you are
/// traversing your MIR statements in order.
///
/// After calling `seek`, the current state will reflect all effects up to
/// and including the `before_statement_effect` of the statement at location
/// `loc`. The `statement_effect` of the statement at `loc` will be
/// available as the current effect (see e.g. `each_gen_bit`).
///
/// If `loc.statement_index` equals the number of statements in the block,
/// we will reconstruct the terminator effect in the same way as described
/// above.
pub fn seek(&mut self, loc: Location) {
if self.curr_loc.map(|cur| loc == cur).unwrap_or(false) {
return;
}

let start_index;
let should_reset = match self.curr_loc {
None => true,
Some(cur)
if loc.block != cur.block || loc.statement_index < cur.statement_index => true,
_ => false,
};
if should_reset {
self.flow_state.reset_to_entry_of(loc.block);
start_index = 0;
} else {
let curr_loc = self.curr_loc.unwrap();
start_index = curr_loc.statement_index;
// Apply the effect from the last seek to the current state.
self.flow_state.apply_local_effect(curr_loc);
}

for stmt in start_index..loc.statement_index {
let mut stmt_loc = loc;
stmt_loc.statement_index = stmt;
self.flow_state.reconstruct_statement_effect(stmt_loc);
self.flow_state.apply_local_effect(stmt_loc);
}

if loc.statement_index == self.body[loc.block].statements.len() {
self.flow_state.reconstruct_terminator_effect(loc);
} else {
self.flow_state.reconstruct_statement_effect(loc);
}
self.curr_loc = Some(loc);
}

/// Return whether the current state contains bit `x`.
pub fn contains(&self, x: BD::Idx) -> bool {
self.flow_state.contains(x)
}

/// Iterate over each `gen` bit in the current effect (invoke `seek` first).
pub fn each_gen_bit<F>(&self, f: F)
where
F: FnMut(BD::Idx),
{
self.flow_state.each_gen_bit(f)
}
}

pub fn state_for_location<'tcx, T: BitDenotation<'tcx>>(loc: Location,
analysis: &T,
result: &DataflowResults<'tcx, T>,
Expand Down
Loading

0 comments on commit 848e0a2

Please sign in to comment.