Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Profile queries #43345

Merged
merged 5 commits into from
Aug 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/librustc/dep_graph/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use rustc_data_structures::fx::FxHashMap;
use session::config::OutputType;
use std::cell::{Ref, RefCell};
use std::rc::Rc;
use util::common::{ProfileQueriesMsg, profq_msg};

use super::dep_node::{DepNode, DepKind, WorkProductId};
use super::query::DepGraphQuery;
Expand Down Expand Up @@ -118,7 +119,13 @@ impl DepGraph {
{
if let Some(ref data) = self.data {
data.edges.borrow_mut().push_task(key);
if cfg!(debug_assertions) {
profq_msg(ProfileQueriesMsg::TaskBegin(key.clone()))
};
let result = task(cx, arg);
if cfg!(debug_assertions) {
profq_msg(ProfileQueriesMsg::TaskEnd)
};
let dep_node_index = data.edges.borrow_mut().pop_task(key);
(result, dep_node_index)
} else {
Expand Down
4 changes: 4 additions & 0 deletions src/librustc/session/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,10 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"dump the dependency graph to $RUST_DEP_GRAPH (default: /tmp/dep_graph.gv)"),
query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
"enable queries of the dependency graph for regression testing"),
profile_queries: bool = (false, parse_bool, [UNTRACKED],
"trace and profile the queries of the incremental compilation framework"),
profile_queries_and_keys: bool = (false, parse_bool, [UNTRACKED],
"trace and profile the queries and keys of the incremental compilation framework"),
no_analysis: bool = (false, parse_bool, [UNTRACKED],
"parse and expand the source, but run no analysis"),
extra_plugins: Vec<String> = (Vec::new(), parse_list, [TRACKED],
Expand Down
7 changes: 7 additions & 0 deletions src/librustc/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,13 @@ impl Session {
}
pub fn verbose(&self) -> bool { self.opts.debugging_opts.verbose }
pub fn time_passes(&self) -> bool { self.opts.debugging_opts.time_passes }
/// Returns `true` when any query-profiling mode is active, i.e. when either
/// `-Z profile-queries` or `-Z profile-queries-and-keys` was passed.
pub fn profile_queries(&self) -> bool {
    let dbg_opts = &self.opts.debugging_opts;
    dbg_opts.profile_queries || dbg_opts.profile_queries_and_keys
}
/// Returns `true` only for the more verbose mode (`-Z profile-queries-and-keys`),
/// which additionally records the `Debug` rendering of each query key.
pub fn profile_queries_and_keys(&self) -> bool {
    self.opts.debugging_opts.profile_queries_and_keys
}
pub fn count_llvm_insns(&self) -> bool {
self.opts.debugging_opts.count_llvm_insns
}
Expand Down
41 changes: 41 additions & 0 deletions src/librustc/ty/maps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use ty::steal::Steal;
use ty::subst::Substs;
use ty::fast_reject::SimplifiedType;
use util::nodemap::{DefIdSet, NodeSet};
use util::common::{profq_msg, ProfileQueriesMsg};

use rustc_data_structures::indexed_vec::IndexVec;
use rustc_data_structures::fx::FxHashMap;
Expand Down Expand Up @@ -513,6 +514,29 @@ impl<'tcx> QueryDescription for queries::lint_levels<'tcx> {
}
}

// If enabled, send a message to the profile-queries thread
//
// NOTE(review): the `cfg!(debug_assertions)` guard means these messages can
// only ever be sent by a compiler built with debug assertions; in a release
// build the condition is statically false and the expansion should be
// optimized away entirely — confirm this gating of `-Z profile-queries`
// is intended.
macro_rules! profq_msg {
    // `$tcx` supplies the session whose profiling flags are consulted
    // (via `Session::profile_queries`); `$msg` is the `ProfileQueriesMsg`
    // forwarded to `util::common::profq_msg`.
    ($tcx:expr, $msg:expr) => {
        if cfg!(debug_assertions) {
            if $tcx.sess.profile_queries() {
                profq_msg($msg)
            }
        }
    }
}

// If enabled, format a key using its debug string, which can be
// expensive to compute (in terms of time).
//
// Expands to an `Option<String>`: `Some(debug rendering of $key)` only when
// the compiler was built with debug assertions AND the session has
// `-Z profile-queries-and-keys` enabled; `None` otherwise, so the common
// case pays nothing for the formatting.
macro_rules! profq_key {
    ($tcx:expr, $key:expr) => {
        if cfg!(debug_assertions) {
            if $tcx.sess.profile_queries_and_keys() {
                Some(format!("{:?}", $key))
            } else { None }
        } else { None }
    }
}

macro_rules! define_maps {
(<$tcx:tt>
$($(#[$attr:meta])*
Expand All @@ -539,6 +563,12 @@ macro_rules! define_maps {
$($(#[$attr])* $name($K)),*
}

#[allow(bad_style)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum QueryMsg {
$($name(Option<String>)),*
}

impl<$tcx> Query<$tcx> {
pub fn describe(&self, tcx: TyCtxt) -> String {
match *self {
Expand Down Expand Up @@ -581,10 +611,20 @@ macro_rules! define_maps {
key,
span);

profq_msg!(tcx,
ProfileQueriesMsg::QueryBegin(
span.clone(),
QueryMsg::$name(profq_key!(tcx, key))
)
);

if let Some(&(ref result, dep_node_index)) = tcx.maps.$name.borrow().map.get(&key) {
tcx.dep_graph.read_index(dep_node_index);
profq_msg!(tcx, ProfileQueriesMsg::CacheHit);
return Ok(f(result));
}
// else, we are going to run the provider:
profq_msg!(tcx, ProfileQueriesMsg::ProviderBegin);

// FIXME(eddyb) Get more valid Span's on queries.
// def_span guard is necessary to prevent a recursive loop,
Expand Down Expand Up @@ -612,6 +652,7 @@ macro_rules! define_maps {
tcx.dep_graph.with_task(dep_node, tcx, key, run_provider)
}
})?;
profq_msg!(tcx, ProfileQueriesMsg::ProviderEnd);

tcx.dep_graph.read_index(dep_node_index);

Expand Down
77 changes: 77 additions & 0 deletions src/librustc/util/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ use std::iter::repeat;
use std::path::Path;
use std::time::{Duration, Instant};

use std::sync::mpsc::{Sender};
use syntax_pos::{Span};
use ty::maps::{QueryMsg};
use dep_graph::{DepNode};

// The name of the associated type for `Fn` return types
pub const FN_OUTPUT_NAME: &'static str = "Output";

Expand All @@ -29,6 +34,72 @@ pub struct ErrorReported;

thread_local!(static TIME_DEPTH: Cell<usize> = Cell::new(0));

/// Initialized for -Z profile-queries
thread_local!(static PROFQ_CHAN: RefCell<Option<Sender<ProfileQueriesMsg>>> = RefCell::new(None));
Copy link
Member

@eddyb eddyb Jul 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a separate thread? Isn't the overhead of synchronization more than that of pushing to a Vec or something?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. The separate thread design was @nikomatsakis's suggestion, so I'll let him respond first.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that explains a bunch. If I get a satisfactory explanation out of him this may be good to go.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, we can measure. Just to be sure we all share the same concerns: I presume you are worried that the overhead of making and sending messages will distort the measurements, right? I admit my inclination is that separate threads are basically useful and good overall, if we can fit them in easily enough. =)

I think the reason I suggested a thread was that, with incremental compilation, we found that when we moved processing of the dependency graph into one thread, there was a significant hit, and that copying data to a separate thread was cheaper. But it's not obviously comparable -- in the incremental case, we did some effort to keep overhead low, such as pushing a series of messages into a vec and then sending the entire vec over to the thread to be processed (and using a double-buffering scheme).

In this case, in contrast, using a separate thread causes some difficulty since our keys contain region pointers and hence have to be converted to strings. And of course it's just using a channel rather than a vector, so there may be some overhead there (I'm not sure).

One question also: how many events would we wind up accumulating in this vector? I could imagine it being quite a few! But I guess we're just accumulating in the current code too, so that doesn't make much difference. I think I was imagining before that we would be doing some "consolidation" (e.g., summing up data or whatever) in the separate thread.

Regardless, seems like we can measure the "distortion" of a separate thread (versus a vector) just by measuring how much time the "main thread" of compilation takes, right?

TL;DR: I am happy either way really, just want to do the thing that gives us the best results.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, another point --

There are things we'd eventually like to profile that occur before the tcx is created, or after it is destroyed. I suspect that we will eventually refactor all the things that occur before the tcx is created away completely, but I'm not so sure about the stuff that comes after it is destroyed -- in particular, I think it'll always be an important win to drop the arenas before we do LLVM translation, to reduce peak memory usage. So if we have the vector containing references into those arenas, that will make it harder to include those LLVM optimizations into our measurements, presumably (but not impossible, I suppose).


/// Parameters carried by the `Dump` variant of `ProfileQueriesMsg`.
#[derive(Clone, Debug)]
pub struct ProfQDumpParams {
    /// Base path for the files that will be dumped.
    pub path: String,
    /// Acknowledgement channel, so the compiler can wait for the dump to finish.
    pub ack: Sender<()>,
    /// When `true`, also dump a log file containing every `ProfileQueriesMsg`.
    pub dump_profq_msg_log: bool,
}

/// A sequence of these messages induces a trace of query-based incremental
/// compilation.
/// FIXME(matthewhammer): Determine whether we should include cycle detection here or not.
#[derive(Clone, Debug)]
pub enum ProfileQueriesMsg {
    /// Begin a timed pass.
    TimeBegin(String),
    /// End a timed pass.
    TimeEnd,
    /// Begin a task (see `dep_graph::graph::with_task`).
    TaskBegin(DepNode),
    /// End a task.
    TaskEnd,
    /// Begin a new query.
    QueryBegin(Span, QueryMsg),
    /// The query is satisfied by an already-known value for the given key.
    CacheHit,
    /// The query requires running a provider; providers may nest, permitting
    /// queries to nest.
    ProviderBegin,
    /// The query is satisfied by a provider terminating with a value.
    ProviderEnd,
    /// Dump a record of the queries to the given path.
    Dump(ProfQDumpParams),
    /// Halt the profiling/monitoring background thread.
    Halt,
}

/// If enabled, send a message to the profile-queries thread.
///
/// Silently drops the message when no channel has been installed via
/// `profq_set_chan`.
pub fn profq_msg(msg: ProfileQueriesMsg) {
    PROFQ_CHAN.with(|sender| {
        match sender.borrow().as_ref() {
            Some(s) => s.send(msg).unwrap(),
            None => {
                // No channel installed: drop the message on the floor.
                //
                // FIXME(matthewhammer): Multi-threaded translation phase triggers the panic below.
                // From backtrace: rustc_trans::back::write::spawn_work::{{closure}}.
                //
                // panic!("no channel on which to send profq_msg: {:?}", msg)
            }
        }
    })
}

/// Install the channel used by `profq_msg` for this thread.
///
/// Returns `true` on success; returns `false` (leaving the existing channel
/// in place) if one was already installed.
pub fn profq_set_chan(s: Sender<ProfileQueriesMsg>) -> bool {
    PROFQ_CHAN.with(|chan| {
        let mut slot = chan.borrow_mut();
        match *slot {
            None => {
                *slot = Some(s);
                true
            }
            Some(_) => false,
        }
    })
}

/// Read the current depth of `time()` calls. This is used to
/// encourage indentation across threads.
pub fn time_depth() -> usize {
Expand All @@ -53,9 +124,15 @@ pub fn time<T, F>(do_it: bool, what: &str, f: F) -> T where
r
});

if cfg!(debug_assertions) {
profq_msg(ProfileQueriesMsg::TimeBegin(what.to_string()))
};
let start = Instant::now();
let rv = f();
let dur = start.elapsed();
if cfg!(debug_assertions) {
profq_msg(ProfileQueriesMsg::TimeEnd)
};

print_time_passes_entry_internal(what, dur);

Expand Down
14 changes: 14 additions & 0 deletions src/librustc_driver/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ use arena::DroplessArena;

use derive_registrar;

use profile;

pub fn compile_input(sess: &Session,
cstore: &CStore,
input: &Input,
Expand Down Expand Up @@ -105,6 +107,10 @@ pub fn compile_input(sess: &Session,
sess.abort_if_errors();
}

if sess.profile_queries() {
profile::begin();
}

// We need nested scopes here, because the intermediate results can keep
// large chunks of memory alive and we want to free them as soon as
// possible to keep the peak memory usage low
Expand Down Expand Up @@ -537,6 +543,10 @@ pub fn phase_1_parse_input<'a>(control: &CompileController,
-> PResult<'a, ast::Crate> {
sess.diagnostic().set_continue_after_error(control.continue_parse_after_error);

if sess.profile_queries() {
profile::begin();
}

let krate = time(sess.time_passes(), "parsing", || {
match *input {
Input::File(ref file) => {
Expand Down Expand Up @@ -1120,6 +1130,10 @@ pub fn phase_4_translate_to_llvm<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"translation",
move || trans::trans_crate(tcx, analysis, incremental_hashes_map, output_filenames));

if tcx.sess.profile_queries() {
profile::dump("profile_queries".to_string())
}

translation
}

Expand Down
1 change: 1 addition & 0 deletions src/librustc_driver/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ use syntax_pos::{DUMMY_SP, MultiSpan};
#[cfg(test)]
mod test;

pub mod profile;
pub mod driver;
pub mod pretty;
pub mod target_features;
Expand Down
Loading