@@ -20,6 +20,8 @@ use rustc_data_structures::sharded::Sharded;
 use rustc_data_structures::thin_vec::ThinVec;
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
+#[cfg(parallel_compiler)]
+use rustc_data_structures::profiling::TimingGuard;
 use std::hash::{Hash, Hasher};
 use std::mem;
 use std::ptr;
@@ -91,6 +93,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// for some compile-time benchmarks.
     #[inline(always)]
     pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
+        // Handling the `query_blocked_prof_timer` is a bit weird because of the
+        // control flow in this function: Blocking is implemented by
+        // awaiting a running job and, once that is done, entering the loop below
+        // again from the top. In that second iteration we will hit the
+        // cache which provides us with the information we need for
+        // finishing the "query-blocked" event.
+        //
+        // We thus allocate `query_blocked_prof_timer` outside the loop,
+        // initialize it during the first iteration and finish it during the
+        // second iteration.
+        #[cfg(parallel_compiler)]
+        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
+
         let cache = Q::query_cache(tcx);
         loop {
             // We compute the key's hash once and then use it for both the
@@ -104,7 +119,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             if let Some((_, value)) =
                 lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
             {
-                tcx.prof.query_cache_hit(value.index.into());
+                if unlikely!(tcx.prof.enabled()) {
+                    tcx.prof.query_cache_hit(value.index.into());
+
+                    #[cfg(parallel_compiler)]
+                    {
+                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
+                            prof_timer.finish_with_query_invocation_id(value.index.into());
+                        }
+                    }
+                }
+
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
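
The hunks above implement one mechanism: `query_blocked_prof_timer` is allocated as an `Option` before the retry loop, filled in when the query has to wait for another thread, and finished on the next iteration once the cache hit supplies the query invocation id. Below is a minimal, self-contained sketch of that pattern; the `BlockedTimer` type and `fetch_with_blocking` function are illustrative stand-ins, not rustc's actual `TimingGuard` API.

```rust
use std::time::Instant;

/// Illustrative stand-in for a profiler timing guard.
struct BlockedTimer {
    started_at: Instant,
}

impl BlockedTimer {
    fn start() -> Self {
        BlockedTimer { started_at: Instant::now() }
    }

    /// Finish the "blocked" event once the id of the query we waited on is known.
    fn finish_with_id(self, query_id: u64) {
        println!("blocked for {:?} on query {}", self.started_at.elapsed(), query_id);
    }
}

/// `lookup` models the query cache: `None` means "another thread is still
/// computing this", `Some(id)` means the result (and its id) is now cached.
fn fetch_with_blocking(mut lookup: impl FnMut() -> Option<u64>) -> u64 {
    // Lives across loop iterations, like `query_blocked_prof_timer` above.
    let mut blocked_timer: Option<BlockedTimer> = None;

    loop {
        if let Some(query_id) = lookup() {
            // Second iteration: the cache hit tells us which query we were
            // blocked on, so the timer can be finished here.
            if let Some(timer) = blocked_timer.take() {
                timer.finish_with_id(query_id);
            }
            return query_id;
        }

        // First iteration: start timing, then (waiting on the running job
        // is elided here) go around the loop again.
        blocked_timer = Some(BlockedTimer::start());
    }
}

fn main() {
    let mut calls = 0;
    let id = fetch_with_blocking(|| {
        calls += 1;
        if calls < 2 { None } else { Some(42) }
    });
    assert_eq!(id, 42);
}
```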
@@ -113,9 +138,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                 return TryGetJob::JobCompleted(result);
             }
 
-            #[cfg(parallel_compiler)]
-            let query_blocked_prof_timer;
-
             let job = match lock.active.entry((*key).clone()) {
                 Entry::Occupied(entry) => {
                     match *entry.get() {
@@ -125,7 +147,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // self-profiler.
                             #[cfg(parallel_compiler)]
                             {
-                                query_blocked_prof_timer = tcx.prof.query_blocked(Q::NAME);
+                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
                             }
 
                             job.clone()
@@ -169,11 +191,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             {
                 let result = job.r#await(tcx, span);
 
-                // This `drop()` is not strictly necessary as the binding
-                // would go out of scope anyway. But it's good to have an
-                // explicit marker of how far the measurement goes.
-                drop(query_blocked_prof_timer);
-
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
                 }
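
With the guard now held in an `Option` and finished in the cache-hit path (or simply dropped when the function returns), the explicit `drop()` above is no longer needed. The other profiling change in this diff is that cache-hit events are only recorded behind `if unlikely!(tcx.prof.enabled())`, so the common profiler-off case pays for a single predictable branch. Below is a rough sketch of that guard pattern, using an invented `Profiler` type rather than rustc's profiler handle.

```rust
/// Invented profiler type for illustration; not the rustc API.
struct Profiler {
    enabled: bool,
}

impl Profiler {
    /// The cheap check the hot path branches on.
    #[inline(always)]
    fn enabled(&self) -> bool {
        self.enabled
    }

    fn query_cache_hit(&self, query_id: u64) {
        // Actual event recording would go here.
        println!("cache hit for query {}", query_id);
    }
}

fn record_cache_hit(prof: &Profiler, query_id: u64) {
    // All profiling work sits behind a single branch, so the common
    // profiler-off case costs one well-predicted comparison.
    if prof.enabled() {
        prof.query_cache_hit(query_id);
    }
}

fn main() {
    record_cache_hit(&Profiler { enabled: true }, 42);
    record_cache_hit(&Profiler { enabled: false }, 43);
}
```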