From 939f3971c520317aae11a6393a36bb0f8dadb487 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Sun, 16 Jun 2019 14:58:05 +0000 Subject: [PATCH] Beta backport of: Auto merge of #61754 - nikomatsakis:trait-caching-perf-3, r=pnkfelix create a "provisional cache" to restore performance in the case of cycles Introduce a "provisional cache" that caches the results of auto trait resolutions but keeps them from entering the *main* cache until everything is ready. This turned out a bit more complex than I hoped, but I don't see another short term fix -- happy to take suggestions! In the meantime, it's very clear we need to rework the trait solver. This resolves the extreme performance slowdown experienced in #60846 -- I plan to add a perf.rust-lang.org regression test to track this. Caveat: I've not run `x.py test` in full yet. r? @pnkfelix cc @arielb1 Fixes #60846 --- .../traits/query/evaluate_obligation.rs | 2 +- src/librustc/traits/select.rs | 486 +++++++++++++++--- src/librustc_traits/evaluate_obligation.rs | 2 +- 3 files changed, 407 insertions(+), 83 deletions(-) diff --git a/src/librustc/traits/query/evaluate_obligation.rs b/src/librustc/traits/query/evaluate_obligation.rs index d5230f15c2565..5f1af668a9e48 100644 --- a/src/librustc/traits/query/evaluate_obligation.rs +++ b/src/librustc/traits/query/evaluate_obligation.rs @@ -64,7 +64,7 @@ impl<'cx, 'gcx, 'tcx> InferCtxt<'cx, 'gcx, 'tcx> { Err(OverflowError) => { let mut selcx = SelectionContext::with_query_mode(&self, TraitQueryMode::Standard); - selcx.evaluate_obligation_recursively(obligation) + selcx.evaluate_root_obligation(obligation) .unwrap_or_else(|r| { span_bug!( obligation.cause.span, diff --git a/src/librustc/traits/select.rs b/src/librustc/traits/select.rs index c4be85050dbc2..771c0759d6501 100644 --- a/src/librustc/traits/select.rs +++ b/src/librustc/traits/select.rs @@ -43,7 +43,7 @@ use crate::hir; use rustc_data_structures::bit_set::GrowableBitSet; use rustc_data_structures::sync::Lock; use rustc_target::spec::abi::Abi; -use std::cell::Cell; +use std::cell::{Cell, RefCell}; use std::cmp; use std::fmt::{self, Display}; use std::iter; @@ -154,12 +154,12 @@ struct TraitObligationStack<'prev, 'tcx: 'prev> { /// selection-context's freshener. Used to check for recursion. fresh_trait_ref: ty::PolyTraitRef<'tcx>, - /// Starts out as false -- if, during evaluation, we encounter a - /// cycle, then we will set this flag to true for all participants - /// in the cycle (apart from the "head" node). These participants - /// will then forego caching their results. This is not the most - /// efficient solution, but it addresses #60010. The problem we - /// are trying to prevent: + /// Starts out equal to `depth` -- if, during evaluation, we + /// encounter a cycle, then we will set this flag to the minimum + /// depth of that cycle for all participants in the cycle. These + /// participants will then forego caching their results. This is + /// not the most efficient solution, but it addresses #60010. The + /// problem we are trying to prevent: /// /// - If you have `A: AutoTrait` requires `B: AutoTrait` and `C: NonAutoTrait` /// - `B: AutoTrait` requires `A: AutoTrait` (coinductive cycle, ok) @@ -182,9 +182,16 @@ struct TraitObligationStack<'prev, 'tcx: 'prev> { /// evaluate each member of a cycle up to N times, where N is the /// length of the cycle. This means the performance impact is /// bounded and we shouldn't have any terrible worst-cases. - in_cycle: Cell, + reached_depth: Cell, previous: TraitObligationStackList<'prev, 'tcx>, + + /// Number of parent frames plus one -- so the topmost frame has depth 1. + depth: usize, + + /// Depth-first number of this node in the search graph -- a + /// pre-order index. Basically a freshly incremented counter. + dfn: usize, } #[derive(Clone, Default)] @@ -603,7 +610,8 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { debug!("select({:?})", obligation); debug_assert!(!obligation.predicate.has_escaping_bound_vars()); - let stack = self.push_stack(TraitObligationStackList::empty(), obligation); + let pec = &ProvisionalEvaluationCache::default(); + let stack = self.push_stack(TraitObligationStackList::empty(pec), obligation); let candidate = match self.candidate_from_obligation(&stack) { Err(SelectionError::Overflow) => { @@ -649,20 +657,23 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { // where we do not expect overflow to be propagated. assert!(self.query_mode == TraitQueryMode::Standard); - self.evaluate_obligation_recursively(obligation) + self.evaluate_root_obligation(obligation) .expect("Overflow should be caught earlier in standard query mode") .may_apply() } - /// Evaluates whether the obligation `obligation` can be satisfied and returns - /// an `EvaluationResult`. - pub fn evaluate_obligation_recursively( + /// Evaluates whether the obligation `obligation` can be satisfied + /// and returns an `EvaluationResult`. This is meant for the + /// *initial* call. + pub fn evaluate_root_obligation( &mut self, obligation: &PredicateObligation<'tcx>, ) -> Result { self.evaluation_probe(|this| { - this.evaluate_predicate_recursively(TraitObligationStackList::empty(), - obligation.clone()) + this.evaluate_predicate_recursively( + TraitObligationStackList::empty(&ProvisionalEvaluationCache::default()), + obligation.clone(), + ) }) } @@ -868,23 +879,131 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { return Ok(result); } + if let Some(result) = stack.cache().get_provisional(fresh_trait_ref) { + debug!("PROVISIONAL CACHE HIT: EVAL({:?})={:?}", fresh_trait_ref, result); + stack.update_reached_depth(stack.cache().current_reached_depth()); + return Ok(result); + } + + // Check if this is a match for something already on the + // stack. If so, we don't want to insert the result into the + // main cache (it is cycle dependent) nor the provisional + // cache (which is meant for things that have completed but + // for a "backedge" -- this result *is* the backedge). + if let Some(cycle_result) = self.check_evaluation_cycle(&stack) { + return Ok(cycle_result); + } + let (result, dep_node) = self.in_task(|this| this.evaluate_stack(&stack)); let result = result?; - if !stack.in_cycle.get() { + if !result.must_apply_modulo_regions() { + stack.cache().on_failure(stack.dfn); + } + + let reached_depth = stack.reached_depth.get(); + if reached_depth >= stack.depth { debug!("CACHE MISS: EVAL({:?})={:?}", fresh_trait_ref, result); self.insert_evaluation_cache(obligation.param_env, fresh_trait_ref, dep_node, result); + + stack.cache().on_completion(stack.depth, |fresh_trait_ref, provisional_result| { + self.insert_evaluation_cache( + obligation.param_env, + fresh_trait_ref, + dep_node, + provisional_result.max(result), + ); + }); } else { + debug!("PROVISIONAL: {:?}={:?}", fresh_trait_ref, result); debug!( - "evaluate_trait_predicate_recursively: skipping cache because {:?} \ - is a cycle participant", + "evaluate_trait_predicate_recursively: caching provisionally because {:?} \ + is a cycle participant (at depth {}, reached depth {})", + fresh_trait_ref, + stack.depth, + reached_depth, + ); + + stack.cache().insert_provisional( + stack.dfn, + reached_depth, fresh_trait_ref, + result, ); } + Ok(result) } + /// If there is any previous entry on the stack that precisely + /// matches this obligation, then we can assume that the + /// obligation is satisfied for now (still all other conditions + /// must be met of course). One obvious case this comes up is + /// marker traits like `Send`. Think of a linked list: + /// + /// struct List { data: T, next: Option>> } + /// + /// `Box>` will be `Send` if `T` is `Send` and + /// `Option>>` is `Send`, and in turn + /// `Option>>` is `Send` if `Box>` is + /// `Send`. + /// + /// Note that we do this comparison using the `fresh_trait_ref` + /// fields. Because these have all been freshened using + /// `self.freshener`, we can be sure that (a) this will not + /// affect the inferencer state and (b) that if we see two + /// fresh regions with the same index, they refer to the same + /// unbound type variable. + fn check_evaluation_cycle( + &mut self, + stack: &TraitObligationStack<'_, 'tcx>, + ) -> Option { + if let Some(cycle_depth) = stack.iter() + .skip(1) // skip top-most frame + .find(|prev| stack.obligation.param_env == prev.obligation.param_env && + stack.fresh_trait_ref == prev.fresh_trait_ref) + .map(|stack| stack.depth) + { + debug!( + "evaluate_stack({:?}) --> recursive at depth {}", + stack.fresh_trait_ref, + cycle_depth, + ); + + // If we have a stack like `A B C D E A`, where the top of + // the stack is the final `A`, then this will iterate over + // `A, E, D, C, B` -- i.e., all the participants apart + // from the cycle head. We mark them as participating in a + // cycle. This suppresses caching for those nodes. See + // `in_cycle` field for more details. + stack.update_reached_depth(cycle_depth); + + // Subtle: when checking for a coinductive cycle, we do + // not compare using the "freshened trait refs" (which + // have erased regions) but rather the fully explicit + // trait refs. This is important because it's only a cycle + // if the regions match exactly. + let cycle = stack.iter().skip(1).take_while(|s| s.depth >= cycle_depth); + let cycle = cycle.map(|stack| ty::Predicate::Trait(stack.obligation.predicate)); + if self.coinductive_match(cycle) { + debug!( + "evaluate_stack({:?}) --> recursive, coinductive", + stack.fresh_trait_ref + ); + Some(EvaluatedToOk) + } else { + debug!( + "evaluate_stack({:?}) --> recursive, inductive", + stack.fresh_trait_ref + ); + Some(EvaluatedToRecur) + } + } else { + None + } + } + fn evaluate_stack<'o>( &mut self, stack: &TraitObligationStack<'o, 'tcx>, @@ -961,65 +1080,6 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { return Ok(EvaluatedToUnknown); } - // If there is any previous entry on the stack that precisely - // matches this obligation, then we can assume that the - // obligation is satisfied for now (still all other conditions - // must be met of course). One obvious case this comes up is - // marker traits like `Send`. Think of a linked list: - // - // struct List { data: T, next: Option>> } - // - // `Box>` will be `Send` if `T` is `Send` and - // `Option>>` is `Send`, and in turn - // `Option>>` is `Send` if `Box>` is - // `Send`. - // - // Note that we do this comparison using the `fresh_trait_ref` - // fields. Because these have all been freshened using - // `self.freshener`, we can be sure that (a) this will not - // affect the inferencer state and (b) that if we see two - // fresh regions with the same index, they refer to the same - // unbound type variable. - if let Some(rec_index) = stack.iter() - .skip(1) // skip top-most frame - .position(|prev| stack.obligation.param_env == prev.obligation.param_env && - stack.fresh_trait_ref == prev.fresh_trait_ref) - { - debug!("evaluate_stack({:?}) --> recursive", stack.fresh_trait_ref); - - // If we have a stack like `A B C D E A`, where the top of - // the stack is the final `A`, then this will iterate over - // `A, E, D, C, B` -- i.e., all the participants apart - // from the cycle head. We mark them as participating in a - // cycle. This suppresses caching for those nodes. See - // `in_cycle` field for more details. - for item in stack.iter().take(rec_index + 1) { - debug!("evaluate_stack: marking {:?} as cycle participant", item.fresh_trait_ref); - item.in_cycle.set(true); - } - - // Subtle: when checking for a coinductive cycle, we do - // not compare using the "freshened trait refs" (which - // have erased regions) but rather the fully explicit - // trait refs. This is important because it's only a cycle - // if the regions match exactly. - let cycle = stack.iter().skip(1).take(rec_index + 1); - let cycle = cycle.map(|stack| ty::Predicate::Trait(stack.obligation.predicate)); - if self.coinductive_match(cycle) { - debug!( - "evaluate_stack({:?}) --> recursive, coinductive", - stack.fresh_trait_ref - ); - return Ok(EvaluatedToOk); - } else { - debug!( - "evaluate_stack({:?}) --> recursive, inductive", - stack.fresh_trait_ref - ); - return Ok(EvaluatedToRecur); - } - } - match self.candidate_from_obligation(stack) { Ok(Some(c)) => self.evaluate_candidate(stack, &c), Ok(None) => Ok(EvaluatedToAmbig), @@ -1222,6 +1282,11 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { } // If no match, compute result and insert into cache. + // + // FIXME(nikomatsakis) -- this cache is not taking into + // account cycles that may have occurred in forming the + // candidate. I don't know of any specific problems that + // result but it seems awfully suspicious. let (candidate, dep_node) = self.in_task(|this| this.candidate_from_obligation_no_cache(stack)); @@ -3737,11 +3802,15 @@ impl<'cx, 'gcx, 'tcx> SelectionContext<'cx, 'gcx, 'tcx> { .to_poly_trait_ref() .fold_with(&mut self.freshener); + let dfn = previous_stack.cache.next_dfn(); + let depth = previous_stack.depth() + 1; TraitObligationStack { obligation, fresh_trait_ref, - in_cycle: Cell::new(false), + reached_depth: Cell::new(depth), previous: previous_stack, + dfn, + depth, } } @@ -3934,28 +4003,283 @@ impl<'o, 'tcx> TraitObligationStack<'o, 'tcx> { TraitObligationStackList::with(self) } + fn cache(&self) -> &'o ProvisionalEvaluationCache<'tcx> { + self.previous.cache + } + fn iter(&'o self) -> TraitObligationStackList<'o, 'tcx> { self.list() } + + /// Indicates that attempting to evaluate this stack entry + /// required accessing something from the stack at depth `reached_depth`. + fn update_reached_depth(&self, reached_depth: usize) { + assert!( + self.depth > reached_depth, + "invoked `update_reached_depth` with something under this stack: \ + self.depth={} reached_depth={}", + self.depth, + reached_depth, + ); + debug!("update_reached_depth(reached_depth={})", reached_depth); + let mut p = self; + while reached_depth < p.depth { + debug!("update_reached_depth: marking {:?} as cycle participant", p.fresh_trait_ref); + p.reached_depth.set(p.reached_depth.get().min(reached_depth)); + p = p.previous.head.unwrap(); + } + } +} + +/// The "provisional evaluation cache" is used to store intermediate cache results +/// when solving auto traits. Auto traits are unusual in that they can support +/// cycles. So, for example, a "proof tree" like this would be ok: +/// +/// - `Foo: Send` :- +/// - `Bar: Send` :- +/// - `Foo: Send` -- cycle, but ok +/// - `Baz: Send` +/// +/// Here, to prove `Foo: Send`, we have to prove `Bar: Send` and +/// `Baz: Send`. Proving `Bar: Send` in turn required `Foo: Send`. +/// For non-auto traits, this cycle would be an error, but for auto traits (because +/// they are coinductive) it is considered ok. +/// +/// However, there is a complication: at the point where we have +/// "proven" `Bar: Send`, we have in fact only proven it +/// *provisionally*. In particular, we proved that `Bar: Send` +/// *under the assumption* that `Foo: Send`. But what if we later +/// find out this assumption is wrong? Specifically, we could +/// encounter some kind of error proving `Baz: Send`. In that case, +/// `Bar: Send` didn't turn out to be true. +/// +/// In Issue #60010, we found a bug in rustc where it would cache +/// these intermediate results. This was fixed in #60444 by disabling +/// *all* caching for things involved in a cycle -- in our example, +/// that would mean we don't cache that `Bar: Send`. But this led +/// to large slowdowns. +/// +/// Specifically, imagine this scenario, where proving `Baz: Send` +/// first requires proving `Bar: Send` (which is true: +/// +/// - `Foo: Send` :- +/// - `Bar: Send` :- +/// - `Foo: Send` -- cycle, but ok +/// - `Baz: Send` +/// - `Bar: Send` -- would be nice for this to be a cache hit! +/// - `*const T: Send` -- but what if we later encounter an error? +/// +/// The *provisional evaluation cache* resolves this issue. It stores +/// cache results that we've proven but which were involved in a cycle +/// in some way. We track the minimal stack depth (i.e., the +/// farthest from the top of the stack) that we are dependent on. +/// The idea is that the cache results within are all valid -- so long as +/// none of the nodes in between the current node and the node at that minimum +/// depth result in an error (in which case the cached results are just thrown away). +/// +/// During evaluation, we consult this provisional cache and rely on +/// it. Accessing a cached value is considered equivalent to accessing +/// a result at `reached_depth`, so it marks the *current* solution as +/// provisional as well. If an error is encountered, we toss out any +/// provisional results added from the subtree that encountered the +/// error. When we pop the node at `reached_depth` from the stack, we +/// can commit all the things that remain in the provisional cache. +struct ProvisionalEvaluationCache<'tcx> { + /// next "depth first number" to issue -- just a counter + dfn: Cell, + + /// Stores the "coldest" depth (bottom of stack) reached by any of + /// the evaluation entries. The idea here is that all things in the provisional + /// cache are always dependent on *something* that is colder in the stack: + /// therefore, if we add a new entry that is dependent on something *colder still*, + /// we have to modify the depth for all entries at once. + /// + /// Example: + /// + /// Imagine we have a stack `A B C D E` (with `E` being the top of + /// the stack). We cache something with depth 2, which means that + /// it was dependent on C. Then we pop E but go on and process a + /// new node F: A B C D F. Now F adds something to the cache with + /// depth 1, meaning it is dependent on B. Our original cache + /// entry is also dependent on B, because there is a path from E + /// to C and then from C to F and from F to B. + reached_depth: Cell, + + /// Map from cache key to the provisionally evaluated thing. + /// The cache entries contain the result but also the DFN in which they + /// were added. The DFN is used to clear out values on failure. + /// + /// Imagine we have a stack like: + /// + /// - `A B C` and we add a cache for the result of C (DFN 2) + /// - Then we have a stack `A B D` where `D` has DFN 3 + /// - We try to solve D by evaluating E: `A B D E` (DFN 4) + /// - `E` generates various cache entries which have cyclic dependices on `B` + /// - `A B D E F` and so forth + /// - the DFN of `F` for example would be 5 + /// - then we determine that `E` is in error -- we will then clear + /// all cache values whose DFN is >= 4 -- in this case, that + /// means the cached value for `F`. + map: RefCell, ProvisionalEvaluation>>, +} + +/// A cache value for the provisional cache: contains the depth-first +/// number (DFN) and result. +#[derive(Copy, Clone, Debug)] +struct ProvisionalEvaluation { + from_dfn: usize, + result: EvaluationResult, +} + +impl<'tcx> Default for ProvisionalEvaluationCache<'tcx> { + fn default() -> Self { + Self { + dfn: Cell::new(0), + reached_depth: Cell::new(std::usize::MAX), + map: Default::default(), + } + } +} + +impl<'tcx> ProvisionalEvaluationCache<'tcx> { + /// Get the next DFN in sequence (basically a counter). + fn next_dfn(&self) -> usize { + let result = self.dfn.get(); + self.dfn.set(result + 1); + result + } + + /// Check the provisional cache for any result for + /// `fresh_trait_ref`. If there is a hit, then you must consider + /// it an access to the stack slots at depth + /// `self.current_reached_depth()` and above. + fn get_provisional(&self, fresh_trait_ref: ty::PolyTraitRef<'tcx>) -> Option { + debug!( + "get_provisional(fresh_trait_ref={:?}) = {:#?} with reached-depth {}", + fresh_trait_ref, + self.map.borrow().get(&fresh_trait_ref), + self.reached_depth.get(), + ); + Some(self.map.borrow().get(&fresh_trait_ref)?.result) + } + + /// Current value of the `reached_depth` counter -- all the + /// provisional cache entries are dependent on the item at this + /// depth. + fn current_reached_depth(&self) -> usize { + self.reached_depth.get() + } + + /// Insert a provisional result into the cache. The result came + /// from the node with the given DFN. It accessed a minimum depth + /// of `reached_depth` to compute. It evaluated `fresh_trait_ref` + /// and resulted in `result`. + fn insert_provisional( + &self, + from_dfn: usize, + reached_depth: usize, + fresh_trait_ref: ty::PolyTraitRef<'tcx>, + result: EvaluationResult, + ) { + debug!( + "insert_provisional(from_dfn={}, reached_depth={}, fresh_trait_ref={:?}, result={:?})", + from_dfn, + reached_depth, + fresh_trait_ref, + result, + ); + let r_d = self.reached_depth.get(); + self.reached_depth.set(r_d.min(reached_depth)); + + debug!("insert_provisional: reached_depth={:?}", self.reached_depth.get()); + + self.map.borrow_mut().insert(fresh_trait_ref, ProvisionalEvaluation { from_dfn, result }); + } + + /// Invoked when the node with dfn `dfn` does not get a successful + /// result. This will clear out any provisional cache entries + /// that were added since `dfn` was created. This is because the + /// provisional entries are things which must assume that the + /// things on the stack at the time of their creation succeeded -- + /// since the failing node is presently at the top of the stack, + /// these provisional entries must either depend on it or some + /// ancestor of it. + fn on_failure(&self, dfn: usize) { + debug!( + "on_failure(dfn={:?})", + dfn, + ); + self.map.borrow_mut().retain(|key, eval| { + if !eval.from_dfn >= dfn { + debug!("on_failure: removing {:?}", key); + false + } else { + true + } + }); + } + + /// Invoked when the node at depth `depth` completed without + /// depending on anything higher in the stack (if that completion + /// was a failure, then `on_failure` should have been invoked + /// already). The callback `op` will be invoked for each + /// provisional entry that we can now confirm. + fn on_completion( + &self, + depth: usize, + mut op: impl FnMut(ty::PolyTraitRef<'tcx>, EvaluationResult), + ) { + debug!( + "on_completion(depth={}, reached_depth={})", + depth, + self.reached_depth.get(), + ); + + if self.reached_depth.get() < depth { + debug!("on_completion: did not yet reach depth to complete"); + return; + } + + for (fresh_trait_ref, eval) in self.map.borrow_mut().drain() { + debug!( + "on_completion: fresh_trait_ref={:?} eval={:?}", + fresh_trait_ref, + eval, + ); + + op(fresh_trait_ref, eval.result); + } + + self.reached_depth.set(std::usize::MAX); + } } #[derive(Copy, Clone)] struct TraitObligationStackList<'o, 'tcx: 'o> { + cache: &'o ProvisionalEvaluationCache<'tcx>, head: Option<&'o TraitObligationStack<'o, 'tcx>>, } impl<'o, 'tcx> TraitObligationStackList<'o, 'tcx> { - fn empty() -> TraitObligationStackList<'o, 'tcx> { - TraitObligationStackList { head: None } + fn empty(cache: &'o ProvisionalEvaluationCache<'tcx>) -> TraitObligationStackList<'o, 'tcx> { + TraitObligationStackList { cache, head: None } } fn with(r: &'o TraitObligationStack<'o, 'tcx>) -> TraitObligationStackList<'o, 'tcx> { - TraitObligationStackList { head: Some(r) } + TraitObligationStackList { cache: r.cache(), head: Some(r) } } fn head(&self) -> Option<&'o TraitObligationStack<'o, 'tcx>> { self.head } + + fn depth(&self) -> usize { + if let Some(head) = self.head { + head.depth + } else { + 0 + } + } } impl<'o, 'tcx> Iterator for TraitObligationStackList<'o, 'tcx> { diff --git a/src/librustc_traits/evaluate_obligation.rs b/src/librustc_traits/evaluate_obligation.rs index 83aebd16e2400..59b743abaf14b 100644 --- a/src/librustc_traits/evaluate_obligation.rs +++ b/src/librustc_traits/evaluate_obligation.rs @@ -29,7 +29,7 @@ fn evaluate_obligation<'tcx>( let mut selcx = SelectionContext::with_query_mode(&infcx, TraitQueryMode::Canonical); let obligation = Obligation::new(ObligationCause::dummy(), param_env, predicate); - selcx.evaluate_obligation_recursively(&obligation) + selcx.evaluate_root_obligation(&obligation) }, ) }