From 9d4e53e08d838299bea748c832b67712cf73cf05 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Wed, 6 Jul 2022 15:37:42 -0700 Subject: [PATCH 1/2] Record ongoing backtrack attempts (#16075) A race condition was possible in backtracking where if a `Node` which produced a particular `Digest` had already been invalidated by one consumer, a second consumer would fail to find a source for the `Digest`, and would report "Could not identify a process to backtrack to". Fixes #15995. [ci skip-build-wheels] --- src/rust/engine/src/context.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/rust/engine/src/context.rs b/src/rust/engine/src/context.rs index 4a4b41b871f..bb4384b7ba8 100644 --- a/src/rust/engine/src/context.rs +++ b/src/rust/engine/src/context.rs @@ -22,6 +22,7 @@ use async_oncecell::OnceCell; use cache::PersistentCache; use fs::{safe_create_dir_all_ioerror, GitignoreStyleExcludes, PosixFS}; use graph::{self, EntryId, Graph, InvalidationResult, NodeContext}; +use hashing::Digest; use log::info; use parking_lot::Mutex; use process_execution::{ @@ -650,10 +651,12 @@ pub struct Context { run_id: RunId, /// The number of attempts which have been made to backtrack to a particular ExecuteProcess node. /// - /// Presence in this map at process runtime indicates that the pricess is being retried, and that + /// Presence in this map at process runtime indicates that the process is being retried, and that /// there was something invalid or unusable about previous attempts. Successive attempts should /// run in a different mode (skipping caches, etc) to attempt to produce a valid result. backtrack_levels: Arc>>, + /// The Digests that we have successfully invalidated a Node for. + backtrack_digests: Arc>>, stats: Arc>, } @@ -666,6 +669,7 @@ impl Context { session, run_id, backtrack_levels: Arc::default(), + backtrack_digests: Arc::default(), stats: Arc::default(), } } @@ -704,7 +708,7 @@ impl Context { return result; }; - // Locate the source(s) of this Digest and their backtracking levels. + // Locate live source(s) of this Digest and their backtracking levels. // TODO: Currently needs a combination of `visit_live` and `invalidate_from_roots` because // `invalidate_from_roots` cannot view `Node` results. Would be more efficient as a merged // method. @@ -719,8 +723,23 @@ impl Context { }); if candidate_roots.is_empty() { - // We did not identify any roots to invalidate: allow the Node to fail. - return result; + // If there are no live sources of the Digest, see whether any have already been invalidated + // by other consumers. + if self.backtrack_digests.lock().get(&digest).is_some() { + // Some other consumer has already identified and invalidated the source of this Digest: we + // can wait for the attempt to complete. + return Err(Failure::Invalidated); + } else { + // There are no live or invalidated sources of this Digest. Directly fail. + return result.map_err(|e| { + throw(format!( + "Could not identify a process to backtrack to for: {e}" + )) + }); + } + } else { + // We have identified a Node to backtrack on. Record it. + self.backtrack_digests.lock().insert(digest); } // Attempt to trigger backtrack attempts for the matched Nodes. It's possible that we are not @@ -803,6 +822,7 @@ impl NodeContext for Context { session: self.session.clone(), run_id: self.run_id, backtrack_levels: self.backtrack_levels.clone(), + backtrack_digests: self.backtrack_digests.clone(), stats: self.stats.clone(), } } From a084dfde7dd25a01db8bbf3368631693ee0545b4 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Wed, 6 Jul 2022 19:18:56 -0700 Subject: [PATCH 2/2] Add missing import due to merge conflict. # Building wheels and fs_util will be skipped. Delete if not intended. [ci skip-build-wheels] --- src/rust/engine/src/context.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/engine/src/context.rs b/src/rust/engine/src/context.rs index bb4384b7ba8..c5f21bfa5bb 100644 --- a/src/rust/engine/src/context.rs +++ b/src/rust/engine/src/context.rs @@ -13,7 +13,7 @@ use std::time::Duration; use crate::intrinsics::Intrinsics; use crate::nodes::{ExecuteProcess, NodeKey, NodeOutput, NodeResult, WrappedNode}; -use crate::python::Failure; +use crate::python::{throw, Failure}; use crate::session::{Session, Sessions}; use crate::tasks::{Rule, Tasks}; use crate::types::Types;