Skip to content

Commit 565f90a

Browse files
jensengchris-olszewskianthonyshew
authored
feat(turborepo): --continue=dependencies-successful (#10023)
### Description Context: #7461 Convert `--continue` into a tri-state option, introducing a new mode that allows independent tasks to continue running when an error occurs (dependent tasks will be canceled). The possible values for `--continue` are now: - `never` (default) -- on error: cancel all tasks - `dependencies-successful` -- on error: continue running tasks whose dependencies have succeeded - `always` -- on error: continue running all tasks, even those whose dependencies have failed Setting `--continue` without a value is equivalent to setting it to `always` ### Testing Instructions See modified/added tests in the PR, but basically: - `turbo run <cmd> --continue=never` (or omitting it) will abort when an error occurs - `turbo run <cmd> --continue=dependencies-successful` will cancel dependent tasks when an error occurs, but keep running others - `turbo run <cmd> --continue=always` (or `--continue`) will keep running all other tasks when an error occurs --------- Co-authored-by: Chris Olszewski <chris.olszewski@vercel.com> Co-authored-by: Anthony Shew <anthonyshew@gmail.com>
1 parent 20736f9 commit 565f90a

File tree

16 files changed

+285
-91
lines changed

16 files changed

+285
-91
lines changed

crates/turborepo-graph-utils/src/walker.rs

+77-20
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ use tracing::log::trace;
1414
pub struct Walker<N, S> {
1515
marker: std::marker::PhantomData<S>,
1616
cancel: watch::Sender<bool>,
17-
node_events: Option<mpsc::Receiver<(N, oneshot::Sender<()>)>>,
17+
node_events: Option<mpsc::Receiver<(N, oneshot::Sender<bool>)>>,
1818
join_handles: FuturesUnordered<JoinHandle<()>>,
1919
}
2020

2121
pub struct Start;
2222
pub struct Walking;
2323

24-
pub type WalkMessage<N> = (N, oneshot::Sender<()>);
24+
pub type WalkMessage<N> = (N, oneshot::Sender<bool>);
2525

2626
// These constraint might look very stiff, but since all of the petgraph graph
2727
// types use integers as node ids and GraphBase already constraints these types
@@ -37,7 +37,7 @@ impl<N: Eq + Hash + Copy + Send + 'static> Walker<N, Start> {
3737
let mut rxs = HashMap::new();
3838
for node in graph.node_identifiers() {
3939
// Each node can finish at most once so we set the capacity to 1
40-
let (tx, rx) = broadcast::channel::<()>(1);
40+
let (tx, rx) = broadcast::channel::<bool>(1);
4141
txs.insert(node, tx);
4242
rxs.insert(node, rx);
4343
}
@@ -76,8 +76,14 @@ impl<N: Eq + Hash + Copy + Send + 'static> Walker<N, Start> {
7676
results = deps_fut => {
7777
for res in results {
7878
match res {
79-
// No errors from reading dependency channels
80-
Ok(()) => (),
79+
// Dependency channel signaled this subgraph is terminal;
80+
// let our dependents know too (if any)
81+
Ok(false) => {
82+
tx.send(false).ok();
83+
return;
84+
}
85+
// Otherwise continue
86+
Ok(true) => (),
8187
// A dependency finished without sending a finish
8288
// Could happen if a cancel is sent and is racing with deps
8389
// so we interpret this as a cancel.
@@ -95,7 +101,7 @@ impl<N: Eq + Hash + Copy + Send + 'static> Walker<N, Start> {
95101
}
96102
}
97103

98-
let (callback_tx, callback_rx) = oneshot::channel::<()>();
104+
let (callback_tx, callback_rx) = oneshot::channel::<bool>();
99105
// do some err handling with the send failure?
100106
if node_tx.send((node, callback_tx)).await.is_err() {
101107
// Receiving end of node channel has been closed/dropped
@@ -104,14 +110,15 @@ impl<N: Eq + Hash + Copy + Send + 'static> Walker<N, Start> {
104110
trace!("Receiver was dropped before walk finished without calling cancel");
105111
return;
106112
}
107-
if callback_rx.await.is_err() {
113+
let Ok(callback_result) = callback_rx.await else {
108114
// If the caller drops the callback sender without signaling
109115
// that the node processing is finished we assume that it is finished.
110-
trace!("Callback sender was dropped without sending a finish signal")
111-
}
116+
trace!("Callback sender was dropped without sending a finish signal");
117+
return;
118+
};
112119
// Send errors indicate that there are no receivers which
113120
// happens when this node has no dependents
114-
tx.send(()).ok();
121+
tx.send(callback_result).ok();
115122
}
116123
}
117124
}));
@@ -204,7 +211,7 @@ mod test {
204211
let (walker, mut node_emitter) = walker.walk();
205212
while let Some((index, done)) = node_emitter.recv().await {
206213
visited.push(index);
207-
done.send(()).unwrap();
214+
done.send(true).unwrap();
208215
}
209216
walker.wait().await.unwrap();
210217
assert_eq!(visited, vec![c, b, a]);
@@ -228,7 +235,7 @@ mod test {
228235
walker.cancel().unwrap();
229236

230237
visited.push(index);
231-
done.send(()).unwrap();
238+
done.send(true).unwrap();
232239
}
233240
assert_eq!(visited, vec![c]);
234241
let Walker { join_handles, .. } = walker;
@@ -272,16 +279,16 @@ mod test {
272279
tokio::spawn(async move {
273280
is_b_done.await.unwrap();
274281
visited.lock().unwrap().push(index);
275-
done.send(()).unwrap();
282+
done.send(true).unwrap();
276283
});
277284
} else if index == b {
278285
// send the signal that b is finished
279286
visited.lock().unwrap().push(index);
280-
done.send(()).unwrap();
287+
done.send(true).unwrap();
281288
b_done.take().unwrap().send(()).unwrap();
282289
} else {
283290
visited.lock().unwrap().push(index);
284-
done.send(()).unwrap();
291+
done.send(true).unwrap();
285292
}
286293
}
287294
walker.wait().await.unwrap();
@@ -322,12 +329,12 @@ mod test {
322329
tokio::spawn(async move {
323330
is_b_done.await.unwrap();
324331
visited.lock().unwrap().push(index);
325-
done.send(()).unwrap();
332+
done.send(true).unwrap();
326333
});
327334
} else if index == b {
328335
// send the signal that b is finished
329336
visited.lock().unwrap().push(index);
330-
done.send(()).unwrap();
337+
done.send(true).unwrap();
331338
b_done.take().unwrap().send(()).unwrap();
332339
} else if index == a {
333340
// don't mark as done until d finishes
@@ -336,19 +343,69 @@ mod test {
336343
tokio::spawn(async move {
337344
is_d_done.await.unwrap();
338345
visited.lock().unwrap().push(index);
339-
done.send(()).unwrap();
346+
done.send(true).unwrap();
340347
});
341348
} else if index == d {
342349
// send the signal that b is finished
343350
visited.lock().unwrap().push(index);
344-
done.send(()).unwrap();
351+
done.send(true).unwrap();
345352
d_done.take().unwrap().send(()).unwrap();
346353
} else {
347354
visited.lock().unwrap().push(index);
348-
done.send(()).unwrap();
355+
done.send(true).unwrap();
349356
}
350357
}
351358
walker.wait().await.unwrap();
352359
assert_eq!(visited.lock().unwrap().as_slice(), &[c, b, e, d, a]);
353360
}
361+
362+
#[tokio::test]
363+
async fn test_dependent_cancellation() {
364+
// a -- b -- c -- f
365+
// \ /
366+
// - d -- e -
367+
let mut g = Graph::new();
368+
let a = g.add_node("a");
369+
let b = g.add_node("b");
370+
let c = g.add_node("c");
371+
let d = g.add_node("d");
372+
let e = g.add_node("e");
373+
let f = g.add_node("f");
374+
g.add_edge(a, b, ());
375+
g.add_edge(b, c, ());
376+
g.add_edge(a, d, ());
377+
g.add_edge(d, e, ());
378+
g.add_edge(c, f, ());
379+
g.add_edge(e, f, ());
380+
381+
// We intentionally wait to mark c as finished until e has been finished
382+
let walker = Walker::new(&g);
383+
let visited = Arc::new(Mutex::new(Vec::new()));
384+
let (walker, mut node_emitter) = walker.walk();
385+
let (e_done, is_e_done) = oneshot::channel::<()>();
386+
let mut e_done = Some(e_done);
387+
let mut is_e_done = Some(is_e_done);
388+
while let Some((index, done)) = node_emitter.recv().await {
389+
if index == c {
390+
// don't mark as done until we get the signal that e is finished
391+
let is_e_done = is_e_done.take().unwrap();
392+
let visited = visited.clone();
393+
tokio::spawn(async move {
394+
is_e_done.await.unwrap();
395+
visited.lock().unwrap().push(index);
396+
done.send(true).unwrap();
397+
});
398+
} else if index == e {
399+
// send the signal that e is finished, and cancel its dependents
400+
visited.lock().unwrap().push(index);
401+
done.send(false).unwrap();
402+
e_done.take().unwrap().send(()).unwrap();
403+
} else {
404+
visited.lock().unwrap().push(index);
405+
done.send(true).unwrap();
406+
}
407+
}
408+
walker.wait().await.unwrap();
409+
assert_eq!(visited.lock().unwrap().as_slice(), &[f, e, c, b]);
410+
}
354411
}

crates/turborepo-lib/src/cli/mod.rs

+54-8
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,25 @@ impl fmt::Display for EnvMode {
160160
}
161161
}
162162

163+
#[derive(Copy, Clone, Debug, Default, PartialEq, ValueEnum, Serialize)]
164+
#[serde(rename_all = "lowercase")]
165+
pub enum ContinueMode {
166+
#[default]
167+
Never,
168+
DependenciesSuccessful,
169+
Always,
170+
}
171+
172+
impl fmt::Display for ContinueMode {
173+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174+
f.write_str(match self {
175+
ContinueMode::Never => "never",
176+
ContinueMode::DependenciesSuccessful => "dependencies-successful",
177+
ContinueMode::Always => "always",
178+
})
179+
}
180+
}
181+
163182
/// The parsed arguments from the command line. In general we should avoid using
164183
/// or mutating this directly, and instead use the fully canonicalized `Opts`
165184
/// struct.
@@ -825,10 +844,13 @@ pub struct ExecutionArgs {
825844
/// one-at-a-time) execution.
826845
#[clap(long)]
827846
pub concurrency: Option<String>,
828-
/// Continue execution even if a task exits with an error or non-zero
829-
/// exit code. The default behavior is to bail
830-
#[clap(long = "continue")]
831-
pub continue_execution: bool,
847+
/// Specify how task execution should proceed when an error occurs.
848+
/// Use "never" to cancel all tasks. Use "dependencies-successful" to
849+
/// continue running tasks whose dependencies have succeeded. Use "always"
850+
/// to continue running all tasks, even those whose dependencies have
851+
/// failed.
852+
#[clap(long = "continue", value_name = "CONTINUE", num_args = 0..=1, default_value = "never", default_missing_value = "always")]
853+
pub continue_execution: ContinueMode,
832854
/// Run turbo in single-package mode
833855
#[clap(long)]
834856
pub single_package: bool,
@@ -896,7 +918,16 @@ impl ExecutionArgs {
896918
// default to false
897919
track_usage!(telemetry, self.framework_inference, |val: bool| !val);
898920

899-
track_usage!(telemetry, self.continue_execution, |val| val);
921+
track_usage!(telemetry, self.continue_execution, |val| matches!(
922+
val,
923+
ContinueMode::Always | ContinueMode::DependenciesSuccessful
924+
));
925+
telemetry.track_arg_value(
926+
"continue-execution-strategy",
927+
self.continue_execution,
928+
EventType::NonSensitive,
929+
);
930+
900931
track_usage!(telemetry, self.single_package, |val| val);
901932
track_usage!(telemetry, self.only, |val| val);
902933
track_usage!(telemetry, &self.cache_dir, Option::is_some);
@@ -1653,7 +1684,7 @@ mod test {
16531684
use itertools::Itertools;
16541685
use pretty_assertions::assert_eq;
16551686

1656-
use crate::cli::{ExecutionArgs, LinkTarget, RunArgs};
1687+
use crate::cli::{ContinueMode, ExecutionArgs, LinkTarget, RunArgs};
16571688

16581689
struct CommandTestCase {
16591690
command: &'static str,
@@ -1898,14 +1929,29 @@ mod test {
18981929
command: Some(Command::Run {
18991930
execution_args: Box::new(ExecutionArgs {
19001931
tasks: vec!["build".to_string()],
1901-
continue_execution: true,
1932+
continue_execution: ContinueMode::Always,
1933+
..get_default_execution_args()
1934+
}),
1935+
run_args: Box::new(get_default_run_args())
1936+
}),
1937+
..Args::default()
1938+
} ;
1939+
"continue option with no value"
1940+
)]
1941+
#[test_case::test_case(
1942+
&["turbo", "run", "build", "--continue=dependencies-successful"],
1943+
Args {
1944+
command: Some(Command::Run {
1945+
execution_args: Box::new(ExecutionArgs {
1946+
tasks: vec!["build".to_string()],
1947+
continue_execution: ContinueMode::DependenciesSuccessful,
19021948
..get_default_execution_args()
19031949
}),
19041950
run_args: Box::new(get_default_run_args())
19051951
}),
19061952
..Args::default()
19071953
} ;
1908-
"continue flag"
1954+
"continue option with explicit value"
19091955
)]
19101956
#[test_case::test_case(
19111957
&["turbo", "run", "build", "--dry-run"],

crates/turborepo-lib/src/cli/snapshots/turborepo_lib__cli__test__turbo-watch-build---no-daemon.snap

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ error: unexpected argument '--no-daemon' found
77
tip: a similar argument exists: '--no-update-notifier'
88
tip: to pass '--no-daemon' as a value, use '-- --no-daemon'
99

10-
Usage: turbo watch --no-update-notifier <--cache-dir <CACHE_DIR>|--concurrency <CONCURRENCY>|--continue|--single-package|--framework-inference [<BOOL>]|--global-deps <GLOBAL_DEPS>|--env-mode [<ENV_MODE>]|--filter <FILTER>|--affected|--output-logs <OUTPUT_LOGS>|--log-order <LOG_ORDER>|--only|--pkg-inference-root <PKG_INFERENCE_ROOT>|--log-prefix <LOG_PREFIX>|TASKS|PASS_THROUGH_ARGS>
10+
Usage: turbo watch --no-update-notifier <--cache-dir <CACHE_DIR>|--concurrency <CONCURRENCY>|--continue [<CONTINUE>]|--single-package|--framework-inference [<BOOL>]|--global-deps <GLOBAL_DEPS>|--env-mode [<ENV_MODE>]|--filter <FILTER>|--affected|--output-logs <OUTPUT_LOGS>|--log-order <LOG_ORDER>|--only|--pkg-inference-root <PKG_INFERENCE_ROOT>|--log-prefix <LOG_PREFIX>|TASKS|PASS_THROUGH_ARGS>
1111

1212
For more information, try '--help'.

crates/turborepo-lib/src/engine/execute.rs

+22-12
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ impl From<mpsc::error::SendError<Message<VisitorData, VisitorResult>>> for Execu
5050
}
5151

5252
#[derive(Debug, Clone, Copy)]
53-
pub struct StopExecution;
53+
pub enum StopExecution {
54+
AllTasks,
55+
DependentTasks,
56+
}
5457

5558
impl Engine {
5659
/// Execute a task graph by sending task ids to the visitor
@@ -93,7 +96,7 @@ impl Engine {
9396
.expect("node id should be present")
9497
else {
9598
// Root task has nothing to do so we don't emit any event for it
96-
if done.send(()).is_err() {
99+
if done.send(true).is_err() {
97100
debug!(
98101
"Graph walker done callback receiver was closed before done signal \
99102
could be sent"
@@ -114,23 +117,30 @@ impl Engine {
114117
let (message, result) = Message::new(task_id.clone());
115118
visitor.send(message).await?;
116119

117-
if let Err(StopExecution) = result.await.unwrap_or_else(|_| {
120+
let mut continue_walking_subgraph = true;
121+
match result.await.unwrap_or_else(|_| {
118122
// If the visitor doesn't send a callback, then we assume the task finished
119123
tracing::trace!(
120124
"Engine visitor dropped callback sender without sending result"
121125
);
122126
Ok(())
123127
}) {
124-
if walker
125-
.lock()
126-
.expect("Walker mutex poisoned")
127-
.cancel()
128-
.is_err()
129-
{
130-
debug!("Unable to cancel graph walk");
128+
Err(StopExecution::AllTasks) => {
129+
if walker
130+
.lock()
131+
.expect("Walker mutex poisoned")
132+
.cancel()
133+
.is_err()
134+
{
135+
debug!("Unable to cancel graph walk");
136+
}
131137
}
132-
}
133-
if done.send(()).is_err() {
138+
Err(StopExecution::DependentTasks) => {
139+
continue_walking_subgraph = false;
140+
}
141+
_ => (),
142+
};
143+
if done.send(continue_walking_subgraph).is_err() {
134144
debug!("Graph walk done receiver closed before node was finished processing");
135145
}
136146
Ok(())

0 commit comments

Comments
 (0)