vercel · chris-olszewski · Aug 15, 2023 · Aug 10, 2023 · Aug 10, 2023 · Aug 15, 2023
@@ -8,7 +8,11 @@ use std::{
 pub use builder::EngineBuilder;
 use petgraph::Graph;
 
-use crate::{run::task_id::TaskId, task_graph::TaskDefinition};
+use crate::{
+    package_graph::{PackageGraph, WorkspaceName},
+    run::task_id::TaskId,
+    task_graph::TaskDefinition,
+};
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub enum TaskNode {
@@ -110,4 +114,110 @@ impl Engine<Built> {
                 .collect(),
         )
     }
+
+    /// Validates the persistent-task constraints of the built task graph.
+    ///
+    /// Every task node in the graph is visited. For each of its outgoing
+    /// neighbours (the tasks it depends on) the dependency must:
+    ///
+    /// * have an entry in `task_definitions`,
+    /// * belong to a package whose package.json is known to `package_graph`,
+    /// * not be a persistent task that is backed by a script in that
+    ///   package.json.
+    ///
+    /// While walking the graph, tasks whose own definition is persistent are
+    /// counted and the total is compared against `concurrency`.
+    ///
+    /// # Errors
+    ///
+    /// Returns all collected [`ValidateError`]s if at least one check failed.
+    /// Checking of a node stops at its first failing dependency, so a node
+    /// contributes at most one error, and a node that produced an error is
+    /// not included in the persistent task count.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the graph yields a node index without a node weight, which
+    /// would mean the graph itself is inconsistent.
+    pub fn validate(
+        &self,
+        package_graph: &PackageGraph,
+        concurrency: u32,
+    ) -> Result<(), Vec<ValidateError>> {
+        // TODO(olszewski) once this is hooked up to a real run, we should
+        // see if using rayon to parallelize would provide a speedup
+        let (persistent_count, mut validation_errors) = self
+            .task_graph
+            .node_indices()
+            .map(|node_index| {
+                let TaskNode::Task(task_id) = self
+                    .task_graph
+                    .node_weight(node_index)
+                    .expect("graph should contain weight for node index")
+                else {
+                    // No need to check the root node if that's where we are.
+                    return Ok(false);
+                };
+                let is_persistent = self
+                    .task_definitions
+                    .get(task_id)
+                    .map_or(false, |task_def| task_def.persistent);
+
+                for dep_index in self
+                    .task_graph
+                    .neighbors_directed(node_index, petgraph::Direction::Outgoing)
+                {
+                    let TaskNode::Task(dep_id) = self
+                        .task_graph
+                        .node_weight(dep_index)
+                        .expect("node index not in graph")
+                    else {
+                        // A non-task (root) neighbour has no task id to look
+                        // up, so there is nothing to validate; skip it rather
+                        // than aborting the whole process.
+                        continue;
+                    };
+
+                    let task_definition = self.task_definitions.get(dep_id).ok_or_else(|| {
+                        ValidateError::MissingTask {
+                            task_id: dep_id.to_string(),
+                            package_name: dep_id.package().to_string(),
+                        }
+                    })?;
+
+                    let package_json = package_graph
+                        .package_json(&WorkspaceName::from(dep_id.package()))
+                        .ok_or_else(|| ValidateError::MissingPackageJson {
+                            package: dep_id.package().to_string(),
+                        })?;
+                    // Only a persistent task that has a script entry in its
+                    // package.json is rejected as a dependency.
+                    if task_definition.persistent
+                        && package_json.scripts.contains_key(dep_id.task())
+                    {
+                        return Err(ValidateError::DependencyOnPersistentTask {
+                            persistent_task: dep_id.to_string(),
+                            dependant: task_id.to_string(),
+                        });
+                    }
+                }
+
+                Ok(is_persistent)
+            })
+            .fold((0, Vec::new()), |(mut count, mut errs), result| {
+                match result {
+                    Ok(true) => count += 1,
+                    Ok(false) => (),
+                    Err(e) => errs.push(e),
+                }
+                (count, errs)
+            });
+
+        if persistent_count > concurrency {
+            validation_errors.push(ValidateError::PersistentTasksExceedConcurrency {
+                persistent_count,
+                concurrency,
+            });
+        }
+
+        // Succeed only when nothing was collected; otherwise hand every
+        // problem back to the caller at once.
+        if validation_errors.is_empty() {
+            Ok(())
+        } else {
+            Err(validation_errors)
+        }
+    }
+}
+
+/// Problems that `Engine::validate` can report about a built task graph.
+#[derive(Debug, thiserror::Error)]
+pub enum ValidateError {
+    /// A task that another task depends on has no entry in the engine's
+    /// task definitions.
+    #[error("Cannot find task definition for {task_id} in package {package_name}")]
+    MissingTask {
+        task_id: String,
+        package_name: String,
+    },
+    /// The package graph returned no package.json for the package that a
+    /// dependency task belongs to.
+    #[error("Cannot find package {package}")]
+    MissingPackageJson { package: String },
+    /// A task depends on a persistent task that has a script entry in its
+    /// package.json.
+    #[error("\"{persistent_task}\" is a persistent task, \"{dependant}\" cannot depend on it")]
+    DependencyOnPersistentTask {
+        persistent_task: String,
+        dependant: String,
+    },
+    /// The number of persistent tasks counted in the graph is greater than
+    /// the configured concurrency.
+    #[error(
+        "You have {persistent_count} persistent tasks, but `turbo` is configured for concurrency \
+         of {concurrency}. Set --concurrency to at least {persistent_count}"
+    )]
+    PersistentTasksExceedConcurrency {
+        persistent_count: u32,
+        concurrency: u32,
+    },
 }
@@ -44,8 +44,8 @@
 #[derive(Debug)]
 pub struct RunOpts<'a> {
     pub(crate) tasks: &'a [String],
-    concurrency: u32,
+    pub(crate) concurrency: u32,
     parallel: bool,
     pub(crate) env_mode: EnvMode,
     // Whether or not to infer the framework for each workspace.
    pub(crate) framework_inference: bool,

@@ -4,7 +4,8 @@ mod global_hash;
 mod scope;
 pub mod task_id;
 
-use anyhow::{Context as ErrorContext, Result};
+use anyhow::{anyhow, Context as ErrorContext, Result};
+use itertools::Itertools;
 use tracing::{debug, info};
 use turborepo_cache::{http::APIAuth, AsyncCache};
 use turborepo_env::EnvironmentVariableMap;
@@ -138,7 +139,7 @@ impl Run {
         )?;
 
         info!("created cache");
-        let _engine = EngineBuilder::new(
+        let engine = EngineBuilder::new(
             &self.base.repo_root,
             &pkg_dep_graph,
             opts.run_opts.single_package,
@@ -164,6 +165,9 @@ impl Run {
         )
         .build()?;
 
+        engine
+            .validate(&pkg_dep_graph, opts.run_opts.concurrency)
+            .map_err(|errors| anyhow!("Validation failed:\n{}", errors.into_iter().join("\n")))?;
         Ok(())
     }
 }

@@ -119,7 +119,7 @@ pub struct TaskDefinition {
 
     // Persistent indicates whether the Task is expected to exit or not
     // Tasks marked Persistent do not exit (e.g. --watch mode or dev servers)
-    persistent: bool,
+    pub persistent: bool,
 }
 
 impl BookkeepingTaskDefinition {