From c73345d33251bf85448f68c553d6f8adc5901ac8 Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Mon, 14 Aug 2023 17:13:34 -0400 Subject: [PATCH 1/8] task hash implementation --- crates/turborepo-env/src/lib.rs | 6 + crates/turborepo-lib/src/engine/mod.rs | 8 + crates/turborepo-lib/src/hash/mod.rs | 80 +++--- crates/turborepo-lib/src/hash/proto.capnp | 5 +- crates/turborepo-lib/src/lib.rs | 1 + crates/turborepo-lib/src/run/global_hash.rs | 22 +- crates/turborepo-lib/src/run/mod.rs | 22 +- crates/turborepo-lib/src/run/task_id.rs | 6 +- crates/turborepo-lib/src/task_graph/mod.rs | 8 +- crates/turborepo-lib/src/task_hash.rs | 263 ++++++++++++++++++++ crates/turborepo-scm/src/manual.rs | 4 +- crates/turborepo-scm/src/package_deps.rs | 12 +- 12 files changed, 364 insertions(+), 73 deletions(-) create mode 100644 crates/turborepo-lib/src/task_hash.rs diff --git a/crates/turborepo-env/src/lib.rs b/crates/turborepo-env/src/lib.rs index f8c2ee706d336..bafa53e0d2844 100644 --- a/crates/turborepo-env/src/lib.rs +++ b/crates/turborepo-env/src/lib.rs @@ -13,6 +13,12 @@ use thiserror::Error; const DEFAULT_ENV_VARS: [&str; 1] = ["VERCEL_ANALYTICS_ID"]; +/// Environment mode after we've resolved the `Infer` variant +pub enum ResolvedEnvMode { + Loose, + Strict, +} + #[derive(Clone, Debug, Error)] pub enum Error { #[error("Failed to parse regex: {0}")] diff --git a/crates/turborepo-lib/src/engine/mod.rs b/crates/turborepo-lib/src/engine/mod.rs index ffef61fda0ee3..b213ef9a045f9 100644 --- a/crates/turborepo-lib/src/engine/mod.rs +++ b/crates/turborepo-lib/src/engine/mod.rs @@ -125,6 +125,14 @@ impl Engine { self.task_definitions.get(task_id) } + pub fn tasks(&self) -> impl Iterator { + self.task_graph.node_weights() + } + + pub fn task_definitions(&self) -> &HashMap, TaskDefinition> { + &self.task_definitions + } + pub fn validate( &self, package_graph: &PackageGraph, diff --git a/crates/turborepo-lib/src/hash/mod.rs b/crates/turborepo-lib/src/hash/mod.rs index 32fe73cafdc12..2d9eb0f5836fd 100644 --- a/crates/turborepo-lib/src/hash/mod.rs +++ b/crates/turborepo-lib/src/hash/mod.rs @@ -9,11 +9,15 @@ mod traits; use std::collections::HashMap; use capnp::message::{Builder, HeapAllocator}; +use serde::Serialize; pub use traits::TurboHash; +use turborepo_env::ResolvedEnvMode; -use crate::cli::EnvMode; +use crate::{cli::EnvMode, task_graph::TaskOutputs}; mod proto_capnp { + use turborepo_env::ResolvedEnvMode; + use crate::cli::EnvMode; include!(concat!(env!("OUT_DIR"), "/src/hash/proto_capnp.rs")); @@ -28,58 +32,55 @@ mod proto_capnp { } } - impl From for task_hashable::EnvMode { - fn from(value: EnvMode) -> Self { + impl From for task_hashable::EnvMode { + fn from(value: ResolvedEnvMode) -> Self { match value { - EnvMode::Infer => task_hashable::EnvMode::Infer, - EnvMode::Loose => task_hashable::EnvMode::Loose, - EnvMode::Strict => task_hashable::EnvMode::Strict, + ResolvedEnvMode::Loose => task_hashable::EnvMode::Loose, + ResolvedEnvMode::Strict => task_hashable::EnvMode::Strict, } } } } -struct TaskHashable { +pub struct TaskHashable<'a> { // hashes - global_hash: String, - task_dependency_hashes: Vec, - hash_of_files: String, - external_deps_hash: String, + pub(crate) global_hash: &'a str, + pub(crate) task_dependency_hashes: Vec<&'a String>, + pub(crate) hash_of_files: &'a str, + pub(crate) external_deps_hash: String, // task - package_dir: turbopath::RelativeUnixPathBuf, - task: String, - outputs: TaskOutputs, - pass_thru_args: Vec, + pub(crate) package_dir: turbopath::RelativeUnixPathBuf, + pub(crate) task: &'a str, + pub(crate) outputs: TaskOutputs, + pub(crate) pass_through_args: &'a [String], // env - env: Vec, - resolved_env_vars: EnvVarPairs, - pass_thru_env: Vec, - env_mode: EnvMode, - dot_env: Vec, + pub(crate) env: &'a [String], + pub(crate) resolved_env_vars: EnvVarPairs, + pub(crate) pass_through_env: &'a [String], + pub(crate) env_mode: ResolvedEnvMode, + pub(crate) dot_env: &'a [turbopath::RelativeUnixPathBuf], } #[derive(Debug)] -pub struct GlobalHashable { +pub struct GlobalHashable<'a> { pub global_cache_key: &'static str, pub global_file_hash_map: HashMap, pub root_external_dependencies_hash: String, - pub env: Vec, + pub env: &'a [String], pub resolved_env_vars: Vec, - pub pass_through_env: Vec, + pub pass_through_env: &'a [String], pub env_mode: EnvMode, pub framework_inference: bool, - pub dot_env: Vec, -} - -struct TaskOutputs { - inclusions: Vec, - exclusions: Vec, + pub dot_env: &'a [turbopath::RelativeUnixPathBuf], } pub struct LockFilePackages(pub Vec); -struct FileHashes(HashMap); + +#[derive(Debug, Clone, Serialize)] +#[serde(transparent)] +pub struct FileHashes(pub HashMap); impl From for Builder { fn from(value: TaskOutputs) -> Self { @@ -192,7 +193,7 @@ impl From for Builder { type EnvVarPairs = Vec; -impl From for Builder { +impl From> for Builder { fn from(task_hashable: TaskHashable) -> Self { let mut message = ::capnp::message::TypedBuilder::::new_default(); @@ -224,8 +225,8 @@ impl From for Builder { { let mut pass_thru_args_builder = builder .reborrow() - .init_pass_thru_args(task_hashable.pass_thru_args.len() as u32); - for (i, arg) in task_hashable.pass_thru_args.iter().enumerate() { + .init_pass_thru_args(task_hashable.pass_through_args.len() as u32); + for (i, arg) in task_hashable.pass_through_args.iter().enumerate() { pass_thru_args_builder.set(i as u32, arg); } } @@ -240,8 +241,8 @@ impl From for Builder { { let mut pass_thru_env_builder = builder .reborrow() - .init_pass_thru_env(task_hashable.pass_thru_env.len() as u32); - for (i, env) in task_hashable.pass_thru_env.iter().enumerate() { + .init_pass_thru_env(task_hashable.pass_through_env.len() as u32); + for (i, env) in task_hashable.pass_through_env.iter().enumerate() { pass_thru_env_builder.set(i as u32, env); } } @@ -281,7 +282,7 @@ impl From for Builder { } } -impl From for Builder { +impl<'a> From> for Builder { fn from(hashable: GlobalHashable) -> Self { let mut message = ::capnp::message::TypedBuilder::::new_default(); @@ -372,6 +373,7 @@ impl From for Builder { #[cfg(test)] mod test { use test_case::test_case; + use turborepo_env::ResolvedEnvMode; use turborepo_lockfiles::Package; use super::{ @@ -392,11 +394,11 @@ mod test { inclusions: vec!["inclusions".to_string()], exclusions: vec!["exclusions".to_string()], }, - pass_thru_args: vec!["pass_thru_args".to_string()], + pass_through_args: vec!["pass_thru_args".to_string()], env: vec!["env".to_string()], resolved_env_vars: vec![], - pass_thru_env: vec!["pass_thru_env".to_string()], - env_mode: EnvMode::Infer, + pass_through_env: vec!["pass_thru_env".to_string()], + env_mode: ResolvedEnvMode::Loose, dot_env: vec![turbopath::RelativeUnixPathBuf::new("dotenv".to_string()).unwrap()], }; diff --git a/crates/turborepo-lib/src/hash/proto.capnp b/crates/turborepo-lib/src/hash/proto.capnp index b5887f0e9d734..4f6f4c00b9459 100644 --- a/crates/turborepo-lib/src/hash/proto.capnp +++ b/crates/turborepo-lib/src/hash/proto.capnp @@ -22,9 +22,8 @@ struct TaskHashable { dotEnv @12 :List(Text); enum EnvMode { - infer @0; - loose @1; - strict @2; + loose @0; + strict @1; } } diff --git a/crates/turborepo-lib/src/lib.rs b/crates/turborepo-lib/src/lib.rs index 1d92d0dc912b1..d7a698dd6b111 100644 --- a/crates/turborepo-lib/src/lib.rs +++ b/crates/turborepo-lib/src/lib.rs @@ -29,6 +29,7 @@ mod rewrite_json; mod run; mod shim; mod task_graph; +mod task_hash; mod tracing; use anyhow::Result; diff --git a/crates/turborepo-lib/src/run/global_hash.rs b/crates/turborepo-lib/src/run/global_hash.rs index eddaefbc73996..1db4d0abc2721 100644 --- a/crates/turborepo-lib/src/run/global_hash.rs +++ b/crates/turborepo-lib/src/run/global_hash.rs @@ -26,33 +26,33 @@ const GLOBAL_CACHE_KEY: &str = "You don't understand! I coulda had class. I coul enum GlobalHashError {} #[derive(Debug, Default)] -pub struct GlobalHashableInputs { +pub struct GlobalHashableInputs<'a> { global_cache_key: &'static str, global_file_hash_map: HashMap, root_external_dependencies_hash: String, - env: Vec, + env: &'a [String], // Only Option to allow #[derive(Default)] resolved_env_vars: Option, - pass_through_env: Vec, + pass_through_env: Option<&'a [String]>, env_mode: EnvMode, framework_inference: bool, - dot_env: Vec, + dot_env: &'a [RelativeUnixPathBuf], } #[allow(clippy::too_many_arguments)] -pub fn get_global_hash_inputs( +pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( root_workspace: &WorkspaceInfo, root_path: &AbsoluteSystemPath, package_manager: &PackageManager, lockfile: Option<&L>, - global_file_dependencies: Vec, + global_file_dependencies: &'a [String], env_at_execution_start: &EnvironmentVariableMap, - global_env: Vec, - global_pass_through_env: Vec, + global_env: &'a [String], + global_pass_through_env: &'a [String], env_mode: EnvMode, framework_inference: bool, - dot_env: Vec, -) -> Result { + dot_env: &'a [RelativeUnixPathBuf], +) -> Result> { let global_hashable_env_vars = get_global_hashable_env_vars(env_at_execution_start, &global_env)?; @@ -122,7 +122,7 @@ pub fn get_global_hash_inputs( }) } -impl GlobalHashableInputs { +impl<'a> GlobalHashableInputs<'a> { pub fn calculate_global_hash_from_inputs(mut self) -> String { match self.env_mode { EnvMode::Infer if !self.pass_through_env.is_empty() => { diff --git a/crates/turborepo-lib/src/run/mod.rs b/crates/turborepo-lib/src/run/mod.rs index 81221d6a68f29..dd2e2eacc1b5e 100644 --- a/crates/turborepo-lib/src/run/mod.rs +++ b/crates/turborepo-lib/src/run/mod.rs @@ -32,6 +32,7 @@ use crate::{ package_json::PackageJson, run::global_hash::get_global_hash_inputs, task_graph::Visitor, + task_hash::PackageInputsHashes, }; #[derive(Debug)] @@ -205,13 +206,13 @@ impl Run { &self.base.repo_root, pkg_dep_graph.package_manager(), pkg_dep_graph.lockfile(), - root_turbo_json.global_deps, + &root_turbo_json.global_deps, &env_at_execution_start, - root_turbo_json.global_env, - root_turbo_json.global_pass_through_env, + &root_turbo_json.global_env, + &root_turbo_json.global_pass_through_env, opts.run_opts.env_mode, opts.run_opts.framework_inference, - root_turbo_json.global_dot_env, + &root_turbo_json.global_dot_env, )?; let global_hash = global_hash_inputs.calculate_global_hash_from_inputs(); @@ -234,6 +235,19 @@ impl Run { let visitor = Visitor::new(pkg_dep_graph, runcache, &opts); visitor.visit(engine).await?; + let tasks: Vec<_> = engine.tasks().collect(); + let workspaces = pkg_dep_graph.workspaces().collect(); + + let package_file_hashes = PackageInputsHashes::calculate_file_hashes( + scm, + engine.tasks(), + workspaces, + engine.task_definitions(), + &self.base.repo_root, + )?; + + debug!("package file hashes: {:?}", package_file_hashes); + Ok(()) } } diff --git a/crates/turborepo-lib/src/run/task_id.rs b/crates/turborepo-lib/src/run/task_id.rs index be269b1231b7d..a4d59263a3cfb 100644 --- a/crates/turborepo-lib/src/run/task_id.rs +++ b/crates/turborepo-lib/src/run/task_id.rs @@ -8,10 +8,10 @@ pub const TASK_DELIMITER: &str = "#"; pub const ROOT_PKG_NAME: &str = "//"; /// A task identifier as it will appear in the task graph -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize)] pub struct TaskId<'a> { - package: Cow<'a, str>, - task: Cow<'a, str>, + pub(crate) package: Cow<'a, str>, + pub(crate) task: Cow<'a, str>, } /// A task name as it appears in in a `turbo.json` it might be for all diff --git a/crates/turborepo-lib/src/task_graph/mod.rs b/crates/turborepo-lib/src/task_graph/mod.rs index 429e95998686d..c2c9852adccf4 100644 --- a/crates/turborepo-lib/src/task_graph/mod.rs +++ b/crates/turborepo-lib/src/task_graph/mod.rs @@ -77,11 +77,11 @@ pub struct TaskDefinition { pub(crate) cache: bool, // This field is custom-marshalled from `env` and `depends_on`` - env: Vec, + pub(crate) env: Vec, - pass_through_env: Vec, + pub(crate) pass_through_env: Vec, - dot_env: Vec, + pub(crate) dot_env: Vec, // TopologicalDependencies are tasks from package dependencies. // E.g. "build" is a topological dependency in: @@ -97,7 +97,7 @@ pub struct TaskDefinition { // Inputs indicate the list of files this Task depends on. If any of those files change // we can conclude that any cached outputs or logs for this Task should be invalidated. - inputs: Vec, + pub(crate) inputs: Vec, // OutputMode determines how we should log the output. pub(crate) output_mode: OutputLogsMode, diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs new file mode 100644 index 0000000000000..2b3f40f0b0bb8 --- /dev/null +++ b/crates/turborepo-lib/src/task_hash.rs @@ -0,0 +1,263 @@ +use std::{ + collections::{HashMap, HashSet}, + rc::Rc, +}; + +use serde::Serialize; +use thiserror::Error; +use tracing::debug; +use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf}; +use turborepo_env::{BySource, DetailedMap, EnvironmentVariableMap, ResolvedEnvMode}; +use turborepo_scm::SCM; + +use crate::{ + engine::TaskNode, + hash::{FileHashes, TaskHashable, TurboHash}, + package_graph::{WorkspaceInfo, WorkspaceName}, + run::task_id::{TaskId, ROOT_PKG_NAME}, + task_graph::TaskDefinition, +}; + +#[derive(Debug, Error)] +pub enum Error { + #[error("missing pipeline entry {0}")] + MissingPipelineEntry(TaskId<'static>), + #[error("missing package.json for {0}")] + MissingPackageJson(String), + #[error("cannot find package-file hash for {0}")] + MissingPackageFileHash(String), + #[error("missing hash for dependent task {0}")] + MissingDependencyTaskHash(String), + #[error(transparent)] + SCM(#[from] turborepo_scm::Error), + #[error(transparent)] + Env(#[from] turborepo_env::Error), + #[error(transparent)] + Regex(#[from] regex::Error), + #[error(transparent)] + Path(#[from] turbopath::PathError), +} + +#[derive(Debug)] +struct PackageFileHashInputs<'a> { + task_id: TaskId<'static>, + task_definition: &'a TaskDefinition, + workspace_name: WorkspaceName, +} + +impl TaskHashable<'_> { + fn calculate_task_hash(mut self) -> String { + if matches!(self.env_mode, ResolvedEnvMode::Loose) { + self.pass_through_env = &[]; + } + + self.hash() + } +} + +#[derive(Debug, Serialize)] +pub struct PackageInputsHashes { + // We make the TaskId a String for serialization purposes + hashes: HashMap, + expanded_hashes: HashMap, +} + +impl PackageInputsHashes { + pub fn calculate_file_hashes<'a>( + scm: SCM, + all_tasks: impl Iterator, + workspaces: HashMap<&WorkspaceName, &WorkspaceInfo>, + task_definitions: &HashMap, TaskDefinition>, + repo_root: &AbsoluteSystemPath, + ) -> Result { + let mut hash_tasks = Vec::new(); + + for task in all_tasks { + let TaskNode::Task(task_id) = task else { + continue; + }; + + if task_id.package() == ROOT_PKG_NAME { + continue; + } + + let task_definition = task_definitions + .get(&task_id) + .ok_or_else(|| Error::MissingPipelineEntry(task_id.clone()))?; + + // TODO: Look into making WorkspaceName take a Cow + let workspace_name = WorkspaceName::Other(task_id.package().to_string()); + + let package_file_hash_inputs = PackageFileHashInputs { + task_id: task_id.clone(), + task_definition, + workspace_name, + }; + + hash_tasks.push(package_file_hash_inputs); + } + + let mut hashes = HashMap::with_capacity(hash_tasks.len()); + let mut hash_objects = HashMap::with_capacity(hash_tasks.len()); + + for package_file_hash_inputs in hash_tasks { + let pkg = workspaces + .get(&package_file_hash_inputs.workspace_name) + .ok_or_else(|| { + Error::MissingPackageJson(package_file_hash_inputs.workspace_name.to_string()) + })?; + + let package_path = pkg + .package_json_path + .parent() + .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); + + let mut hash_object = scm.get_package_file_hashes( + &repo_root, + package_path, + &package_file_hash_inputs.task_definition.inputs, + )?; + + if !package_file_hash_inputs.task_definition.dot_env.is_empty() { + let package_path = pkg + .package_json_path + .parent() + .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); + let absolute_package_path = repo_root.resolve(package_path); + let dot_env_object = scm.hash_existing_of( + &absolute_package_path, + package_file_hash_inputs + .task_definition + .dot_env + .iter() + .map(|p| p.to_anchored_system_path_buf()), + )?; + + for (key, value) in dot_env_object { + hash_object.insert(key, value); + } + } + + let file_hashes = FileHashes(hash_object); + let hash = file_hashes.clone().hash(); + + hashes.insert(package_file_hash_inputs.task_id.to_string(), hash); + hash_objects.insert(package_file_hash_inputs.task_id.to_string(), file_hashes); + } + + Ok(PackageInputsHashes { + hashes: hashes, + expanded_hashes: hash_objects, + }) + } +} + +/// Caches package-inputs hashes, and package-task hashes. +struct TaskHasher { + package_inputs_hashes: PackageInputsHashes, + package_task_env_vars: HashMap, DetailedMap>, + package_task_hashes: HashMap, String>, + package_task_framework: HashMap, String>, + package_task_outputs: HashMap, Vec>, +} + +impl TaskHasher { + fn calculate_task_hash( + &mut self, + global_hash: &str, + do_framework_inference: bool, + env_at_execution_start: &EnvironmentVariableMap, + task_id: &TaskId, + task_definition: &TaskDefinition, + env_mode: ResolvedEnvMode, + workspace: &WorkspaceInfo, + dependency_set: &HashSet, + pass_through_args: &[String], + ) -> Result { + let hash_of_files = self + .package_inputs_hashes + .hashes + .get(&task_id.to_string()) + .ok_or_else(|| Error::MissingPackageFileHash(task_id.to_string()))?; + let mut explicit_env_var_map = EnvironmentVariableMap::default(); + let mut all_env_var_map = EnvironmentVariableMap::default(); + let mut matching_env_var_map = EnvironmentVariableMap::default(); + + if do_framework_inference { + todo!("framework inference not implemented yet") + } else { + all_env_var_map = env_at_execution_start.from_wildcards(&task_definition.env)?; + + explicit_env_var_map.union(&mut all_env_var_map); + } + + let env_vars = DetailedMap { + all: all_env_var_map, + by_source: BySource { + explicit: explicit_env_var_map, + matching: matching_env_var_map, + }, + }; + + let hashable_env_pairs = env_vars.all.to_hashable(); + let outputs = task_definition.hashable_outputs(&task_id); + let task_dependency_hashes = self.calculate_dependency_hashes(dependency_set)?; + + debug!( + "task hash env vars for {}:{}\n vars: {:?}", + task_id.package, task_id.task, hashable_env_pairs + ); + + let task_hashable = TaskHashable { + global_hash, + task_dependency_hashes, + package_dir: workspace.package_path().to_unix()?, + hash_of_files, + external_deps_hash: workspace.get_external_deps_hash(), + task: &task_id.task, + outputs, + + pass_through_args, + env: &task_definition.env, + resolved_env_vars: hashable_env_pairs, + pass_through_env: &task_definition.pass_through_env, + env_mode, + dot_env: &task_definition.dot_env, + }; + let task_hash = task_hashable.hash(); + + self.package_task_env_vars.insert(task_id.clone(), env_vars); + self.package_task_hashes + .insert(task_id.clone(), task_hash.clone()); + + Ok(task_hash) + } + + fn calculate_dependency_hashes<'a>( + &'a self, + dependency_set: &'a HashSet, + ) -> Result, Error> { + let mut dependency_hash_set = HashSet::new(); + + for dependency_task in dependency_set { + let TaskNode::Task(dependency_task_id) = dependency_task else { + continue; + }; + + if dependency_task_id.package == ROOT_PKG_NAME { + continue; + } + + let dependency_hash = self + .package_task_hashes + .get(&dependency_task_id) + .ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?; + dependency_hash_set.insert(dependency_hash); + } + + let mut dependency_hash_list = dependency_hash_set.into_iter().collect::>(); + dependency_hash_list.sort(); + + Ok(dependency_hash_list) + } +} diff --git a/crates/turborepo-scm/src/manual.rs b/crates/turborepo-scm/src/manual.rs index f5c239e9856ab..6a6cd7f068e55 100644 --- a/crates/turborepo-scm/src/manual.rs +++ b/crates/turborepo-scm/src/manual.rs @@ -4,7 +4,7 @@ use globwalk::fix_glob_pattern; use hex::ToHex; use ignore::WalkBuilder; use sha1::{Digest, Sha1}; -use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf, IntoUnix}; +use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, IntoUnix}; use wax::{any, Glob, Pattern}; use crate::{package_deps::GitHashes, Error}; @@ -47,7 +47,7 @@ pub(crate) fn hash_files( pub(crate) fn get_package_file_hashes_from_processing_gitignore>( turbo_root: &AbsoluteSystemPath, - package_path: &AnchoredSystemPathBuf, + package_path: &AnchoredSystemPath, inputs: &[S], ) -> Result { let full_package_path = turbo_root.resolve(package_path); diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index dcc224e3a25b3..4604e823fcb07 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -2,9 +2,7 @@ use std::collections::HashMap; use itertools::{Either, Itertools}; use tracing::debug; -use turbopath::{ - AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf, PathError, RelativeUnixPathBuf, -}; +use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, PathError, RelativeUnixPathBuf}; use crate::{hash_object::hash_objects, Error, Git, SCM}; @@ -27,7 +25,7 @@ impl SCM { pub fn get_package_file_hashes>( &self, turbo_root: &AbsoluteSystemPath, - package_path: &AnchoredSystemPathBuf, + package_path: &AnchoredSystemPath, inputs: &[S], ) -> Result { match self { @@ -79,7 +77,7 @@ impl Git { fn get_package_file_hashes>( &self, turbo_root: &AbsoluteSystemPath, - package_path: &AnchoredSystemPathBuf, + package_path: &AnchoredSystemPath, inputs: &[S], ) -> Result { if inputs.is_empty() { @@ -92,7 +90,7 @@ impl Git { fn get_package_file_hashes_from_index( &self, turbo_root: &AbsoluteSystemPath, - package_path: &AnchoredSystemPathBuf, + package_path: &AnchoredSystemPath, ) -> Result { let full_pkg_path = turbo_root.resolve(package_path); let git_to_pkg_path = self.root.anchor(&full_pkg_path)?; @@ -126,7 +124,7 @@ impl Git { fn get_package_file_hashes_from_inputs>( &self, turbo_root: &AbsoluteSystemPath, - package_path: &AnchoredSystemPathBuf, + package_path: &AnchoredSystemPath, inputs: &[S], ) -> Result { let full_pkg_path = turbo_root.resolve(package_path); From aaa38bd8c953ac76525f1d500655b6baacaf059f Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Tue, 29 Aug 2023 16:48:42 -0400 Subject: [PATCH 2/8] Framework inference task hashing --- crates/turborepo-lib/src/framework.rs | 10 +++ crates/turborepo-lib/src/run/mod.rs | 5 +- crates/turborepo-lib/src/run/task_id.rs | 4 +- .../turborepo-lib/src/task_graph/visitor.rs | 78 ++++++++++++++----- crates/turborepo-lib/src/task_hash.rs | 70 ++++++++++++++--- 5 files changed, 132 insertions(+), 35 deletions(-) diff --git a/crates/turborepo-lib/src/framework.rs b/crates/turborepo-lib/src/framework.rs index 16149ef627e75..f727f5aa74230 100644 --- a/crates/turborepo-lib/src/framework.rs +++ b/crates/turborepo-lib/src/framework.rs @@ -21,6 +21,16 @@ pub struct Framework { dependency_match: Matcher, } +impl Framework { + pub fn slug(&self) -> &'static str { + self.slug + } + + pub fn env_wildcards(&self) -> &[&'static str] { + &self.env_wildcards + } +} + static FRAMEWORKS: OnceLock<[Framework; 12]> = OnceLock::new(); fn get_frameworks() -> &'static [Framework] { diff --git a/crates/turborepo-lib/src/run/mod.rs b/crates/turborepo-lib/src/run/mod.rs index dd2e2eacc1b5e..1e02f0e8ff8e3 100644 --- a/crates/turborepo-lib/src/run/mod.rs +++ b/crates/turborepo-lib/src/run/mod.rs @@ -232,8 +232,9 @@ impl Run { let pkg_dep_graph = Arc::new(pkg_dep_graph); let engine = Arc::new(engine); - let visitor = Visitor::new(pkg_dep_graph, runcache, &opts); - visitor.visit(engine).await?; + + let visitor = Visitor::new(pkg_dep_graph.clone(), runcache, &opts); + visitor.visit(engine.clone()).await?; let tasks: Vec<_> = engine.tasks().collect(); let workspaces = pkg_dep_graph.workspaces().collect(); diff --git a/crates/turborepo-lib/src/run/task_id.rs b/crates/turborepo-lib/src/run/task_id.rs index a4d59263a3cfb..1ce928f55de85 100644 --- a/crates/turborepo-lib/src/run/task_id.rs +++ b/crates/turborepo-lib/src/run/task_id.rs @@ -10,8 +10,8 @@ pub const ROOT_PKG_NAME: &str = "//"; /// A task identifier as it will appear in the task graph #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize)] pub struct TaskId<'a> { - pub(crate) package: Cow<'a, str>, - pub(crate) task: Cow<'a, str>, + package: Cow<'a, str>, + task: Cow<'a, str>, } /// A task name as it appears in in a `turbo.json` it might be for all diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index c17ffec32f005..d1c4f99ed024a 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -3,22 +3,25 @@ use std::sync::{Arc, OnceLock}; use futures::{stream::FuturesUnordered, StreamExt}; use regex::Regex; use tokio::sync::mpsc; +use tracing::debug; +use turborepo_env::{EnvironmentVariableMap, ResolvedEnvMode}; use crate::{ + cli::EnvMode, engine::{Engine, ExecutionOptions}, opts::Opts, package_graph::{PackageGraph, WorkspaceName}, - run::{ - task_id::{self, TaskId}, - RunCache, - }, + run::task_id::{self, TaskId}, + task_hash, + task_hash::{PackageInputsHashes, TaskHasher}, }; // This holds the whole world pub struct Visitor<'a> { package_graph: Arc, - run_cache: Arc, opts: &'a Opts<'a>, + task_hasher: TaskHasher<'a>, + global_env_mode: EnvMode, } #[derive(Debug, thiserror::Error)] @@ -36,14 +39,31 @@ pub enum Error { MissingDefinition, #[error("error while executing engine: {0}")] Engine(#[from] crate::engine::ExecuteError), + #[error(transparent)] + TaskHash(#[from] task_hash::Error), } impl<'a> Visitor<'a> { - pub fn new(package_graph: Arc, run_cache: Arc, opts: &'a Opts) -> Self { + pub fn new( + package_graph: Arc, + opts: &'a Opts, + package_inputs_hashes: PackageInputsHashes, + env_at_execution_start: &'a EnvironmentVariableMap, + global_hash: &'a str, + global_env_mode: EnvMode, + ) -> Self { + let task_hasher = TaskHasher::new( + package_inputs_hashes, + opts, + env_at_execution_start, + global_hash, + ); + Self { package_graph, - run_cache, opts, + task_hasher, + global_env_mode, } } @@ -61,19 +81,19 @@ impl<'a> Visitor<'a> { while let Some(message) = node_stream.recv().await { let crate::engine::Message { info, callback } = message; let package_name = WorkspaceName::from(info.package()); - let package_json = self + let workspace_info = self .package_graph - .package_json(&package_name) + .workspace_info(&package_name) .ok_or_else(|| Error::MissingPackage { package_name: package_name.clone(), task_id: info.clone(), })?; - let workspace_dir = self - .package_graph - .workspace_dir(&package_name) - .unwrap_or_else(|| panic!("no directory for workspace {package_name}")); - let command = package_json.scripts.get(info.task()).cloned(); + let command = workspace_info + .package_json + .scripts + .get(info.task()) + .cloned(); match command { Some(cmd) @@ -87,20 +107,40 @@ impl<'a> Visitor<'a> { _ => (), } - let task_def = engine + let task_definition = engine .task_definition(&info) .ok_or(Error::MissingDefinition)?; - let task_cache = - self.run_cache - .task_cache(task_def, workspace_dir, info.clone(), "fake"); + let task_env_mode = match self.global_env_mode { + // Task env mode is only independent when global env mode is `infer`. + EnvMode::Infer if !task_definition.pass_through_env.is_empty() => { + ResolvedEnvMode::Strict + } + // If we're in infer mode we have just detected non-usage of strict env vars. + // But our behavior's actual meaning of this state is `loose`. + EnvMode::Infer => ResolvedEnvMode::Loose, + // Otherwise we just use the global env mode. + EnvMode::Strict => ResolvedEnvMode::Strict, + EnvMode::Loose => ResolvedEnvMode::Loose, + }; + + let dependency_set = engine.dependencies(&info).ok_or(Error::MissingDefinition)?; + + let task_hash = self.task_hasher.calculate_task_hash( + &info, + &task_definition, + task_env_mode, + workspace_info, + dependency_set, + )?; + + debug!("task {} hash is {}", info, task_hash); tasks.push(tokio::spawn(async move { println!( "Executing {info}: {}", command.as_deref().unwrap_or("no script def") ); - let _task_cache = task_cache; callback.send(Ok(())).unwrap(); })); } diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 2b3f40f0b0bb8..f0d79264cd85a 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -1,7 +1,4 @@ -use std::{ - collections::{HashMap, HashSet}, - rc::Rc, -}; +use std::collections::{HashMap, HashSet}; use serde::Serialize; use thiserror::Error; @@ -12,6 +9,7 @@ use turborepo_scm::SCM; use crate::{ engine::TaskNode, + framework::infer_framework, hash::{FileHashes, TaskHashable, TurboHash}, package_graph::{WorkspaceInfo, WorkspaceName}, run::task_id::{TaskId, ROOT_PKG_NAME}, @@ -164,10 +162,11 @@ struct TaskHasher { impl TaskHasher { fn calculate_task_hash( &mut self, + is_monorepo: bool, global_hash: &str, do_framework_inference: bool, env_at_execution_start: &EnvironmentVariableMap, - task_id: &TaskId, + task_id: TaskId<'static>, task_definition: &TaskDefinition, env_mode: ResolvedEnvMode, workspace: &WorkspaceInfo, @@ -184,7 +183,53 @@ impl TaskHasher { let mut matching_env_var_map = EnvironmentVariableMap::default(); if do_framework_inference { - todo!("framework inference not implemented yet") + // Se if we infer a framework + if let Some(framework) = infer_framework(workspace, is_monorepo) { + debug!("auto detected framework for {}", task_id.package()); + debug!( + "framework: {}, env_prefix: {:?}", + framework.slug(), + framework.env_wildcards() + ); + let mut computed_wildcards = framework + .env_wildcards() + .iter() + .map(|s| s.to_string()) + .collect::>(); + + if let Some(exclude_prefix) = env_at_execution_start.get("TURBOREPO_EXCLUDE_PREFIX") + { + if !exclude_prefix.is_empty() { + let computed_exclude = format!("!{}*", exclude_prefix); + debug!( + "excluding environment variables matching wildcard {}", + computed_exclude + ); + computed_wildcards.push(computed_exclude); + } + } + + let inference_env_var_map = + env_at_execution_start.from_wildcards(&computed_wildcards)?; + + let user_env_var_set = env_at_execution_start + .wildcard_map_from_wildcards_unresolved(&task_definition.env)?; + + all_env_var_map.union(&user_env_var_set.inclusions); + all_env_var_map.union(&inference_env_var_map); + all_env_var_map.difference(&user_env_var_set.exclusions); + + explicit_env_var_map.union(&user_env_var_set.inclusions); + explicit_env_var_map.difference(&user_env_var_set.exclusions); + + matching_env_var_map.union(&inference_env_var_map); + matching_env_var_map.difference(&user_env_var_set.exclusions); + } else { + let all_env_var_map = + env_at_execution_start.from_wildcards(&task_definition.env)?; + + explicit_env_var_map.union(&all_env_var_map); + } } else { all_env_var_map = env_at_execution_start.from_wildcards(&task_definition.env)?; @@ -205,16 +250,18 @@ impl TaskHasher { debug!( "task hash env vars for {}:{}\n vars: {:?}", - task_id.package, task_id.task, hashable_env_pairs + task_id.package(), + task_id.task(), + hashable_env_pairs ); let task_hashable = TaskHashable { global_hash, task_dependency_hashes, - package_dir: workspace.package_path().to_unix()?, + package_dir: workspace.package_path().to_unix(), hash_of_files, external_deps_hash: workspace.get_external_deps_hash(), - task: &task_id.task, + task: task_id.task(), outputs, pass_through_args, @@ -227,8 +274,7 @@ impl TaskHasher { let task_hash = task_hashable.hash(); self.package_task_env_vars.insert(task_id.clone(), env_vars); - self.package_task_hashes - .insert(task_id.clone(), task_hash.clone()); + self.package_task_hashes.insert(task_id, task_hash.clone()); Ok(task_hash) } @@ -244,7 +290,7 @@ impl TaskHasher { continue; }; - if dependency_task_id.package == ROOT_PKG_NAME { + if dependency_task_id.package() == ROOT_PKG_NAME { continue; } From 3cea5ffb1e084c367af1b7e9db51114741047404 Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Tue, 29 Aug 2023 17:55:29 -0400 Subject: [PATCH 3/8] Adding opts to task_hasher --- crates/turborepo-lib/src/task_hash.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index f0d79264cd85a..3455d8c042f88 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -11,6 +11,7 @@ use crate::{ engine::TaskNode, framework::infer_framework, hash::{FileHashes, TaskHashable, TurboHash}, + opts::Opts, package_graph::{WorkspaceInfo, WorkspaceName}, run::task_id::{TaskId, ROOT_PKG_NAME}, task_graph::TaskDefinition, @@ -151,20 +152,19 @@ impl PackageInputsHashes { } /// Caches package-inputs hashes, and package-task hashes. -struct TaskHasher { +struct TaskHasher<'a> { package_inputs_hashes: PackageInputsHashes, package_task_env_vars: HashMap, DetailedMap>, package_task_hashes: HashMap, String>, package_task_framework: HashMap, String>, package_task_outputs: HashMap, Vec>, + opts: &'a Opts<'a>, } impl TaskHasher { fn calculate_task_hash( &mut self, - is_monorepo: bool, global_hash: &str, - do_framework_inference: bool, env_at_execution_start: &EnvironmentVariableMap, task_id: TaskId<'static>, task_definition: &TaskDefinition, @@ -173,6 +173,9 @@ impl TaskHasher { dependency_set: &HashSet, pass_through_args: &[String], ) -> Result { + let do_framework_inference = self.opts.run_opts.framework_inference; + let is_monorepo = !self.opts.run_opts.single_package; + let hash_of_files = self .package_inputs_hashes .hashes From 6c83daf98e88b79175e8e1a8cbe20acf801776da Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 30 Aug 2023 17:02:34 -0400 Subject: [PATCH 4/8] Clean up and PR review, and rayon package inputs hashing --- Cargo.lock | 1 + crates/turborepo-lib/Cargo.toml | 1 + crates/turborepo-lib/src/hash/mod.rs | 26 +- crates/turborepo-lib/src/opts.rs | 4 +- crates/turborepo-lib/src/run/global_hash.rs | 4 +- crates/turborepo-lib/src/run/mod.rs | 35 ++- crates/turborepo-lib/src/run/task_id.rs | 2 +- .../turborepo-lib/src/task_graph/visitor.rs | 8 +- crates/turborepo-lib/src/task_hash.rs | 256 +++++++++++------- .../src/absolute_system_path.rs | 2 - crates/turborepo-scm/src/manual.rs | 4 +- crates/turborepo-scm/src/package_deps.rs | 2 +- 12 files changed, 210 insertions(+), 135 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e3cb085d5189f..2908bdad15b57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10049,6 +10049,7 @@ dependencies = [ "pretty_assertions", "prost", "rand 0.8.5", + "rayon", "regex", "reqwest", "rustc_version_runtime", diff --git a/crates/turborepo-lib/Cargo.toml b/crates/turborepo-lib/Cargo.toml index 4ec579ce0b625..003dbac1226eb 100644 --- a/crates/turborepo-lib/Cargo.toml +++ b/crates/turborepo-lib/Cargo.toml @@ -96,6 +96,7 @@ lazy-regex = "2.5.0" node-semver = "2.1.0" num_cpus = "1.15.0" owo-colors.workspace = true +rayon = "1.7.0" regex.workspace = true tracing-appender = "0.2.2" tracing-chrome = { version = "0.7.1", optional = true } diff --git a/crates/turborepo-lib/src/hash/mod.rs b/crates/turborepo-lib/src/hash/mod.rs index 2d9eb0f5836fd..f8db9e4dad5dc 100644 --- a/crates/turborepo-lib/src/hash/mod.rs +++ b/crates/turborepo-lib/src/hash/mod.rs @@ -9,7 +9,6 @@ mod traits; use std::collections::HashMap; use capnp::message::{Builder, HeapAllocator}; -use serde::Serialize; pub use traits::TurboHash; use turborepo_env::ResolvedEnvMode; @@ -45,7 +44,7 @@ mod proto_capnp { pub struct TaskHashable<'a> { // hashes pub(crate) global_hash: &'a str, - pub(crate) task_dependency_hashes: Vec<&'a String>, + pub(crate) task_dependency_hashes: Vec, pub(crate) hash_of_files: &'a str, pub(crate) external_deps_hash: String, @@ -78,8 +77,7 @@ pub struct GlobalHashable<'a> { pub struct LockFilePackages(pub Vec); -#[derive(Debug, Clone, Serialize)] -#[serde(transparent)] +#[derive(Debug, Clone)] pub struct FileHashes(pub HashMap); impl From for Builder { @@ -384,22 +382,22 @@ mod test { #[test] fn task_hashable() { let task_hashable = TaskHashable { - global_hash: "global_hash".to_string(), + global_hash: "global_hash", task_dependency_hashes: vec!["task_dependency_hash".to_string()], package_dir: turbopath::RelativeUnixPathBuf::new("package_dir").unwrap(), - hash_of_files: "hash_of_files".to_string(), + hash_of_files: "hash_of_files", external_deps_hash: "external_deps_hash".to_string(), - task: "task".to_string(), + task: "task", outputs: TaskOutputs { inclusions: vec!["inclusions".to_string()], exclusions: vec!["exclusions".to_string()], }, - pass_through_args: vec!["pass_thru_args".to_string()], - env: vec!["env".to_string()], + pass_through_args: &["pass_thru_args".to_string()], + env: &["env".to_string()], resolved_env_vars: vec![], - pass_through_env: vec!["pass_thru_env".to_string()], + pass_through_env: &["pass_thru_env".to_string()], env_mode: ResolvedEnvMode::Loose, - dot_env: vec![turbopath::RelativeUnixPathBuf::new("dotenv".to_string()).unwrap()], + dot_env: &[turbopath::RelativeUnixPathBuf::new("dotenv".to_string()).unwrap()], }; assert_eq!(task_hashable.hash(), "ff765ee2f83bc034"); @@ -416,13 +414,13 @@ mod test { .into_iter() .collect(), root_external_dependencies_hash: "0000000000000000".to_string(), - env: vec!["env".to_string()], + env: &["env".to_string()], resolved_env_vars: vec![], - pass_through_env: vec!["pass_through_env".to_string()], + pass_through_env: &["pass_through_env".to_string()], env_mode: EnvMode::Infer, framework_inference: true, - dot_env: vec![turbopath::RelativeUnixPathBuf::new("dotenv".to_string()).unwrap()], + dot_env: &[turbopath::RelativeUnixPathBuf::new("dotenv".to_string()).unwrap()], }; assert_eq!(global_hash.hash(), "c0ddf8138bd686e8"); diff --git a/crates/turborepo-lib/src/opts.rs b/crates/turborepo-lib/src/opts.rs index 3d837ea2be78b..50e66bd253685 100644 --- a/crates/turborepo-lib/src/opts.rs +++ b/crates/turborepo-lib/src/opts.rs @@ -63,7 +63,7 @@ pub struct RunOpts<'a> { pub(crate) framework_inference: bool, profile: Option<&'a str>, continue_on_error: bool, - passthrough_args: &'a [String], + pub(crate) pass_through_args: &'a [String], pub(crate) only: bool, dry_run: bool, pub(crate) dry_run_json: bool, @@ -121,7 +121,7 @@ impl<'a> TryFrom<&'a RunArgs> for RunOpts<'a> { parallel: args.parallel, profile: args.profile.as_deref(), continue_on_error: args.continue_execution, - passthrough_args: args.pass_through_args.as_ref(), + pass_through_args: args.pass_through_args.as_ref(), only: args.only, no_daemon: args.no_daemon, single_package: args.single_package, diff --git a/crates/turborepo-lib/src/run/global_hash.rs b/crates/turborepo-lib/src/run/global_hash.rs index 1db4d0abc2721..66ea6ea8054e6 100644 --- a/crates/turborepo-lib/src/run/global_hash.rs +++ b/crates/turborepo-lib/src/run/global_hash.rs @@ -33,7 +33,7 @@ pub struct GlobalHashableInputs<'a> { env: &'a [String], // Only Option to allow #[derive(Default)] resolved_env_vars: Option, - pass_through_env: Option<&'a [String]>, + pass_through_env: &'a [String], env_mode: EnvMode, framework_inference: bool, dot_env: &'a [RelativeUnixPathBuf], @@ -129,7 +129,7 @@ impl<'a> GlobalHashableInputs<'a> { self.env_mode = EnvMode::Strict; } EnvMode::Loose => { - self.pass_through_env = Vec::new(); + self.pass_through_env = &[]; } _ => {} } diff --git a/crates/turborepo-lib/src/run/mod.rs b/crates/turborepo-lib/src/run/mod.rs index 1e02f0e8ff8e3..ce61eca6c179f 100644 --- a/crates/turborepo-lib/src/run/mod.rs +++ b/crates/turborepo-lib/src/run/mod.rs @@ -13,6 +13,7 @@ use std::{ use anyhow::{anyhow, Context as ErrorContext, Result}; pub use cache::{RunCache, TaskCache}; use itertools::Itertools; +use rayon::iter::ParallelBridge; use tracing::{debug, info}; use turbopath::AbsoluteSystemPathBuf; use turborepo_cache::{http::APIAuth, AsyncCache}; @@ -22,6 +23,7 @@ use turborepo_ui::ColorSelector; use self::task_id::TaskName; use crate::{ + cli::EnvMode, commands::CommandBase, config::TurboJson, daemon::DaemonConnector, @@ -230,24 +232,37 @@ impl Run { self.base.ui, )); - let pkg_dep_graph = Arc::new(pkg_dep_graph); - let engine = Arc::new(engine); - - let visitor = Visitor::new(pkg_dep_graph.clone(), runcache, &opts); - visitor.visit(engine.clone()).await?; + let mut global_env_mode = opts.run_opts.env_mode; + if matches!(global_env_mode, EnvMode::Infer) + && !root_turbo_json.global_pass_through_env.is_empty() + { + global_env_mode = EnvMode::Strict; + } - let tasks: Vec<_> = engine.tasks().collect(); let workspaces = pkg_dep_graph.workspaces().collect(); - - let package_file_hashes = PackageInputsHashes::calculate_file_hashes( + let package_inputs_hashes = PackageInputsHashes::calculate_file_hashes( scm, - engine.tasks(), + engine.tasks().par_bridge(), workspaces, engine.task_definitions(), &self.base.repo_root, )?; - debug!("package file hashes: {:?}", package_file_hashes); + debug!("package inputs hashes: {:?}", package_inputs_hashes); + + let pkg_dep_graph = Arc::new(pkg_dep_graph); + let engine = Arc::new(engine); + let visitor = Visitor::new( + pkg_dep_graph.clone(), + runcache, + &opts, + package_inputs_hashes, + &env_at_execution_start, + &global_hash, + global_env_mode, + ); + + visitor.visit(engine.clone()).await?; Ok(()) } diff --git a/crates/turborepo-lib/src/run/task_id.rs b/crates/turborepo-lib/src/run/task_id.rs index 1ce928f55de85..be269b1231b7d 100644 --- a/crates/turborepo-lib/src/run/task_id.rs +++ b/crates/turborepo-lib/src/run/task_id.rs @@ -8,7 +8,7 @@ pub const TASK_DELIMITER: &str = "#"; pub const ROOT_PKG_NAME: &str = "//"; /// A task identifier as it will appear in the task graph -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct TaskId<'a> { package: Cow<'a, str>, task: Cow<'a, str>, diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index d1c4f99ed024a..5323aedb3d007 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -11,13 +11,17 @@ use crate::{ engine::{Engine, ExecutionOptions}, opts::Opts, package_graph::{PackageGraph, WorkspaceName}, - run::task_id::{self, TaskId}, + run::{ + task_id::{self, TaskId}, + RunCache, + }, task_hash, task_hash::{PackageInputsHashes, TaskHasher}, }; // This holds the whole world pub struct Visitor<'a> { + runcache: Arc, package_graph: Arc, opts: &'a Opts<'a>, task_hasher: TaskHasher<'a>, @@ -46,6 +50,7 @@ pub enum Error { impl<'a> Visitor<'a> { pub fn new( package_graph: Arc, + runcache: Arc, opts: &'a Opts, package_inputs_hashes: PackageInputsHashes, env_at_execution_start: &'a EnvironmentVariableMap, @@ -60,6 +65,7 @@ impl<'a> Visitor<'a> { ); Self { + runcache, package_graph, opts, task_hasher, diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 3455d8c042f88..7507ba3ebc50d 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -1,6 +1,9 @@ -use std::collections::{HashMap, HashSet}; +use std::{ + collections::{HashMap, HashSet}, + sync::Mutex, +}; -use serde::Serialize; +use rayon::prelude::*; use thiserror::Error; use tracing::debug; use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf}; @@ -27,6 +30,8 @@ pub enum Error { MissingPackageFileHash(String), #[error("missing hash for dependent task {0}")] MissingDependencyTaskHash(String), + #[error("cannot acquire lock for task hash tracker")] + Mutex, #[error(transparent)] SCM(#[from] turborepo_scm::Error), #[error(transparent)] @@ -54,7 +59,7 @@ impl TaskHashable<'_> { } } -#[derive(Debug, Serialize)] +#[derive(Debug, Default)] pub struct PackageInputsHashes { // We make the TaskId a String for serialization purposes hashes: HashMap, @@ -64,114 +69,142 @@ pub struct PackageInputsHashes { impl PackageInputsHashes { pub fn calculate_file_hashes<'a>( scm: SCM, - all_tasks: impl Iterator, + all_tasks: impl ParallelIterator, workspaces: HashMap<&WorkspaceName, &WorkspaceInfo>, task_definitions: &HashMap, TaskDefinition>, repo_root: &AbsoluteSystemPath, ) -> Result { - let mut hash_tasks = Vec::new(); - - for task in all_tasks { - let TaskNode::Task(task_id) = task else { - continue; - }; - - if task_id.package() == ROOT_PKG_NAME { - continue; - } - - let task_definition = task_definitions - .get(&task_id) - .ok_or_else(|| Error::MissingPipelineEntry(task_id.clone()))?; - - // TODO: Look into making WorkspaceName take a Cow - let workspace_name = WorkspaceName::Other(task_id.package().to_string()); - - let package_file_hash_inputs = PackageFileHashInputs { - task_id: task_id.clone(), - task_definition, - workspace_name, - }; - - hash_tasks.push(package_file_hash_inputs); - } - - let mut hashes = HashMap::with_capacity(hash_tasks.len()); - let mut hash_objects = HashMap::with_capacity(hash_tasks.len()); - - for package_file_hash_inputs in hash_tasks { - let pkg = workspaces - .get(&package_file_hash_inputs.workspace_name) - .ok_or_else(|| { - Error::MissingPackageJson(package_file_hash_inputs.workspace_name.to_string()) - })?; - - let package_path = pkg - .package_json_path - .parent() - .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); + let (hashes, expanded_hashes): (HashMap<_, _>, HashMap<_, _>) = all_tasks + .filter_map(|task| { + let TaskNode::Task(task_id) = task else { + return None; + }; + + if task_id.package() == ROOT_PKG_NAME { + return None; + } - let mut hash_object = scm.get_package_file_hashes( - &repo_root, - package_path, - &package_file_hash_inputs.task_definition.inputs, - )?; + let task_definition = match task_definitions + .get(&task_id) + .ok_or_else(|| Error::MissingPipelineEntry(task_id.clone())) + { + Ok(def) => def, + Err(err) => return Some(Err(err)), + }; + + // TODO: Look into making WorkspaceName take a Cow + let workspace_name = WorkspaceName::Other(task_id.package().to_string()); + + let package_file_hash_inputs = PackageFileHashInputs { + task_id: task_id.clone(), + task_definition, + workspace_name, + }; + + let pkg = match workspaces + .get(&package_file_hash_inputs.workspace_name) + .ok_or_else(|| { + Error::MissingPackageJson( + package_file_hash_inputs.workspace_name.to_string(), + ) + }) { + Ok(pkg) => pkg, + Err(err) => return Some(Err(err)), + }; - if !package_file_hash_inputs.task_definition.dot_env.is_empty() { let package_path = pkg .package_json_path .parent() .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); - let absolute_package_path = repo_root.resolve(package_path); - let dot_env_object = scm.hash_existing_of( - &absolute_package_path, - package_file_hash_inputs - .task_definition - .dot_env - .iter() - .map(|p| p.to_anchored_system_path_buf()), - )?; - - for (key, value) in dot_env_object { - hash_object.insert(key, value); + + let mut hash_object = match scm.get_package_file_hashes( + &repo_root, + package_path, + &package_file_hash_inputs.task_definition.inputs, + ) { + Ok(hash_object) => hash_object, + Err(err) => return Some(Err(err.into())), + }; + + if !package_file_hash_inputs.task_definition.dot_env.is_empty() { + let package_path = pkg + .package_json_path + .parent() + .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); + let absolute_package_path = repo_root.resolve(package_path); + let dot_env_object = match scm.hash_existing_of( + &absolute_package_path, + package_file_hash_inputs + .task_definition + .dot_env + .iter() + .map(|p| p.to_anchored_system_path_buf()), + ) { + Ok(dot_env_object) => dot_env_object, + Err(err) => return Some(Err(err.into())), + }; + + for (key, value) in dot_env_object { + hash_object.insert(key, value); + } } - } - let file_hashes = FileHashes(hash_object); - let hash = file_hashes.clone().hash(); + let file_hashes = FileHashes(hash_object); + let hash = file_hashes.clone().hash(); + let task_id = package_file_hash_inputs.task_id.to_string(); - hashes.insert(package_file_hash_inputs.task_id.to_string(), hash); - hash_objects.insert(package_file_hash_inputs.task_id.to_string(), file_hashes); - } + Some(Ok(((task_id.clone(), hash), (task_id, file_hashes)))) + }) + .collect::>()?; Ok(PackageInputsHashes { - hashes: hashes, - expanded_hashes: hash_objects, + hashes, + expanded_hashes, }) } } -/// Caches package-inputs hashes, and package-task hashes. -struct TaskHasher<'a> { - package_inputs_hashes: PackageInputsHashes, +#[derive(Default)] +pub struct TaskHashTracker { package_task_env_vars: HashMap, DetailedMap>, package_task_hashes: HashMap, String>, package_task_framework: HashMap, String>, package_task_outputs: HashMap, Vec>, +} + +/// Caches package-inputs hashes, and package-task hashes. +pub struct TaskHasher<'a> { + package_inputs_hashes: PackageInputsHashes, opts: &'a Opts<'a>, + env_at_execution_start: &'a EnvironmentVariableMap, + global_hash: &'a str, + + task_hash_tracker: Mutex, } -impl TaskHasher { - fn calculate_task_hash( - &mut self, - global_hash: &str, - env_at_execution_start: &EnvironmentVariableMap, - task_id: TaskId<'static>, +impl<'a> TaskHasher<'a> { + pub fn new( + package_inputs_hashes: PackageInputsHashes, + opts: &'a Opts, + env_at_execution_start: &'a EnvironmentVariableMap, + global_hash: &'a str, + ) -> Self { + Self { + package_inputs_hashes, + opts, + env_at_execution_start, + global_hash, + task_hash_tracker: Mutex::new(TaskHashTracker::default()), + } + } + + pub fn calculate_task_hash( + &self, + task_id: &TaskId<'static>, task_definition: &TaskDefinition, - env_mode: ResolvedEnvMode, + task_env_mode: ResolvedEnvMode, workspace: &WorkspaceInfo, - dependency_set: &HashSet, - pass_through_args: &[String], + dependency_set: HashSet<&TaskNode>, ) -> Result { let do_framework_inference = self.opts.run_opts.framework_inference; let is_monorepo = !self.opts.run_opts.single_package; @@ -200,7 +233,8 @@ impl TaskHasher { .map(|s| s.to_string()) .collect::>(); - if let Some(exclude_prefix) = env_at_execution_start.get("TURBOREPO_EXCLUDE_PREFIX") + if let Some(exclude_prefix) = + self.env_at_execution_start.get("TURBOREPO_EXCLUDE_PREFIX") { if !exclude_prefix.is_empty() { let computed_exclude = format!("!{}*", exclude_prefix); @@ -212,10 +246,12 @@ impl TaskHasher { } } - let inference_env_var_map = - env_at_execution_start.from_wildcards(&computed_wildcards)?; + let inference_env_var_map = self + .env_at_execution_start + .from_wildcards(&computed_wildcards)?; - let user_env_var_set = env_at_execution_start + let user_env_var_set = self + .env_at_execution_start .wildcard_map_from_wildcards_unresolved(&task_definition.env)?; all_env_var_map.union(&user_env_var_set.inclusions); @@ -228,13 +264,16 @@ impl TaskHasher { matching_env_var_map.union(&inference_env_var_map); matching_env_var_map.difference(&user_env_var_set.exclusions); } else { - let all_env_var_map = - env_at_execution_start.from_wildcards(&task_definition.env)?; + let all_env_var_map = self + .env_at_execution_start + .from_wildcards(&task_definition.env)?; explicit_env_var_map.union(&all_env_var_map); } } else { - all_env_var_map = env_at_execution_start.from_wildcards(&task_definition.env)?; + all_env_var_map = self + .env_at_execution_start + .from_wildcards(&task_definition.env)?; explicit_env_var_map.union(&mut all_env_var_map); } @@ -259,7 +298,7 @@ impl TaskHasher { ); let task_hashable = TaskHashable { - global_hash, + global_hash: self.global_hash, task_dependency_hashes, package_dir: workspace.package_path().to_unix(), hash_of_files, @@ -267,25 +306,39 @@ impl TaskHasher { task: task_id.task(), outputs, - pass_through_args, + pass_through_args: self.opts.run_opts.pass_through_args, env: &task_definition.env, resolved_env_vars: hashable_env_pairs, pass_through_env: &task_definition.pass_through_env, - env_mode, + env_mode: task_env_mode, dot_env: &task_definition.dot_env, }; let task_hash = task_hashable.hash(); - self.package_task_env_vars.insert(task_id.clone(), env_vars); - self.package_task_hashes.insert(task_id, task_hash.clone()); + let mut task_hash_tracker = self.task_hash_tracker.lock().map_err(|_| Error::Mutex)?; + task_hash_tracker + .package_task_env_vars + .insert(task_id.clone(), env_vars); + task_hash_tracker + .package_task_hashes + .insert(task_id.clone(), task_hash.clone()); Ok(task_hash) } - fn calculate_dependency_hashes<'a>( - &'a self, - dependency_set: &'a HashSet, - ) -> Result, Error> { + /// Gets the hashes of a task's dependencies. Because the visitor + /// receives the nodes in topological order, we know that all of + /// the dependencies have been processed before the current task. + /// + /// # Arguments + /// + /// * `dependency_set`: The dependencies of the current task + /// + /// returns: Result, Error> + fn calculate_dependency_hashes( + &self, + dependency_set: HashSet<&TaskNode>, + ) -> Result, Error> { let mut dependency_hash_set = HashSet::new(); for dependency_task in dependency_set { @@ -297,11 +350,12 @@ impl TaskHasher { continue; } - let dependency_hash = self + let task_hash_tracker = self.task_hash_tracker.lock().map_err(|_| Error::Mutex)?; + let dependency_hash = task_hash_tracker .package_task_hashes .get(&dependency_task_id) .ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?; - dependency_hash_set.insert(dependency_hash); + dependency_hash_set.insert(dependency_hash.clone()); } let mut dependency_hash_list = dependency_hash_set.into_iter().collect::>(); diff --git a/crates/turborepo-paths/src/absolute_system_path.rs b/crates/turborepo-paths/src/absolute_system_path.rs index 08b6b12b21927..f1637027eee1b 100644 --- a/crates/turborepo-paths/src/absolute_system_path.rs +++ b/crates/turborepo-paths/src/absolute_system_path.rs @@ -450,8 +450,6 @@ impl<'a> TryFrom<&'a Path> for &'a AbsoluteSystemPath { #[cfg(test)] mod tests { - use std::str::FromStr; - use anyhow::Result; use tempdir::TempDir; use test_case::test_case; diff --git a/crates/turborepo-scm/src/manual.rs b/crates/turborepo-scm/src/manual.rs index 6a6cd7f068e55..905fbaf150099 100644 --- a/crates/turborepo-scm/src/manual.rs +++ b/crates/turborepo-scm/src/manual.rs @@ -118,7 +118,9 @@ pub(crate) fn get_package_file_hashes_from_processing_gitignore>( #[cfg(test)] mod tests { use test_case::test_case; - use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPath, RelativeUnixPathBuf}; + use turbopath::{ + AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPath, RelativeUnixPathBuf, + }; use super::*; diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs index 4604e823fcb07..bff30ca495623 100644 --- a/crates/turborepo-scm/src/package_deps.rs +++ b/crates/turborepo-scm/src/package_deps.rs @@ -188,7 +188,7 @@ impl Git { mod tests { use std::{assert_matches::assert_matches, collections::HashMap, process::Command}; - use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPathBuf}; use super::*; use crate::{manual::get_package_file_hashes_from_processing_gitignore, SCM}; From e6c72bde9469d16cdadc8603c0eea1e738dae73c Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Wed, 6 Sep 2023 17:20:51 -0400 Subject: [PATCH 5/8] Fixing after rebase --- crates/turborepo-lib/src/task_graph/visitor.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index 5323aedb3d007..ee13d789f4627 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -21,7 +21,7 @@ use crate::{ // This holds the whole world pub struct Visitor<'a> { - runcache: Arc, + run_cache: Arc, package_graph: Arc, opts: &'a Opts<'a>, task_hasher: TaskHasher<'a>, @@ -50,7 +50,7 @@ pub enum Error { impl<'a> Visitor<'a> { pub fn new( package_graph: Arc, - runcache: Arc, + run_cache: Arc, opts: &'a Opts, package_inputs_hashes: PackageInputsHashes, env_at_execution_start: &'a EnvironmentVariableMap, @@ -65,7 +65,7 @@ impl<'a> Visitor<'a> { ); Self { - runcache, + run_cache, package_graph, opts, task_hasher, @@ -87,6 +87,13 @@ impl<'a> Visitor<'a> { while let Some(message) = node_stream.recv().await { let crate::engine::Message { info, callback } = message; let package_name = WorkspaceName::from(info.package()); + let workspace_dir = + self.package_graph + .workspace_dir(&package_name) + .ok_or_else(|| Error::MissingPackage { + package_name: package_name.clone(), + task_id: info.clone(), + })?; let workspace_info = self .package_graph .workspace_info(&package_name) @@ -142,11 +149,16 @@ impl<'a> Visitor<'a> { debug!("task {} hash is {}", info, task_hash); + let task_cache = + self.run_cache + .task_cache(task_definition, workspace_dir, info.clone(), "fake"); + tasks.push(tokio::spawn(async move { println!( "Executing {info}: {}", command.as_deref().unwrap_or("no script def") ); + let _task_cache = task_cache; callback.send(Ok(())).unwrap(); })); } From 46e29a7577c0076da601d20822cfc10db4c2fc08 Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Thu, 7 Sep 2023 10:33:13 -0400 Subject: [PATCH 6/8] Clippy fixes --- crates/turborepo-lib/src/hash/mod.rs | 6 +++--- crates/turborepo-lib/src/run/global_hash.rs | 4 ++-- crates/turborepo-lib/src/task_graph/visitor.rs | 2 +- crates/turborepo-lib/src/task_hash.rs | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/turborepo-lib/src/hash/mod.rs b/crates/turborepo-lib/src/hash/mod.rs index f8db9e4dad5dc..973545dae475b 100644 --- a/crates/turborepo-lib/src/hash/mod.rs +++ b/crates/turborepo-lib/src/hash/mod.rs @@ -197,11 +197,11 @@ impl From> for Builder { ::capnp::message::TypedBuilder::::new_default(); let mut builder = message.init_root(); - builder.set_global_hash(&task_hashable.global_hash); + builder.set_global_hash(task_hashable.global_hash); builder.set_package_dir(&task_hashable.package_dir.to_string()); - builder.set_hash_of_files(&task_hashable.hash_of_files); + builder.set_hash_of_files(task_hashable.hash_of_files); builder.set_external_deps_hash(&task_hashable.external_deps_hash); - builder.set_task(&task_hashable.task); + builder.set_task(task_hashable.task); builder.set_env_mode(task_hashable.env_mode.into()); { diff --git a/crates/turborepo-lib/src/run/global_hash.rs b/crates/turborepo-lib/src/run/global_hash.rs index 66ea6ea8054e6..d2b119b5ebb96 100644 --- a/crates/turborepo-lib/src/run/global_hash.rs +++ b/crates/turborepo-lib/src/run/global_hash.rs @@ -54,7 +54,7 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( dot_env: &'a [RelativeUnixPathBuf], ) -> Result> { let global_hashable_env_vars = - get_global_hashable_env_vars(env_at_execution_start, &global_env)?; + get_global_hashable_env_vars(env_at_execution_start, global_env)?; debug!( "global hash env vars {:?}", @@ -68,7 +68,7 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( let files = globwalk::globwalk( root_path, - &global_file_dependencies, + global_file_dependencies, &globs.raw_exclusions, WalkType::All, )?; diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index ee13d789f4627..304ab3ed46d10 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -141,7 +141,7 @@ impl<'a> Visitor<'a> { let task_hash = self.task_hasher.calculate_task_hash( &info, - &task_definition, + task_definition, task_env_mode, workspace_info, dependency_set, diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 7507ba3ebc50d..85b561218e341 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -33,7 +33,7 @@ pub enum Error { #[error("cannot acquire lock for task hash tracker")] Mutex, #[error(transparent)] - SCM(#[from] turborepo_scm::Error), + Scm(#[from] turborepo_scm::Error), #[error(transparent)] Env(#[from] turborepo_env::Error), #[error(transparent)] @@ -85,7 +85,7 @@ impl PackageInputsHashes { } let task_definition = match task_definitions - .get(&task_id) + .get(task_id) .ok_or_else(|| Error::MissingPipelineEntry(task_id.clone())) { Ok(def) => def, @@ -118,7 +118,7 @@ impl PackageInputsHashes { .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); let mut hash_object = match scm.get_package_file_hashes( - &repo_root, + repo_root, package_path, &package_file_hash_inputs.task_definition.inputs, ) { @@ -275,7 +275,7 @@ impl<'a> TaskHasher<'a> { .env_at_execution_start .from_wildcards(&task_definition.env)?; - explicit_env_var_map.union(&mut all_env_var_map); + explicit_env_var_map.union(&all_env_var_map); } let env_vars = DetailedMap { @@ -287,7 +287,7 @@ impl<'a> TaskHasher<'a> { }; let hashable_env_pairs = env_vars.all.to_hashable(); - let outputs = task_definition.hashable_outputs(&task_id); + let outputs = task_definition.hashable_outputs(task_id); let task_dependency_hashes = self.calculate_dependency_hashes(dependency_set)?; debug!( @@ -353,7 +353,7 @@ impl<'a> TaskHasher<'a> { let task_hash_tracker = self.task_hash_tracker.lock().map_err(|_| Error::Mutex)?; let dependency_hash = task_hash_tracker .package_task_hashes - .get(&dependency_task_id) + .get(dependency_task_id) .ok_or_else(|| Error::MissingDependencyTaskHash(dependency_task.to_string()))?; dependency_hash_set.insert(dependency_hash.clone()); } From 513f3ade961890014a25311d2a4b3bc2600351d7 Mon Sep 17 00:00:00 2001 From: Nicholas Yang Date: Fri, 8 Sep 2023 12:23:35 -0400 Subject: [PATCH 7/8] PR feedback --- .../turborepo-lib/src/task_graph/visitor.rs | 2 +- crates/turborepo-lib/src/task_hash.rs | 48 ++++++------------- 2 files changed, 15 insertions(+), 35 deletions(-) diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index 304ab3ed46d10..bdaec35f459e4 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -151,7 +151,7 @@ impl<'a> Visitor<'a> { let task_cache = self.run_cache - .task_cache(task_definition, workspace_dir, info.clone(), "fake"); + .task_cache(task_definition, workspace_dir, info.clone(), &task_hash); tasks.push(tokio::spawn(async move { println!( diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 85b561218e341..14a8cf1500839 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -42,13 +42,6 @@ pub enum Error { Path(#[from] turbopath::PathError), } -#[derive(Debug)] -struct PackageFileHashInputs<'a> { - task_id: TaskId<'static>, - task_definition: &'a TaskDefinition, - workspace_name: WorkspaceName, -} - impl TaskHashable<'_> { fn calculate_task_hash(mut self) -> String { if matches!(self.env_mode, ResolvedEnvMode::Loose) { @@ -61,9 +54,8 @@ impl TaskHashable<'_> { #[derive(Debug, Default)] pub struct PackageInputsHashes { - // We make the TaskId a String for serialization purposes - hashes: HashMap, - expanded_hashes: HashMap, + hashes: HashMap, String>, + expanded_hashes: HashMap, FileHashes>, } impl PackageInputsHashes { @@ -95,19 +87,10 @@ impl PackageInputsHashes { // TODO: Look into making WorkspaceName take a Cow let workspace_name = WorkspaceName::Other(task_id.package().to_string()); - let package_file_hash_inputs = PackageFileHashInputs { - task_id: task_id.clone(), - task_definition, - workspace_name, - }; - let pkg = match workspaces - .get(&package_file_hash_inputs.workspace_name) - .ok_or_else(|| { - Error::MissingPackageJson( - package_file_hash_inputs.workspace_name.to_string(), - ) - }) { + .get(&workspace_name) + .ok_or_else(|| Error::MissingPackageJson(workspace_name.to_string())) + { Ok(pkg) => pkg, Err(err) => return Some(Err(err)), }; @@ -120,22 +103,17 @@ impl PackageInputsHashes { let mut hash_object = match scm.get_package_file_hashes( repo_root, package_path, - &package_file_hash_inputs.task_definition.inputs, + &task_definition.inputs, ) { Ok(hash_object) => hash_object, Err(err) => return Some(Err(err.into())), }; - if !package_file_hash_inputs.task_definition.dot_env.is_empty() { - let package_path = pkg - .package_json_path - .parent() - .unwrap_or_else(|| AnchoredSystemPath::new("").unwrap()); + if !task_definition.dot_env.is_empty() { let absolute_package_path = repo_root.resolve(package_path); let dot_env_object = match scm.hash_existing_of( &absolute_package_path, - package_file_hash_inputs - .task_definition + task_definition .dot_env .iter() .map(|p| p.to_anchored_system_path_buf()), @@ -151,9 +129,11 @@ impl PackageInputsHashes { let file_hashes = FileHashes(hash_object); let hash = file_hashes.clone().hash(); - let task_id = package_file_hash_inputs.task_id.to_string(); - Some(Ok(((task_id.clone(), hash), (task_id, file_hashes)))) + Some(Ok(( + (task_id.clone(), hash), + (task_id.clone(), file_hashes), + ))) }) .collect::>()?; @@ -212,7 +192,7 @@ impl<'a> TaskHasher<'a> { let hash_of_files = self .package_inputs_hashes .hashes - .get(&task_id.to_string()) + .get(&task_id) .ok_or_else(|| Error::MissingPackageFileHash(task_id.to_string()))?; let mut explicit_env_var_map = EnvironmentVariableMap::default(); let mut all_env_var_map = EnvironmentVariableMap::default(); @@ -234,7 +214,7 @@ impl<'a> TaskHasher<'a> { .collect::>(); if let Some(exclude_prefix) = - self.env_at_execution_start.get("TURBOREPO_EXCLUDE_PREFIX") + self.env_at_execution_start.get("TURBO_CI_VENDOR_ENV_KEY") { if !exclude_prefix.is_empty() { let computed_exclude = format!("!{}*", exclude_prefix); From 08f9e8ecfd0a7ba78759b112a3e540200f03115f Mon Sep 17 00:00:00 2001 From: nicholaslyang Date: Fri, 8 Sep 2023 13:18:59 -0400 Subject: [PATCH 8/8] Clippy fix --- crates/turborepo-lib/src/task_hash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 14a8cf1500839..940be3150a476 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -192,7 +192,7 @@ impl<'a> TaskHasher<'a> { let hash_of_files = self .package_inputs_hashes .hashes - .get(&task_id) + .get(task_id) .ok_or_else(|| Error::MissingPackageFileHash(task_id.to_string()))?; let mut explicit_env_var_map = EnvironmentVariableMap::default(); let mut all_env_var_map = EnvironmentVariableMap::default();