diff --git a/Cargo.lock b/Cargo.lock index ee34a6ad34ed3..70f44a2279f1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9239,7 +9239,6 @@ dependencies = [ "parking_lot", "pot", "rand 0.9.0", - "rayon", "regex", "ringmap", "rstest", diff --git a/packages/next/src/server/dev/hot-reloader-turbopack.ts b/packages/next/src/server/dev/hot-reloader-turbopack.ts index 031623b54c6bc..54aa9b6d7099d 100644 --- a/packages/next/src/server/dev/hot-reloader-turbopack.ts +++ b/packages/next/src/server/dev/hot-reloader-turbopack.ts @@ -258,7 +258,7 @@ export async function createHotReloaderTurbopack( } ) backgroundLogCompilationEvents(project, { - eventTypes: ['StartupCacheInvalidationEvent'], + eventTypes: ['StartupCacheInvalidationEvent', 'TimingEvent'], }) setBundlerFindSourceMapImplementation( getSourceMapFromTurbopack.bind(null, project, projectPath) diff --git a/turbopack/crates/turbo-persistence-tools/src/main.rs b/turbopack/crates/turbo-persistence-tools/src/main.rs index 25fbe6d31201b..6a384bae92ab4 100644 --- a/turbopack/crates/turbo-persistence-tools/src/main.rs +++ b/turbopack/crates/turbo-persistence-tools/src/main.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use anyhow::{Context, Result, bail}; -use turbo_persistence::{MetaFileEntryInfo, TurboPersistence}; +use turbo_persistence::{MetaFileEntryInfo, SerialScheduler, TurboPersistence}; fn main() -> Result<()> { // Get CLI argument @@ -16,7 +16,7 @@ fn main() -> Result<()> { bail!("The provided path does not exist: {}", path.display()); } - let db = TurboPersistence::open_read_only(path)?; + let db: TurboPersistence = TurboPersistence::open_read_only(path)?; let meta_info = db .meta_info() .context("Failed to retrieve meta information")?; diff --git a/turbopack/crates/turbo-persistence/Cargo.toml b/turbopack/crates/turbo-persistence/Cargo.toml index c3e26489251ba..ee331283cec62 100644 --- a/turbopack/crates/turbo-persistence/Cargo.toml +++ b/turbopack/crates/turbo-persistence/Cargo.toml @@ -22,7 +22,6 @@ memmap2 = "0.9.5" parking_lot = { workspace = true } qfilter = { version = "0.2.4", features = ["serde"] } quick_cache = { workspace = true } -rayon = { workspace = true } rustc-hash = { workspace = true } smallvec = { workspace = true} thread_local = { workspace = true } @@ -32,6 +31,7 @@ zstd = { version = "0.13.2", features = ["zdict_builder"] } [dev-dependencies] rand = { workspace = true, features = ["small_rng"] } +rayon = { workspace = true } tempfile = { workspace = true } [lints] diff --git a/turbopack/crates/turbo-persistence/src/collector.rs b/turbopack/crates/turbo-persistence/src/collector.rs index ea8b04ab16e70..6637ea2c13e3c 100644 --- a/turbopack/crates/turbo-persistence/src/collector.rs +++ b/turbopack/crates/turbo-persistence/src/collector.rs @@ -1,3 +1,5 @@ +use std::mem::take; + use crate::{ ValueBuffer, collector_entry::{CollectorEntry, CollectorEntryValue, EntryKey}, @@ -111,4 +113,11 @@ impl Collector { self.total_value_size = 0; self.entries.drain(..) 
} + + /// Clears the collector and drops the capacity + pub fn drop_contents(&mut self) { + drop(take(&mut self.entries)); + self.total_key_size = 0; + self.total_value_size = 0; + } } diff --git a/turbopack/crates/turbo-persistence/src/compaction/selector.rs b/turbopack/crates/turbo-persistence/src/compaction/selector.rs index 814a4aa87a69e..f5bc91687f2b1 100644 --- a/turbopack/crates/turbo-persistence/src/compaction/selector.rs +++ b/turbopack/crates/turbo-persistence/src/compaction/selector.rs @@ -136,8 +136,8 @@ impl Default for CompactConfig { optimal_merge_count: 8, max_merge_count: 32, max_merge_bytes: 500 * MB, - min_merge_duplication_bytes: MB, - optimal_merge_duplication_bytes: 10 * MB, + min_merge_duplication_bytes: 50 * MB, + optimal_merge_duplication_bytes: 100 * MB, max_merge_segment_count: 8, } } @@ -233,13 +233,20 @@ pub fn get_merge_segments( // We have reached the maximum number of merge jobs, so we stop here. break; } - let mut current_range = start_compactable.range(); + let start_compactable_range = start_compactable.range(); + let start_compactable_size = start_compactable.size(); + let mut current_range = start_compactable_range.clone(); // We might need to restart the search if we need to extend the range. 'search: loop { let mut current_set = smallvec![start_index]; - let mut current_size = start_compactable.size(); + let mut current_size = start_compactable_size; let mut duplication = IntervalMap::>::new(); + duplication.update(start_compactable_range.clone(), |dup_info| { + dup_info + .get_or_insert_default() + .add(start_compactable_size, &start_compactable_range); + }); let mut current_skip = 0; // We will capture compactables in the current_range until we find a optimal merge @@ -609,8 +616,8 @@ mod tests { min_merge_count: 2, optimal_merge_count: 4, max_merge_bytes: 5000, - min_merge_duplication_bytes: 200, - optimal_merge_duplication_bytes: 500, + min_merge_duplication_bytes: 500, + optimal_merge_duplication_bytes: 1000, max_merge_segment_count: 4, }; let jobs = get_merge_segments(&containers, &config); @@ -653,7 +660,7 @@ mod tests { println!("Number of compactions: {number_of_compactions}"); let metrics = compute_metrics(&containers, 0..=KEY_RANGE); - assert!(number_of_compactions < 40); + assert!(number_of_compactions < 30); assert!(containers.len() < 30); assert!(metrics.duplication < 0.5); } diff --git a/turbopack/crates/turbo-persistence/src/db.rs b/turbopack/crates/turbo-persistence/src/db.rs index 4ba703fa75ea5..79cf9d06fb86c 100644 --- a/turbopack/crates/turbo-persistence/src/db.rs +++ b/turbopack/crates/turbo-persistence/src/db.rs @@ -18,8 +18,6 @@ use jiff::Timestamp; use lzzzz::lz4::decompress; use memmap2::Mmap; use parking_lot::{Mutex, RwLock}; -use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; -use tracing::Span; pub use crate::compaction::selector::CompactConfig; use crate::{ @@ -36,6 +34,7 @@ use crate::{ merge_iter::MergeIter, meta_file::{AmqfCache, MetaFile, MetaLookupResult, StaticSortedFileRange}, meta_file_builder::MetaFileBuilder, + parallel_scheduler::ParallelScheduler, sst_filter::SstFilter, static_sorted_file::{BlockCache, SstLookupResult}, static_sorted_file_builder::{StaticSortedFileBuilderMeta, write_static_stored_file}, @@ -108,7 +107,8 @@ struct TrackedStats { /// TurboPersistence is a persistent key-value store. It is limited to a single writer at a time /// using a single write batch. It allows for concurrent reads. 
-pub struct TurboPersistence { +pub struct TurboPersistence { + parallel_scheduler: S, /// The path to the directory where the database is stored path: PathBuf, /// If true, the database is opened in read-only mode. In this mode, no writes are allowed and @@ -148,9 +148,26 @@ pub struct CommitOptions { keys_written: u64, } -impl TurboPersistence { - fn new(path: PathBuf, read_only: bool) -> Self { +impl TurboPersistence { + /// Open a TurboPersistence database at the given path. + /// This will read the directory and might performance cleanup when the database was not closed + /// properly. Cleanup only requires to read a few bytes from a few files and to delete + /// files, so it's fast. + pub fn open(path: PathBuf) -> Result { + Self::open_with_parallel_scheduler(path, Default::default()) + } + + /// Open a TurboPersistence database at the given path in read only mode. + /// This will read the directory. No Cleanup is performed. + pub fn open_read_only(path: PathBuf) -> Result { + Self::open_read_only_with_parallel_scheduler(path, Default::default()) + } +} + +impl TurboPersistence { + fn new(path: PathBuf, read_only: bool, parallel_scheduler: S) -> Self { Self { + parallel_scheduler, path, read_only, inner: RwLock::new(Inner { @@ -188,16 +205,19 @@ impl TurboPersistence { /// This will read the directory and might performance cleanup when the database was not closed /// properly. Cleanup only requires to read a few bytes from a few files and to delete /// files, so it's fast. - pub fn open(path: PathBuf) -> Result { - let mut db = Self::new(path, false); + pub fn open_with_parallel_scheduler(path: PathBuf, parallel_scheduler: S) -> Result { + let mut db = Self::new(path, false, parallel_scheduler); db.open_directory(false)?; Ok(db) } /// Open a TurboPersistence database at the given path in read only mode. /// This will read the directory. No Cleanup is performed. - pub fn open_read_only(path: PathBuf) -> Result { - let mut db = Self::new(path, true); + pub fn open_read_only_with_parallel_scheduler( + path: PathBuf, + parallel_scheduler: S, + ) -> Result { + let mut db = Self::new(path, true, parallel_scheduler); db.open_directory(false)?; Ok(db) } @@ -341,16 +361,12 @@ impl TurboPersistence { meta_files.retain(|seq| !deleted_files.contains(seq)); meta_files.sort_unstable(); - let span = Span::current(); - let mut meta_files = meta_files - .into_par_iter() - .with_min_len(1) - .map(|seq| { - let _span = span.enter(); + let mut meta_files = self + .parallel_scheduler + .parallel_map_collect::<_, _, Result>>(&meta_files, |&seq| { let meta_file = MetaFile::open(&self.path, seq)?; Ok(meta_file) - }) - .collect::>>()?; + })?; let mut sst_filter = SstFilter::new(); for meta_file in meta_files.iter_mut().rev() { @@ -398,7 +414,7 @@ impl TurboPersistence { /// This data will only become visible after the WriteBatch is committed. pub fn write_batch( &self, - ) -> Result> { + ) -> Result> { if self.read_only { bail!("Cannot write to a read-only database"); } @@ -413,7 +429,11 @@ impl TurboPersistence { ); } let current = self.inner.read().current_sequence_number; - Ok(WriteBatch::new(self.path.clone(), current)) + Ok(WriteBatch::new( + self.path.clone(), + current, + self.parallel_scheduler.clone(), + )) } fn open_log(&self) -> Result> { @@ -432,7 +452,7 @@ impl TurboPersistence { /// visible to readers. 
pub fn commit_write_batch( &self, - mut write_batch: WriteBatch, + mut write_batch: WriteBatch, ) -> Result<()> { if self.read_only { unreachable!("It's not possible to create a write batch for a read-only database"); @@ -475,27 +495,31 @@ impl TurboPersistence { new_meta_files.sort_unstable_by_key(|(seq, _)| *seq); - let mut new_meta_files = new_meta_files - .into_par_iter() - .with_min_len(1) - .map(|(seq, file)| { - file.sync_all()?; - let meta_file = MetaFile::open(&self.path, seq)?; - Ok(meta_file) - }) - .collect::>>()?; + let mut new_meta_files = self + .parallel_scheduler + .vec_into_parallel_map_collect::<_, _, Result>>( + new_meta_files, + |(seq, file)| { + file.sync_all()?; + let meta_file = MetaFile::open(&self.path, seq)?; + Ok(meta_file) + }, + )?; let mut sst_filter = SstFilter::new(); for meta_file in new_meta_files.iter_mut().rev() { sst_filter.apply_filter(meta_file); } - for (_, file) in new_sst_files.iter() { - file.sync_all()?; - } - for (_, file) in new_blob_files.iter() { - file.sync_all()?; - } + self.parallel_scheduler.block_in_place(|| { + for (_, file) in new_sst_files.iter() { + file.sync_all()?; + } + for (_, file) in new_blob_files.iter() { + file.sync_all()?; + } + anyhow::Ok(()) + })?; let new_meta_info = new_meta_files .iter() @@ -548,86 +572,88 @@ impl TurboPersistence { inner.current_sequence_number = seq; } - if has_delete_file { - sst_seq_numbers_to_delete.sort_unstable(); - meta_seq_numbers_to_delete.sort_unstable(); - blob_seq_numbers_to_delete.sort_unstable(); - // Write *.del file, marking the selected files as to delete - let mut buf = Vec::with_capacity( - (sst_seq_numbers_to_delete.len() - + meta_seq_numbers_to_delete.len() - + blob_seq_numbers_to_delete.len()) - * size_of::(), - ); - for seq in sst_seq_numbers_to_delete.iter() { - buf.write_u32::(*seq)?; - } - for seq in meta_seq_numbers_to_delete.iter() { - buf.write_u32::(*seq)?; - } - for seq in blob_seq_numbers_to_delete.iter() { - buf.write_u32::(*seq)?; - } - let mut file = File::create(self.path.join(format!("{seq:08}.del")))?; - file.write_all(&buf)?; - file.sync_all()?; - } - - let mut current_file = OpenOptions::new() - .write(true) - .truncate(false) - .read(false) - .open(self.path.join("CURRENT"))?; - current_file.write_u32::(seq)?; - current_file.sync_all()?; - - for seq in sst_seq_numbers_to_delete.iter() { - fs::remove_file(self.path.join(format!("{seq:08}.sst")))?; - } - for seq in meta_seq_numbers_to_delete.iter() { - fs::remove_file(self.path.join(format!("{seq:08}.meta")))?; - } - for seq in blob_seq_numbers_to_delete.iter() { - fs::remove_file(self.path.join(format!("{seq:08}.blob")))?; - } - - { - let mut log = self.open_log()?; - writeln!(log, "Time {time}")?; - let span = time.until(Timestamp::now())?; - writeln!(log, "Commit {seq:08} {keys_written} keys in {span:#}")?; - for (seq, family, ssts, obsolete) in new_meta_info { - writeln!(log, "{seq:08} META family:{family}",)?; - for (seq, min, max, size) in ssts { - writeln!( - log, - " {seq:08} SST {min:016x}-{max:016x} {} MiB", - size / 1024 / 1024 - )?; + self.parallel_scheduler.block_in_place(|| { + if has_delete_file { + sst_seq_numbers_to_delete.sort_unstable(); + meta_seq_numbers_to_delete.sort_unstable(); + blob_seq_numbers_to_delete.sort_unstable(); + // Write *.del file, marking the selected files as to delete + let mut buf = Vec::with_capacity( + (sst_seq_numbers_to_delete.len() + + meta_seq_numbers_to_delete.len() + + blob_seq_numbers_to_delete.len()) + * size_of::(), + ); + for seq in 
sst_seq_numbers_to_delete.iter() { + buf.write_u32::(*seq)?; } - for seq in obsolete { - writeln!(log, " {seq:08} OBSOLETE SST")?; + for seq in meta_seq_numbers_to_delete.iter() { + buf.write_u32::(*seq)?; } + for seq in blob_seq_numbers_to_delete.iter() { + buf.write_u32::(*seq)?; + } + let mut file = File::create(self.path.join(format!("{seq:08}.del")))?; + file.write_all(&buf)?; + file.sync_all()?; } - new_sst_files.sort_unstable_by_key(|(seq, _)| *seq); - for (seq, _) in new_sst_files.iter() { - writeln!(log, "{seq:08} NEW SST")?; - } - new_blob_files.sort_unstable_by_key(|(seq, _)| *seq); - for (seq, _) in new_blob_files.iter() { - writeln!(log, "{seq:08} NEW BLOB")?; - } + + let mut current_file = OpenOptions::new() + .write(true) + .truncate(false) + .read(false) + .open(self.path.join("CURRENT"))?; + current_file.write_u32::(seq)?; + current_file.sync_all()?; + for seq in sst_seq_numbers_to_delete.iter() { - writeln!(log, "{seq:08} SST DELETED")?; + fs::remove_file(self.path.join(format!("{seq:08}.sst")))?; } for seq in meta_seq_numbers_to_delete.iter() { - writeln!(log, "{seq:08} META DELETED")?; + fs::remove_file(self.path.join(format!("{seq:08}.meta")))?; } for seq in blob_seq_numbers_to_delete.iter() { - writeln!(log, "{seq:08} BLOB DELETED")?; + fs::remove_file(self.path.join(format!("{seq:08}.blob")))?; } - } + { + let mut log = self.open_log()?; + writeln!(log, "Time {time}")?; + let span = time.until(Timestamp::now())?; + writeln!(log, "Commit {seq:08} {keys_written} keys in {span:#}")?; + for (seq, family, ssts, obsolete) in new_meta_info { + writeln!(log, "{seq:08} META family:{family}",)?; + for (seq, min, max, size) in ssts { + writeln!( + log, + " {seq:08} SST {min:016x}-{max:016x} {} MiB", + size / 1024 / 1024 + )?; + } + for seq in obsolete { + writeln!(log, " {seq:08} OBSOLETE SST")?; + } + } + new_sst_files.sort_unstable_by_key(|(seq, _)| *seq); + for (seq, _) in new_sst_files.iter() { + writeln!(log, "{seq:08} NEW SST")?; + } + new_blob_files.sort_unstable_by_key(|(seq, _)| *seq); + for (seq, _) in new_blob_files.iter() { + writeln!(log, "{seq:08} NEW BLOB")?; + } + for seq in sst_seq_numbers_to_delete.iter() { + writeln!(log, "{seq:08} SST DELETED")?; + } + for seq in meta_seq_numbers_to_delete.iter() { + writeln!(log, "{seq:08} META DELETED")?; + } + for seq in blob_seq_numbers_to_delete.iter() { + writeln!(log, "{seq:08} BLOB DELETED")?; + } + } + anyhow::Ok(()) + })?; Ok(()) } @@ -650,7 +676,7 @@ impl TurboPersistence { /// files is above the given threshold. The coverage is the average number of SST files that /// need to be read to find a key. It also limits the maximum number of SST files that are /// merged at once, which is the main factor for the runtime of the compaction. - pub fn compact(&self, compact_config: &CompactConfig) -> Result<()> { + pub fn compact(&self, compact_config: &CompactConfig) -> Result { if self.read_only { bail!("Compaction is not allowed on a read only database"); } @@ -689,7 +715,8 @@ impl TurboPersistence { .context("Failed to compact database")?; } - if !new_meta_files.is_empty() { + let has_changes = !new_meta_files.is_empty(); + if has_changes { self.commit(CommitOptions { new_meta_files, new_sst_files, @@ -704,7 +731,7 @@ impl TurboPersistence { self.active_write_operation.store(false, Ordering::Release); - Ok(()) + Ok(has_changes) } /// Internal function to perform a compaction. 
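The hunks above make `TurboPersistence` generic over a `ParallelScheduler`, keep `open`/`open_read_only` as thin wrappers that fall back to the serial scheduler, and change `compact` to return whether any new files were committed (`has_changes` is derived from `!new_meta_files.is_empty()`). A minimal caller-side sketch under those assumptions — the helper name and path handling are illustrative, not part of this PR:

```rust
use std::path::PathBuf;

use anyhow::Result;
use turbo_persistence::{CompactConfig, TurboPersistence};

/// Illustrative helper: open a database with the default (serial) scheduler
/// and run one compaction pass, reporting whether anything was rewritten.
fn compact_once(path: PathBuf) -> Result<bool> {
    // `open` delegates to `open_with_parallel_scheduler(path, Default::default())`,
    // so this uses `SerialScheduler`; callers wanting parallelism pass their own
    // `ParallelScheduler` implementation instead.
    let db = TurboPersistence::open(path)?;

    // `compact` now returns `Ok(true)` only when the compaction committed new
    // meta/SST files, letting callers skip follow-up work when nothing changed.
    db.compact(&CompactConfig::default())
}
```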
@@ -777,7 +804,6 @@ impl TurboPersistence { let path = &self.path; let log_mutex = Mutex::new(()); - let span = Span::current(); struct PartialResultPerFamily { new_meta_file: Option<(u32, File)>, @@ -789,335 +815,350 @@ impl TurboPersistence { let mut compact_config = compact_config.clone(); let merge_jobs = sst_by_family - .iter() - .map(|ssts_with_ranges| { + .into_iter() + .enumerate() + .filter_map(|(family, ssts_with_ranges)| { if compact_config.max_merge_segment_count == 0 { - return Vec::new(); + return None; } - let merge_jobs = get_merge_segments(ssts_with_ranges, &compact_config); + let merge_jobs = get_merge_segments(&ssts_with_ranges, &compact_config); compact_config.max_merge_segment_count -= merge_jobs.len(); - merge_jobs + Some((family, ssts_with_ranges, merge_jobs)) }) .collect::>(); - let result = sst_by_family - .into_par_iter() - .zip(merge_jobs.into_par_iter()) - .with_min_len(1) - .enumerate() - .map(|(family, (ssts_with_ranges, merge_jobs))| { - let family = family as u32; - let _span = span.clone().entered(); - - if merge_jobs.is_empty() { - return Ok(PartialResultPerFamily { - new_meta_file: None, - new_sst_files: Vec::new(), - sst_seq_numbers_to_delete: Vec::new(), - blob_seq_numbers_to_delete: Vec::new(), - keys_written: 0, - }); - } - - { - let metrics = compute_metrics(&ssts_with_ranges, 0..=u64::MAX); - let guard = log_mutex.lock(); - let mut log = self.open_log()?; - writeln!( - log, - "Compaction for family {family} (coverage: {}, overlap: {}, duplication: \ - {} / {} MiB):", - metrics.coverage, - metrics.overlap, - metrics.duplication, - metrics.duplicated_size / 1024 / 1024 - )?; - for job in merge_jobs.iter() { - writeln!(log, " merge")?; - for i in job.iter() { - let seq = ssts_with_ranges[*i].seq; - let (min, max) = ssts_with_ranges[*i].range().into_inner(); - writeln!(log, " {seq:08} {min:016x}-{max:016x}")?; - } + let result = self + .parallel_scheduler + .vec_into_parallel_map_collect::<_, _, Result>>( + merge_jobs, + |(family, ssts_with_ranges, merge_jobs)| { + let family = family as u32; + + if merge_jobs.is_empty() { + return Ok(PartialResultPerFamily { + new_meta_file: None, + new_sst_files: Vec::new(), + sst_seq_numbers_to_delete: Vec::new(), + blob_seq_numbers_to_delete: Vec::new(), + keys_written: 0, + }); } - drop(guard); - } - // Later we will remove the merged files - let sst_seq_numbers_to_delete = merge_jobs - .iter() - .filter(|l| l.len() > 1) - .flat_map(|l| l.iter().copied()) - .map(|index| ssts_with_ranges[index].seq) - .collect::>(); - - // Merge SST files - let span = tracing::trace_span!("merge files"); - enum PartialMergeResult<'l> { - Merged { - new_sst_files: Vec<(u32, File, StaticSortedFileBuilderMeta<'static>)>, - blob_seq_numbers_to_delete: Vec, - keys_written: u64, - }, - Move { - seq: u32, - meta: StaticSortedFileBuilderMeta<'l>, - }, - } - let merge_result = merge_jobs - .into_par_iter() - .with_min_len(1) - .map(|indices| { - let _span = span.clone().entered(); - if indices.len() == 1 { - // If we only have one file, we can just move it - let index = indices[0]; - let meta_index = ssts_with_ranges[index].meta_index; - let index_in_meta = ssts_with_ranges[index].index_in_meta; - let meta_file = &meta_files[meta_index]; - let entry = meta_file.entry(index_in_meta); - let amqf = Cow::Borrowed(entry.raw_amqf(meta_file.amqf_data())); - let meta = StaticSortedFileBuilderMeta { - min_hash: entry.min_hash(), - max_hash: entry.max_hash(), - amqf, - key_compression_dictionary_length: entry - .key_compression_dictionary_length(), - 
value_compression_dictionary_length: entry - .value_compression_dictionary_length(), - block_count: entry.block_count(), - size: entry.size(), - entries: 0, - }; - return Ok(PartialMergeResult::Move { - seq: entry.sequence_number(), - meta, - }); + self.parallel_scheduler.block_in_place(|| { + let metrics = compute_metrics(&ssts_with_ranges, 0..=u64::MAX); + let guard = log_mutex.lock(); + let mut log = self.open_log()?; + writeln!( + log, + "Compaction for family {family} (coverage: {}, overlap: {}, \ + duplication: {} / {} MiB):", + metrics.coverage, + metrics.overlap, + metrics.duplication, + metrics.duplicated_size / 1024 / 1024 + )?; + for job in merge_jobs.iter() { + writeln!(log, " merge")?; + for i in job.iter() { + let seq = ssts_with_ranges[*i].seq; + let (min, max) = ssts_with_ranges[*i].range().into_inner(); + writeln!(log, " {seq:08} {min:016x}-{max:016x}")?; + } } + drop(guard); + anyhow::Ok(()) + })?; - fn create_sst_file( - entries: &[LookupEntry], - total_key_size: usize, - total_value_size: usize, - path: &Path, + // Later we will remove the merged files + let sst_seq_numbers_to_delete = merge_jobs + .iter() + .filter(|l| l.len() > 1) + .flat_map(|l| l.iter().copied()) + .map(|index| ssts_with_ranges[index].seq) + .collect::>(); + + // Merge SST files + let span = tracing::trace_span!("merge files"); + enum PartialMergeResult<'l> { + Merged { + new_sst_files: Vec<(u32, File, StaticSortedFileBuilderMeta<'static>)>, + blob_seq_numbers_to_delete: Vec, + keys_written: u64, + }, + Move { seq: u32, - ) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)> - { - let _span = tracing::trace_span!("write merged sst file").entered(); - let (meta, file) = write_static_stored_file( - entries, - total_key_size, - total_value_size, - &path.join(format!("{seq:08}.sst")), - )?; - Ok((seq, file, meta)) - } + meta: StaticSortedFileBuilderMeta<'l>, + }, + } + let merge_result = self + .parallel_scheduler + .vec_into_parallel_map_collect::<_, _, Result>>( + merge_jobs, + |indices| { + let _span = span.clone().entered(); + if indices.len() == 1 { + // If we only have one file, we can just move it + let index = indices[0]; + let meta_index = ssts_with_ranges[index].meta_index; + let index_in_meta = ssts_with_ranges[index].index_in_meta; + let meta_file = &meta_files[meta_index]; + let entry = meta_file.entry(index_in_meta); + let amqf = Cow::Borrowed(entry.raw_amqf(meta_file.amqf_data())); + let meta = StaticSortedFileBuilderMeta { + min_hash: entry.min_hash(), + max_hash: entry.max_hash(), + amqf, + key_compression_dictionary_length: entry + .key_compression_dictionary_length(), + value_compression_dictionary_length: entry + .value_compression_dictionary_length(), + block_count: entry.block_count(), + size: entry.size(), + entries: 0, + }; + return Ok(PartialMergeResult::Move { + seq: entry.sequence_number(), + meta, + }); + } + + fn create_sst_file( + parallel_scheduler: &S, + entries: &[LookupEntry], + total_key_size: usize, + total_value_size: usize, + path: &Path, + seq: u32, + ) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)> + { + let _span = + tracing::trace_span!("write merged sst file").entered(); + let (meta, file) = parallel_scheduler.block_in_place(|| { + write_static_stored_file( + entries, + total_key_size, + total_value_size, + &path.join(format!("{seq:08}.sst")), + ) + })?; + Ok((seq, file, meta)) + } - let mut new_sst_files = Vec::new(); - - // Iterate all SST files - let iters = indices - .iter() - .map(|&index| { - let meta_index = 
ssts_with_ranges[index].meta_index; - let index_in_meta = ssts_with_ranges[index].index_in_meta; - let meta = &meta_files[meta_index]; - meta.entry(index_in_meta) - .sst(meta)? - .iter(key_block_cache, value_block_cache) - }) - .collect::>>()?; - - let iter = MergeIter::new(iters.into_iter())?; - - // TODO figure out how to delete blobs when they are no longer - // referenced - let blob_seq_numbers_to_delete: Vec = Vec::new(); - - let mut keys_written = 0; - - let mut total_key_size = 0; - let mut total_value_size = 0; - let mut current: Option = None; - let mut entries = Vec::new(); - let mut last_entries = Vec::new(); - let mut last_entries_total_sizes = (0, 0); - for entry in iter { - let entry = entry?; - - // Remove duplicates - if let Some(current) = current.take() { - if current.key != entry.key { - let key_size = current.key.len(); - let value_size = current.value.size_in_sst(); - total_key_size += key_size; - total_value_size += value_size; - - if total_key_size + total_value_size - > DATA_THRESHOLD_PER_COMPACTED_FILE - || entries.len() >= MAX_ENTRIES_PER_COMPACTED_FILE - { - let (selected_total_key_size, selected_total_value_size) = - last_entries_total_sizes; - swap(&mut entries, &mut last_entries); - last_entries_total_sizes = ( - total_key_size - key_size, - total_value_size - value_size, - ); - total_key_size = key_size; - total_value_size = value_size; - - if !entries.is_empty() { - let seq = - sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - - keys_written += entries.len() as u64; - new_sst_files.push(create_sst_file( - &entries, - selected_total_key_size, - selected_total_value_size, - path, - seq, - )?); - - entries.clear(); + let mut new_sst_files = Vec::new(); + + // Iterate all SST files + let iters = indices + .iter() + .map(|&index| { + let meta_index = ssts_with_ranges[index].meta_index; + let index_in_meta = ssts_with_ranges[index].index_in_meta; + let meta = &meta_files[meta_index]; + meta.entry(index_in_meta) + .sst(meta)? 
+ .iter(key_block_cache, value_block_cache) + }) + .collect::>>()?; + + let iter = MergeIter::new(iters.into_iter())?; + + // TODO figure out how to delete blobs when they are no longer + // referenced + let blob_seq_numbers_to_delete: Vec = Vec::new(); + + let mut keys_written = 0; + + let mut total_key_size = 0; + let mut total_value_size = 0; + let mut current: Option = None; + let mut entries = Vec::new(); + let mut last_entries = Vec::new(); + let mut last_entries_total_sizes = (0, 0); + for entry in iter { + let entry = entry?; + + // Remove duplicates + if let Some(current) = current.take() { + if current.key != entry.key { + let key_size = current.key.len(); + let value_size = current.value.size_in_sst(); + total_key_size += key_size; + total_value_size += value_size; + + if total_key_size + total_value_size + > DATA_THRESHOLD_PER_COMPACTED_FILE + || entries.len() >= MAX_ENTRIES_PER_COMPACTED_FILE + { + let ( + selected_total_key_size, + selected_total_value_size, + ) = last_entries_total_sizes; + swap(&mut entries, &mut last_entries); + last_entries_total_sizes = ( + total_key_size - key_size, + total_value_size - value_size, + ); + total_key_size = key_size; + total_value_size = value_size; + + if !entries.is_empty() { + let seq = sequence_number + .fetch_add(1, Ordering::SeqCst) + + 1; + + keys_written += entries.len() as u64; + new_sst_files.push(create_sst_file( + &self.parallel_scheduler, + &entries, + selected_total_key_size, + selected_total_value_size, + path, + seq, + )?); + + entries.clear(); + } + } + + entries.push(current); + } else { + // Override value } } + current = Some(entry); + } + if let Some(entry) = current { + total_key_size += entry.key.len(); + total_value_size += entry.value.size_in_sst(); + entries.push(entry); + } - entries.push(current); - } else { - // Override value + // If we have one set of entries left, write them to a new SST file + if last_entries.is_empty() && !entries.is_empty() { + let seq = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; + + keys_written += entries.len() as u64; + new_sst_files.push(create_sst_file( + &self.parallel_scheduler, + &entries, + total_key_size, + total_value_size, + path, + seq, + )?); + } else + // If we have two sets of entries left, merge them and + // split it into two SST files, to avoid having a + // single SST file that is very small. 
+ if !last_entries.is_empty() { + last_entries.append(&mut entries); + + last_entries_total_sizes.0 += total_key_size; + last_entries_total_sizes.1 += total_value_size; + + let (part1, part2) = + last_entries.split_at(last_entries.len() / 2); + + let seq1 = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; + let seq2 = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; + + keys_written += part1.len() as u64; + new_sst_files.push(create_sst_file( + &self.parallel_scheduler, + part1, + // We don't know the exact sizes so we estimate them + last_entries_total_sizes.0 / 2, + last_entries_total_sizes.1 / 2, + path, + seq1, + )?); + + keys_written += part2.len() as u64; + new_sst_files.push(create_sst_file( + &self.parallel_scheduler, + part2, + last_entries_total_sizes.0 / 2, + last_entries_total_sizes.1 / 2, + path, + seq2, + )?); } + Ok(PartialMergeResult::Merged { + new_sst_files, + blob_seq_numbers_to_delete, + keys_written, + }) + }, + ) + .with_context(|| { + format!("Failed to merge database files for family {family}") + })?; + + let Some((sst_files_len, blob_delete_len)) = merge_result + .iter() + .map(|r| { + if let PartialMergeResult::Merged { + new_sst_files, + blob_seq_numbers_to_delete, + keys_written: _, + } = r + { + (new_sst_files.len(), blob_seq_numbers_to_delete.len()) + } else { + (0, 0) } - current = Some(entry); - } - if let Some(entry) = current { - total_key_size += entry.key.len(); - total_value_size += entry.value.size_in_sst(); - entries.push(entry); - } - - // If we have one set of entries left, write them to a new SST file - if last_entries.is_empty() && !entries.is_empty() { - let seq = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - - keys_written += entries.len() as u64; - new_sst_files.push(create_sst_file( - &entries, - total_key_size, - total_value_size, - path, - seq, - )?); - } else - // If we have two sets of entries left, merge them and - // split it into two SST files, to avoid having a - // single SST file that is very small. 
- if !last_entries.is_empty() { - last_entries.append(&mut entries); - - last_entries_total_sizes.0 += total_key_size; - last_entries_total_sizes.1 += total_value_size; - - let (part1, part2) = last_entries.split_at(last_entries.len() / 2); - - let seq1 = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - let seq2 = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - - keys_written += part1.len() as u64; - new_sst_files.push(create_sst_file( - part1, - // We don't know the exact sizes so we estimate them - last_entries_total_sizes.0 / 2, - last_entries_total_sizes.1 / 2, - path, - seq1, - )?); - - keys_written += part2.len() as u64; - new_sst_files.push(create_sst_file( - part2, - last_entries_total_sizes.0 / 2, - last_entries_total_sizes.1 / 2, - path, - seq2, - )?); - } - Ok(PartialMergeResult::Merged { - new_sst_files, - blob_seq_numbers_to_delete, - keys_written, }) - }) - .collect::>>() - .with_context(|| { - format!("Failed to merge database files for family {family}") - })?; - - let Some((sst_files_len, blob_delete_len)) = merge_result - .iter() - .map(|r| { - if let PartialMergeResult::Merged { - new_sst_files, - blob_seq_numbers_to_delete, - keys_written: _, - } = r - { - (new_sst_files.len(), blob_seq_numbers_to_delete.len()) - } else { - (0, 0) - } - }) - .reduce(|(a1, a2), (b1, b2)| (a1 + b1, a2 + b2)) - else { - unreachable!() - }; - - let mut new_sst_files = Vec::with_capacity(sst_files_len); - let mut blob_seq_numbers_to_delete = Vec::with_capacity(blob_delete_len); - - let mut meta_file_builder = MetaFileBuilder::new(family); - - let mut keys_written = 0; - for result in merge_result { - match result { - PartialMergeResult::Merged { - new_sst_files: merged_new_sst_files, - blob_seq_numbers_to_delete: merged_blob_seq_numbers_to_delete, - keys_written: merged_keys_written, - } => { - for (seq, file, meta) in merged_new_sst_files { + .reduce(|(a1, a2), (b1, b2)| (a1 + b1, a2 + b2)) + else { + unreachable!() + }; + + let mut new_sst_files = Vec::with_capacity(sst_files_len); + let mut blob_seq_numbers_to_delete = Vec::with_capacity(blob_delete_len); + + let mut meta_file_builder = MetaFileBuilder::new(family); + + let mut keys_written = 0; + for result in merge_result { + match result { + PartialMergeResult::Merged { + new_sst_files: merged_new_sst_files, + blob_seq_numbers_to_delete: merged_blob_seq_numbers_to_delete, + keys_written: merged_keys_written, + } => { + for (seq, file, meta) in merged_new_sst_files { + meta_file_builder.add(seq, meta); + new_sst_files.push((seq, file)); + } + blob_seq_numbers_to_delete + .extend(merged_blob_seq_numbers_to_delete); + keys_written += merged_keys_written; + } + PartialMergeResult::Move { seq, meta } => { meta_file_builder.add(seq, meta); - new_sst_files.push((seq, file)); } - blob_seq_numbers_to_delete.extend(merged_blob_seq_numbers_to_delete); - keys_written += merged_keys_written; - } - PartialMergeResult::Move { seq, meta } => { - meta_file_builder.add(seq, meta); } } - } - for &seq in sst_seq_numbers_to_delete.iter() { - meta_file_builder.add_obsolete_sst_file(seq); - } + for &seq in sst_seq_numbers_to_delete.iter() { + meta_file_builder.add_obsolete_sst_file(seq); + } - let seq = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - let meta_file = { - let _span = tracing::trace_span!("write meta file").entered(); - meta_file_builder.write(&self.path, seq)? 
- }; - - Ok(PartialResultPerFamily { - new_meta_file: Some((seq, meta_file)), - new_sst_files, - sst_seq_numbers_to_delete, - blob_seq_numbers_to_delete, - keys_written, - }) - }) - .collect::>>()?; + let seq = sequence_number.fetch_add(1, Ordering::SeqCst) + 1; + let meta_file = { + let _span = tracing::trace_span!("write meta file").entered(); + self.parallel_scheduler + .block_in_place(|| meta_file_builder.write(&self.path, seq))? + }; + + Ok(PartialResultPerFamily { + new_meta_file: Some((seq, meta_file)), + new_sst_files, + sst_seq_numbers_to_delete, + blob_seq_numbers_to_delete, + keys_written, + }) + }, + )?; for PartialResultPerFamily { new_meta_file: inner_new_meta_file, diff --git a/turbopack/crates/turbo-persistence/src/lib.rs b/turbopack/crates/turbo-persistence/src/lib.rs index 70c87199f396c..f944e4b4d1202 100644 --- a/turbopack/crates/turbo-persistence/src/lib.rs +++ b/turbopack/crates/turbo-persistence/src/lib.rs @@ -13,19 +13,21 @@ mod db; mod key; mod lookup_entry; mod merge_iter; +mod meta_file; +mod meta_file_builder; +mod parallel_scheduler; +mod sst_filter; mod static_sorted_file; mod static_sorted_file_builder; +mod value_buf; mod write_batch; -mod meta_file; -mod meta_file_builder; -mod sst_filter; #[cfg(test)] mod tests; -mod value_buf; pub use arc_slice::ArcSlice; pub use db::{CompactConfig, MetaFileEntryInfo, MetaFileInfo, TurboPersistence}; pub use key::{KeyBase, QueryKey, StoreKey}; +pub use parallel_scheduler::{ParallelScheduler, SerialScheduler}; pub use value_buf::ValueBuffer; pub use write_batch::WriteBatch; diff --git a/turbopack/crates/turbo-persistence/src/parallel_scheduler.rs b/turbopack/crates/turbo-persistence/src/parallel_scheduler.rs new file mode 100644 index 0000000000000..b2415b54aa423 --- /dev/null +++ b/turbopack/crates/turbo-persistence/src/parallel_scheduler.rs @@ -0,0 +1,148 @@ +pub trait ParallelScheduler: Clone + Sync + Send { + fn block_in_place(&self, f: impl FnOnce() -> R + Send) -> R + where + R: Send; + + fn parallel_for_each(&self, items: &[T], f: impl Fn(&T) + Send + Sync) + where + T: Sync; + + fn try_parallel_for_each<'l, T, E>( + &self, + items: &'l [T], + f: impl (Fn(&'l T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send + 'static; + + fn try_parallel_for_each_mut<'l, T, E>( + &self, + items: &'l mut [T], + f: impl (Fn(&'l mut T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send + 'static; + + fn try_vec_into_parallel_for_each( + &self, + items: Vec, + f: impl (Fn(T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send + 'static; + + fn parallel_map_collect<'l, T, I, R>( + &self, + items: &'l [T], + f: impl Fn(&'l T) -> I + Send + Sync, + ) -> R + where + T: Sync, + I: Send + Sync + 'l, + R: FromIterator; + + fn vec_into_parallel_map_collect( + &self, + items: Vec, + f: impl Fn(T) -> I + Send + Sync, + ) -> R + where + T: Send + Sync, + I: Send + Sync, + R: FromIterator; +} + +#[derive(Clone, Copy, Default)] +pub struct SerialScheduler; + +impl ParallelScheduler for SerialScheduler { + fn block_in_place(&self, f: impl FnOnce() -> R + Send) -> R + where + R: Send, + { + f() + } + + fn parallel_for_each(&self, items: &[T], f: impl Fn(&T) + Send + Sync) + where + T: Sync, + { + for item in items { + f(item); + } + } + + fn try_parallel_for_each<'l, T, E>( + &self, + items: &'l [T], + f: impl (Fn(&'l T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send, + { + for item in items { + f(item)?; 
+ } + Ok(()) + } + + fn try_parallel_for_each_mut<'l, T, E>( + &self, + items: &'l mut [T], + f: impl (Fn(&'l mut T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send, + { + for item in items { + f(item)?; + } + Ok(()) + } + + fn try_vec_into_parallel_for_each( + &self, + items: Vec, + f: impl (Fn(T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send, + { + for item in items { + f(item)?; + } + Ok(()) + } + + fn parallel_map_collect<'l, T, I, R>( + &self, + items: &'l [T], + f: impl Fn(&'l T) -> I + Send + Sync, + ) -> R + where + T: Sync, + I: Send + Sync + 'l, + R: FromIterator, + { + items.iter().map(f).collect() + } + + fn vec_into_parallel_map_collect( + &self, + items: Vec, + f: impl Fn(T) -> I + Send + Sync, + ) -> R + where + T: Send + Sync, + I: Send + Sync, + R: FromIterator, + { + items.into_iter().map(f).collect() + } +} diff --git a/turbopack/crates/turbo-persistence/src/tests.rs b/turbopack/crates/turbo-persistence/src/tests.rs index 5c123611d8759..dc9e7edb36374 100644 --- a/turbopack/crates/turbo-persistence/src/tests.rs +++ b/turbopack/crates/turbo-persistence/src/tests.rs @@ -6,28 +6,123 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use crate::{ constants::MAX_MEDIUM_VALUE_SIZE, db::{CompactConfig, TurboPersistence}, + parallel_scheduler::ParallelScheduler, write_batch::WriteBatch, }; +#[derive(Clone, Copy)] +struct RayonParallelScheduler; + +impl ParallelScheduler for RayonParallelScheduler { + fn block_in_place(&self, f: impl FnOnce() -> R + Send) -> R + where + R: Send, + { + f() + } + + fn parallel_for_each(&self, items: &[T], f: impl Fn(&T) + Send + Sync) + where + T: Sync, + { + items.into_par_iter().for_each(f); + } + + fn try_parallel_for_each<'l, T, E>( + &self, + items: &'l [T], + f: impl (Fn(&'l T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send, + { + items.into_par_iter().try_for_each(f) + } + + fn try_parallel_for_each_mut<'l, T, E>( + &self, + items: &'l mut [T], + f: impl (Fn(&'l mut T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send, + { + items.into_par_iter().try_for_each(f) + } + + fn try_vec_into_parallel_for_each( + &self, + items: Vec, + f: impl (Fn(T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send, + { + items.into_par_iter().try_for_each(f) + } + + fn parallel_map_collect<'l, T, I, R>( + &self, + items: &'l [T], + f: impl Fn(&'l T) -> I + Send + Sync, + ) -> R + where + T: Sync, + I: Send + Sync, + R: FromIterator, + { + items + .into_par_iter() + .map(f) + .collect_vec_list() + .into_iter() + .flatten() + .collect() + } + + fn vec_into_parallel_map_collect( + &self, + items: Vec, + f: impl Fn(T) -> I + Send + Sync, + ) -> R + where + T: Send + Sync, + I: Send + Sync, + R: FromIterator, + { + items + .into_par_iter() + .map(f) + .collect_vec_list() + .into_iter() + .flatten() + .collect() + } +} + #[test] fn full_cycle() -> Result<()> { let mut test_cases = Vec::new(); type TestCases = Vec<( &'static str, - Box, 16>) -> Result<()>>, - Box Result<()>>, + Box, RayonParallelScheduler, 16>) -> Result<()>>, + Box) -> Result<()>>, )>; fn test_case( test_cases: &mut TestCases, name: &'static str, - write: impl Fn(&mut WriteBatch, 16>) -> Result<()> + 'static, - read: impl Fn(&TurboPersistence) -> Result<()> + 'static, + write: impl Fn(&mut WriteBatch, RayonParallelScheduler, 16>) -> Result<()> + 'static, + read: impl Fn(&TurboPersistence) -> Result<()> + 
'static, ) { test_cases.push(( name, - Box::new(write) as Box, 16>) -> Result<()>>, - Box::new(read) as Box Result<()>>, + Box::new(write) + as Box, RayonParallelScheduler, 16>) -> Result<()>>, + Box::new(read) as Box) -> Result<()>>, )); } @@ -215,7 +310,10 @@ fn full_cycle() -> Result<()> { { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let mut batch = db.write_batch()?; write(&mut batch)?; db.commit_write_batch(batch)?; @@ -231,7 +329,10 @@ fn full_cycle() -> Result<()> { } { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; println!("{name} restore time: {:?}", start.elapsed()); let start = Instant::now(); read(&db)?; @@ -257,7 +358,10 @@ fn full_cycle() -> Result<()> { } { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; println!("{name} restore time after compact: {:?}", start.elapsed()); let start = Instant::now(); read(&db)?; @@ -291,7 +395,10 @@ fn full_cycle() -> Result<()> { { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let mut batch = db.write_batch()?; for (_, write, _) in test_cases.iter() { write(&mut batch)?; @@ -311,7 +418,10 @@ fn full_cycle() -> Result<()> { } { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; println!("All restore time: {:?}", start.elapsed()); for (name, _, read) in test_cases.iter() { let start = Instant::now(); @@ -343,7 +453,10 @@ fn full_cycle() -> Result<()> { { let start = Instant::now(); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; println!("All restore time after compact: {:?}", start.elapsed()); for (name, _, read) in test_cases.iter() { @@ -383,13 +496,17 @@ fn persist_changes() -> Result<()> { let path = tempdir.path(); const READ_COUNT: u32 = 2_000; // we'll read every 10th value, so writes are 10x this value - fn put(b: &WriteBatch<(u8, [u8; 4]), 1>, key: u8, value: u8) -> Result<()> { + fn put( + b: &WriteBatch<(u8, [u8; 4]), RayonParallelScheduler, 1>, + key: u8, + value: u8, + ) -> Result<()> { for i in 0..(READ_COUNT * 10) { b.put(0, (key, i.to_be_bytes()), vec![value].into())?; } Ok(()) } - fn check(db: &TurboPersistence, key: u8, value: u8) -> Result<()> { + fn check(db: &TurboPersistence, key: u8, value: u8) -> Result<()> { for i in 0..READ_COUNT { // read every 10th item let i = i * 10; @@ -402,7 +519,10 @@ fn persist_changes() -> Result<()> { } { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; put(&b, 1, 11)?; put(&b, 2, 21)?; @@ -418,7 +538,10 @@ fn persist_changes() -> Result<()> { println!("---"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + 
path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; put(&b, 1, 12)?; put(&b, 2, 22)?; @@ -432,7 +555,10 @@ fn persist_changes() -> Result<()> { } { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; put(&b, 1, 13)?; db.commit_write_batch(b)?; @@ -446,7 +572,10 @@ fn persist_changes() -> Result<()> { println!("---"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; check(&db, 1, 13)?; check(&db, 2, 22)?; @@ -457,7 +586,10 @@ fn persist_changes() -> Result<()> { println!("---"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; db.compact(&CompactConfig { optimal_merge_count: 4, @@ -475,7 +607,10 @@ fn persist_changes() -> Result<()> { println!("---"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; check(&db, 1, 13)?; check(&db, 2, 22)?; @@ -493,13 +628,17 @@ fn partial_compaction() -> Result<()> { let path = tempdir.path(); const READ_COUNT: u32 = 2_000; // we'll read every 10th value, so writes are 10x this value - fn put(b: &WriteBatch<(u8, [u8; 4]), 1>, key: u8, value: u8) -> Result<()> { + fn put( + b: &WriteBatch<(u8, [u8; 4]), RayonParallelScheduler, 1>, + key: u8, + value: u8, + ) -> Result<()> { for i in 0..(READ_COUNT * 10) { b.put(0, (key, i.to_be_bytes()), vec![value].into())?; } Ok(()) } - fn check(db: &TurboPersistence, key: u8, value: u8) -> Result<()> { + fn check(db: &TurboPersistence, key: u8, value: u8) -> Result<()> { for i in 0..READ_COUNT { // read every 10th item let i = i * 10; @@ -516,7 +655,10 @@ fn partial_compaction() -> Result<()> { println!("--- Iteration {i} ---"); println!("Add more entries"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; put(&b, i, i)?; put(&b, i + 1, i)?; @@ -535,7 +677,10 @@ fn partial_compaction() -> Result<()> { println!("Compaction"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; db.compact(&CompactConfig { optimal_merge_count: 4, @@ -556,7 +701,10 @@ fn partial_compaction() -> Result<()> { println!("Restore check"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; for j in 0..i { check(&db, j, j)?; @@ -580,7 +728,11 @@ fn merge_file_removal() -> Result<()> { let _ = fs::remove_dir_all(path); const READ_COUNT: u32 = 2_000; // we'll read every 10th value, so writes are 10x this value - fn put(b: &WriteBatch<(u8, [u8; 4]), 1>, key: u8, value: u32) -> Result<()> { + fn put( + b: &WriteBatch<(u8, [u8; 4]), RayonParallelScheduler, 1>, + key: u8, + value: u32, + ) -> Result<()> { for i in 0..(READ_COUNT * 10) { b.put( 0, @@ -590,7 +742,7 @@ fn merge_file_removal() -> Result<()> { } Ok(()) } - fn check(db: &TurboPersistence, key: u8, value: u32) -> Result<()> { + fn check(db: &TurboPersistence, key: 
u8, value: u32) -> Result<()> { for i in 0..READ_COUNT { // read every 10th item let i = i * 10; @@ -608,7 +760,10 @@ fn merge_file_removal() -> Result<()> { { println!("--- Init ---"); - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; for j in 0..=255 { put(&b, j, 0)?; @@ -624,7 +779,10 @@ fn merge_file_removal() -> Result<()> { let i = i * 37; println!("Add more entries"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; let b = db.write_batch::<_, 1>()?; for j in iter_bits(i) { println!("Put {j} = {i}"); @@ -642,7 +800,10 @@ fn merge_file_removal() -> Result<()> { println!("Compaction"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; db.compact(&CompactConfig { optimal_merge_count: 4, @@ -660,7 +821,10 @@ fn merge_file_removal() -> Result<()> { println!("Restore check"); { - let db = TurboPersistence::open(path.to_path_buf())?; + let db = TurboPersistence::open_with_parallel_scheduler( + path.to_path_buf(), + RayonParallelScheduler, + )?; for j in 0..32 { check(&db, j, expected_values[j as usize])?; diff --git a/turbopack/crates/turbo-persistence/src/write_batch.rs b/turbopack/crates/turbo-persistence/src/write_batch.rs index 490cf38e88a90..537d6f35aa878 100644 --- a/turbopack/crates/turbo-persistence/src/write_batch.rs +++ b/turbopack/crates/turbo-persistence/src/write_batch.rs @@ -9,15 +9,11 @@ use std::{ use anyhow::{Context, Result}; use byteorder::{BE, WriteBytesExt}; +use either::Either; use lzzzz::lz4::{self, ACC_LEVEL_DEFAULT}; use parking_lot::Mutex; -use rayon::{ - iter::{Either, IndexedParallelIterator, IntoParallelIterator, ParallelIterator}, - scope, -}; use smallvec::SmallVec; use thread_local::ThreadLocal; -use tracing::Span; use crate::{ ValueBuffer, @@ -26,6 +22,7 @@ use crate::{ constants::{MAX_MEDIUM_VALUE_SIZE, THREAD_LOCAL_SIZE_SHIFT}, key::StoreKey, meta_file_builder::MetaFileBuilder, + parallel_scheduler::ParallelScheduler, static_sorted_file_builder::{StaticSortedFileBuilderMeta, write_static_stored_file}, }; @@ -68,7 +65,9 @@ enum GlobalCollectorState { } /// A write batch. -pub struct WriteBatch { +pub struct WriteBatch { + /// Parallel scheduler + parallel_scheduler: S, /// The database path db_path: PathBuf, /// The current sequence number counter. Increased for every new SST file or blob file. @@ -84,13 +83,16 @@ pub struct WriteBatch { new_sst_files: Mutex>, } -impl WriteBatch { +impl + WriteBatch +{ /// Creates a new write batch for a database. 
- pub(crate) fn new(path: PathBuf, current: u32) -> Self { + pub(crate) fn new(path: PathBuf, current: u32, parallel_scheduler: S) -> Self { const { assert!(FAMILIES <= usize_from_u32(u32::MAX)); }; Self { + parallel_scheduler, db_path: path, current_sequence_number: AtomicU32::new(current), thread_locals: ThreadLocal::new(), @@ -223,13 +225,12 @@ impl WriteBatch { } } - let span = Span::current(); - collectors.into_par_iter().try_for_each(|mut collector| { - let _span = span.clone().entered(); - self.flush_thread_local_collector(family, &mut collector)?; - drop(collector); - anyhow::Ok(()) - })?; + self.parallel_scheduler + .try_vec_into_parallel_for_each(collectors, |mut collector| { + self.flush_thread_local_collector(family, &mut collector)?; + drop(collector); + anyhow::Ok(()) + })?; // Now we flush the global collector(s). let mut collector_state = self.collectors[usize_from_u32(family)].lock(); @@ -242,22 +243,22 @@ impl WriteBatch { } } GlobalCollectorState::Sharded(_) => { - let GlobalCollectorState::Sharded(shards) = replace( + let GlobalCollectorState::Sharded(mut shards) = replace( &mut *collector_state, GlobalCollectorState::Unsharded(Collector::new()), ) else { unreachable!(); }; - shards.into_par_iter().try_for_each(|mut collector| { - let _span = span.clone().entered(); - if !collector.is_empty() { - let sst = self.create_sst_file(family, collector.sorted())?; - collector.clear(); - self.new_sst_files.lock().push(sst); - drop(collector); - } - anyhow::Ok(()) - })?; + self.parallel_scheduler + .try_parallel_for_each_mut(&mut shards, |collector| { + if !collector.is_empty() { + let sst = self.create_sst_file(family, collector.sorted())?; + collector.clear(); + self.new_sst_files.lock().push(sst); + collector.drop_contents(); + } + anyhow::Ok(()) + })?; } } @@ -269,10 +270,9 @@ impl WriteBatch { #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn finish(&mut self) -> Result { let mut new_blob_files = Vec::new(); - let shared_error = Mutex::new(Ok(())); // First, we flush all thread local collectors to the global collectors. 
- scope(|scope| { + { let _span = tracing::trace_span!("flush thread local collectors").entered(); let mut collectors = [const { Vec::new() }; FAMILIES]; for cell in self.thread_locals.iter_mut() { @@ -286,23 +286,24 @@ impl WriteBatch { } } } - for (family, thread_local_collectors) in collectors.into_iter().enumerate() { - for mut collector in thread_local_collectors { - let this = &self; - let shared_error = &shared_error; - let span = Span::current(); - scope.spawn(move |_| { - let _span = span.entered(); - if let Err(err) = - this.flush_thread_local_collector(family as u32, &mut collector) - { - *shared_error.lock() = Err(err); - } - drop(collector); - }); - } - } - }); + let to_flush = collectors + .into_iter() + .enumerate() + .flat_map(|(family, collector)| { + collector + .into_iter() + .map(move |collector| (family as u32, collector)) + }) + .collect::>(); + self.parallel_scheduler.try_vec_into_parallel_for_each( + to_flush, + |(family, mut collector)| { + self.flush_thread_local_collector(family, &mut collector)?; + drop(collector); + anyhow::Ok(()) + }, + )?; + } let _span = tracing::trace_span!("flush collectors").entered(); @@ -313,25 +314,24 @@ impl WriteBatch { let new_collectors = [(); FAMILIES].map(|_| Mutex::new(GlobalCollectorState::Unsharded(Collector::new()))); let collectors = replace(&mut self.collectors, new_collectors); - let span = Span::current(); - collectors - .into_par_iter() + let collectors = collectors + .into_iter() .enumerate() .flat_map(|(family, state)| { let collector = state.into_inner(); match collector { GlobalCollectorState::Unsharded(collector) => { - Either::Left([(family, collector)].into_par_iter()) + Either::Left([(family, collector)].into_iter()) + } + GlobalCollectorState::Sharded(shards) => { + Either::Right(shards.into_iter().map(move |collector| (family, collector))) } - GlobalCollectorState::Sharded(shards) => Either::Right( - shards - .into_par_iter() - .map(move |collector| (family, collector)), - ), } }) - .try_for_each(|(family, mut collector)| { - let _span = span.clone().entered(); + .collect::>(); + self.parallel_scheduler.try_vec_into_parallel_for_each( + collectors, + |(family, mut collector)| { let family = family as u32; if !collector.is_empty() { let sst = self.create_sst_file(family, collector.sorted())?; @@ -340,33 +340,37 @@ impl WriteBatch { shared_new_sst_files.lock().push(sst); } anyhow::Ok(()) - })?; - - shared_error.into_inner()?; + }, + )?; // Not we need to write the new meta files. 
let new_meta_collectors = [(); FAMILIES].map(|_| Mutex::new(Vec::new())); let meta_collectors = replace(&mut self.meta_collectors, new_meta_collectors); let keys_written = AtomicU64::new(0); - let new_meta_files = meta_collectors - .into_par_iter() + let file_to_write = meta_collectors + .into_iter() .map(|mutex| mutex.into_inner()) .enumerate() .filter(|(_, sst_files)| !sst_files.is_empty()) - .map(|(family, sst_files)| { - let family = family as u32; - let mut entries = 0; - let mut builder = MetaFileBuilder::new(family); - for (seq, sst) in sst_files { - entries += sst.entries; - builder.add(seq, sst); - } - keys_written.fetch_add(entries, Ordering::Relaxed); - let seq = self.current_sequence_number.fetch_add(1, Ordering::SeqCst) + 1; - let file = builder.write(&self.db_path, seq)?; - Ok((seq, file)) - }) - .collect::>>()?; + .collect::>(); + let new_meta_files = self + .parallel_scheduler + .vec_into_parallel_map_collect::<_, _, Result>>( + file_to_write, + |(family, sst_files)| { + let family = family as u32; + let mut entries = 0; + let mut builder = MetaFileBuilder::new(family); + for (seq, sst) in sst_files { + entries += sst.entries; + builder.add(seq, sst); + } + keys_written.fetch_add(entries, Ordering::Relaxed); + let seq = self.current_sequence_number.fetch_add(1, Ordering::SeqCst) + 1; + let file = builder.write(&self.db_path, seq)?; + Ok((seq, file)) + }, + )?; // Finally we return the new files and sequence number. let seq = self.current_sequence_number.load(Ordering::SeqCst); @@ -409,9 +413,12 @@ impl WriteBatch { let seq = self.current_sequence_number.fetch_add(1, Ordering::SeqCst) + 1; let path = self.db_path.join(format!("{seq:08}.sst")); - let (meta, file) = - write_static_stored_file(entries, total_key_size, total_value_size, &path) - .with_context(|| format!("Unable to write SST file {seq:08}.sst"))?; + let (meta, file) = self + .parallel_scheduler + .block_in_place(|| { + write_static_stored_file(entries, total_key_size, total_value_size, &path) + }) + .with_context(|| format!("Unable to write SST file {seq:08}.sst"))?; #[cfg(feature = "verify_sst_content")] { diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index eb3ee57b72093..3554ce1f31b20 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -40,7 +40,6 @@ once_cell = { workspace = true } parking_lot = { workspace = true } pot = "3.0.0" rand = { workspace = true } -rayon = { workspace = true } ringmap = { workspace = true, features = ["serde"] } rustc-hash = { workspace = true } serde = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index addfc48bdca8f..92d61845918c2 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -1267,7 +1267,6 @@ impl TurboTasksBackendInner { return task_id; } - self.track_cache_miss(&task_type); let tx = self .should_restore() .then(|| self.backing_storage.start_read_transaction()) @@ -1279,6 +1278,7 @@ impl TurboTasksBackendInner { .forward_lookup_task_cache(tx.as_ref(), &task_type) .expect("Failed to lookup task id") } { + self.track_cache_hit(&task_type); let _ = self.task_cache.try_insert(Arc::new(task_type), task_id); task_id } else { @@ -1287,12 +1287,14 @@ impl TurboTasksBackendInner { let task_id = if let Err(existing_task_id) = self.task_cache.try_insert(task_type.clone(), 
task_id) { + self.track_cache_hit(&task_type); // Safety: We just created the id and failed to insert it. unsafe { self.persisted_task_id_factory.reuse(task_id); } existing_task_id } else { + self.track_cache_miss(&task_type); task_id }; if let Some(log) = &self.persisted_task_cache_log { @@ -1327,10 +1329,10 @@ impl TurboTasksBackendInner { return task_id; } - self.track_cache_miss(&task_type); let task_type = Arc::new(task_type); let task_id = self.transient_task_id_factory.get(); - if let Err(existing_task_id) = self.task_cache.try_insert(task_type, task_id) { + if let Err(existing_task_id) = self.task_cache.try_insert(task_type.clone(), task_id) { + self.track_cache_hit(&task_type); // Safety: We just created the id and failed to insert it. unsafe { self.transient_task_id_factory.reuse(task_id); @@ -1339,6 +1341,7 @@ impl TurboTasksBackendInner { return existing_task_id; } + self.track_cache_miss(&task_type); self.connect_child(parent_task, task_id, turbo_tasks); task_id @@ -2124,8 +2127,8 @@ impl TurboTasksBackendInner { let last_snapshot = self.last_snapshot.load(Ordering::Relaxed); let mut last_snapshot = self.start_time + Duration::from_millis(last_snapshot); loop { - const FIRST_SNAPSHOT_WAIT: Duration = Duration::from_secs(60); - const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(30); + const FIRST_SNAPSHOT_WAIT: Duration = Duration::from_secs(300); + const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(120); const IDLE_TIMEOUT: Duration = Duration::from_secs(2); let time = if id == BACKEND_JOB_INITIAL_SNAPSHOT { @@ -2172,7 +2175,7 @@ impl TurboTasksBackendInner { } let this = self.clone(); - let snapshot = turbo_tasks::spawn_blocking(move || this.snapshot()).await; + let snapshot = this.snapshot(); if let Some((snapshot_start, new_data)) = snapshot { last_snapshot = snapshot_start; if new_data { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs index ceab626298854..ba83f1da5a209 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs @@ -6,9 +6,8 @@ use std::{ }; use bitfield::bitfield; -use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use smallvec::SmallVec; -use turbo_tasks::{FxDashMap, TaskId}; +use turbo_tasks::{FxDashMap, TaskId, parallel}; use crate::{ backend::dynamic_storage::DynamicStorage, @@ -664,48 +663,43 @@ impl Storage { // The number of shards is much larger than the number of threads, so the effect of the // locks held is negligible. - self.modified - .shards() - .par_iter() - .with_max_len(1) - .map(|shard| { - let mut direct_snapshots: Vec<(TaskId, Box)> = Vec::new(); - let mut modified: SmallVec<[TaskId; 4]> = SmallVec::new(); - { - // Take the snapshots from the modified map - let guard = shard.write(); - // Safety: guard must outlive the iterator. - for bucket in unsafe { guard.iter() } { - // Safety: the guard guarantees that the bucket is not removed and the ptr - // is valid. 
- let (key, shared_value) = unsafe { bucket.as_mut() }; - let modified_state = shared_value.get_mut(); - match modified_state { - ModifiedState::Modified => { - modified.push(*key); - } - ModifiedState::Snapshot(snapshot) => { - if let Some(snapshot) = snapshot.take() { - direct_snapshots.push((*key, snapshot)); - } + parallel::map_collect::<_, _, Vec<_>>(self.modified.shards(), |shard| { + let mut direct_snapshots: Vec<(TaskId, Box)> = Vec::new(); + let mut modified: SmallVec<[TaskId; 4]> = SmallVec::new(); + { + // Take the snapshots from the modified map + let guard = shard.write(); + // Safety: guard must outlive the iterator. + for bucket in unsafe { guard.iter() } { + // Safety: the guard guarantees that the bucket is not removed and the ptr + // is valid. + let (key, shared_value) = unsafe { bucket.as_mut() }; + let modified_state = shared_value.get_mut(); + match modified_state { + ModifiedState::Modified => { + modified.push(*key); + } + ModifiedState::Snapshot(snapshot) => { + if let Some(snapshot) = snapshot.take() { + direct_snapshots.push((*key, snapshot)); } } } - // Safety: guard must outlive the iterator. - drop(guard); } + // Safety: guard must outlive the iterator. + drop(guard); + } - SnapshotShard { - direct_snapshots, - modified, - storage: self, - guard: Some(guard.clone()), - process, - preprocess, - process_snapshot, - } - }) - .collect::>() + SnapshotShard { + direct_snapshots, + modified, + storage: self, + guard: Some(guard.clone()), + process, + preprocess, + process_snapshot, + } + }) } /// Start snapshot mode. diff --git a/turbopack/crates/turbo-tasks-backend/src/database/turbo.rs b/turbopack/crates/turbo-tasks-backend/src/database/turbo/mod.rs similarity index 73% rename from turbopack/crates/turbo-tasks-backend/src/database/turbo.rs rename to turbopack/crates/turbo-tasks-backend/src/database/turbo/mod.rs index 82e972f268d66..146a5f5e56952 100644 --- a/turbopack/crates/turbo-tasks-backend/src/database/turbo.rs +++ b/turbopack/crates/turbo-tasks-backend/src/database/turbo/mod.rs @@ -1,34 +1,33 @@ -use std::{ - cmp::max, - path::PathBuf, - sync::Arc, - thread::{JoinHandle, available_parallelism, spawn}, -}; +use std::{cmp::max, path::PathBuf, sync::Arc, thread::available_parallelism, time::Instant}; -use anyhow::Result; +use anyhow::{Ok, Result}; use parking_lot::Mutex; use turbo_persistence::{ ArcSlice, CompactConfig, KeyBase, StoreKey, TurboPersistence, ValueBuffer, }; +use turbo_tasks::{JoinHandle, block_for_future, message_queue::TimingEvent, spawn, turbo_tasks}; use crate::database::{ key_value_database::{KeySpace, KeyValueDatabase}, + turbo::parallel_scheduler::TurboTasksParallelScheduler, write_batch::{BaseWriteBatch, ConcurrentWriteBatch, WriteBatch, WriteBuffer}, }; +mod parallel_scheduler; + const MB: u64 = 1024 * 1024; const COMPACT_CONFIG: CompactConfig = CompactConfig { min_merge_count: 3, optimal_merge_count: 8, max_merge_count: 64, max_merge_bytes: 512 * MB, - min_merge_duplication_bytes: MB, + min_merge_duplication_bytes: 50 * MB, optimal_merge_duplication_bytes: 100 * MB, max_merge_segment_count: 16, }; pub struct TurboKeyValueDatabase { - db: Arc, + db: Arc>, compact_join_handle: Mutex>>>, is_ci: bool, is_short_session: bool, @@ -37,24 +36,12 @@ pub struct TurboKeyValueDatabase { impl TurboKeyValueDatabase { pub fn new(versioned_path: PathBuf, is_ci: bool, is_short_session: bool) -> Result { let db = Arc::new(TurboPersistence::open(versioned_path)?); - let mut this = Self { + Ok(Self { db: db.clone(), compact_join_handle: Mutex::new(None), 
is_ci, is_short_session, - }; - // start compaction in background if the database is not empty - if !db.is_empty() { - let handle = spawn(move || { - db.compact(&CompactConfig { - max_merge_segment_count: available_parallelism() - .map_or(4, |c| max(4, c.get() / 4)), - ..COMPACT_CONFIG - }) - }); - this.compact_join_handle.get_mut().replace(handle); - } - Ok(this) + }) } } @@ -96,7 +83,7 @@ impl KeyValueDatabase for TurboKeyValueDatabase { ) -> Result, Self::ConcurrentWriteBatch<'_>>> { // Wait for the compaction to finish if let Some(join_handle) = self.compact_join_handle.lock().take() { - join_handle.join().unwrap()?; + block_for_future(join_handle)?; } // Start a new write batch Ok(WriteBatch::concurrent(TurboWriteBatch { @@ -112,26 +99,47 @@ impl KeyValueDatabase for TurboKeyValueDatabase { fn shutdown(&self) -> Result<()> { // Wait for the compaction to finish if let Some(join_handle) = self.compact_join_handle.lock().take() { - join_handle.join().unwrap()?; + block_for_future(join_handle)?; } // Compact the database on shutdown - self.db.compact(&CompactConfig { - max_merge_segment_count: if self.is_ci { - // Fully compact in CI to reduce cache size - usize::MAX - } else { - available_parallelism().map_or(4, |c| max(4, c.get())) - }, - ..COMPACT_CONFIG - })?; + if self.is_ci { + // Fully compact in CI to reduce cache size + do_compact(&self.db, "Finished database compaction", usize::MAX)?; + } else { + // Compact with a reasonable limit in non-CI environments + do_compact( + &self.db, + "Finished database compaction", + available_parallelism().map_or(4, |c| max(4, c.get())), + )?; + } // Shutdown the database self.db.shutdown() } } +fn do_compact( + db: &TurboPersistence, + message: &'static str, + max_merge_segment_count: usize, +) -> Result<()> { + let start = Instant::now(); + // Compact the database with the given max merge segment count + let ran = db.compact(&CompactConfig { + max_merge_segment_count, + ..COMPACT_CONFIG + })?; + if ran { + let elapsed = start.elapsed(); + turbo_tasks() + .send_compilation_event(Arc::new(TimingEvent::new(message.to_string(), elapsed))); + } + Ok(()) +} + pub struct TurboWriteBatch<'a> { - batch: turbo_persistence::WriteBatch, 5>, - db: &'a Arc, + batch: turbo_persistence::WriteBatch, TurboTasksParallelScheduler, 5>, + db: &'a Arc>, compact_join_handle: Option<&'a Mutex>>>>, } @@ -156,12 +164,12 @@ impl<'a> BaseWriteBatch<'a> for TurboWriteBatch<'a> { if let Some(compact_join_handle) = self.compact_join_handle { // Start a new compaction in the background let db = self.db.clone(); - let handle = spawn(move || { - db.compact(&CompactConfig { - max_merge_segment_count: available_parallelism() - .map_or(4, |c| max(4, c.get() / 2)), - ..COMPACT_CONFIG - }) + let handle = spawn(async move { + do_compact( + &db, + "Finished database compaction", + available_parallelism().map_or(4, |c| max(4, c.get() / 2)), + ) }); compact_join_handle.lock().replace(handle); } diff --git a/turbopack/crates/turbo-tasks-backend/src/database/turbo/parallel_scheduler.rs b/turbopack/crates/turbo-tasks-backend/src/database/turbo/parallel_scheduler.rs new file mode 100644 index 0000000000000..c4e137c20f146 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/database/turbo/parallel_scheduler.rs @@ -0,0 +1,83 @@ +use turbo_persistence::ParallelScheduler; +use turbo_tasks::{block_in_place, parallel}; + +#[derive(Clone, Copy, Default)] +pub struct TurboTasksParallelScheduler; + +impl ParallelScheduler for TurboTasksParallelScheduler { + fn block_in_place(&self, f: impl 
FnOnce() -> R + Send) -> R + where + R: Send, + { + block_in_place(f) + } + + fn parallel_for_each(&self, items: &[T], f: impl Fn(&T) + Send + Sync) + where + T: Sync, + { + parallel::for_each(items, f); + } + + fn try_parallel_for_each<'l, T, E>( + &self, + items: &'l [T], + f: impl (Fn(&'l T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Sync, + E: Send + 'static, + { + parallel::try_for_each(items, f) + } + + fn try_parallel_for_each_mut<'l, T, E>( + &self, + items: &'l mut [T], + f: impl (Fn(&'l mut T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send + 'static, + { + parallel::try_for_each_mut(items, f) + } + + fn try_vec_into_parallel_for_each( + &self, + items: Vec, + f: impl (Fn(T) -> Result<(), E>) + Send + Sync, + ) -> Result<(), E> + where + T: Send + Sync, + E: Send + 'static, + { + parallel::try_into_for_each(items, f) + } + + fn parallel_map_collect<'l, T, I, R>( + &self, + items: &'l [T], + f: impl Fn(&'l T) -> I + Send + Sync, + ) -> R + where + T: Sync, + I: Send + Sync + 'l, + R: FromIterator, + { + parallel::map_collect(items, f) + } + + fn vec_into_parallel_map_collect( + &self, + items: Vec, + f: impl Fn(T) -> I + Send + Sync, + ) -> R + where + T: Send + Sync, + I: Send + Sync, + R: FromIterator, + { + parallel::vec_into_map_collect(items, f) + } +} diff --git a/turbopack/crates/turbo-tasks-backend/src/kv_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/kv_backing_storage.rs index c4b84310d651f..77cc7e15e580d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/kv_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/kv_backing_storage.rs @@ -1,21 +1,18 @@ use std::{ borrow::Borrow, - cmp::max, env, path::PathBuf, sync::{Arc, LazyLock, Mutex, PoisonError, Weak}, }; use anyhow::{Context, Result, anyhow}; -use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; -use tracing::Span; use turbo_tasks::{ SessionId, TaskId, backend::CachedTaskType, panic_hooks::{PanicHookGuard, register_panic_hook}, - turbo_tasks_scope, + parallel, }; use crate::{ @@ -331,14 +328,15 @@ impl BackingStorageSealed let _span = tracing::trace_span!("update task data").entered(); process_task_data(snapshots, Some(batch))?; let span = tracing::trace_span!("flush task data").entered(); - [KeySpace::TaskMeta, KeySpace::TaskData] - .into_par_iter() - .try_for_each(|key_space| { + parallel::try_for_each( + &[KeySpace::TaskMeta, KeySpace::TaskData], + |&key_space| { let _span = span.clone().entered(); // Safety: We already finished all processing of the task data and task // meta unsafe { batch.flush(key_space) } - })?; + }, + )?; } let mut next_task_id = get_next_free_task_id::< @@ -352,10 +350,9 @@ impl BackingStorageSealed items = task_cache_updates.iter().map(|m| m.len()).sum::() ) .entered(); - let result = task_cache_updates - .into_par_iter() - .with_max_len(1) - .map(|updates| { + let result = parallel::vec_into_map_collect::<_, _, Result>>( + task_cache_updates, + |updates| { let _span = _span.clone().entered(); let mut max_task_id = 0; @@ -390,15 +387,11 @@ impl BackingStorageSealed } Ok(max_task_id) - }) - .reduce( - || Ok(0), - |a, b| -> anyhow::Result<_> { - let a_max = a?; - let b_max = b?; - Ok(max(a_max, b_max)) - }, - )?; + }, + )? 
+ .into_iter() + .max() + .unwrap_or(0); next_task_id = next_task_id.max(result); } @@ -410,64 +403,11 @@ impl BackingStorageSealed )?; } WriteBatch::Serial(batch) => { - let mut task_items_result = Ok(Vec::new()); - turbo_tasks::scope(|s| { - s.spawn(|_| { - task_items_result = - process_task_data(snapshots, None::<&T::ConcurrentWriteBatch<'_>>); - }); - - let mut next_task_id = - get_next_free_task_id::< - T::SerialWriteBatch<'_>, - T::ConcurrentWriteBatch<'_>, - >(&mut WriteBatchRef::serial(batch))?; - - { - let _span = tracing::trace_span!( - "update task cache", - items = task_cache_updates.iter().map(|m| m.len()).sum::() - ) - .entered(); - let mut task_type_bytes = Vec::new(); - for (task_type, task_id) in task_cache_updates.into_iter().flatten() { - let task_id = *task_id; - serialize_task_type(&task_type, &mut task_type_bytes, task_id)?; - - batch - .put( - KeySpace::ForwardTaskCache, - WriteBuffer::Borrowed(&task_type_bytes), - WriteBuffer::Borrowed(&task_id.to_le_bytes()), - ) - .with_context(|| { - anyhow!("Unable to write task cache {task_type:?} => {task_id}") - })?; - batch - .put( - KeySpace::ReverseTaskCache, - WriteBuffer::Borrowed(IntKey::new(task_id).as_ref()), - WriteBuffer::Borrowed(&task_type_bytes), - ) - .with_context(|| { - anyhow!("Unable to write task cache {task_id} => {task_type:?}") - })?; - next_task_id = next_task_id.max(task_id + 1); - } - } - - save_infra::, T::ConcurrentWriteBatch<'_>>( - &mut WriteBatchRef::serial(batch), - next_task_id, - session_id, - operations, - )?; - anyhow::Ok(()) - })?; - { let _span = tracing::trace_span!("update tasks").entered(); - for (task_id, meta, data) in task_items_result?.into_iter().flatten() { + let task_items = + process_task_data(snapshots, None::<&T::ConcurrentWriteBatch<'_>>)?; + for (task_id, meta, data) in task_items.into_iter().flatten() { let key = IntKey::new(*task_id); let key = key.as_ref(); if let Some(meta) = meta { @@ -485,7 +425,54 @@ impl BackingStorageSealed })?; } } + batch.flush(KeySpace::TaskMeta)?; + batch.flush(KeySpace::TaskData)?; + } + + let mut next_task_id = get_next_free_task_id::< + T::SerialWriteBatch<'_>, + T::ConcurrentWriteBatch<'_>, + >(&mut WriteBatchRef::serial(batch))?; + + { + let _span = tracing::trace_span!( + "update task cache", + items = task_cache_updates.iter().map(|m| m.len()).sum::() + ) + .entered(); + let mut task_type_bytes = Vec::new(); + for (task_type, task_id) in task_cache_updates.into_iter().flatten() { + let task_id = *task_id; + serialize_task_type(&task_type, &mut task_type_bytes, task_id)?; + + batch + .put( + KeySpace::ForwardTaskCache, + WriteBuffer::Borrowed(&task_type_bytes), + WriteBuffer::Borrowed(&task_id.to_le_bytes()), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_type:?} => {task_id}") + })?; + batch + .put( + KeySpace::ReverseTaskCache, + WriteBuffer::Borrowed(IntKey::new(task_id).as_ref()), + WriteBuffer::Borrowed(&task_type_bytes), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_id} => {task_type:?}") + })?; + next_task_id = next_task_id.max(task_id + 1); + } } + + save_infra::, T::ConcurrentWriteBatch<'_>>( + &mut WriteBatchRef::serial(batch), + next_task_id, + session_id, + operations, + )?; } } @@ -703,48 +690,38 @@ where > + Send + Sync, { - let span = Span::current(); - let turbo_tasks = turbo_tasks::turbo_tasks(); - let handle = tokio::runtime::Handle::current(); - tasks - .into_par_iter() - .map(|tasks| { - let _span = span.clone().entered(); - let _guard = handle.clone().enter(); - 
turbo_tasks_scope(turbo_tasks.clone(), || { - let mut result = Vec::new(); - for (task_id, meta, data) in tasks { - if let Some(batch) = batch { - let key = IntKey::new(*task_id); - let key = key.as_ref(); - if let Some(meta) = meta { - batch.put( - KeySpace::TaskMeta, - WriteBuffer::Borrowed(key), - WriteBuffer::SmallVec(meta), - )?; - } - if let Some(data) = data { - batch.put( - KeySpace::TaskData, - WriteBuffer::Borrowed(key), - WriteBuffer::SmallVec(data), - )?; - } - } else { - // Store the new task data - result.push(( - task_id, - meta.map(WriteBuffer::SmallVec), - data.map(WriteBuffer::SmallVec), - )); - } + parallel::vec_into_map_collect::<_, _, Result>>(tasks, |tasks| { + let mut result = Vec::new(); + for (task_id, meta, data) in tasks { + if let Some(batch) = batch { + let key = IntKey::new(*task_id); + let key = key.as_ref(); + if let Some(meta) = meta { + batch.put( + KeySpace::TaskMeta, + WriteBuffer::Borrowed(key), + WriteBuffer::SmallVec(meta), + )?; + } + if let Some(data) = data { + batch.put( + KeySpace::TaskData, + WriteBuffer::Borrowed(key), + WriteBuffer::SmallVec(data), + )?; } + } else { + // Store the new task data + result.push(( + task_id, + meta.map(WriteBuffer::SmallVec), + data.map(WriteBuffer::SmallVec), + )); + } + } - Ok(result) - }) - }) - .collect::>>() + Ok(result) + }) } fn serialize(task: TaskId, data: &Vec) -> Result> { diff --git a/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs b/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs index f9321cfd797fb..add31c32ecd35 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/all_in_one.rs @@ -9,7 +9,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn all_in_one() { run(®ISTRATION, || async { let a: Vc = Vc::cell(4242); diff --git a/turbopack/crates/turbo-tasks-backend/tests/basic.rs b/turbopack/crates/turbo-tasks-backend/tests/basic.rs index a12da0b8578d8..a22cb96ade456 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/basic.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/basic.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn basic() { run(®ISTRATION, || async { let output1 = func_without_args(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/bug.rs b/turbopack/crates/turbo-tasks-backend/tests/bug.rs index f7e8097a1b7aa..5d225bdb8c48e 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/bug.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/bug.rs @@ -24,7 +24,7 @@ struct TaskSpec { #[turbo_tasks::value(transparent)] struct TasksSpec(Vec); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn graph_bug() { // see https://github.com/vercel/next.js/pull/79451 run(®ISTRATION, || async { diff --git a/turbopack/crates/turbo-tasks-backend/tests/bug2.rs b/turbopack/crates/turbo-tasks-backend/tests/bug2.rs index df3115b8aa3da..a1495eeeca91b 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/bug2.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/bug2.rs @@ -33,7 +33,7 @@ pub struct TaskSpec { #[turbo_tasks::value(transparent)] struct Iteration(State); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn graph_bug() { run(®ISTRATION, move || async move { let spec = vec![ diff --git 
a/turbopack/crates/turbo-tasks-backend/tests/call_types.rs b/turbopack/crates/turbo-tasks-backend/tests/call_types.rs index 17875d2630d78..f06430ada2bd0 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/call_types.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/call_types.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn functions() { run(®ISTRATION, || async { assert_eq!(*fn_plain().await?, 42); @@ -53,7 +53,7 @@ async fn async_fn_vc_arg(n: Vc) -> Result> { Ok(Vc::cell(*n.await?)) } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn methods() { run(®ISTRATION, || async { assert_eq!(*Value::static_method().await?, 42); @@ -106,7 +106,7 @@ impl Value { } } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn trait_methods() { run(®ISTRATION, || async { assert_eq!(*Value::static_trait_method().await?, 42); diff --git a/turbopack/crates/turbo-tasks-backend/tests/collectibles.rs b/turbopack/crates/turbo-tasks-backend/tests/collectibles.rs index a86c0e09343d0..945845a86e3a2 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/collectibles.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/collectibles.rs @@ -14,7 +14,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn transitive_emitting() { run(®ISTRATION, || async { let result_op = my_transitive_emitting_function(rcstr!(""), rcstr!("")); @@ -32,7 +32,7 @@ async fn transitive_emitting() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn transitive_emitting_indirect() { run(®ISTRATION, || async { let result_op = my_transitive_emitting_function(rcstr!(""), rcstr!("")); @@ -50,7 +50,7 @@ async fn transitive_emitting_indirect() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn multi_emitting() { run(®ISTRATION, || async { let result_op = my_multi_emitting_function(); @@ -68,7 +68,7 @@ async fn multi_emitting() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn taking_collectibles() { run(®ISTRATION, || async { let result_op = my_collecting_function(); @@ -84,7 +84,7 @@ async fn taking_collectibles() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn taking_collectibles_extra_layer() { run(®ISTRATION, || async { let result_op = my_collecting_function_indirect(); @@ -100,7 +100,7 @@ async fn taking_collectibles_extra_layer() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn taking_collectibles_parallel() { run(®ISTRATION, || async { let result_op = my_transitive_emitting_function(rcstr!(""), rcstr!("a")); @@ -142,7 +142,7 @@ async fn taking_collectibles_parallel() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn taking_collectibles_with_resolve() { run(®ISTRATION, || async { let result_op = my_transitive_emitting_function_with_resolve(rcstr!("resolve")); diff --git a/turbopack/crates/turbo-tasks-backend/tests/debug.rs b/turbopack/crates/turbo-tasks-backend/tests/debug.rs index 854d57b234395..ccc833eeb85d8 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/debug.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/debug.rs @@ -9,7 +9,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] 
+#[tokio::test(flavor = "multi_thread")] async fn primitive_debug() { run(®ISTRATION, || async { let a: Vc = Vc::cell(42); @@ -20,7 +20,7 @@ async fn primitive_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn transparent_debug() { run(®ISTRATION, || async { let a: Vc = Transparent(42).cell(); @@ -32,7 +32,7 @@ async fn transparent_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn enum_none_debug() { run(®ISTRATION, || async { let a: Vc = Enum::None.cell(); @@ -44,7 +44,7 @@ async fn enum_none_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn enum_transparent_debug() { run(®ISTRATION, || async { let a: Vc = Enum::Transparent(Transparent(42).resolved_cell()).cell(); @@ -60,7 +60,7 @@ async fn enum_transparent_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn enum_inner_vc_debug() { run(®ISTRATION, || async { let a: Vc = Enum::Enum(Enum::None.resolved_cell()).cell(); @@ -76,7 +76,7 @@ async fn enum_inner_vc_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn struct_unit_debug() { run(®ISTRATION, || async { let a: Vc = StructUnit.cell(); @@ -87,7 +87,7 @@ async fn struct_unit_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn struct_transparent_debug() { run(®ISTRATION, || async { let a: Vc = StructWithTransparent { @@ -106,7 +106,7 @@ async fn struct_transparent_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn struct_vec_debug() { run(®ISTRATION, || async { let a: Vc = StructWithVec { vec: vec![] }.cell(); @@ -135,7 +135,7 @@ async fn struct_vec_debug() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn struct_ignore_debug() { run(®ISTRATION, || async { let a: Vc = StructWithIgnore { diff --git a/turbopack/crates/turbo-tasks-backend/tests/detached.rs b/turbopack/crates/turbo-tasks-backend/tests/detached.rs index c76c23590f8ab..b1c80929fad6a 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/detached.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/detached.rs @@ -15,7 +15,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_spawns_detached() -> anyhow::Result<()> { run(®ISTRATION, || async { // HACK: The watch channel we use has an incorrect implementation of `TraceRawVcs`, just @@ -82,7 +82,7 @@ async fn spawns_detached( Vc::cell(()) } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_spawns_detached_changing() -> anyhow::Result<()> { run(®ISTRATION, || async { // HACK: The watch channel we use has an incorrect implementation of `TraceRawVcs` diff --git a/turbopack/crates/turbo-tasks-backend/tests/dirty_in_progress.rs b/turbopack/crates/turbo-tasks-backend/tests/dirty_in_progress.rs index 8171cead7dd40..89aa8998fae80 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/dirty_in_progress.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/dirty_in_progress.rs @@ -11,7 +11,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn dirty_in_progress() { run(®ISTRATION, || async { let cases = [ diff --git a/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs b/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs index 
4a3ddce3bfa73..87c2d6672e468 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/emptied_cells.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn recompute() { run(®ISTRATION, || async { let input = ChangingInput { diff --git a/turbopack/crates/turbo-tasks-backend/tests/filter_unused_args.rs b/turbopack/crates/turbo-tasks-backend/tests/filter_unused_args.rs index b7081174940c6..3193382110215 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/filter_unused_args.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/filter_unused_args.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn filtered_trait_method_args() -> Result<()> { run(®ISTRATION, || async { let uses_arg = UsesArg.cell(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/immutable.rs b/turbopack/crates/turbo-tasks-backend/tests/immutable.rs index d90a4cb2f78de..0c716c7544744 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/immutable.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/immutable.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn hidden_mutate() { run(®ISTRATION, || async { let input = create_input().resolve().await?; diff --git a/turbopack/crates/turbo-tasks-backend/tests/local_tasks.rs b/turbopack/crates/turbo-tasks-backend/tests/local_tasks.rs index e2a6a7abdfa74..f66363d374635 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/local_tasks.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/local_tasks.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_local_task_id() -> Result<()> { run(®ISTRATION, || async { let local_vc = get_local_task_id(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/operation_vc.rs b/turbopack/crates/turbo-tasks-backend/tests/operation_vc.rs index 8000ddc8b26e3..457971d0667c7 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/operation_vc.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/operation_vc.rs @@ -26,7 +26,7 @@ fn use_operations() -> Vc { forty_two.connect() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_use_operations() -> Result<()> { run(®ISTRATION, || async { assert_eq!(*use_operations().await?, 42); diff --git a/turbopack/crates/turbo-tasks-backend/tests/panics.rs b/turbopack/crates/turbo-tasks-backend/tests/panics.rs index d321e825f1430..8b9458ab4f046 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/panics.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/panics.rs @@ -25,7 +25,7 @@ static FILE_PATH_REGEX: LazyLock = // // This test depends on the process-wide global panic handler. This test must be run in its own // process in isolation of any other tests. 
-#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_panic_hook() { let prev_hook = take_hook(); set_hook(Box::new(move |info| { diff --git a/turbopack/crates/turbo-tasks-backend/tests/performance.rs b/turbopack/crates/turbo-tasks-backend/tests/performance.rs index 904843fad2a63..13b76582af633 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/performance.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/performance.rs @@ -142,7 +142,7 @@ fn check_skip() -> bool { false } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_many_children() { if check_skip() { return; @@ -157,7 +157,7 @@ async fn many_calls_to_many_children() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_uncached_many_children() { if check_skip() { return; @@ -189,7 +189,7 @@ fn run_big_graph_test(counts: Vec) -> impl Future> + Se ) } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_1() { if check_skip() { return; @@ -199,7 +199,7 @@ async fn many_calls_to_big_graph_1() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_2() { if check_skip() { return; @@ -211,7 +211,7 @@ async fn many_calls_to_big_graph_2() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_3() { if check_skip() { return; @@ -221,7 +221,7 @@ async fn many_calls_to_big_graph_3() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_4() { if check_skip() { return; @@ -231,7 +231,7 @@ async fn many_calls_to_big_graph_4() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_5() { if check_skip() { return; @@ -243,7 +243,7 @@ async fn many_calls_to_big_graph_5() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_6() { if check_skip() { return; @@ -255,7 +255,7 @@ async fn many_calls_to_big_graph_6() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_7() { if check_skip() { return; @@ -270,7 +270,7 @@ async fn many_calls_to_big_graph_7() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_8() { if check_skip() { return; @@ -282,7 +282,7 @@ async fn many_calls_to_big_graph_8() { .unwrap(); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn many_calls_to_big_graph_9() { if check_skip() { return; diff --git a/turbopack/crates/turbo-tasks-backend/tests/random_change.rs b/turbopack/crates/turbo-tasks-backend/tests/random_change.rs index 841c4564af444..089490ab1c79c 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/random_change.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/random_change.rs @@ -9,7 +9,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn random_change() { run(®ISTRATION, || async { let state = make_state(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs b/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs index d7ccf3b37b6cf..66c51c9e4f1ad 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/read_ref_cell.rs @@ -10,7 +10,7 @@ use turbo_tasks_testing::{Registration, register, run}; static 
REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn read_ref() { run(®ISTRATION, || async { let counter = Counter::cell(Counter { diff --git a/turbopack/crates/turbo-tasks-backend/tests/recompute.rs b/turbopack/crates/turbo-tasks-backend/tests/recompute.rs index 17a69e9c151d3..dcad783b06e08 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/recompute.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/recompute.rs @@ -8,7 +8,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn recompute() { run(®ISTRATION, || async { let input = ChangingInput { @@ -58,7 +58,7 @@ async fn recompute() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn immutable_analysis() { run(®ISTRATION, || async { let input = ChangingInput { diff --git a/turbopack/crates/turbo-tasks-backend/tests/recompute_collectibles.rs b/turbopack/crates/turbo-tasks-backend/tests/recompute_collectibles.rs index 54074af628add..d7c0be301ac70 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/recompute_collectibles.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/recompute_collectibles.rs @@ -9,7 +9,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn recompute() { run(®ISTRATION, || async { let input = ChangingInput::new(1).resolve().await?; diff --git a/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs b/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs index da3a69ca62dce..a0b9914b7f8bb 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/resolved_vc.rs @@ -23,7 +23,7 @@ fn assert_resolved(input: ResolvedVc) { assert!(input_vc.is_resolved()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_conversion() -> Result<()> { run(®ISTRATION, || async { let unresolved: Vc = Vc::cell(42); @@ -38,7 +38,7 @@ async fn test_conversion() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_cell_construction() -> Result<()> { run(®ISTRATION, || async { let a: ResolvedVc = ResolvedVc::cell(42); @@ -50,7 +50,7 @@ async fn test_cell_construction() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_resolved_vc_as_arg() -> Result<()> { run(®ISTRATION, || async { let unresolved: Vc = returns_int(42); @@ -62,7 +62,7 @@ async fn test_resolved_vc_as_arg() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_into_future() -> Result<()> { run(®ISTRATION, || async { let mut resolved = ResolvedVc::cell(42); @@ -78,7 +78,7 @@ async fn test_into_future() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_sidecast() -> Result<()> { run(®ISTRATION, || async { let concrete_value = ImplementsAAndB.resolved_cell(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/shrink_to_fit.rs b/turbopack/crates/turbo-tasks-backend/tests/shrink_to_fit.rs index 524a78950acf2..dc82e82174de5 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/shrink_to_fit.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/shrink_to_fit.rs @@ -11,7 +11,7 @@ static REGISTRATION: Registration = register!(); #[turbo_tasks::value(transparent)] struct Wrapper(Vec); -#[tokio::test] 
+#[tokio::test(flavor = "multi_thread")] async fn test_shrink_to_fit() -> Result<()> { run(®ISTRATION, || async { // `Vec::shrink_to_fit` is implicitly called when a cell is constructed. diff --git a/turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs b/turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs index 8a391ace095aa..869c944bcb5c7 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/task_statistics.rs @@ -13,7 +13,7 @@ use turbo_tasks_testing::{Registration, register, run_without_cache_check}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_simple_task() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); @@ -39,7 +39,7 @@ async fn test_simple_task() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_await_same_vc_multiple_times() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); @@ -61,7 +61,7 @@ async fn test_await_same_vc_multiple_times() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_vc_receiving_task() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); @@ -93,7 +93,7 @@ async fn test_vc_receiving_task() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_trait_methods() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); @@ -130,7 +130,7 @@ async fn test_trait_methods() -> Result<()> { .await } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_dyn_trait_methods() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); @@ -174,7 +174,7 @@ async fn test_dyn_trait_methods() -> Result<()> { } // creates Vcs, but doesn't ever execute them -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_no_execution() -> Result<()> { run_without_cache_check(®ISTRATION, async move { enable_stats(); diff --git a/turbopack/crates/turbo-tasks-backend/tests/trace_transient.rs b/turbopack/crates/turbo-tasks-backend/tests/trace_transient.rs index 74c21fcaebb65..f553a83a52c5b 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/trace_transient.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/trace_transient.rs @@ -18,7 +18,7 @@ Adder::add_method (read cell of type turbo-tasks@TODO::::primitives::u64) unknown transient task (read cell of type turbo-tasks@TODO::::primitives::u16) unknown transient task (read cell of type turbo-tasks@TODO::::primitives::u32)"; -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_trace_transient() { let result = run_without_cache_check(®ISTRATION, async { read_incorrect_task_input_operation(IncorrectTaskInput( diff --git a/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs index c556e8d422489..2372947303360 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell.rs @@ -10,7 +10,7 @@ use turbo_tasks_testing::{Registration, register, run}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn trait_ref() { run(®ISTRATION, || async { let counter = Counter::cell(Counter { diff --git a/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell_mode.rs 
b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell_mode.rs index 15917f62563bf..3b8d1cb15c02a 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell_mode.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/trait_ref_cell_mode.rs @@ -9,7 +9,7 @@ static REGISTRATION: Registration = register!(); // Test that with `cell = "shared"`, the cell will be re-used as long as the // value is equal. -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_trait_ref_shared_cell_mode() { run(®ISTRATION, || async { let input = CellIdSelector { @@ -44,7 +44,7 @@ async fn test_trait_ref_shared_cell_mode() { // Test that with `cell = "new"`, the cell will is never re-used, even if the // value is equal. -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_trait_ref_new_cell_mode() { run(®ISTRATION, || async { let input = CellIdSelector { diff --git a/turbopack/crates/turbo-tasks-backend/tests/transient_collectible.rs b/turbopack/crates/turbo-tasks-backend/tests/transient_collectible.rs index 216e8a285dbf8..b144319ed4763 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/transient_collectible.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/transient_collectible.rs @@ -10,7 +10,7 @@ static REGISTRATION: Registration = register!(); const EXPECTED_MSG: &str = "Collectible is transient, transient collectibles cannot be emitted from persistent tasks"; -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_transient_emit_from_persistent() { let result = run_without_cache_check(®ISTRATION, async { emit_incorrect_task_input_operation(IncorrectTaskInput(U32Wrapper(123).resolved_cell())) diff --git a/turbopack/crates/turbo-tasks-backend/tests/transient_vc.rs b/turbopack/crates/turbo-tasks-backend/tests/transient_vc.rs index 7db072310c915..100008c755c5c 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/transient_vc.rs +++ b/turbopack/crates/turbo-tasks-backend/tests/transient_vc.rs @@ -7,7 +7,7 @@ use turbo_tasks_testing::{Registration, register, run_without_cache_check}; static REGISTRATION: Registration = register!(); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_transient_vc() -> Result<()> { run_without_cache_check(®ISTRATION, async { test_transient_operation(TransientValue::new(123)) diff --git a/turbopack/crates/turbo-tasks-fetch/tests/fetch.rs b/turbopack/crates/turbo-tasks-fetch/tests/fetch.rs index a325eefa0f445..b44f2a3a00522 100644 --- a/turbopack/crates/turbo-tasks-fetch/tests/fetch.rs +++ b/turbopack/crates/turbo-tasks-fetch/tests/fetch.rs @@ -18,7 +18,7 @@ static REGISTRATION: Registration = register!(turbo_tasks_fetch::register); /// acquire and hold this lock to prevent potential flakiness. static GLOBAL_TEST_LOCK: TokioMutex<()> = TokioMutex::const_new(()); -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn basic_get() { let _guard = GLOBAL_TEST_LOCK.lock().await; run(®ISTRATION, || async { @@ -49,7 +49,7 @@ async fn basic_get() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn sends_user_agent() { let _guard = GLOBAL_TEST_LOCK.lock().await; run(®ISTRATION, || async { @@ -85,7 +85,7 @@ async fn sends_user_agent() { // This is temporary behavior. // TODO: Implement invalidation that respects Cache-Control headers. 
-#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn invalidation_does_not_invalidate() { let _guard = GLOBAL_TEST_LOCK.lock().await; run(®ISTRATION, || async { @@ -130,7 +130,7 @@ fn get_issue_context() -> Vc { DiskFileSystem::new(rcstr!("root"), rcstr!("/")).root() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn errors_on_failed_connection() { let _guard = GLOBAL_TEST_LOCK.lock().await; run(®ISTRATION, || async { @@ -161,7 +161,7 @@ async fn errors_on_failed_connection() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn errors_on_404() { let _guard = GLOBAL_TEST_LOCK.lock().await; run(®ISTRATION, || async { @@ -196,7 +196,7 @@ async fn errors_on_404() { .unwrap() } -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn client_cache() { // a simple fetch that should always succeed async fn simple_fetch(path: &str, client: FetchClient) -> anyhow::Result<()> { diff --git a/turbopack/crates/turbo-tasks-fs/src/lib.rs b/turbopack/crates/turbo-tasks-fs/src/lib.rs index 59205def86e6a..6e24d176978b2 100644 --- a/turbopack/crates/turbo-tasks-fs/src/lib.rs +++ b/turbopack/crates/turbo-tasks-fs/src/lib.rs @@ -46,7 +46,6 @@ use dunce::simplified; use indexmap::IndexSet; use jsonc_parser::{ParseOptions, parse_to_serde_value}; use mime::Mime; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rustc_hash::FxHashSet; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -56,7 +55,7 @@ use turbo_rcstr::{RcStr, rcstr}; use turbo_tasks::{ ApplyEffectsContext, Completion, InvalidationReason, Invalidator, NonLocalValue, ReadRef, ResolvedVc, TaskInput, ValueToString, Vc, debug::ValueDebugFormat, effect, - mark_session_dependent, mark_stateful, trace::TraceRawVcs, + mark_session_dependent, mark_stateful, parallel, trace::TraceRawVcs, }; use turbo_tasks_hash::{DeterministicHash, DeterministicHasher, hash_xxh3_hash64}; @@ -314,19 +313,14 @@ impl DiskFileSystemInner { fn invalidate(&self) { let _span = tracing::info_span!("invalidate filesystem", name = &*self.root).entered(); - let span = tracing::Span::current(); - let handle = tokio::runtime::Handle::current(); let invalidator_map = take(&mut *self.invalidator_map.lock().unwrap()); let dir_invalidator_map = take(&mut *self.dir_invalidator_map.lock().unwrap()); - let iter = invalidator_map - .into_par_iter() - .chain(dir_invalidator_map.into_par_iter()) - .flat_map(|(_, invalidators)| invalidators.into_par_iter()); - iter.for_each(|(i, _)| { - let _span = span.clone().entered(); - let _guard = handle.enter(); - i.invalidate() - }); + let invalidators = invalidator_map + .into_iter() + .chain(dir_invalidator_map) + .flat_map(|(_, invalidators)| invalidators.into_keys()) + .collect::>(); + parallel::vec_into_for_each(invalidators, |invalidator| invalidator.invalidate()); } /// Invalidates every tracked file in the filesystem. 
@@ -337,23 +331,19 @@ impl DiskFileSystemInner { reason: impl Fn(&Path) -> R + Sync, ) { let _span = tracing::info_span!("invalidate filesystem", name = &*self.root).entered(); - let span = tracing::Span::current(); - let handle = tokio::runtime::Handle::current(); let invalidator_map = take(&mut *self.invalidator_map.lock().unwrap()); let dir_invalidator_map = take(&mut *self.dir_invalidator_map.lock().unwrap()); - let iter = invalidator_map - .into_par_iter() - .chain(dir_invalidator_map.into_par_iter()) + let invalidators = invalidator_map + .into_iter() + .chain(dir_invalidator_map) .flat_map(|(path, invalidators)| { - let _span = span.clone().entered(); let reason_for_path = reason(&path); invalidators - .into_par_iter() + .into_keys() .map(move |i| (reason_for_path.clone(), i)) - }); - iter.for_each(|(reason, (invalidator, _))| { - let _span = span.clone().entered(); - let _guard = handle.enter(); + }) + .collect::>(); + parallel::vec_into_for_each(invalidators, |(reason, invalidator)| { invalidator.invalidate_with_reason(reason) }); } diff --git a/turbopack/crates/turbo-tasks-fs/src/watcher.rs b/turbopack/crates/turbo-tasks-fs/src/watcher.rs index bc429eb6cb16c..2c7c6ca98ac6f 100644 --- a/turbopack/crates/turbo-tasks-fs/src/watcher.rs +++ b/turbopack/crates/turbo-tasks-fs/src/watcher.rs @@ -16,13 +16,12 @@ use notify::{ Config, EventKind, PollWatcher, RecommendedWatcher, RecursiveMode, Watcher, event::{MetadataKind, ModifyKind, RenameMode}, }; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rustc_hash::FxHashSet; use serde::{Deserialize, Serialize}; use tracing::instrument; use turbo_rcstr::RcStr; use turbo_tasks::{ - FxIndexSet, InvalidationReason, InvalidationReasonKind, Invalidator, spawn_thread, + FxIndexSet, InvalidationReason, InvalidationReasonKind, Invalidator, parallel, spawn_thread, util::StaticOrArc, }; @@ -271,40 +270,30 @@ impl DiskWatcher { // We need to invalidate all reads that happened before watching // Best is to start_watching before starting to read { - let span = tracing::info_span!("invalidate filesystem"); - let _span = span.clone().entered(); + let _span = tracing::info_span!("invalidate filesystem").entered(); let invalidator_map = take(&mut *fs_inner.invalidator_map.lock().unwrap()); let dir_invalidator_map = take(&mut *fs_inner.dir_invalidator_map.lock().unwrap()); - let iter = invalidator_map - .into_par_iter() - .chain(dir_invalidator_map.into_par_iter()); - let handle = tokio::runtime::Handle::current(); + let iter = invalidator_map.into_iter().chain(dir_invalidator_map); if report_invalidation_reason { - iter.flat_map(|(path, invalidators)| { - let _span = span.clone().entered(); - let reason = WatchStart { - name: fs_inner.name.clone(), - // this path is just used for display purposes - path: RcStr::from(path.to_string_lossy()), - }; - invalidators - .into_par_iter() - .map(move |i| (reason.clone(), i)) - }) - .for_each(|(reason, (invalidator, _))| { - let _span = span.clone().entered(); - let _guard = handle.enter(); - invalidator.invalidate_with_reason(reason) + let invalidators = iter + .flat_map(|(path, invalidators)| { + let reason = WatchStart { + name: fs_inner.name.clone(), + // this path is just used for display purposes + path: RcStr::from(path.to_string_lossy()), + }; + invalidators.into_iter().map(move |i| (reason.clone(), i)) + }) + .collect::>(); + parallel::vec_into_for_each(invalidators, |(reason, (invalidator, _))| { + invalidator.invalidate_with_reason(reason); }); } else { - iter.flat_map(|(_, invalidators)| { - let 
_span = span.clone().entered(); - invalidators.into_par_iter().map(move |i| i) - }) - .for_each(|(invalidator, _)| { - let _span = span.clone().entered(); - let _guard = handle.enter(); - invalidator.invalidate() + let invalidators = iter + .flat_map(|(_, invalidators)| invalidators.into_keys()) + .collect::>(); + parallel::vec_into_for_each(invalidators, |invalidator| { + invalidator.invalidate(); }); } } diff --git a/turbopack/crates/turbo-tasks-malloc/src/lib.rs b/turbopack/crates/turbo-tasks-malloc/src/lib.rs index 5f2df85ee6282..194d2796d843b 100644 --- a/turbopack/crates/turbo-tasks-malloc/src/lib.rs +++ b/turbopack/crates/turbo-tasks-malloc/src/lib.rs @@ -3,6 +3,7 @@ mod counter; use std::{ alloc::{GlobalAlloc, Layout}, marker::PhantomData, + ops::{Add, AddAssign}, }; use self::counter::{add, flush, get, remove, update}; @@ -16,12 +17,45 @@ pub struct AllocationInfo { } impl AllocationInfo { + pub const ZERO: Self = Self { + allocations: 0, + deallocations: 0, + allocation_count: 0, + deallocation_count: 0, + }; + pub fn is_empty(&self) -> bool { self.allocations == 0 && self.deallocations == 0 && self.allocation_count == 0 && self.deallocation_count == 0 } + + pub fn memory_usage(&self) -> usize { + self.allocations.saturating_sub(self.deallocations) + } +} + +impl Add for AllocationInfo { + type Output = Self; + + fn add(self, other: Self) -> Self { + Self { + allocations: self.allocations + other.allocations, + deallocations: self.deallocations + other.deallocations, + allocation_count: self.allocation_count + other.allocation_count, + deallocation_count: self.deallocation_count + other.deallocation_count, + } + } +} + +impl AddAssign for AllocationInfo { + fn add_assign(&mut self, other: Self) { + self.allocations += other.allocations; + self.deallocations += other.deallocations; + self.allocation_count += other.allocation_count; + self.deallocation_count += other.deallocation_count; + } } #[derive(Default, Clone, Debug)] diff --git a/turbopack/crates/turbo-tasks/src/capture_future.rs b/turbopack/crates/turbo-tasks/src/capture_future.rs index 6bd646f942794..4f791869dbab4 100644 --- a/turbopack/crates/turbo-tasks/src/capture_future.rs +++ b/turbopack/crates/turbo-tasks/src/capture_future.rs @@ -31,8 +31,7 @@ pin_project! 
{ #[pin] future: F, duration: Duration, - allocations: usize, - deallocations: usize, + allocations: AllocationInfo, } } @@ -41,8 +40,7 @@ impl> CaptureFuture { Self { future, duration: Duration::ZERO, - allocations: 0, - deallocations: 0, + allocations: AllocationInfo::ZERO, } } } @@ -77,6 +75,17 @@ pub struct TurboTasksPanic { pub location: Option, } +impl TurboTasksPanic { + pub fn into_panic(self) -> Box { + Box::new(format!( + "{} at {}", + self.message, + self.location + .unwrap_or_else(|| "unknown location".to_string()) + )) + } +} + impl Display for TurboTasksPanic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.message) @@ -84,7 +93,7 @@ impl Display for TurboTasksPanic { } impl> Future for CaptureFuture { - type Output = (Result, Duration, usize); + type Output = (Result, Duration, AllocationInfo); fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); @@ -127,17 +136,10 @@ impl> Future for CaptureFuture { let elapsed = start.elapsed(); let allocations = start_allocations.until_now(); *this.duration += elapsed + data.duration; - *this.allocations += allocations.allocations + data.allocations; - *this.deallocations += allocations.deallocations + data.deallocations; + *this.allocations += allocations; match result { - Err(err) => { - let memory_usage = this.allocations.saturating_sub(*this.deallocations); - Poll::Ready((Err(err), *this.duration, memory_usage)) - } - Ok(Poll::Ready(r)) => { - let memory_usage = this.allocations.saturating_sub(*this.deallocations); - Poll::Ready((Ok(r), *this.duration, memory_usage)) - } + Err(err) => Poll::Ready((Err(err), *this.duration, this.allocations.clone())), + Ok(Poll::Ready(r)) => Poll::Ready((Ok(r), *this.duration, this.allocations.clone())), Ok(Poll::Pending) => Poll::Pending, } } diff --git a/turbopack/crates/turbo-tasks/src/effect.rs b/turbopack/crates/turbo-tasks/src/effect.rs index 0b893b2a128ad..6373306853477 100644 --- a/turbopack/crates/turbo-tasks/src/effect.rs +++ b/turbopack/crates/turbo-tasks/src/effect.rs @@ -1,6 +1,5 @@ use std::{ any::{Any, TypeId}, - borrow::Cow, future::Future, mem::replace, panic, @@ -8,20 +7,20 @@ use std::{ sync::Arc, }; -use anyhow::{Result, anyhow}; +use anyhow::Result; use auto_hash_map::AutoSet; use futures::{StreamExt, TryStreamExt}; use parking_lot::Mutex; use rustc_hash::{FxHashMap, FxHashSet}; use tokio::task_local; -use tracing::{Instrument, Span}; +use tracing::Instrument; use crate::{ self as turbo_tasks, CollectiblesSource, NonLocalValue, ReadRef, ResolvedVc, TryJoinIterExt, debug::ValueDebugFormat, emit, event::{Event, EventListener}, - manager::turbo_tasks_future_scope, + spawn, trace::TraceRawVcs, util::SharedError, }; @@ -98,28 +97,10 @@ impl EffectInstance { listener.await; } State::NotStarted(EffectInner { future }) => { - let join_handle = tokio::spawn(ApplyEffectsContext::in_current_scope( - turbo_tasks_future_scope(turbo_tasks::turbo_tasks(), future) - .instrument(Span::current()), - )); + let join_handle = spawn(ApplyEffectsContext::in_current_scope(future)); let result = match join_handle.await { - Ok(Err(err)) => Err(SharedError::new(err)), - Err(err) => { - let any = err.into_panic(); - let panic = match any.downcast::() { - Ok(owned) => Some(Cow::Owned(*owned)), - Err(any) => match any.downcast::<&'static str>() { - Ok(str) => Some(Cow::Borrowed(*str)), - Err(_) => None, - }, - }; - Err(SharedError::new(if let Some(panic) = panic { - anyhow!("Task effect panicked: {panic}") - } else { - anyhow!("Task 
effect panicked") - })) - } - Ok(Ok(())) => Ok(()), + Err(err) => Err(SharedError::new(err)), + Ok(()) => Ok(()), }; let event = { let mut guard = self.inner.lock(); diff --git a/turbopack/crates/turbo-tasks/src/lib.rs b/turbopack/crates/turbo-tasks/src/lib.rs index 2c8494d6afb89..d5aafe26593ea 100644 --- a/turbopack/crates/turbo-tasks/src/lib.rs +++ b/turbopack/crates/turbo-tasks/src/lib.rs @@ -64,15 +64,16 @@ mod no_move_vec; mod once_map; mod output; pub mod panic_hooks; +pub mod parallel; pub mod persisted_graph; pub mod primitives; mod raw_vc; mod read_options; mod read_ref; pub mod registry; -mod scope; mod serialization_invalidation; pub mod small_duration; +mod spawn; mod state; pub mod task; mod task_execution_reason; @@ -107,17 +108,18 @@ pub use manager::{ CurrentCellRef, ReadConsistency, TaskPersistence, TurboTasks, TurboTasksApi, TurboTasksBackendApi, TurboTasksBackendApiExt, TurboTasksCallApi, Unused, UpdateInfo, dynamic_call, emit, mark_finished, mark_root, mark_session_dependent, mark_stateful, - prevent_gc, run_once, run_once_with_reason, spawn_blocking, spawn_thread, trait_call, - turbo_tasks, turbo_tasks_scope, + prevent_gc, run_once, run_once_with_reason, trait_call, turbo_tasks, turbo_tasks_scope, }; pub use output::OutputContent; pub use raw_vc::{CellId, RawVc, ReadRawVcFuture, ResolveTypeError}; pub use read_options::ReadCellOptions; pub use read_ref::ReadRef; use rustc_hash::FxHasher; -pub use scope::scope; pub use serialization_invalidation::SerializationInvalidator; pub use shrink_to_fit::ShrinkToFit; +pub use spawn::{ + JoinHandle, block_for_future, block_in_place, spawn, spawn_blocking, spawn_thread, +}; pub use state::{State, TransientState}; pub use task::{SharedReference, TypedSharedReference, task_input::TaskInput}; pub use task_execution_reason::TaskExecutionReason; diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index eaa31f420ed5d..68e3a784fb62e 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -8,7 +8,6 @@ use std::{ Arc, Mutex, RwLock, Weak, atomic::{AtomicBool, AtomicUsize, Ordering}, }, - thread, time::{Duration, Instant}, }; @@ -17,10 +16,9 @@ use auto_hash_map::AutoMap; use rustc_hash::FxHasher; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; -use tokio::{runtime::Handle, select, sync::mpsc::Receiver, task_local}; +use tokio::{select, sync::mpsc::Receiver, task_local}; use tokio_util::task::TaskTracker; -use tracing::{Instrument, Level, Span, info_span, instrument, trace_span}; -use turbo_tasks_malloc::TurboMalloc; +use tracing::{Instrument, Level, instrument, trace_span}; use crate::{ Completion, InvalidationReason, InvalidationReasonSet, OutputContent, ReadCellOptions, @@ -30,7 +28,7 @@ use crate::{ Backend, CachedTaskType, CellContent, TaskCollectiblesMap, TaskExecutionSpec, TransientTaskType, TurboTasksExecutionError, TypedCellContent, }, - capture_future::{self, CaptureFuture}, + capture_future::CaptureFuture, event::{Event, EventListener}, id::{BackendJobId, ExecutionId, LocalTaskId, TRANSIENT_TASK_BIT, TraitTypeId}, id_factory::IdFactoryWithReuse, @@ -718,7 +716,7 @@ impl TurboTasks { }; async { - let (result, duration, memory_usage) = CaptureFuture::new(future).await; + let (result, duration, alloc_info) = CaptureFuture::new(future).await; // wait for all spawned local tasks using `local` to finish let ltt = CURRENT_TASK_STATE @@ -742,7 +740,7 @@ impl TurboTasks { let schedule_again = 
this.backend.task_execution_completed( task_id, duration, - memory_usage, + alloc_info.memory_usage(), &cell_counters, stateful, has_invalidator, @@ -1060,27 +1058,30 @@ impl TurboTasks { } pub async fn stop_and_wait(&self) { - self.backend.stopping(self); - self.stopped.store(true, Ordering::Release); - { - let listener = self - .event - .listen_with_note(|| || "wait for stop".to_string()); - if self.currently_scheduled_tasks.load(Ordering::Acquire) != 0 { - listener.await; + turbo_tasks_future_scope(self.pin(), async move { + self.backend.stopping(self); + self.stopped.store(true, Ordering::Release); + { + let listener = self + .event + .listen_with_note(|| || "wait for stop".to_string()); + if self.currently_scheduled_tasks.load(Ordering::Acquire) != 0 { + listener.await; + } } - } - { - let listener = self.event_background.listen(); - if self - .currently_scheduled_background_jobs - .load(Ordering::Acquire) - != 0 { - listener.await; + let listener = self.event_background.listen(); + if self + .currently_scheduled_background_jobs + .load(Ordering::Acquire) + != 0 + { + listener.await; + } } - } - self.backend.stop(self); + self.backend.stop(self); + }) + .await; } #[track_caller] @@ -1677,6 +1678,10 @@ pub fn turbo_tasks() -> Arc { TURBO_TASKS.with(|arc| arc.clone()) } +pub fn try_turbo_tasks() -> Option> { + TURBO_TASKS.try_with(|arc| arc.clone()).ok() +} + pub fn with_turbo_tasks(func: impl FnOnce(&Arc) -> T) -> T { TURBO_TASKS.with(|arc| func(arc)) } @@ -1685,6 +1690,14 @@ pub fn turbo_tasks_scope(tt: Arc, f: impl FnOnce() -> T) - TURBO_TASKS.sync_scope(tt, f) } +pub fn turbo_tasks_try_scope(tt: Option>, f: impl FnOnce() -> T) -> T { + if let Some(tt) = tt { + TURBO_TASKS.sync_scope(tt, f) + } else { + f() + } +} + pub fn turbo_tasks_future_scope( tt: Arc, f: impl Future, @@ -1787,35 +1800,6 @@ pub fn emit(collectible: ResolvedVc) { }) } -pub async fn spawn_blocking(func: impl FnOnce() -> T + Send + 'static) -> T { - let turbo_tasks = turbo_tasks(); - let span = Span::current(); - let (result, duration, alloc_info) = tokio::task::spawn_blocking(|| { - let _guard = span.entered(); - let start = Instant::now(); - let start_allocations = TurboMalloc::allocation_counters(); - let r = turbo_tasks_scope(turbo_tasks, func); - (r, start.elapsed(), start_allocations.until_now()) - }) - .await - .unwrap(); - capture_future::add_duration(duration); - capture_future::add_allocation_info(alloc_info); - result -} - -pub fn spawn_thread(func: impl FnOnce() + Send + 'static) { - let handle = Handle::current(); - let span = info_span!("thread").or_current(); - thread::spawn(move || { - let span = span.entered(); - let guard = handle.enter(); - func(); - drop(guard); - drop(span); - }); -} - pub(crate) async fn read_task_output( this: &dyn TurboTasksApi, id: TaskId, diff --git a/turbopack/crates/turbo-tasks/src/parallel.rs b/turbopack/crates/turbo-tasks/src/parallel.rs new file mode 100644 index 0000000000000..751d3098e5a61 --- /dev/null +++ b/turbopack/crates/turbo-tasks/src/parallel.rs @@ -0,0 +1,535 @@ +//! Parallel for each resp. map running in the current tokio thread pool maintaining turbo tasks and +//! tracing context. +//! +//! This avoid the problem of sleeping threads with mimalloc when using rayon in combination with +//! tokio. It also avoid having multiple thread pools. 
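For orientation, a minimal usage sketch of the new parallel helpers (illustrative only; it assumes the `for_each` and `map_collect` signatures added later in this file and a multi-threaded tokio runtime, mirroring the tests at the end of the file):

    use std::sync::atomic::{AtomicUsize, Ordering};

    use turbo_tasks::parallel;

    #[tokio::test(flavor = "multi_thread")]
    async fn parallel_usage_sketch() {
        let items: Vec<usize> = (0..1_000).collect();

        // Fans the closure out over chunks of `items` on the tokio worker pool,
        // preserving the caller's turbo-tasks and tracing context.
        let sum = AtomicUsize::new(0);
        parallel::for_each(&items, |&x| {
            sum.fetch_add(x, Ordering::Relaxed);
        });
        assert_eq!(sum.load(Ordering::Relaxed), 999 * 1_000 / 2);

        // Maps each item in parallel and collects the results in input order.
        let doubled: Vec<usize> = parallel::map_collect(&items, |&x| x * 2);
        assert_eq!(doubled[10], 20);
    }

Because the work runs on the existing tokio pool rather than a separate rayon pool, the call stays synchronous for the caller via block_in_place plus block_on, as implemented below.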
+ +use std::{ + mem::{ManuallyDrop, transmute}, + panic, + sync::{Arc, LazyLock}, + thread::available_parallelism, +}; + +use tokio::{ + runtime::Handle, + task::{JoinHandle, block_in_place}, +}; +use tracing::{Instrument, Span}; + +use crate::{ + TurboTasksApi, + manager::{try_turbo_tasks, turbo_tasks_try_scope}, +}; + +/// Calculates a good chunk size for parallel processing based on the number of available threads. +/// This is used to ensure that the workload is evenly distributed across the threads. +fn good_chunk_size(len: usize) -> usize { + static GOOD_CHUNK_COUNT: LazyLock = + LazyLock::new(|| available_parallelism().map_or(16, |c| c.get() * 4)); + let min_chunk_count = *GOOD_CHUNK_COUNT; + len.div_ceil(min_chunk_count) +} + +/// Context to allow spawning a task with a limited lifetime. +/// +/// ## Safety +/// +/// This context must not be dropped before all tasks spawned with it have been awaited. +struct ProcessInParallelContext<'l, R: Send + 'l> { + results: Box<[Option]>, + index: usize, + handle: Handle, + turbo_tasks: Option>, + span: Span, + phantom: std::marker::PhantomData<&'l ()>, +} + +impl<'l, R: Send + 'l> ProcessInParallelContext<'l, R> { + fn new(len: usize) -> Self { + let mut results = Vec::with_capacity(len); + for _ in 0..len { + results.push(None); + } + Self { + results: results.into_boxed_slice(), + index: 0, + handle: Handle::current(), + turbo_tasks: try_turbo_tasks(), + span: Span::current(), + phantom: std::marker::PhantomData, + } + } + + fn task(&mut self, f: F) -> JoinHandle<()> + where + F: FnOnce() -> R + Send + 'l, + { + struct SendablePtr(*mut Option); + unsafe impl Send for SendablePtr {} + unsafe impl Sync for SendablePtr {} + impl SendablePtr { + fn new(reference: &mut Option) -> Self { + SendablePtr(reference as *mut Option) + } + + unsafe fn get_mut(&mut self) -> &mut Option { + // SAFETY: This is a valid pointer, as we got this pointer from a reference. + unsafe { &mut *self.0 } + } + } + + let mut result_cell = SendablePtr::new(&mut self.results[self.index]); + self.index += 1; + + let f: Box = Box::new(move || { + let result = f(); + // SAFETY: This is a valid pointer, as we got this pointer from a reference. + let result_cell = unsafe { result_cell.get_mut() }; + *result_cell = Some(result); + }); + // SAFETY: In `process_in_parallel` we ensure that the spawned tasks is awaited before the + // lifetime `'l` ends. + let f: Box = unsafe { + transmute::, Box>(f) + }; + let turbo_tasks = self.turbo_tasks.clone(); + let span = self.span.clone(); + self.handle.spawn(async move { + turbo_tasks_try_scope(turbo_tasks, || { + let _guard = span.entered(); + f(); + }) + }) + } + + /// Converts the context into a vector of results + /// + /// ## Safety + /// + /// The caller must ensure that all tasks have been awaited before calling this method. + unsafe fn into_results(self) -> Vec> { + self.results.into_vec() + } +} + +/// Helper method to spawn tasks in parallel, ensuring that all tasks are awaited and errors are +/// handled. Also ensures turbo tasks and tracing context are maintained across the tasks. +/// +/// ## Safety +/// +/// The caller must ensure that all references in `inner` are valid for the lifetime `'l`. 
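A quick worked example of the chunking math above (a sketch assuming `available_parallelism()` reports 8 threads):

    fn chunking_example() {
        // With 8 available threads, GOOD_CHUNK_COUNT = 8 * 4 = 32.
        let len = 1_000usize;
        let chunk_size = len.div_ceil(32); // what good_chunk_size(1_000) would return
        assert_eq!(chunk_size, 32);
        // The slice is then split into len.div_ceil(chunk_size) spawned tasks.
        assert_eq!(len.div_ceil(chunk_size), 32);
    }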
+unsafe fn process_in_parallel<'l, I, R>(len: usize, inner: I) -> Vec> +where + R: Send + 'l, + I: FnOnce(&mut ProcessInParallelContext<'l, R>) -> Vec> + 'l, +{ + let mut process_context = ProcessInParallelContext::new(len); + block_in_place(|| { + let tasks = inner(&mut process_context); + process_context.handle.block_on( + async { + let mut first_err = None; + for task in tasks { + match task.await { + Ok(()) => {} + Err(err) if first_err.is_none() => { + // SAFETY: We need to finish all tasks before panicking. + first_err = Some(err); + } + Err(_) => { + // Ignore subsequent errors + } + } + } + if let Some(err) = first_err { + panic::resume_unwind(err.into_panic()); + } + } + .instrument(process_context.span.clone()), + ); + }); + // SAFETY: We ensure that all tasks have been awaited before calling this method. + unsafe { process_context.into_results() } +} + +pub fn for_each<'l, T, F>(items: &'l [T], f: F) +where + T: Sync, + F: Fn(&'l T) + Send + Sync, +{ + let len = items.len(); + if len == 0 { + return; + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. + unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks(chunk_size) + .map(|chunk| { + ctx.task(move || { + for item in chunk { + f(item); + } + }) + }) + .collect::>() + }) + }; + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; +} + +pub fn vec_into_for_each(items: Vec, f: impl Fn(T) + Send + Sync) +where + T: Send + Sync, +{ + let len = items.len(); + if len == 0 { + return; + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: transmuting to ManuallyDrop is always safe. We just need to make sure to not leak + // memory. + let mut items = unsafe { transmute::, Vec>>(items) }; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. + unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks_mut(chunk_size) + .map(|chunk| { + ctx.task(move || { + // SAFETY: Even when f() panics we drop all items in the chunk. + for item in MapEvenWhenDropped::new(chunk.iter_mut(), |item| { + ManuallyDrop::take(item) + }) { + f(item); + } + }) + }) + .collect::>() + }) + }; + // SAFETY: Ensure references are kept until here + drop(items); + let _ = f; +} + +pub fn try_for_each<'l, T, E>( + items: &'l [T], + f: impl (Fn(&'l T) -> Result<(), E>) + Send + Sync, +) -> Result<(), E> +where + T: Sync, + E: Send + 'static, +{ + let len = items.len(); + if len == 0 { + return Ok(()); // No items to process, return early + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. 
+ let results = unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks(chunk_size) + .map(|chunk| { + ctx.task(move || { + for item in chunk { + f(item)?; + } + Ok(()) + }) + }) + .collect::>() + }) + }; + let result = results.into_iter().flatten().collect::>(); + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; + result +} + +pub fn try_for_each_mut<'l, T, E>( + items: &'l mut [T], + f: impl (Fn(&'l mut T) -> Result<(), E>) + Send + Sync, +) -> Result<(), E> +where + T: Send + Sync, + E: Send + 'static, +{ + let len = items.len(); + if len == 0 { + return Ok(()); // No items to process, return early + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. + let results = unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks_mut(chunk_size) + .map(|chunk| { + ctx.task(move || { + for item in chunk { + f(item)?; + } + Ok(()) + }) + }) + .collect::>() + }) + }; + let result = results.into_iter().flatten().collect::>(); + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; + result +} + +pub fn try_into_for_each( + items: Vec, + f: impl (Fn(T) -> Result<(), E>) + Send + Sync, +) -> Result<(), E> +where + T: Send + Sync, + E: Send + 'static, +{ + let len = items.len(); + if len == 0 { + return Ok(()); // No items to process, return early + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: transmuting to ManuallyDrop is always safe. We just need to make sure to not leak + // memory. + let mut items = unsafe { transmute::, Vec>>(items) }; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. + let results = unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks_mut(chunk_size) + .map(|chunk| { + ctx.task(move || { + // SAFETY: Even when f() panics we drop all items in the chunk. + for item in MapEvenWhenDropped::new(chunk.iter_mut(), |item| { + ManuallyDrop::take(item) + }) { + f(item)?; + } + Ok(()) + }) + }) + .collect::>() + }) + }; + let result = results.into_iter().flatten().collect::>(); + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; + result +} + +pub fn map_collect<'l, T, I, R>(items: &'l [T], f: impl Fn(&'l T) -> I + Send + Sync) -> R +where + T: Sync, + I: Send + Sync + 'l, + R: FromIterator, +{ + let len = items.len(); + if len == 0 { + return R::from_iter(std::iter::empty()); // No items to process, return empty collection + } + let chunk_size = good_chunk_size(len); + let f = &f; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. 
+ let results = unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks(chunk_size) + .map(|chunk| ctx.task(move || chunk.iter().map(f).collect::>())) + .collect::>() + }) + }; + let result = results.into_iter().flatten().flatten().collect(); + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; + result +} + +pub fn vec_into_map_collect<'l, T, I, R>(items: Vec, f: impl Fn(T) -> I + Send + Sync) -> R +where + T: Send + Sync, + I: Send + Sync + 'l, + R: FromIterator, +{ + let len = items.len(); + if len == 0 { + return R::from_iter(std::iter::empty()); // No items to process, return empty collection; + } + let chunk_size = good_chunk_size(len); + let f = &f; + let mut items = unsafe { transmute::, Vec>>(items) }; + // SAFETY: We ensured that references in the closure are valid for the whole lifetime of this + // function. + let results = unsafe { + process_in_parallel(len.div_ceil(chunk_size), |ctx| { + items + .chunks_mut(chunk_size) + .map(|chunk| { + ctx.task(move || { + // SAFETY: Even when f() panics we drop all items in the chunk. + MapEvenWhenDropped::new(chunk.iter_mut(), |item| ManuallyDrop::take(item)) + .map(f) + .collect::>() + }) + }) + .collect::>() + }) + }; + let result = results.into_iter().flatten().flatten().collect(); + // SAFETY: Ensure references are kept until here + let _ = items; + let _ = f; + result +} + +struct MapEvenWhenDropped +where + I: Iterator, + F: FnMut(I::Item) -> B, +{ + iter: I, + f: F, +} + +impl MapEvenWhenDropped +where + I: Iterator, + F: FnMut(I::Item) -> B, +{ + fn new(iter: I, f: F) -> Self { + Self { iter, f } + } +} + +impl Iterator for MapEvenWhenDropped +where + I: Iterator, + F: FnMut(I::Item) -> B, +{ + type Item = B; + + fn next(&mut self) -> Option { + self.iter.next().map(&mut self.f) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl Drop for MapEvenWhenDropped +where + I: Iterator, + F: FnMut(I::Item) -> B, +{ + fn drop(&mut self) { + // Ensure that the mapping function is called even when the iterator is dropped. 
+ for item in &mut self.iter { + drop((self.f)(item)); + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicI32, Ordering}; + + use super::*; + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_for_each() { + let input = vec![1, 2, 3, 4, 5]; + let sum = AtomicI32::new(0); + for_each(&input, |&x| { + sum.fetch_add(x, Ordering::SeqCst); + }); + assert_eq!(sum.load(Ordering::SeqCst), 15); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_try_for_each() { + let input = vec![1, 2, 3, 4, 5]; + let result = try_for_each(&input, |&x| { + if x % 2 == 0 { + Ok(()) + } else { + Err(format!("Odd number {x} encountered")) + } + }); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Odd number 1 encountered"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_try_for_each_mut() { + let mut input = vec![1, 2, 3, 4, 5]; + let result = try_for_each_mut(&mut input, |x| { + *x += 10; + if *x % 2 == 0 { + Ok(()) + } else { + Err(format!("Odd number {} encountered", *x)) + } + }); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Odd number 11 encountered"); + assert_eq!(input, vec![11, 12, 13, 14, 15]); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_vec_into_for_each() { + let input = vec![1, 2, 3, 4, 5]; + let sum = AtomicI32::new(0); + vec_into_for_each(input, |x| { + sum.fetch_add(x, Ordering::SeqCst); + }); + assert_eq!(sum.load(Ordering::SeqCst), 15); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_map_collect() { + let input = vec![1, 2, 3, 4, 5]; + let result: Vec<_> = map_collect(&input, |&x| x * 2); + assert_eq!(result, vec![2, 4, 6, 8, 10]); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_into_map_collect() { + let input = vec![1, 2, 3, 4, 5]; + let result: Vec<_> = vec_into_map_collect(input, |x| x * 2); + assert_eq!(result, vec![2, 4, 6, 8, 10]); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_parallel_vec_into_map_collect_many() { + let input = vec![1; 1000]; + let result: Vec<_> = vec_into_map_collect(input, |x| x * 2); + assert_eq!(result, vec![2; 1000]); + } +} diff --git a/turbopack/crates/turbo-tasks/src/scope.rs b/turbopack/crates/turbo-tasks/src/scope.rs deleted file mode 100644 index bfe5e355df358..0000000000000 --- a/turbopack/crates/turbo-tasks/src/scope.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::sync::Arc; - -use crate::{TurboTasksApi, turbo_tasks, turbo_tasks_scope}; - -/// A wrapper around [`rayon::Scope`] that preserves the [`turbo_tasks_scope`]. -pub struct Scope<'scope, 'a> { - scope: &'a rayon::Scope<'scope>, - handle: tokio::runtime::Handle, - turbo_tasks: Arc, - span: tracing::Span, -} - -impl<'scope> Scope<'scope, '_> { - pub fn spawn(&self, body: Body) - where - Body: FnOnce(&Scope<'scope, '_>) + Send + 'scope, - { - let span = self.span.clone(); - let handle = self.handle.clone(); - let turbo_tasks = self.turbo_tasks.clone(); - self.scope.spawn(|scope| { - let _span = span.clone().entered(); - let _guard = handle.enter(); - turbo_tasks_scope(turbo_tasks.clone(), || { - body(&Scope { - scope, - span, - handle, - turbo_tasks, - }) - }) - }); - } -} - -/// A wrapper around [`rayon::in_place_scope`] that preserves the [`turbo_tasks_scope`]. 
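The `scope` wrapper deleted below fanned work out onto rayon while re-entering the turbo-tasks and tracing context; a hedged sketch of how such a call site reads before and after this change (the `fan_out` function, its `items`, and the per-item body are hypothetical):

    // Before (rayon-based scope, removed in this diff):
    //
    //     turbo_tasks::scope(|s| {
    //         for item in &items {
    //             s.spawn(move |_| process(item));
    //         }
    //     });
    //
    // After: the same fan-out, expressed with the helpers from the new parallel
    // module. Must be called from within a multi-threaded tokio runtime, since
    // for_each uses block_in_place.
    fn fan_out(items: &[String]) {
        turbo_tasks::parallel::for_each(items, |item| {
            // Stand-in for the per-item work previously done inside the scope body.
            let _len = item.len();
        });
    }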
-pub fn scope<'scope, Op, R>(op: Op) -> R -where - Op: FnOnce(&Scope<'scope, '_>) -> R, -{ - let span = tracing::Span::current(); - let handle = tokio::runtime::Handle::current(); - let turbo_tasks = turbo_tasks(); - rayon::in_place_scope(|scope| { - op(&Scope { - scope, - span, - handle, - turbo_tasks, - }) - }) -} diff --git a/turbopack/crates/turbo-tasks/src/spawn.rs b/turbopack/crates/turbo-tasks/src/spawn.rs new file mode 100644 index 0000000000000..fb0eaf932e919 --- /dev/null +++ b/turbopack/crates/turbo-tasks/src/spawn.rs @@ -0,0 +1,106 @@ +use std::{ + panic::resume_unwind, + pin::Pin, + task::{Context, Poll}, + thread, + time::{Duration, Instant}, +}; + +use anyhow::Result; +use futures::{FutureExt, ready}; +use tokio::runtime::Handle; +use tracing::{Instrument, Span, info_span}; +use turbo_tasks_malloc::{AllocationInfo, TurboMalloc}; + +use crate::{ + TurboTasksPanic, + capture_future::{self, CaptureFuture}, + manager::turbo_tasks_future_scope, + turbo_tasks, turbo_tasks_scope, +}; + +pub struct JoinHandle { + join_handle: tokio::task::JoinHandle<(Result, Duration, AllocationInfo)>, +} + +impl Future for JoinHandle { + type Output = T; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.get_mut(); + match ready!(this.join_handle.poll_unpin(cx)) { + Ok((res, duration, alloc_info)) => { + capture_future::add_duration(duration); + capture_future::add_allocation_info(alloc_info); + match res { + Ok(res) => Poll::Ready(res), + Err(e) => resume_unwind(e.into_panic()), + } + } + Err(e) => resume_unwind(e.into_panic()), + } + } +} + +/// Spawns a future as separate task and returns a JoinHandle which can be used to await the result. +/// The future has access to the current TurboTasks context and runs in the same tracing span. +/// Allocations and cpu time is accounted to the current turbo-tasks function. +pub fn spawn(future: impl Future + Send + 'static) -> JoinHandle { + let turbo_tasks = turbo_tasks(); + let span = Span::current(); + let join_handle = tokio::task::spawn( + turbo_tasks_future_scope(turbo_tasks, CaptureFuture::new(future)).instrument(span), + ); + JoinHandle { join_handle } +} + +/// Spawns a blocking function in a separate task using the blocking pool and returns a JoinHandle +/// which can be used to await the result. The function has access to the current TurboTasks context +/// and runs in the same tracing span. +/// Allocations and cpu time is accounted to the current turbo-tasks function. +pub fn spawn_blocking( + func: impl FnOnce() -> T + Send + 'static, +) -> JoinHandle { + let turbo_tasks = turbo_tasks(); + let span = Span::current(); + let join_handle = tokio::task::spawn_blocking(|| { + let _guard = span.entered(); + let start = Instant::now(); + let start_allocations = TurboMalloc::allocation_counters(); + let r = turbo_tasks_scope(turbo_tasks, func); + (Ok(r), start.elapsed(), start_allocations.until_now()) + }); + JoinHandle { join_handle } +} + +/// Spawns a thread which runs in background. It has access to the current TurboTasks context, but +/// is not accounted towards the current turbo-tasks function. +pub fn spawn_thread(func: impl FnOnce() + Send + 'static) { + let handle = Handle::current(); + let span = info_span!("thread").or_current(); + let turbo_tasks = turbo_tasks(); + thread::spawn(move || { + let _span = span.entered(); + turbo_tasks_scope(turbo_tasks, || { + let _guard = handle.enter(); + func(); + }) + }); +} + +/// Tells the scheduler about blocking work happening in the current thread. 
+/// It will make sure to allocate extra threads for the pool. +pub fn block_in_place(f: impl FnOnce() -> R + Send) -> R +where + R: Send, +{ + tokio::task::block_in_place(f) +} + +/// Blocks the current thread until the future is resolved. +pub fn block_for_future(future: impl Future + Send) -> T +where + T: Send, +{ + block_in_place(|| Handle::current().block_on(future)) +} diff --git a/turbopack/crates/turbopack/tests/node-file-trace.rs b/turbopack/crates/turbopack/tests/node-file-trace.rs index 4670a3905dddb..8f4b0aa3d00be 100644 --- a/turbopack/crates/turbopack/tests/node-file-trace.rs +++ b/turbopack/crates/turbopack/tests/node-file-trace.rs @@ -272,7 +272,7 @@ fn test_cases() {} #[apply(test_cases)] fn node_file_trace_noop_backing_storage(#[case] input: CaseInput) { - node_file_trace(input, "noop_backing_storage", false, 1, 120, |_| { + node_file_trace(input, "noop_backing_storage", 1, 120, |_| { TurboTasks::new(TurboTasksBackend::new( turbo_tasks_backend::BackendOptions::default(), turbo_tasks_backend::noop_backing_storage(), @@ -282,7 +282,7 @@ fn node_file_trace_noop_backing_storage(#[case] input: CaseInput) { #[apply(test_cases)] fn node_file_trace_persistent(#[case] input: CaseInput) { - node_file_trace(input, "persistent_cache", false, 2, 240, |directory_path| { + node_file_trace(input, "persistent_cache", 2, 240, |directory_path| { TurboTasks::new(TurboTasksBackend::new( turbo_tasks_backend::BackendOptions::default(), turbo_tasks_backend::default_backing_storage( @@ -302,31 +302,18 @@ fn node_file_trace_persistent(#[case] input: CaseInput) { #[cfg(feature = "bench_against_node_nft")] #[apply(test_cases)] -fn bench_against_node_nft_st(#[case] input: CaseInput) { - bench_against_node_nft_inner(input, false); +fn bench_against_node_nft(#[case] input: CaseInput) { + bench_against_node_nft_inner(input); } #[cfg(feature = "bench_against_node_nft")] -#[apply(test_cases)] -fn bench_against_node_nft_mt(#[case] input: CaseInput) { - bench_against_node_nft_inner(input, true); -} - -#[cfg(feature = "bench_against_node_nft")] -fn bench_against_node_nft_inner(input: CaseInput, multi_threaded: bool) { - node_file_trace( - input, - "noop_backing_storage", - multi_threaded, - 1, - 120, - |_| { - TurboTasks::new(TurboTasksBackend::new( - turbo_tasks_backend::BackendOptions::default(), - turbo_tasks_backend::noop_backing_storage(), - )) - }, - ); +fn bench_against_node_nft_inner(input: CaseInput) { + node_file_trace(input, "noop_backing_storage", 1, 120, |_| { + TurboTasks::new(TurboTasksBackend::new( + turbo_tasks_backend::BackendOptions::default(), + turbo_tasks_backend::noop_backing_storage(), + )) + }); } #[turbo_tasks::function(operation)] @@ -401,7 +388,6 @@ fn node_file_trace( expected_stderr, }: CaseInput, mode: &str, - multi_threaded: bool, run_count: i32, timeout_len: u64, create_turbo_tasks: impl Fn(&Path) -> Arc>, @@ -410,15 +396,9 @@ fn node_file_trace( LazyLock::new(|| Arc::new(Mutex::new(Vec::new()))); let r = &mut { - let mut builder = if multi_threaded { - tokio::runtime::Builder::new_multi_thread() - } else { - tokio::runtime::Builder::new_current_thread() - }; + let mut builder = tokio::runtime::Builder::new_multi_thread(); builder.enable_all(); - if !multi_threaded { - builder.max_blocking_threads(20); - } + builder.max_blocking_threads(20); builder.build().unwrap() }; r.block_on(async move { @@ -490,12 +470,7 @@ fn node_file_trace( bench_suites_lock.push(BenchSuite { suite: input .trim_start_matches("node-file-trace/integration/") - .to_string() - + (if multi_threaded { - " 
(multi-threaded)" - } else { - "" - }), + .to_string(), is_faster, rust_duration, node_duration,