From 4afdeaaabd021bf5ac03d74c7577747ccbb926d0 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Tue, 16 Mar 2021 21:39:03 +0100 Subject: [PATCH 1/4] Mmap the incremental data instead of reading it. --- .../rustc_incremental/src/persist/file_format.rs | 12 +++++++----- compiler/rustc_incremental/src/persist/load.rs | 3 ++- compiler/rustc_middle/src/ty/context.rs | 3 ++- compiler/rustc_query_impl/src/on_disk_cache.rs | 15 ++++++++++----- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index b821ed6cff9f8..501f6bdb9cffe 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -14,6 +14,7 @@ use std::fs; use std::io::{self, Read}; use std::path::Path; +use rustc_data_structures::memmap::Mmap; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; use rustc_serialize::Encoder; @@ -54,14 +55,15 @@ pub fn read_file( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> io::Result<Option<(Vec<u8>, usize)>> { - let data = match fs::read(path) { - Ok(data) => data, +) -> io::Result<Option<(Mmap, usize)>> { + let file = match fs::File::open(path) { + Ok(file) => file, Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err), }; + let mmap = unsafe { Mmap::map(file) }?; - let mut file = io::Cursor::new(data); + let mut file = io::Cursor::new(&*mmap); // Check FILE_MAGIC { @@ -103,7 +105,7 @@ pub fn read_file( } let post_header_start_pos = file.position() as usize; - Ok(Some((file.into_inner(), post_header_start_pos))) + Ok(Some((mmap, post_header_start_pos))) } fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) { diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 437d5596447d7..4d38556e5d214 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ 
b/compiler/rustc_incremental/src/persist/load.rs @@ -1,6 +1,7 @@ //! Code to save/load the dep-graph from files. use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::memmap::Mmap; use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId}; use rustc_middle::ty::OnDiskCache; use rustc_serialize::opaque::Decoder; @@ -48,7 +49,7 @@ fn load_data( report_incremental_info: bool, path: &Path, nightly_build: bool, -) -> LoadResult<(Vec<u8>, usize)> { +) -> LoadResult<(Mmap, usize)> { match file_format::read_file(report_incremental_info, path, nightly_build) { Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos }, Ok(None) => { diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 89542a1ebabe2..08990cb7baf98 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -27,6 +27,7 @@ use crate::ty::{ use rustc_ast as ast; use rustc_attr as attr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::memmap::Mmap; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; @@ -71,7 +72,7 @@ use std::sync::Arc; pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync { /// Creates a new `OnDiskCache` instance from the serialized data in `data`. 
- fn new(sess: &'tcx Session, data: Vec<u8>, start_pos: usize) -> Self + fn new(sess: &'tcx Session, data: Mmap, start_pos: usize) -> Self where Self: Sized; diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index ee64f22618e7e..e696df85b888f 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -1,5 +1,6 @@ use crate::QueryCtxt; use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet}; +use rustc_data_structures::memmap::Mmap; use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell}; use rustc_data_structures::unhash::UnhashMap; use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE}; @@ -42,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1; /// any side effects that have been emitted during a query. pub struct OnDiskCache<'sess> { // The complete cache data in serialized form. - serialized_data: Vec<u8>, + serialized_data: Option<Mmap>, // Collects all `QuerySideEffects` created during the current compilation // session. @@ -182,7 +183,8 @@ impl EncodedSourceFileId { } impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { - fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self { + /// Creates a new `OnDiskCache` instance from the serialized data in `data`. + fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { debug_assert!(sess.opts.incremental.is_some()); // Wrap in a scope so we can borrow `data`. 
@@ -204,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { }; Self { - serialized_data: data, + serialized_data: Some(data), file_index_to_stable_id: footer.file_index_to_stable_id, file_index_to_file: Default::default(), cnum_map: OnceCell::new(), @@ -225,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { fn new_empty(source_map: &'sess SourceMap) -> Self { Self { - serialized_data: Vec::new(), + serialized_data: None, file_index_to_stable_id: Default::default(), file_index_to_file: Default::default(), cnum_map: OnceCell::new(), @@ -577,7 +579,10 @@ impl<'sess> OnDiskCache<'sess> { let mut decoder = CacheDecoder { tcx, - opaque: opaque::Decoder::new(&self.serialized_data[..], pos.to_usize()), + opaque: opaque::Decoder::new( + self.serialized_data.as_deref().unwrap_or(&[]), + pos.to_usize(), + ), source_map: self.source_map, cnum_map, file_index_to_file: &self.file_index_to_file, From 6b47e1ece87f8cb96709b772dbea1a2a979c1cbd Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Wed, 17 Mar 2021 18:31:21 +0100 Subject: [PATCH 2/4] Move save_in to file_format. --- .../src/persist/file_format.rs | 57 ++++++++++++++++- .../rustc_incremental/src/persist/save.rs | 62 ++----------------- 2 files changed, 61 insertions(+), 58 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index 501f6bdb9cffe..2da72bfc2927d 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -12,11 +12,12 @@ use std::env; use std::fs; use std::io::{self, Read}; -use std::path::Path; +use std::path::{Path, PathBuf}; use rustc_data_structures::memmap::Mmap; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; use rustc_serialize::Encoder; +use rustc_session::Session; /// The first few bytes of files generated by incremental compilation. 
const FILE_MAGIC: &[u8] = b"RSIC"; @@ -29,7 +30,7 @@ const HEADER_FORMAT_VERSION: u16 = 0; /// the Git commit hash. const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION"); -pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult { +pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult { stream.emit_raw_bytes(FILE_MAGIC)?; stream.emit_raw_bytes(&[ (HEADER_FORMAT_VERSION >> 0) as u8, @@ -42,6 +43,58 @@ pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileE stream.emit_raw_bytes(rustc_version.as_bytes()) } +pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F) +where + F: FnOnce(&mut FileEncoder) -> FileEncodeResult, +{ + debug!("save: storing data in {}", path_buf.display()); + + // Delete the old file, if any. + // Note: It's important that we actually delete the old file and not just + // truncate and overwrite it, since it might be a shared hard-link, the + // underlying data of which we don't want to modify + match fs::remove_file(&path_buf) { + Ok(()) => { + debug!("save: remove old file"); + } + Err(err) if err.kind() == io::ErrorKind::NotFound => (), + Err(err) => { + sess.err(&format!( + "unable to delete old {} at `{}`: {}", + name, + path_buf.display(), + err + )); + return; + } + } + + let mut encoder = match FileEncoder::new(&path_buf) { + Ok(encoder) => encoder, + Err(err) => { + sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err)); + return; + } + }; + + if let Err(err) = write_file_header(&mut encoder, sess.is_nightly_build()) { + sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err)); + return; + } + + if let Err(err) = encode(&mut encoder) { + sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err)); + return; + } + + if let Err(err) = encoder.flush() { + sess.err(&format!("failed to flush {} to `{}`: {}", name, 
path_buf.display(), err)); + return; + } + + debug!("save: data written to disk successfully"); +} + /// Reads the contents of a file with a file header as defined in this module. /// /// - Returns `Ok(Some(data, pos))` if the file existed and was generated by a diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs index a8455854ebb5f..2feba71e010d3 100644 --- a/compiler/rustc_incremental/src/persist/save.rs +++ b/compiler/rustc_incremental/src/persist/save.rs @@ -6,8 +6,6 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; use rustc_serialize::Encodable as RustcEncodable; use rustc_session::Session; use std::fs; -use std::io; -use std::path::PathBuf; use super::data::*; use super::dirty_clean; @@ -44,7 +42,9 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) { join( move || { sess.time("incr_comp_persist_result_cache", || { - save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e)); + file_format::save_in(sess, query_cache_path, "query cache", |e| { + encode_query_cache(tcx, e) + }); }); }, move || { @@ -86,7 +86,9 @@ pub fn save_work_product_index( debug!("save_work_product_index()"); dep_graph.assert_ignored(); let path = work_products_path(sess); - save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e)); + file_format::save_in(sess, path, "work product index", |e| { + encode_work_product_index(&new_work_products, e) + }); // We also need to clean out old work-products, as not all of them are // deleted during invalidation. Some object files don't change their @@ -113,58 +115,6 @@ pub fn save_work_product_index( }); } -pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F) -where - F: FnOnce(&mut FileEncoder) -> FileEncodeResult, -{ - debug!("save: storing data in {}", path_buf.display()); - - // Delete the old file, if any. 
- // Note: It's important that we actually delete the old file and not just - // truncate and overwrite it, since it might be a shared hard-link, the - // underlying data of which we don't want to modify - match fs::remove_file(&path_buf) { - Ok(()) => { - debug!("save: remove old file"); - } - Err(err) if err.kind() == io::ErrorKind::NotFound => (), - Err(err) => { - sess.err(&format!( - "unable to delete old {} at `{}`: {}", - name, - path_buf.display(), - err - )); - return; - } - } - - let mut encoder = match FileEncoder::new(&path_buf) { - Ok(encoder) => encoder, - Err(err) => { - sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err)); - return; - } - }; - - if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) { - sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err)); - return; - } - - if let Err(err) = encode(&mut encoder) { - sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err)); - return; - } - - if let Err(err) = encoder.flush() { - sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err)); - return; - } - - debug!("save: data written to disk successfully"); -} - fn encode_work_product_index( work_products: &FxHashMap<WorkProductId, WorkProduct>, encoder: &mut FileEncoder, From 98007e2ce6f3731a935b0541d6ef63f292ef7ab9 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Wed, 17 Mar 2021 22:49:16 +0100 Subject: [PATCH 3/4] Drop the query result memmap before serializing it back. 
--- .../src/persist/file_format.rs | 11 +++- .../rustc_incremental/src/persist/save.rs | 5 ++ compiler/rustc_middle/src/ty/context.rs | 2 + .../rustc_query_impl/src/on_disk_cache.rs | 52 ++++++++++--------- 4 files changed, 44 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index 2da72bfc2927d..572a4fc697173 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -52,7 +52,10 @@ where // Delete the old file, if any. // Note: It's important that we actually delete the old file and not just // truncate and overwrite it, since it might be a shared hard-link, the - // underlying data of which we don't want to modify + // underlying data of which we don't want to modify. + // + // We have to ensure we have dropped the memory maps to this file + // before performing this removal. match fs::remove_file(&path_buf) { Ok(()) => { debug!("save: remove old file"); @@ -114,6 +117,12 @@ pub fn read_file( Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None), Err(err) => return Err(err), }; + // SAFETY: This process must not modify nor remove the backing file while the memory map lives. + // For the dep-graph and the work product index, it is as soon as the decoding is done. + // For the query result cache, the memory map is dropped in save_dep_graph before calling + // save_in and trying to remove the backing file. + // + // There is no way to prevent another process from modifying this file. 
let mmap = unsafe { Mmap::map(file) }?; let mut file = io::Cursor::new(&*mmap); diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs index 2feba71e010d3..6c683058b12d6 100644 --- a/compiler/rustc_incremental/src/persist/save.rs +++ b/compiler/rustc_incremental/src/persist/save.rs @@ -42,6 +42,11 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) { join( move || { sess.time("incr_comp_persist_result_cache", || { + // Drop the memory map so that we can remove the file and write to it. + if let Some(odc) = &tcx.on_disk_cache { + odc.drop_serialized_data(tcx); + } + file_format::save_in(sess, query_cache_path, "query cache", |e| { encode_query_cache(tcx, e) }); diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 08990cb7baf98..dae82d2438ade 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -101,6 +101,8 @@ pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync { fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode); fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash); + fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>); + fn serialize(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult; } diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index e696df85b888f..c197962fabbef 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -1,7 +1,7 @@ use crate::QueryCtxt; use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet}; use rustc_data_structures::memmap::Mmap; -use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell}; +use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell, RwLock}; use rustc_data_structures::unhash::UnhashMap; use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, 
LOCAL_CRATE}; use rustc_hir::definitions::DefPathHash; @@ -43,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1; /// any side effects that have been emitted during a query. pub struct OnDiskCache<'sess> { // The complete cache data in serialized form. - serialized_data: Option<Mmap>, + serialized_data: RwLock<Option<Mmap>>, // Collects all `QuerySideEffects` created during the current compilation // session. @@ -206,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { }; Self { - serialized_data: Some(data), + serialized_data: RwLock::new(Some(data)), file_index_to_stable_id: footer.file_index_to_stable_id, file_index_to_file: Default::default(), cnum_map: OnceCell::new(), @@ -227,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { fn new_empty(source_map: &'sess SourceMap) -> Self { Self { - serialized_data: None, + serialized_data: RwLock::new(None), file_index_to_stable_id: Default::default(), file_index_to_file: Default::default(), cnum_map: OnceCell::new(), @@ -246,7 +246,26 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { } } - fn serialize(&self, tcx: TyCtxt<'sess>, encoder: &mut FileEncoder) -> FileEncodeResult { + fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) { + // Register any dep nodes that we reused from the previous session, + // but didn't `DepNode::construct` in this session. This ensures + // that their `DefPathHash` to `RawDefId` mappings are registered + // in 'latest_foreign_def_path_hashes' if necessary, since that + // normally happens in `DepNode::construct`. + tcx.dep_graph.register_reused_dep_nodes(tcx); + + // Load everything into memory so we can write it out to the on-disk + // cache. The vast majority of cacheable query results should already + // be in memory, so this should be a cheap operation. 
+ // Do this *before* we clone 'latest_foreign_def_path_hashes', since + // loading existing queries may cause us to create new DepNodes, which + // may in turn end up invoking `store_foreign_def_id_hash` + tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx)); + + *self.serialized_data.write() = None; + } + + fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult { // Serializing the `DepGraph` should not modify it. tcx.dep_graph.with_ignore(|| { // Allocate `SourceFileIndex`es. @@ -268,21 +287,6 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { (file_to_file_index, file_index_to_stable_id) }; - // Register any dep nodes that we reused from the previous session, - // but didn't `DepNode::construct` in this session. This ensures - // that their `DefPathHash` to `RawDefId` mappings are registered - // in 'latest_foreign_def_path_hashes' if necessary, since that - // normally happens in `DepNode::construct`. - tcx.dep_graph.register_reused_dep_nodes(tcx); - - // Load everything into memory so we can write it out to the on-disk - // cache. The vast majority of cacheable query results should already - // be in memory, so this should be a cheap operation. 
- // Do this *before* we clone 'latest_foreign_def_path_hashes', since - // loading existing queries may cause us to create new DepNodes, which - // may in turn end up invoking `store_foreign_def_id_hash` - tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx)); - let latest_foreign_def_path_hashes = self.latest_foreign_def_path_hashes.lock().clone(); let hygiene_encode_context = HygieneEncodeContext::default(); @@ -566,7 +570,7 @@ impl<'sess> OnDiskCache<'sess> { }) } - fn with_decoder<'a, 'tcx, T, F: FnOnce(&mut CacheDecoder<'sess, 'tcx>) -> T>( + fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T>( &'sess self, tcx: TyCtxt<'tcx>, pos: AbsoluteBytePos, @@ -577,12 +581,10 @@ impl<'sess> OnDiskCache<'sess> { { let cnum_map = self.cnum_map.get_or_init(|| Self::compute_cnum_map(tcx)); + let serialized_data = self.serialized_data.read(); let mut decoder = CacheDecoder { tcx, - opaque: opaque::Decoder::new( - self.serialized_data.as_deref().unwrap_or(&[]), - pos.to_usize(), - ), + opaque: opaque::Decoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()), source_map: self.source_map, cnum_map, file_index_to_file: &self.file_index_to_file, From bcefd487c380b113d81ac066ea9b3b4b65e9efe7 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 28 Aug 2021 21:49:51 +0200 Subject: [PATCH 4/4] Comment drop_serialized_data. --- compiler/rustc_query_impl/src/on_disk_cache.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index c197962fabbef..5c2803c67e73f 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -246,6 +246,11 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> { } } + /// Execute all cache promotions and release the serialized backing Mmap. 
+ /// + /// Cache promotions require invoking queries, which needs to read the serialized data. + /// In order to serialize the new on-disk cache, the former on-disk cache file needs to be + /// deleted, hence we won't be able to refer to its memmapped data. fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) { // Register any dep nodes that we reused from the previous session, // but didn't `DepNode::construct` in this session. This ensures