Optimize hash map operations in the query system

rust-lang-ci · Sep 24, 2023 · 8229536 · 8229536
1 parent a1c7a1c
commit 8229536
Show file tree

Hide file tree

Showing 7 changed files with 73 additions and 40 deletions.
diff --git a/Cargo.lock b/Cargo.lock
@@ -3621,6 +3621,7 @@ dependencies = [
  "cfg-if",
  "elsa",
  "ena",
+ "hashbrown 0.14.0",
  "indexmap 2.0.0",
  "itertools",
  "jobserver",
@@ -4349,7 +4350,9 @@ dependencies = [
 name = "rustc_query_system"
 version = "0.0.0"
 dependencies = [
+ "hashbrown 0.14.0",
  "parking_lot 0.12.1",
+ "rustc-hash",
  "rustc-rayon-core",
  "rustc_ast",
  "rustc_data_structures",

diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml
@@ -14,6 +14,11 @@ indexmap = { version = "2.0.0" }
 jobserver_crate = { version = "0.1.13", package = "jobserver" }
 libc = "0.2"
 measureme = "10.0.0"
+hashbrown = { version = "0.14", default-features = false, features = [
+    "raw",
+    "inline-more",
+    "nightly",
+] }
 rustc-rayon-core = { version = "0.5.0", optional = true }
 rustc-rayon = { version = "0.5.0", optional = true }
 rustc_arena = { path = "../rustc_arena" }

diff --git a/compiler/rustc_data_structures/src/marker.rs b/compiler/rustc_data_structures/src/marker.rs
@@ -87,6 +87,7 @@ cfg_if!(
             [std::sync::mpsc::Sender<T> where T: DynSend]
             [std::sync::Arc<T> where T: ?Sized + DynSync + DynSend]
             [std::sync::LazyLock<T, F> where T: DynSend, F: DynSend]
+            [hashbrown::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
             [std::collections::HashSet<K, S> where K: DynSend, S: DynSend]
             [std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend]
             [std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend]

diff --git a/compiler/rustc_middle/src/query/plumbing.rs b/compiler/rustc_middle/src/query/plumbing.rs
@@ -524,7 +524,8 @@ macro_rules! define_feedable {
                             &value,
                             hash_result!([$($modifiers)*]),
                         );
-                        cache.complete(key, erased, dep_node_index);
+                        let key_hash = rustc_data_structures::sharded::make_hash(&key);
+                        cache.complete(key, key_hash, erased, dep_node_index);
                     }
                 }
             }

diff --git a/compiler/rustc_query_system/Cargo.toml b/compiler/rustc_query_system/Cargo.toml
@@ -6,7 +6,13 @@ edition = "2021"
 [lib]
 
 [dependencies]
+hashbrown = { version = "0.14", default-features = false, features = [
+    "raw",
+    "inline-more",
+    "nightly",
+] }
 parking_lot = "0.12"
+rustc-hash = "1.1.0"
 rustc_ast = { path = "../rustc_ast" }
 rustc_data_structures = { path = "../rustc_data_structures" }
 rustc_errors = { path = "../rustc_errors" }

diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs
@@ -1,10 +1,10 @@
 use crate::dep_graph::DepNodeIndex;
-
-use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
 use rustc_data_structures::sync::OnceLock;
+use rustc_hash::FxHasher;
 use rustc_index::{Idx, IndexVec};
 use std::fmt::Debug;
+use std::hash::BuildHasherDefault;
 use std::hash::Hash;
 use std::marker::PhantomData;
 
@@ -19,9 +19,9 @@ pub trait QueryCache: Sized {
     type Value: Copy;
 
     /// Checks if the query is already computed and in the cache.
-    fn lookup(&self, key: &Self::Key) -> Option<(Self::Value, DepNodeIndex)>;
+    fn lookup(&self, key: &Self::Key, key_hash: u64) -> Option<(Self::Value, DepNodeIndex)>;
 
-    fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex);
+    fn complete(&self, key: Self::Key, key_hash: u64, value: Self::Value, index: DepNodeIndex);
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
 }
@@ -35,7 +35,7 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<'tcx, V> for DefaultCacheSelecto
 }
 
 pub struct DefaultCache<K, V> {
-    cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
+    cache: Sharded<hashbrown::HashMap<K, (V, DepNodeIndex), BuildHasherDefault<FxHasher>>>,
 }
 
 impl<K, V> Default for DefaultCache<K, V> {
@@ -53,20 +53,20 @@ where
     type Value = V;
 
     #[inline(always)]
-    fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        let key_hash = sharded::make_hash(key);
+    fn lookup(&self, key: &K, key_hash: u64) -> Option<(V, DepNodeIndex)> {
         let lock = self.cache.lock_shard_by_hash(key_hash);
         let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result { Some(*value) } else { None }
     }
 
     #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        let mut lock = self.cache.lock_shard_by_value(&key);
-        // We may be overwriting another value. This is all right, since the dep-graph
-        // will check that the fingerprint matches.
-        lock.insert(key, (value, index));
+    fn complete(&self, key: K, key_hash: u64, value: V, index: DepNodeIndex) {
+        let mut lock = self.cache.lock_shard_by_hash(key_hash);
+        // Use the raw table API since we know that the
+        // key does not exist and we already have the hash.
+        lock.raw_table_mut()
+            .insert(key_hash, (key, (value, index)), |(k, _)| sharded::make_hash(k));
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
@@ -104,12 +104,12 @@ where
     type Value = V;
 
     #[inline(always)]
-    fn lookup(&self, _key: &()) -> Option<(V, DepNodeIndex)> {
+    fn lookup(&self, _key: &(), _key_hash: u64) -> Option<(V, DepNodeIndex)> {
         self.cache.get().copied()
     }
 
     #[inline]
-    fn complete(&self, _key: (), value: V, index: DepNodeIndex) {
+    fn complete(&self, _key: (), _key_hash: u64, value: V, index: DepNodeIndex) {
         self.cache.set((value, index)).ok();
     }
 
@@ -147,13 +147,13 @@ where
     type Value = V;
 
     #[inline(always)]
-    fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
+    fn lookup(&self, key: &K, _key_hash: u64) -> Option<(V, DepNodeIndex)> {
         let lock = self.cache.lock_shard_by_hash(key.index() as u64);
         if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
     }
 
     #[inline]
-    fn complete(&self, key: K, value: V, index: DepNodeIndex) {
+    fn complete(&self, key: K, _key_hash: u64, value: V, index: DepNodeIndex) {
         let mut lock = self.cache.lock_shard_by_hash(key.index() as u64);
         lock.insert(key, (value, index));
     }

diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs
@@ -12,26 +12,27 @@ use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobI
 use crate::query::SerializedDepNodeIndex;
 use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame};
 use crate::HandleCycleError;
+use hashbrown::hash_map::RawEntryMut;
 use rustc_data_structures::fingerprint::Fingerprint;
-use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sharded::Sharded;
+use rustc_data_structures::sharded::{self, Sharded};
 use rustc_data_structures::stack::ensure_sufficient_stack;
 use rustc_data_structures::sync::Lock;
 #[cfg(parallel_compiler)]
 use rustc_data_structures::{cold_path, sync};
 use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError};
+use rustc_hash::FxHasher;
 use rustc_span::{Span, DUMMY_SP};
 use std::cell::Cell;
-use std::collections::hash_map::Entry;
 use std::fmt::Debug;
+use std::hash::BuildHasherDefault;
 use std::hash::Hash;
 use std::mem;
 use thin_vec::ThinVec;
 
 use super::QueryConfig;
 
 pub struct QueryState<K> {
-    active: Sharded<FxHashMap<K, QueryResult>>,
+    active: Sharded<hashbrown::HashMap<K, QueryResult, BuildHasherDefault<FxHasher>>>,
 }
 
 /// Indicates the state of a query for a given key in a query map.
@@ -142,7 +143,7 @@ where
 {
     /// Completes the query by updating the query cache with the `result`,
     /// signals the waiter and forgets the JobOwner, so it won't poison the query
-    fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
+    fn complete<C>(self, cache: &C, key_hash: u64, result: C::Value, dep_node_index: DepNodeIndex)
     where
         C: QueryCache<Key = K>,
     {
@@ -154,13 +155,17 @@ where
 
         // Mark as complete before we remove the job from the active state
         // so no other thread can re-execute this query.
-        cache.complete(key, result, dep_node_index);
+        cache.complete(key, key_hash, result, dep_node_index);
 
         let job = {
-            let mut lock = state.active.lock_shard_by_value(&key);
-            match lock.remove(&key).unwrap() {
-                QueryResult::Started(job) => job,
-                QueryResult::Poisoned => panic!(),
+            let mut lock = state.active.lock_shard_by_hash(key_hash);
+
+            match lock.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key) {
+                RawEntryMut::Vacant(_) => panic!(),
+                RawEntryMut::Occupied(occupied) => match occupied.remove() {
+                    QueryResult::Started(job) => job,
+                    QueryResult::Poisoned => panic!(),
+                },
             }
         };
 
@@ -209,7 +214,8 @@ where
     C: QueryCache,
     Tcx: DepContext,
 {
-    match cache.lookup(&key) {
+    let key_hash = sharded::make_hash(key);
+    match cache.lookup(&key, key_hash) {
         Some((value, index)) => {
             tcx.profiler().query_cache_hit(index.into());
             tcx.dep_graph().read_index(index);
@@ -246,6 +252,7 @@ fn wait_for_query<Q, Qcx>(
     qcx: Qcx,
     span: Span,
     key: Q::Key,
+    key_hash: u64,
     latch: QueryLatch,
     current: Option<QueryJobId>,
 ) -> (Q::Value, Option<DepNodeIndex>)
@@ -264,7 +271,7 @@ where
 
     match result {
         Ok(()) => {
-            let Some((v, index)) = query.query_cache(qcx).lookup(&key) else {
+            let Some((v, index)) = query.query_cache(qcx).lookup(&key, key_hash) else {
                 cold_path(|| {
                     // We didn't find the query result in the query cache. Check if it was
                     // poisoned due to a panic instead.
@@ -301,7 +308,8 @@ where
     Qcx: QueryContext,
 {
     let state = query.query_state(qcx);
-    let mut state_lock = state.active.lock_shard_by_value(&key);
+    let key_hash = sharded::make_hash(&key);
+    let mut state_lock = state.active.lock_shard_by_hash(key_hash);
 
     // For the parallel compiler we need to check both the query cache and query state structures
     // while holding the state lock to ensure that 1) the query has not yet completed and 2) the
@@ -310,28 +318,28 @@ where
     // executing, but another thread may have already completed the query and stores it result
     // in the query cache.
     if cfg!(parallel_compiler) && qcx.dep_context().sess().threads() > 1 {
-        if let Some((value, index)) = query.query_cache(qcx).lookup(&key) {
+        if let Some((value, index)) = query.query_cache(qcx).lookup(&key, key_hash) {
             qcx.dep_context().profiler().query_cache_hit(index.into());
             return (value, Some(index));
         }
     }
 
     let current_job_id = qcx.current_query_job();
 
-    match state_lock.entry(key) {
-        Entry::Vacant(entry) => {
+    match state_lock.raw_entry_mut().from_key_hashed_nocheck(key_hash, &key) {
+        RawEntryMut::Vacant(entry) => {
             // Nothing has computed or is computing the query, so we start a new job and insert it in the
             // state map.
             let id = qcx.next_job_id();
             let job = QueryJob::new(id, span, current_job_id);
-            entry.insert(QueryResult::Started(job));
+            entry.insert_hashed_nocheck(key_hash, key, QueryResult::Started(job));
 
             // Drop the lock before we start executing the query
             drop(state_lock);
 
-            execute_job::<_, _, INCR>(query, qcx, state, key, id, dep_node)
+            execute_job::<_, _, INCR>(query, qcx, state, key, key_hash, id, dep_node)
         }
-        Entry::Occupied(mut entry) => {
+        RawEntryMut::Occupied(mut entry) => {
             match entry.get_mut() {
                 QueryResult::Started(job) => {
                     #[cfg(parallel_compiler)]
@@ -342,7 +350,15 @@ where
 
                         // Only call `wait_for_query` if we're using a Rayon thread pool
                         // as it will attempt to mark the worker thread as blocked.
-                        return wait_for_query(query, qcx, span, key, latch, current_job_id);
+                        return wait_for_query(
+                            query,
+                            qcx,
+                            span,
+                            key,
+                            key_hash,
+                            latch,
+                            current_job_id,
+                        );
                     }
 
                     let id = job.id;
@@ -364,6 +380,7 @@ fn execute_job<Q, Qcx, const INCR: bool>(
     qcx: Qcx,
     state: &QueryState<Q::Key>,
     key: Q::Key,
+    key_hash: u64,
     id: QueryJobId,
     dep_node: Option<DepNode>,
 ) -> (Q::Value, Option<DepNodeIndex>)
@@ -395,7 +412,7 @@ where
         // This can't happen, as query feeding adds the very dependencies to the fed query
         // as its feeding query had. So if the fed query is red, so is its feeder, which will
         // get evaluated first, and re-feed the query.
-        if let Some((cached_result, _)) = cache.lookup(&key) {
+        if let Some((cached_result, _)) = cache.lookup(&key, key_hash) {
             let Some(hasher) = query.hash_result() else {
                 panic!(
                     "no_hash fed query later has its value computed.\n\
@@ -427,7 +444,7 @@ where
             }
         }
     }
-    job_owner.complete(cache, result, dep_node_index);
+    job_owner.complete(cache, key_hash, result, dep_node_index);
 
     (result, Some(dep_node_index))
 }
@@ -826,7 +843,7 @@ where
 {
     // We may be concurrently trying both execute and force a query.
     // Ensure that only one of them runs the query.
-    if let Some((_, index)) = query.query_cache(qcx).lookup(&key) {
+    if let Some((_, index)) = query.query_cache(qcx).lookup(&key, sharded::make_hash(&key)) {
         qcx.dep_context().profiler().query_cache_hit(index.into());
         return;
     }