Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: faster IndexedCrate::new with rayon #418

Merged
merged 1 commit into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ jobs:
- name: test
run: cargo test

- name: compile with rayon
run: cargo test --no-run --features rayon

- name: test with rayon
run: cargo test --features rayon

publish:
name: Publish to crates.io
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,16 @@ readme = "./README.md"

[dependencies]
trustfall = "0.7.1"
rayon = { version = "1.10.0", optional = true }
rustdoc-types = "0.29.1"

[features]
default = []
# Enable rayon for speeding up building `IndexedCrate`, this massively improves
# performance on big crates, but may impact performance on small crates and
# definitely increases the memory usage
rayon = ["dep:rayon"]

[[bench]]
name = "indexed_crate"
harness = false
Expand Down
96 changes: 81 additions & 15 deletions src/indexed_crate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::{
collections::{hash_map::Entry, HashMap, HashSet},
};

#[cfg(feature = "rayon")]
use rayon::prelude::*;
use rustdoc_types::{Crate, Id, Item};

use crate::{adapter::supported_item_kind, sealed_trait, visibility_tracker::VisibilityTracker};
Expand Down Expand Up @@ -47,6 +49,29 @@ pub struct IndexedCrate<'a> {
/// many values.
struct MapList<K, V>(HashMap<K, Vec<V>>);

#[cfg(feature = "rayon")]
impl<K: std::cmp::Eq + std::hash::Hash + Send, V: Send> FromParallelIterator<(K, V)>
for MapList<K, V>
{
#[inline]
fn from_par_iter<I>(par_iter: I) -> Self
where
I: IntoParallelIterator<Item = (K, V)>,
{
par_iter
.into_par_iter()
.fold(Self::new, |mut map, (key, value)| {
map.insert(key, value);
map
})
// Reduce left is faster than reduce right (about 19% less time in our benchmarks)
.reduce(Self::new, |mut l, r| {
l.merge(r);
l
})
}
}

impl<K: std::cmp::Eq + std::hash::Hash, V> FromIterator<(K, V)> for MapList<K, V> {
#[inline]
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
Expand Down Expand Up @@ -91,13 +116,36 @@ impl<K: std::cmp::Eq + std::hash::Hash, V> MapList<K, V> {
}
}
}

#[inline]
#[cfg(feature = "rayon")]
pub fn insert_many(&mut self, key: K, mut value: Vec<V>) {
match self.0.entry(key) {
Entry::Occupied(mut entry) => entry.get_mut().append(&mut value),
Entry::Vacant(entry) => {
entry.insert(value);
}
}
}

#[inline]
#[cfg(feature = "rayon")]
pub fn merge(&mut self, other: Self) {
self.0.reserve(other.0.len());
for (key, value) in other.0 {
self.insert_many(key, value);
}
}
}

/// Build the impl index
///
/// When compiled using the `rayon` feature, build it in parallel. Specifically, this paralelizes
/// the work of gathering all of the impls for the items in the index.
fn build_impl_index(index: &HashMap<Id, Item>) -> MapList<ImplEntry<'_>, (&Item, &Item)> {
#[cfg(feature = "rayon")]
let iter = index.par_iter();
#[cfg(not(feature = "rayon"))]
let iter = index.iter();
iter.filter_map(|(id, item)| {
let impls = match &item.inner {
Expand All @@ -107,6 +155,9 @@ fn build_impl_index(index: &HashMap<Id, Item>) -> MapList<ImplEntry<'_>, (&Item,
_ => return None,
};

#[cfg(feature = "rayon")]
let iter = impls.par_iter();
#[cfg(not(feature = "rayon"))]
let iter = impls.iter();

Some((id, iter.filter_map(|impl_id| index.get(impl_id))))
Expand Down Expand Up @@ -136,6 +187,9 @@ fn build_impl_index(index: &HashMap<Id, Item>) -> MapList<ImplEntry<'_>, (&Item,
})
.unwrap_or(&[]);

#[cfg(feature = "rayon")]
let trait_items = trait_items.par_iter();
#[cfg(not(feature = "rayon"))]
let trait_items = trait_items.iter();

let trait_provided_items = trait_items
Expand All @@ -159,6 +213,9 @@ fn build_impl_index(index: &HashMap<Id, Item>) -> MapList<ImplEntry<'_>, (&Item,
)
});

#[cfg(feature = "rayon")]
let impl_items = impl_inner.items.par_iter();
#[cfg(not(feature = "rayon"))]
let impl_items = impl_inner.items.iter();

impl_items
Expand Down Expand Up @@ -187,23 +244,32 @@ impl<'a> IndexedCrate<'a> {
//
// This is inlined because we need access to `value`, but `value` is not a valid
// `IndexedCrate` yet. Do not extract into a separate function.
#[cfg(feature = "rayon")]
let iter = crate_.index.par_iter();
#[cfg(not(feature = "rayon"))]
let iter = crate_.index.iter();

value.imports_index = Some(
crate_
.index
.iter()
.filter_map(|(_id, item)| {
if !supported_item_kind(item) {
return None;
}
let importable_paths = value.publicly_importable_names(&item.id);
Some(importable_paths.into_iter().map(move |importable_path| {
(importable_path.path, (item, importable_path.modifiers))
}))
})
.flatten()
.collect::<MapList<_, _>>()
.into_inner(),
iter.filter_map(|(_id, item)| {
if !supported_item_kind(item) {
return None;
}
let importable_paths = value.publicly_importable_names(&item.id);

#[cfg(feature = "rayon")]
let iter = importable_paths.into_par_iter();
#[cfg(not(feature = "rayon"))]
let iter = importable_paths.into_iter();

Some(iter.map(move |importable_path| {
(importable_path.path, (item, importable_path.modifiers))
}))
})
.flatten()
.collect::<MapList<_, _>>()
.into_inner(),
);

value.impl_index = Some(build_impl_index(&crate_.index).into_inner());

value
Expand Down