Skip to content

Commit 683ae1a

Browse files
Updates to cargo docs
1 parent 1604535 commit 683ae1a

File tree

3 files changed

+49
-36
lines changed

3 files changed

+49
-36
lines changed

src/bfield.rs

+41-33
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use serde::Serialize;
77

88
use crate::bfield_member::{BFieldLookup, BFieldMember, BFieldVal};
99

10-
/// The struct holding the various bfields
10+
/// The `struct` holding the `BField` primary and secondary bit arrays.
1111
pub struct BField<T> {
1212
members: Vec<BFieldMember<T>>,
1313
read_only: bool,
@@ -18,18 +18,26 @@ unsafe impl<T> Send for BField<T> {}
1818
unsafe impl<T> Sync for BField<T> {}
1919

2020
impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
21-
/// The (complicated) method to create a bfield.
22-
/// The bfield files will be created in `directory` with the given `filename` and the
23-
/// suffixes `(0..n_secondaries).bfd`
24-
/// `size` is the primary bfield size, subsequent bfield sizes will be determined by
25-
/// `secondary_scaledown` and `max_scaledown`.
26-
/// If you set `in_memory` to true, remember to call `persist_to_disk` when it's built to
21+
/// A (rather complex) method for creating a `BField`.
22+
///
23+
/// This will create a series of `BField` bit array files in `directory` with the given `filename` and the
24+
/// suffixes `(0..n_secondaries).bfd`. If you set `in_memory` to true, remember to call `persist_to_disk` once it's built to
2725
/// save it.
28-
/// The params are the following in the paper:
29-
/// `n_hashes` -> k
30-
/// `marker_width` -> v (nu)
31-
/// `n_marker_bits` -> κ (kappa)
32-
/// `secondary_scaledown` -> β (beta)
26+
///
27+
/// The following parameters are required. See the [README.md](https://github.com/onecodex/rust-bfield/)
28+
/// for additional details as well as the
29+
/// [parameter selection notebook](https://github.com/onecodex/rust-bfield/blob/main/docs/notebook/calculate-parameters.ipynb)
30+
/// for helpful guidance in picking optimal parameters.
31+
/// - `size`. The size of the primary `BField` array; the sizes of subsequent `BField` arrays are determined
32+
/// by the `secondary_scaledown` and `max_scaledown` parameters
33+
/// - `n_hashes`. The number of hash functions _k_ to use.
34+
/// - `marker_width` or ν (nu). The length of the bit-string to use for encoding each value.
35+
/// - `n_marker_bits` or κ (kappa). The number of 1s to set in each ν-length bit-string (also its Hamming weight).
36+
/// - `secondary_scaledown` or β (beta). The scaling factor to use for each subsequent `BField` size.
37+
/// - `max_scaledown`. A maximum scaling factor to use for secondary `BField` sizes, since β raised to the power of
38+
/// `n_secondaries` can be impractically/needlessly small.
39+
/// - `n_secondaries`. The number of secondary `BField`s to create.
40+
/// - `in_memory`. Whether to create the `BField` in memory or on disk.
3341
#[allow(clippy::too_many_arguments)]
3442
pub fn create<P>(
3543
directory: P,
@@ -84,7 +92,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
8492
})
8593
}
8694

87-
/// Loads the bfield given the path to the "main" db path (eg the one ending with `0.bfd`).
95+
/// Loads the `BField` given the path to the primary array data file (e.g., the one ending with `0.bfd`).
8896
pub fn load<P: AsRef<Path>>(main_db_path: P, read_only: bool) -> Result<Self, io::Error> {
8997
let mut members = Vec::new();
9098
let mut n = 0;
@@ -126,8 +134,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
126134
Ok(BField { members, read_only })
127135
}
128136

129-
/// Write the current bfields to disk.
130-
/// Only useful if you are creating a bfield in memory
137+
/// Write the current `BField` to disk.
138+
/// Only useful if you are creating a `BField` in memory.
131139
pub fn persist_to_disk(self) -> Result<Self, io::Error> {
132140
let mut members = Vec::with_capacity(self.members.len());
133141
for m in self.members {
@@ -139,32 +147,32 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
139147
})
140148
}
141149

142-
/// Returns (n_hashes, marker_width, n_marker_bits, Vec<size of each member>)
150+
/// Returns `(n_hashes, marker_width, n_marker_bits, Vec<size of each member>)`.
143151
pub fn build_params(&self) -> (u8, u8, u8, Vec<usize>) {
144152
let (_, n_hashes, marker_width, n_marker_bits) = self.members[0].info();
145153
let sizes = self.members.iter().map(|i| i.info().0).collect();
146154
(n_hashes, marker_width, n_marker_bits, sizes)
147155
}
148156

149-
/// Returns the params given at build time to the bfields
157+
/// Returns the params given at build time to the `BField` arrays.
150158
pub fn params(&self) -> &Option<T> {
151159
&self.members[0].params.other
152160
}
153161

154-
/// This doesn't actually update the file, so we can use it to e.g.
155-
/// simulate params on an old legacy file that may not actually have
156-
/// them set.
162+
/// ⚠️ Method for setting parameters without actually updating any files on disk. **Only useful for supporting legacy file formats
163+
/// in which these parameters are not saved.**
157164
pub fn mock_params(&mut self, params: T) {
158165
self.members[0].params.other = Some(params);
159166
}
160167

161-
/// This allows an insert of a value into the b-field after the entire
162-
/// b-field build process has been completed.
163-
///
164-
/// It has the very bad downside of potentially knocking other keys out
165-
/// of the b-field by making them indeterminate (which will make them fall
166-
/// back to the secondaries where they don't exist and thus it'll appear
167-
/// as if they were never inserted to begin with)
168+
/// ⚠️ Method for inserting a value into a `BField`
169+
/// after it has been fully built and finalized.
170+
/// **This method should be used with extreme care**
171+
/// as it does not guarantee that keys are properly propagated
172+
/// to secondary arrays and therefore may make lookups of previously
173+
/// set values return an indeterminate result in the primary array,
174+
/// then causing fallback to the secondary arrays where they were never
175+
/// inserted (and returning a false negative).
168176
pub fn force_insert(&self, key: &[u8], value: BFieldVal) {
169177
debug_assert!(!self.read_only, "Can't insert into read_only bfields");
170178
for secondary in &self.members {
@@ -174,8 +182,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
174182
}
175183
}
176184

177-
/// Insert the given key/value at the given pass
178-
/// Returns whether the value was inserted during this call, eg will return `false` if
185+
/// Insert the given key/value at the given pass (1-indexed `BField` array/member).
186+
/// Returns whether the value was inserted during this call, i.e., will return `false` if
179187
/// the value was already present.
180188
pub fn insert(&self, key: &[u8], value: BFieldVal, pass: usize) -> bool {
181189
debug_assert!(!self.read_only, "Can't insert into read_only bfields");
@@ -195,8 +203,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
195203
true
196204
}
197205

198-
/// Returns the value of the given key if found, None otherwise.
199-
/// If the value is indeterminate, we still return None.
206+
/// Returns the value of the given key if found, `None` otherwise.
207+
/// The current implementation also returns `None` for indeterminate values.
200208
pub fn get(&self, key: &[u8]) -> Option<BFieldVal> {
201209
for secondary in self.members.iter() {
202210
match secondary.get(key) {
@@ -210,8 +218,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
210218
None
211219
}
212220

213-
/// Get the info of each member
214-
/// Returns Vec<(size, n_hashes, marker_width, n_marker_bits)>
221+
/// Get the info of every member array (`BFieldMember`) in the `BField`, primary and secondaries alike.
222+
/// Returns `Vec<(size, n_hashes, marker_width, n_marker_bits)>`.
215223
pub fn info(&self) -> Vec<(usize, u8, u8, u8)> {
216224
self.members.iter().map(|m| m.info()).collect()
217225
}

src/combinatorial.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ pub fn unrank(marker: u128) -> usize {
6363
value as usize
6464
}
6565

66-
/// (Hopefully) fast implementation of a binomial
66+
/// (Hopefully) fast implementation of a binomial.
6767
///
68-
/// This uses a preset group of equations for k < 8 and then falls back to a
68+
/// This function uses a preset group of equations for k < 8 and then falls back to a
6969
/// multiplicative implementation that tries to prevent overflows while
7070
/// maintaining all results as exact integers.
7171
#[inline]

src/lib.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#![deny(missing_docs)]
22

3-
//! The bfield datastructure, implemented in Rust.
3+
//! The B-field data structure, implemented in Rust.
44
//! A space-efficient, probabilistic data structure and storage and retrieval method for key-value information.
5+
//! These Rust docs represent some minimal documentation of the crate itself.
6+
//! See the [GitHub README](https://github.com/onecodex/rust-bfield) for an
7+
//! extensive write-up, including the math and design underpinning the B-field
8+
//! data structure, guidance on B-field parameter selection, as well as usage
9+
//! examples.
510
611
mod bfield;
712
mod bfield_member;

0 commit comments

Comments
 (0)