@@ -7,7 +7,7 @@ use serde::Serialize;
7
7
8
8
use crate :: bfield_member:: { BFieldLookup , BFieldMember , BFieldVal } ;
9
9
10
- /// The struct holding the various bfields
10
+ /// The `struct` holding the `BField` primary and secondary bit arrays.
11
11
pub struct BField < T > {
12
12
members : Vec < BFieldMember < T > > ,
13
13
read_only : bool ,
@@ -18,18 +18,26 @@ unsafe impl<T> Send for BField<T> {}
18
18
// NOTE(review): no SAFETY justification is visible for this `unsafe impl`; confirm that
// shared access to `BFieldMember<T>` from multiple threads is actually sound.
unsafe impl < T > Sync for BField < T > { }
19
19
20
20
impl < T : Clone + DeserializeOwned + Serialize > BField < T > {
21
- /// The (complicated) method to create a bfield.
22
- /// The bfield files will be created in `directory` with the given `filename` and the
23
- /// suffixes `(0..n_secondaries).bfd`
24
- /// `size` is the primary bfield size, subsequent bfield sizes will be determined by
25
- /// `secondary_scaledown` and `max_scaledown`.
26
- /// If you set `in_memory` to true, remember to call `persist_to_disk` when it's built to
21
+ /// A (rather complex) method for creating a `BField`.
22
+ ///
23
+ /// This will create a series of `BField` bit array files in `directory` with the given `filename` and the
24
+ /// suffixes `(0..n_secondaries).bfd`. If you set `in_memory` to true, remember to call `persist_to_disk` once it's built to
27
25
/// save it.
28
- /// The params are the following in the paper:
29
- /// `n_hashes` -> k
30
- /// `marker_width` -> v (nu)
31
- /// `n_marker_bits` -> κ (kappa)
32
- /// `secondary_scaledown` -> β (beta)
26
+ ///
27
+ /// The following parameters are required. See the [README.md](https://github.com/onecodex/rust-bfield/)
28
+ /// for additional details as well as the
29
+ /// [parameter selection notebook](https://github.com/onecodex/rust-bfield/blob/main/docs/notebook/calculate-parameters.ipynb)
30
+ /// for helpful guidance in picking optimal parameters.
31
+ /// - `size` is the primary `BField` size, subsequent `BField` sizes will be determined
32
+ /// by the `secondary_scaledown` and `max_scaledown` parameters
33
+ /// - `n_hashes`. The number of hash functions _k_ to use.
34
+ /// - `marker_width` or v (nu). The length of the bit-string to use for each marker.
35
+ /// - `n_marker_bits` or κ (kappa). The number of 1s to set in each v-length bit-string (also its Hamming weight).
36
+ /// - `secondary_scaledown` or β (beta). The scaling factor to use for each subsequent `BField` size.
37
+ /// - `max_scaledown`. A maximum scaling factor to use for secondary `BField` sizes, since β raised to the power of
38
+ /// `n_secondaries` can be impractically/needlessly small.
39
+ /// - `n_secondaries`. The number of secondary `BField`s to create.
40
+ /// - `in_memory`. Whether to create the `BField` in memory or on disk.
33
41
#[ allow( clippy:: too_many_arguments) ]
34
42
pub fn create < P > (
35
43
directory : P ,
@@ -84,7 +92,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
84
92
} )
85
93
}
86
94
87
- /// Loads the bfield given the path to the "main" db path (eg the one ending with `0.bfd`).
95
+ /// Loads the `BField` given the path to the primary array data file (e.g., the one ending with `0.bfd`).
88
96
pub fn load < P : AsRef < Path > > ( main_db_path : P , read_only : bool ) -> Result < Self , io:: Error > {
89
97
let mut members = Vec :: new ( ) ;
90
98
let mut n = 0 ;
@@ -126,8 +134,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
126
134
Ok ( BField { members, read_only } )
127
135
}
128
136
129
- /// Write the current bfields to disk.
130
- /// Only useful if you are creating a bfield in memory
137
+ /// Write the current `BField` to disk.
138
+ /// Only useful if you are creating a `BField` in memory.
131
139
pub fn persist_to_disk ( self ) -> Result < Self , io:: Error > {
132
140
let mut members = Vec :: with_capacity ( self . members . len ( ) ) ;
133
141
for m in self . members {
@@ -139,32 +147,32 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
139
147
} )
140
148
}
141
149
142
- /// Returns (n_hashes, marker_width, n_marker_bits, Vec<size of each member>)
150
+ /// Returns `(n_hashes, marker_width, n_marker_bits, Vec<size of each member>)`.
143
151
pub fn build_params ( & self ) -> ( u8 , u8 , u8 , Vec < usize > ) {
144
152
// The hash/marker settings are read from the first member only; its size (tuple
// element 0) is discarded here. Direct indexing panics if `members` is empty.
let ( _, n_hashes, marker_width, n_marker_bits) = self . members [ 0 ] . info ( ) ;
145
153
// Gather element 0 of every member's `info()` tuple (the member's size), in member order.
let sizes = self . members . iter ( ) . map ( |i| i. info ( ) . 0 ) . collect ( ) ;
146
154
( n_hashes, marker_width, n_marker_bits, sizes)
147
155
}
148
156
149
- /// Returns the params given at build time to the bfields
157
+ /// Returns the params given at build time to the `BField` arrays.
150
158
pub fn params ( & self ) -> & Option < T > {
151
159
// Borrows the `other` params stored on the first member; the remaining members are
// not consulted. Direct indexing panics if `members` is empty.
& self . members [ 0 ] . params . other
152
160
}
153
161
154
- /// This doesn't actually update the file, so we can use it to e.g.
155
- /// simulate params on an old legacy file that may not actually have
156
- /// them set.
162
+ /// ⚠️ Method for setting parameters without actually updating any files on disk. **Only useful for supporting legacy file formats
163
+ /// in which these parameters are not saved.**
157
164
pub fn mock_params ( & mut self , params : T ) {
158
165
// Overwrites the in-memory `other` params of the first member only; this assignment
// does not touch the other members. Direct indexing panics if `members` is empty.
self . members [ 0 ] . params . other = Some ( params) ;
159
166
}
160
167
161
- /// This allows an insert of a value into the b-field after the entire
162
- /// b-field build process has been completed.
163
- ///
164
- /// It has the very bad downside of potentially knocking other keys out
165
- /// of the b-field by making them indeterminate (which will make them fall
166
- /// back to the secondaries where they don't exist and thus it'll appear
167
- /// as if they were never inserted to begin with)
168
+ /// ⚠️ Method for inserting a value into a `BField`
169
+ /// after it has been fully built and finalized.
170
+ /// **This method should be used with extreme care**
171
+ /// as it does not guarantee that keys are properly propagated
172
+ /// to secondary arrays and therefore may make lookups of previously
173
+ /// set values return an indeterminate result in the primary array,
174
+ /// then causing fallback to the secondary arrays where they were never
175
+ /// inserted (and returning a false negative).
168
176
pub fn force_insert ( & self , key : & [ u8 ] , value : BFieldVal ) {
169
177
debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
170
178
for secondary in & self . members {
@@ -174,8 +182,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
174
182
}
175
183
}
176
184
177
- /// Insert the given key/value at the given pass
178
- /// Returns whether the value was inserted during this call, eg will return `false` if
185
+ /// Insert the given key/value at the given pass (1-indexed `BField` array/member).
186
+ /// Returns whether the value was inserted during this call, i.e., will return `false` if
179
187
/// the value was already present.
180
188
pub fn insert ( & self , key : & [ u8 ] , value : BFieldVal , pass : usize ) -> bool {
181
189
debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
@@ -195,8 +203,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
195
203
true
196
204
}
197
205
198
- /// Returns the value of the given key if found, None otherwise.
199
- /// If the value is indeterminate, we still return None .
206
+ /// Returns the value of the given key if found, `None` otherwise.
207
+ /// The current implementation also returns `None` for indeterminate values.
200
208
pub fn get ( & self , key : & [ u8 ] ) -> Option < BFieldVal > {
201
209
for secondary in self . members . iter ( ) {
202
210
match secondary. get ( key) {
@@ -210,8 +218,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
210
218
None
211
219
}
212
220
213
- /// Get the info of each member
214
- /// Returns Vec<(size, n_hashes, marker_width, n_marker_bits)>
221
+ /// Get the info of each secondary array (`BFieldMember`) in the `BField`.
222
+ /// Returns `Vec<(size, n_hashes, marker_width, n_marker_bits)>`.
215
223
pub fn info ( & self ) -> Vec < ( usize , u8 , u8 , u8 ) > {
216
224
// One tuple per member, in the same order as `members`; each tuple is whatever that
// member's own `info()` reports.
self . members . iter ( ) . map ( |m| m. info ( ) ) . collect ( )
217
225
}
0 commit comments