From 25436842d02e00e5f64116b4977b2064931662bf Mon Sep 17 00:00:00 2001 From: Michele Riva Date: Tue, 26 Nov 2024 14:12:07 +0100 Subject: [PATCH 1/2] feat: use log crate instead of println --- Cargo.lock | 1 + Cargo.toml | 1 + src/pq.rs | 49 +++++++++++++++++++++++-------------------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04985c9..5ee74e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -678,6 +678,7 @@ version = "0.0.1" dependencies = [ "anyhow", "criterion", + "log", "ndarray", "ndarray-rand", "ndarray-stats", diff --git a/Cargo.toml b/Cargo.toml index 532ed04..ca8504a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ rand = "0.9.0-alpha.2" ndarray-rand = "0.15.0" rand_distr = "0.4.3" rayon = "1.10.0" +log = "0.4.22" [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } diff --git a/src/pq.rs b/src/pq.rs index 485b5ab..4181f46 100644 --- a/src/pq.rs +++ b/src/pq.rs @@ -3,6 +3,7 @@ use anyhow::Result; use ndarray::parallel::prelude::*; use ndarray::{s, Array2, Array3, Axis}; use rayon::prelude::*; +use log::{debug, error, info, trace, warn}; #[derive(Debug, Clone, Copy)] pub enum CodeType { @@ -14,7 +15,6 @@ pub enum CodeType { pub struct PQ { m: usize, ks: u32, - verbose: bool, code_dtype: CodeType, codewords: Option>, ds: Option>, @@ -22,7 +22,7 @@ pub struct PQ { } impl PQ { - pub fn try_new(m: usize, ks: u32, verbose: Option) -> Result { + pub fn try_new(m: usize, ks: u32) -> Result { if ks == 0 { anyhow::bail!( "cluster subspaces (ks) must be a u32 between 1 and 2**32 - 1. Got {}", @@ -37,7 +37,6 @@ impl PQ { Ok(Self { m, ks, - verbose: verbose.unwrap_or(false), code_dtype: determine_code_type(ks), codewords: None, ds: None, @@ -94,15 +93,13 @@ impl PQ { let trained_codewords: Vec<(usize, Array2)> = (0..self.m) .into_par_iter() .map(|m| { - if self.verbose { - println!( - "# Training the subspace: {} / {}, {} -> {}", - m, - self.m, - self.ds.as_ref().unwrap()[m], - self.ds.as_ref().unwrap()[m + 1] - ); - } + info!( + "Training the subspace: {} / {}, {} -> {}", + m, + self.m, + self.ds.as_ref().unwrap()[m], + self.ds.as_ref().unwrap()[m + 1] + ); let ds_ref = self.ds.as_ref().unwrap(); @@ -256,13 +253,13 @@ mod tests { // Edge case: ks is zero or exceeds u32 limits. #[test] fn test_try_new_invalid_ks_zero() { - let pq = PQ::try_new(4, 0, None); + let pq = PQ::try_new(4, 0); assert!(pq.is_err(), "Initialization should fail when ks is zero"); } #[test] fn test_try_new_invalid_ks_max() { - let pq = PQ::try_new(4, u32::MAX, None); + let pq = PQ::try_new(4, u32::MAX); assert!( pq.is_ok(), "Initialization should succeed when ks is u32::MAX" @@ -272,7 +269,7 @@ mod tests { // Edge Case: m is zero. #[test] fn test_try_new_invalid_m_zero() { - let pq = PQ::try_new(0, 256, None); + let pq = PQ::try_new(0, 256); assert!( pq.is_err(), "Initialization should fail when m is zero, but it succeeded" @@ -282,7 +279,7 @@ mod tests { // Edge Case: Number of training vectors is less than ks. #[test] fn test_fit_vectors_less_than_ks() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let vecs = create_dummy_vectors(100, 128); // Less than ks let result = pq.fit(&vecs, 10); assert!( @@ -294,7 +291,7 @@ mod tests { // Edge Case: Vectors have zero dimensions or m exceeds vector dimensions. #[test] fn test_fit_zero_dimensions() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let vecs = create_dummy_vectors(1000, 0); // Zero dimensions let result = pq.fit(&vecs, 10); assert!( @@ -305,7 +302,7 @@ mod tests { #[test] fn test_fit_m_greater_than_dimensions() { - let mut pq = PQ::try_new(200, 256, None).unwrap(); + let mut pq = PQ::try_new(200, 256).unwrap(); let vecs = create_dummy_vectors(1000, 128); // m > dimensions let result = pq.fit(&vecs, 10); assert!( @@ -317,7 +314,7 @@ mod tests { // Edge Case: Calling encode before fit. #[test] fn test_encode_without_fit() { - let pq = PQ::try_new(4, 256, None).unwrap(); + let pq = PQ::try_new(4, 256).unwrap(); let vecs = create_dummy_vectors(1000, 128); let result = pq.encode(&vecs); assert!( @@ -329,7 +326,7 @@ mod tests { // Edge Case: Vectors have different dimensions than those used in fit. #[test] fn test_encode_mismatched_dimensions() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let train_vecs = create_dummy_vectors(1000, 128); pq.fit(&train_vecs, 10).unwrap(); @@ -344,7 +341,7 @@ mod tests { // Edge Case: Codes have incorrect dimensions or contain invalid values. #[test] fn test_decode_invalid_code_m() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let train_vecs = create_dummy_vectors(1000, 128); pq.fit(&train_vecs, 10).unwrap(); @@ -358,7 +355,7 @@ mod tests { #[test] fn test_decode_code_value_exceeds_ks() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let train_vecs = create_dummy_vectors(1000, 128); pq.fit(&train_vecs, 10).unwrap(); @@ -374,7 +371,7 @@ mod tests { // Edge Case: Ensuring compress works end-to-end. #[test] fn test_compress() { - let mut pq = PQ::try_new(4, 256, None).unwrap(); + let mut pq = PQ::try_new(4, 256).unwrap(); let vecs = create_dummy_vectors(1000, 128); pq.fit(&vecs, 10).unwrap(); @@ -389,7 +386,7 @@ mod tests { // Edge Case: Ensuring code values fit within specified data types. #[test] fn test_encode_code_dtype_u8_overflow() { - let mut pq = PQ::try_new(4, 300, None).unwrap(); // ks exceeds u8::MAX + let mut pq = PQ::try_new(4, 300).unwrap(); // ks exceeds u8::MAX pq.code_dtype = CodeType::U8; let vecs = create_random_vectors(1000, 128); pq.fit(&vecs, 10).unwrap(); @@ -403,7 +400,7 @@ mod tests { #[test] fn test_encode_code_dtype_u16_overflow() { - let mut pq = PQ::try_new(4, 70000, None).unwrap(); + let mut pq = PQ::try_new(4, 70000).unwrap(); pq.code_dtype = CodeType::U16; pq.codewords = Some(Array3::zeros((pq.m, pq.ks as usize, 128 / pq.m))); pq.dim = Some(128); @@ -418,7 +415,7 @@ mod tests { #[test] fn test_encode_code_dtype_u8_valid() { - let mut pq = PQ::try_new(4, 200, None).unwrap(); // ks within u8::MAX + let mut pq = PQ::try_new(4, 200).unwrap(); // ks within u8::MAX pq.code_dtype = CodeType::U8; let vecs = create_random_vectors(1000, 128); pq.fit(&vecs, 10).unwrap(); From 7a166d34d8e206f733920842565a9a24fe450cde Mon Sep 17 00:00:00 2001 From: Michele Riva Date: Tue, 26 Nov 2024 14:13:01 +0100 Subject: [PATCH 2/2] removes 'verbose' from examples --- README.md | 2 +- src/bin/example.rs | 2 +- src/bin/readme_example.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b8a70ec..0218583 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ fn main() -> Result<()> { // Configure PQ parameters let m = 8; // Number of subspaces (controls compression ratio) let ks = 256; // Number of centroids per subspace (usually 256 for uint8) - let mut pq = PQ::try_new(m, ks, Some(true))?; + let mut pq = PQ::try_new(m, ks)?; // Train the quantizer on the data println!("Training PQ model..."); diff --git a/src/bin/example.rs b/src/bin/example.rs index c1ac462..ba543f1 100644 --- a/src/bin/example.rs +++ b/src/bin/example.rs @@ -31,7 +31,7 @@ fn main() -> Result<()> { let ks = 256; // Number of clusters per subspace let verbose = Some(true); - let mut pq = PQ::try_new(m, ks, verbose)?; + let mut pq = PQ::try_new(m, ks)?; // Step 3: Train the PQ Model let iterations = 20; // Number of iterations for k-means diff --git a/src/bin/readme_example.rs b/src/bin/readme_example.rs index f0f2409..90f8b4a 100644 --- a/src/bin/readme_example.rs +++ b/src/bin/readme_example.rs @@ -13,7 +13,7 @@ fn main() -> Result<()> { // Configure PQ parameters let m = 8; // Number of subspaces (controls compression ratio) let ks = 256; // Number of centroids per subspace (usually 256 for uint8) - let mut pq = PQ::try_new(m, ks, Some(true))?; + let mut pq = PQ::try_new(m, ks)?; // Train the quantizer on the data println!("Training PQ model...");