rust-ml · YuhanLiin · Nov 17, 2022 · Nov 11, 2022 · Nov 11, 2022 · Nov 11, 2022
diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
@@ -171,9 +171,12 @@ It is important to the project that we have benchmarks in place to evaluate the
 7. In `BenchmarkId` include the values used to parametrize the benchmark. For example if we're doing Pls then we may have something like `Canonical-Nipals-5feats-1_000samples`
 8. Pass data as an argument to the function being benched. This will prevent Criterion from including data creation time as part of the benchmark.
 9. Add a profiler see [here](https://github.com/tikv/pprof-rs#integrate-with-criterion) for an example on how to do so with pprof, Criterion, and Flamegraph.
+10. Use the `benchmarks` feature to configure your benchmark groups and profiler. See the bench in linfa-pls as an example of this. In most cases you can just copy and paste portions of the code. If other configurations are desired, they are still easily customizable, as explained in the pprof and Criterion documentation.
+
+Feel free to use the pls bench as a guideline. Note that it uses functions to get default configurations for profiling and benchmarking.
 
 ### Running Benchmarks
-When running benchmarks sometimes you will want to profile the code execution. Assuming you have followed step 9 to add a pprof profiling hook for the linfa-ica package you can run the following to get your profiling results as a flamegraph.
+When running benchmarks sometimes you will want to profile the code execution. Assuming you have followed step 9 to add a pprof profiling hook for the linfa-ica package you can run the following to get your profiling results as a flamegraph. Be advised that, at the time of writing, this profiling will not work on Windows machines.
 
 `cargo bench -p linfa-ica --bench fast_ica -q -- --profile-time 30`
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -21,7 +21,7 @@ exclude = [".github/"]
 
 [features]
 default = []
-
+benchmarks = ["criterion", "pprof"]
 netlib-static = ["blas", "ndarray-linalg/netlib-static"]
 netlib-system = ["blas", "ndarray-linalg/netlib-system"]
 
@@ -45,6 +45,8 @@ ndarray-linalg = { version = "0.15", optional = true }
 
 thiserror = "1.0"
 
+criterion = { version = "0.4.0", optional = true}
+
 [dependencies.serde_crate]
 package = "serde"
 optional = true
@@ -57,6 +59,9 @@ ndarray-rand = "0.14"
 linfa-datasets = { path = "datasets", features = ["winequality", "iris", "diabetes", "generate"] }
 statrs = "0.16.0"
 
+[target.'cfg(not(windows))'.dependencies]
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"], optional = true}
+
 [workspace]
 members = [
     "algorithms/*",

diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
@@ -49,9 +49,7 @@ criterion = "0.4.0"
 serde_json = "1"
 approx = "0.4"
 lax = "0.15.0"
-
-[target.'cfg(not(windows))'.dev-dependencies]
-pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+linfa = { version = "0.6.0", path = "../..", features = ["benchmarks"] }
 
 [[bench]]
 name = "k_means"

diff --git a/algorithms/linfa-clustering/benches/appx_dbscan.rs b/algorithms/linfa-clustering/benches/appx_dbscan.rs
@@ -2,15 +2,14 @@ use criterion::{
     black_box, criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion,
     PlotConfiguration,
 };
+use linfa::benchmarks::config;
 use linfa::traits::Transformer;
 use linfa_clustering::AppxDbscan;
 use linfa_datasets::generate;
 use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn appx_dbscan_bench(c: &mut Criterion) {
@@ -23,7 +22,9 @@ fn appx_dbscan_bench(c: &mut Criterion) {
     ];
 
     let mut benchmark = c.benchmark_group("appx_dbscan");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+
     for cluster_size_and_slack in cluster_sizes_and_slacks {
         let rng = &mut rng;
         benchmark.bench_with_input(
@@ -53,7 +54,7 @@ fn appx_dbscan_bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = appx_dbscan_bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-clustering/benches/dbscan.rs b/algorithms/linfa-clustering/benches/dbscan.rs
@@ -2,23 +2,24 @@ use criterion::{
     black_box, criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion,
     PlotConfiguration,
 };
+use linfa::benchmarks::config;
 use linfa::prelude::{ParamGuard, Transformer};
 use linfa_clustering::Dbscan;
 use linfa_datasets::generate;
 use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn dbscan_bench(c: &mut Criterion) {
     let mut rng = Xoshiro256Plus::seed_from_u64(40);
     let cluster_sizes = vec![10, 100, 1000, 10000];
 
     let mut benchmark = c.benchmark_group("dbscan");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+
     for cluster_size in cluster_sizes {
         let rng = &mut rng;
         benchmark.bench_with_input(
@@ -49,7 +50,7 @@ fn dbscan_bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = dbscan_bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-clustering/benches/gaussian_mixture.rs b/algorithms/linfa-clustering/benches/gaussian_mixture.rs
@@ -2,6 +2,7 @@ use criterion::{
     black_box, criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion,
     PlotConfiguration,
 };
+use linfa::benchmarks::config;
 use linfa::traits::Fit;
 use linfa::DatasetBase;
 use linfa_clustering::GaussianMixtureModel;
@@ -10,16 +11,16 @@ use ndarray::Array2;
 use ndarray_rand::rand::SeedableRng;
 use ndarray_rand::rand_distr::Uniform;
 use ndarray_rand::RandomExt;
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn gaussian_mixture_bench(c: &mut Criterion) {
     let mut rng = Xoshiro256Plus::seed_from_u64(40);
     let cluster_sizes = vec![10, 100, 1000, 10000];
 
     let mut benchmark = c.benchmark_group("gaussian_mixture");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+
     for cluster_size in cluster_sizes {
         let rng = &mut rng;
         benchmark.bench_with_input(
@@ -51,7 +52,7 @@ fn gaussian_mixture_bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
   name = benches;
-  config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+  config = config::get_default_profiling_configs();
   targets = gaussian_mixture_bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-clustering/benches/k_means.rs b/algorithms/linfa-clustering/benches/k_means.rs
@@ -2,15 +2,14 @@ use criterion::{
     black_box, criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion,
     PlotConfiguration,
 };
+use linfa::benchmarks::config;
 use linfa::prelude::*;
 use linfa::DatasetBase;
 use linfa_clustering::{IncrKMeansError, KMeans, KMeansInit};
 use linfa_datasets::generate;
 use ndarray::Array2;
 use ndarray_rand::RandomExt;
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform};
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 #[derive(Default)]
@@ -40,7 +39,9 @@ fn k_means_bench(c: &mut Criterion) {
     let n_features = 3;
 
     let mut benchmark = c.benchmark_group("naive_k_means");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+
     for &(cluster_size, n_clusters) in &cluster_sizes {
         let rng = &mut rng;
         let centroids =
@@ -73,7 +74,9 @@ fn k_means_incr_bench(c: &mut Criterion) {
     let n_features = 3;
 
     let mut benchmark = c.benchmark_group("incremental_k_means");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+
     for &(cluster_size, n_clusters) in &cluster_sizes {
         let rng = &mut rng;
         let centroids =
@@ -124,6 +127,7 @@ fn k_means_init_bench(c: &mut Criterion) {
     let n_features = 3;
 
     let mut benchmark = c.benchmark_group("k_means_init");
+    config::set_default_benchmark_configs(&mut benchmark);
     benchmark.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
     for init in &init_methods {
         for &(cluster_size, n_clusters) in &cluster_sizes {
@@ -160,7 +164,7 @@ fn k_means_init_bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = k_means_bench, k_means_init_bench, k_means_incr_bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-ftrl/Cargo.toml b/algorithms/linfa-ftrl/Cargo.toml
@@ -28,9 +28,7 @@ linfa = { version = "0.6.0", path = "../.."}
 criterion = "0.4.0"
 approx = "0.4"
 linfa-datasets = { version = "0.6.0", path = "../../datasets", features = ["winequality"] }
-
-[target.'cfg(not(windows))'.dev-dependencies]
-pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+linfa = { version = "0.6.0", path = "../..", features = ["benchmarks"] }
 
 [[bench]]
 name = "ftrl"

diff --git a/algorithms/linfa-ftrl/benches/ftrl.rs b/algorithms/linfa-ftrl/benches/ftrl.rs
@@ -1,4 +1,5 @@
 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use linfa::benchmarks::config;
 use linfa::prelude::Predict;
 use linfa::traits::FitWith;
 use linfa::{Dataset, DatasetBase, ParamGuard};
@@ -7,13 +8,14 @@ use ndarray::{Array1, Array2};
 use ndarray_rand::{
     rand::distributions::Uniform, rand::rngs::SmallRng, rand::SeedableRng, RandomExt,
 };
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 
 fn fit_without_prior_model(c: &mut Criterion) {
     let mut rng = SmallRng::seed_from_u64(42);
     let params = Ftrl::params();
+
     let mut group = c.benchmark_group("Ftrl with no initial model");
+    config::set_default_benchmark_configs(&mut group);
+
     let sizes: Vec<(usize, usize)> = vec![(10, 1_000), (50, 5_000), (100, 10_000)];
 
     for (nfeatures, nrows) in sizes.iter() {
@@ -34,7 +36,10 @@ fn fit_with_prior_model(c: &mut Criterion) {
     let mut rng = SmallRng::seed_from_u64(42);
     let params = Ftrl::params();
     let valid_params = params.clone().check().unwrap();
+
     let mut group = c.benchmark_group("Ftrl incremental model training");
+    config::set_default_benchmark_configs(&mut group);
+
     let sizes: Vec<(usize, usize)> = vec![(10, 1_000), (50, 5_000), (100, 10_000)];
 
     for (nfeatures, nrows) in sizes.iter() {
@@ -57,8 +62,11 @@ fn fit_with_prior_model(c: &mut Criterion) {
 fn predict(c: &mut Criterion) {
     let mut rng = SmallRng::seed_from_u64(42);
     let params = Ftrl::params();
+
     let valid_params = params.clone().check().unwrap();
     let mut group = c.benchmark_group("Ftrl");
+    config::set_default_benchmark_configs(&mut group);
+
     let sizes: Vec<(usize, usize)> = vec![(10, 1_000), (50, 5_000), (100, 10_000)];
     for (nfeatures, nrows) in sizes.iter() {
         let model = Ftrl::new(valid_params.clone(), *nfeatures);
@@ -91,7 +99,7 @@ fn get_dataset(
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = fit_without_prior_model, fit_with_prior_model, predict
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
@@ -40,9 +40,7 @@ linfa = { version = "0.6.0", path = "../.." }
 ndarray-npy = { version = "0.8", default-features = false }
 paste = "1.0"
 criterion = "0.4.0"
-
-[target.'cfg(not(windows))'.dev-dependencies]
-pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+linfa = { version = "0.6.0", path = "../..", features = ["benchmarks"] }
 
 [[bench]]
 name = "fast_ica"

diff --git a/algorithms/linfa-ica/benches/fast_ica.rs b/algorithms/linfa-ica/benches/fast_ica.rs
@@ -1,11 +1,10 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use linfa::benchmarks::config;
 use linfa::{dataset::DatasetBase, traits::Fit};
 use linfa_ica::fast_ica::{FastIca, GFunc};
 use ndarray::{array, concatenate};
 use ndarray::{Array, Array2, Axis};
 use ndarray_rand::{rand::SeedableRng, rand_distr::Uniform, RandomExt};
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use rand_xoshiro::Xoshiro256Plus;
 
 fn perform_ica(size: usize, gfunc: GFunc) {
@@ -55,6 +54,8 @@ fn bench(c: &mut Criterion) {
         (GFunc::Exp, "Exp"),
     ] {
         let mut group = c.benchmark_group("Fast ICA");
+        config::set_default_benchmark_configs(&mut group);
+
         let sizes: [usize; 3] = [1_000, 10_000, 100_000];
         for size in sizes {
             let input = (size, gfunc);
@@ -69,7 +70,7 @@ fn bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
@@ -35,9 +35,7 @@ linfa-datasets = { version = "0.6.0", path = "../../datasets", features = ["diab
 approx = "0.4"
 criterion = "0.4.0" 
 statrs = "0.16.0"
-
-[target.'cfg(not(windows))'.dev-dependencies]
-pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+linfa = { version = "0.6.0", path = "../..", features = ["benchmarks"] }
 
 [[bench]]
 name = "ols_bench"

diff --git a/algorithms/linfa-linear/benches/ols_bench.rs b/algorithms/linfa-linear/benches/ols_bench.rs
@@ -1,11 +1,10 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use linfa::benchmarks::config;
 use linfa::traits::Fit;
 use linfa::Dataset;
 use linfa_datasets::generate::make_dataset;
 use linfa_linear::{LinearRegression, TweedieRegressor};
 use ndarray::Ix1;
-#[cfg(not(target_os = "windows"))]
-use pprof::criterion::{Output, PProfProfiler};
 use statrs::distribution::{DiscreteUniform, Laplace};
 
 #[allow(unused_must_use)]
@@ -22,6 +21,8 @@ fn perform_glm(dataset: &Dataset<f64, f64, Ix1>) {
 
 fn bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("Linfa_linear");
+    config::set_default_benchmark_configs(&mut group);
+
     let params: [(usize, usize); 4] = [(1_000, 5), (10_000, 5), (100_000, 5), (100_000, 10)];
 
     let feat_distr = Laplace::new(0.5, 5.).unwrap();
@@ -62,7 +63,7 @@ fn bench(c: &mut Criterion) {
 #[cfg(not(target_os = "windows"))]
 criterion_group! {
     name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    config = config::get_default_profiling_configs();
     targets = bench
 }
 #[cfg(target_os = "windows")]

diff --git a/algorithms/linfa-nn/Cargo.toml b/algorithms/linfa-nn/Cargo.toml
@@ -40,9 +40,7 @@ approx = "0.4"
 criterion = "0.4.0"
 rand_xoshiro = "0.6"
 ndarray-rand = "0.14"
-
-[target.'cfg(not(windows))'.dev-dependencies]
-pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
+linfa = { version = "0.6.0", path = "../..", features = ["benchmarks"] }
 
 [[bench]]
 name = "nn"