
Automatically generate the intra doc links #246

Merged · 8 commits · Oct 11, 2022
12 changes: 10 additions & 2 deletions .github/workflows/docs.yml
@@ -2,7 +2,7 @@
on:
push:
branches:
- master
- master
pull_request:

name: Build website with Zola, build rust docs and publish to GH pages
@@ -14,13 +14,19 @@ jobs:
steps:
- name: 'Checkout'
uses: actions/checkout@master

- name: 'Build only'
uses: shalzz/zola-deploy-action@master
env:
BUILD_DIR: docs/website/
TOKEN: ${{ secrets.TOKEN }}
BUILD_ONLY: true

- name: Build Documentation
run: cargo doc --workspace --no-deps
env:
RUSTDOCFLAGS: -D warnings

build_and_deploy:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/master' || github.repository != 'rust-ml/linfa'
@@ -34,7 +40,9 @@ jobs:
components: rustfmt, rust-src

- name: Build Documentation
run: cargo doc --all --no-deps
run: cargo doc --workspace --no-deps
env:
RUSTDOCFLAGS: -D warnings

- name: Copy Rust Documentation to Zola
run: cp -R "target/doc/" "docs/website/static/rustdocs/"
4 changes: 2 additions & 2 deletions algorithms/linfa-clustering/src/appx_dbscan/hyperparams.rs
@@ -13,7 +13,7 @@ use thiserror::Error;
)]
#[derive(Clone, Debug, PartialEq)]
/// The set of hyperparameters that can be specified for the execution of
/// the [Approximated DBSCAN algorithm](struct.AppxDbscan.html).
/// the [Approximated DBSCAN algorithm](crate::AppxDbscan).
pub struct AppxDbscanValidParams<F: Float, N> {
pub(crate) tolerance: F,
pub(crate) min_points: usize,
@@ -23,7 +23,7 @@ pub struct AppxDbscanValidParams<F: Float, N> {

#[derive(Debug, Clone, PartialEq)]
/// Helper struct for building a set of [Approximated DBSCAN
/// hyperparameters](struct.AppxDbscanParams.html)
/// hyperparameters](AppxDbscanParams)
pub struct AppxDbscanParams<F: Float, N>(AppxDbscanValidParams<F, N>);

#[derive(Debug, Error)]
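The same mechanical change recurs throughout this PR, so it is worth spelling out what it buys. A minimal sketch with placeholder type names (not linfa's actual items):

```rust
// Old style: a hand-written HTML path. rustdoc copies it verbatim, so it
// silently breaks when the item moves or the generated layout changes.
/// See the [algorithm](struct.MyAlgorithm.html) for details.
struct OldStyleDocs;

// New style: an intra-doc link. rustdoc resolves the Rust path at build
// time; an unresolvable path triggers the `rustdoc::broken_intra_doc_links`
// lint, which the `RUSTDOCFLAGS: -D warnings` added to CI above promotes
// to a hard error.
/// See [`MyAlgorithm`](crate::MyAlgorithm) for details.
struct NewStyleDocs;

struct MyAlgorithm;
```

This is why the CI change and the link rewrites land together: the lint only protects links written in the intra-doc form.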
4 changes: 2 additions & 2 deletions algorithms/linfa-clustering/src/dbscan/hyperparams.rs
@@ -11,7 +11,7 @@ use thiserror::Error;
)]
#[derive(Debug, Clone, PartialEq)]
/// The set of hyperparameters that can be specified for the execution of
/// the [DBSCAN algorithm](struct.Dbscan.html).
/// the [DBSCAN algorithm](crate::Dbscan).
pub struct DbscanValidParams<F: Float, D: Distance<F>, N: NearestNeighbour> {
pub(crate) tolerance: F,
pub(crate) min_points: usize,
@@ -20,7 +20,7 @@ pub struct DbscanValidParams<F: Float, D: Distance<F>, N: NearestNeighbour> {
}

#[derive(Debug, Clone, PartialEq)]
/// Helper struct for building a set of [DBSCAN hyperparameters](struct.DbscanParams.html)
/// Helper struct for building a set of [DBSCAN hyperparameters](DbscanParams)
pub struct DbscanParams<F: Float, D: Distance<F>, N: NearestNeighbour>(DbscanValidParams<F, D, N>);

#[derive(Error, Debug)]
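For orientation, the two fields shown above (`tolerance` and `min_points`) drive DBSCAN's core-point test. A std-only 1-D sketch, not linfa's API; note that whether a point counts toward its own neighbourhood varies between formulations, and here it does:

```rust
// A point is a "core point" if at least `min_points` points (itself
// included, in this sketch) lie within `tolerance` of it.
fn is_core_point(points: &[f64], idx: usize, tolerance: f64, min_points: usize) -> bool {
    points
        .iter()
        .filter(|&&p| (p - points[idx]).abs() <= tolerance)
        .count()
        >= min_points
}
```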
@@ -41,7 +41,7 @@ use serde_crate::{Deserialize, Serialize};
/// that is maximising the probability that the dataset is drawn from our mixture of normal distributions.
///
/// After an initialization step which can be either from random distribution or from the result
/// of the [KMeans](struct.KMeans.html) algorithm (which is the default value of the `init_method` parameter).
/// of the [KMeans](KMeans) algorithm (which is the default value of the `init_method` parameter),
/// the core EM iterative algorithm for Gaussian Mixture is a fixed-point two-step algorithm:
///
/// 1. Expectation step: compute the expectation of the likelihood of the current gaussian mixture model wrt the dataset.
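The expectation step named in the doc comment above can be sketched for a 1-D, two-component mixture. This is a hypothetical std-only illustration, not linfa's implementation:

```rust
// Density of a 1-D Gaussian with the given mean and variance.
fn gauss_pdf(x: f64, mean: f64, var: f64) -> f64 {
    let norm = 1.0 / (2.0 * std::f64::consts::PI * var).sqrt();
    norm * (-(x - mean).powi(2) / (2.0 * var)).exp()
}

// E-step: the "responsibility" of component 0 for observation `x`, i.e.
// the posterior probability that `x` was drawn from component 0. The
// maximization step would then re-estimate weights, means, and variances
// from these responsibilities.
fn responsibility(x: f64, weights: [f64; 2], means: [f64; 2], vars: [f64; 2]) -> f64 {
    let p0 = weights[0] * gauss_pdf(x, means[0], vars[0]);
    let p1 = weights[1] * gauss_pdf(x, means[1], vars[1]);
    p0 / (p0 + p1)
}
```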
@@ -26,7 +26,7 @@ pub enum GmmCovarType {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
/// A specifier for the method used for the initialization of the fitting algorithm of GMM
pub enum GmmInitMethod {
/// GMM fitting algorithm is initalized with the esult of the [KMeans](struct.KMeans.html) clustering.
/// GMM fitting algorithm is initialized with the result of the [KMeans](crate::KMeans) clustering.
KMeans,
/// GMM fitting algorithm is initialized randomly.
Random,
@@ -39,7 +39,7 @@
)]
#[derive(Clone, Debug, PartialEq)]
/// The set of hyperparameters that can be specified for the execution of
/// the [GMM algorithm](struct.GaussianMixtureModel.html).
/// the [GMM algorithm](crate::GaussianMixtureModel).
pub struct GmmValidParams<F: Float, R: Rng> {
n_clusters: usize,
covar_type: GmmCovarType,
@@ -92,7 +92,7 @@ impl<F: Float, R: Rng + Clone> GmmValidParams<F, R> {
)]
#[derive(Clone, Debug, PartialEq)]
/// The set of hyperparameters that can be specified for the execution of
/// the [GMM algorithm](struct.GaussianMixtureModel.html).
/// the [GMM algorithm](crate::GaussianMixtureModel).
pub struct GmmParams<F: Float, R: Rng>(GmmValidParams<F, R>);

impl<F: Float> GmmParams<F, Xoshiro256Plus> {
6 changes: 3 additions & 3 deletions algorithms/linfa-clustering/src/k_means/hyperparams.rs
@@ -15,7 +15,7 @@ use serde_crate::{Deserialize, Serialize};
)]
#[derive(Clone, Debug, PartialEq)]
/// The set of hyperparameters that can be specified for the execution of
/// the [K-means algorithm](struct.KMeans.html).
/// the [K-means algorithm](crate::KMeans).
pub struct KMeansValidParams<F: Float, R: Rng, D: Distance<F>> {
/// Number of times the k-means algorithm will be run with different centroid seeds.
n_runs: usize,
@@ -38,8 +38,8 @@ pub struct KMeansValidParams<F: Float, R: Rng, D: Distance<F>> {
}

#[derive(Clone, Debug, PartialEq)]
/// An helper struct used to construct a set of [valid hyperparameters](struct.KMeansParams.html) for
/// the [K-means algorithm](struct.KMeans.html) (using the builder pattern).
/// A helper struct used to construct a set of [valid hyperparameters](KMeansParams) for
/// the [K-means algorithm](crate::KMeans) (using the builder pattern).
pub struct KMeansParams<F: Float, R: Rng, D: Distance<F>>(KMeansValidParams<F, R, D>);

impl<F: Float, R: Rng, D: Distance<F>> KMeansParams<F, R, D> {
10 changes: 5 additions & 5 deletions algorithms/linfa-clustering/src/lib.rs
@@ -13,11 +13,11 @@
//! ## Current state
//!
//! Right now `linfa-clustering` provides the following clustering algorithms:
//! * [K-Means](struct.KMeans.html)
//! * [DBSCAN](struct.Dbscan.html)
//! * [Approximated DBSCAN](struct.AppxDbscan.html)
//! * [Gaussian-Mixture-Model](struct.GaussianMixtureModel.html)
//! * [OPTICS](struct.OpticsAnalysis.html)
//! * [K-Means](KMeans)
//! * [DBSCAN](Dbscan)
//! * [Approximated DBSCAN](AppxDbscan)
//! * [Gaussian-Mixture-Model](GaussianMixtureModel)
//! * [OPTICS](OpticsAnalysis)
//!
//! Implementation choices, algorithmic details and tutorials can be found in the page dedicated to the specific algorithms.
mod appx_dbscan;
2 changes: 1 addition & 1 deletion algorithms/linfa-clustering/src/optics/hyperparams.rs
@@ -10,7 +10,7 @@ use serde_crate::{Deserialize, Serialize};
serde(crate = "serde_crate")
)]
/// The set of hyperparameters that can be specified for the execution of
/// the [OPTICS algorithm](struct.Optics.html).
/// the [OPTICS algorithm](crate::Optics).
pub struct OpticsValidParams<F, D, N> {
/// Distance between points for them to be considered neighbours.
tolerance: F,
34 changes: 2 additions & 32 deletions algorithms/linfa-linear/src/glm/hyperparams.rs
@@ -2,38 +2,7 @@ use crate::{glm::link::Link, LinearError, TweedieRegressor};
use linfa::{Float, ParamGuard};
use serde::{Deserialize, Serialize};

/// Generalized Linear Model (GLM) with a Tweedie distribution
///
/// The Regressor can be used to model different GLMs depending on
/// [`power`](struct.TweedieRegressor.html#method.power),
/// which determines the underlying distribution.
///
/// | Power | Distribution |
/// | ------ | ---------------------- |
/// | 0 | Normal |
/// | 1 | Poisson |
/// | (1, 2) | Compound Poisson Gamma |
/// | 2 | Gamma |
/// | 3 | Inverse Gaussian |
///
/// NOTE: No distribution exists between 0 and 1
///
/// Learn more from sklearn's excellent [User Guide](https://scikit-learn.org/stable/modules/linear_model.html#generalized-linear-regression)
///
/// ## Examples
///
/// Here's an example on how to train a GLM on the `diabetes` dataset
/// ```rust
/// use linfa::traits::{Fit, Predict};
/// use linfa_linear::TweedieRegressor;
/// use linfa::prelude::SingleTargetRegression;
///
/// let dataset = linfa_datasets::diabetes();
/// let model = TweedieRegressor::params().fit(&dataset).unwrap();
/// let pred = model.predict(&dataset);
/// let r2 = pred.r2(&dataset).unwrap();
/// println!("r2 from prediction: {}", r2);
/// ```
/// The set of hyperparameters that can be specified for the execution of the Tweedie Regressor.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct TweedieRegressorValidParams<F> {
alpha: F,
@@ -74,6 +43,7 @@ impl<F: Float> TweedieRegressorValidParams<F> {
}
}

/// The set of hyperparameters that can be specified for the execution of the Tweedie Regressor.
#[derive(Debug, Clone, PartialEq)]
pub struct TweedieRegressorParams<F>(TweedieRegressorValidParams<F>);

36 changes: 34 additions & 2 deletions algorithms/linfa-linear/src/glm/mod.rs
@@ -7,7 +7,8 @@ mod link;
use crate::error::{LinearError, Result};
use crate::float::{ArgminParam, Float};
use distribution::TweedieDistribution;
use hyperparams::TweedieRegressorValidParams;
pub use hyperparams::TweedieRegressorParams;
pub use hyperparams::TweedieRegressorValidParams;
use linfa::dataset::AsSingleTargets;
pub use link::Link;

@@ -171,7 +172,38 @@ impl<'a, A: Float> ArgminOp for TweedieProblem<'a, A> {
}
}

/// Fitted Tweedie regressor model for scoring
/// Generalized Linear Model (GLM) with a Tweedie distribution
///
/// The Regressor can be used to model different GLMs depending on
/// [`power`](TweedieRegressorParams),
/// which determines the underlying distribution.
///
/// | Power | Distribution |
/// | ------ | ---------------------- |
/// | 0 | Normal |
/// | 1 | Poisson |
/// | (1, 2) | Compound Poisson Gamma |
/// | 2 | Gamma |
/// | 3 | Inverse Gaussian |
///
/// NOTE: No distribution exists between 0 and 1
///
/// Learn more from sklearn's excellent [User Guide](https://scikit-learn.org/stable/modules/linear_model.html#generalized-linear-regression)
///
/// ## Examples
///
/// Here's an example on how to train a GLM on the `diabetes` dataset
/// ```rust
/// use linfa::traits::{Fit, Predict};
/// use linfa_linear::TweedieRegressor;
/// use linfa::prelude::SingleTargetRegression;
///
/// let dataset = linfa_datasets::diabetes();
/// let model = TweedieRegressor::params().fit(&dataset).unwrap();
/// let pred = model.predict(&dataset);
/// let r2 = pred.r2(&dataset).unwrap();
/// println!("r2 from prediction: {}", r2);
/// ```
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct TweedieRegressor<A> {
/// Estimated coefficients for the linear predictor
4 changes: 2 additions & 2 deletions algorithms/linfa-logistic/src/lib.rs
@@ -5,7 +5,7 @@
//! `linfa-logistic` is a crate in the [`linfa`](https://crates.io/crates/linfa) ecosystem, an effort to create a toolkit for classical Machine Learning implemented in pure Rust, akin to Python's `scikit-learn`.
//!
//! ## Current state
//! `linfa-logistic` provides a pure Rust implementation of a [binomial logistic regression model](struct.LogisticRegression.html) and a [multinomial logistic regression model](struct.MultiLogisticRegression).
//! `linfa-logistic` provides a pure Rust implementation of a [binomial logistic regression model](LogisticRegression) and a [multinomial logistic regression model](MultiLogisticRegression).
//!
//! ## Examples
//!
@@ -49,7 +49,7 @@ use hyperparams::{LogisticRegressionParams, LogisticRegressionValidParams};
///
/// Logistic regression is used in binary classification
/// by interpreting the predicted value as the probability that the sample
/// has label `1`. A threshold can be set in the [fitted model](struct.FittedLogisticRegression.html) to decide the minimum
/// has label `1`. A threshold can be set in the [fitted model](FittedLogisticRegression) to decide the minimum
/// probability needed to classify a sample as `1`, which defaults to `0.5`.
///
/// In this implementation any binary set of labels can be used, not necessarily `0` and `1`.
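The probability-plus-threshold decision rule described in the doc comment above amounts to the following std-only sketch (hypothetical helpers, not the crate's API):

```rust
// Logistic function: maps the linear predictor to a probability in (0, 1).
fn sigmoid(z: f64) -> f64 {
    1.0 / (1.0 + (-z).exp())
}

// Predict the probability of label `1`, then apply the threshold
// (0.5 is the default mentioned above) to pick the label.
fn classify(x: &[f64], weights: &[f64], intercept: f64, threshold: f64) -> u8 {
    let z: f64 = x.iter().zip(weights).map(|(xi, wi)| xi * wi).sum::<f64>() + intercept;
    if sigmoid(z) >= threshold {
        1
    } else {
        0
    }
}
```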
4 changes: 2 additions & 2 deletions algorithms/linfa-nn/src/balltree.rs
@@ -158,7 +158,7 @@ impl<'a, F: Float> BallTreeInner<'a, F> {
}
}

/// Spatial indexing structure created by [`BallTree`](struct.BallTree.html)
/// Spatial indexing structure created by [`BallTree`](BallTree)
#[derive(Debug, Clone, PartialEq)]
pub struct BallTreeIndex<'a, F: Float, D: Distance<F>> {
tree: BallTreeInner<'a, F>,
@@ -282,7 +282,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for BallTreeIndex<'a
/// Implementation of ball tree, a space partitioning data structure that partitions its points
/// into nested hyperspheres called "balls". It performs spatial queries in `O(k * logN)` time,
/// where `k` is the number of points returned by the query. Calling `from_batch` returns a
/// [`BallTreeIndex`](struct.BallTreeIndex.html).
/// [`BallTreeIndex`](BallTreeIndex).
///
/// More details can be found [here](https://en.wikipedia.org/wiki/Ball_tree). This implementation
/// is based on the [ball_tree](https://docs.rs/ball-tree/0.2.0/ball_tree/) crate.
4 changes: 2 additions & 2 deletions algorithms/linfa-nn/src/kdtree.rs
@@ -8,7 +8,7 @@ use crate::{
NnError, Point,
};

/// Spatial indexing structure created by [`KdTree`](struct.KdTree.html)
/// Spatial indexing structure created by [`KdTree`](KdTree)
#[derive(Debug)]
pub struct KdTreeIndex<'a, F: Float, D: Distance<F>>(
kdtree::KdTree<F, (Point<'a, F>, usize), &'a [F]>,
@@ -90,7 +90,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for KdTreeIndex<'a,
/// Implementation of K-D tree, a fast space-partitioning data structure. For each parent node,
/// the indexed points are split with a hyperplane into two child nodes. Due to its tree-like
/// structure, the K-D tree performs spatial queries in `O(k * logN)` time, where `k` is the number
/// of points returned by the query. Calling `from_batch` returns a [`KdTree`](struct.KdTree.html).
/// of points returned by the query. Calling `from_batch` returns a [`KdTreeIndex`](KdTreeIndex).
///
/// More details can be found [here](https://en.wikipedia.org/wiki/K-d_tree).
///
10 changes: 5 additions & 5 deletions algorithms/linfa-nn/src/lib.rs
@@ -14,9 +14,9 @@
//! ## Current state
//!
//! Right now `linfa-nn` provides the following algorithms:
//! * [Linear Scan](struct.LinearSearch.html)
//! * [KD Tree](struct.KdTree.html)
//! * [Ball Tree](struct.BallTree.html)
//! * [Linear Scan](LinearSearch)
//! * [KD Tree](KdTree)
//! * [Ball Tree](BallTree)
//!
//! The [`CommonNearestNeighbour`](CommonNearestNeighbour) enum should be used to dispatch
//! between all of the above algorithms flexibly.
@@ -58,7 +58,7 @@ pub enum NnError {

/// Nearest neighbour algorithm builds a spatial index structure out of a batch of points. The
/// distance between points is calculated using a provided distance function. The index implements
/// the [`NearestNeighbourIndex`](trait.NearestNeighbourIndex.html) trait and allows for efficient
/// the [`NearestNeighbourIndex`](NearestNeighbourIndex) trait and allows for efficient
/// computing of nearest neighbour and range queries.
pub trait NearestNeighbour: std::fmt::Debug + Send + Sync + Unpin {
/// Builds a spatial index using a MxN two-dimensional array representing M points with N
@@ -115,7 +115,7 @@ pub trait NearestNeighbourIndex<F: Float>: Send + Sync + Unpin {
) -> Result<Vec<(Point<F>, usize)>, NnError>;
}

/// Enum that dispatches to one of the crate's [`NearestNeighbour`](trait.NearestNeighbour.html)
/// Enum that dispatches to one of the crate's [`NearestNeighbour`](NearestNeighbour)
/// implementations based on value. This enum should be used instead of using types like
/// `LinearSearch` and `KdTree` directly.
///
4 changes: 2 additions & 2 deletions algorithms/linfa-nn/src/linear.rs
@@ -11,7 +11,7 @@ use crate::{
NearestNeighbourIndex, NnError, Point,
};

/// Spatial indexing structure created by [`LinearSearch`](struct.LinearSearch.html)
/// Spatial indexing structure created by [`LinearSearch`](LinearSearch)
#[derive(Debug, Clone, PartialEq)]
pub struct LinearSearchIndex<'a, F: Float, D: Distance<F>>(ArrayView2<'a, F>, D);

@@ -76,7 +76,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for LinearSearchInde

/// Implementation of linear search, which is the simplest nearest neighbour algorithm. All queries
/// are implemented by scanning through every point, so all of them are `O(N)`. Calling
/// `from_batch` returns a [`LinearSearchIndex`](struct.LinearSearchIndex.html).
/// `from_batch` returns a [`LinearSearchIndex`](LinearSearchIndex).
#[derive(Default, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(
feature = "serde",
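The `O(N)` scan that `LinearSearch` performs can be sketched with std alone. A 2-D, squared-Euclidean illustration with hypothetical helpers, not linfa-nn's actual API:

```rust
// Squared Euclidean distance between two 2-D points.
fn dist2(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    (a[0] - b[0]).powi(2) + (a[1] - b[1]).powi(2)
}

// Index of the point closest to `query`, found by scanning every point,
// hence O(N) per query with no index structure to build.
fn nearest(points: &[[f64; 2]], query: &[f64; 2]) -> Option<usize> {
    points
        .iter()
        .enumerate()
        .min_by(|x, y| dist2(x.1, query).partial_cmp(&dist2(y.1, query)).unwrap())
        .map(|(i, _)| i)
}
```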
4 changes: 2 additions & 2 deletions algorithms/linfa-preprocessing/src/countgrams/hyperparams.rs
@@ -5,14 +5,14 @@ use std::cell::{Ref, RefCell};
use std::collections::HashSet;

/// Count vectorizer: learns a vocabulary from a sequence of documents (or file paths) and maps each
/// vocabulary entry to an integer value, producing a [FittedCountVectorizer](struct.FittedCountVectorizer.html) that can
/// vocabulary entry to an integer value, producing a [CountVectorizer](crate::CountVectorizer) that can
/// be used to count the occurrences of each vocabulary entry in any sequence of documents. Alternatively a user-specified vocabulary can
/// be used for fitting.
///
/// ### Attributes
///
/// If a user-defined vocabulary is used for fitting then the following attributes will not be considered during the fitting phase but
/// they will still be used by the [FittedCountVectorizer](struct.FittedCountVectorizer.html) to transform any text to be examined.
/// they will still be used by the [CountVectorizer](crate::CountVectorizer) to transform any text to be examined.
///
/// * `split_regex`: the regular expression used to split documents into tokens. Defaults to r"\\b\\w\\w+\\b", which selects "words", using whitespaces and
/// punctuation symbols as separators.
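As a rough illustration of what the default `split_regex` does, here is a std-only approximation. The real implementation uses a compiled regex; this stand-in just keeps alphanumeric runs of two or more characters, so it differs on underscores and non-ASCII word boundaries:

```rust
// Approximate the r"\b\w\w+\b" default: split on non-alphanumeric
// characters and keep only tokens of length >= 2, lowercased.
fn tokenize(doc: &str) -> Vec<String> {
    doc.split(|c: char| !c.is_alphanumeric())
        .filter(|t| t.len() >= 2)
        .map(|t| t.to_lowercase())
        .collect()
}
```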