diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index b1b299d35..86179217e 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -2,7 +2,7 @@ on:
   push:
     branches:
-      - master
+      - master
   pull_request:
 
 name: Build website with Zola, build rust docs and publish to GH pages
 
@@ -14,6 +14,7 @@ jobs:
     steps:
       - name: 'Checkout'
         uses: actions/checkout@master
+
       - name: 'Build only'
         uses: shalzz/zola-deploy-action@master
         env:
@@ -21,6 +22,11 @@ jobs:
           TOKEN: ${{ secrets.TOKEN }}
           BUILD_ONLY: true
 
+      - name: Build Documentation
+        run: cargo doc --workspace --no-deps
+        env:
+          RUSTDOCFLAGS: -D warnings
+
   build_and_deploy:
     runs-on: ubuntu-latest
     if: github.ref == 'refs/heads/master' || github.repository != 'rust-ml/linfa'
@@ -34,7 +40,9 @@ jobs:
           components: rustfmt, rust-src
 
       - name: Build Documentation
-        run: cargo doc --all --no-deps
+        run: cargo doc --workspace --no-deps
+        env:
+          RUSTDOCFLAGS: -D warnings
 
       - name: Copy Rust Documentation to Zola
         run: cp -R "target/doc/" "docs/website/static/rustdocs/"
diff --git a/algorithms/linfa-clustering/src/appx_dbscan/hyperparams.rs b/algorithms/linfa-clustering/src/appx_dbscan/hyperparams.rs
index 97018520f..b2076511f 100644
--- a/algorithms/linfa-clustering/src/appx_dbscan/hyperparams.rs
+++ b/algorithms/linfa-clustering/src/appx_dbscan/hyperparams.rs
@@ -13,7 +13,7 @@ use thiserror::Error;
 )]
 #[derive(Clone, Debug, PartialEq)]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [Approximated DBSCAN algorithm](struct.AppxDbscan.html).
+/// the [Approximated DBSCAN algorithm](crate::AppxDbscan).
 pub struct AppxDbscanValidParams<F: Float, N> {
     pub(crate) tolerance: F,
     pub(crate) min_points: usize,
@@ -23,7 +23,7 @@ pub struct AppxDbscanValidParams<F: Float, N> {
 
 #[derive(Debug, Clone, PartialEq)]
 /// Helper struct for building a set of [Approximated DBSCAN
-/// hyperparameters](struct.AppxDbscanParams.html)
+/// hyperparameters](AppxDbscanParams)
 pub struct AppxDbscanParams<F: Float, N>(AppxDbscanValidParams<F, N>);
 
 #[derive(Debug, Error)]
diff --git a/algorithms/linfa-clustering/src/dbscan/hyperparams.rs b/algorithms/linfa-clustering/src/dbscan/hyperparams.rs
index 3eec71a52..57cf5ad0d 100644
--- a/algorithms/linfa-clustering/src/dbscan/hyperparams.rs
+++ b/algorithms/linfa-clustering/src/dbscan/hyperparams.rs
@@ -11,7 +11,7 @@ use thiserror::Error;
 )]
 #[derive(Debug, Clone, PartialEq)]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [DBSCAN algorithm](struct.Dbscan.html).
+/// the [DBSCAN algorithm](crate::Dbscan).
 pub struct DbscanValidParams<F: Float, D: Distance<F>, N: NearestNeighbour> {
     pub(crate) tolerance: F,
     pub(crate) min_points: usize,
@@ -20,7 +20,7 @@ pub struct DbscanValidParams<F: Float, D: Distance<F>, N: NearestNeighbour> {
 }
 
 #[derive(Debug, Clone, PartialEq)]
-/// Helper struct for building a set of [DBSCAN hyperparameters](struct.DbscanParams.html)
+/// Helper struct for building a set of [DBSCAN hyperparameters](DbscanParams)
 pub struct DbscanParams<F: Float, D: Distance<F>, N: NearestNeighbour>(DbscanValidParams<F, D, N>);
 
 #[derive(Error, Debug)]
diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
index f0deaf0e3..1e640cd59 100644
--- a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
+++ b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -41,7 +41,7 @@ use serde_crate::{Deserialize, Serialize};
 /// that is maximising the probability that the dataset is drawn from our mixture of normal distributions.
 ///
 /// After an initialization step which can be either from random distribution or from the result
-/// of the [KMeans](struct.KMeans.html) algorithm (which is the default value of the `init_method` parameter).
+/// of the [KMeans](KMeans) algorithm (which is the default value of the `init_method` parameter).
 /// The core EM iterative algorithm for Gaussian Mixture is a fixed-point two-step algorithm:
 ///
 /// 1. Expectation step: compute the expectation of the likelihood of the current gaussian mixture model wrt the dataset.
diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/hyperparams.rs b/algorithms/linfa-clustering/src/gaussian_mixture/hyperparams.rs
index fb2328693..5f9a07b1a 100644
--- a/algorithms/linfa-clustering/src/gaussian_mixture/hyperparams.rs
+++ b/algorithms/linfa-clustering/src/gaussian_mixture/hyperparams.rs
@@ -26,7 +26,7 @@ pub enum GmmCovarType {
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 /// A specifier for the method used for the initialization of the fitting algorithm of GMM
 pub enum GmmInitMethod {
-    /// GMM fitting algorithm is initalized with the esult of the [KMeans](struct.KMeans.html) clustering.
+    /// GMM fitting algorithm is initialized with the result of the [KMeans](crate::KMeans) clustering.
     KMeans,
     /// GMM fitting algorithm is initialized randomly.
     Random,
@@ -39,7 +39,7 @@ pub enum GmmInitMethod {
 )]
 #[derive(Clone, Debug, PartialEq)]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [GMM algorithm](struct.GaussianMixtureModel.html).
+/// the [GMM algorithm](crate::GaussianMixtureModel).
 pub struct GmmValidParams<F: Float, R: Rng> {
     n_clusters: usize,
     covar_type: GmmCovarType,
@@ -92,7 +92,7 @@ impl<F: Float, R: Rng + Clone> GmmValidParams<F, R> {
 )]
 #[derive(Clone, Debug, PartialEq)]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [GMM algorithm](struct.GaussianMixtureModel.html).
+/// the [GMM algorithm](crate::GaussianMixtureModel).
 pub struct GmmParams<F: Float, R: Rng>(GmmValidParams<F, R>);
 
 impl<F: Float, R: Rng + Clone> GmmParams<F, R> {
diff --git a/algorithms/linfa-clustering/src/k_means/hyperparams.rs b/algorithms/linfa-clustering/src/k_means/hyperparams.rs
index cda2a8507..2f7a687cb 100644
--- a/algorithms/linfa-clustering/src/k_means/hyperparams.rs
+++ b/algorithms/linfa-clustering/src/k_means/hyperparams.rs
@@ -15,7 +15,7 @@ use serde_crate::{Deserialize, Serialize};
 )]
 #[derive(Clone, Debug, PartialEq)]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [K-means algorithm](struct.KMeans.html).
+/// the [K-means algorithm](crate::KMeans).
 pub struct KMeansValidParams<F: Float, R: Rng, D: Distance<F>> {
     /// Number of time the k-means algorithm will be run with different centroid seeds.
     n_runs: usize,
@@ -38,8 +38,8 @@ pub struct KMeansValidParams<F: Float, R: Rng, D: Distance<F>> {
 }
 
 #[derive(Clone, Debug, PartialEq)]
-/// An helper struct used to construct a set of [valid hyperparameters](struct.KMeansParams.html) for
-/// the [K-means algorithm](struct.KMeans.html) (using the builder pattern).
+/// A helper struct used to construct a set of [valid hyperparameters](KMeansParams) for
+/// the [K-means algorithm](crate::KMeans) (using the builder pattern).
 pub struct KMeansParams<F: Float, R: Rng, D: Distance<F>>(KMeansValidParams<F, R, D>);
 
 impl<F: Float, R: Rng, D: Distance<F>> KMeansParams<F, R, D> {
diff --git a/algorithms/linfa-clustering/src/lib.rs b/algorithms/linfa-clustering/src/lib.rs
index befef3ee0..990f2d376 100644
--- a/algorithms/linfa-clustering/src/lib.rs
+++ b/algorithms/linfa-clustering/src/lib.rs
@@ -13,11 +13,11 @@
 //! ## Current state
 //!
 //! Right now `linfa-clustering` provides the following clustering algorithms:
-//! * [K-Means](struct.KMeans.html)
-//! * [DBSCAN](struct.Dbscan.html)
-//! * [Approximated DBSCAN](struct.AppxDbscan.html)
-//! * [Gaussian-Mixture-Model](struct.GaussianMixtureModel.html)
-//! * [OPTICS](struct.OpticsAnalysis.html)
+//! * [K-Means](KMeans)
+//! * [DBSCAN](Dbscan)
+//! * [Approximated DBSCAN](AppxDbscan)
+//! * [Gaussian-Mixture-Model](GaussianMixtureModel)
+//! * [OPTICS](OpticsAnalysis)
 //!
 //! Implementation choices, algorithmic details and tutorials can be found in the page dedicated to the specific algorithms.
 mod appx_dbscan;
diff --git a/algorithms/linfa-clustering/src/optics/hyperparams.rs b/algorithms/linfa-clustering/src/optics/hyperparams.rs
index 23009c38d..1ab427bbf 100644
--- a/algorithms/linfa-clustering/src/optics/hyperparams.rs
+++ b/algorithms/linfa-clustering/src/optics/hyperparams.rs
@@ -10,7 +10,7 @@ use serde_crate::{Deserialize, Serialize};
     serde(crate = "serde_crate")
 )]
 /// The set of hyperparameters that can be specified for the execution of
-/// the [OPTICS algorithm](struct.Optics.html).
+/// the [OPTICS algorithm](crate::Optics).
 pub struct OpticsValidParams<F, D, N> {
     /// Distance between points for them to be considered neighbours.
     tolerance: F,
diff --git a/algorithms/linfa-linear/src/glm/hyperparams.rs b/algorithms/linfa-linear/src/glm/hyperparams.rs
index 7db89f1e9..2ca8ea228 100644
--- a/algorithms/linfa-linear/src/glm/hyperparams.rs
+++ b/algorithms/linfa-linear/src/glm/hyperparams.rs
@@ -2,38 +2,7 @@ use crate::{glm::link::Link, LinearError, TweedieRegressor};
 use linfa::{Float, ParamGuard};
 use serde::{Deserialize, Serialize};
 
-/// Generalized Linear Model (GLM) with a Tweedie distribution
-///
-/// The Regressor can be used to model different GLMs depending on
-/// [`power`](struct.TweedieRegressor.html#method.power),
-/// which determines the underlying distribution.
-///
-/// | Power  | Distribution           |
-/// | ------ | ---------------------- |
-/// | 0      | Normal                 |
-/// | 1      | Poisson                |
-/// | (1, 2) | Compound Poisson Gamma |
-/// | 2      | Gamma                  |
-/// | 3      | Inverse Gaussian       |
-///
-/// NOTE: No distribution exists between 0 and 1
-///
-/// Learn more from sklearn's excellent [User Guide](https://scikit-learn.org/stable/modules/linear_model.html#generalized-linear-regression)
-///
-/// ## Examples
-///
-/// Here's an example on how to train a GLM on the `diabetes` dataset
-/// ```rust
-/// use linfa::traits::{Fit, Predict};
-/// use linfa_linear::TweedieRegressor;
-/// use linfa::prelude::SingleTargetRegression;
-///
-/// let dataset = linfa_datasets::diabetes();
-/// let model = TweedieRegressor::params().fit(&dataset).unwrap();
-/// let pred = model.predict(&dataset);
-/// let r2 = pred.r2(&dataset).unwrap();
-/// println!("r2 from prediction: {}", r2);
-/// ```
+/// The set of hyperparameters that can be specified for the execution of the Tweedie Regressor.
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct TweedieRegressorValidParams<F: Float> {
     alpha: F,
@@ -74,6 +43,7 @@ impl<F: Float> TweedieRegressorValidParams<F> {
     }
 }
 
+/// The set of hyperparameters that can be specified for the execution of the Tweedie Regressor.
 #[derive(Debug, Clone, PartialEq)]
 pub struct TweedieRegressorParams<F: Float>(TweedieRegressorValidParams<F>);
 
diff --git a/algorithms/linfa-linear/src/glm/mod.rs b/algorithms/linfa-linear/src/glm/mod.rs
index f89ebb79d..35956365b 100644
--- a/algorithms/linfa-linear/src/glm/mod.rs
+++ b/algorithms/linfa-linear/src/glm/mod.rs
@@ -7,7 +7,8 @@ mod link;
 use crate::error::{LinearError, Result};
 use crate::float::{ArgminParam, Float};
 use distribution::TweedieDistribution;
-use hyperparams::TweedieRegressorValidParams;
+pub use hyperparams::TweedieRegressorParams;
+pub use hyperparams::TweedieRegressorValidParams;
 use linfa::dataset::AsSingleTargets;
 pub use link::Link;
 
@@ -171,7 +172,38 @@ impl<'a, A: Float> ArgminOp for TweedieProblem<'a, A> {
     }
 }
 
-/// Fitted Tweedie regressor model for scoring
+/// Generalized Linear Model (GLM) with a Tweedie distribution
+///
+/// The Regressor can be used to model different GLMs depending on
+/// [`power`](TweedieRegressorParams),
+/// which determines the underlying distribution.
+///
+/// | Power  | Distribution           |
+/// | ------ | ---------------------- |
+/// | 0      | Normal                 |
+/// | 1      | Poisson                |
+/// | (1, 2) | Compound Poisson Gamma |
+/// | 2      | Gamma                  |
+/// | 3      | Inverse Gaussian       |
+///
+/// NOTE: No distribution exists between 0 and 1
+///
+/// Learn more from sklearn's excellent [User Guide](https://scikit-learn.org/stable/modules/linear_model.html#generalized-linear-regression)
+///
+/// ## Examples
+///
+/// Here's an example of how to train a GLM on the `diabetes` dataset
+/// ```rust
+/// use linfa::traits::{Fit, Predict};
+/// use linfa_linear::TweedieRegressor;
+/// use linfa::prelude::SingleTargetRegression;
+///
+/// let dataset = linfa_datasets::diabetes();
+/// let model = TweedieRegressor::params().fit(&dataset).unwrap();
+/// let pred = model.predict(&dataset);
+/// let r2 = pred.r2(&dataset).unwrap();
+/// println!("r2 from prediction: {}", r2);
+/// ```
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct TweedieRegressor<A> {
     /// Estimated coefficients for the linear predictor
diff --git a/algorithms/linfa-logistic/src/lib.rs b/algorithms/linfa-logistic/src/lib.rs
index edc3b66cd..94852f25b 100644
--- a/algorithms/linfa-logistic/src/lib.rs
+++ b/algorithms/linfa-logistic/src/lib.rs
@@ -5,7 +5,7 @@
 //! `linfa-logistic` is a crate in the [`linfa`](https://crates.io/crates/linfa) ecosystem, an effort to create a toolkit for classical Machine Learning implemented in pure Rust, akin to Python's `scikit-learn`.
 //!
 //! ## Current state
-//! `linfa-logistic` provides a pure Rust implementation of a [binomial logistic regression model](struct.LogisticRegression.html) and a [multinomial logistic regression model](struct.MultiLogisticRegression).
+//! `linfa-logistic` provides a pure Rust implementation of a [binomial logistic regression model](LogisticRegression) and a [multinomial logistic regression model](MultiLogisticRegression).
 //!
 //! ## Examples
 //!
@@ -49,7 +49,7 @@ use hyperparams::{LogisticRegressionParams, LogisticRegressionValidParams};
 ///
 /// Logistic regression is used in binary classification
 /// by interpreting the predicted value as the probability that the sample
-/// has label `1`. A threshold can be set in the [fitted model](struct.FittedLogisticRegression.html) to decide the minimum
+/// has label `1`. A threshold can be set in the [fitted model](FittedLogisticRegression) to decide the minimum
 /// probability needed to classify a sample as `1`, which defaults to `0.5`.
 ///
 /// In this implementation any binary set of labels can be used, not necessarily `0` and `1`.
diff --git a/algorithms/linfa-nn/src/balltree.rs b/algorithms/linfa-nn/src/balltree.rs
index bc93944da..e29045609 100644
--- a/algorithms/linfa-nn/src/balltree.rs
+++ b/algorithms/linfa-nn/src/balltree.rs
@@ -158,7 +158,7 @@ impl<'a, F: Float> BallTreeInner<'a, F> {
     }
 }
 
-/// Spatial indexing structure created by [`BallTree`](struct.BallTree.html)
+/// Spatial indexing structure created by [`BallTree`](BallTree)
 #[derive(Debug, Clone, PartialEq)]
 pub struct BallTreeIndex<'a, F: Float, D: Distance<F>> {
     tree: BallTreeInner<'a, F>,
@@ -282,7 +282,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for BallTreeIndex<'a
 /// Implementation of ball tree, a space partitioning data structure that partitions its points
 /// into nested hyperspheres called "balls". It performs spatial queries in `O(k * logN)` time,
 /// where `k` is the number of points returned by the query. Calling `from_batch` returns a
-/// [`BallTreeIndex`](struct.BallTreeIndex.html).
+/// [`BallTreeIndex`](BallTreeIndex).
 ///
 /// More details can be found [here](https://en.wikipedia.org/wiki/Ball_tree). This implementation
 /// is based off of the [ball_tree](https://docs.rs/ball-tree/0.2.0/ball_tree/) crate.
diff --git a/algorithms/linfa-nn/src/kdtree.rs b/algorithms/linfa-nn/src/kdtree.rs
index e7c722c18..d6ec643bb 100644
--- a/algorithms/linfa-nn/src/kdtree.rs
+++ b/algorithms/linfa-nn/src/kdtree.rs
@@ -8,7 +8,7 @@ use crate::{
     NnError, Point,
 };
 
-/// Spatial indexing structure created by [`KdTree`](struct.KdTree.html)
+/// Spatial indexing structure created by [`KdTree`](KdTree)
 #[derive(Debug)]
 pub struct KdTreeIndex<'a, F: Float, D: Distance<F>>(
     kdtree::KdTree<F, (Point<'a, F>, usize), &'a [F]>,
@@ -90,7 +90,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for KdTreeIndex<'a,
 /// Implementation of K-D tree, a fast space-partitioning data structure. For each parent node,
 /// the indexed points are split with a hyperplane into two child nodes. Due to its tree-like
 /// structure, the K-D tree performs spatial queries in `O(k * logN)` time, where `k` is the number
-/// of points returned by the query. Calling `from_batch` returns a [`KdTree`](struct.KdTree.html).
+/// of points returned by the query. Calling `from_batch` returns a [`KdTree`](KdTree).
 ///
 /// More details can be found [here](https://en.wikipedia.org/wiki/K-d_tree).
 ///
diff --git a/algorithms/linfa-nn/src/lib.rs b/algorithms/linfa-nn/src/lib.rs
index 0a2fd17cf..599b60018 100644
--- a/algorithms/linfa-nn/src/lib.rs
+++ b/algorithms/linfa-nn/src/lib.rs
@@ -14,9 +14,9 @@
 //! ## Current state
 //!
 //! Right now `linfa-nn` provides the following algorithms:
-//! * [Linear Scan](struct.LinearSearch.html)
-//! * [KD Tree](struct.KdTree.html)
-//! * [Ball Tree](struct.BallTree.html)
+//! * [Linear Scan](LinearSearch)
+//! * [KD Tree](KdTree)
+//! * [Ball Tree](BallTree)
 //!
 //! The [`CommonNearestNeighbour`](struct.CommonNearestNeighbour) enum should be used to dispatch
 //! between all of the above algorithms flexibly.
@@ -58,7 +58,7 @@ pub enum NnError {
 /// Nearest neighbour algorithm builds a spatial index structure out of a batch of points. The
 /// distance between points is calculated using a provided distance function. The index implements
-/// the [`NearestNeighbourIndex`](trait.NearestNeighbourIndex.html) trait and allows for efficient
+/// the [`NearestNeighbourIndex`](NearestNeighbourIndex) trait and allows for efficient
 /// computing of nearest neighbour and range queries.
 pub trait NearestNeighbour: std::fmt::Debug + Send + Sync + Unpin {
     /// Builds a spatial index using a MxN two-dimensional array representing M points with N
@@ -115,7 +115,7 @@ pub trait NearestNeighbourIndex<F: Float>: Send + Sync + Unpin {
     ) -> Result<Vec<(Point<F>, usize)>, NnError>;
 }
 
-/// Enum that dispatches to one of the crate's [`NearestNeighbour`](trait.NearestNeighbour.html)
+/// Enum that dispatches to one of the crate's [`NearestNeighbour`](NearestNeighbour)
 /// implementations based on value. This enum should be used instead of using types like
 /// `LinearSearch` and `KdTree` directly.
 ///
diff --git a/algorithms/linfa-nn/src/linear.rs b/algorithms/linfa-nn/src/linear.rs
index 00cf53568..d227e1833 100644
--- a/algorithms/linfa-nn/src/linear.rs
+++ b/algorithms/linfa-nn/src/linear.rs
@@ -11,7 +11,7 @@ use crate::{
     NearestNeighbourIndex, NnError, Point,
 };
 
-/// Spatial indexing structure created by [`LinearSearch`](struct.LinearSearch.html)
+/// Spatial indexing structure created by [`LinearSearch`](LinearSearch)
 #[derive(Debug, Clone, PartialEq)]
 pub struct LinearSearchIndex<'a, F: Float, D: Distance<F>>(ArrayView2<'a, F>, D);
 
@@ -76,7 +76,7 @@ impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for LinearSearchInde
 /// Implementation of linear search, which is the simplest nearest neighbour algorithm. All queries
 /// are implemented by scanning through every point, so all of them are `O(N)`. Calling
-/// `from_batch` returns a [`LinearSearchIndex`](struct.LinearSearchIndex.html).
+/// `from_batch` returns a [`LinearSearchIndex`](LinearSearchIndex).
 #[derive(Default, Clone, Debug, PartialEq, Eq)]
 #[cfg_attr(
     feature = "serde",
diff --git a/algorithms/linfa-preprocessing/src/countgrams/hyperparams.rs b/algorithms/linfa-preprocessing/src/countgrams/hyperparams.rs
index c48b620e4..ceacbb2d9 100644
--- a/algorithms/linfa-preprocessing/src/countgrams/hyperparams.rs
+++ b/algorithms/linfa-preprocessing/src/countgrams/hyperparams.rs
@@ -5,14 +5,14 @@ use std::cell::{Ref, RefCell};
 use std::collections::HashSet;
 
 /// Count vectorizer: learns a vocabulary from a sequence of documents (or file paths) and maps each
-/// vocabulary entry to an integer value, producing a [FittedCountVectorizer](struct.FittedCountVectorizer.html) that can
+/// vocabulary entry to an integer value, producing a [CountVectorizer](crate::CountVectorizer) that can
 /// be used to count the occurrences of each vocabulary entry in any sequence of documents. Alternatively a user-specified vocabulary can
 /// be used for fitting.
 ///
 /// ### Attributes
 ///
 /// If a user-defined vocabulary is used for fitting then the following attributes will not be considered during the fitting phase but
-/// they will still be used by the [FittedCountVectorizer](struct.FittedCountVectorizer.html) to transform any text to be examined.
+/// they will still be used by the [CountVectorizer](crate::CountVectorizer) to transform any text to be examined.
 ///
 /// * `split_regex`: the regex espression used to split decuments into tokens. Defaults to r"\\b\\w\\w+\\b", which selects "words", using whitespaces and
 /// punctuation symbols as separators.
diff --git a/algorithms/linfa-preprocessing/src/countgrams/mod.rs b/algorithms/linfa-preprocessing/src/countgrams/mod.rs
index 7e1f5abf3..1f91f543c 100644
--- a/algorithms/linfa-preprocessing/src/countgrams/mod.rs
+++ b/algorithms/linfa-preprocessing/src/countgrams/mod.rs
@@ -20,7 +20,7 @@ mod hyperparams;
 impl CountVectorizerValidParams {
     /// Learns a vocabulary from the documents in `x`, according to the specified attributes and maps each
-    /// vocabulary entry to an integer value, producing a [CountVectorizer](struct.CountVectorizer.html).
+    /// vocabulary entry to an integer value, producing a [CountVectorizer](CountVectorizer).
     ///
     /// Returns an error if:
     /// * one of the `n_gram` boundaries is set to zero or the minimum value is greater than the maximum value
@@ -48,7 +48,7 @@ impl CountVectorizerValidParams {
     }
 
     /// Learns a vocabulary from the documents contained in the files in `input`, according to the specified attributes and maps each
-    /// vocabulary entry to an integer value, producing a [CountVectorizer](struct.CountVectorizer.html).
+    /// vocabulary entry to an integer value, producing a [CountVectorizer](CountVectorizer).
     ///
     /// The files will be read using the specified `encoding`, and any sequence unrecognized by the encoding will be handled
     /// according to `trap`.
@@ -93,8 +93,8 @@ impl CountVectorizerValidParams {
         })
     }
 
-    /// Produces a [CountVectorizer](struct.CountVectorizer.html) with the input vocabulary.
-    /// All struct attributes are ignored in the fitting but will be used by the [CountVectorizer](struct.CountVectorizer.html)
+    /// Produces a [CountVectorizer](CountVectorizer) with the input vocabulary.
+    /// All struct attributes are ignored in the fitting but will be used by the [CountVectorizer](CountVectorizer)
     /// to transform any text to be examined. As such this will return an error in the same cases as the `fit` method.
     pub fn fit_vocabulary<T: ToString>(&self, words: &[T]) -> Result<CountVectorizer> {
         let mut vocabulary: HashMap<String, (usize, usize)> = HashMap::with_capacity(words.len());
@@ -179,7 +179,7 @@ impl CountVectorizerValidParams {
 
 impl CountVectorizerParams {
     /// Learns a vocabulary from the documents in `x`, according to the specified attributes and maps each
-    /// vocabulary entry to an integer value, producing a [CountVectorizer](struct.CountVectorizer.html).
+    /// vocabulary entry to an integer value, producing a [CountVectorizer](CountVectorizer).
     ///
     /// Returns an error if:
     /// * one of the `n_gram` boundaries is set to zero or the minimum value is greater than the maximum value
@@ -194,7 +194,7 @@ impl CountVectorizerParams {
     }
 
     /// Learns a vocabulary from the documents contained in the files in `input`, according to the specified attributes and maps each
-    /// vocabulary entry to an integer value, producing a [CountVectorizer](struct.CountVectorizer.html).
+    /// vocabulary entry to an integer value, producing a [CountVectorizer](CountVectorizer).
     ///
     /// The files will be read using the specified `encoding`, and any sequence unrecognized by the encoding will be handled
     /// according to `trap`.
@@ -216,8 +216,8 @@ impl CountVectorizerParams {
             .and_then(|params| params.fit_files(input, encoding, trap))
     }
 
-    /// Produces a [CountVectorizer](struct.CountVectorizer.html) with the input vocabulary.
-    /// All struct attributes are ignored in the fitting but will be used by the [CountVectorizer](struct.CountVectorizer.html)
+    /// Produces a [CountVectorizer](CountVectorizer) with the input vocabulary.
+    /// All struct attributes are ignored in the fitting but will be used by the [CountVectorizer](CountVectorizer)
     /// to transform any text to be examined. As such this will return an error in the same cases as the `fit` method.
     pub fn fit_vocabulary<T: ToString>(&self, words: &[T]) -> Result<CountVectorizer> {
         self.check_ref()
diff --git a/algorithms/linfa-preprocessing/src/linear_scaling.rs b/algorithms/linfa-preprocessing/src/linear_scaling.rs
index e72f1aabe..29e3198d6 100644
--- a/algorithms/linfa-preprocessing/src/linear_scaling.rs
+++ b/algorithms/linfa-preprocessing/src/linear_scaling.rs
@@ -11,7 +11,7 @@ use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix2, Zip};
 use ndarray_linalg::norm::Norm;
 
 #[derive(Clone, Debug, PartialEq)]
-/// Possible scaling methods for [LinearScaler](struct.LinearScaler.html)
+/// Possible scaling methods for [LinearScaler](LinearScaler)
 ///
 /// * Standard (with mean, with std): subtracts the mean to each feature and scales it by the inverse of its standard deviation
 /// * MinMax (min, max): scales each feature to fit in the range `min..=max`, default values are
@@ -140,7 +140,7 @@ impl std::fmt::Display for ScalingMethod {
     }
 }
 
-/// Linear Scaler: learns scaling parameters, according to the specified [method](enum.ScalingMethod.html), from a dataset, producing a [fitted linear scaler](struct.LinearScaler.html)
+/// Linear Scaler: learns scaling parameters, according to the specified [method](ScalingMethod), from a dataset, producing a [fitted linear scaler](LinearScaler)
 /// that can be used to scale different datasets using the same parameters.
 ///
 ///
@@ -226,7 +226,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsTargets> Fit<ArrayBase<D, Ix2>, T, Prepro
 {
     type Object = LinearScaler<F>;
-    /// Fits the input dataset accordng to the scaler [method](enum.ScalingMethod.html). Will return an error
+    /// Fits the input dataset according to the scaler [method](ScalingMethod). Will return an error
     /// if the dataset does not contain any samples or (in the case of MinMax scaling) if the specified range is not valid.
     fn fit(&self, x: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object> {
         self.method.fit(x.records())
     }
@@ -234,7 +234,7 @@
 }
 
 #[derive(Debug, Clone, PartialEq)]
-/// The result of fitting a [linear scaler](struct.LinearScalerParams.html).
+/// The result of fitting a [linear scaler](LinearScalerParams).
 /// Scales datasets with the parameters learned during fitting.
 pub struct LinearScaler<F: Float> {
     offsets: Array1<F>,
@@ -253,7 +253,7 @@ impl<F: Float> LinearScaler<F> {
         &self.scales
     }
 
-    /// Returns the method used for fitting. Useful for printing, since [ScalingMethod](enum.ScalingMethod.html) implements `Display`
+    /// Returns the method used for fitting. Useful for printing, since [ScalingMethod](ScalingMethod) implements `Display`
     pub fn method(&self) -> &ScalingMethod<F> {
         &self.method
     }
diff --git a/algorithms/linfa-preprocessing/src/tf_idf_vectorization.rs b/algorithms/linfa-preprocessing/src/tf_idf_vectorization.rs
index 1bf8c6b55..d2226e793 100644
--- a/algorithms/linfa-preprocessing/src/tf_idf_vectorization.rs
+++ b/algorithms/linfa-preprocessing/src/tf_idf_vectorization.rs
@@ -32,12 +32,12 @@ impl TfIdfMethod {
     }
 }
 
-/// Simlar to [`CountVectorizer`](studct.CountVectorizer.html) but instead of
+/// Similar to [`CountVectorizer`](CountVectorizer) but instead of
 /// just counting the term frequency of each vocabulary entry in each given document,
 /// it computes the term frequecy times the inverse document frequency, thus giving more importance
 /// to entries that appear many times but only on some documents. The weight function can be adjusted
-/// by setting the appropriate [method](enum.TfIdfMethod.html). This struct provides the same string
-/// processing customizations described in [`CountVectorizer`](studct.CountVectorizer.html).
+/// by setting the appropriate [method](TfIdfMethod). This struct provides the same string
+/// processing customizations described in [`CountVectorizer`](CountVectorizer).
 #[derive(Clone, Debug)]
 pub struct TfIdfVectorizer {
     count_vectorizer: CountVectorizerParams,
@@ -110,7 +110,7 @@ impl TfIdfVectorizer {
     }
 
     /// Learns a vocabulary from the texts in `x`, according to the specified attributes and maps each
-    /// vocabulary entry to an integer value, producing a [FittedTfIdfVectorizer](struct.FittedTfIdfVectorizer.html).
+    /// vocabulary entry to an integer value, producing a [FittedTfIdfVectorizer](FittedTfIdfVectorizer).
     ///
     /// Returns an error if:
     /// * one of the `n_gram` boundaries is set to zero or the minimum value is greater than the maximum value
@@ -127,8 +127,8 @@ impl TfIdfVectorizer {
         })
     }
 
-    /// Produces a [FittedTfIdfVectorizer](struct.FittedTfIdfVectorizer.html) with the input vocabulary.
-    /// All struct attributes are ignored in the fitting but will be used by the [FittedTfIdfVectorizer](struct.FittedTfIdfVectorizer.html)
+    /// Produces a [FittedTfIdfVectorizer](FittedTfIdfVectorizer) with the input vocabulary.
+    /// All struct attributes are ignored in the fitting but will be used by the [FittedTfIdfVectorizer](FittedTfIdfVectorizer)
     /// to transform any text to be examined. As such this will return an error in the same cases as the `fit` method.
     pub fn fit_vocabulary<T: ToString>(&self, words: &[T]) -> Result<FittedTfIdfVectorizer> {
         let fitted_vectorizer = self.count_vectorizer.fit_vocabulary(words)?;
@@ -153,7 +153,7 @@ impl TfIdfVectorizer {
 }
 
 /// Counts the occurrences of each vocabulary entry, learned during fitting, in a sequence of texts and scales them by the inverse document
-/// document frequency defined by the [method](enum.TfIdfMethod.html). Each vocabulary entry is mapped
+/// frequency defined by the [method](TfIdfMethod). Each vocabulary entry is mapped
 /// to an integer value that is used to index the count in the result.
 #[derive(Clone, Debug)]
 pub struct FittedTfIdfVectorizer {
diff --git a/algorithms/linfa-preprocessing/src/whitening.rs b/algorithms/linfa-preprocessing/src/whitening.rs
index 2d10ced6d..6d031ad2b 100644
--- a/algorithms/linfa-preprocessing/src/whitening.rs
+++ b/algorithms/linfa-preprocessing/src/whitening.rs
@@ -31,7 +31,7 @@ pub enum WhiteningMethod {
 }
 
 /// Struct that can be fitted to the input data to obtain the related whitening matrix.
-/// Fitting returns a [FittedWhitener](struct.FittedWhitener.html) struct that can be used to
+/// Fitting returns a [FittedWhitener](FittedWhitener) struct that can be used to
 /// apply the whitening transformation to the input data.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Whitener {
@@ -144,7 +144,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsTargets> Fit<ArrayBase<D, Ix2>, T, Prepro
 
 /// Struct that can be used to whiten data. Data will be scaled according to the whitening matrix learned
 /// during fitting.
-/// Obtained by fitting a [Whitener](struct.Whitener.html).
+/// Obtained by fitting a [Whitener](Whitener).
 ///
 /// Transforming the data used during fitting will yield a scaled data matrix with
 /// unit diagonal covariance matrix.
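Every documentation hunk in this patch applies the same migration, so it may help to see the two link styles side by side. A minimal sketch, using an illustrative `Example` item that is not taken from the diff:

```rust
/// Old style: a hard-coded path into rustdoc's generated output. It renders
/// as a link whether or not `struct.Example.html` actually exists, so a
/// rename or a module move breaks it silently:
/// [Example](struct.Example.html)
///
/// New style: an intra-doc link. rustdoc resolves the Rust path itself while
/// building the docs and emits a warning (an error under `-D warnings`, as
/// configured in the workflow above) when the path does not resolve:
/// [Example](crate::Example)
pub struct Example;
```

Bare paths such as `(KMeans)` and `crate::`-qualified paths such as `(crate::KMeans)` are both intra-doc links; the qualified form also resolves from other modules of the same crate, which is why the hunks prefer it for items re-exported at the crate root.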
diff --git a/algorithms/linfa-trees/src/decision_trees/algorithm.rs b/algorithms/linfa-trees/src/decision_trees/algorithm.rs
index d1422eacc..a281db39d 100644
--- a/algorithms/linfa-trees/src/decision_trees/algorithm.rs
+++ b/algorithms/linfa-trees/src/decision_trees/algorithm.rs
@@ -446,7 +446,7 @@ impl<F: Float, L: Label> TreeNode<F, L> {
 /// containing all observations with `feature <= split value` and the right one containing the rest.
 /// * If no suitable split is found, the node is marked as leaf and its prediction is set to be the most common label in the node;
 ///
-/// The [quality score](enum.SplitQuality.html) used can be specified in the [parameters](struct.DecisionTreeParams.html).
+/// The [quality score](SplitQuality) used can be specified in the [parameters](crate::DecisionTreeParams).
 ///
 /// ### Predictions
 ///
@@ -456,7 +456,7 @@ impl<F: Float, L: Label> TreeNode<F, L> {
 /// ### Additional constraints
 ///
 /// In order to avoid overfitting the training data, some additional constraints on the quality/quantity of splits can be added to the tree.
-/// A description of these additional rules is provided in the [parameters](struct.DecisionTreeParams.html) page.
+/// A description of these additional rules is provided in the [parameters](crate::DecisionTreeParams) page.
 ///
 /// ### Example
 ///
@@ -615,7 +615,7 @@ impl<F: Float, L: Label> DecisionTree<F, L> {
         self.iter_nodes().filter(|node| node.is_leaf()).count()
     }
 
-    /// Generates a [`Tikz`](struct.Tikz.html) structure to print the
+    /// Generates a [`Tikz`](Tikz) structure to print the
     /// fitted tree in Tex using tikz and forest, with the following default parameters:
     ///
     /// * `legend=false`
diff --git a/algorithms/linfa-trees/src/decision_trees/hyperparams.rs b/algorithms/linfa-trees/src/decision_trees/hyperparams.rs
index c0b739eb8..75382ff74 100644
--- a/algorithms/linfa-trees/src/decision_trees/hyperparams.rs
+++ b/algorithms/linfa-trees/src/decision_trees/hyperparams.rs
@@ -30,7 +30,7 @@ pub enum SplitQuality {
 }
 
 /// The set of hyperparameters that can be specified for fitting a
-/// [decision tree](struct.DecisionTree.html).
+/// [decision tree](DecisionTree).
 ///
 /// ### Example
 ///
diff --git a/algorithms/linfa-trees/src/lib.rs b/algorithms/linfa-trees/src/lib.rs
index 61a76470a..3440e9004 100644
--- a/algorithms/linfa-trees/src/lib.rs
+++ b/algorithms/linfa-trees/src/lib.rs
@@ -14,7 +14,7 @@
 //!
 //! # Current state
 //!
-//! `linfa-trees` currently provides an [implementation](struct.DecisionTree.html) of single-tree fitting for classification.
+//! `linfa-trees` currently provides an [implementation](DecisionTree) of single-tree fitting for classification.
 //!
 mod decision_trees;
diff --git a/src/metrics_regression.rs b/src/metrics_regression.rs
index 44f9a1b1d..551b244ab 100644
--- a/src/metrics_regression.rs
+++ b/src/metrics_regression.rs
@@ -14,7 +14,7 @@ use std::ops::Sub;
 /// Regression metrices trait for single targets.
 ///
 /// It is possible to compute the listed mectrics between two 1D arrays.
-/// To compare bi-dimensional arrays use [`MultiTargetRegression`](trait.MultiTargetRegression.html).
+/// To compare bi-dimensional arrays use [`MultiTargetRegression`](MultiTargetRegression).
 pub trait SingleTargetRegression<F: Float, T: AsSingleTargets<Elem = F>>:
     AsSingleTargets<Elem = F>
 {
@@ -124,7 +124,7 @@ impl<F: Float, T: AsSingleTargets<Elem = F>, T2: AsSingleTargets<Elem = F>, D: D
 /// Regression metrices trait for multiple targets.
 ///
 /// It is possible to compute the listed mectrics between two 2D arrays.
-/// To compare single-dimensional arrays use [`SingleTargetRegression`](trait.SingleTargetRegression.html).
+/// To compare single-dimensional arrays use [`SingleTargetRegression`](SingleTargetRegression).
 pub trait MultiTargetRegression<F: Float, T: AsMultiTargets<Elem = F>>:
     AsMultiTargets<Elem = F>
 {
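With `RUSTDOCFLAGS: -D warnings` set in the workflow, any intra-doc link that fails to resolve now fails the documentation build in CI. A crate can opt into the same guarantee locally; a minimal sketch, assuming a toolchain recent enough to ship the `rustdoc::broken_intra_doc_links` lint (the `Widget` item is hypothetical):

```rust
// Promote unresolved intra-doc links to hard errors for this crate alone,
// mirroring `RUSTDOCFLAGS='-D warnings' cargo doc --workspace --no-deps` in CI.
#![deny(rustdoc::broken_intra_doc_links)]

/// The path below is checked by rustdoc at build time; a typo such as
/// `crate::Wigdet` would now abort `cargo doc` instead of shipping a dead link.
/// See [`Widget`](crate::Widget).
pub struct Widget;
```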