From b7bdb3e6d622f2206ea20767ce38b0f011b95a2b Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Fri, 27 May 2022 03:24:27 -0400
Subject: [PATCH 1/9] added isotonic regression algo

---
 algorithms/linfa-linear/src/isotonic.rs | 337 ++++++++++++++++++++++++
 algorithms/linfa-linear/src/lib.rs      |   3 +
 2 files changed, 340 insertions(+)
 create mode 100644 algorithms/linfa-linear/src/isotonic.rs

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
new file mode 100644
index 000000000..d6e8c6b53
--- /dev/null
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -0,0 +1,337 @@
+//! Isotonic
+#![allow(non_snake_case)]
+use crate::error::{LinearError, Result};
+use ndarray::{s, stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
+use serde::{Deserialize, Serialize};
+use std::cmp::Ordering;
+
+use linfa::dataset::{AsSingleTargets, DatasetBase};
+use linfa::traits::{Fit, PredictInplace};
+
+pub trait Float: linfa::Float {}
+impl Float for f32 {}
+impl Float for f64 {}
+
+#[derive(Serialize, Deserialize)]
+/// An isotonic regression model.
+///
+/// IsotonicRegression solves an isotonic regression problem using the pool
+/// adjacent violators algorithm.
+///
+/// /// ## Examples
+///
+/// Here's an example of how to train an isotonic regression model on one feature
+/// from the `diabetes` dataset.
+/// ```rust
+/// use linfa::{traits::Fit, traits::Predict, Dataset};
+/// use linfa_linear::IsotonicRegression;
+/// use linfa::dataset::Records;
+/// use linfa::prelude::SingleTargetRegression;
+///
+/// let diabetes = linfa_datasets::diabetes();
+/// let feature1d = diabetes.records().column(0) // first feature of the dataset
+///     .to_shape((diabetes.nsamples(), 1)) // form Nx1 dataset
+///     .unwrap().to_owned();
+/// let dataset = Dataset::new(feature1d, diabetes.targets().to_owned());
+/// let model = IsotonicRegression::default().fit(&dataset).unwrap();
+/// let pred = model.predict(&dataset);
+/// let r2 = pred.r2(&dataset).unwrap();
+/// println!("r2 from prediction: {}", r2);
+/// ```
+pub struct IsotonicRegression {}
+
+#[derive(Serialize, Deserialize)]
+/// A fitted isotonic regression model which can be used for making predictions.
+pub struct FittedIsotonicRegression<F> {
+    regressor: Array1<F>,
+    response: Array1<F>,
+}
+
+impl Default for IsotonicRegression {
+    fn default() -> Self {
+        IsotonicRegression::new()
+    }
+}
+
+/// Configure and fit an isotonic regression model
+impl IsotonicRegression {
+    /// Create a default isotonic regression model.
+    pub fn new() -> IsotonicRegression {
+        IsotonicRegression {}
+    }
+}
+
+impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
+    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression
+{
+    type Object = FittedIsotonicRegression<F>;
+
+    /// Fit an isotonic regression model given a feature matrix `X` and a target
+    /// variable `y`.
+    ///
+    /// The feature matrix `X` must have shape `(n_samples, 1)`
+    ///
+    /// The target variable `y` must have shape `(n_samples)`
+    ///
+    /// Returns a `FittedIsotonicRegression` object which contains the fitted
+    /// parameters and can be used to `predict` values of the target variable
+    /// for new feature values.
+    fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object, F> {
+        let X = dataset.records();
+        let (n_samples, dim) = X.dim();
+        let y = dataset.as_single_targets();
+
+        // Check the input dimension
+        assert_eq!(dim, 1, "The input dimension must be 1.");
+
+        // Check that our inputs have compatible shapes
+        assert_eq!(y.dim(), n_samples);
+
+        // use correlation for determining relationship between x & y
+        let x = X.slice(s![.., 0]);
+        let rho = DatasetBase::from(stack![Axis(1), x, y]).pearson_correlation();
+        let incresing = rho.get_coeffs()[0] >= F::zero();
+
+        // sort data
+        let indices = argsort_by(&x, |a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater));
+
+        // PVA algorithm
+        let mut J: Vec<(F, F, Vec<usize>)> = indices
+            .iter()
+            .map(|&i| (y[i], F::cast(dataset.weight_for(i)), vec![i]))
+            .collect();
+        if !incresing {
+            J.reverse();
+        };
+
+        //println!("{:#?} {:#?}", indices, J);
+        let mut i: usize = 0;
+        while i < (J.len() - 1) {
+            let B_zero = &J[i];
+            let B_plus = &J[i + 1];
+            if B_zero.0 <= B_plus.0 {
+                i += 1;
+            } else {
+                let w0 = B_zero.1 + B_plus.1;
+                let v0 = (B_zero.0 * B_zero.1 + B_plus.0 * B_plus.1) / w0;
+                let mut i0 = B_zero.2.clone();
+                i0.extend(&(B_plus.2));
+                J[i] = (v0, w0, i0);
+                J.remove(i + 1);
+                let idx = i;
+                while i > 0 {
+                    let B_minus = &J[i - 1];
+                    if v0 <= B_minus.0 {
+                        let ww = w0 + B_minus.1;
+                        let vv = (v0 * w0 + B_minus.0 * B_minus.1) / ww;
+                        let mut ii = J[idx].2.clone();
+                        ii.extend(&(B_minus.2));
+                        J[i] = (vv, ww, ii);
+                        J.remove(i - 1);
+                        i -= 1;
+                    } else {
+                        break;
+                    }
+                }
+            }
+        }
+        if !incresing {
+            J.reverse();
+        };
+        //println!("{:#?}", J);
+
+        let params: (Vec<F>, Vec<F>) = J
+            .iter()
+            .map(|e| {
+                (
+                    e.0,
+                    x.select(Axis(0), &e.2)
+                        .into_iter()
+                        .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
+                        .unwrap(),
+                )
+            })
+            .unzip();
+        let regressor = Array1::from_vec(params.1);
+        let response = Array1::from_vec(params.0);
+
+        /*
+        let mut params = Array2::from_elem((J.len(), 2), F::zero());
+        for (i, mut row) in params.axis_iter_mut(Axis(0)).enumerate() {
+            // Perform calculations and assign to `row`; this is a trivial example:
+            let (v, _, idx) = &J[i];
+            row[0] = *v;
+            row[1] = x
+                .select(Axis(0), idx)
+                .into_iter()
+                .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
+                .unwrap();
+        }
+        println!("{:#?}", params);
+        let regressor = params.column(0).to_owned();
+        let response = params.column(1).to_owned();
+        */
+
+        Ok(FittedIsotonicRegression {
+            regressor,
+            response,
+        })
+    }
+}
+
+fn argsort_by<S, F>(arr: &ArrayBase<S, Ix1>, mut compare: F) -> Vec<usize>
+where
+    S: Data,
+    F: FnMut(&S::Elem, &S::Elem) -> Ordering,
+{
+    let mut indices: Vec<usize> = (0..arr.len()).collect();
+    indices.sort_unstable_by(move |&i, &j| compare(&arr[i], &arr[j]));
+    indices
+}
+
+impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
+    for FittedIsotonicRegression<F>
+{
+    /// Given an input matrix `X`, with shape `(n_samples, 1)`,
+    /// `predict` returns the target variable according to the model
+    /// learned from the training data distribution.
+    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
+        let (n_samples, dim) = x.dim();
+
+        // Check the input dimension
+        assert_eq!(dim, 1, "The input dimension must be 1.");
+
+        // Check that our inputs have compatible shapes
+        assert_eq!(
+            n_samples,
+            y.len(),
+            "The number of data points must match the number of output targets."
+        );
+
+        let regressor = &self.regressor;
+        let n = regressor.len();
+        let x_min = regressor[0];
+        let x_max = regressor[n - 1];
+
+        let response = &self.response;
+        let y_min = response[0];
+        let y_max = response[n - 1];
+
+        let regressor = &self.regressor;
+
+        for (i, row) in x.rows().into_iter().enumerate() {
+            let val = row[0];
+            if val >= x_max {
+                y[i] = y_max;
+            } else if val <= x_min {
+                y[i] = y_min;
+            } else {
+                let res = regressor.into_iter().position(|x| x >= &val);
+                if res.is_some() {
+                    let j = res.unwrap();
+                    if val <= regressor[j] && j < n {
+                        let x_scale = (val - regressor[j - 1]) / (regressor[j] - regressor[j - 1]);
+                        y[i] = response[j - 1] + x_scale * (response[j] - response[j - 1]);
+                    } else {
+                        y[i] = y_min;
+                    }
+                }
+            }
+        }
+    }
+
+    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
+        Array1::zeros(x.nrows())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use approx::assert_abs_diff_eq;
+    use linfa::{traits::Predict, Dataset};
+    use ndarray::array;
+
+    #[test]
+    fn dimension_mismatch() {
+        let reg = IsotonicRegression::new();
+        let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5.]);
+        let res = std::panic::catch_unwind(|| reg.fit(&dataset));
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn length_mismatch() {
+        let reg = IsotonicRegression::new();
+        let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5., 6.]);
+        let res = std::panic::catch_unwind(|| reg.fit(&dataset));
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn best_example1() {
+        let reg = IsotonicRegression::new();
+        let dataset = Dataset::new(
+            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
+            array![4., 5., 1., 6., 8., 7.0],
+        );
+        let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &array![3.3, 6., 7.5], epsilon = 1e-12);
+        assert_abs_diff_eq!(
+            model.response,
+            &array![10.0 / 3.0, 6., 7.5],
+            epsilon = 1e-12
+        );
+
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(
+            result,
+            &array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5],
+            epsilon = 1e-12
+        );
+
+        let xs = array![[2.0f64], [5.], [7.0], [9.0]];
+        let result = model.predict(&xs);
+        assert_abs_diff_eq!(
+            result,
+            &array![10. / 3., 5.01234567901234, 7., 7.5],
+            epsilon = 1e-12
+        );
+    }
+
+    #[test]
+    fn decr_best_example1() {
+        let reg = IsotonicRegression::new();
+        let dataset = Dataset::new(
+            array![[7.5f64], [7.5], [6.], [3.3], [3.3], [3.3]],
+            array![4., 5., 1., 6., 8., 7.0],
+        );
+        let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &array![3.3, 7.5], epsilon = 1e-12);
+        assert_abs_diff_eq!(model.response, &array![7.0, 10.0 / 3.0], epsilon = 1e-12);
+
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(
+            result,
+            &array![10. / 3., 10. / 3., 4.64285714285714, 7., 7., 7.],
+            epsilon = 1e-12
+        );
+    }
+    /*
+    #[test]
+    fn wine() {
+        let diabetes = linfa_datasets::diabetes();
+        let feature1d = diabetes
+            .records()
+            .column(0)
+            .to_shape((diabetes.nsamples(), 1))
+            .unwrap()
+            .to_owned();
+        let dataset = Dataset::new(feature1d, diabetes.targets().to_owned());
+        let model = IsotonicRegression::default().fit(&dataset).unwrap();
+        let pred = model.predict(&dataset);
+        let r2 = pred.r2(&dataset).unwrap();
+        println!("r2 from prediction: {}", r2);
+    }
+    */
+}
diff --git a/algorithms/linfa-linear/src/lib.rs b/algorithms/linfa-linear/src/lib.rs
index 0df8cb778..93bde4d4f 100644
--- a/algorithms/linfa-linear/src/lib.rs
+++ b/algorithms/linfa-linear/src/lib.rs
@@ -11,6 +11,7 @@
 //! `linfa-linear` currently provides an implementation of the following regression algorithms:
 //! - Ordinary Least Squares
 //! - Generalized Linear Models (GLM)
+//! - Isotonic
 //!
 //! ## Examples
 //!
@@ -24,8 +25,10 @@
 mod error;
 mod float;
 mod glm;
+mod isotonic;
 mod ols;
 
 pub use error::*;
 pub use glm::*;
+pub use isotonic::*;
 pub use ols::*;
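Patch 1's `fit` keeps a list of blocks `J`, each holding a weighted mean, a total weight, and the pooled sample indices, and merges adjacent blocks whenever their means violate monotonicity. Reduced to plain slices, the same pooling step looks roughly like the sketch below (illustrative only; `pava`, `ys`, and `ws` are not names from the patch):

```rust
// A minimal pool-adjacent-violators sketch for an increasing fit.
fn pava(ys: &[f64], ws: &[f64]) -> Vec<f64> {
    // Each block keeps (weighted mean, total weight, pooled point count).
    let mut blocks: Vec<(f64, f64, usize)> = Vec::new();
    for (&y, &w) in ys.iter().zip(ws) {
        blocks.push((y, w, 1));
        // Merge backwards while the previous block's mean exceeds this one's.
        while blocks.len() > 1 {
            let (v1, w1, n1) = blocks[blocks.len() - 1];
            let (v0, w0, n0) = blocks[blocks.len() - 2];
            if v0 <= v1 {
                break;
            }
            blocks.truncate(blocks.len() - 2);
            blocks.push(((v0 * w0 + v1 * w1) / (w0 + w1), w0 + w1, n0 + n1));
        }
    }
    // Expand the block means back to one fitted value per input point.
    blocks
        .iter()
        .flat_map(|&(v, _, n)| std::iter::repeat(v).take(n))
        .collect()
}

fn main() {
    // Matches `best_example1` above: [4, 5, 1] pools into one block with mean 10/3.
    let fitted = pava(&[4., 5., 1., 6., 8., 7.], &[1.; 6]);
    println!("{fitted:?}"); // [3.33.., 3.33.., 3.33.., 6.0, 7.5, 7.5]
}
```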
From 304f0c6f119bfece795c51d4ade69e9a16f93643 Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Fri, 27 May 2022 19:56:49 -0400
Subject: [PATCH 2/9] fix doctest & clean up

---
 algorithms/linfa-linear/src/isotonic.rs | 51 +++----------------------
 1 file changed, 5 insertions(+), 46 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index d6e8c6b53..aac7f1b74 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -20,19 +20,15 @@ impl Float for f64 {}
 ///
 /// /// ## Examples
 ///
-/// Here's an example of how to train an isotonic regression model on one feature
-/// from the `diabetes` dataset.
+/// Here's an example of how to train an isotonic regression model on
+/// the first feature from the `diabetes` dataset.
 /// ```rust
 /// use linfa::{traits::Fit, traits::Predict, Dataset};
 /// use linfa_linear::IsotonicRegression;
-/// use linfa::dataset::Records;
 /// use linfa::prelude::SingleTargetRegression;
 ///
 /// let diabetes = linfa_datasets::diabetes();
-/// let feature1d = diabetes.records().column(0) // first feature of the dataset
-///     .to_shape((diabetes.nsamples(), 1)) // form Nx1 dataset
-///     .unwrap().to_owned();
-/// let dataset = Dataset::new(feature1d, diabetes.targets().to_owned());
+/// let dataset = diabetes.feature_iter().next().unwrap(); // get first 1D feature
 /// let model = IsotonicRegression::default().fit(&dataset).unwrap();
 /// let pred = model.predict(&dataset);
 /// let r2 = pred.r2(&dataset).unwrap();
@@ -104,7 +100,6 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
             J.reverse();
         };
 
-        //println!("{:#?} {:#?}", indices, J);
         let mut i: usize = 0;
         while i < (J.len() - 1) {
             let B_zero = &J[i];
@@ -138,8 +133,8 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
         if !incresing {
             J.reverse();
         };
-        //println!("{:#?}", J);
 
+        // Form model parameters
         let params: (Vec<F>, Vec<F>) = J
             .iter()
             .map(|e| {
@@ -154,24 +149,6 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
             .unzip();
         let regressor = Array1::from_vec(params.1);
         let response = Array1::from_vec(params.0);
-
-        /*
-        let mut params = Array2::from_elem((J.len(), 2), F::zero());
-        for (i, mut row) in params.axis_iter_mut(Axis(0)).enumerate() {
-            // Perform calculations and assign to `row`; this is a trivial example:
-            let (v, _, idx) = &J[i];
-            row[0] = *v;
-            row[1] = x
-                .select(Axis(0), idx)
-                .into_iter()
-                .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
-                .unwrap();
-        }
-        println!("{:#?}", params);
-        let regressor = params.column(0).to_owned();
-        let response = params.column(1).to_owned();
-        */
-
         Ok(FittedIsotonicRegression {
             regressor,
             response,
@@ -217,8 +194,7 @@ impl<F: Float, D: Data<Elem = F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
         let y_min = response[0];
         let y_max = response[n - 1];
 
-        let regressor = &self.regressor;
-
+        // calculate a piecewise linear approximation of response
         for (i, row) in x.rows().into_iter().enumerate() {
             let val = row[0];
             if val >= x_max {
@@ -317,21 +293,4 @@ mod tests {
             epsilon = 1e-12
         );
     }
-    /*
-    #[test]
-    fn wine() {
-        let diabetes = linfa_datasets::diabetes();
-        let feature1d = diabetes
-            .records()
-            .column(0)
-            .to_shape((diabetes.nsamples(), 1))
-            .unwrap()
-            .to_owned();
-        let dataset = Dataset::new(feature1d, diabetes.targets().to_owned());
-        let model = IsotonicRegression::default().fit(&dataset).unwrap();
-        let pred = model.predict(&dataset);
-        let r2 = pred.r2(&dataset).unwrap();
-        println!("r2 from prediction: {}", r2);
-    }
-    */
 }

From 2fe35301777312cb1e364192155c2b051306ad59 Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Fri, 27 May 2022 20:13:45 -0400
Subject: [PATCH 3/9] rebased and added missing traits

---
 algorithms/linfa-linear/src/isotonic.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index aac7f1b74..63d8062e7 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -12,7 +12,7 @@ pub trait Float: linfa::Float {}
 impl Float for f32 {}
 impl Float for f64 {}
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 /// An isotonic regression model.
 ///
 /// IsotonicRegression solves an isotonic regression problem using the pool
@@ -36,7 +36,7 @@ impl Float for f64 {}
 /// ```
 pub struct IsotonicRegression {}
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 /// A fitted isotonic regression model which can be used for making predictions.
 pub struct FittedIsotonicRegression<F> {
     regressor: Array1<F>,
     response: Array1<F>,
 }
@@ -228,6 +228,14 @@ mod tests {
     use linfa::{traits::Predict, Dataset};
     use ndarray::array;
 
+    #[test]
+    fn autotraits() {
+        fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
+        has_autotraits::<FittedIsotonicRegression<f64>>();
+        has_autotraits::<IsotonicRegression>();
+        has_autotraits::<LinearError<f64>>();
+    }
+
     #[test]
     fn dimension_mismatch() {
         let reg = IsotonicRegression::new();

From e71f06ae9b177b3e246988bda750626c5088b3c3 Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Mon, 30 May 2022 14:39:55 -0400
Subject: [PATCH 4/9] fixed typos, removed unnecessary declarations

---
 algorithms/linfa-linear/src/isotonic.rs | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index 63d8062e7..d78174d05 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -1,7 +1,7 @@
 //! Isotonic
 #![allow(non_snake_case)]
 use crate::error::{LinearError, Result};
-use ndarray::{s, stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
+use ndarray::{stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
 use serde::{Deserialize, Serialize};
 use std::cmp::Ordering;
 
@@ -12,7 +12,7 @@ pub trait Float: linfa::Float {}
 impl Float for f32 {}
 impl Float for f64 {}
 
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
 /// An isotonic regression model.
 ///
 /// IsotonicRegression solves an isotonic regression problem using the pool
@@ -43,12 +43,6 @@ pub struct FittedIsotonicRegression<F> {
     response: Array1<F>,
 }
 
-impl Default for IsotonicRegression {
-    fn default() -> Self {
-        IsotonicRegression::new()
-    }
-}
-
 /// Configure and fit an isotonic regression model
 impl IsotonicRegression {
     /// Create a default isotonic regression model.
@@ -84,9 +78,9 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
         assert_eq!(y.dim(), n_samples);
 
         // use correlation for determining relationship between x & y
-        let x = X.slice(s![.., 0]);
+        let x = X.column(0);
         let rho = DatasetBase::from(stack![Axis(1), x, y]).pearson_correlation();
-        let incresing = rho.get_coeffs()[0] >= F::zero();
+        let increasing = rho.get_coeffs()[0] >= F::zero();
 
         // sort data
         let indices = argsort_by(&x, |a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater));
@@ -96,7 +90,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
             .iter()
             .map(|&i| (y[i], F::cast(dataset.weight_for(i)), vec![i]))
             .collect();
-        if !incresing {
+        if !increasing {
             J.reverse();
         };
 
@@ -109,7 +103,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
             } else {
                 let w0 = B_zero.1 + B_plus.1;
                 let v0 = (B_zero.0 * B_zero.1 + B_plus.0 * B_plus.1) / w0;
-                let mut i0 = B_zero.2.clone();
+                let mut i0 = B_zero.2.to_vec();
                 i0.extend(&(B_plus.2));
                 J[i] = (v0, w0, i0);
                 J.remove(i + 1);
@@ -119,7 +113,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
                     if v0 <= B_minus.0 {
                         let ww = w0 + B_minus.1;
                         let vv = (v0 * w0 + B_minus.0 * B_minus.1) / ww;
-                        let mut ii = J[idx].2.clone();
+                        let mut ii = J[idx].2.to_vec();
                         ii.extend(&(B_minus.2));
                         J[i] = (vv, ww, ii);
                         J.remove(i - 1);
@@ -130,7 +124,7 @@ impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
                 }
             }
         }
-        if !incresing {
+        if !increasing {
             J.reverse();
         };
 

From fe364531aa0d132e0edd8dd7bc074e5534fc006f Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Sun, 5 Jun 2022 03:16:20 -0400
Subject: [PATCH 5/9] rewrote PVA & added AlgorithmA implementations

---
 algorithms/linfa-linear/src/isotonic.rs | 405 +++++++++++++++++++-----
 1 file changed, 325 insertions(+), 80 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index d78174d05..ab2a10a05 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -1,7 +1,7 @@
 //! Isotonic
 #![allow(non_snake_case)]
 use crate::error::{LinearError, Result};
-use ndarray::{stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
+use ndarray::{s, stack, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
 use serde::{Deserialize, Serialize};
 use std::cmp::Ordering;
 
@@ -12,7 +12,200 @@ pub trait Float: linfa::Float {}
 impl Float for f32 {}
 impl Float for f64 {}
 
+// Isotonic regression (IR) algorithms
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct PVA;
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct AlgorithmA;
+
+// IR trait
+pub trait IR {
+    fn evaluate<F, D>(
+        &self,
+        y: &ArrayBase<D, Ix1>,
+        weights: Option<&[f32]>,
+        index: &Vec<usize>,
+    ) -> (Vec<F>, Vec<usize>)
+    where
+        F: Float,
+        D: Data<Elem = F>;
+}
+
+/// An implementation of algorithm A from Best (1990)
+/// for solving the IR problem
+impl IR for AlgorithmA {
+    fn evaluate<F, D>(
+        &self,
+        ys: &ArrayBase<D, Ix1>,
+        weights: Option<&[f32]>,
+        index: &Vec<usize>,
+    ) -> (Vec<F>, Vec<usize>)
+    where
+        F: Float,
+        D: Data<Elem = F>,
+    {
+        let n = ys.len();
+
+        // Precompute partial averages
+        let mut AvU = vec![F::zero(); n + 1];
+        let mut wsum = F::zero();
+        for i in 1..=n {
+            let ii = n - i;
+            let ui = index[ii];
+            let w = if weights.is_none() {
+                F::one()
+            } else {
+                F::cast(weights.unwrap()[ui])
+            };
+            AvU[ii] = (ys[ui] * w + AvU[ii + 1] * wsum) / (wsum + w);
+            wsum += w;
+        }
+        //println!("y:{:?}\nAvU:{:?}", y, AvU);
+
+        let mut V = Vec::<F>::new();
+        let mut W = Vec::<F>::new();
+        let mut J_index = vec![0_usize; n];
+        J_index[0] = n - 1;
+        let mut i = 0; // B0
+        let mut j = 1; // Uj
+        while i + j < n {
+            let AvB0 = AvU[i];
+            //println!("AvB0: {AvB0}, i:{i}, j:{j}, AvUj:{}", AvU[i + j]);
+            //println!("i:{i} j:{j}, V:{:?} W:{:?}, J:{:?}", V, W, J_index);
+            if AvU[i + j] <= AvB0 {
+                // Step 1
+                j += 1;
+            } else {
+                // Step 2
+                let B_minus_index = i + j - 1;
+                J_index[i] = B_minus_index; // B- = L_j
+                J_index[B_minus_index] = i;
+                J_index[B_minus_index + 1] = n - 1; // B0 = U_j
+                J_index[n - 1] = B_minus_index + 1;
+
+                // Step 2.1
+                let (mut AvB_minus, mut B_minus_w) = waverage(&ys, weights, i, J_index[i], &index);
+
+                let mut P_B_minus_index = i;
+                let AvP_B_minus = *V.last().unwrap_or(&F::neg_infinity());
+                while V.len() > 0 && AvB_minus < AvP_B_minus {
+                    if AvB_minus <= AvP_B_minus {
+                        P_B_minus_index -= 1;
+                        let AvP_B_minus = V.pop().unwrap();
+                        let P_B_minus_w = W.pop().unwrap();
+                        //println!(
+                        //    "AvB-:{AvB_minus}*{B_minus_w}, AvP(B-):{AvP_B_minus}*{P_B_minus_w}, P(B-)[{P_B_minus_index}]"
+                        //    );
+
+                        // New Av(B-)
+                        AvB_minus = (AvB_minus * B_minus_w + AvP_B_minus * P_B_minus_w)
+                            / (B_minus_w + P_B_minus_w);
+                        B_minus_w += P_B_minus_w;
+
+                        // New B-
+                        let P_B_minus_start = J_index[P_B_minus_index];
+                        J_index[P_B_minus_start] = B_minus_index;
+                        J_index[B_minus_index] = P_B_minus_index;
+                        //println!(
+                        //    "P(B-)[{P_B_minus_index}], V:{:?} W:{:?}, J:{:?}",
+                        //    V, W, J_index
+                        //);
+                    }
+                }
+
+                V.push(AvB_minus);
+                W.push(B_minus_w);
+                i = B_minus_index + 1;
+                j = 1;
+                //println!("i:{i} j:{j}, V:{:?} W:{:?}, J:{:?}", V, W, J_index);
+            }
+        }
+        //println!("AA] i:{i}, j:{j} {:?}", J_index);
+
+        // Last block average
+        let (AvB_minus, _) = waverage(&ys, weights, i, J_index[i], &index);
+        V.push(AvB_minus);
+
+        (V, J_index)
+    }
+}
+
+/// An implementation of the PVA algorithm from Best (1990)
+/// for solving the IR problem
+impl IR for PVA {
+    fn evaluate<F, D>(
+        &self,
+        ys: &ArrayBase<D, Ix1>,
+        weights: Option<&[f32]>,
+        index: &Vec<usize>,
+    ) -> (Vec<F>, Vec<usize>)
+    where
+        F: Float,
+        D: Data<Elem = F>,
+    {
+        let n = ys.len();
+        let
 mut V = Vec::<F>::new();
+        let mut W = Vec::<F>::new();
+        let mut J_index: Vec<usize> = (0..n).collect();
+        let mut i = 0;
+        let (mut AvB_zero, mut W_B_zero) = waverage(&ys, weights, i, i, &index);
+        //println!("Y:{:?}", ys);
+        //println!("I:{:?}", index);
+        //println!("y:{:?}", ys.select(Axis(0), &index));
+        while i < n {
+            // Step 1
+            let j = J_index[i];
+            let k = j + 1;
+            if k == n {
+                break;
+            }
+            let l = J_index[k];
+            let (AvB_plus, W_B_plus) = waverage(&ys, weights, k, l, &index);
+            //println!("i:{i} j:{j}, k:{k}, l:{l}, Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₊):{AvB_plus}[{W_B_plus}]");
+            if AvB_zero <= AvB_plus {
+                V.push(AvB_zero);
+                W.push(W_B_zero);
+                AvB_zero = AvB_plus;
+                W_B_zero = W_B_plus;
+                i = k;
+                //println!(
+                //    "i:{i} Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₊):{AvB_plus}[{W_B_plus}], {:?}",
+                //    J_index
+                //);
+            } else {
+                // Step 2
+                J_index[i] = l;
+                J_index[l] = i;
+                AvB_zero = AvB_zero * W_B_zero + AvB_plus * W_B_plus;
+                W_B_zero += W_B_plus;
+                AvB_zero /= W_B_zero;
+                //println!("i:{i} j:{j}, k:{k}, l:{l}, Av(B₀):{AvB_zero}[{W_B_zero}], {:?}", J_index);
+                // Step 2.1
+                let mut AvB_minus = *V.last().unwrap_or(&F::neg_infinity());
+                while V.len() > 0 && AvB_zero <= AvB_minus {
+                    AvB_minus = V.pop().unwrap();
+                    let W_B_minus = W.pop().unwrap();
+                    i = J_index[J_index[l] - 1];
+                    J_index[l] = i;
+                    J_index[i] = l;
+                    AvB_zero = AvB_zero * W_B_zero + AvB_minus * W_B_minus;
+                    W_B_zero += W_B_minus;
+                    AvB_zero /= W_B_zero;
+                    //println!("i:{i} j:{j}, Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₋):{AvB_minus}[{W_B_minus}], {:?}", J_index);
+                }
+            }
+            //println!("i:{i} V:{:?} W:{:?}, J:{:?}", V, W, J_index);
+        }
+
+        // Last block average
+        let (AvB_minus, _) = waverage(&ys, weights, i, J_index[i], &index);
+        V.push(AvB_minus);
+
+        (V, J_index)
+    }
+}
+
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
 /// An isotonic regression model.
 ///
 /// IsotonicRegression solves an isotonic regression problem using the pool
 /// adjacent violators algorithm.
 ///
 /// /// ## Examples
 ///
 /// Here's an example of how to train an isotonic regression model on
 /// the first feature from the `diabetes` dataset.
 /// ```rust
 /// use linfa::{traits::Fit, traits::Predict, Dataset};
 /// use linfa_linear::IsotonicRegression;
 /// use linfa::prelude::SingleTargetRegression;
 ///
 /// let diabetes = linfa_datasets::diabetes();
 /// let dataset = diabetes.feature_iter().next().unwrap(); // get first 1D feature
 /// let model = IsotonicRegression::default().fit(&dataset).unwrap();
 /// let pred = model.predict(&dataset);
 /// let r2 = pred.r2(&dataset).unwrap();
 /// println!("r2 from prediction: {}", r2);
 /// ```
-pub struct IsotonicRegression {}
+pub struct IsotonicRegression<A> {
+    algo: A,
+}
 
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 /// A fitted isotonic regression model which can be used for making predictions.
 pub struct FittedIsotonicRegression<F> {
     regressor: Array1<F>,
     response: Array1<F>,
 }
 
 /// Configure and fit an isotonic regression model
-impl IsotonicRegression {
+impl<A: IR + Default> IsotonicRegression<A> {
     /// Create a default isotonic regression model.
-    pub fn new() -> IsotonicRegression {
-        IsotonicRegression {}
+    pub fn new() -> IsotonicRegression<A> {
+        IsotonicRegression { algo: A::default() }
     }
 }
 
-impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
-    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression
+impl<A: IR + Default> Default for IsotonicRegression<A> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>, A: IR>
+    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression<A>
 {
     type Object = FittedIsotonicRegression<F>;
 
     /// Fit an isotonic regression model given a feature matrix `X` and a target
     /// variable `y`.
     ///
     /// The feature matrix `X` must have shape `(n_samples, 1)`
     ///
     /// The target variable `y` must have shape `(n_samples)`
     ///
     /// Returns a `FittedIsotonicRegression` object which contains the fitted
     /// parameters and can be used to `predict` values of the target variable
     /// for new feature values.
     fn fit(&self, dataset: &DatasetBase<ArrayBase<D, Ix2>, T>) -> Result<Self::Object, F> {
         let X = dataset.records();
-        let (n_samples, dim) = X.dim();
+        let (n, dim) = X.dim();
         let y = dataset.as_single_targets();
 
         // Check the input dimension
         assert_eq!(dim, 1, "The input dimension must be 1.");
 
         // Check that our inputs have compatible shapes
-        assert_eq!(y.dim(), n_samples);
+        assert_eq!(y.dim(), n);
 
         // use correlation for determining relationship between x & y
         let x = X.column(0);
         let rho = DatasetBase::from(stack![Axis(1), x, y]).pearson_correlation();
         let increasing = rho.get_coeffs()[0] >= F::zero();
 
         // sort data
-        let indices = argsort_by(&x, |a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater));
-
-        // PVA algorithm
-        let mut J: Vec<(F, F, Vec<usize>)> = indices
-            .iter()
-            .map(|&i| (y[i], F::cast(dataset.weight_for(i)), vec![i]))
-            .collect();
+        let mut indices = argsort_by(&x, |a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater));
         if !increasing {
-            J.reverse();
+            indices.reverse();
         };
 
-        let mut i: usize = 0;
-        while i < (J.len() - 1) {
-            let B_zero = &J[i];
-            let B_plus = &J[i + 1];
-            if B_zero.0 <= B_plus.0 {
-                i += 1;
-            } else {
-                let w0 = B_zero.1 + B_plus.1;
-                let v0 = (B_zero.0 * B_zero.1 + B_plus.0 * B_plus.1) / w0;
-                let mut i0 = B_zero.2.to_vec();
-                i0.extend(&(B_plus.2));
-                J[i] = (v0, w0, i0);
-                J.remove(i + 1);
-                let idx = i;
-                while i > 0 {
-                    let B_minus = &J[i - 1];
-                    if v0 <= B_minus.0 {
-                        let ww = w0 + B_minus.1;
-                        let vv = (v0 * w0 + B_minus.0 * B_minus.1) / ww;
-                        let mut ii = J[idx].2.to_vec();
-                        ii.extend(&(B_minus.2));
-                        J[i] = (vv, ww, ii);
-                        J.remove(i - 1);
-                        i -= 1;
-                    } else {
-                        break;
-                    }
-                }
-            }
-        }
-        if !increasing {
-            J.reverse();
-        };
+        // Construct response value using particular algorithm
+        let (V, J_index) = self.algo.evaluate(&y, dataset.weights(), &indices);
+        let response = Array1::from_vec(V.clone());
 
-        // Form model parameters
-        let params: (Vec<F>, Vec<F>) = J
-            .iter()
-            .map(|e| {
-                (
-                    e.0,
-                    x.select(Axis(0), &e.2)
-                        .into_iter()
-                        .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
-                        .unwrap(),
-                )
-            })
-            .unzip();
-        let regressor = Array1::from_vec(params.1);
-        let response = Array1::from_vec(params.0);
+        // Construct regressor array
+        let mut W = Vec::<F>::new();
+        let mut i = 0;
+        while i < n {
+            let j = J_index[i];
+            let x = X
+                .slice(s![i..=j, -1])
+                .into_iter()
+                .max_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Greater))
+                .unwrap();
+            W.push(*x);
+            i = j + 1
+        }
+        let regressor = Array1::from_vec(W.clone());
+
         Ok(FittedIsotonicRegression {
             regressor,
             response,
         })
     }
 }
 
+fn waverage<F, D>(
+    vs: &ArrayBase<D, Ix1>,
+    ws: Option<&[f32]>,
+    start: usize,
+    end: usize,
+    index: &Vec<usize>,
+) -> (F, F)
+where
+    F: Float,
+    D: Data<Elem = F>,
+{
+    let mut wsum = F::zero();
+    let mut avg = F::zero();
+    for k in start..=end {
+        let kk = index[k];
+        let w = if ws.is_none() {
+            F::one()
+        } else {
+            F::cast(ws.unwrap()[kk])
+        };
+        wsum += w;
+        avg += vs[kk] * w;
+    }
+    avg /= wsum;
+    (avg, wsum)
+}
+
 fn argsort_by<S, F>(arr: &ArrayBase<S, Ix1>, mut compare: F) -> Vec<usize>
 where
     S: Data,
     F: FnMut(&S::Elem, &S::Elem) -> Ordering,
 {
     fn autotraits() {
         fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
         has_autotraits::<FittedIsotonicRegression<f64>>();
-        has_autotraits::<IsotonicRegression>();
+        has_autotraits::<IsotonicRegression<PVA>>();
         has_autotraits::<LinearError<f64>>();
     }
 
     #[test]
+    #[should_panic]
     fn dimension_mismatch() {
-        let reg = IsotonicRegression::new();
+        let reg = IsotonicRegression::<PVA>::new();
         let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5.]);
-        let res = std::panic::catch_unwind(|| reg.fit(&dataset));
-        assert!(res.is_err());
+        let _res = reg.fit(&dataset);
+        ()
     }
 
     #[test]
+    #[should_panic]
     fn length_mismatch() {
-        let reg = IsotonicRegression::new();
+        let reg = IsotonicRegression::default();
         let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5., 6.]);
-        let res = std::panic::catch_unwind(|| reg.fit(&dataset));
-        assert!(res.is_err());
+        let _res = reg.fit(&dataset);
+        ()
     }
 
     #[test]
-    fn best_example1() {
-        let reg = IsotonicRegression::new();
+    fn best_example1_incr_pva() {
         let dataset = Dataset::new(
             array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
             array![4., 5., 1., 6., 8., 7.0],
         );
+
+        let reg = IsotonicRegression::<PVA>::new();
+
         let model = reg.fit(&dataset).unwrap();
         assert_abs_diff_eq!(model.regressor, &array![3.3, 6., 7.5], epsilon = 1e-12);
         assert_abs_diff_eq!(
             model.response,
             &array![10.0 / 3.0, 6., 7.5],
             epsilon = 1e-12
         );
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(
             result,
             &array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5],
             epsilon = 1e-12
         );
 
         let xs = array![[2.0f64], [5.], [7.0], [9.0]];
         let result = model.predict(&xs);
         assert_abs_diff_eq!(
             result,
             &array![10. / 3., 5.01234567901234, 7., 7.5],
             epsilon = 1e-12
         );
     }
 
     #[test]
-    fn decr_best_example1() {
-        let reg = IsotonicRegression::new();
+    fn best_example1_incr_algoa() {
+        let reg = IsotonicRegression::<AlgorithmA>::new();
         let dataset = Dataset::new(
-            array![[7.5f64], [7.5], [6.], [3.3], [3.3], [3.3]],
+            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
             array![4., 5., 1., 6., 8., 7.0],
         );
         let model = reg.fit(&dataset).unwrap();
-        assert_abs_diff_eq!(model.regressor, &array![3.3, 7.5], epsilon = 1e-12);
-        assert_abs_diff_eq!(model.response, &array![7.0, 10.0 / 3.0], epsilon = 1e-12);
+        assert_abs_diff_eq!(model.regressor, &array![3.3, 6., 7.5], epsilon = 1e-12);
+        assert_abs_diff_eq!(
+            model.response,
+            &array![10.0 / 3.0, 6., 7.5],
+            epsilon = 1e-12
+        );
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(
             result,
-            &array![10. / 3., 10. / 3., 4.64285714285714, 7., 7., 7.],
+            &array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5],
             epsilon = 1e-12
         );
+
+        let xs = array![[2.0f64], [5.], [7.0], [9.0]];
+        let result = model.predict(&xs);
+        assert_abs_diff_eq!(
+            result,
+            &array![10. / 3., 5.01234567901234, 7., 7.5],
+            epsilon = 1e-12
+        );
+    }
+
+    #[test]
+    fn example2_incr() {
+        let reg = IsotonicRegression::default();
+        let dataset = Dataset::new(
+            array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
+            array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
+        );
+        //let dataset = Dataset::new(
+        //    array![[1.0f64], [2.], [3.], [2.], [1.], [2.], [3.], [2.], [1.]],
+        //    array![1., 3., 2., 4., 5., 1., 6., 8., 7.0],
+        //);
+        let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &array![1., 2., 6., 9.], epsilon = 1e-12);
+        assert_abs_diff_eq!(
+            model.response,
+            &array![1., 2., 2.75, 11. / 3.],
+            epsilon = 1e-12
+        );
 
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(
+            result,
+            &array![
+                1.,
+                2.,
+                2.1875,
+                2.375,
+                2.5625,
+                2.75,
+                55. / 18.,
+                121. / 36.,
+                11. / 3.
+            ],
             epsilon = 1e-12
         );
     }
 }
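With this patch applied, the solver is selected at the type level through the `IR` parameter, so both implementations can be run against the same data, as the tests above do. Roughly (a sketch against the API as of this patch; `AlgorithmA` is removed again in patch 8):

```rust
use linfa::traits::Fit;
use linfa::Dataset;
use linfa_linear::{AlgorithmA, IsotonicRegression, PVA};
use ndarray::array;

fn main() {
    // Same data as the `best_example1_incr_*` tests.
    let dataset = Dataset::new(
        array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
        array![4., 5., 1., 6., 8., 7.],
    );

    // The same data fitted with both solvers; the fitted blocks agree.
    let _pva_model = IsotonicRegression::<PVA>::new().fit(&dataset).unwrap();
    let _algo_a_model = IsotonicRegression::<AlgorithmA>::new().fit(&dataset).unwrap();
}
```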
From e7529a02f4913563a04a173b5f0b4a1607ace257 Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Sun, 5 Jun 2022 20:56:41 -0400
Subject: [PATCH 6/9] fixed tests

---
 algorithms/linfa-linear/src/isotonic.rs | 236 ++++++++++++++++--------
 1 file changed, 160 insertions(+), 76 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index ab2a10a05..ef2dbbfc4 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
     #[test]
-    fn best_example1_incr_pva() {
-        let dataset = Dataset::new(
-            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
-            array![4., 5., 1., 6., 8., 7.0],
+    fn best_example1<A: IR>(reg: &IsotonicRegression<A>) {
+        let (X, y, regr, resp, yy, V, w) = (
+            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]], // X
+            array![4., 5., 1., 6., 8., 7.0],                    // y
+            array![3.3, 6., 7.5],                               // regressor
+            array![10.0 / 3.0, 6., 7.5],                        // response
+            array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5], // predict X
+            array![[2.0f64], [5.], [7.], [9.]],                 // newX
+            array![10. / 3., 5.01234567901234, 7., 7.5],        // predict newX
         );
 
-        let reg = IsotonicRegression::<PVA>::new();
+        let dataset = Dataset::new(X, y);
 
         let model = reg.fit(&dataset).unwrap();
-        assert_abs_diff_eq!(model.regressor, &array![3.3, 6., 7.5], epsilon = 1e-12);
-        assert_abs_diff_eq!(
-            model.response,
-            &array![10.0 / 3.0, 6., 7.5],
-            epsilon = 1e-12
-        );
+        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
+        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
 
         let result = model.predict(dataset.records());
-        assert_abs_diff_eq!(
-            result,
-            &array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5],
-            epsilon = 1e-12
-        );
+        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
 
-        let xs = array![[2.0f64], [5.], [7.0], [9.0]];
-        let result = model.predict(&xs);
-        assert_abs_diff_eq!(
-            result,
-            &array![10. / 3., 5.01234567901234, 7., 7.5],
-            epsilon = 1e-12
-        );
+        let result = model.predict(&V);
+        assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
     }
 
-    #[test]
-    fn best_example1_incr_algoa() {
-        let reg = IsotonicRegression::<AlgorithmA>::new();
-        let dataset = Dataset::new(
-            array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
-            array![4., 5., 1., 6., 8., 7.0],
+    fn best_example1_decr<A: IR>(reg: &IsotonicRegression<A>) {
+        let (X, y, regr, resp, yy, V, w) = (
+            array![[7.5], [7.5], [6.], [3.3], [3.3], [3.3]], // X
+            array![4., 5., 1., 6., 8., 7.0],                 // y
+            array![3.3, 6., 7.5],                            // regressor
+            array![10.0 / 3.0, 6., 7.5],                     // response
+            array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5], // predict X
+            array![[2.0f64], [5.], [7.], [9.]],              // newX
+            array![10. / 3., 5.01234567901234, 7., 7.5],     // predict newX
         );
+
+        let dataset = Dataset::new(X, y);
+
         let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
+        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
+
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
+
+        let result = model.predict(&V);
+        assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
+    }
+
+    fn example2_incr<A: IR>(reg: &IsotonicRegression<A>) {
+        let is_pva = std::any::type_name::<A>().ends_with("PVA");
+        let (X, y, regr, resp, yy) = (
+            array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
+            array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
+            if is_pva {
+                array![1., 2., 6., 9.]
+            } else {
+                array![6., 9.]
+            },
+            if is_pva {
+                array![1., 2., 2.75, 11. / 3.]
+            } else {
+                array![7. / 3., 11. / 3.]
+            },
+            if is_pva {
+                array![
+                    1.,
+                    2.,
+                    2.1875,
+                    2.375,
+                    2.5625,
+                    2.75,
+                    55. / 18.,
+                    121. / 36.,
+                    11. / 3.
+                ]
+            } else {
+                let v1 = 7. / 3.;
+                array![v1, v1, v1, v1, v1, v1, 25. / 9., 29. / 9., 11. / 3.]
+            },
+        );
+
+        let dataset = Dataset::new(X, y);
+
+        let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
+        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
+
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
+    }
+
+    fn example2_decr<A: IR>(reg: &IsotonicRegression<A>) {
+        let is_pva = std::any::type_name::<A>().ends_with("PVA");
+        let (X, y, regr, resp, yy) = (
+            array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
+            array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
+            if is_pva {
+                array![1., 2., 6., 9.]
+            } else {
+                array![6., 9.]
+            },
+            if is_pva {
+                array![1., 2., 2.75, 11. / 3.]
+            } else {
+                array![7. / 3., 11. / 3.]
+            },
+            if is_pva {
+                array![
+                    1.,
+                    2.,
+                    2.1875,
+                    2.375,
+                    2.5625,
+                    2.75,
+                    55. / 18.,
+                    121. / 36.,
+                    11. / 3.
+                ]
+            } else {
+                let v1 = 7. / 3.;
+                array![v1, v1, v1, v1, v1, v1, 25. / 9., 29. / 9., 11. / 3.]
+            },
+        );
+
+        let dataset = Dataset::new(X, y);
+
+        let model = reg.fit(&dataset).unwrap();
+        assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
+        assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
+
+        let result = model.predict(dataset.records());
+        assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
+    }
+
+    #[test]
+    fn best_example1_incr_pva() {
+        let reg = IsotonicRegression::<PVA>::new();
+        best_example1(&reg);
+    }
+
+    #[test]
+    fn best_example1_incr_algoa() {
+        let reg = IsotonicRegression::<AlgorithmA>::new();
+        best_example1(&reg);
+    }
+
+    #[test]
+    fn best_example1_decr_pva() {
+        let reg = IsotonicRegression::<PVA>::new();
+        best_example1_decr(&reg);
+    }
+
+    #[test]
+    #[ignore]
+    fn best_example1_decr_algoa() {
+        let reg = IsotonicRegression::<AlgorithmA>::new();
+        best_example1_decr(&reg);
+    }
+
+    #[test]
+    fn example2_incr_pva() {
+        let reg = IsotonicRegression::default();
+        example2_incr(&reg);
+    }
+
+    #[test]
+    fn example2_incr_algoa() {
+        let reg = IsotonicRegression::<AlgorithmA>::new();
+        example2_incr(&reg);
+    }
+
+    #[test]
+    #[ignore]
+    fn example2_decr_pva() {
+        let reg = IsotonicRegression::default();
+        example2_decr(&reg);
+    }
+
+    #[test]
+    #[ignore]
+    fn example2_decr_algoa() {
+        let reg = IsotonicRegression::<AlgorithmA>::new();
+        example2_decr(&reg);
+    }
 }

From 06434cd4d90a22e1a50ca5091db468a50a6d6b60 Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Sun, 5 Jun 2022 23:48:12 -0400
Subject: [PATCH 7/9] code clean up

---
 algorithms/linfa-linear/src/isotonic.rs | 75 ++++++++-----------
 1 file changed, 24 insertions(+), 51 deletions(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index ef2dbbfc4..e51076cce 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -60,7 +60,6 @@ impl IR for AlgorithmA {
             AvU[ii] = (ys[ui] * w + AvU[ii + 1] * wsum) / (wsum + w);
             wsum += w;
         }
-        //println!("y:{:?}\nAvU:{:?}", y, AvU);
 
         let mut V = Vec::<F>::new();
         let mut W = Vec::<F>::new();
@@ -70,8 +69,6 @@ impl IR for AlgorithmA {
         let mut j = 1; // Uj
         while i + j < n {
             let AvB0 = AvU[i];
-            //println!("AvB0: {AvB0}, i:{i}, j:{j}, AvUj:{}", AvU[i + j]);
-            //println!("i:{i} j:{j}, V:{:?} W:{:?}, J:{:?}", V, W, J_index);
             if AvU[i + j] <= AvB0 {
                 // Step 1
                 j += 1;
@@ -85,7 +82,6 @@ impl IR for AlgorithmA {
 
                 // Step 2.1
                 let (mut AvB_minus, mut B_minus_w) = waverage(&ys, weights, i, J_index[i], &index);
-
                 let mut P_B_minus_index = i;
                 let AvP_B_minus = *V.last().unwrap_or(&F::neg_infinity());
                 while V.len() > 0 && AvB_minus < AvP_B_minus {
@@ -93,9 +89,6 @@ impl IR for AlgorithmA {
                         P_B_minus_index -= 1;
                         let AvP_B_minus = V.pop().unwrap();
                         let P_B_minus_w = W.pop().unwrap();
-                        //println!(
-                        //    "AvB-:{AvB_minus}*{B_minus_w}, AvP(B-):{AvP_B_minus}*{P_B_minus_w}, P(B-)[{P_B_minus_index}]"
-                        //    );
 
                         // New Av(B-)
                         AvB_minus = (AvB_minus * B_minus_w + AvP_B_minus * P_B_minus_w)
@@ -106,10 +99,6 @@ impl IR for AlgorithmA {
                         let P_B_minus_start = J_index[P_B_minus_index];
                         J_index[P_B_minus_start] = B_minus_index;
                         J_index[B_minus_index] = P_B_minus_index;
-                        //println!(
-                        //    "P(B-)[{P_B_minus_index}], V:{:?} W:{:?}, J:{:?}",
-                        //    V, W, J_index
-                        //);
                     }
                 }
 
@@ -117,10 +106,8 @@ impl IR for AlgorithmA {
                 W.push(B_minus_w);
                 i = B_minus_index + 1;
                 j = 1;
-                //println!("i:{i} j:{j}, V:{:?} W:{:?}, J:{:?}", V, W, J_index);
             }
         }
-        //println!("AA] i:{i}, j:{j} {:?}", J_index);
 
         // Last block average
         let (AvB_minus, _) = waverage(&ys,
 weights, i, J_index[i], &index);
@@ -136,9 +123,6 @@ impl IR for PVA {
         let mut i = 0;
         let (mut AvB_zero, mut W_B_zero) = waverage(&ys, weights, i, i, &index);
-        //println!("Y:{:?}", ys);
-        //println!("I:{:?}", index);
-        //println!("y:{:?}", ys.select(Axis(0), &index));
         while i < n {
             // Step 1
             let j = J_index[i];
@@ -148,17 +132,12 @@ impl IR for PVA {
             }
             let l = J_index[k];
             let (AvB_plus, W_B_plus) = waverage(&ys, weights, k, l, &index);
-            //println!("i:{i} j:{j}, k:{k}, l:{l}, Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₊):{AvB_plus}[{W_B_plus}]");
             if AvB_zero <= AvB_plus {
                 V.push(AvB_zero);
                 W.push(W_B_zero);
                 AvB_zero = AvB_plus;
                 W_B_zero = W_B_plus;
                 i = k;
-                //println!(
-                //    "i:{i} Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₊):{AvB_plus}[{W_B_plus}], {:?}",
-                //    J_index
-                //);
             } else {
                 // Step 2
                 J_index[i] = l;
@@ -166,10 +145,10 @@ impl IR for PVA {
                 AvB_zero = AvB_zero * W_B_zero + AvB_plus * W_B_plus;
                 W_B_zero += W_B_plus;
                 AvB_zero /= W_B_zero;
-                //println!("i:{i} j:{j}, k:{k}, l:{l}, Av(B₀):{AvB_zero}[{W_B_zero}], {:?}", J_index);
+
                 // Step 2.1
                 let mut AvB_minus = *V.last().unwrap_or(&F::neg_infinity());
-                while V.len() > 0 && AvB_zero <= AvB_minus {
+                while V.len() > 0 && AvB_zero < AvB_minus {
                     AvB_minus = V.pop().unwrap();
                     let W_B_minus = W.pop().unwrap();
                     i = J_index[J_index[l] - 1];
@@ -178,10 +157,8 @@ impl IR for PVA {
                     AvB_zero = AvB_zero * W_B_zero + AvB_minus * W_B_minus;
                     W_B_zero += W_B_minus;
                     AvB_zero /= W_B_zero;
-                    //println!("i:{i} j:{j}, Av(B₀):{AvB_zero}[{W_B_zero}], Av(B₋):{AvB_minus}[{W_B_minus}], {:?}", J_index);
                 }
             }
-            //println!("i:{i} V:{:?} W:{:?}, J:{:?}", V, W, J_index);
         }
 
         // Last block average
     fn best_example1_decr<A: IR>(reg: &IsotonicRegression<A>) {
+        let is_pva = std::any::type_name::<A>().ends_with("PVA");
         let (X, y, regr, resp, yy, V, w) = (
             array![[7.5], [7.5], [6.], [3.3], [3.3], [3.3]], // X
             array![4., 5., 1., 6., 8., 7.0],                 // y
-            array![3.3, 6., 7.5],                            // regressor
-            array![10.0 / 3.0, 6., 7.5],                     // response
-            array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5], // predict X
-            array![[2.0f64], [5.], [7.], [9.]],              // newX
-            array![10. / 3., 5.01234567901234, 7., 7.5],     // predict newX
+            if is_pva {
+                array![7.5, 3.3, 3.3]
+            } else {
+                array![7.5, 3.3]
+            },
+            if is_pva {
+                array![10.0 / 3.0, 7., 7.]
+            } else {
+                array![10.0 / 3.0, 7.]
+            },
+            array![7., 7., 7., 7., 7., 7.], // predict X
+            array![[2.], [3.], [3.3], [7.]], // newX
+            array![10. / 3., 10. / 3., 7., 7.], // predict newX
         );
 
         let dataset = Dataset::new(X, y);
     fn example2_decr<A: IR>(reg: &IsotonicRegression<A>) {
         let is_pva = std::any::type_name::<A>().ends_with("PVA");
+        let (v1, v2) = (11. / 3., 7. / 3.);
         let (X, y, regr, resp, yy) = (
-            array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
+            array![[9.0], [8.], [7.], [6.], [5.], [4.], [3.], [2.], [1.]],
             array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
             if is_pva {
-                array![1., 2., 6., 9.]
+                array![9., 8., 7., 3.]
             } else {
-                array![6., 9.]
+                array![9., 3.]
             },
             if is_pva {
-                array![1., 2., 2.75, 11. / 3.]
+                array![1., 2., 2.75, v1]
             } else {
-                array![7. / 3., 11. / 3.]
+                array![v2, v1]
             },
             if is_pva {
-                array![
-                    1.,
-                    2.,
-                    2.1875,
-                    2.375,
-                    2.5625,
-                    2.75,
-                    55. / 18.,
-                    121. / 36.,
-                    11. / 3.
-                ]
+                array![v1, v1, v1, v1, v1, v1, v1, 1., 1.]
             } else {
-                let v1 = 7. / 3.;
-                array![v1, v1, v1, v1, v1, v1, 25. / 9., 29. / 9., 11. / 3.]
+ array![v1, v1, v1, v1, v1, v1, v1, v2, v2] }, ); @@ -593,7 +569,6 @@ mod tests { } #[test] - #[ignore] fn best_example1_decr_algoa() { let reg = IsotonicRegression::::new(); best_example1_decr(®); @@ -612,14 +587,12 @@ mod tests { } #[test] - #[ignore] fn example2_decr_pva() { let reg = IsotonicRegression::default(); example2_decr(®); } #[test] - #[ignore] fn example2_decr_algoa() { let reg = IsotonicRegression::::new(); example2_decr(®); From 06adbebe42398d1ca3550178ac337a330c413300 Mon Sep 17 00:00:00 2001 From: Art Wild Date: Sun, 3 Jul 2022 16:47:37 -0400 Subject: [PATCH 8/9] removed AlgorithmA. --- algorithms/linfa-linear/src/isotonic.rs | 379 ++++++------------------ 1 file changed, 93 insertions(+), 286 deletions(-) diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs index e51076cce..d5cd9d056 100644 --- a/algorithms/linfa-linear/src/isotonic.rs +++ b/algorithms/linfa-linear/src/isotonic.rs @@ -12,177 +12,69 @@ pub trait Float: linfa::Float {} impl Float for f32 {} impl Float for f64 {} -// Isotonic regression (IR) algorithms -#[derive(Serialize, Deserialize, Debug, Clone, Default)] -pub struct PVA; -#[derive(Serialize, Deserialize, Debug, Clone, Default)] -pub struct AlgorithmA; - -// IR trait -pub trait IR { - fn evaluate( - &self, - y: &ArrayBase, - weights: Option<&[f32]>, - index: &Vec, - ) -> (Vec, Vec) - where - F: Float, - D: Data; -} - -/// An implementation of algorithm A from Best (1990) -/// for solving IRC problem -impl IR for AlgorithmA { - fn evaluate( - &self, - ys: &ArrayBase, - weights: Option<&[f32]>, - index: &Vec, - ) -> (Vec, Vec) - where - F: Float, - D: Data, - { - let n = ys.len(); - - // Precompute partial averages - let mut AvU = vec![F::zero(); n + 1]; - let mut wsum = F::zero(); - for i in 1..=n { - let ii = n - i; - let ui = index[ii]; - let w = if weights.is_none() { - F::one() - } else { - F::cast(weights.unwrap()[ui]) - }; - AvU[ii] = (ys[ui] * w + AvU[ii + 1] * wsum) / (wsum + w); - wsum += w; - } - - let mut V = Vec::::new(); - let mut W = Vec::::new(); - let mut J_index = vec![0_usize; n]; - J_index[0] = n - 1; - let mut i = 0; // B0 - let mut j = 1; // Uj - while i + j < n { - let AvB0 = AvU[i]; - if AvU[i + j] <= AvB0 { - // Step 1 - j += 1; - } else { - // Step 2 - let B_minus_index = i + j - 1; - J_index[i] = B_minus_index; // B- = L_j - J_index[B_minus_index] = i; - J_index[B_minus_index + 1] = n - 1; // B0 = U_j - J_index[n - 1] = B_minus_index + 1; - - // Step 2.1 - let (mut AvB_minus, mut B_minus_w) = waverage(&ys, weights, i, J_index[i], &index); - let mut P_B_minus_index = i; - let AvP_B_minus = *V.last().unwrap_or(&F::neg_infinity()); - while V.len() > 0 && AvB_minus < AvP_B_minus { - if AvB_minus <= AvP_B_minus { - P_B_minus_index -= 1; - let AvP_B_minus = V.pop().unwrap(); - let P_B_minus_w = W.pop().unwrap(); - - // New Av(B-) - AvB_minus = (AvB_minus * B_minus_w + AvP_B_minus * P_B_minus_w) - / (B_minus_w + P_B_minus_w); - B_minus_w += P_B_minus_w; - - // New B- - let P_B_minus_start = J_index[P_B_minus_index]; - J_index[P_B_minus_start] = B_minus_index; - J_index[B_minus_index] = P_B_minus_index; - } - } - - V.push(AvB_minus); - W.push(B_minus_w); - i = B_minus_index + 1; - j = 1; - } - } - - // Last block average - let (AvB_minus, _) = waverage(&ys, weights, i, J_index[i], &index); - V.push(AvB_minus); - - (V, J_index) - } -} - /// An implementation of PVA algorithm from Best (1990) /// for solving IRC problem -impl IR for PVA { - fn evaluate( - &self, - ys: &ArrayBase, - weights: 
 Option<&[f32]>,
-        index: &Vec<usize>,
-    ) -> (Vec<F>, Vec<usize>)
-    where
-        F: Float,
-        D: Data<Elem = F>,
-    {
-        ... (whole Algorithm A body removed) ...
-    }
-}
-
 /// An implementation of the PVA algorithm from Best (1990)
 /// for solving the IR problem
-impl IR for PVA {
-    fn evaluate<F, D>(
-        &self,
-        ys: &ArrayBase<D, Ix1>,
-        weights: Option<&[f32]>,
-        index: &Vec<usize>,
-    ) -> (Vec<F>, Vec<usize>)
-    where
-        F: Float,
-        D: Data<Elem = F>,
-    {
+fn pva<F, D>(
+    ys: &ArrayBase<D, Ix1>,
+    weights: Option<&[f32]>,
+    index: &Vec<usize>,
+) -> (Vec<F>, Vec<usize>)
+where
+    F: Float,
+    D: Data<Elem = F>,
+{
+    let n = ys.len();
+    let mut V = Vec::<F>::new();
+    let mut W = Vec::<F>::new();
+    let mut J_index: Vec<usize> = (0..n).collect();
+    let mut i = 0;
+    let (mut AvB_zero, mut W_B_zero) = waverage(&ys, weights, i, i, &index);
+    while i < n {
+        // Step 1
+        let j = J_index[i];
+        let k = j + 1;
+        if k == n {
+            break;
+        }
+        let l = J_index[k];
+        let (AvB_plus, W_B_plus) = waverage(&ys, weights, k, l, &index);
+        if AvB_zero <= AvB_plus {
+            V.push(AvB_zero);
+            W.push(W_B_zero);
+            AvB_zero = AvB_plus;
+            W_B_zero = W_B_plus;
+            i = k;
+        } else {
+            // Step 2
+            J_index[i] = l;
+            J_index[l] = i;
+            AvB_zero = AvB_zero * W_B_zero + AvB_plus * W_B_plus;
+            W_B_zero += W_B_plus;
+            AvB_zero /= W_B_zero;
+
+            // Step 2.1
+            let mut AvB_minus = *V.last().unwrap_or(&F::neg_infinity());
+            while V.len() > 0 && AvB_zero < AvB_minus {
+                AvB_minus = V.pop().unwrap();
+                let W_B_minus = W.pop().unwrap();
+                i = J_index[J_index[l] - 1];
+                J_index[l] = i;
+                J_index[i] = l;
+                AvB_zero = AvB_zero * W_B_zero + AvB_minus * W_B_minus;
+                W_B_zero += W_B_minus;
+                AvB_zero /= W_B_zero;
+            }
+        }
+    }
+
+    // Last block average
+    let (AvB_minus, _) = waverage(&ys, weights, i, J_index[i], &index);
+    V.push(AvB_minus);
+
+    (V, J_index)
+}
 
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
 /// An isotonic regression model.
 ///
 /// IsotonicRegression solves an isotonic regression problem using the pool
 /// adjacent violators algorithm.
 ///
 /// /// ## Examples
 ///
 /// Here's an example of how to train an isotonic regression model on
 /// the first feature from the `diabetes` dataset.
 /// ```rust
 /// use linfa::{traits::Fit, traits::Predict, Dataset};
 /// use linfa_linear::IsotonicRegression;
 /// use linfa::prelude::SingleTargetRegression;
 ///
 /// let diabetes = linfa_datasets::diabetes();
 /// let dataset = diabetes.feature_iter().next().unwrap(); // get first 1D feature
 /// let model = IsotonicRegression::default().fit(&dataset).unwrap();
 /// let pred = model.predict(&dataset);
 /// let r2 = pred.r2(&dataset).unwrap();
 /// println!("r2 from prediction: {}", r2);
 /// ```
-pub struct IsotonicRegression<A> {
-    algo: A,
-}
+pub struct IsotonicRegression {}
 
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 /// A fitted isotonic regression model which can be used for making predictions.
 pub struct FittedIsotonicRegression<F> {
     regressor: Array1<F>,
     response: Array1<F>,
 }
 
-/// Configure and fit an isotonic regression model
-impl<A: IR + Default> IsotonicRegression<A> {
+impl IsotonicRegression {
     /// Create a default isotonic regression model.
-    pub fn new() -> IsotonicRegression<A> {
-        IsotonicRegression { algo: A::default() }
+    pub fn new() -> IsotonicRegression {
+        IsotonicRegression {}
     }
 }
 
-impl<A: IR + Default> Default for IsotonicRegression<A> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>, A: IR>
-    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression<A>
+impl<F: Float, D: Data<Elem = F>, T: AsSingleTargets<Elem = F>>
+    Fit<ArrayBase<D, Ix2>, T, LinearError<F>> for IsotonicRegression
 {
     type Object = FittedIsotonicRegression<F>;
 
         // Construct response value using particular algorithm
-        let (V, J_index) = self.algo.evaluate(&y, dataset.weights(), &indices);
+        let (V, J_index) = pva(&y, dataset.weights(), &indices);
         let response = Array1::from_vec(V.clone());
 
     fn autotraits() {
         fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
         has_autotraits::<FittedIsotonicRegression<f64>>();
-        has_autotraits::<IsotonicRegression<PVA>>();
+        has_autotraits::<IsotonicRegression>();
         has_autotraits::<LinearError<f64>>();
     }
 
     #[test]
     #[should_panic]
     fn dimension_mismatch() {
-        let reg = IsotonicRegression::<PVA>::new();
+        let reg = IsotonicRegression::new();
         let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5.]);
         let _res = reg.fit(&dataset);
         ()
     }
 
     #[test]
     #[should_panic]
     fn length_mismatch() {
         let reg = IsotonicRegression::default();
         let dataset = Dataset::new(array![[3.3f64, 0.], [3.3, 0.]], array![4., 5., 6.]);
         let _res = reg.fit(&dataset);
         ()
     }
 
-    fn best_example1<A: IR>(reg: &IsotonicRegression<A>) {
+    #[test]
+    fn best_example1() {
+        let reg = IsotonicRegression::new();
         let (X, y, regr, resp, yy, V, w) = (
             array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]], // X
             array![4., 5., 1., 6., 8., 7.0],                    // y
             array![3.3, 6., 7.5],                               // regressor
             array![10.0 / 3.0, 6., 7.5],                        // response
             array![10. / 3., 10. / 3., 10. / 3., 6., 7.5, 7.5], // predict X
             array![[2.0f64], [5.], [7.], [9.]],                 // newX
             array![10. / 3., 5.01234567901234, 7., 7.5],        // predict newX
         );
 
         let dataset = Dataset::new(X, y);
 
         let model = reg.fit(&dataset).unwrap();
         assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
         assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
 
         let result = model.predict(&V);
         assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
     }
 
-    fn best_example1_decr<A: IR>(reg: &IsotonicRegression<A>) {
-        let is_pva = std::any::type_name::<A>().ends_with("PVA");
+    #[test]
+    fn best_example1_decr() {
+        let reg = IsotonicRegression::default();
         let (X, y, regr, resp, yy, V, w) = (
             array![[7.5], [7.5], [6.], [3.3], [3.3], [3.3]], // X
             array![4., 5., 1., 6., 8., 7.0],                 // y
-            if is_pva {
-                array![7.5, 3.3, 3.3]
-            } else {
-                array![7.5, 3.3]
-            },
-            if is_pva {
-                array![10.0 / 3.0, 7., 7.]
-            } else {
-                array![10.0 / 3.0, 7.]
-            },
+            array![7.5, 3.3, 3.3],
+            array![10.0 / 3.0, 7., 7.],
             array![7., 7., 7., 7., 7., 7.], // predict X
             array![[2.], [3.], [3.3], [7.]], // newX
             array![10. / 3., 10. / 3., 7., 7.], // predict newX
         );
 
         let dataset = Dataset::new(X, y);
 
         let model = reg.fit(&dataset).unwrap();
         assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
         assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
 
         let result = model.predict(&V);
         assert_abs_diff_eq!(result, &w, epsilon = 1e-12);
     }
 
-    fn example2_incr<A: IR>(reg: &IsotonicRegression<A>) {
-        let is_pva = std::any::type_name::<A>().ends_with("PVA");
+    #[test]
+    fn example2_incr() {
+        let reg = IsotonicRegression::default();
         let (X, y, regr, resp, yy) = (
             array![[1.0f64], [2.], [3.], [4.], [5.], [6.], [7.], [8.], [9.]],
             array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
-            if is_pva {
-                array![1., 2., 6., 9.]
-            } else {
-                array![6., 9.]
-            },
-            if is_pva {
-                array![1., 2., 2.75, 11. / 3.]
-            } else {
-                array![7. / 3., 11. / 3.]
-            },
-            if is_pva {
-                array![
-                    1.,
-                    2.,
-                    2.1875,
-                    2.375,
-                    2.5625,
-                    2.75,
-                    55. / 18.,
-                    121. / 36.,
-                    11. / 3.
-                ]
-            } else {
-                let v1 = 7. / 3.;
-                array![v1, v1, v1, v1, v1, v1, 25. / 9., 29. / 9., 11. / 3.]
-            },
+            array![1., 2., 6., 9.],
+            array![1., 2., 2.75, 11. / 3.],
+            array![
+                1.,
+                2.,
+                2.1875,
+                2.375,
+                2.5625,
+                2.75,
+                55. / 18.,
+                121. / 36.,
+                11. / 3.
+            ],
         );
 
         let dataset = Dataset::new(X, y);
 
         let model = reg.fit(&dataset).unwrap();
         assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
         assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
     }
 
-    fn example2_decr<A: IR>(reg: &IsotonicRegression<A>) {
-        let is_pva = std::any::type_name::<A>().ends_with("PVA");
-        let (v1, v2) = (11. / 3., 7. / 3.);
+    #[test]
+    fn example2_decr() {
+        let reg = IsotonicRegression::default();
+        let v1 = 11.
 / 3.;
         let (X, y, regr, resp, yy) = (
             array![[9.0], [8.], [7.], [6.], [5.], [4.], [3.], [2.], [1.]],
             array![1., 2., 6., 2., 1., 2., 8., 2., 1.0],
-            if is_pva {
-                array![9., 8., 7., 3.]
-            } else {
-                array![9., 3.]
-            },
-            if is_pva {
-                array![1., 2., 2.75, v1]
-            } else {
-                array![v2, v1]
-            },
-            if is_pva {
-                array![v1, v1, v1, v1, v1, v1, v1, 1., 1.]
-            } else {
-                array![v1, v1, v1, v1, v1, v1, v1, v2, v2]
-            },
+            array![9., 8., 7., 3.],
+            array![1., 2., 2.75, v1],
+            array![v1, v1, v1, v1, v1, v1, v1, 1., 1.],
         );
 
         let dataset = Dataset::new(X, y);
 
         let model = reg.fit(&dataset).unwrap();
         assert_abs_diff_eq!(model.regressor, &regr, epsilon = 1e-12);
         assert_abs_diff_eq!(model.response, &resp, epsilon = 1e-12);
 
         let result = model.predict(dataset.records());
         assert_abs_diff_eq!(result, &yy, epsilon = 1e-12);
     }
-
-    #[test]
-    fn best_example1_incr_pva() {
-        let reg = IsotonicRegression::<PVA>::new();
-        best_example1(&reg);
-    }
-
-    #[test]
-    fn best_example1_incr_algoa() {
-        let reg = IsotonicRegression::<AlgorithmA>::new();
-        best_example1(&reg);
-    }
-
-    #[test]
-    fn best_example1_decr_pva() {
-        let reg = IsotonicRegression::<PVA>::new();
-        best_example1_decr(&reg);
-    }
-
-    #[test]
-    fn best_example1_decr_algoa() {
-        let reg = IsotonicRegression::<AlgorithmA>::new();
-        best_example1_decr(&reg);
-    }
-
-    #[test]
-    fn example2_incr_pva() {
-        let reg = IsotonicRegression::default();
-        example2_incr(&reg);
-    }
-
-    #[test]
-    fn example2_incr_algoa() {
-        let reg = IsotonicRegression::<AlgorithmA>::new();
-        example2_incr(&reg);
-    }
-
-    #[test]
-    fn example2_decr_pva() {
-        let reg = IsotonicRegression::default();
-        example2_decr(&reg);
-    }
-
-    #[test]
-    fn example2_decr_algoa() {
-        let reg = IsotonicRegression::<AlgorithmA>::new();
-        example2_decr(&reg);
-    }
 }

From 51a5752d6c3e6b0de04a3603adc586d4c7d4828f Mon Sep 17 00:00:00 2001
From: Art Wild
Date: Mon, 18 Jul 2022 22:25:45 -0400
Subject: [PATCH 9/9] added the reference

---
 algorithms/linfa-linear/src/isotonic.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/algorithms/linfa-linear/src/isotonic.rs b/algorithms/linfa-linear/src/isotonic.rs
index d5cd9d056..4d087f625 100644
--- a/algorithms/linfa-linear/src/isotonic.rs
+++ b/algorithms/linfa-linear/src/isotonic.rs
@@ -80,7 +80,7 @@ where
 /// IsotonicRegression solves an isotonic regression problem using the pool
 /// adjacent violators algorithm.
 ///
-/// /// ## Examples
+/// ## Examples
 ///
 /// Here's an example of how to train an isotonic regression model on
 /// the first feature from the `diabetes` dataset.
@@ -96,6 +96,11 @@ where
 /// let r2 = pred.r2(&dataset).unwrap();
 /// println!("r2 from prediction: {}", r2);
 /// ```
+///
+/// ## References
+///
+/// Best, M.J., Chakravarti, N. Active set algorithms for isotonic regression;
+/// A unifying framework. Mathematical Programming 47, 425–439 (1990).
 pub struct IsotonicRegression {}
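With the final API, per-sample weights reach `pva` through the dataset (`dataset.weights()`), so a weighted fit only requires attaching weights before fitting. A rough sketch (assuming linfa's `DatasetBase::with_weights` builder; the data mirrors `best_example1`):

```rust
use linfa::prelude::*;
use linfa::Dataset;
use linfa_linear::IsotonicRegression;
use ndarray::array;

fn main() {
    // Down-weight the outlying third sample before fitting.
    let dataset = Dataset::new(
        array![[3.3f64], [3.3], [3.3], [6.], [7.5], [7.5]],
        array![4., 5., 1., 6., 8., 7.],
    )
    .with_weights(array![1.0f32, 1.0, 0.1, 1.0, 1.0, 1.0]);

    let model = IsotonicRegression::default().fit(&dataset).unwrap();
    let pred = model.predict(dataset.records());
    println!("{pred:?}");
}
```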