Skip to content

Commit

Permalink
Add machine degradation dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoDiFrancesco committed Jul 9, 2024
1 parent ec2109a commit b77ba69
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 6 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ path = "examples/anomaly_detection/credit_card.rs"
name = "keystroke"
path = "examples/classification/keystroke.rs"

[[example]]
name = "machine_degradations"
path = "examples/regression/machine_degradations.rs"

[[example]]
name = "synthetic"
path = "examples/classification/synthetic.rs"
Expand Down
83 changes: 83 additions & 0 deletions examples/regression/machine_degradations.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use light_river::mondrian_forest::mondrian_forest::MondrianForestRegressor;

use light_river::common::{RegTarget, Regressor};
use light_river::datasets::machine_degradation::MachineDegradation;
use light_river::stream::iter_csv::IterCsv;
use ndarray::Array1;
use num::ToPrimitive;

use std::fs::File;
use std::time::Instant;

/// Returns the feature (column) names of the dataset, sorted.
///
/// Only the first item of `transactions` is consumed; the names are the keys
/// of that item's observation.
///
/// # Panics
///
/// Panics if the stream yields no item, or if the first item cannot be
/// unwrapped.
fn get_features(transactions: IterCsv<f32, File>) -> Vec<String> {
    let first_row = transactions.into_iter().next().unwrap().unwrap();
    let mut names = first_row
        .get_observation()
        .iter()
        .map(|(name, _)| name.clone())
        .collect::<Vec<String>>();
    // Sorting gives a stable column order that does not depend on how the
    // observation container iterates.
    names.sort();
    names
}

/// Returns the number of items yielded by `transactions`.
///
/// The stream is consumed to its end in order to count it.
fn get_dataset_size(transactions: IterCsv<f32, File>) -> usize {
    transactions.count()
}

/// Evaluates a Mondrian forest regressor on the machine degradation dataset.
///
/// Every sample except the first is predicted before the forest learns from
/// it; the squared errors are accumulated and reported as an MSE, together
/// with the elapsed time and the forest size.
///
/// # Panics
///
/// Panics if the dataset cannot be loaded, if a row cannot be unwrapped, or if
/// the target column `pCut::Motor_Torque` cannot be read from a row.
fn main() {
    let n_trees: usize = 10;

    // First pass over the CSV: only used to fix the feature order.
    let transactions_f = MachineDegradation::load_data();
    let features = get_features(transactions_f);

    println!("Features: {:?}", features);

    let mut mf: MondrianForestRegressor<f32> =
        MondrianForestRegressor::new(n_trees, features.len());
    let mut err_total = 0.0;

    // Second pass: count the rows, needed for the MSE denominator.
    let transactions_l = MachineDegradation::load_data();
    let dataset_size = get_dataset_size(transactions_l);

    let now = Instant::now();

    // Third pass: the actual predict-then-learn loop.
    let transactions = MachineDegradation::load_data();
    for (idx, transaction) in transactions.enumerate() {
        let data = transaction.unwrap();

        // Build the feature vector following the order of `features`.
        let x = data.get_observation();
        let x = Array1::<f32>::from_vec(features.iter().map(|k| x[k]).collect());

        let y = data.to_regression_target("pCut::Motor_Torque").unwrap();

        // Skip scoring the first sample since the tree has no node yet.
        if idx != 0 {
            let pred = mf.predict_one(&x, &y);
            let err = (pred - y).powi(2);
            err_total += err;
        }

        mf.learn_one(&x, &y);
    }

    let elapsed_time = now.elapsed();
    println!("Took {}ms", elapsed_time.as_millis());

    // The first sample is never scored, hence one fewer than the dataset size.
    let n_scored = dataset_size - 1;
    println!(
        "MSE: {} / {} = {}",
        err_total,
        n_scored,
        err_total / n_scored.to_f32().unwrap()
    );

    let forest_size = mf.get_forest_size();
    println!("Forest tree sizes: {:?}", forest_size);
}
25 changes: 25 additions & 0 deletions src/datasets/machine_degradation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::datasets::utils;
use crate::stream::data_stream::Target;
use crate::stream::iter_csv::IterCsv;
use std::{fs::File, path::Path};

/// One Year Industrial Component Degradation
///
/// References
/// ----------
/// [^1]: [One Year Industrial Component Degradation](https://www.kaggle.com/datasets/inIT-OWL/one-year-industrial-component-degradation)
pub struct MachineDegradation;

impl MachineDegradation {
    /// Opens the local copy of the dataset as a CSV stream with
    /// `pCut::Motor_Torque` as the target column.
    ///
    /// The file is not fetched automatically; it must already exist as
    /// `one-year-industrial-component-degradation.csv` relative to the
    /// current working directory.
    ///
    /// # Panics
    ///
    /// Panics if the file does not exist, cannot be opened, or if the CSV
    /// iterator cannot be constructed from it.
    pub fn load_data() -> IterCsv<f32, File> {
        // Kaggle download location of the source file (kept for reference):
        // https://www.kaggle.com/datasets/inIT-OWL/one-year-industrial-component-degradation/download/fA53OHmuZ0enYASBqytj%2Fversions%2FvXObUJmxGJQSUSC2Wyc7%2Ffiles%2F01-04T184148_000_mode1.csv?datasetVersionNumber=1
        let file_name = "one-year-industrial-component-degradation.csv";

        assert!(
            Path::new(file_name).exists(),
            "Dataset not downloaded. Download it in file '{file_name}'"
        );

        let csv = File::open(file_name).unwrap();
        let target = Target::Name(String::from("pCut::Motor_Torque"));
        IterCsv::<f32, File>::new(csv, Some(target)).unwrap()
    }
}
1 change: 1 addition & 0 deletions src/datasets/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod credit_card;
pub mod keystroke;
pub mod machine_degradation;
pub mod synthetic;
pub mod synthetic_regression;
pub mod utils;
6 changes: 3 additions & 3 deletions src/mondrian_forest/mondrian_tree_clf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ impl<F: FType> MondrianTreeClassifier<F> {
let exp_dist = Exp::new(lambda.to_f32().unwrap()).unwrap();
let exp_sample = F::from_f32(exp_dist.sample(&mut self.rng)).unwrap();
// DEBUG: shadowing with Exp expected value
let exp_sample = F::one() / lambda;
// let exp_sample = F::one() / lambda;
exp_sample
};
let split_time = self.compute_split_time(time, exp_sample, node_idx, y, extensions.sum());
Expand All @@ -516,7 +516,7 @@ impl<F: FType> MondrianTreeClassifier<F> {
.collect::<Array1<F>>();
let e_sample = F::from_f32(self.rng.gen::<f32>()).unwrap() * extensions.sum();
// DEBUG: shadowing with expected value
let e_sample = F::from_f32(0.5).unwrap() * extensions.sum();
// let e_sample = F::from_f32(0.5).unwrap() * extensions.sum();
// println!("go_downwards() - cumsum: {cumsum}, e_sample: {e_sample}");
cumsum.iter().position(|&val| val > e_sample).unwrap()
};
Expand All @@ -536,7 +536,7 @@ impl<F: FType> MondrianTreeClassifier<F> {
};
let threshold: F = F::from_f32(self.rng.gen_range(lower_bound..upper_bound)).unwrap();
// DEBUG: split in the middle
let threshold = F::from_f32((lower_bound + upper_bound) / 2.0).unwrap();
// let threshold = F::from_f32((lower_bound + upper_bound) / 2.0).unwrap();
// println!(
// "Threshold: {threshold} - Lower: {}, Upper: {}",
// lower_bound, upper_bound
Expand Down
6 changes: 3 additions & 3 deletions src/mondrian_forest/mondrian_tree_reg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ impl<F: FType> MondrianTreeRegressor<F> {
let exp_dist = Exp::new(lambda.to_f32().unwrap()).unwrap();
let exp_sample = F::from_f32(exp_dist.sample(&mut self.rng)).unwrap();
// DEBUG: shadowing with Exp expected value
let exp_sample = F::one() / lambda;
// let exp_sample = F::one() / lambda;
exp_sample
};
let split_time = self.compute_split_time(time, exp_sample, node_idx, extensions.sum());
Expand All @@ -488,7 +488,7 @@ impl<F: FType> MondrianTreeRegressor<F> {
.collect::<Array1<F>>();
let e_sample = F::from_f32(self.rng.gen::<f32>()).unwrap() * extensions.sum();
// DEBUG: shadowing with expected value
let e_sample = F::from_f32(0.5).unwrap() * extensions.sum();
// let e_sample = F::from_f32(0.5).unwrap() * extensions.sum();
// println!("go_downwards() - split_time: {split_time}, cumsum: {cumsum}, e_sample: {e_sample}");
cumsum.iter().position(|&val| val > e_sample).unwrap()
};
Expand All @@ -508,7 +508,7 @@ impl<F: FType> MondrianTreeRegressor<F> {
};
let threshold: F = F::from_f32(self.rng.gen_range(lower_bound..upper_bound)).unwrap();
// DEBUG: split in the middle
let threshold = F::from_f32((lower_bound + upper_bound) / 2.0).unwrap();
// let threshold = F::from_f32((lower_bound + upper_bound) / 2.0).unwrap();
// println!(
// "Threshold: {threshold} - Lower: {}, Upper: {}",
// lower_bound, upper_bound
Expand Down

0 comments on commit b77ba69

Please sign in to comment.