Skip to content

Commit

Permalink
Merge pull request #79 from Samyssmile/reformat
Browse files (browse the repository at this point in the history)
chore():Reformat Codebase with Google Formatter #78
  • Loading branch information
Samyssmile authored Nov 1, 2023
2 parents e6dd0c3 + df62e30 commit 7b84045
Show file tree
Hide file tree
Showing 51 changed files with 2,497 additions and 2,303 deletions.
245 changes: 131 additions & 114 deletions example/src/main/java/de/example/benchmark/Benchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,127 +13,144 @@
import de.edux.ml.randomforest.RandomForest;
import de.edux.ml.svm.SVMKernel;
import de.edux.ml.svm.SupportVectorMachine;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.IntStream;

/**
* Compare the performance of different classifiers
*/
/** Compare the performance of different classifiers */
public class Benchmark {
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE = new File("example" + File.separator + "datasets" + File.separator + "iris" + File.separator + "iris.csv");
private static final boolean SKIP_HEAD = true;

private double[][] trainFeatures;
private double[][] trainLabels;
private double[][] testFeatures;
private double[][] testLabels;
private MultilayerPerceptron multilayerPerceptron;
private NetworkConfiguration networkConfiguration;

/**
 * Program entry point: constructs a {@code Benchmark} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
new Benchmark().run();
}

private void run() {
initFeaturesAndLabels();

Classifier knn = new KnnClassifier(2);
Classifier decisionTree = new DecisionTree(2, 2, 3, 12);
Classifier randomForest = new RandomForest(500, 10, 2, 3, 3, 60);
Classifier svm = new SupportVectorMachine(SVMKernel.LINEAR, 1);

networkConfiguration = new NetworkConfiguration(trainFeatures[0].length, List.of(128, 256, 512), 3, 0.01, 300, ActivationFunction.LEAKY_RELU, ActivationFunction.SOFTMAX, LossFunction.CATEGORICAL_CROSS_ENTROPY, Initialization.XAVIER, Initialization.XAVIER);
multilayerPerceptron = new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels);
Map<String, Classifier> classifiers = Map.of(
"KNN", knn,
"DecisionTree", decisionTree,
"RandomForest", randomForest,
"SVM", svm,
"MLP", multilayerPerceptron
);

Map<String, List<Double>> results = new ConcurrentHashMap<>();
results.put("KNN", new ArrayList<>());
results.put("DecisionTree", new ArrayList<>());
results.put("RandomForest", new ArrayList<>());
results.put("SVM", new ArrayList<>());
results.put("MLP", new ArrayList<>());


IntStream.range(0, 5).forEach(i -> {
knn.train(trainFeatures, trainLabels);
decisionTree.train(trainFeatures, trainLabels);
randomForest.train(trainFeatures, trainLabels);
svm.train(trainFeatures, trainLabels);
multilayerPerceptron.train(trainFeatures, trainLabels);

double knnAccuracy = knn.evaluate(testFeatures, testLabels);
double decisionTreeAccuracy = decisionTree.evaluate(testFeatures, testLabels);
double randomForestAccuracy = randomForest.evaluate(testFeatures, testLabels);
double svmAccuracy = svm.evaluate(testFeatures, testLabels);
double multilayerPerceptronAccuracy = multilayerPerceptron.evaluate(testFeatures, testLabels);

results.get("KNN").add(knnAccuracy);
results.get("DecisionTree").add(decisionTreeAccuracy);
results.get("RandomForest").add(randomForestAccuracy);
results.get("SVM").add(svmAccuracy);
results.get("MLP").add(multilayerPerceptronAccuracy);
initFeaturesAndLabels();
updateMLP(testFeatures, testLabels);
});

System.out.println("Classifier performances (sorted by average accuracy):");
results.entrySet().stream()
.map(entry -> {
double avgAccuracy = entry.getValue().stream()
.mapToDouble(Double::doubleValue)
.average()
.orElse(0.0);
return Map.entry(entry.getKey(), avgAccuracy);
})
.sorted(Map.Entry.<String, Double>comparingByValue().reversed())
.forEachOrdered(entry -> {
System.out.printf("%s: %.2f%%\n", entry.getKey(), entry.getValue() * 100);
});

System.out.println("\nClassifier best and worst performances:");
results.forEach((classifierName, accuracies) -> {
double maxAccuracy = accuracies.stream()
.mapToDouble(Double::doubleValue)
.max()
.orElse(0.0);
double minAccuracy = accuracies.stream()
.mapToDouble(Double::doubleValue)
.min()
.orElse(0.0);
System.out.printf("%s: Best: %.2f%%, Worst: %.2f%%\n", classifierName, maxAccuracy * 100, minAccuracy * 100);
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE =
new File(
"example"
+ File.separator
+ "datasets"
+ File.separator
+ "iris"
+ File.separator
+ "iris.csv");
private static final boolean SKIP_HEAD = true;

private double[][] trainFeatures;
private double[][] trainLabels;
private double[][] testFeatures;
private double[][] testLabels;
private MultilayerPerceptron multilayerPerceptron;
private NetworkConfiguration networkConfiguration;

/**
 * Program entry point: creates a new {@code Benchmark} instance and calls {@code run()} on it.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
new Benchmark().run();
}

private void run() {
initFeaturesAndLabels();

Classifier knn = new KnnClassifier(2);
Classifier decisionTree = new DecisionTree(2, 2, 3, 12);
Classifier randomForest = new RandomForest(500, 10, 2, 3, 3, 60);
Classifier svm = new SupportVectorMachine(SVMKernel.LINEAR, 1);

networkConfiguration =
new NetworkConfiguration(
trainFeatures[0].length,
List.of(128, 256, 512),
3,
0.01,
300,
ActivationFunction.LEAKY_RELU,
ActivationFunction.SOFTMAX,
LossFunction.CATEGORICAL_CROSS_ENTROPY,
Initialization.XAVIER,
Initialization.XAVIER);
multilayerPerceptron = new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels);
Map<String, Classifier> classifiers =
Map.of(
"KNN", knn,
"DecisionTree", decisionTree,
"RandomForest", randomForest,
"SVM", svm,
"MLP", multilayerPerceptron);

Map<String, List<Double>> results = new ConcurrentHashMap<>();
results.put("KNN", new ArrayList<>());
results.put("DecisionTree", new ArrayList<>());
results.put("RandomForest", new ArrayList<>());
results.put("SVM", new ArrayList<>());
results.put("MLP", new ArrayList<>());

IntStream.range(0, 5)
.forEach(
i -> {
knn.train(trainFeatures, trainLabels);
decisionTree.train(trainFeatures, trainLabels);
randomForest.train(trainFeatures, trainLabels);
svm.train(trainFeatures, trainLabels);
multilayerPerceptron.train(trainFeatures, trainLabels);

double knnAccuracy = knn.evaluate(testFeatures, testLabels);
double decisionTreeAccuracy = decisionTree.evaluate(testFeatures, testLabels);
double randomForestAccuracy = randomForest.evaluate(testFeatures, testLabels);
double svmAccuracy = svm.evaluate(testFeatures, testLabels);
double multilayerPerceptronAccuracy =
multilayerPerceptron.evaluate(testFeatures, testLabels);

results.get("KNN").add(knnAccuracy);
results.get("DecisionTree").add(decisionTreeAccuracy);
results.get("RandomForest").add(randomForestAccuracy);
results.get("SVM").add(svmAccuracy);
results.get("MLP").add(multilayerPerceptronAccuracy);
initFeaturesAndLabels();
updateMLP(testFeatures, testLabels);
});

System.out.println("Classifier performances (sorted by average accuracy):");
results.entrySet().stream()
.map(
entry -> {
double avgAccuracy =
entry.getValue().stream().mapToDouble(Double::doubleValue).average().orElse(0.0);
return Map.entry(entry.getKey(), avgAccuracy);
})
.sorted(Map.Entry.<String, Double>comparingByValue().reversed())
.forEachOrdered(
entry -> {
System.out.printf("%s: %.2f%%\n", entry.getKey(), entry.getValue() * 100);
});

System.out.println("\nClassifier best and worst performances:");
results.forEach(
(classifierName, accuracies) -> {
double maxAccuracy =
accuracies.stream().mapToDouble(Double::doubleValue).max().orElse(0.0);
double minAccuracy =
accuracies.stream().mapToDouble(Double::doubleValue).min().orElse(0.0);
System.out.printf(
"%s: Best: %.2f%%, Worst: %.2f%%\n",
classifierName, maxAccuracy * 100, minAccuracy * 100);
});
}

/**
 * Replaces the {@code multilayerPerceptron} field with a new instance built from the stored
 * {@code networkConfiguration} and the supplied test data.
 *
 * <p>The parameters shadow the instance fields of the same name.
 *
 * @param testFeatures test features handed to the {@code MultilayerPerceptron} constructor
 * @param testLabels test labels handed to the {@code MultilayerPerceptron} constructor
 */
private void updateMLP(double[][] testFeatures, double[][] testLabels) {
multilayerPerceptron = new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels);
}

/**
 * Loads {@code CSV_FILE}, runs it through normalize / shuffle / split, and stores the resulting
 * train and test features and labels in the corresponding instance fields.
 *
 * <p>NOTE(review): because {@code shuffle()} is part of the chain, each call presumably produces
 * a different train/test split -- confirm against {@code DataProcessor}.
 */
private void initFeaturesAndLabels() {
// Columns 0-3 are passed as the feature columns, column 4 as the target column.
var featureColumnIndices = new int[]{0, 1, 2, 3};
var targetColumnIndex = 4;

// ',' is presumably the CSV column separator and SKIP_HEAD presumably skips the header row;
// verify against DataProcessor.loadDataSetFromCSV.
var dataProcessor = new DataProcessor(new CSVIDataReader())
.loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex)
.normalize()
.shuffle()
.split(TRAIN_TEST_SPLIT_RATIO);

// Overwrite all four data fields from the freshly split data set.
trainFeatures = dataProcessor.getTrainFeatures(featureColumnIndices);
trainLabels = dataProcessor.getTrainLabels(targetColumnIndex);
testFeatures = dataProcessor.getTestFeatures(featureColumnIndices);
testLabels = dataProcessor.getTestLabels(targetColumnIndex);

}
}

/**
 * Discards the current {@code multilayerPerceptron} and assigns a new one constructed from the
 * stored {@code networkConfiguration} and the given test data.
 *
 * <p>Both parameters shadow the instance fields of the same name.
 *
 * @param testFeatures test features passed to the {@code MultilayerPerceptron} constructor
 * @param testLabels test labels passed to the {@code MultilayerPerceptron} constructor
 */
private void updateMLP(double[][] testFeatures, double[][] testLabels) {
multilayerPerceptron = new MultilayerPerceptron(networkConfiguration, testFeatures, testLabels);
}

/**
 * Reads {@code CSV_FILE} through a {@code DataProcessor}, applies normalize / shuffle / split,
 * and writes the train and test features and labels into the instance fields.
 *
 * <p>NOTE(review): the chain includes {@code shuffle()}, so repeated calls presumably yield
 * different splits -- confirm against {@code DataProcessor}.
 */
private void initFeaturesAndLabels() {
// Columns 0-3 are used as features; column 4 is used as the target.
var featureColumnIndices = new int[] {0, 1, 2, 3};
var targetColumnIndex = 4;

// ',' is presumably the column separator and SKIP_HEAD presumably drops the header line;
// verify against DataProcessor.loadDataSetFromCSV.
var dataProcessor =
new DataProcessor(new CSVIDataReader())
.loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex)
.normalize()
.shuffle()
.split(TRAIN_TEST_SPLIT_RATIO);

// Replace all four data fields with the newly split data.
trainFeatures = dataProcessor.getTrainFeatures(featureColumnIndices);
trainLabels = dataProcessor.getTrainLabels(targetColumnIndex);
testFeatures = dataProcessor.getTestFeatures(featureColumnIndices);
testLabels = dataProcessor.getTestLabels(targetColumnIndex);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,46 @@
import de.edux.data.provider.DataProcessor;
import de.edux.data.reader.CSVIDataReader;
import de.edux.ml.decisiontree.DecisionTree;
import de.edux.ml.randomforest.RandomForest;

import java.io.File;

public class DecisionTreeExampleOnIrisDataset {
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE = new File("example" + File.separator + "datasets" + File.separator + "iris" + File.separator + "iris.csv");
private static final boolean SKIP_HEAD = true;
public static void main(String[] args) {
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE =
new File(
"example"
+ File.separator
+ "datasets"
+ File.separator
+ "iris"
+ File.separator
+ "iris.csv");
private static final boolean SKIP_HEAD = true;

public static void main(String[] args) {
/* IRIS Dataset...
+-------------+------------+-------------+------------+---------+
| sepal.length| sepal.width| petal.length| petal.width| variety |
+-------------+------------+-------------+------------+---------+
| 5.1 | 3.5 | 1.4 | .2 | Setosa |
+-------------+------------+-------------+------------+---------+
*/
var featureColumnIndices = new int[]{0, 1, 2, 3}; // First 4 columns are features
var targetColumnIndex = 4; // Last column is the target
var featureColumnIndices = new int[] {0, 1, 2, 3}; // First 4 columns are features
var targetColumnIndex = 4; // Last column is the target

var irisDataProcessor = new DataProcessor(new CSVIDataReader()).loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex).normalize().shuffle().split(TRAIN_TEST_SPLIT_RATIO);
Classifier classifier = new DecisionTree(2, 2, 3, 12);
var irisDataProcessor =
new DataProcessor(new CSVIDataReader())
.loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex)
.normalize()
.shuffle()
.split(TRAIN_TEST_SPLIT_RATIO);
Classifier classifier = new DecisionTree(2, 2, 3, 12);

var trainFeatures = irisDataProcessor.getTrainFeatures(featureColumnIndices);
var trainTestFeatures = irisDataProcessor.getTestFeatures(featureColumnIndices);
var trainLabels = irisDataProcessor.getTrainLabels(targetColumnIndex);
var trainTestLabels = irisDataProcessor.getTestLabels(targetColumnIndex);
var trainFeatures = irisDataProcessor.getTrainFeatures(featureColumnIndices);
var trainTestFeatures = irisDataProcessor.getTestFeatures(featureColumnIndices);
var trainLabels = irisDataProcessor.getTrainLabels(targetColumnIndex);
var trainTestLabels = irisDataProcessor.getTestLabels(targetColumnIndex);

classifier.train(trainFeatures, trainLabels);
classifier.evaluate(trainTestFeatures, trainTestLabels);
}
classifier.train(trainFeatures, trainLabels);
classifier.evaluate(trainTestFeatures, trainTestLabels);
}
}
59 changes: 33 additions & 26 deletions example/src/main/java/de/example/knn/KnnIrisExample.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,40 @@
import de.edux.data.provider.DataProcessor;
import de.edux.data.reader.CSVIDataReader;
import de.edux.ml.knn.KnnClassifier;

import java.io.File;

public class KnnIrisExample {
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE = new File("example" + File.separator + "datasets" + File.separator + "iris" + File.separator + "iris.csv");
private static final boolean SKIP_HEAD = true;

/**
 * Example entry point: loads the Iris CSV, splits it into train and test parts, trains a
 * {@code KnnClassifier} on the train part and evaluates it on the test part.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// Columns 0-3 are passed as the feature columns, column 4 as the target column.
var featureColumnIndices = new int[]{0, 1, 2, 3};
var targetColumnIndex = 4;

// ',' is presumably the CSV column separator; verify against DataProcessor.loadDataSetFromCSV.
var irisDataProcessor = new DataProcessor(new CSVIDataReader())
.loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex)
.normalize()
.shuffle()
.split(TRAIN_TEST_SPLIT_RATIO);


// NOTE(review): the argument 2 is presumably the neighbour count k -- confirm in KnnClassifier.
Classifier knn = new KnnClassifier(2);

// Despite the "trainTest" prefix, these two locals hold the test split
// (they come from getTestFeatures / getTestLabels).
var trainFeatures = irisDataProcessor.getTrainFeatures(featureColumnIndices);
var trainTestFeatures = irisDataProcessor.getTestFeatures(featureColumnIndices);
var trainLabels = irisDataProcessor.getTrainLabels(targetColumnIndex);
var trainTestLabels = irisDataProcessor.getTestLabels(targetColumnIndex);

knn.train(trainFeatures, trainLabels);
// The value returned by evaluate is not used here.
knn.evaluate(trainTestFeatures, trainTestLabels);
}
private static final double TRAIN_TEST_SPLIT_RATIO = 0.70;
private static final File CSV_FILE =
new File(
"example"
+ File.separator
+ "datasets"
+ File.separator
+ "iris"
+ File.separator
+ "iris.csv");
private static final boolean SKIP_HEAD = true;

/**
 * Example entry point: reads the Iris CSV, splits it into train and test parts, trains a
 * {@code KnnClassifier} on the train part and evaluates it on the test part.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
// Columns 0-3 are used as features; column 4 is used as the target.
var featureColumnIndices = new int[] {0, 1, 2, 3};
var targetColumnIndex = 4;

// ',' is presumably the CSV column separator; verify against DataProcessor.loadDataSetFromCSV.
var irisDataProcessor =
new DataProcessor(new CSVIDataReader())
.loadDataSetFromCSV(CSV_FILE, ',', SKIP_HEAD, featureColumnIndices, targetColumnIndex)
.normalize()
.shuffle()
.split(TRAIN_TEST_SPLIT_RATIO);

// NOTE(review): the argument 2 is presumably the neighbour count k -- confirm in KnnClassifier.
Classifier knn = new KnnClassifier(2);

// Despite the "trainTest" prefix, these two locals hold the test split
// (they come from getTestFeatures / getTestLabels).
var trainFeatures = irisDataProcessor.getTrainFeatures(featureColumnIndices);
var trainTestFeatures = irisDataProcessor.getTestFeatures(featureColumnIndices);
var trainLabels = irisDataProcessor.getTrainLabels(targetColumnIndex);
var trainTestLabels = irisDataProcessor.getTestLabels(targetColumnIndex);

knn.train(trainFeatures, trainLabels);
// The value returned by evaluate is not used here.
knn.evaluate(trainTestFeatures, trainTestLabels);
}
}
Loading

0 comments on commit 7b84045

Please sign in to comment.