diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..e33553a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: It's broken ¯\_(ツ)_/¯ + +--- + +**File**: org/iconic/file.java +**Lines**: +**Branch**: + +**Describe the bug** +A clear and concise description of what the bug is. + +**To reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Additional context** +Add any other context about the problem here. (eg. **Stacktraces**) diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 0000000..effcf9c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,14 @@ +--- +name: Feature Request +about: Improvements and TODOs + +--- + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..91271f2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# How to contribute +I'm really glad you're reading this. + +## Merges into higher branches +This section is for merging your changes into the development branch (and later the master branch). +Make sure you run every unit test in the suite before creating your pull request. We don't want any regressions. Once you've done this, you're good to make your very own Pull Request! 
+ +### Creating a Pull Request (PR) +- Merges from any branch into *development* and *master* must go through a GitHub PR. +- Each PR must be assigned to me ([@scottwalkerau](https://github.com/scottwalkerau)) and have someone relevant as a reviewer. (Try not to add too many people as reviewers, at most 2) +- If you cannot think of someone relevant, leave it blank and I will do it myself or assign someone. +- Add any relevant comments for reviewers and myself on the PR. (There is a very small template for this) + +### Reviewing a PR +- Read through every line changed so you understand **why** the PR is there. +- Flag any overall questions you have as a **Comment**. +- Flag anything general you think should be done differently as **Request Changes** (For specific sections of the code, see below) +- If you think **every file** is good, select **Approve**. + +*NOTE:* +- You can select individual lines for single comments or to start a review. +- Selecting one of the three radio buttons applies the action to every file and doesn't reference a specific line. + +## Coding conventions +I'm hoping you all adhere to these when writing your code. It will increase readability for your reviewers and maintainability later on... +- No single-line *if*, *while*, *for*, etc. structures without curly braces. Use curly braces. +- Each method that does not call a subroutine should perform at most 1 function. (e.g. A method to translate numbers to Excel headers should not be buried within another function; it should be its own method) +- If your methods are too long, create a private subroutine. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..48bda07 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Iconic + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..d6ac3b8 --- /dev/null +++ b/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,2 @@ +**Changes** +Describe what this merge will change diff --git a/api/src/main/java/org/iconic/ea/chromosome/Chromosome.java b/api/src/main/java/org/iconic/ea/chromosome/Chromosome.java index 8ac2175..7fb92ea 100644 --- a/api/src/main/java/org/iconic/ea/chromosome/Chromosome.java +++ b/api/src/main/java/org/iconic/ea/chromosome/Chromosome.java @@ -1,5 +1,6 @@ package org.iconic.ea.chromosome; +import org.iconic.ea.data.DataManager; import org.iconic.ea.operator.evolutionary.mutation.Mutator; import java.util.LinkedList; @@ -96,7 +97,7 @@ public void setChanged(boolean changed) { * @param input The input samples to evaluate * @return A list of outputs, one for each input sample */ - public abstract List evaluate(List> input); + public abstract List evaluate(final DataManager input); /** *

diff --git a/api/src/main/java/org/iconic/ea/chromosome/expression/ExpressionChromosome.java b/api/src/main/java/org/iconic/ea/chromosome/expression/ExpressionChromosome.java index 5d89a30..8c516a6 100644 --- a/api/src/main/java/org/iconic/ea/chromosome/expression/ExpressionChromosome.java +++ b/api/src/main/java/org/iconic/ea/chromosome/expression/ExpressionChromosome.java @@ -1,9 +1,12 @@ package org.iconic.ea.chromosome.expression; +import lombok.extern.log4j.Log4j2; import org.iconic.ea.chromosome.Chromosome; import org.iconic.ea.chromosome.LinearChromosome; -import org.iconic.ea.chromosome.graph.Node; import org.iconic.ea.chromosome.TreeChromosome; +import org.iconic.ea.chromosome.graph.Node; +import org.iconic.ea.data.DataManager; +import org.iconic.ea.data.FeatureClass; import java.util.LinkedList; import java.util.List; @@ -13,6 +16,7 @@ * {@inheritDoc} *

A chromosome that encodes an expression tree.

*/ +@Log4j2 public class ExpressionChromosome extends Chromosome implements TreeChromosome, LinearChromosome>, Cloneable { private List> genome; private Node root; @@ -91,11 +95,24 @@ private Node recursivelyGenerateTree(Node root) { * {@inheritDoc} */ @Override - public List evaluate(List> input) { + public List evaluate(final DataManager dataManager) { List calculatedValues = new LinkedList<>(); + List headers = dataManager.getSampleHeaders(); + int numSamples = dataManager.getSampleSize(); + + for (int i = 0; i < numSamples; ++i) { + List row = new LinkedList<>(); + + for (String header : headers) { + FeatureClass feature = dataManager.getDataset().get(header); + + row.add( + (T) feature.getSampleValue(i) + ); + } - for (List row : input) { - calculatedValues.add(getRoot().apply(row)); + T output = getRoot().apply(row); + calculatedValues.add(output); } return calculatedValues; diff --git a/api/src/main/java/org/iconic/ea/data/DataManager.java b/api/src/main/java/org/iconic/ea/data/DataManager.java index 1e8a824..f1ebe52 100644 --- a/api/src/main/java/org/iconic/ea/data/DataManager.java +++ b/api/src/main/java/org/iconic/ea/data/DataManager.java @@ -3,38 +3,41 @@ import lombok.extern.log4j.Log4j2; import java.io.*; -import java.util.ArrayList; -import java.util.List; -import java.util.Scanner; +import java.util.*; @Log4j2 +/** + * TODO: generify this - DataManager => NumericDataManager / StringDataManager / PictureDataManager + */ public class DataManager { - private final Class clazz; - private static String fileName; - private List> samples, originalSamples; - private static ArrayList sampleHeader; - private static ArrayList sampleDescription; - private static int featureSize; - private static int sampleSize; - - public DataManager(Class clazz, String fileName) { - this.clazz = clazz; - DataManager.fileName = fileName; + + private String fileName; + private List sampleHeaders; + private List expectedOutputHeaders; + private HashMap> dataset; + private int 
featureSize; + private int sampleSize; + private boolean containsHeader = false; + + public DataManager(String fileName) { + this.fileName = fileName; + expectedOutputHeaders = new ArrayList<>(); + sampleHeaders = new ArrayList<>(); try { - importData(fileName); + importData(this.fileName); } catch (IOException ex) { - log.error("Bad File: {}", () -> fileName); - log.error("Exception: {}", ex); + log.error("Bad File: {}", () -> fileName); + log.error("Exception: {}", ex); } } private void importData(String fileName) throws IOException { - DataManager.fileName = fileName; - log.traceEntry(); - featureSize = 0; + this.fileName = fileName; sampleSize = 0; - boolean headerRow = true; + dataset = new HashMap<>(); + + // log.traceEntry(); // Check if the file is on the classpath, otherwise check outside InputStream resource = Thread.currentThread() @@ -45,121 +48,169 @@ private void importData(String fileName) throws IOException { ? new BufferedReader(new FileReader(fileName)) : new BufferedReader(new InputStreamReader(resource)); - samples = new ArrayList<>(); - originalSamples = new ArrayList<>(); - sampleHeader = new ArrayList<>(); - - // Sometimes data is given as a String like "Boy, Girl", this ArrayList keeps track of all the strings and returns the - // index value of a string variable. This will ensure all data is in a numerical format. 
- ArrayList stringValues = new ArrayList<>(); Scanner sc = new Scanner(reader); + // Check the file isn't empty + if (!sc.hasNextLine()) { + log.error("The input file is empty"); + return; + } + + // Get the first line from the datafile + String line = getNextLineFromDataFile(sc); + + // Assume the delimiter is a comma, and set feature size + String[] split = line.split(","); + featureSize = split.length; + + // Try to determine if the datafile contains a header row + for (String header : split) { + try { + Double.parseDouble(header); + } catch (NumberFormatException e) { + containsHeader = true; + break; + } + } + + if (containsHeader) { + // Update the headers + Collections.addAll(sampleHeaders, split); + log.error(sampleHeaders); + + // Read in the next line for later (needed because the `else` block already reads in the next line) + line = getNextLineFromDataFile(sc); + } else { + // Generate all the header names such as: A, B, C, ..., Z, AA, BB, etc + for (int i = 0; i < featureSize; i++) { + sampleHeaders.add(intToHeader(i)); + } + } + + // Set the last column by default as the expected output + expectedOutputHeaders.add(sampleHeaders.get(featureSize - 1)); + + // Create a list of all features + ArrayList> featureClasses = new ArrayList<>(featureSize); + + for (String aSampleHeader : sampleHeaders) { + if (expectedOutputHeaders.contains(aSampleHeader)) { + featureClasses.add(new NumericFeatureClass(true)); + } else { + featureClasses.add(new NumericFeatureClass(false)); + } + } + // Scan through the input file one line a time - while (sc.hasNextLine()) { - String line = sc.nextLine(); - - // Ensure the line isn't blank - if (!"".equals(line)) { - // Split the line values separated by a "," - String[] values = line.split(","); - - // Track the number of features - if (featureSize == 0) { - featureSize = values.length; - } - - // currentValues holds all the collected values in a double format - ArrayList currentValues = new ArrayList<>(); - - // Loop through 
all the collected values - for (String value : values) { - if (headerRow) { - sampleHeader.add(value); - } else { - // If the value is already in a double format then instantly add it into the array - try { - if (clazz.isAssignableFrom(String.class)) { - currentValues.add((T) (value)); - } else if (clazz.isAssignableFrom(Integer.class)) { - currentValues.add((T) Integer.valueOf(value)); - } else if (clazz.isAssignableFrom(Boolean.class)) { - currentValues.add((T) Boolean.valueOf(value)); - } else if (clazz.isAssignableFrom(Double.class)) { - currentValues.add((T) Double.valueOf(value)); - } else { - throw new IllegalArgumentException("Bad type."); - } - } - // If the value collected is a string value then... - catch (NumberFormatException nfe) { - log.error("DataManager importData Trying to cast the input to "); - } - } - } - - if (headerRow) - headerRow = false; - - // At the end of each line being read in, add that array into the ArrayList for storage - if (!currentValues.isEmpty()) { - // Track the sample size - sampleSize++; - samples.add(currentValues); - originalSamples.add((List)currentValues.clone()); - } + do { + if (line == null) { + break; + } + + sampleSize++; + + // Assume the delimiter is a comma + String[] values = line.split(","); + + // Parse the string values to a double and add to FeatureClass + for (int i = 0; i < values.length; i++) { + Double value = Double.parseDouble(values[i]); + featureClasses.get(i).addSampleValue(value); } + + line = getNextLineFromDataFile(sc); + } while (line != null); + + // Add all the feature classes to the map + for (int i = 0; i < featureSize; i++) { + dataset.put(sampleHeaders.get(i), featureClasses.get(i)); } + sc.close(); - log.info("DataManager importData - Successfully Imported Dataset"); + // log.info("Successfully Imported Dataset"); } - public List> getSamples() { - return samples; + private String getNextLineFromDataFile(Scanner sc) { + if (!sc.hasNextLine()) + return null; + + // Read in the next line of 
the file + String line = sc.nextLine(); + + // While there are comments or empty lines in the file, read next line + while(line.startsWith("#") || line.equals("")) { + if (sc.hasNextLine()) { + line = sc.nextLine(); + } else { + // log.error("The data file is empty"); + return null; + } + } + + return line; + } + + // Takes an int value and converts it into the excel format for a header + // Example (0 = A, 1 = B, 26 = AA, 27 = AB) + public String intToHeader(int num) { + StringBuilder name = new StringBuilder(); + do { + char letter = (char) (65 + num % 26); + name.insert(0, letter); + if (num < 26) + break; + num /= 26; + num -= 1; + } while (num >= 0); + return name.toString(); + } + + public void applyPreProcessing() { + dataset.forEach((key, value) -> value.applyPreProcessing()); } - public List getSampleRow(int row) { - return samples.get(row); + public HashMap> getDataset() { + return dataset; } - public List getSampleColumn(int column) { - List values = new ArrayList<>(); + public List getSampleRow(int row) { + List samples = new ArrayList<>(); - for (List row : samples) - values.add(row.get(column)); + for (String header : sampleHeaders) { + FeatureClass fc = dataset.get(header); + Number value = fc.getSampleValue(row); + samples.add(value); + } - return values; + return samples; } - public T getInputVariable(int sample, int index) { - return samples.get(sample).get(index); + public ArrayList getSampleColumn(int column) { + String columnName = sampleHeaders.get(column); + + return getSampleColumn(columnName); } - public int getFeatureSize() { - return featureSize - 1; + public ArrayList getSampleColumn(String columnName) { + FeatureClass featureClass = dataset.get(columnName); + + return featureClass.getSamples(); } - public int getSampleSize() { - return sampleSize; + public Number getSampleVariable(String headerName, int row) { + return dataset.get(headerName).getSampleValue(row); } - /** - * Loop through all samples and reset the current value back to 
the origninal. - * Used when preprocessing methods are deactivated. - * - * @param column - */ - public void resetSampleColumn(int column) { - for (int i = 0; i < sampleSize; i++) - samples.get(i).set(column, originalSamples.get(i).get(column)); + public int getFeatureSize() { + return featureSize; } - public ArrayList getSampleHeaders() { return sampleHeader; } + public int getSampleSize() { + return sampleSize; + } - public void setSampleColumn(int column, List values) { - for (int i = 0; i < sampleSize; i++) { - T value = values.get(i); - samples.get(i).set(column, value); - } + public List getSampleHeaders() { + return sampleHeaders; } } \ No newline at end of file diff --git a/api/src/main/java/org/iconic/ea/data/FeatureClass.java b/api/src/main/java/org/iconic/ea/data/FeatureClass.java new file mode 100644 index 0000000..8f93fbb --- /dev/null +++ b/api/src/main/java/org/iconic/ea/data/FeatureClass.java @@ -0,0 +1,57 @@ +package org.iconic.ea.data; + +import org.iconic.ea.data.preprocessing.Preprocessor; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Set; + +public abstract class FeatureClass { + private boolean output; + private ArrayList originalSamples, modifiedSamples; + private Set> preprocessors; + // TODO - Add in the pre-processing features + + protected FeatureClass(boolean output) { + this.output = output; + this.originalSamples = new ArrayList<>(); + this.modifiedSamples = new ArrayList<>(); + this.preprocessors = new HashSet<>(); + } + + public void addSampleValue(T value) { + originalSamples.add(value); + modifiedSamples.add(value); + } + + public T getSampleValue(int row) { + return modifiedSamples.get(row); + } + + public ArrayList getSamples() { + return modifiedSamples; + } + + public void applyPreProcessing() { + // Ignoring order of operations + getPreprocessors().stream() + .filter(Preprocessor::isEnabled) + .forEach(p -> p.apply(modifiedSamples)); + } + + public Set> getPreprocessors() { + return preprocessors; 
+ } + + public void setPreprocessors(Set> preprocessors) { + this.preprocessors = preprocessors; + } + + public boolean isOutput() { + return output; + } + + public void setOutput(boolean value) { + this.output = value; + } +} \ No newline at end of file diff --git a/api/src/main/java/org/iconic/ea/data/NumericFeatureClass.java b/api/src/main/java/org/iconic/ea/data/NumericFeatureClass.java new file mode 100644 index 0000000..1a0326e --- /dev/null +++ b/api/src/main/java/org/iconic/ea/data/NumericFeatureClass.java @@ -0,0 +1,12 @@ +package org.iconic.ea.data; + +import org.iconic.ea.data.preprocessing.Normalise; +import org.iconic.ea.data.preprocessing.Offset; + +public class NumericFeatureClass extends FeatureClass { + public NumericFeatureClass(boolean output) { + super(output); + getPreprocessors().add(new Normalise()); + getPreprocessors().add(new Offset()); + } +} \ No newline at end of file diff --git a/api/src/main/java/org/iconic/ea/data/preprocessing/Normalise.java b/api/src/main/java/org/iconic/ea/data/preprocessing/Normalise.java index b84e2f9..970fbe6 100644 --- a/api/src/main/java/org/iconic/ea/data/preprocessing/Normalise.java +++ b/api/src/main/java/org/iconic/ea/data/preprocessing/Normalise.java @@ -1,26 +1,41 @@ package org.iconic.ea.data.preprocessing; - -import java.util.List; - -public class Normalise { - - public static List apply(List values, double newMin, double newMax) { - double oldMin = values.get(0); - double oldMax = values.get(0); - - for (double value : values) { - oldMin = Math.min(oldMin, value); - oldMax = Math.max(oldMax, value); +import java.util.ArrayList; + +public class Normalise extends Preprocessor{ + private Number oldMin, oldMax, newMin, newMax; + + /** + * + * @param values + */ + @Override + public void apply(ArrayList values) { + oldMin = values.get(0); + oldMax = values.get(0); + + for (Number value : values) { + if (value.doubleValue() < oldMin.doubleValue()) { + oldMin = value; + } + + if (value.doubleValue() > 
oldMax.doubleValue()) { + oldMax = value; + } } - for (int i = 0; i < values.size(); i++) - values.set(i, map(values.get(i), oldMin, oldMax, newMin, newMax)); - - return values; + for (int i = 0; i < values.size(); i++) { + Number value = map(values.get(i), oldMin, oldMax, newMin, newMax); + values.set(i, value); + } } - private static double map(double value, double oldMin, double oldMax, double newMin, double newMax) { - return newMin + ((value - oldMin) * (newMax - newMin)) / (oldMax - oldMin); + private double map(Number value, Number oldMin, Number oldMax, Number newMin, Number newMax) { + return newMin.doubleValue() + + ((value.doubleValue() - oldMin.doubleValue()) + * (newMax.doubleValue() - newMin.doubleValue())) + / (oldMax.doubleValue() - oldMin.doubleValue()); } -} + + public void setRange(Number newMin, Number newMax) { this.newMin = newMin; this.newMax = newMax; } +} \ No newline at end of file diff --git a/api/src/main/java/org/iconic/ea/data/preprocessing/Offset.java b/api/src/main/java/org/iconic/ea/data/preprocessing/Offset.java new file mode 100644 index 0000000..70de332 --- /dev/null +++ b/api/src/main/java/org/iconic/ea/data/preprocessing/Offset.java @@ -0,0 +1,30 @@ +package org.iconic.ea.data.preprocessing; + +import java.util.ArrayList; + +public class Offset extends Preprocessor{ + private Number offset; + + /** + *

+ * Transforms an array of values by shifting the values by a given offset. + *

+ * + * @param values the array that will be transformed. + */ + @Override + public void apply(ArrayList values) { + for (int i = 0; i < values.size(); i++) { + Double value = values.get(i).doubleValue() + offset.doubleValue(); + values.set(i, value); + } + } + + /** + *

+ * Set the value that the array will be offset by. + *

+ * @param offset The value to offset the array by. + */ + public void setOffset(Number offset) { this.offset = offset; } +} \ No newline at end of file diff --git a/api/src/main/java/org/iconic/ea/data/preprocessing/Preprocessor.java b/api/src/main/java/org/iconic/ea/data/preprocessing/Preprocessor.java new file mode 100644 index 0000000..6ba9edf --- /dev/null +++ b/api/src/main/java/org/iconic/ea/data/preprocessing/Preprocessor.java @@ -0,0 +1,30 @@ +package org.iconic.ea.data.preprocessing; + +import java.util.ArrayList; + +public abstract class Preprocessor { + private boolean enabled; + + protected Preprocessor() { + enabled = false; + } + + public abstract void apply(ArrayList values); + + /** + *

Sets whether or not this preprocessor should be used to the provided value

+ * + * @param enabled True if the preprocessor should be used + */ + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + + /** + * + * @return + */ + public boolean isEnabled() { + return enabled; + } +} diff --git a/api/src/main/java/org/iconic/ea/operator/objective/DefaultObjective.java b/api/src/main/java/org/iconic/ea/operator/objective/DefaultObjective.java index c494bd0..758e658 100644 --- a/api/src/main/java/org/iconic/ea/operator/objective/DefaultObjective.java +++ b/api/src/main/java/org/iconic/ea/operator/objective/DefaultObjective.java @@ -2,6 +2,7 @@ import lombok.extern.log4j.Log4j2; import org.iconic.ea.chromosome.Chromosome; +import org.iconic.ea.data.DataManager; import org.iconic.ea.operator.objective.error.ErrorFunction; import java.util.List; @@ -16,15 +17,13 @@ public class DefaultObjective extends ErrorBasedObjective { /** - *

- * Constructs a new DefaultObjective. - *

+ *

Constructs a new DefaultObjective

* - * @param lambda The error function to apply - * @param samples The samples to use with the error function + * @param lambda The error function to apply + * @param dataManager The samples to use with the error function */ - public DefaultObjective(ErrorFunction lambda, List> samples) { - super(lambda, samples); + public DefaultObjective(final ErrorFunction lambda, final DataManager dataManager) { + super(lambda, dataManager); } /** @@ -32,12 +31,9 @@ public DefaultObjective(ErrorFunction lambda, List> samples) { */ @Override public double apply(final Chromosome c) { - List results = (List) c.evaluate(getSamples()); - + List results = (List) c.evaluate(getDataManager()); final double fitness = getLambda().apply(results, (List) getExpectedResults()); - c.setFitness(fitness); - return fitness; } } diff --git a/api/src/main/java/org/iconic/ea/operator/objective/ErrorBasedObjective.java b/api/src/main/java/org/iconic/ea/operator/objective/ErrorBasedObjective.java index c17b782..c1ce551 100644 --- a/api/src/main/java/org/iconic/ea/operator/objective/ErrorBasedObjective.java +++ b/api/src/main/java/org/iconic/ea/operator/objective/ErrorBasedObjective.java @@ -1,10 +1,16 @@ package org.iconic.ea.operator.objective; +import lombok.extern.log4j.Log4j2; import org.iconic.ea.chromosome.Chromosome; +import org.iconic.ea.data.DataManager; +import org.iconic.ea.data.FeatureClass; import org.iconic.ea.operator.objective.error.ErrorFunction; +import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; +import java.util.stream.Collectors; /** * {@inheritDoc} @@ -12,9 +18,10 @@ * An error based objective is an objective function based around an error function. *

*/ +@Log4j2 public abstract class ErrorBasedObjective implements Objective { private final ErrorFunction lambda; - private List> samples; + private DataManager dataManager; private List expectedResults; private boolean changed; @@ -23,17 +30,12 @@ public abstract class ErrorBasedObjective implements Objective { * Constructs a new ErrorBasedObjective with the provided error function and samples. *

* - *

- * The samples used should be a two-dimensional matrix, with each sample on a separate row. The final - * column must contain the expected result. - *

- * * @param lambda The error function to apply - * @param samples The samples to use with the error function + * @param dataManager The dataset to apply the error function on */ - public ErrorBasedObjective(final ErrorFunction lambda, final List> samples) { + public ErrorBasedObjective(final ErrorFunction lambda, final DataManager dataManager) { this.lambda = lambda; - this.samples = samples; + this.dataManager = dataManager; this.expectedResults = new LinkedList<>(); this.changed = true; } @@ -45,12 +47,12 @@ public ErrorBasedObjective(final ErrorFunction lambda, final List> sampl public abstract double apply(final Chromosome c); /** - *

Returns the samples used by this objective.

+ *

Returns the dataset used by this objective.

* - * @return the samples used by this objective + * @return the dataset used by this objective */ - protected List> getSamples() { - return samples; + protected DataManager getDataManager() { + return dataManager; } /** @@ -70,15 +72,16 @@ protected ErrorFunction getLambda() { protected List getExpectedResults() { // Check if the expected results need to be recalculated if (isChanged()) { - List results = new LinkedList<>(); + HashMap> dataset = getDataManager().getDataset(); // Collect the expected answers - for (List sample : getSamples()) { - T result = sample.get(sample.size() - 1); - results.add(result); - } + List> features = dataset.values().stream() + .filter(FeatureClass::isOutput) + .limit(1) + .collect(Collectors.toList()); + + expectedResults = (ArrayList) features.get(0).getSamples(); - expectedResults = results; setChanged(false); } @@ -112,13 +115,13 @@ private void setChanged(final boolean changed) { /** *

- * Sets the samples of this objective to the provided value and marks it as changed. + * Sets the dataset of this objective to the provided value and marks it as changed. *

* - * @param samples The value to set the samples of this objective to + * @param dataManager The value to set the dataset of this objective to */ - public void setSamples(final List> samples) { + public void setDataManager(final DataManager dataManager) { setChanged(true); - this.samples = samples; + this.dataManager = dataManager; } } diff --git a/api/src/test/java/org/iconic/ea/data/DataManagerTest.java b/api/src/test/java/org/iconic/ea/data/DataManagerTest.java new file mode 100644 index 0000000..cc7c84c --- /dev/null +++ b/api/src/test/java/org/iconic/ea/data/DataManagerTest.java @@ -0,0 +1,132 @@ +package org.iconic.ea.data; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.Random; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + +class DataManagerTest { + + /** Test file */ + private static final String TEST_FILE_HEADER = "Iris-Header.txt"; + /** Same test file without the header row */ + private static final String TEST_FILE_NO_HEADER = "Iris-NoHeader.txt"; + + /** No. samples in test file */ + private static final int SAMPLES = 37; + /** No. features in test file */ + private static final int FEATURES = 5; + + /** DM for file with header */ + private static DataManager dataManagerHeader; + /** DM for file without header */ + private static DataManager dataManagerNoHeader; + /** Generic DM (Will be set randomly) */ + private static DataManager dataManager; + + /** + * Run once, before any of the tests start + */ + @BeforeAll + static void setUp() { + dataManagerHeader = new DataManager<>(TEST_FILE_HEADER); + dataManagerNoHeader = new DataManager<>(TEST_FILE_NO_HEADER); + // Set to either of the above two, shouldn't matter. 
+ dataManager = (new Random().nextInt(2) == 0) ? dataManagerHeader : dataManagerNoHeader; + } + + @DisplayName("Test conversion of integers to Excel-style headers") + @MethodSource("intToHeaderProvider") + @ParameterizedTest + void testIntToHeader(int num, String expected) { + assertEquals(expected, dataManager.intToHeader(num)); + } + + @DisplayName("Test the header values are correct") + @Test + void testHeaderValues() { + List supplied = dataManagerHeader.getSampleHeaders(); + List converted = dataManagerNoHeader.getSampleHeaders(); + assertAll("Header checks", + () -> assertEquals("Sepal Length", supplied.get(0)), + () -> assertEquals("Class", supplied.get(4)), + () -> assertEquals("A", converted.get(0)), + () -> assertEquals("E", converted.get(4)) + ); + } + + @DisplayName("Assert that all sizes match the input files") + @Test + void testSizes() { + assertAll("All sizes", + () -> assertEquals(SAMPLES, dataManagerHeader.getSampleSize()), + () -> assertEquals(FEATURES, dataManagerHeader.getFeatureSize()), + () -> assertEquals(SAMPLES, dataManagerNoHeader.getSampleSize()), + () -> assertEquals(FEATURES, dataManagerNoHeader.getFeatureSize()), + // Only need to test one of the data managers from here on + () -> assertEquals(FEATURES, dataManager.getDataset().size()), + () -> { + for (int i = 0; i < FEATURES; i++) { + assertEquals(SAMPLES, dataManager.getSampleColumn(i).size()); + } + }, + () -> { + for (int i = 0; i < SAMPLES; i++) { + assertEquals(FEATURES, dataManager.getSampleRow(i).size()); + } + } + ); + } + + @DisplayName("Assert that certain variables match what was supplied") + @Test + void testVariables() { + final double delta = 0.001; + assertAll("Check certain variables match the input file", + () -> assertEquals(5.1, getVariable("A", 0), delta), + () -> assertEquals(3.0, getVariable("B", 1), delta), + () -> assertEquals(1.3, getVariable("C", 2), delta), + () -> assertEquals(0.2, getVariable("D", 3), delta), + () -> assertEquals(0.0, getVariable("E", 
4), delta) + ); + } + + //--------- + // Helpers + //--------- + + /** + * Get the expected results for the method intToHeader. Current usage dictates no negative numbers are supplied. + * @return Stream of: int, String + */ + private static Stream intToHeaderProvider() { + return Stream.of( + Arguments.of(0, "A"), + Arguments.of(1, "B"), + Arguments.of(25, "Z"), + Arguments.of(26, "AA"), + Arguments.of(260, "JA"), + Arguments.of(701, "ZZ"), + Arguments.of(702, "AAA") + ); + } + + /** + * Helper method to retrieve the double variable of a certain row, making the test more readable + * (For data manager without a header, so headers are A, B, C, ...) + * @param header Feature selected + * @param row Sample selected + * @return Double value of this variable + */ + private double getVariable(String header, int row) { + return dataManagerNoHeader.getSampleVariable(header, row).doubleValue(); + } +} \ No newline at end of file diff --git a/api/src/test/resources/Iris-Header.txt b/api/src/test/resources/Iris-Header.txt new file mode 100644 index 0000000..05d3cbd --- /dev/null +++ b/api/src/test/resources/Iris-Header.txt @@ -0,0 +1,38 @@ +Sepal Length,Sepal Width,Petal Length,Petal Width,Class +5.1,3.5,1.4,0.2,0 +4.9,3.0,1.4,0.2,0 +4.7,3.2,1.3,0.2,0 +4.6,3.1,1.5,0.2,0 +4.9,3.1,1.5,0.2,0 +5.0,3.2,1.2,0.2,0 +5.5,3.5,1.3,0.2,0 +5.1,3.8,1.9,0.4,0 +4.8,3.0,1.4,0.3,0 +5.1,3.8,1.6,0.2,0 +4.6,3.2,1.4,0.2,0 +5.3,3.7,1.5,0.2,0 +5.0,3.3,1.4,0.2,0 +7.0,3.2,4.7,1.4,0.5 +6.4,3.2,4.5,1.5,0.5 +6.9,3.1,4.9,1.5,0.5 +5.5,2.3,4.0,1.3,0.5 +6.5,2.8,4.6,1.5,0.5 +5.6,2.7,4.2,1.3,0.5 +5.7,3.0,4.2,1.2,0.5 +5.7,2.9,4.2,1.3,0.5 +6.2,2.9,4.3,1.3,0.5 +5.1,2.5,3.0,1.1,0.5 +5.7,2.8,4.1,1.3,0.5 +6.3,3.3,6.0,2.5,1 +5.8,2.7,5.1,1.9,1 +7.1,3.0,5.9,2.1,1 +6.7,3.1,5.6,2.4,1 +6.9,3.1,5.1,2.3,1 +5.8,2.7,5.1,1.9,1 +6.8,3.2,5.9,2.3,1 +6.7,3.3,5.7,2.5,1 +6.7,3.0,5.2,2.3,1 +6.3,2.5,5.0,1.9,1 +6.5,3.0,5.2,2.0,1 +6.2,3.4,5.4,2.3,1 +5.9,3.0,5.1,1.8,1 \ No newline at end of file diff --git 
a/api/src/test/resources/Iris-NoHeader.txt b/api/src/test/resources/Iris-NoHeader.txt new file mode 100644 index 0000000..6f22a55 --- /dev/null +++ b/api/src/test/resources/Iris-NoHeader.txt @@ -0,0 +1,37 @@ +5.1,3.5,1.4,0.2,0 +4.9,3.0,1.4,0.2,0 +4.7,3.2,1.3,0.2,0 +4.6,3.1,1.5,0.2,0 +4.9,3.1,1.5,0.2,0 +5.0,3.2,1.2,0.2,0 +5.5,3.5,1.3,0.2,0 +5.1,3.8,1.9,0.4,0 +4.8,3.0,1.4,0.3,0 +5.1,3.8,1.6,0.2,0 +4.6,3.2,1.4,0.2,0 +5.3,3.7,1.5,0.2,0 +5.0,3.3,1.4,0.2,0 +7.0,3.2,4.7,1.4,0.5 +6.4,3.2,4.5,1.5,0.5 +6.9,3.1,4.9,1.5,0.5 +5.5,2.3,4.0,1.3,0.5 +6.5,2.8,4.6,1.5,0.5 +5.6,2.7,4.2,1.3,0.5 +5.7,3.0,4.2,1.2,0.5 +5.7,2.9,4.2,1.3,0.5 +6.2,2.9,4.3,1.3,0.5 +5.1,2.5,3.0,1.1,0.5 +5.7,2.8,4.1,1.3,0.5 +6.3,3.3,6.0,2.5,1 +5.8,2.7,5.1,1.9,1 +7.1,3.0,5.9,2.1,1 +6.7,3.1,5.6,2.4,1 +6.9,3.1,5.1,2.3,1 +5.8,2.7,5.1,1.9,1 +6.8,3.2,5.9,2.3,1 +6.7,3.3,5.7,2.5,1 +6.7,3.0,5.2,2.3,1 +6.3,2.5,5.0,1.9,1 +6.5,3.0,5.2,2.0,1 +6.2,3.4,5.4,2.3,1 +5.9,3.0,5.1,1.8,1 \ No newline at end of file diff --git a/cli/src/main/java/org/iconic/Client.java b/cli/src/main/java/org/iconic/Client.java index ae02411..ee9bc36 100644 --- a/cli/src/main/java/org/iconic/Client.java +++ b/cli/src/main/java/org/iconic/Client.java @@ -38,16 +38,16 @@ public static void main(String[] args) { final String inputFile = client.getArgs().getInput(); if (!"".equals(inputFile) && !inputFile.isEmpty()) { - final DataManager dm = new DataManager<>(Double.class, inputFile); + final DataManager dm = new DataManager<>(inputFile); int featureSize = dm.getFeatureSize(); int sampleSize = dm.getSampleSize(); - log.info("Feature Size: {}", () -> featureSize); + log.info("Feature Size: {}", () -> featureSize - 1); log.info("Sample Size: {}", () -> sampleSize); // Create a supplier for Gene Expression Programming chromosomes - ExpressionChromosomeFactory supplier = new ExpressionChromosomeFactory<>(10, featureSize); + ExpressionChromosomeFactory supplier = new ExpressionChromosomeFactory<>(10, featureSize - 1); // Add in the functions the chromosomes 
can use supplier.addFunction(Arrays.asList( @@ -66,9 +66,9 @@ public static void main(String[] args) { // Add in the objectives the algorithm should aim for gep.addObjective( - new DefaultObjective<>( - new MeanSquaredError(), dm.getSamples()) - ); + new DefaultObjective( + new MeanSquaredError(), dm + )); // log.info("Function Primitives used: {}", supplier::getFunctions); diff --git a/client/src/main/java/org/iconic/project/dataset/DatasetModel.java b/client/src/main/java/org/iconic/project/dataset/DatasetModel.java index a28629e..e8828af 100644 --- a/client/src/main/java/org/iconic/project/dataset/DatasetModel.java +++ b/client/src/main/java/org/iconic/project/dataset/DatasetModel.java @@ -31,7 +31,7 @@ public class DatasetModel implements Displayable { public DatasetModel(@NonNull final String name, @NonNull final String absolutePath) { this.name = new SimpleStringProperty(name); this.absolutePath = new SimpleStringProperty(absolutePath); - this.dataManager = new DataManager<>(Double.class, absolutePath); + this.dataManager = new DataManager(absolutePath); this.id = UUID.randomUUID(); } diff --git a/client/src/main/java/org/iconic/project/search/SearchModel.java b/client/src/main/java/org/iconic/project/search/SearchModel.java index 52ea7eb..0603b94 100644 --- a/client/src/main/java/org/iconic/project/search/SearchModel.java +++ b/client/src/main/java/org/iconic/project/search/SearchModel.java @@ -74,7 +74,7 @@ public SearchModel(@NonNull final DatasetModel datasetModel) { // Add in the objectives the algorithm should aim for ea.addObjective( new DefaultObjective<>( - new MeanSquaredError(), datasetModel.getDataManager().getSamples()) + new MeanSquaredError(), datasetModel.getDataManager()) ); } diff --git a/client/src/main/java/org/iconic/workspace/WorkspaceController.java b/client/src/main/java/org/iconic/workspace/WorkspaceController.java index a0430ef..3de4021 100644 --- a/client/src/main/java/org/iconic/workspace/WorkspaceController.java +++ 
b/client/src/main/java/org/iconic/workspace/WorkspaceController.java @@ -19,14 +19,12 @@ import lombok.extern.log4j.Log4j2; import lombok.val; import org.iconic.ea.data.DataManager; -import org.iconic.ea.data.preprocessing.Normalise; import org.iconic.project.Displayable; import org.iconic.project.dataset.DatasetModel; import org.iconic.project.search.SearchModel; import org.iconic.project.search.SearchService; import java.net.URL; -import java.util.List; import java.util.Optional; import java.util.ResourceBundle; @@ -198,12 +196,12 @@ public void featureSelected(int selectedIndex) { Optional> dataManager = getDataManager(); if (dataManager.isPresent() && selectedIndex >= 0) { - List values = dataManager.get().getSampleColumn(selectedIndex); +// List values = dataManager.get().getSampleColumn(selectedIndex); - for (int sample = 0; sample < values.size(); sample++) { - double value = values.get(sample); - series.getData().add(new XYChart.Data<>(sample, value)); - } +// for (int sample = 0; sample < values.size(); sample++) { +// double value = values.get(sample); +// series.getData().add(new XYChart.Data<>(sample, value)); +// } } lcDataView.getData().add(series); } @@ -217,23 +215,23 @@ public void normalizeDatasetFeature() { Optional> dataManager = getDataManager(); if (cbNormalise.isSelected() && dataManager.isPresent()) { - List values = dataManager.get().getSampleColumn(selectedIndex); +// List values = dataManager.get().getSampleColumn(selectedIndex); try { double min = Double.parseDouble(tfNormaliseMin.getText()); double max = Double.parseDouble(tfNormaliseMax.getText()); if (min < max) { - values = Normalise.apply(values, min, max); +// values = Normalise.apply(values, min, max); - dataManager.get().setSampleColumn(selectedIndex, values); +// dataManager.get().setSampleColumn(selectedIndex, values); } } catch (Exception e) { log.error("Min and Max values must be a Number"); } } // Otherwise reset the sample column - else 
dataManager.ifPresent(doubleDataManager -> doubleDataManager.resetSampleColumn(selectedIndex)); +// else dataManager.ifPresent(doubleDataManager -> doubleDataManager.resetSampleColumn(selectedIndex)); featureSelected(selectedIndex); }