From 327baa0c14a0538732e52e868913779343275d96 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Fri, 17 Jul 2020 18:37:43 +0200 Subject: [PATCH 1/3] Rename binary_soft_classification evaluation to outlier_detection --- .../MlEvaluationNamedXContentProvider.java | 30 ++-- .../AbstractConfusionMatrixMetric.java | 2 +- .../AucRocMetric.java | 2 +- .../ConfusionMatrixMetric.java | 2 +- .../OutlierDetection.java} | 26 ++-- .../PrecisionMetric.java | 2 +- .../RecallMetric.java | 2 +- .../client/MachineLearningIT.java | 97 +++++++------ .../client/RestHighLevelClientTests.java | 31 ++-- .../MlClientDocumentationIT.java | 39 +++-- .../ml/EvaluateDataFrameRequestTests.java | 4 +- .../ml/EvaluateDataFrameResponseTests.java | 14 +- .../AucRocMetricAucRocPointTests.java | 4 +- .../AucRocMetricResultTests.java | 8 +- ...usionMatrixMetricConfusionMatrixTests.java | 4 +- .../ConfusionMatrixMetricResultTests.java | 6 +- .../OutlierDetectionTests.java} | 22 +-- .../PrecisionMetricResultTests.java | 4 +- .../RecallMetricResultTests.java | 4 +- .../ml/evaluate-data-frame.asciidoc | 10 +- .../apis/evaluate-dfanalytics.asciidoc | 136 +++++++++--------- .../MlEvaluationNamedXContentProvider.java | 42 +++--- .../AbstractConfusionMatrixMetric.java | 4 +- .../AucRoc.java | 8 +- .../ConfusionMatrix.java | 6 +- .../OutlierDetection.java} | 26 ++-- .../Precision.java | 4 +- .../Recall.java | 4 +- .../ScoreByThresholdResult.java | 4 +- .../EvaluateDataFrameActionRequestTests.java | 6 +- .../AucRocTests.java | 2 +- .../ConfusionMatrixTests.java | 2 +- .../OutlierDetectionTests.java} | 24 ++-- .../PrecisionTests.java | 2 +- .../RecallTests.java | 2 +- .../ml/qa/ml-with-security/build.gradle | 22 +-- .../test/ml/evaluate_data_frame.yml | 122 ++++++++-------- 37 files changed, 360 insertions(+), 369 deletions(-) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/AbstractConfusionMatrixMetric.java (95%) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/AucRocMetric.java (99%) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/ConfusionMatrixMetric.java (98%) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification/BinarySoftClassification.java => outlierdetection/OutlierDetection.java} (79%) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/PrecisionMetric.java (98%) rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/RecallMetric.java (98%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/AucRocMetricAucRocPointTests.java (92%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/AucRocMetricResultTests.java (86%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/ConfusionMatrixMetricConfusionMatrixTests.java (92%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/ConfusionMatrixMetricResultTests.java (88%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification/BinarySoftClassificationTests.java => outlierdetection/OutlierDetectionTests.java} (74%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/PrecisionMetricResultTests.java (94%) rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/{softclassification => outlierdetection}/RecallMetricResultTests.java (94%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/AbstractConfusionMatrixMetric.java (97%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/AucRoc.java (97%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/ConfusionMatrix.java (96%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification/BinarySoftClassification.java => outlierdetection/OutlierDetection.java} (80%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/Precision.java (95%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/Recall.java (95%) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/ScoreByThresholdResult.java (93%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/AucRocTests.java (98%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/ConfusionMatrixTests.java (96%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification/BinarySoftClassificationTests.java => outlierdetection/OutlierDetectionTests.java} (78%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/PrecisionTests.java (97%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/{softclassification => outlierdetection}/RecallTests.java (97%) diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java index 0c8094a208fc3..1110de4d64444 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java @@ -21,16 +21,16 @@ import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric; import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification; import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.plugins.spi.NamedXContentProvider; @@ -57,25 +57,25 @@ public List getNamedXContentParsers() { return Arrays.asList( // Evaluations new NamedXContentRegistry.Entry( - Evaluation.class, new ParseField(BinarySoftClassification.NAME), BinarySoftClassification::fromXContent), + Evaluation.class, new ParseField(OutlierDetection.NAME), OutlierDetection::fromXContent), new NamedXContentRegistry.Entry(Evaluation.class, new ParseField(Classification.NAME), Classification::fromXContent), new NamedXContentRegistry.Entry(Evaluation.class, new ParseField(Regression.NAME), Regression::fromXContent), // Evaluation metrics new NamedXContentRegistry.Entry( EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME)), AucRocMetric::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME)), PrecisionMetric::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME)), RecallMetric::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME)), ConfusionMatrixMetric::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.class, @@ -114,19 +114,19 @@ Evaluation.class, new ParseField(BinarySoftClassification.NAME), BinarySoftClass // Evaluation metrics results new NamedXContentRegistry.Entry( EvaluationMetric.Result.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME)), AucRocMetric.Result::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.Result.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME)), PrecisionMetric.Result::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.Result.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME)), RecallMetric.Result::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.Result.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME)), ConfusionMatrixMetric.Result::fromXContent), new NamedXContentRegistry.Entry( EvaluationMetric.Result.class, diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java similarity index 95% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java index f41c13f248ab9..4eb535c29e268 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.common.ParseField; diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetric.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetric.java similarity index 99% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetric.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetric.java index 78c713c592581..959de6a97a8ba 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetric.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetric.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.common.Nullable; diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetric.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetric.java similarity index 98% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetric.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetric.java index d5e4307c9cc74..cc90a4c2a4640 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetric.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetric.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.common.ParseField; diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java similarity index 79% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java index b75af7cec11f6..ee29240f7cba4 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; @@ -38,24 +38,22 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; /** - * Evaluation of binary soft classification methods, e.g. outlier detection. - * This is useful to evaluate problems where a model outputs a probability of whether - * a data frame row belongs to one of two groups. + * Evaluation of outlier detection results. */ -public class BinarySoftClassification implements Evaluation { +public class OutlierDetection implements Evaluation { - public static final String NAME = "binary_soft_classification"; + public static final String NAME = "outlier_detection"; private static final ParseField ACTUAL_FIELD = new ParseField("actual_field"); private static final ParseField PREDICTED_PROBABILITY_FIELD = new ParseField("predicted_probability_field"); private static final ParseField METRICS = new ParseField("metrics"); @SuppressWarnings("unchecked") - public static final ConstructingObjectParser PARSER = + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, true, - args -> new BinarySoftClassification((String) args[0], (String) args[1], (List) args[2])); + args -> new OutlierDetection((String) args[0], (String) args[1], (List) args[2])); static { PARSER.declareString(constructorArg(), ACTUAL_FIELD); @@ -64,7 +62,7 @@ public class BinarySoftClassification implements Evaluation { optionalConstructorArg(), (p, c, n) -> p.namedObject(EvaluationMetric.class, registeredMetricName(NAME, n), null), METRICS); } - public static BinarySoftClassification fromXContent(XContentParser parser) { + public static OutlierDetection fromXContent(XContentParser parser) { return PARSER.apply(parser, null); } @@ -84,16 +82,16 @@ public static BinarySoftClassification fromXContent(XContentParser parser) { */ private final List metrics; - public BinarySoftClassification(String actualField, String predictedField) { + public OutlierDetection(String actualField, String predictedField) { this(actualField, predictedField, (List)null); } - public BinarySoftClassification(String actualField, String predictedProbabilityField, EvaluationMetric... metric) { + public OutlierDetection(String actualField, String predictedProbabilityField, EvaluationMetric... metric) { this(actualField, predictedProbabilityField, Arrays.asList(metric)); } - public BinarySoftClassification(String actualField, String predictedProbabilityField, - @Nullable List metrics) { + public OutlierDetection(String actualField, String predictedProbabilityField, + @Nullable List metrics) { this.actualField = Objects.requireNonNull(actualField); this.predictedProbabilityField = Objects.requireNonNull(predictedProbabilityField); if (metrics != null) { @@ -129,7 +127,7 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - BinarySoftClassification that = (BinarySoftClassification) o; + OutlierDetection that = (OutlierDetection) o; return Objects.equals(actualField, that.actualField) && Objects.equals(predictedProbabilityField, that.predictedProbabilityField) && Objects.equals(metrics, that.metrics); diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetric.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetric.java similarity index 98% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetric.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetric.java index 2a0f1499461d6..98fb64add64cb 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetric.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetric.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.common.Strings; diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetric.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetric.java similarity index 98% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetric.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetric.java index 505ff1b34d7c5..badbd008a04fa 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetric.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetric.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.common.Strings; diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 3e0a006598d75..d8e886b1a417e 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -135,22 +135,21 @@ import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource; import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsState; import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsStats; -import org.elasticsearch.client.ml.dataframe.OutlierDetection; import org.elasticsearch.client.ml.dataframe.PhaseProgress; import org.elasticsearch.client.ml.dataframe.QueryConfig; import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric; import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification; import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.dataframe.stats.common.DataCounts; @@ -1286,7 +1285,7 @@ public void testPutDataFrameAnalyticsConfig_GivenOutlierDetectionAnalysis() thro .setDest(DataFrameAnalyticsDest.builder() .setIndex("put-test-dest-index") .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .setDescription("some description") .build(); @@ -1301,7 +1300,7 @@ public void testPutDataFrameAnalyticsConfig_GivenOutlierDetectionAnalysis() thro assertThat(createdConfig.getSource().getQueryConfig(), equalTo(new QueryConfig(new MatchAllQueryBuilder()))); // default value assertThat(createdConfig.getDest().getIndex(), equalTo(config.getDest().getIndex())); assertThat(createdConfig.getDest().getResultsField(), equalTo("ml")); // default value - assertThat(createdConfig.getAnalysis(), equalTo(OutlierDetection.builder() + assertThat(createdConfig.getAnalysis(), equalTo(org.elasticsearch.client.ml.dataframe.OutlierDetection.builder() .setComputeFeatureInfluence(true) .setOutlierFraction(0.05) .setStandardizationEnabled(true).build())); @@ -1438,7 +1437,7 @@ public void testGetDataFrameAnalyticsConfig_SingleConfig() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex("get-test-dest-index") .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); createIndex("get-test-source-index", defaultMappingForTest()); @@ -1472,7 +1471,7 @@ public void testGetDataFrameAnalyticsConfig_MultipleConfigs() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex("get-test-dest-index") .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); PutDataFrameAnalyticsResponse putDataFrameAnalyticsResponse = execute( @@ -1542,7 +1541,7 @@ public void testGetDataFrameAnalyticsStats() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex(destIndex) .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); execute( @@ -1595,7 +1594,7 @@ public void testStartDataFrameAnalyticsConfig() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex(destIndex) .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); execute( @@ -1636,7 +1635,7 @@ public void testStopDataFrameAnalyticsConfig() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex(destIndex) .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); execute( @@ -1678,7 +1677,7 @@ public void testDeleteDataFrameAnalyticsConfig() throws Exception { .setDest(DataFrameAnalyticsDest.builder() .setIndex("delete-test-dest-index") .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); createIndex("delete-test-source-index", defaultMappingForTest()); @@ -1720,21 +1719,21 @@ public void testDeleteDataFrameAnalyticsConfig_ConfigNotFound() { assertThat(exception.status().getStatus(), equalTo(404)); } - public void testEvaluateDataFrame_BinarySoftClassification() throws IOException { + public void testEvaluateDataFrame_OutlierDetection() throws IOException { String indexName = "evaluate-test-index"; - createIndex(indexName, mappingForSoftClassification()); + createIndex(indexName, mappingForOutlierDetection()); BulkRequest bulk = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) - .add(docForSoftClassification(indexName, "blue", false, 0.1)) // #0 - .add(docForSoftClassification(indexName, "blue", false, 0.2)) // #1 - .add(docForSoftClassification(indexName, "blue", false, 0.3)) // #2 - .add(docForSoftClassification(indexName, "blue", false, 0.4)) // #3 - .add(docForSoftClassification(indexName, "blue", false, 0.7)) // #4 - .add(docForSoftClassification(indexName, "blue", true, 0.2)) // #5 - .add(docForSoftClassification(indexName, "green", true, 0.3)) // #6 - .add(docForSoftClassification(indexName, "green", true, 0.4)) // #7 - .add(docForSoftClassification(indexName, "green", true, 0.8)) // #8 - .add(docForSoftClassification(indexName, "green", true, 0.9)); // #9 + .add(docForOutlierDetection(indexName, "blue", false, 0.1)) // #0 + .add(docForOutlierDetection(indexName, "blue", false, 0.2)) // #1 + .add(docForOutlierDetection(indexName, "blue", false, 0.3)) // #2 + .add(docForOutlierDetection(indexName, "blue", false, 0.4)) // #3 + .add(docForOutlierDetection(indexName, "blue", false, 0.7)) // #4 + .add(docForOutlierDetection(indexName, "blue", true, 0.2)) // #5 + .add(docForOutlierDetection(indexName, "green", true, 0.3)) // #6 + .add(docForOutlierDetection(indexName, "green", true, 0.4)) // #7 + .add(docForOutlierDetection(indexName, "green", true, 0.8)) // #8 + .add(docForOutlierDetection(indexName, "green", true, 0.9)); // #9 highLevelClient().bulk(bulk, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); @@ -1742,14 +1741,14 @@ public void testEvaluateDataFrame_BinarySoftClassification() throws IOException new EvaluateDataFrameRequest( indexName, null, - new BinarySoftClassification( + new OutlierDetection( actualField, probabilityField, PrecisionMetric.at(0.4, 0.5, 0.6), RecallMetric.at(0.5, 0.7), ConfusionMatrixMetric.at(0.5), AucRocMetric.withCurve())); EvaluateDataFrameResponse evaluateDataFrameResponse = execute(evaluateDataFrameRequest, machineLearningClient::evaluateDataFrame, machineLearningClient::evaluateDataFrameAsync); - assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(BinarySoftClassification.NAME)); + assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(OutlierDetection.NAME)); assertThat(evaluateDataFrameResponse.getMetrics().size(), equalTo(4)); PrecisionMetric.Result precisionResult = evaluateDataFrameResponse.getMetricByName(PrecisionMetric.NAME); @@ -1794,21 +1793,21 @@ public void testEvaluateDataFrame_BinarySoftClassification() throws IOException assertThat(curvePointAtThreshold1.getThreshold(), equalTo(1.0)); } - public void testEvaluateDataFrame_BinarySoftClassification_WithQuery() throws IOException { + public void testEvaluateDataFrame_OutlierDetection_WithQuery() throws IOException { String indexName = "evaluate-with-query-test-index"; - createIndex(indexName, mappingForSoftClassification()); + createIndex(indexName, mappingForOutlierDetection()); BulkRequest bulk = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) - .add(docForSoftClassification(indexName, "blue", true, 1.0)) // #0 - .add(docForSoftClassification(indexName, "blue", true, 1.0)) // #1 - .add(docForSoftClassification(indexName, "blue", true, 1.0)) // #2 - .add(docForSoftClassification(indexName, "blue", true, 1.0)) // #3 - .add(docForSoftClassification(indexName, "blue", true, 0.0)) // #4 - .add(docForSoftClassification(indexName, "blue", true, 0.0)) // #5 - .add(docForSoftClassification(indexName, "green", true, 0.0)) // #6 - .add(docForSoftClassification(indexName, "green", true, 0.0)) // #7 - .add(docForSoftClassification(indexName, "green", true, 0.0)) // #8 - .add(docForSoftClassification(indexName, "green", true, 1.0)); // #9 + .add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #0 + .add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #1 + .add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #2 + .add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #3 + .add(docForOutlierDetection(indexName, "blue", true, 0.0)) // #4 + .add(docForOutlierDetection(indexName, "blue", true, 0.0)) // #5 + .add(docForOutlierDetection(indexName, "green", true, 0.0)) // #6 + .add(docForOutlierDetection(indexName, "green", true, 0.0)) // #7 + .add(docForOutlierDetection(indexName, "green", true, 0.0)) // #8 + .add(docForOutlierDetection(indexName, "green", true, 1.0)); // #9 highLevelClient().bulk(bulk, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); @@ -1817,11 +1816,11 @@ public void testEvaluateDataFrame_BinarySoftClassification_WithQuery() throws IO indexName, // Request only "blue" subset to be evaluated new QueryConfig(QueryBuilders.termQuery(datasetField, "blue")), - new BinarySoftClassification(actualField, probabilityField, ConfusionMatrixMetric.at(0.5))); + new OutlierDetection(actualField, probabilityField, ConfusionMatrixMetric.at(0.5))); EvaluateDataFrameResponse evaluateDataFrameResponse = execute(evaluateDataFrameRequest, machineLearningClient::evaluateDataFrame, machineLearningClient::evaluateDataFrameAsync); - assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(BinarySoftClassification.NAME)); + assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(OutlierDetection.NAME)); assertThat(evaluateDataFrameResponse.getMetrics().size(), equalTo(1)); ConfusionMatrixMetric.Result confusionMatrixResult = evaluateDataFrameResponse.getMetricByName(ConfusionMatrixMetric.NAME); @@ -2093,7 +2092,7 @@ private static XContentBuilder defaultMappingForTest() throws IOException { private static final String actualField = "label"; private static final String probabilityField = "p"; - private static XContentBuilder mappingForSoftClassification() throws IOException { + private static XContentBuilder mappingForOutlierDetection() throws IOException { return XContentFactory.jsonBuilder().startObject() .startObject("properties") .startObject(datasetField) @@ -2109,7 +2108,7 @@ private static XContentBuilder mappingForSoftClassification() throws IOException .endObject(); } - private static IndexRequest docForSoftClassification(String indexName, String dataset, boolean isTrue, double p) { + private static IndexRequest docForOutlierDetection(String indexName, String dataset, boolean isTrue, double p) { return new IndexRequest() .index(indexName) .source(XContentType.JSON, datasetField, dataset, actualField, Boolean.toString(isTrue), probabilityField, p); @@ -2165,11 +2164,11 @@ private void createIndex(String indexName, XContentBuilder mapping) throws IOExc public void testExplainDataFrameAnalytics() throws IOException { String indexName = "explain-df-test-index"; - createIndex(indexName, mappingForSoftClassification()); + createIndex(indexName, mappingForOutlierDetection()); BulkRequest bulk1 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 0; i < 10; ++i) { - bulk1.add(docForSoftClassification(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true))); + bulk1.add(docForOutlierDetection(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true))); } highLevelClient().bulk(bulk1, RequestOptions.DEFAULT); @@ -2178,7 +2177,7 @@ public void testExplainDataFrameAnalytics() throws IOException { new ExplainDataFrameAnalyticsRequest( DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build()); // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. @@ -2200,7 +2199,7 @@ public void testExplainDataFrameAnalytics() throws IOException { BulkRequest bulk2 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 10; i < 100; ++i) { - bulk2.add(docForSoftClassification(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true))); + bulk2.add(docForOutlierDetection(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true))); } highLevelClient().bulk(bulk2, RequestOptions.DEFAULT); diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java index ea26ef7ce912e..03065248efca7 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RestHighLevelClientTests.java @@ -56,7 +56,6 @@ import org.elasticsearch.client.ilm.ShrinkAction; import org.elasticsearch.client.ilm.UnfollowAction; import org.elasticsearch.client.ml.dataframe.DataFrameAnalysis; -import org.elasticsearch.client.ml.dataframe.OutlierDetection; import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric; import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification; import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric; @@ -65,11 +64,11 @@ import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric; import org.elasticsearch.client.ml.dataframe.stats.classification.ClassificationStats; import org.elasticsearch.client.ml.dataframe.stats.outlierdetection.OutlierDetectionStats; import org.elasticsearch.client.ml.dataframe.stats.regression.RegressionStats; @@ -742,7 +741,7 @@ public void testProvidedNamedXContents() { assertTrue(names.contains(FreezeAction.NAME)); assertTrue(names.contains(SetPriorityAction.NAME)); assertEquals(Integer.valueOf(3), categories.get(DataFrameAnalysis.class)); - assertTrue(names.contains(OutlierDetection.NAME.getPreferredName())); + assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.OutlierDetection.NAME.getPreferredName())); assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.Regression.NAME.getPreferredName())); assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.Classification.NAME.getPreferredName())); assertTrue(names.contains(OutlierDetectionStats.NAME.getPreferredName())); @@ -751,14 +750,14 @@ public void testProvidedNamedXContents() { assertEquals(Integer.valueOf(1), categories.get(SyncConfig.class)); assertTrue(names.contains(TimeSyncConfig.NAME)); assertEquals(Integer.valueOf(3), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.Evaluation.class)); - assertThat(names, hasItems(BinarySoftClassification.NAME, Classification.NAME, Regression.NAME)); + assertThat(names, hasItems(OutlierDetection.NAME, Classification.NAME, Regression.NAME)); assertEquals(Integer.valueOf(12), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric.class)); assertThat(names, hasItems( - registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME), + registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME), + registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME), + registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME), + registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME), registeredMetricName(Classification.NAME, AccuracyMetric.NAME), registeredMetricName( Classification.NAME, org.elasticsearch.client.ml.dataframe.evaluation.classification.PrecisionMetric.NAME), @@ -772,10 +771,10 @@ public void testProvidedNamedXContents() { assertEquals(Integer.valueOf(12), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric.Result.class)); assertThat(names, hasItems( - registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME), - registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME), + registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME), + registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME), + registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME), + registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME), registeredMetricName(Classification.NAME, AccuracyMetric.NAME), registeredMetricName( Classification.NAME, org.elasticsearch.client.ml.dataframe.evaluation.classification.PrecisionMetric.NAME), diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 7ccee107985bb..cd0ed4f7a1b34 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -151,7 +151,6 @@ import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource; import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsState; import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsStats; -import org.elasticsearch.client.ml.dataframe.OutlierDetection; import org.elasticsearch.client.ml.dataframe.QueryConfig; import org.elasticsearch.client.ml.dataframe.Regression; import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation; @@ -160,16 +159,16 @@ import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric; import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric.ActualClass; import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric.PredictedClass; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric.ConfusionMatrix; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric; +import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; @@ -2977,12 +2976,12 @@ public void testPutDataFrameAnalytics() throws Exception { // end::put-data-frame-analytics-dest-config // tag::put-data-frame-analytics-outlier-detection-default - DataFrameAnalysis outlierDetection = OutlierDetection.createDefault(); // <1> + DataFrameAnalysis outlierDetection = org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault(); // <1> // end::put-data-frame-analytics-outlier-detection-default // tag::put-data-frame-analytics-outlier-detection-customized - DataFrameAnalysis outlierDetectionCustomized = OutlierDetection.builder() // <1> - .setMethod(OutlierDetection.Method.DISTANCE_KNN) // <2> + DataFrameAnalysis outlierDetectionCustomized = org.elasticsearch.client.ml.dataframe.OutlierDetection.builder() // <1> + .setMethod(org.elasticsearch.client.ml.dataframe.OutlierDetection.Method.DISTANCE_KNN) // <2> .setNNeighbors(5) // <3> .setFeatureInfluenceThreshold(0.1) // <4> .setComputeFeatureInfluence(true) // <5> @@ -3351,9 +3350,9 @@ public void testEvaluateDataFrame() throws Exception { client.indices().create(createIndexRequest, RequestOptions.DEFAULT); client.bulk(bulkRequest, RequestOptions.DEFAULT); { - // tag::evaluate-data-frame-evaluation-softclassification + // tag::evaluate-data-frame-evaluation-outlierdetection Evaluation evaluation = - new BinarySoftClassification( // <1> + new OutlierDetection( // <1> "label", // <2> "p", // <3> // Evaluation metrics // <4> @@ -3361,7 +3360,7 @@ public void testEvaluateDataFrame() throws Exception { RecallMetric.at(0.5, 0.7), // <6> ConfusionMatrixMetric.at(0.5), // <7> AucRocMetric.withCurve()); // <8> - // end::evaluate-data-frame-evaluation-softclassification + // end::evaluate-data-frame-evaluation-outlierdetection // tag::evaluate-data-frame-request EvaluateDataFrameRequest request = @@ -3379,13 +3378,13 @@ public void testEvaluateDataFrame() throws Exception { List metrics = response.getMetrics(); // <1> // end::evaluate-data-frame-response - // tag::evaluate-data-frame-results-softclassification + // tag::evaluate-data-frame-results-outlierdetection PrecisionMetric.Result precisionResult = response.getMetricByName(PrecisionMetric.NAME); // <1> double precision = precisionResult.getScoreByThreshold("0.4"); // <2> ConfusionMatrixMetric.Result confusionMatrixResult = response.getMetricByName(ConfusionMatrixMetric.NAME); // <3> ConfusionMatrix confusionMatrix = confusionMatrixResult.getScoreByThreshold("0.5"); // <4> - // end::evaluate-data-frame-results-softclassification + // end::evaluate-data-frame-results-outlierdetection assertThat( metrics.stream().map(EvaluationMetric.Result::getMetricName).collect(Collectors.toList()), @@ -3400,7 +3399,7 @@ public void testEvaluateDataFrame() throws Exception { EvaluateDataFrameRequest request = new EvaluateDataFrameRequest( indexName, new QueryConfig(QueryBuilders.termQuery("dataset", "blue")), - new BinarySoftClassification( + new OutlierDetection( "label", "p", PrecisionMetric.at(0.4, 0.5, 0.6), @@ -3622,7 +3621,7 @@ public void testExplainDataFrameAnalytics() throws Exception { // tag::explain-data-frame-analytics-config-request DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); request = new ExplainDataFrameAnalyticsRequest(config); // <1> // end::explain-data-frame-analytics-config-request @@ -3652,7 +3651,7 @@ public void testExplainDataFrameAnalytics() throws Exception { { DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); // tag::explain-data-frame-analytics-execute-listener @@ -4357,6 +4356,6 @@ protected NamedXContentRegistry xContentRegistry() { .setDest(DataFrameAnalyticsDest.builder() .setIndex("put-test-dest-index") .build()) - .setAnalysis(OutlierDetection.createDefault()) + .setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault()) .build(); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java index 8cdeaf68ed648..4ac5a991701ea 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java @@ -22,7 +22,7 @@ import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation; import org.elasticsearch.client.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RegressionTests; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassificationTests; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetectionTests; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParser; @@ -49,7 +49,7 @@ public static EvaluateDataFrameRequest createRandom() { QueryConfig queryConfig = randomBoolean() ? new QueryConfig(QueryBuilders.termQuery(randomAlphaOfLength(10), randomAlphaOfLength(10))) : null; - Evaluation evaluation = randomBoolean() ? BinarySoftClassificationTests.createRandom() : RegressionTests.createRandom(); + Evaluation evaluation = randomBoolean() ? OutlierDetectionTests.createRandom() : RegressionTests.createRandom(); return new EvaluateDataFrameRequest(indices, queryConfig, evaluation); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameResponseTests.java index 92d3ab81bce47..7a05b904e71a1 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameResponseTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameResponseTests.java @@ -26,11 +26,11 @@ import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetricResultTests; import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetricResultTests; import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetricResultTests; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetricResultTests; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetricResultTests; -import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetricResultTests; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetricResultTests; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetricResultTests; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetricResultTests; +import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetricResultTests; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; @@ -43,10 +43,10 @@ public class EvaluateDataFrameResponseTests extends AbstractXContentTestCase { public static EvaluateDataFrameResponse randomResponse() { - String evaluationName = randomFrom(BinarySoftClassification.NAME, Classification.NAME, Regression.NAME); + String evaluationName = randomFrom(OutlierDetection.NAME, Classification.NAME, Regression.NAME); List metrics; switch (evaluationName) { - case BinarySoftClassification.NAME: + case OutlierDetection.NAME: metrics = randomSubsetOf( Arrays.asList( AucRocMetricResultTests.randomResult(), diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricAucRocPointTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java similarity index 92% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricAucRocPointTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java index 93f2b25a7346f..ccb12ecb05930 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricAucRocPointTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java similarity index 86% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricResultTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java index bd8fc8e790e81..48df63d64ebd6 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/AucRocMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; @@ -26,15 +26,13 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetricAucRocPointTests.randomPoint; - public class AucRocMetricResultTests extends AbstractXContentTestCase { public static AucRocMetric.Result randomResult() { return new AucRocMetric.Result( randomDouble(), Stream - .generate(() -> randomPoint()) + .generate(() -> AucRocMetricAucRocPointTests.randomPoint()) .limit(randomIntBetween(1, 10)) .collect(Collectors.toList())); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricConfusionMatrixTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java similarity index 92% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricConfusionMatrixTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java index 39897112f38d8..f4a82490d7a8d 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricConfusionMatrixTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java similarity index 88% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricResultTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java index 42819e077d8cc..6107563652130 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/ConfusionMatrixMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; @@ -26,7 +26,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetricConfusionMatrixTests.randomConfusionMatrix; +import static org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetricConfusionMatrixTests.randomConfusionMatrix; public class ConfusionMatrixMetricResultTests extends AbstractXContentTestCase { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java similarity index 74% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java index 7fd9af2ab88ff..2f4a531551ca5 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric; import org.elasticsearch.client.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider; @@ -30,14 +30,14 @@ import java.util.List; import java.util.function.Predicate; -public class BinarySoftClassificationTests extends AbstractXContentTestCase { +public class OutlierDetectionTests extends AbstractXContentTestCase { @Override protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers()); } - public static BinarySoftClassification createRandom() { + public static OutlierDetection createRandom() { List metrics = new ArrayList<>(); if (randomBoolean()) { metrics.add(new AucRocMetric(randomBoolean())); @@ -46,33 +46,33 @@ public static BinarySoftClassification createRandom() { metrics.add(new PrecisionMetric(Arrays.asList(randomArray(1, 4, Double[]::new, - BinarySoftClassificationTests::randomDouble)))); + OutlierDetectionTests::randomDouble)))); } if (randomBoolean()) { metrics.add(new RecallMetric(Arrays.asList(randomArray(1, 4, Double[]::new, - BinarySoftClassificationTests::randomDouble)))); + OutlierDetectionTests::randomDouble)))); } if (randomBoolean()) { metrics.add(new ConfusionMatrixMetric(Arrays.asList(randomArray(1, 4, Double[]::new, - BinarySoftClassificationTests::randomDouble)))); + OutlierDetectionTests::randomDouble)))); } return randomBoolean() ? - new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10)) : - new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics.isEmpty() ? null : metrics); + new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10)) : + new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics.isEmpty() ? null : metrics); } @Override - protected BinarySoftClassification createTestInstance() { + protected OutlierDetection createTestInstance() { return createRandom(); } @Override - protected BinarySoftClassification doParseInstance(XContentParser parser) throws IOException { - return BinarySoftClassification.fromXContent(parser); + protected OutlierDetection doParseInstance(XContentParser parser) throws IOException { + return OutlierDetection.fromXContent(parser); } @Override diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java similarity index 94% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetricResultTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java index 7ece003ef22e0..5dbde36bc0bfd 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/PrecisionMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java similarity index 94% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetricResultTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java index 85d9b38075e21..44dc414773a54 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/softclassification/RecallMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractXContentTestCase; diff --git a/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc b/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc index 72e27b0848d78..10bc5bae7827d 100644 --- a/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc +++ b/docs/java-rest/high-level/ml/evaluate-data-frame.asciidoc @@ -25,13 +25,13 @@ include-tagged::{doc-tests-file}[{api}-request] ==== Evaluation Evaluation to be performed. -Currently, supported evaluations include: +BinarySoftClassification+, +Classification+, +Regression+. +Currently, supported evaluations include: +OutlierDetection+, +Classification+, +Regression+. -===== Binary soft classification +===== Outlier detection ["source","java",subs="attributes,callouts,macros"] -------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-evaluation-softclassification] +include-tagged::{doc-tests-file}[{api}-evaluation-outlierdetection] -------------------------------------------------- <1> Constructing a new evaluation <2> Name of the field in the index. Its value denotes the actual (i.e. ground truth) label for an example. Must be either true or false. @@ -87,11 +87,11 @@ include-tagged::{doc-tests-file}[{api}-response] ==== Results -===== Binary soft classification +===== Outlier detection ["source","java",subs="attributes,callouts,macros"] -------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-results-softclassification] +include-tagged::{doc-tests-file}[{api}-results-outlierdetection] -------------------------------------------------- <1> Fetching precision metric by name diff --git a/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc index 0f628a34a91cd..67acc71c93c5e 100644 --- a/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc @@ -22,20 +22,20 @@ experimental[] [[ml-evaluate-dfanalytics-prereq]] == {api-prereq-title} -If the {es} {security-features} are enabled, you must have the following +If the {es} {security-features} are enabled, you must have the following privileges: * cluster: `monitor_ml` - + For more information, see <> and <>. [[ml-evaluate-dfanalytics-desc]] == {api-description-title} -The API packages together commonly used evaluation metrics for various types of -machine learning features. This has been designed for use on indexes created by -{dfanalytics}. Evaluation requires both a ground truth field and an analytics +The API packages together commonly used evaluation metrics for various types of +machine learning features. This has been designed for use on indexes created by +{dfanalytics}. Evaluation requires both a ground truth field and an analytics result field to be present. @@ -49,7 +49,7 @@ See <>. -- Available evaluation types: -* `binary_soft_classification` +* `outlier_detection` * `regression` * `classification` @@ -66,33 +66,31 @@ source index. See <>. [[ml-evaluate-dfanalytics-resources]] == {dfanalytics-cap} evaluation resources -[[binary-sc-resources]] -=== Binary soft classification evaluation objects +[[oldetection-resources]] +=== {oldetection-cap} evaluation objects -Binary soft classification evaluates the results of an analysis which outputs -the probability that each document belongs to a certain class. For example, in -the context of {oldetection}, the analysis outputs the probability whether each -document is an outlier. +{oldetection-cap} evaluates the results of an {oldetection} analysis which outputs +the probability that each document is an outlier. `actual_field`:: - (Required, string) The field of the `index` which contains the `ground truth`. - The data type of this field can be boolean or integer. If the data type is + (Required, string) The field of the `index` which contains the `ground truth`. + The data type of this field can be boolean or integer. If the data type is integer, the value has to be either `0` (false) or `1` (true). `predicted_probability_field`:: - (Required, string) The field of the `index` that defines the probability of - whether the item belongs to the class in question or not. It's the field that + (Required, string) The field of the `index` that defines the probability of + whether the item belongs to the class in question or not. It's the field that contains the results of the analysis. `metrics`:: - (Optional, object) Specifies the metrics that are used for the evaluation. + (Optional, object) Specifies the metrics that are used for the evaluation. Available metrics: - + `auc_roc`::: - (Optional, object) The AUC ROC (area under the curve of the receiver - operating characteristic) score and optionally the curve. Default value is + (Optional, object) The AUC ROC (area under the curve of the receiver + operating characteristic) score and optionally the curve. Default value is {"includes_curve": false}. - + `confusion_matrix`::: (Optional, object) Set the different thresholds of the {olscore} at where the metrics (`tp` - true positive, `fp` - false positive, `tn` - true @@ -100,28 +98,28 @@ document is an outlier. {"at": [0.25, 0.50, 0.75]}. `precision`::: - (Optional, object) Set the different thresholds of the {olscore} at where + (Optional, object) Set the different thresholds of the {olscore} at where the metric is calculated. Default value is {"at": [0.25, 0.50, 0.75]}. - + `recall`::: - (Optional, object) Set the different thresholds of the {olscore} at where + (Optional, object) Set the different thresholds of the {olscore} at where the metric is calculated. Default value is {"at": [0.25, 0.50, 0.75]}. - + [[regression-evaluation-resources]] === {regression-cap} evaluation objects -{regression-cap} evaluation evaluates the results of a {regression} analysis +{regression-cap} evaluation evaluates the results of a {regression} analysis which outputs a prediction of values. `actual_field`:: - (Required, string) The field of the `index` which contains the `ground truth`. + (Required, string) The field of the `index` which contains the `ground truth`. The data type of this field must be numerical. - + `predicted_field`:: - (Required, string) The field in the `index` that contains the predicted value, + (Required, string) The field in the `index` that contains the predicted value, in other words the results of the {regression} analysis. - + `metrics`:: (Optional, object) Specifies the metrics that are used for the evaluation. Available metrics: @@ -143,20 +141,20 @@ which outputs a prediction of values. For more information, read https://en.wikipedia.org/wiki/Coefficient_of_determination[this wiki article]. - + [[classification-evaluation-resources]] == {classification-cap} evaluation objects -{classification-cap} evaluation evaluates the results of a {classanalysis} which -outputs a prediction that identifies to which of the classes each document +{classification-cap} evaluation evaluates the results of a {classanalysis} which +outputs a prediction that identifies to which of the classes each document belongs. `actual_field`:: (Required, string) The field of the `index` which contains the `ground truth`. The data type of this field must be categorical. - + `predicted_field`:: - (Required, string) The field in the `index` that contains the predicted value, + (Required, string) The field in the `index` that contains the predicted value, in other words the results of the {classanalysis}. `metrics`:: @@ -180,14 +178,14 @@ belongs. [[ml-evaluate-dfanalytics-results]] == {api-response-body-title} -`binary_soft_classification`:: - (object) If you chose to do binary soft classification, the API returns the +`outlier_detection`:: + (object) If you chose to do outlier detection, the API returns the following evaluation metrics: - + `auc_roc`::: TBD `confusion_matrix`::: TBD - + `precision`::: TBD `recall`::: TBD @@ -198,8 +196,8 @@ belongs. == {api-examples-title} -[[ml-evaluate-binary-soft-class-example]] -=== Binary soft classification +[[ml-evaluate-binary-oldetection-example]] +=== {oldetection-cap} [source,console] -------------------------------------------------- @@ -207,7 +205,7 @@ POST _ml/data_frame/_evaluate { "index": "my_analytics_dest_index", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "ml.outlier_score" } @@ -221,7 +219,7 @@ The API returns the following results: [source,console-result] ---- { - "binary_soft_classification": { + "outlier_detection": { "auc_roc": { "score": 0.92584757746414444 }, @@ -276,10 +274,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "price", <3> "predicted_field": "ml.price_prediction", <4> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -290,10 +288,10 @@ POST _ml/data_frame/_evaluate // TEST[skip:TBD] <1> The output destination index from a {dfanalytics} {reganalysis}. -<2> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the test split -only. -<3> The ground truth value for the actual house price. This is required in order +<2> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the test split +only. +<3> The ground truth value for the actual house price. This is required in order to evaluate results. <4> The predicted value for house price calculated by the {reganalysis}. @@ -313,10 +311,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "G3", <2> "predicted_field": "ml.G3_prediction", <3> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -326,17 +324,17 @@ POST _ml/data_frame/_evaluate -------------------------------------------------- // TEST[skip:TBD] -<1> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the train split +<1> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the train split only. It means that a training error will be calculated. -<2> The field that contains the ground truth value for the actual student +<2> The field that contains the ground truth value for the actual student performance. This is required in order to evaluate results. -<3> The field that contains the predicted value for student performance +<3> The field that contains the predicted value for student performance calculated by the {reganalysis}. -The next example calculates the testing error. The only difference compared with -the previous example is that `ml.is_training` is set to `false` this time, so +The next example calculates the testing error. The only difference compared with +the previous example is that `ml.is_training` is set to `false` this time, so the query excludes the train split from the evaluation. [source,console] @@ -352,10 +350,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "G3", <2> "predicted_field": "ml.G3_prediction", <3> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -365,12 +363,12 @@ POST _ml/data_frame/_evaluate -------------------------------------------------- // TEST[skip:TBD] -<1> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the test split +<1> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the test split only. It means that a testing error will be calculated. -<2> The field that contains the ground truth value for the actual student +<2> The field that contains the ground truth value for the actual student performance. This is required in order to evaluate results. -<3> The field that contains the predicted value for student performance +<3> The field that contains the predicted value for student performance calculated by the {reganalysis}. @@ -381,13 +379,13 @@ calculated by the {reganalysis}. [source,console] -------------------------------------------------- POST _ml/data_frame/_evaluate -{ +{ "index": "animal_classification", "evaluation": { "classification": { <1> "actual_field": "animal_class", <2> "predicted_field": "ml.animal_class_prediction", <3> - "metrics": { + "metrics": { "multiclass_confusion_matrix" : {} <4> } } @@ -397,9 +395,9 @@ POST _ml/data_frame/_evaluate // TEST[skip:TBD] <1> The evaluation type. -<2> The field that contains the ground truth value for the actual animal +<2> The field that contains the ground truth value for the actual animal classification. This is required in order to evaluate results. -<3> The field that contains the predicted value for animal classification by +<3> The field that contains the predicted value for animal classification by the {classanalysis}. <4> Specifies the metric for the evaluation. @@ -450,9 +448,9 @@ The API returns the following result: -------------------------------------------------- <1> The name of the actual class that the analysis tried to predict. <2> The number of documents in the index that belong to the `actual_class`. -<3> This object contains the list of the predicted classes and the number of +<3> This object contains the list of the predicted classes and the number of predictions associated with the class. <4> The number of cats in the dataset that are correctly identified as cats. <5> The number of cats in the dataset that are incorrectly classified as dogs. -<6> The number of documents that are classified as a class that is not listed as +<6> The number of documents that are classified as a class that is not listed as a `predicted_class`. diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java index 300b0b968ee7e..009b4fa22890d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/MlEvaluationNamedXContentProvider.java @@ -12,17 +12,17 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Accuracy; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Classification; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.MulticlassConfusionMatrix; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.AucRoc; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.ConfusionMatrix; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.Precision; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.Recall; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.ScoreByThresholdResult; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.Huber; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.MeanSquaredError; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicError; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.RSquared; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.Regression; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.AucRoc; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ConfusionMatrix; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Precision; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult; import java.util.Arrays; import java.util.List; @@ -57,22 +57,22 @@ public static String registeredMetricName(String evaluationName, String metricNa public List getNamedXContentParsers() { return Arrays.asList( // Evaluations - new NamedXContentRegistry.Entry(Evaluation.class, BinarySoftClassification.NAME, BinarySoftClassification::fromXContent), + new NamedXContentRegistry.Entry(Evaluation.class, OutlierDetection.NAME, OutlierDetection::fromXContent), new NamedXContentRegistry.Entry(Evaluation.class, Classification.NAME, Classification::fromXContent), new NamedXContentRegistry.Entry(Evaluation.class, Regression.NAME, Regression::fromXContent), - // Soft classification metrics + // Outlier detection metrics new NamedXContentRegistry.Entry(EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, AucRoc.NAME)), AucRoc::fromXContent), new NamedXContentRegistry.Entry(EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, Precision.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, Precision.NAME)), Precision::fromXContent), new NamedXContentRegistry.Entry(EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, Recall.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, Recall.NAME)), Recall::fromXContent), new NamedXContentRegistry.Entry(EvaluationMetric.class, - new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME)), + new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME)), ConfusionMatrix::fromXContent), // Classification metrics @@ -113,8 +113,8 @@ public static List getNamedWriteables() { return Arrays.asList( // Evaluations new NamedWriteableRegistry.Entry(Evaluation.class, - BinarySoftClassification.NAME.getPreferredName(), - BinarySoftClassification::new), + OutlierDetection.NAME.getPreferredName(), + OutlierDetection::new), new NamedWriteableRegistry.Entry(Evaluation.class, Classification.NAME.getPreferredName(), Classification::new), @@ -124,16 +124,16 @@ public static List getNamedWriteables() { // Evaluation metrics new NamedWriteableRegistry.Entry(EvaluationMetric.class, - registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME), + registeredMetricName(OutlierDetection.NAME, AucRoc.NAME), AucRoc::new), new NamedWriteableRegistry.Entry(EvaluationMetric.class, - registeredMetricName(BinarySoftClassification.NAME, Precision.NAME), + registeredMetricName(OutlierDetection.NAME, Precision.NAME), Precision::new), new NamedWriteableRegistry.Entry(EvaluationMetric.class, - registeredMetricName(BinarySoftClassification.NAME, Recall.NAME), + registeredMetricName(OutlierDetection.NAME, Recall.NAME), Recall::new), new NamedWriteableRegistry.Entry(EvaluationMetric.class, - registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME), + registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME), ConfusionMatrix::new), new NamedWriteableRegistry.Entry(EvaluationMetric.class, registeredMetricName(Classification.NAME, MulticlassConfusionMatrix.NAME), @@ -164,13 +164,13 @@ public static List getNamedWriteables() { // Evaluation metrics results new NamedWriteableRegistry.Entry(EvaluationMetricResult.class, - registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME), + registeredMetricName(OutlierDetection.NAME, AucRoc.NAME), AucRoc.Result::new), new NamedWriteableRegistry.Entry(EvaluationMetricResult.class, - registeredMetricName(BinarySoftClassification.NAME, ScoreByThresholdResult.NAME), + registeredMetricName(OutlierDetection.NAME, ScoreByThresholdResult.NAME), ScoreByThresholdResult::new), new NamedWriteableRegistry.Entry(EvaluationMetricResult.class, - registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME), + registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME), ConfusionMatrix.Result::new), new NamedWriteableRegistry.Entry(EvaluationMetricResult.class, registeredMetricName(Classification.NAME, MulticlassConfusionMatrix.NAME), diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java similarity index 97% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java index fdd7cd2ce2d6c..3fc5f3e73a038 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AbstractConfusionMatrixMetric.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AbstractConfusionMatrixMetric.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.collect.Tuple; @@ -26,7 +26,7 @@ import java.util.List; import java.util.Optional; -import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification.actualIsTrueQuery; +import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection.actualIsTrueQuery; abstract class AbstractConfusionMatrixMetric implements EvaluationMetric { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRoc.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRoc.java similarity index 97% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRoc.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRoc.java index db0f9b95d1380..9c95754715f7c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRoc.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRoc.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; @@ -38,7 +38,7 @@ import java.util.stream.IntStream; import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider.registeredMetricName; -import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification.actualIsTrueQuery; +import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection.actualIsTrueQuery; /** * Area under the curve (AUC) of the receiver operating characteristic (ROC). @@ -93,7 +93,7 @@ public AucRoc(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override @@ -339,7 +339,7 @@ public Result(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrix.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrix.java similarity index 96% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrix.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrix.java index 1b1d5b8f9d170..294f8fff2436e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrix.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrix.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; @@ -48,7 +48,7 @@ public ConfusionMatrix(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override @@ -131,7 +131,7 @@ public Result(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java similarity index 80% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java index 8d4f4f01d02cd..b5f842755c518 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassification.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; @@ -26,20 +26,18 @@ import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider.registeredMetricName; /** - * Evaluation of binary soft classification methods, e.g. outlier detection. - * This is useful to evaluate problems where a model outputs a probability of whether - * a data frame row belongs to one of two groups. + * Evaluation of outlier detection results. */ -public class BinarySoftClassification implements Evaluation { +public class OutlierDetection implements Evaluation { - public static final ParseField NAME = new ParseField("binary_soft_classification"); + public static final ParseField NAME = new ParseField("outlier_detection"); private static final ParseField ACTUAL_FIELD = new ParseField("actual_field"); private static final ParseField PREDICTED_PROBABILITY_FIELD = new ParseField("predicted_probability_field"); private static final ParseField METRICS = new ParseField("metrics"); - public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - NAME.getPreferredName(), a -> new BinarySoftClassification((String) a[0], (String) a[1], (List) a[2])); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + NAME.getPreferredName(), a -> new OutlierDetection((String) a[0], (String) a[1], (List) a[2])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), ACTUAL_FIELD); @@ -48,7 +46,7 @@ public class BinarySoftClassification implements Evaluation { (p, c, n) -> p.namedObject(EvaluationMetric.class, registeredMetricName(NAME.getPreferredName(), n), c), METRICS); } - public static BinarySoftClassification fromXContent(XContentParser parser) { + public static OutlierDetection fromXContent(XContentParser parser) { return PARSER.apply(parser, null); } @@ -72,11 +70,11 @@ static QueryBuilder actualIsTrueQuery(String actualField) { */ private final List metrics; - public BinarySoftClassification(String actualField, String predictedProbabilityField, - @Nullable List metrics) { + public OutlierDetection(String actualField, String predictedProbabilityField, + @Nullable List metrics) { this.actualField = ExceptionsHelper.requireNonNull(actualField, ACTUAL_FIELD); this.predictedProbabilityField = ExceptionsHelper.requireNonNull(predictedProbabilityField, PREDICTED_PROBABILITY_FIELD); - this.metrics = initMetrics(metrics, BinarySoftClassification::defaultMetrics); + this.metrics = initMetrics(metrics, OutlierDetection::defaultMetrics); } private static List defaultMetrics() { @@ -87,7 +85,7 @@ private static List defaultMetrics() { new ConfusionMatrix(Arrays.asList(0.25, 0.5, 0.75))); } - public BinarySoftClassification(StreamInput in) throws IOException { + public OutlierDetection(StreamInput in) throws IOException { this.actualField = in.readString(); this.predictedProbabilityField = in.readString(); this.metrics = in.readNamedWriteableList(EvaluationMetric.class); @@ -145,7 +143,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - BinarySoftClassification that = (BinarySoftClassification) o; + OutlierDetection that = (OutlierDetection) o; return Objects.equals(actualField, that.actualField) && Objects.equals(predictedProbabilityField, that.predictedProbabilityField) && Objects.equals(metrics, that.metrics); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Precision.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Precision.java similarity index 95% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Precision.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Precision.java index d05ddb5fc4c9b..cd95f4ccc0558 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Precision.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Precision.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; @@ -46,7 +46,7 @@ public Precision(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Recall.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Recall.java similarity index 95% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Recall.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Recall.java index 2dd44aff6715d..9b115b8fa5885 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/Recall.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/Recall.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; @@ -46,7 +46,7 @@ public Recall(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ScoreByThresholdResult.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ScoreByThresholdResult.java similarity index 93% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ScoreByThresholdResult.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ScoreByThresholdResult.java index 8fdb06bde4d6e..7d7904a8f3394 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ScoreByThresholdResult.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ScoreByThresholdResult.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; @@ -39,7 +39,7 @@ public ScoreByThresholdResult(StreamInput in) throws IOException { @Override public String getWriteableName() { - return registeredMetricName(BinarySoftClassification.NAME, NAME); + return registeredMetricName(OutlierDetection.NAME, NAME); } @Override diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EvaluateDataFrameActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EvaluateDataFrameActionRequestTests.java index dd485341cb7fd..b48fd5bec7e59 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EvaluateDataFrameActionRequestTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EvaluateDataFrameActionRequestTests.java @@ -16,8 +16,9 @@ import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction.Request; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.Evaluation; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.ClassificationTests; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.RegressionTests; -import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassificationTests; +import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetectionTests; import org.elasticsearch.xpack.core.ml.utils.QueryProvider; import java.io.IOException; @@ -60,7 +61,8 @@ protected Request createTestInstance() { throw new UncheckedIOException(e); } } - Evaluation evaluation = randomBoolean() ? BinarySoftClassificationTests.createRandom() : RegressionTests.createRandom(); + Evaluation evaluation = + randomFrom(OutlierDetectionTests.createRandom(), ClassificationTests.createRandom(), RegressionTests.createRandom()); return new Request() .setIndices(indices) .setQueryProvider(queryProvider) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRocTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRocTests.java similarity index 98% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRocTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRocTests.java index 6f8ca9339715d..610b4830b57d2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/AucRocTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/AucRocTests.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.XContentParser; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrixTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixTests.java similarity index 96% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrixTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixTests.java index 84194bd0bac09..efb5002922bf3 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/ConfusionMatrixTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixTests.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java similarity index 78% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java index 2a9645e094291..cc560e6495927 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/BinarySoftClassificationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetectionTests.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; @@ -27,7 +27,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -public class BinarySoftClassificationTests extends AbstractSerializingTestCase { +public class OutlierDetectionTests extends AbstractSerializingTestCase { private static final EvaluationParameters EVALUATION_PARAMETERS = new EvaluationParameters(100); @@ -41,7 +41,7 @@ protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers()); } - public static BinarySoftClassification createRandom() { + public static OutlierDetection createRandom() { List metrics = new ArrayList<>(); if (randomBoolean()) { metrics.add(AucRocTests.createRandom()); @@ -62,28 +62,28 @@ public static BinarySoftClassification createRandom() { metrics.add(RecallTests.createRandom()); metrics.add(ConfusionMatrixTests.createRandom()); } - return new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics); + return new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics); } @Override - protected BinarySoftClassification doParseInstance(XContentParser parser) throws IOException { - return BinarySoftClassification.fromXContent(parser); + protected OutlierDetection doParseInstance(XContentParser parser) throws IOException { + return OutlierDetection.fromXContent(parser); } @Override - protected BinarySoftClassification createTestInstance() { + protected OutlierDetection createTestInstance() { return createRandom(); } @Override - protected Writeable.Reader instanceReader() { - return BinarySoftClassification::new; + protected Writeable.Reader instanceReader() { + return OutlierDetection::new; } public void testConstructor_GivenEmptyMetrics() { ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, - () -> new BinarySoftClassification("foo", "bar", Collections.emptyList())); - assertThat(e.getMessage(), equalTo("[binary_soft_classification] must have one or more metrics")); + () -> new OutlierDetection("foo", "bar", Collections.emptyList())); + assertThat(e.getMessage(), equalTo("[outlier_detection] must have one or more metrics")); } public void testBuildSearch() { @@ -99,7 +99,7 @@ public void testBuildSearch() { .filter(QueryBuilders.termQuery("field_A", "some-value")) .filter(QueryBuilders.termQuery("field_B", "some-other-value"))); - BinarySoftClassification evaluation = new BinarySoftClassification("act", "prob", Arrays.asList(new Precision(Arrays.asList(0.7)))); + OutlierDetection evaluation = new OutlierDetection("act", "prob", Arrays.asList(new Precision(Arrays.asList(0.7)))); SearchSourceBuilder searchSourceBuilder = evaluation.buildSearch(EVALUATION_PARAMETERS, userProvidedQuery); assertThat(searchSourceBuilder.query(), equalTo(expectedSearchQuery)); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/PrecisionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/PrecisionTests.java similarity index 97% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/PrecisionTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/PrecisionTests.java index faf7c9ac0e7f2..3755ff9a60f4d 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/PrecisionTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/PrecisionTests.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/RecallTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/RecallTests.java similarity index 97% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/RecallTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/RecallTests.java index 343d19059551c..39825ecd3ef0e 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/softclassification/RecallTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/RecallTests.java @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification; +package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 4d9cf149867cc..bbe63e34b11bb 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -104,17 +104,17 @@ integTest.runner { 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', - 'ml/evaluate_data_frame/Test binary_soft_classification auc_roc given actual_field is always true', - 'ml/evaluate_data_frame/Test binary_soft_classification auc_roc given actual_field is always false', - 'ml/evaluate_data_frame/Test binary_soft_classification given evaluation with empty metrics', - 'ml/evaluate_data_frame/Test binary_soft_classification given missing actual_field', - 'ml/evaluate_data_frame/Test binary_soft_classification given missing predicted_probability_field', - 'ml/evaluate_data_frame/Test binary_soft_classification given precision with threshold less than zero', - 'ml/evaluate_data_frame/Test binary_soft_classification given recall with threshold less than zero', - 'ml/evaluate_data_frame/Test binary_soft_classification given confusion_matrix with threshold less than zero', - 'ml/evaluate_data_frame/Test binary_soft_classification given precision with empty thresholds', - 'ml/evaluate_data_frame/Test binary_soft_classification given recall with empty thresholds', - 'ml/evaluate_data_frame/Test binary_soft_classification given confusion_matrix with empty thresholds', + 'ml/evaluate_data_frame/Test outlier_detection auc_roc given actual_field is always true', + 'ml/evaluate_data_frame/Test outlier_detection auc_roc given actual_field is always false', + 'ml/evaluate_data_frame/Test outlier_detection given evaluation with empty metrics', + 'ml/evaluate_data_frame/Test outlier_detection given missing actual_field', + 'ml/evaluate_data_frame/Test outlier_detection given missing predicted_probability_field', + 'ml/evaluate_data_frame/Test outlier_detection given precision with threshold less than zero', + 'ml/evaluate_data_frame/Test outlier_detection given recall with threshold less than zero', + 'ml/evaluate_data_frame/Test outlier_detection given confusion_matrix with threshold less than zero', + 'ml/evaluate_data_frame/Test outlier_detection given precision with empty thresholds', + 'ml/evaluate_data_frame/Test outlier_detection given recall with empty thresholds', + 'ml/evaluate_data_frame/Test outlier_detection given confusion_matrix with empty thresholds', 'ml/evaluate_data_frame/Test classification given evaluation with empty metrics', 'ml/evaluate_data_frame/Test classification given missing actual_field', 'ml/evaluate_data_frame/Test classification given missing predicted_field', diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/evaluate_data_frame.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/evaluate_data_frame.yml index 0822b26cf914f..57eb1d1116acb 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/evaluate_data_frame.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/evaluate_data_frame.yml @@ -149,14 +149,14 @@ setup: indices.refresh: {} --- -"Test binary_soft_classification auc_roc": +"Test outlier_detection auc_roc": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -165,18 +165,18 @@ setup: } } } - - match: { binary_soft_classification.auc_roc.score: 0.9899 } - - is_false: binary_soft_classification.auc_roc.curve + - match: { outlier_detection.auc_roc.score: 0.9899 } + - is_false: outlier_detection.auc_roc.curve --- -"Test binary_soft_classification auc_roc given actual_field is int": +"Test outlier_detection auc_roc given actual_field is int": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier_int", "predicted_probability_field": "outlier_score", "metrics": { @@ -185,18 +185,18 @@ setup: } } } - - match: { binary_soft_classification.auc_roc.score: 0.9899 } - - is_false: binary_soft_classification.auc_roc.curve + - match: { outlier_detection.auc_roc.score: 0.9899 } + - is_false: outlier_detection.auc_roc.curve --- -"Test binary_soft_classification auc_roc include curve": +"Test outlier_detection auc_roc include curve": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -205,11 +205,11 @@ setup: } } } - - match: { binary_soft_classification.auc_roc.score: 0.9899 } - - is_true: binary_soft_classification.auc_roc.curve + - match: { outlier_detection.auc_roc.score: 0.9899 } + - is_true: outlier_detection.auc_roc.curve --- -"Test binary_soft_classification auc_roc given actual_field is always true": +"Test outlier_detection auc_roc given actual_field is always true": - do: catch: /\[auc_roc\] requires at least one actual_field to have a different value than \[true\]/ ml.evaluate_data_frame: @@ -217,7 +217,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "all_true_field", "predicted_probability_field": "outlier_score", "metrics": { @@ -228,7 +228,7 @@ setup: } --- -"Test binary_soft_classification auc_roc given actual_field is always false": +"Test outlier_detection auc_roc given actual_field is always false": - do: catch: /\[auc_roc\] requires at least one actual_field to have the value \[true\]/ ml.evaluate_data_frame: @@ -236,7 +236,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "all_false_field", "predicted_probability_field": "outlier_score", "metrics": { @@ -247,14 +247,14 @@ setup: } --- -"Test binary_soft_classification precision": +"Test outlier_detection precision": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -264,20 +264,20 @@ setup: } } - match: - binary_soft_classification: + outlier_detection: precision: '0.0': 0.625 '0.5': 1.0 --- -"Test binary_soft_classification recall": +"Test outlier_detection recall": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -287,21 +287,21 @@ setup: } } - match: - binary_soft_classification: + outlier_detection: recall: '0.0': 1.0 '0.4': 0.8 '0.5': 0.6 --- -"Test binary_soft_classification confusion_matrix": +"Test outlier_detection confusion_matrix": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -311,7 +311,7 @@ setup: } } - match: - binary_soft_classification: + outlier_detection: confusion_matrix: '0.0': tp: 5 @@ -330,7 +330,7 @@ setup: fn: 2 --- -"Test binary_soft_classification with query": +"Test outlier_detection with query": - do: ml.evaluate_data_frame: body: > @@ -338,7 +338,7 @@ setup: "index": "utopia", "query": { "bool": { "filter": { "term": { "dataset": "blue" } } } }, "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -348,7 +348,7 @@ setup: } } - match: - binary_soft_classification: + outlier_detection: confusion_matrix: '0.5': tp: 0 @@ -357,29 +357,29 @@ setup: fn: 1 --- -"Test binary_soft_classification default metrics": +"Test outlier_detection default metrics": - do: ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score" } } } - - is_true: binary_soft_classification.auc_roc.score - - is_true: binary_soft_classification.precision.0\.25 - - is_true: binary_soft_classification.precision.0\.5 - - is_true: binary_soft_classification.precision.0\.75 - - is_true: binary_soft_classification.recall.0\.25 - - is_true: binary_soft_classification.recall.0\.5 - - is_true: binary_soft_classification.recall.0\.75 - - is_true: binary_soft_classification.confusion_matrix.0\.25 - - is_true: binary_soft_classification.confusion_matrix.0\.5 - - is_true: binary_soft_classification.confusion_matrix.0\.75 + - is_true: outlier_detection.auc_roc.score + - is_true: outlier_detection.precision.0\.25 + - is_true: outlier_detection.precision.0\.5 + - is_true: outlier_detection.precision.0\.75 + - is_true: outlier_detection.recall.0\.25 + - is_true: outlier_detection.recall.0\.5 + - is_true: outlier_detection.recall.0\.75 + - is_true: outlier_detection.confusion_matrix.0\.25 + - is_true: outlier_detection.confusion_matrix.0\.5 + - is_true: outlier_detection.confusion_matrix.0\.75 --- "Test given missing index": @@ -389,7 +389,7 @@ setup: body: > { "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score" } @@ -405,7 +405,7 @@ setup: { "index": "missing_index", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score" } @@ -423,15 +423,15 @@ setup: } --- -"Test binary_soft_classification given evaluation with empty metrics": +"Test outlier_detection given evaluation with empty metrics": - do: - catch: /\[binary_soft_classification\] must have one or more metrics/ + catch: /\[outlier_detection\] must have one or more metrics/ ml.evaluate_data_frame: body: > { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -441,7 +441,7 @@ setup: } --- -"Test binary_soft_classification given missing actual_field": +"Test outlier_detection given missing actual_field": - do: catch: /No documents found containing both \[missing, outlier_score\] fields/ ml.evaluate_data_frame: @@ -449,7 +449,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "missing", "predicted_probability_field": "outlier_score" } @@ -457,7 +457,7 @@ setup: } --- -"Test binary_soft_classification given missing predicted_probability_field": +"Test outlier_detection given missing predicted_probability_field": - do: catch: /No documents found containing both \[is_outlier, missing\] fields/ ml.evaluate_data_frame: @@ -465,7 +465,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "missing" } @@ -473,7 +473,7 @@ setup: } --- -"Test binary_soft_classification given precision with threshold less than zero": +"Test outlier_detection given precision with threshold less than zero": - do: catch: /\[precision.at\] values must be in \[0.0, 1.0\]/ ml.evaluate_data_frame: @@ -481,7 +481,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -492,7 +492,7 @@ setup: } --- -"Test binary_soft_classification given recall with threshold less than zero": +"Test outlier_detection given recall with threshold less than zero": - do: catch: /\[recall.at\] values must be in \[0.0, 1.0\]/ ml.evaluate_data_frame: @@ -500,7 +500,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -511,7 +511,7 @@ setup: } --- -"Test binary_soft_classification given confusion_matrix with threshold less than zero": +"Test outlier_detection given confusion_matrix with threshold less than zero": - do: catch: /\[confusion_matrix.at\] values must be in \[0.0, 1.0\]/ ml.evaluate_data_frame: @@ -519,7 +519,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -530,7 +530,7 @@ setup: } --- -"Test binary_soft_classification given precision with empty thresholds": +"Test outlier_detection given precision with empty thresholds": - do: catch: /\[precision.at\] must have at least one value/ ml.evaluate_data_frame: @@ -538,7 +538,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -549,7 +549,7 @@ setup: } --- -"Test binary_soft_classification given recall with empty thresholds": +"Test outlier_detection given recall with empty thresholds": - do: catch: /\[recall.at\] must have at least one value/ ml.evaluate_data_frame: @@ -557,7 +557,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { @@ -568,7 +568,7 @@ setup: } --- -"Test binary_soft_classification given confusion_matrix with empty thresholds": +"Test outlier_detection given confusion_matrix with empty thresholds": - do: catch: /\[confusion_matrix.at\] must have at least one value/ ml.evaluate_data_frame: @@ -576,7 +576,7 @@ setup: { "index": "utopia", "evaluation": { - "binary_soft_classification": { + "outlier_detection": { "actual_field": "is_outlier", "predicted_probability_field": "outlier_score", "metrics": { From aad119f161000e4632faccb4f195a5d0f28a67a2 Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Tue, 21 Jul 2020 10:49:49 +0200 Subject: [PATCH 2/3] Revert spurious whitespace changes --- .../ml/EvaluateDataFrameRequestTests.java | 6 +- .../classification/ClassificationTests.java | 2 +- .../AucRocMetricAucRocPointTests.java | 2 +- .../AucRocMetricResultTests.java | 4 +- ...usionMatrixMetricConfusionMatrixTests.java | 2 +- .../ConfusionMatrixMetricResultTests.java | 2 +- .../PrecisionMetricResultTests.java | 2 +- .../RecallMetricResultTests.java | 2 +- .../apis/evaluate-dfanalytics.asciidoc | 114 +++++++++--------- 9 files changed, 69 insertions(+), 67 deletions(-) diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java index 4ac5a991701ea..687b70f169cd2 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EvaluateDataFrameRequestTests.java @@ -21,8 +21,9 @@ import org.elasticsearch.client.ml.dataframe.QueryConfig; import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation; import org.elasticsearch.client.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider; -import org.elasticsearch.client.ml.dataframe.evaluation.regression.RegressionTests; +import org.elasticsearch.client.ml.dataframe.evaluation.classification.ClassificationTests; import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetectionTests; +import org.elasticsearch.client.ml.dataframe.evaluation.regression.RegressionTests; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParser; @@ -49,7 +50,8 @@ public static EvaluateDataFrameRequest createRandom() { QueryConfig queryConfig = randomBoolean() ? new QueryConfig(QueryBuilders.termQuery(randomAlphaOfLength(10), randomAlphaOfLength(10))) : null; - Evaluation evaluation = randomBoolean() ? OutlierDetectionTests.createRandom() : RegressionTests.createRandom(); + Evaluation evaluation = + randomFrom(OutlierDetectionTests.createRandom(), ClassificationTests.createRandom(), RegressionTests.createRandom()); return new EvaluateDataFrameRequest(indices, queryConfig, evaluation); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/classification/ClassificationTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/classification/ClassificationTests.java index 81691fcbb2eed..4e8ed73fd5e3a 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/classification/ClassificationTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/classification/ClassificationTests.java @@ -36,7 +36,7 @@ protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers()); } - static Classification createRandom() { + public static Classification createRandom() { List metrics = randomSubsetOf( Arrays.asList( diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java index ccb12ecb05930..d85e8193cc1c4 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricAucRocPointTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java index 48df63d64ebd6..bf4e3f749a521 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/AucRocMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -32,7 +32,7 @@ public static AucRocMetric.Result randomResult() { return new AucRocMetric.Result( randomDouble(), Stream - .generate(() -> AucRocMetricAucRocPointTests.randomPoint()) + .generate(AucRocMetricAucRocPointTests::randomPoint) .limit(randomIntBetween(1, 10)) .collect(Collectors.toList())); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java index f4a82490d7a8d..058a4407932ff 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricConfusionMatrixTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java index 6107563652130..d062eb3826f99 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/ConfusionMatrixMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java index 5dbde36bc0bfd..d59631732741a 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/PrecisionMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java index 44dc414773a54..7c7b49da825fa 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/evaluation/outlierdetection/RecallMetricResultTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc index 67acc71c93c5e..6049ca58fc1a4 100644 --- a/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/evaluate-dfanalytics.asciidoc @@ -22,20 +22,20 @@ experimental[] [[ml-evaluate-dfanalytics-prereq]] == {api-prereq-title} -If the {es} {security-features} are enabled, you must have the following +If the {es} {security-features} are enabled, you must have the following privileges: * cluster: `monitor_ml` - + For more information, see <> and <>. [[ml-evaluate-dfanalytics-desc]] == {api-description-title} -The API packages together commonly used evaluation metrics for various types of -machine learning features. This has been designed for use on indexes created by -{dfanalytics}. Evaluation requires both a ground truth field and an analytics +The API packages together commonly used evaluation metrics for various types of +machine learning features. This has been designed for use on indexes created by +{dfanalytics}. Evaluation requires both a ground truth field and an analytics result field to be present. @@ -73,24 +73,24 @@ source index. See <>. the probability that each document is an outlier. `actual_field`:: - (Required, string) The field of the `index` which contains the `ground truth`. - The data type of this field can be boolean or integer. If the data type is + (Required, string) The field of the `index` which contains the `ground truth`. + The data type of this field can be boolean or integer. If the data type is integer, the value has to be either `0` (false) or `1` (true). `predicted_probability_field`:: - (Required, string) The field of the `index` that defines the probability of - whether the item belongs to the class in question or not. It's the field that + (Required, string) The field of the `index` that defines the probability of + whether the item belongs to the class in question or not. It's the field that contains the results of the analysis. `metrics`:: - (Optional, object) Specifies the metrics that are used for the evaluation. + (Optional, object) Specifies the metrics that are used for the evaluation. Available metrics: - + `auc_roc`::: - (Optional, object) The AUC ROC (area under the curve of the receiver - operating characteristic) score and optionally the curve. Default value is + (Optional, object) The AUC ROC (area under the curve of the receiver + operating characteristic) score and optionally the curve. Default value is {"includes_curve": false}. - + `confusion_matrix`::: (Optional, object) Set the different thresholds of the {olscore} at where the metrics (`tp` - true positive, `fp` - false positive, `tn` - true @@ -98,28 +98,28 @@ the probability that each document is an outlier. {"at": [0.25, 0.50, 0.75]}. `precision`::: - (Optional, object) Set the different thresholds of the {olscore} at where + (Optional, object) Set the different thresholds of the {olscore} at where the metric is calculated. Default value is {"at": [0.25, 0.50, 0.75]}. - + `recall`::: - (Optional, object) Set the different thresholds of the {olscore} at where + (Optional, object) Set the different thresholds of the {olscore} at where the metric is calculated. Default value is {"at": [0.25, 0.50, 0.75]}. - + [[regression-evaluation-resources]] === {regression-cap} evaluation objects -{regression-cap} evaluation evaluates the results of a {regression} analysis +{regression-cap} evaluation evaluates the results of a {regression} analysis which outputs a prediction of values. `actual_field`:: - (Required, string) The field of the `index` which contains the `ground truth`. + (Required, string) The field of the `index` which contains the `ground truth`. The data type of this field must be numerical. - + `predicted_field`:: - (Required, string) The field in the `index` that contains the predicted value, + (Required, string) The field in the `index` that contains the predicted value, in other words the results of the {regression} analysis. - + `metrics`:: (Optional, object) Specifies the metrics that are used for the evaluation. Available metrics: @@ -141,20 +141,20 @@ which outputs a prediction of values. For more information, read https://en.wikipedia.org/wiki/Coefficient_of_determination[this wiki article]. - + [[classification-evaluation-resources]] == {classification-cap} evaluation objects -{classification-cap} evaluation evaluates the results of a {classanalysis} which -outputs a prediction that identifies to which of the classes each document +{classification-cap} evaluation evaluates the results of a {classanalysis} which +outputs a prediction that identifies to which of the classes each document belongs. `actual_field`:: (Required, string) The field of the `index` which contains the `ground truth`. The data type of this field must be categorical. - + `predicted_field`:: - (Required, string) The field in the `index` that contains the predicted value, + (Required, string) The field in the `index` that contains the predicted value, in other words the results of the {classanalysis}. `metrics`:: @@ -181,11 +181,11 @@ belongs. `outlier_detection`:: (object) If you chose to do outlier detection, the API returns the following evaluation metrics: - + `auc_roc`::: TBD `confusion_matrix`::: TBD - + `precision`::: TBD `recall`::: TBD @@ -196,7 +196,7 @@ belongs. == {api-examples-title} -[[ml-evaluate-binary-oldetection-example]] +[[ml-evaluate-oldetection-example]] === {oldetection-cap} [source,console] @@ -274,10 +274,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "price", <3> "predicted_field": "ml.price_prediction", <4> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -288,10 +288,10 @@ POST _ml/data_frame/_evaluate // TEST[skip:TBD] <1> The output destination index from a {dfanalytics} {reganalysis}. -<2> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the test split -only. -<3> The ground truth value for the actual house price. This is required in order +<2> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the test split +only. +<3> The ground truth value for the actual house price. This is required in order to evaluate results. <4> The predicted value for house price calculated by the {reganalysis}. @@ -311,10 +311,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "G3", <2> "predicted_field": "ml.G3_prediction", <3> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -324,17 +324,17 @@ POST _ml/data_frame/_evaluate -------------------------------------------------- // TEST[skip:TBD] -<1> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the train split +<1> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the train split only. It means that a training error will be calculated. -<2> The field that contains the ground truth value for the actual student +<2> The field that contains the ground truth value for the actual student performance. This is required in order to evaluate results. -<3> The field that contains the predicted value for student performance +<3> The field that contains the predicted value for student performance calculated by the {reganalysis}. -The next example calculates the testing error. The only difference compared with -the previous example is that `ml.is_training` is set to `false` this time, so +The next example calculates the testing error. The only difference compared with +the previous example is that `ml.is_training` is set to `false` this time, so the query excludes the train split from the evaluation. [source,console] @@ -350,10 +350,10 @@ POST _ml/data_frame/_evaluate } }, "evaluation": { - "regression": { + "regression": { "actual_field": "G3", <2> "predicted_field": "ml.G3_prediction", <3> - "metrics": { + "metrics": { "r_squared": {}, "mse": {} } @@ -363,12 +363,12 @@ POST _ml/data_frame/_evaluate -------------------------------------------------- // TEST[skip:TBD] -<1> In this example, a test/train split (`training_percent`) was defined for the -{reganalysis}. This query limits evaluation to be performed on the test split +<1> In this example, a test/train split (`training_percent`) was defined for the +{reganalysis}. This query limits evaluation to be performed on the test split only. It means that a testing error will be calculated. -<2> The field that contains the ground truth value for the actual student +<2> The field that contains the ground truth value for the actual student performance. This is required in order to evaluate results. -<3> The field that contains the predicted value for student performance +<3> The field that contains the predicted value for student performance calculated by the {reganalysis}. @@ -379,13 +379,13 @@ calculated by the {reganalysis}. [source,console] -------------------------------------------------- POST _ml/data_frame/_evaluate -{ +{ "index": "animal_classification", "evaluation": { "classification": { <1> "actual_field": "animal_class", <2> "predicted_field": "ml.animal_class_prediction", <3> - "metrics": { + "metrics": { "multiclass_confusion_matrix" : {} <4> } } @@ -395,9 +395,9 @@ POST _ml/data_frame/_evaluate // TEST[skip:TBD] <1> The evaluation type. -<2> The field that contains the ground truth value for the actual animal +<2> The field that contains the ground truth value for the actual animal classification. This is required in order to evaluate results. -<3> The field that contains the predicted value for animal classification by +<3> The field that contains the predicted value for animal classification by the {classanalysis}. <4> Specifies the metric for the evaluation. @@ -448,9 +448,9 @@ The API returns the following result: -------------------------------------------------- <1> The name of the actual class that the analysis tried to predict. <2> The number of documents in the index that belong to the `actual_class`. -<3> This object contains the list of the predicted classes and the number of +<3> This object contains the list of the predicted classes and the number of predictions associated with the class. <4> The number of cats in the dataset that are correctly identified as cats. <5> The number of cats in the dataset that are incorrectly classified as dogs. -<6> The number of documents that are classified as a class that is not listed as +<6> The number of documents that are classified as a class that is not listed as a `predicted_class`. From a67489cc8590ee572523d4605b1494a5051cddcd Mon Sep 17 00:00:00 2001 From: Przemyslaw Witek Date: Tue, 21 Jul 2020 12:46:16 +0200 Subject: [PATCH 3/3] Leave the old field name as deprecated --- .../dataframe/evaluation/outlierdetection/OutlierDetection.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java index b5f842755c518..3250272b03d16 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/evaluation/outlierdetection/OutlierDetection.java @@ -30,7 +30,7 @@ */ public class OutlierDetection implements Evaluation { - public static final ParseField NAME = new ParseField("outlier_detection"); + public static final ParseField NAME = new ParseField("outlier_detection", "binary_soft_classification"); private static final ParseField ACTUAL_FIELD = new ParseField("actual_field"); private static final ParseField PREDICTED_PROBABILITY_FIELD = new ParseField("predicted_probability_field");