Skip to content

Commit c03ac54

Browse files
committed
[ML] Remove old per-partition normalization code
Per-partition normalization is an old, undocumented feature that was never used by clients. It has been superseded by per-partition maximum scoring (see #32748). This PR removes the now-redundant code. Relates to elastic/elasticsearch#32816.
1 parent 88095c1 commit c03ac54

18 files changed

+15
-261
lines changed

bin/autodetect/CCmdLineParser.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ bool CCmdLineParser::parse(int argc,
5353
std::size_t& bucketResultsDelay,
5454
bool& multivariateByFields,
5555
std::string& multipleBucketspans,
56-
bool& perPartitionNormalization,
5756
TStrVec& clauseTokens) {
5857
try {
5958
boost::program_options::options_description desc(DESCRIPTION);
@@ -119,8 +118,6 @@ bool CCmdLineParser::parse(int argc,
119118
"Optional flag to enable multi-variate analysis of correlated by fields")
120119
("multipleBucketspans", boost::program_options::value<std::string>(),
121120
"Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")
122-
("perPartitionNormalization",
123-
"Optional flag to enable per partition normalization")
124121
;
125122
// clang-format on
126123

@@ -237,9 +234,6 @@ bool CCmdLineParser::parse(int argc,
237234
if (vm.count("multipleBucketspans") > 0) {
238235
multipleBucketspans = vm["multipleBucketspans"].as<std::string>();
239236
}
240-
if (vm.count("perPartitionNormalization") > 0) {
241-
perPartitionNormalization = true;
242-
}
243237

244238
boost::program_options::collect_unrecognized(
245239
parsed.options, boost::program_options::include_positional)

bin/autodetect/CCmdLineParser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ class CCmdLineParser {
6565
std::size_t& bucketResultsDelay,
6666
bool& multivariateByFields,
6767
std::string& multipleBucketspans,
68-
bool& perPartitionNormalization,
6968
TStrVec& clauseTokens);
7069

7170
private:

bin/autodetect/Main.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,16 @@ int main(int argc, char** argv) {
8989
std::size_t bucketResultsDelay(0);
9090
bool multivariateByFields(false);
9191
std::string multipleBucketspans;
92-
bool perPartitionNormalization(false);
9392
TStrVec clauseTokens;
9493
if (ml::autodetect::CCmdLineParser::parse(
9594
argc, argv, limitConfigFile, modelConfigFile, fieldConfigFile,
9695
modelPlotConfigFile, jobId, logProperties, logPipe, bucketSpan, latency,
9796
summaryCountFieldName, delimiter, lengthEncodedInput, timeField,
9897
timeFormat, quantilesStateFile, deleteStateFiles, persistInterval,
9998
maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName,
100-
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe,
101-
persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage,
102-
bucketResultsDelay, multivariateByFields, multipleBucketspans,
103-
perPartitionNormalization, clauseTokens) == false) {
99+
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName,
100+
isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, bucketResultsDelay,
101+
multivariateByFields, multipleBucketspans, clauseTokens) == false) {
104102
return EXIT_FAILURE;
105103
}
106104

@@ -148,7 +146,6 @@ int main(int argc, char** argv) {
148146
ml::model::CAnomalyDetectorModelConfig::defaultConfig(
149147
bucketSpan, summaryMode, summaryCountFieldName, latency,
150148
bucketResultsDelay, multivariateByFields, multipleBucketspans);
151-
modelConfig.perPartitionNormalization(perPartitionNormalization);
152149
modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(
153150
fieldConfig.detectionRules()));
154151
modelConfig.scheduledEvents(ml::model::CAnomalyDetectorModelConfig::TStrDetectionRulePrVecCRef(

bin/normalize/CCmdLineParser.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ bool CCmdLineParser::parse(int argc,
3030
bool& isOutputFileNamedPipe,
3131
std::string& quantilesState,
3232
bool& deleteStateFiles,
33-
bool& writeCsv,
34-
bool& perPartitionNormalization) {
33+
bool& writeCsv) {
3534
try {
3635
boost::program_options::options_description desc(DESCRIPTION);
3736
// clang-format off
@@ -60,8 +59,6 @@ bool CCmdLineParser::parse(int argc,
6059
"If this flag is set then delete the normalizer state files once they have been read")
6160
("writeCsv",
6261
"Write the results in CSV format (default is lineified JSON)")
63-
("perPartitionNormalization",
64-
"Optional flag to enable per partition normalization")
6562
;
6663
// clang-format on
6764

@@ -114,9 +111,6 @@ bool CCmdLineParser::parse(int argc,
114111
if (vm.count("writeCsv") > 0) {
115112
writeCsv = true;
116113
}
117-
if (vm.count("perPartitionNormalization") > 0) {
118-
perPartitionNormalization = true;
119-
}
120114
} catch (std::exception& e) {
121115
std::cerr << "Error processing command line: " << e.what() << std::endl;
122116
return false;

bin/normalize/CCmdLineParser.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ class CCmdLineParser {
4343
bool& isOutputFileNamedPipe,
4444
std::string& quantilesState,
4545
bool& deleteStateFiles,
46-
bool& writeCsv,
47-
bool& perPartitionNormalization);
46+
bool& writeCsv);
4847

4948
private:
5049
static const std::string DESCRIPTION;

bin/normalize/Main.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,10 @@ int main(int argc, char** argv) {
5454
std::string quantilesStateFile;
5555
bool deleteStateFiles(false);
5656
bool writeCsv(false);
57-
bool perPartitionNormalization(false);
5857
if (ml::normalize::CCmdLineParser::parse(
59-
argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan,
60-
lengthEncodedInput, inputFileName, isInputFileNamedPipe,
61-
outputFileName, isOutputFileNamedPipe, quantilesStateFile,
62-
deleteStateFiles, writeCsv, perPartitionNormalization) == false) {
58+
argc, argv, modelConfigFile, logProperties, logPipe, bucketSpan, lengthEncodedInput,
59+
inputFileName, isInputFileNamedPipe, outputFileName, isOutputFileNamedPipe,
60+
quantilesStateFile, deleteStateFiles, writeCsv) == false) {
6361
return EXIT_FAILURE;
6462
}
6563

@@ -93,7 +91,6 @@ int main(int argc, char** argv) {
9391
LOG_FATAL(<< "Ml model config file '" << modelConfigFile << "' could not be loaded");
9492
return EXIT_FAILURE;
9593
}
96-
modelConfig.perPartitionNormalization(perPartitionNormalization);
9794

9895
// There's a choice of input and output formats for the numbers to be normalised
9996
using TInputParserUPtr = std::unique_ptr<ml::api::CInputParser>;

include/api/CHierarchicalResultsWriter.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,7 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults
5151
using TStr1Vec = core::CSmallVector<std::string, 1>;
5252

5353
public:
54-
enum EResultType {
55-
E_SimpleCountResult,
56-
E_PopulationResult,
57-
E_PartitionResult,
58-
E_Result
59-
};
54+
enum EResultType { E_SimpleCountResult, E_PopulationResult, E_Result };
6055
//! Type which wraps up the results of anomaly detection.
6156
struct API_EXPORT SResults {
6257
//! Construct for population results
@@ -168,9 +163,6 @@ class API_EXPORT CHierarchicalResultsWriter : public model::CHierarchicalResults
168163
//! pivot.
169164
void writePivotResult(const model::CHierarchicalResults& results, const TNode& node);
170165

171-
//! Write partition result if \p node is a partition level result
172-
void writePartitionResult(const model::CHierarchicalResults& results, const TNode& node);
173-
174166
//! Write out a simple count result if \p node is simple
175167
//! count.
176168
void writeSimpleCountResult(const TNode& node);

include/api/CJsonOutputWriter.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler {
162162
// when the number to write is limited
163163
double s_LowestBucketInfluencerScore;
164164

165-
//! Partition scores
166-
TDocumentWeakPtrVec s_PartitionScoreDocuments;
167-
168165
//! scheduled event descriptions
169166
TStr1Vec s_ScheduledEventDescriptions;
170167
};
@@ -304,10 +301,6 @@ class API_EXPORT CJsonOutputWriter : public COutputHandler {
304301
void addInfluences(const CHierarchicalResultsWriter::TStoredStringPtrStoredStringPtrPrDoublePrVec& influenceResults,
305302
TDocumentWeakPtr weakDoc);
306303

307-
//! Write partition score & probability
308-
void addPartitionScores(const CHierarchicalResultsWriter::TResults& results,
309-
TDocumentWeakPtr weakDoc);
310-
311304
private:
312305
//! The job ID
313306
std::string m_JobId;

include/api/CResultNormalizer.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,6 @@ class API_EXPORT CResultNormalizer {
9393
std::string& valueFieldName,
9494
double& probability);
9595

96-
bool parseDataFields(const TStrStrUMap& dataRowFields,
97-
std::string& level,
98-
std::string& partition,
99-
std::string& partitionValue,
100-
std::string& person,
101-
std::string& function,
102-
std::string& valueFieldName,
103-
double& probability);
104-
10596
template<typename T>
10697
bool parseDataField(const TStrStrUMap& dataRowFields,
10798
const std::string& fieldName,

include/model/CAnomalyDetectorModelConfig.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -424,12 +424,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig {
424424
const TDoubleDoublePrVec& normalizedScoreKnotPoints() const;
425425
//@}
426426

427-
//! Check if we should create one normalizer per partition field value.
428-
bool perPartitionNormalization() const;
429-
430-
//! Set whether we should create one normalizer per partition field value.
431-
void perPartitionNormalization(bool value);
432-
433427
//! Sets the reference to the detection rules map
434428
void detectionRules(TIntDetectionRuleVecUMapCRef detectionRules);
435429

@@ -500,9 +494,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig {
500494
//! and the normalized anomaly score with these knot points.
501495
//! \see DEFAULT_NORMALIZED_SCORE_KNOT_POINTS for details.
502496
TDoubleDoublePrVec m_NormalizedScoreKnotPoints;
503-
504-
//! If true then create one normalizer per partition field value.
505-
bool m_PerPartitionNormalisation;
506497
//@}
507498

508499
//! A reference to the map containing detection rules per

0 commit comments

Comments
 (0)