Skip to content

Commit

Permalink
[ML] Refactor DataStreamDiagnostics to use array (#30129)
Browse files Browse the repository at this point in the history
This commit refactors the DataStreamDiagnostics class
achieving the following advantages:

- simpler code; by encapsulating the moving bucket histogram
into its own class
- better performance; by using an array to store the buckets
instead of a map
- explicit handling of gap buckets; in preparation of fixing #30080
  • Loading branch information
dimitris-athanasiou authored May 1, 2018
1 parent acdf330 commit 057cdff
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 230 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.ml.job.persistence.JobDataCountsPersister;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.job.persistence.JobDataCountsPersister;
import org.elasticsearch.xpack.ml.job.process.diagnostics.DataStreamDiagnostics;

import java.util.Date;
import java.util.Locale;
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.process.diagnostics;

import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.utils.Intervals;

/**
* A moving window of buckets that allows keeping
* track of some statistics like the bucket count,
* empty or sparse buckets, etc.
*
* The counts are stored in an array that functions as a
* circular buffer. When time is advanced, all buckets
* out of the window are flushed.
*/
class BucketDiagnostics {

    /** Lower bound on the number of buckets kept in the window, whatever the latency is. */
    private static final int MIN_BUCKETS = 10;

    /** Bucket span of the job, in milliseconds. */
    private final long bucketSpanMs;
    /** Latency of the job, in milliseconds; 0 when the job defines none. */
    private final long latencyMs;
    /** Capacity of the circular buffer, i.e. the maximum number of buckets in the window. */
    private final int maxSize;
    /** Record counts per bucket, used as a circular buffer of {@code maxSize} slots. */
    private final long[] buckets;
    /** Sum of the counts of all buckets currently in the window. */
    private long movingBucketCount = 0;
    /** Start time of the latest bucket in the window; negative until the first record is added. */
    private long latestBucketStartMs = -1;
    /** Slot in {@link #buckets} that holds the latest bucket. */
    private int latestBucketIndex;
    /** Start time of the earliest bucket in the window; negative until the first record is added. */
    private long earliestBucketStartMs = -1;
    /** Slot in {@link #buckets} that holds the earliest bucket. */
    private int earliestBucketIndex;
    /** Start time of the most recent bucket reported to the listener. */
    private long latestFlushedBucketStartMs = -1;
    private final BucketFlushListener bucketFlushListener;

    /**
     * Creates an empty window sized from the job's analysis config.
     *
     * The window holds as many buckets as are needed to cover the job latency
     * (rounded up to whole buckets), but never fewer than {@link #MIN_BUCKETS}.
     *
     * @param job the job whose bucket span and latency define the window
     * @param bucketFlushListener called for every bucket that is flushed
     */
    BucketDiagnostics(Job job, BucketFlushListener bucketFlushListener) {
        bucketSpanMs = job.getAnalysisConfig().getBucketSpan().millis();
        latencyMs = job.getAnalysisConfig().getLatency() == null ? 0 : job.getAnalysisConfig().getLatency().millis();
        maxSize = Math.max((int) (Intervals.alignToCeil(latencyMs, bucketSpanMs) / bucketSpanMs), MIN_BUCKETS);
        buckets = new long[maxSize];
        this.bucketFlushListener = bucketFlushListener;
    }

    /**
     * Counts a record in the bucket its timestamp falls into.
     *
     * If the record is newer than the latest bucket, the window is advanced
     * first, flushing every bucket that drops out of it. A record whose bucket
     * is too old to fit in the window is ignored.
     *
     * @param recordTimestampMs the record timestamp in milliseconds
     */
    void addRecord(long recordTimestampMs) {
        long bucketStartMs = Intervals.alignToFloor(recordTimestampMs, bucketSpanMs);

        // Initialize earliest/latest times
        if (latestBucketStartMs < 0) {
            latestBucketStartMs = bucketStartMs;
            earliestBucketStartMs = bucketStartMs;
        }

        advanceTime(bucketStartMs);
        addToBucket(bucketStartMs);
    }

    /**
     * Moves the latest bucket forward, one bucket at a time, until it starts at
     * {@code bucketStartMs}. Does nothing if the window is already there or beyond.
     */
    private void advanceTime(long bucketStartMs) {
        while (bucketStartMs > latestBucketStartMs) {
            int flushBucketIndex = (latestBucketIndex + 1) % maxSize;

            // The next slot still holds the earliest bucket: the buffer is full,
            // so that bucket leaves the window before its slot is reused.
            if (flushBucketIndex == earliestBucketIndex) {
                // Must run before latestBucketIndex changes; the bucket timestamp is derived from it.
                flush(flushBucketIndex);
                movingBucketCount -= buckets[flushBucketIndex];
                earliestBucketStartMs += bucketSpanMs;
                earliestBucketIndex = (earliestBucketIndex + 1) % maxSize;
            }
            buckets[flushBucketIndex] = 0L;

            latestBucketStartMs += bucketSpanMs;
            latestBucketIndex = flushBucketIndex;
        }
    }

    /**
     * Increments the count of the bucket starting at {@code bucketStartMs}, which
     * must not be later than the latest bucket. If the bucket precedes the current
     * earliest bucket, the window is extended backwards to include it.
     *
     * Buckets that are {@code maxSize} or more buckets behind the latest one do not
     * fit in the circular buffer. Their index would wrap onto a slot that belongs
     * to a newer bucket and the earliest-bucket bookkeeping would point outside the
     * window, so such records are dropped.
     */
    private void addToBucket(long bucketStartMs) {
        // Kept as a long until range-checked so that a huge gap cannot overflow the int cast.
        long offsetToLatest = (bucketStartMs - latestBucketStartMs) / bucketSpanMs;
        if (offsetToLatest <= -maxSize) {
            return;
        }

        // The offset is zero or negative here; floorMod keeps the wrapped index non-negative.
        int bucketIndex = Math.floorMod(latestBucketIndex + (int) offsetToLatest, maxSize);

        ++buckets[bucketIndex];
        ++movingBucketCount;

        if (bucketStartMs < earliestBucketStartMs) {
            earliestBucketStartMs = bucketStartMs;
            earliestBucketIndex = bucketIndex;
        }
    }

    /**
     * Reports the bucket stored in the given slot to the listener, unless a bucket
     * with the same or a later start time has already been reported.
     */
    private void flush(int bucketIndex) {
        long bucketStartMs = getTimestampMs(bucketIndex);
        if (bucketStartMs > latestFlushedBucketStartMs) {
            bucketFlushListener.onBucketFlush(bucketStartMs, buckets[bucketIndex]);
            latestFlushedBucketStartMs = bucketStartMs;
        }
    }

    /**
     * Returns the start time of the bucket stored in the given slot, computed from
     * its distance (going backwards around the buffer) to the latest bucket.
     */
    private long getTimestampMs(int bucketIndex) {
        int offsetToLatest = latestBucketIndex - bucketIndex;
        if (offsetToLatest < 0) {
            offsetToLatest = maxSize + offsetToLatest;
        }
        return latestBucketStartMs - offsetToLatest * bucketSpanMs;
    }

    /**
     * Reports every bucket from the earliest one up to, but not including, the
     * latest one. Buckets that were already reported are skipped. Does nothing
     * if no record has been added yet.
     */
    void flush() {
        if (latestBucketStartMs < 0) {
            return;
        }

        int bucketIndex = earliestBucketIndex;
        while (bucketIndex != latestBucketIndex) {
            flush(bucketIndex);
            bucketIndex = (bucketIndex + 1) % maxSize;
        }
    }

    /**
     * Returns the mean record count per bucket over the current window.
     * The result is {@code NaN} while no record has been added.
     */
    double averageBucketCount() {
        return (double) movingBucketCount / size();
    }

    /**
     * Returns the number of buckets between the earliest and the latest bucket,
     * both included, or 0 if no record has been added yet.
     */
    private int size() {
        if (latestBucketStartMs < 0) {
            return 0;
        }
        return (int) ((latestBucketStartMs - earliestBucketStartMs) / bucketSpanMs) + 1;
    }

    /** Callback notified of each bucket that is flushed. */
    interface BucketFlushListener {
        /**
         * @param bucketStartMs start time of the flushed bucket, in milliseconds
         * @param bucketCounts number of records counted in that bucket
         */
        void onBucketFlush(long bucketStartMs, long bucketCounts);
    }
}
Loading

0 comments on commit 057cdff

Please sign in to comment.