This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Queries data from the index when insufficient data in buffer to form a full shingle #176

Merged
Changes from 5 commits
@@ -35,6 +35,7 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
+import java.util.stream.LongStream;
 import java.util.stream.Stream;

 import org.apache.logging.log4j.LogManager;
@@ -53,7 +54,7 @@ public class FeatureManager {
     private static final Logger logger = LogManager.getLogger(FeatureManager.class);

     // Each anomaly detector has a queue of data points with timestamps (in epoch milliseconds).
-    private final Map<String, ArrayDeque<Entry<Long, double[]>>> detectorIdsToTimeShingles;
+    private final Map<String, ArrayDeque<Entry<Long, Optional<double[]>>>> detectorIdsToTimeShingles;

     private final SearchFeatureDao searchFeatureDao;
     private final Interpolator interpolator;
@@ -121,81 +122,132 @@ public FeatureManager(
      */
     public void getCurrentFeatures(AnomalyDetector detector, long startTime, long endTime, ActionListener<SinglePointFeatures> listener) {
Member (kaituo):

Add comments on your workflow? Is the following understanding correct?

First, you interpolate using nearest points.
Then, query for missing points.
Finally, do another round of interpolation using nearest points.

Contributor Author (LiuJoyceC):
The code isn't actually interpolating before querying for missing points. It is meant to account for a small amount of jitter in time (see wnbts's comment above) so that points that are not actually missing aren't re-queried simply because the millisecond timestamp doesn't exactly match up. The fact that we have to account for random time jitter does make the implementation a bit more complicated than the original code (and my original first version of this feature).

For example, let's say the interval is 60,000 milliseconds, and the detector ends up running at time 60,000, time 120,001, and time 179,999 (simply due to random time jitter). At time 120,001, the function shouldn't re-query historical data just because there doesn't exist a data point stored in the shingle buffer whose timestamp matches up to exactly 60,001. It should just recognize that the point at timestamp 60,000 is the point from 1 interval ago. Similarly, at time 179,999, it shouldn't re-query the previous 2 intervals just because there aren't points whose timestamps are exactly 119,999 and 59,999.

Also, what actually gets stored in the shingle buffer (which persists to intervals in the future) is the actual original timestamp of the data point, so no interpolation is being done. In the example above, what gets stored in the shingle is:
[<60000, x>, <120001, y>, <179999, z>]. So the data point at time 60,000 is not getting interpolated to time 60,001. Instead, time 60,000 is just recognized as the timestamp of the previous interval.

The actual interpolation is done after the query for missing points. Here, missing points will be interpolated with points from neighboring intervals up to the configured maxNeighborDistance (which is currently configured to 2 intervals away). So if the shingle is missing an interval, such as [<60000, x>, <179999, z>], then the data point from time 179,999 will get interpolated to time 119,999 as if that were the actual data point from the previous interval, so the resulting double[] from filterAndFill becomes [x, z, z].
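
For illustration, this tolerance-based matching can be sketched as a standalone snippet (a hypothetical helper, not the PR code; it mirrors the TreeMap floor/ceiling lookup in getNearbyPointsForShingle below, with tolerance = half an interval as in getCurrentFeatures):

    import java.util.TreeMap;

    class JitterToleranceSketch {
        // Return the stored timestamp closest to the expected interval end time,
        // if it lies within the tolerance; otherwise null (point treated as missing).
        static Long nearestWithinTolerance(TreeMap<Long, double[]> points, long expected, long tolerance) {
            Long after = points.ceilingKey(expected);
            Long before = points.floorKey(expected);
            Long best = after;
            if (before != null && (after == null || Math.abs(expected - before) < Math.abs(expected - after))) {
                best = before;
            }
            return (best != null && Math.abs(expected - best) <= tolerance) ? best : null;
        }

        public static void main(String[] args) {
            TreeMap<Long, double[]> points = new TreeMap<>();
            points.put(60_000L, new double[] { 1.0 });   // x
            points.put(120_001L, new double[] { 2.0 });  // y
            points.put(179_999L, new double[] { 3.0 });  // z
            // Expected end times for a 60,000 ms interval with endTime = 180,000:
            for (long expected : new long[] { 60_000L, 120_000L, 180_000L }) {
                System.out.println(expected + " -> " + nearestWithinTolerance(points, expected, 30_000L));
            }
            // Prints 60000 -> 60000, 120000 -> 120001, 180000 -> 179999; nothing is re-queried.
        }
    }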

Member (kaituo):
First of all, there is no jitter, so the logic is unnecessary. (FYI @wnbts)

You called getNearbyPointsForShingle before running the query that actually uses nearby points for interpolation, right? getNearbyPointsForShingle returns a map whose keys are 60,000, 120,000, and 180,000 in your example, because the keys' source is what getFullShingleEndTimes returns, right?

Contributor (wnbts):
@kaituo I am not following this entire thread, just commenting on the statement that "there is no jitter". It looks like the time range comes from the system clock, which has jitter. Also, scheduled job runs are not exactly evenly spaced, since jitter is intentionally added to the scheduler.

Member (kaituo):
@wnbts the scheduled job's jitter is disabled for AD. It's possible that a scheduled job may not run exactly as scheduled (e.g., if the system is under heavy load), but in normal cases the job should run on time.

@LiuJoyceC You can keep your code. That should give some fault tolerance.

Contributor Author (LiuJoyceC):
> You called getNearbyPointsForShingle before running the query that actually uses nearby points for interpolation, right? getNearbyPointsForShingle returns a map whose keys are 60,000, 120,000, and 180,000 in your example, because the keys' source is what getFullShingleEndTimes returns, right?

The key in the map is meant to allow the shingle to match up data points to intervals. The actual timestamp of the data point is stored in the value of the map along with the data point value. In updateUnprocessedFeatures, it is the actual timestamp of the data point that is added to the shingle and persisted to future intervals. So the shingle in the example will contain the timestamps 60,000, 120,001, 179,999. The data points are only associated with their actual timestamps and not imputed to other timestamps at that stage.
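
For instance, with the timestamps above, getNearbyPointsForShingle produces the map {60000: <60000, x>, 120000: <120001, y>, 180000: <179999, z>}. The keys are the expected interval end times, while each value keeps the point's actual timestamp.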

> You can keep your code. That should give some fault tolerance.

👍


-        Deque<Entry<Long, double[]>> shingle = detectorIdsToTimeShingles
-            .computeIfAbsent(detector.getDetectorId(), id -> new ArrayDeque<Entry<Long, double[]>>(shingleSize));
-        if (shingle.isEmpty() || shingle.getLast().getKey() < endTime) {
-            searchFeatureDao
-                .getFeaturesForPeriod(
-                    detector,
-                    startTime,
-                    endTime,
-                    ActionListener
-                        .wrap(point -> updateUnprocessedFeatures(point, shingle, detector, endTime, listener), listener::onFailure)
-                );
+        Deque<Entry<Long, Optional<double[]>>> shingle = detectorIdsToTimeShingles
+            .computeIfAbsent(detector.getDetectorId(), id -> new ArrayDeque<>(shingleSize));
+
+        long maxTimeDifference = getDetectorIntervalInMilliseconds(detector) / 2;
+        Map<Long, Entry<Long, Optional<double[]>>> featuresMap = getNearbyPointsForShingle(detector, shingle, endTime, maxTimeDifference)
+            .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
+
+        List<Entry<Long, Long>> missingRanges = getMissingRangesInShingle(detector, featuresMap, endTime);

+        if (missingRanges.size() > 0) {
+            try {
+                searchFeatureDao.getFeatureSamplesForPeriods(detector, missingRanges, ActionListener.wrap(points -> {
+                    for (int i = 0; i < points.size(); i++) {
+                        Optional<double[]> point = points.get(i);
+                        long rangeEndTime = missingRanges.get(i).getValue();
+                        featuresMap.put(rangeEndTime, new SimpleImmutableEntry<>(rangeEndTime, point));
+                    }
+                    updateUnprocessedFeatures(detector, shingle, featuresMap, endTime, listener);
+                }, listener::onFailure));
+            } catch (IOException e) {
+                listener.onFailure(e);
Contributor (wnbts):

Minor: the error handling can be changed as kaituo suggested.

+            }
         } else {
             getProcessedFeatures(shingle, detector, endTime, listener);
         }
     }

+    private List<Entry<Long, Long>> getMissingRangesInShingle(
+        AnomalyDetector detector,
+        Map<Long, Entry<Long, Optional<double[]>>> featuresMap,
+        long endTime
+    ) {
+        long intervalMilli = getDetectorIntervalInMilliseconds(detector);
+
+        return getFullShingleEndTimes(endTime, intervalMilli)
+            .filter(time -> !featuresMap.containsKey(time))
+            .mapToObj(time -> new SimpleImmutableEntry<>(time - intervalMilli, time))
+            .collect(Collectors.toList());
+    }
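
For example, if featuresMap has entries for the end times 60,000 and 180,000 but not 120,000 (with a 60,000 ms interval), the method returns the single range <60000, 120000>, the start and end of the one interval that must be queried from the index.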

     private void updateUnprocessedFeatures(
-        Optional<double[]> point,
-        Deque<Entry<Long, double[]>> shingle,
         AnomalyDetector detector,
+        Deque<Entry<Long, Optional<double[]>>> shingle,
+        Map<Long, Entry<Long, Optional<double[]>>> featuresMap,
         long endTime,
         ActionListener<SinglePointFeatures> listener
     ) {
-        if (point.isPresent()) {
-            if (shingle.size() == shingleSize) {
-                shingle.remove();
-            }
-            shingle.add(new SimpleImmutableEntry<>(endTime, point.get()));
+        shingle.clear();
+        getFullShingleEndTimes(endTime, getDetectorIntervalInMilliseconds(detector))
+            .filter(time -> featuresMap.containsKey(time))
+            .mapToObj(time -> featuresMap.get(time))
Contributor (wnbts):

Minor: this first filter might not be needed, since the map should contain results for every interval, present or absent. To be safe in the unlikely case that the map is incomplete, the second get can return an empty value when the key is absent, using getOrDefault.

Contributor Author (LiuJoyceC), Jul 10, 2020:
It depends on how we want to handle this case: https://github.com/opendistro-for-elasticsearch/anomaly-detection/blob/master/src/main/java/com/amazon/opendistroforelasticsearch/ad/feature/SearchFeatureDao.java#L211-L214. This is the case that could cause some of the times to be missing from featuresMap at this line.

Is aggs == null in the search response equivalent to each queried time range having no data? In other words, do we want to treat Collections.emptyList() as equivalent to [Optional.empty(), Optional.empty(), ..., Optional.empty()]? In the case of Collections.emptyList(), do we want to cache the null values for each of the queried time ranges so they will not get re-queried?

If yes, it might be cleaner to handle this logic in the callback passed to searchFeatureDao, so that the logic for determining shingle values based on the search response is located in one function rather than spread across multiple functions.

Contributor (wnbts):

I haven't seen or heard of that happening in practice. If it ever happens, caching the results is OK. What I was suggesting is just simplifying the implementation to one line, like stream.mapToObj(time -> map.getOrDefault(time, <time, empty()>)).
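
A minimal sketch of that one-liner, assuming the fields and helper methods from this diff:

    // Hypothetical: default to an absent point for any end time the map doesn't cover.
    getFullShingleEndTimes(endTime, getDetectorIntervalInMilliseconds(detector))
        .mapToObj(time -> featuresMap.getOrDefault(time, new SimpleImmutableEntry<>(time, Optional.<double[]>empty())))
        .forEach(shingle::add);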

Contributor Author (LiuJoyceC):
Alright, I will change the implementation to cache results for the missing ranges when the response is an empty list.

+            .forEach(e -> shingle.add(e));
+
+        if (featuresMap.containsKey(endTime)) {
             getProcessedFeatures(shingle, detector, endTime, listener);
         } else {
             listener.onResponse(new SinglePointFeatures(Optional.empty(), Optional.empty()));
Contributor (wnbts):

getProcessedFeatures should be able to handle both branches.

Contributor Author (LiuJoyceC):
Are you suggesting to pass featuresMap as a parameter into getProcessedFeatures and check this condition in there?

The condition featuresMap.containsKey(endTime) is the simplest and most concise way to ensure the conditions !shingle.isEmpty() and shingle.getLast().getKey() ≈ endTime (where ≈ means within half an interval away, as discussed, which basically means that the current point is present). The reason this check is being done in updateUnprocessedFeatures is that it is only needed if updateUnprocessedFeatures is invoked, and not needed if getProcessedFeatures is directly invoked by getCurrentFeatures (in which case the missingRanges.size() > 0 check already ensures the above conditions, so checking featuresMap is unnecessary).

Contributor (wnbts):
No, featuresMap is not needed. By the end of line 176, the shingle collection should contain exactly shingleSize data points, present or absent, the last one ending around endTime. getProcessedFeatures can output the same results for both the if and else branches.

Contributor Author (LiuJoyceC):
With the current implementation (where results are not added to the shingle if searchFeatureDao returns Collections.emptyList()), the shingle is not guaranteed to contain an entry around endTime, which is why this check is currently being done.

However, per your response on the comment below, if we go ahead and cache all missing ranges when searchFeatureDao returns Collections.emptyList(), then the shingle will be guaranteed to have an entry at or near endTime, and checking the featuresMap will no longer be needed.
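
A sketch of that caching behavior, as a hypothetical addition to the callback in getCurrentFeatures above:

    // Hypothetical: when the search returns an empty list, record an absent point for
    // every missing range so those ranges are cached and never re-queried.
    if (points.isEmpty()) {
        for (Entry<Long, Long> range : missingRanges) {
            long rangeEndTime = range.getValue();
            featuresMap.put(rangeEndTime, new SimpleImmutableEntry<>(rangeEndTime, Optional.empty()));
        }
    }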

         }
     }

     private void getProcessedFeatures(
-        Deque<Entry<Long, double[]>> shingle,
+        Deque<Entry<Long, Optional<double[]>>> shingle,
         AnomalyDetector detector,
         long endTime,
         ActionListener<SinglePointFeatures> listener
     ) {

-        double[][] currentPoints = filterAndFill(shingle, endTime, detector);
-        Optional<double[]> currentPoint = Optional.ofNullable(shingle.peekLast()).map(Entry::getValue);
+        Optional<double[]> currentPoint = shingle.peekLast().getValue();
         listener
             .onResponse(
Member (kaituo):

The original code is easier to read. Also, you do currentPoint.map(point -> filterAndFill(shingle, endTime, detector)) without using currentPoint in filterAndFill, which looks strange.

Contributor Author (LiuJoyceC):

This change was made to address wnbts's comments above about simplifying the code here to not use if/else to ensure currentPoint is not an empty Optional. The way this is written, if currentPoint is an empty Optional, it triggers the orElse line, so that when the current point is empty we don't end up with a non-empty array of points in the SinglePointFeatures (if that were to happen, it would be an unexpected change in behavior from before).

The reason the original code didn't need to check that was that currentPoint wasn't an Optional before. Now that we are caching empty responses from the search request to ensure we don't re-query missing data, the points in the shingle are wrapped in Optionals.

Contributor Author (LiuJoyceC):

Another unexpected change in behavior that this avoids is running filterAndFill on a shingle where the current point is not present. That would result in the shingle filling in the missing current point with older data. In the original code, there were checks in getCurrentFeatures and updateUnprocessedFeatures to ensure the current point is not missing; those checks were removed to address the comments above about simplifying the logic.

Contributor Author (LiuJoyceC):

The code was modified to no longer have an unused point param in the function, and currentPoint.isPresent() is now explicitly checked to make the intention clear. Does this address the concern?
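
A sketch of what that revision might look like (hypothetical; the updated commit is not shown in this diff):

    // Hypothetical revision: check presence explicitly instead of mapping over an unused param.
    Optional<double[]> currentPoint = shingle.peekLast().getValue();
    if (currentPoint.isPresent()) {
        double[][] points = filterAndFill(shingle, endTime, detector);
        listener.onResponse(new SinglePointFeatures(currentPoint, Optional.ofNullable(points).map(p -> batchShingle(p, shingleSize)[0])));
    } else {
        listener.onResponse(new SinglePointFeatures(currentPoint, Optional.empty()));
    }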

Member (kaituo):

Yes, thanks for the change.

-                Optional
-                    .ofNullable(currentPoints)
+                currentPoint
+                    .map(point -> filterAndFill(shingle, endTime, detector))
                     .map(points -> new SinglePointFeatures(currentPoint, Optional.of(batchShingle(points, shingleSize)[0])))
                     .orElse(new SinglePointFeatures(currentPoint, Optional.empty()))
             );
     }

-    private double[][] filterAndFill(Deque<Entry<Long, double[]>> shingle, long endTime, AnomalyDetector detector) {
-        long intervalMilli = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMillis();
+    private double[][] filterAndFill(Deque<Entry<Long, Optional<double[]>>> shingle, long endTime, AnomalyDetector detector) {
+        Deque<Entry<Long, Optional<double[]>>> filteredShingle = shingle
+            .stream()
+            .filter(e -> e.getValue().isPresent())
+            .collect(Collectors.toCollection(ArrayDeque::new));
         double[][] result = null;
-        if (shingle.size() >= shingleSize - maxMissingPoints) {
-            TreeMap<Long, double[]> search = new TreeMap<>(shingle.stream().collect(Collectors.toMap(Entry::getKey, Entry::getValue)));
-            result = IntStream.rangeClosed(1, shingleSize).mapToLong(i -> endTime - (shingleSize - i) * intervalMilli).mapToObj(t -> {
-                Optional<Entry<Long, double[]>> after = Optional.ofNullable(search.ceilingEntry(t));
-                Optional<Entry<Long, double[]>> before = Optional.ofNullable(search.floorEntry(t));
-                return after
-                    .filter(a -> Math.abs(t - a.getKey()) <= before.map(b -> Math.abs(t - b.getKey())).orElse(Long.MAX_VALUE))
-                    .map(Optional::of)
-                    .orElse(before)
-                    .filter(e -> Math.abs(t - e.getKey()) < intervalMilli * maxNeighborDistance)
-                    .map(Entry::getValue)
-                    .orElse(null);
-            }).filter(d -> d != null).toArray(double[][]::new);
+        if (filteredShingle.size() >= shingleSize - maxMissingPoints) {
+            long maxMillisecondsDifference = maxNeighborDistance * getDetectorIntervalInMilliseconds(detector);
+            result = getNearbyPointsForShingle(detector, filteredShingle, endTime, maxMillisecondsDifference)
+                .map(e -> e.getValue().getValue().orElse(null))
+                .filter(d -> d != null)
+                .toArray(double[][]::new);

             if (result.length < shingleSize) {
                 result = null;
             }
         }
         return result;
     }
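
Continuing the example from the review thread above: with shingleSize = 3 and a shingle of [<60000, x>, <179999, z>], the middle interval is missing but within maxNeighborDistance of the point at 179,999, so filterAndFill fills it from that neighbor and returns [x, z, z].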

+    private Stream<Entry<Long, Entry<Long, Optional<double[]>>>> getNearbyPointsForShingle(
Member (kaituo):

Have some documentation of this method, since the return value is nested and not easy to understand?

+        AnomalyDetector detector,
+        Deque<Entry<Long, Optional<double[]>>> shingle,
+        long endTime,
+        long maxMillisecondsDifference
+    ) {
+        long intervalMilli = getDetectorIntervalInMilliseconds(detector);
+        TreeMap<Long, Optional<double[]>> search = new TreeMap<>(
+            shingle.stream().collect(Collectors.toMap(Entry::getKey, Entry::getValue))
+        );
+        return getFullShingleEndTimes(endTime, intervalMilli).mapToObj(t -> {
+            Optional<Entry<Long, Optional<double[]>>> after = Optional.ofNullable(search.ceilingEntry(t));
+            Optional<Entry<Long, Optional<double[]>>> before = Optional.ofNullable(search.floorEntry(t));
+            return after
+                .filter(a -> Math.abs(t - a.getKey()) <= before.map(b -> Math.abs(t - b.getKey())).orElse(Long.MAX_VALUE))
+                .map(Optional::of)
+                .orElse(before)
+                .filter(e -> Math.abs(t - e.getKey()) < maxMillisecondsDifference)
+                .map(e -> new SimpleImmutableEntry<>(t, e));
+        }).filter(Optional::isPresent).map(Optional::get);
+    }
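
Documentation along the lines requested above could look like this (a sketch, not part of the diff):

    /**
     * Maps each expected shingle end time to the stored data point closest to it in time,
     * provided that point lies within maxMillisecondsDifference of the expected time.
     *
     * Each stream element is <expectedEndTime, <actualTimestamp, Optional<dataPoint>>>, so
     * callers can line points up with intervals while keeping the points' real timestamps.
     * Expected end times with no nearby point are omitted from the stream.
     */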

+    private long getDetectorIntervalInMilliseconds(AnomalyDetector detector) {
+        return ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMillis();
+    }
+
+    private LongStream getFullShingleEndTimes(long endTime, long intervalMilli) {
+        return LongStream.rangeClosed(1, shingleSize).map(i -> endTime - (shingleSize - i) * intervalMilli);
+    }
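
For example, with shingleSize = 3, getFullShingleEndTimes(180000, 60000) returns the stream [60000, 120000, 180000], the expected interval end times used throughout the discussion above.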

     /**
      * Provides data for cold-start training.
      *
@@ -367,7 +419,7 @@ public void getPreviewFeatures(AnomalyDetector detector, long startMilli, long e
     private Entry<List<Entry<Long, Long>>, Integer> getSampleRanges(AnomalyDetector detector, long startMilli, long endMilli) {
         long start = truncateToMinute(startMilli);
         long end = truncateToMinute(endMilli);
-        long bucketSize = ((IntervalTimeConfiguration) detector.getDetectionInterval()).toDuration().toMillis();
+        long bucketSize = getDetectorIntervalInMilliseconds(detector);
         int numBuckets = (int) Math.floor((end - start) / (double) bucketSize);
         int numSamples = (int) Math.max(Math.min(numBuckets * previewSampleRate, maxPreviewSamples), 1);
         int stride = (int) Math.max(1, Math.floor((double) numBuckets / numSamples));
@@ -455,9 +507,9 @@ private long truncateToMinute(long epochMillis) {
     }

     public int getShingleSize(String detectorId) {
-        Deque<Entry<Long, double[]>> shingle = detectorIdsToTimeShingles.get(detectorId);
+        Deque<Entry<Long, Optional<double[]>>> shingle = detectorIdsToTimeShingles.get(detectorId);
         if (shingle != null) {
-            return shingle.size();
+            return Math.toIntExact(shingle.stream().filter(entry -> entry.getValue().isPresent()).count());
         } else {
             return -1;
         }