adding model level metric in node level (#1330)

* adding model level metric in node level

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* spotlessApply and fixed a test

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* added if clause to bypass the integration test

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* addressing comments

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* addressed comments

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* add more tests

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

* adding boolean check if model stats exists

  Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>

---------

Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>
opensearch-project · Oct 4, 2023 · bb84282 · bb84282
1 parent efe5566
commit bb84282
Show file tree

Hide file tree

Showing 17 changed files with 449 additions and 40 deletions.
diff --git a/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodeResponse.java b/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodeResponse.java
@@ -17,7 +17,9 @@
 import org.opensearch.core.xcontent.XContentBuilder;
 import org.opensearch.ml.common.FunctionName;
 import org.opensearch.ml.stats.MLAlgoStats;
+import org.opensearch.ml.stats.MLModelStats;
 import org.opensearch.ml.stats.MLNodeLevelStat;
+import org.opensearch.ml.stats.MLStatsInput;
 
 public class MLStatsNodeResponse extends BaseNodeResponse implements ToXContentFragment {
     /**
@@ -30,6 +32,12 @@ public class MLStatsNodeResponse extends BaseNodeResponse implements ToXContentF
      * Example: {kmeans: { train: { request_count: 1} }}
      */
     private Map<FunctionName, MLAlgoStats> algorithmStats;
+    /**
+     * Model level stats, keyed by model id.
+     *
+     * Example: {model_id: { predict: { request_count: 1} }}
+     */
+    private Map<String, MLModelStats> modelStats;
 
     /**
      * Constructor
@@ -45,21 +53,30 @@ public MLStatsNodeResponse(StreamInput in) throws IOException {
         if (in.readBoolean()) {
             this.algorithmStats = in.readMap(stream -> stream.readEnum(FunctionName.class), MLAlgoStats::new);
         }
+        if (in.readBoolean()) {
+            this.modelStats = in.readMap(stream -> stream.readOptionalString(), MLModelStats::new);
+        }
     }
 
     public MLStatsNodeResponse(DiscoveryNode node, Map<MLNodeLevelStat, Object> nodeStats) {
         super(node);
         this.nodeStats = nodeStats;
     }
 
-    public MLStatsNodeResponse(DiscoveryNode node, Map<MLNodeLevelStat, Object> nodeStats, Map<FunctionName, MLAlgoStats> algorithmStats) {
+    public MLStatsNodeResponse(
+        DiscoveryNode node,
+        Map<MLNodeLevelStat, Object> nodeStats,
+        Map<FunctionName, MLAlgoStats> algorithmStats,
+        Map<String, MLModelStats> modelStats
+    ) {
         super(node);
         this.nodeStats = nodeStats;
         this.algorithmStats = algorithmStats;
+        this.modelStats = modelStats;
     }
 
     public boolean isEmpty() {
-        return getNodeLevelStatSize() == 0 && getAlgorithmStatSize() == 0;
+        return getNodeLevelStatSize() == 0 && getAlgorithmStatSize() == 0 && getModelStatSize() == 0;
     }
 
     /**
@@ -88,6 +105,12 @@ public void writeTo(StreamOutput out) throws IOException {
         } else {
             out.writeBoolean(false);
         }
+        if (modelStats != null) {
+            out.writeBoolean(true);
+            out.writeMap(modelStats, (stream, v) -> stream.writeOptionalString(v), (stream, stats) -> stats.writeTo(stream));
+        } else {
+            out.writeBoolean(false);
+        }
     }
 
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
@@ -97,14 +120,23 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
             }
         }
         if (algorithmStats != null) {
-            builder.startObject("algorithms");
+            builder.startObject(MLStatsInput.ALGORITHMS);
             for (Map.Entry<FunctionName, MLAlgoStats> stat : algorithmStats.entrySet()) {
                 builder.startObject(stat.getKey().name().toLowerCase(Locale.ROOT));
                 stat.getValue().toXContent(builder, params);
                 builder.endObject();
             }
             builder.endObject();
         }
+        if (modelStats != null) {
+            builder.startObject(MLStatsInput.MODELS);
+            for (Map.Entry<String, MLModelStats> stat : modelStats.entrySet()) {
+                builder.startObject(stat.getKey());
+                stat.getValue().toXContent(builder, params);
+                builder.endObject();
+            }
+            builder.endObject();
+        }
         return builder;
     }
 
@@ -120,17 +152,35 @@ public int getAlgorithmStatSize() {
         return algorithmStats == null ? 0 : algorithmStats.size();
     }
 
+    public int getModelStatSize() {
+        return modelStats == null ? 0 : modelStats.size();
+    }
+
     public boolean hasAlgorithmStats(FunctionName algorithm) {
-        return algorithmStats == null ? false : algorithmStats.containsKey(algorithm);
+        return algorithmStats != null && algorithmStats.containsKey(algorithm);
+    }
+
+    public boolean hasModelStats(String modelId) {
+        return modelStats != null && modelStats.containsKey(modelId);
     }
 
     public MLAlgoStats getAlgorithmStats(FunctionName algorithm) {
         return algorithmStats == null ? null : algorithmStats.get(algorithm);
     }
 
+    public MLModelStats getModelStats(String modelId) {
+        return modelStats == null ? null : modelStats.get(modelId);
+    }
+
     public void removeAlgorithmStats(FunctionName algorithm) {
         if (algorithmStats != null) {
             algorithmStats.remove(algorithm);
         }
     }
+
+    public void removeModelStats(String modelId) {
+        if (modelStats != null) {
+            modelStats.remove(modelId);
+        }
+    }
 }
diff --git a/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodesResponse.java b/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodesResponse.java
@@ -60,7 +60,7 @@ public List<MLStatsNodeResponse> readNodesFrom(StreamInput in) throws IOExceptio
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         String nodeId;
         DiscoveryNode node;
-        builder.startObject("nodes");
+        builder.startObject(NODES_KEY);
         for (MLStatsNodeResponse mlStats : getNodes()) {
             node = mlStats.getNode();
             nodeId = node.getId();

diff --git a/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodesTransportAction.java b/plugin/src/main/java/org/opensearch/ml/action/stats/MLStatsNodesTransportAction.java
@@ -21,6 +21,7 @@
 import org.opensearch.ml.stats.ActionName;
 import org.opensearch.ml.stats.MLActionStats;
 import org.opensearch.ml.stats.MLAlgoStats;
+import org.opensearch.ml.stats.MLModelStats;
 import org.opensearch.ml.stats.MLNodeLevelStat;
 import org.opensearch.ml.stats.MLStatLevel;
 import org.opensearch.ml.stats.MLStats;
@@ -125,6 +126,22 @@ MLStatsNodeResponse createMLStatsNodeResponse(MLStatsNodesRequest mlStatsNodesRe
             }
         }
 
-        return new MLStatsNodeResponse(clusterService.localNode(), statValues, algorithmStats);
+        Map<String, MLModelStats> modelStats = new HashMap<>();
+        // return model level stats
+        if (mlStatsInput.includeModelStats()) {
+            for (String modelId : mlStats.getAllModels()) {
+                if (mlStatsInput.retrieveStatsForModel(modelId)) {
+                    Map<ActionName, MLActionStats> actionStatsMap = new HashMap<>();
+                    for (Map.Entry<ActionName, MLActionStats> entry : mlStats.getModelStats(modelId).entrySet()) {
+                        if (mlStatsInput.retrieveStatsForAction(entry.getKey())) {
+                            actionStatsMap.put(entry.getKey(), entry.getValue());
+                        }
+                    }
+                    modelStats.put(modelId, new MLModelStats(actionStatsMap));
+                }
+            }
+        }
+
+        return new MLStatsNodeResponse(clusterService.localNode(), statValues, algorithmStats, modelStats);
     }
 }
diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java
@@ -865,6 +865,7 @@ public void deployModel(
         mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, ML_ACTION_REQUEST_COUNT).increment();
         mlStats.getStat(MLNodeLevelStat.ML_EXECUTING_TASK_COUNT).increment();
         mlStats.getStat(MLNodeLevelStat.ML_REQUEST_COUNT).increment();
+        mlStats.createModelCounterStatIfAbsent(modelId, ActionName.DEPLOY, ML_ACTION_REQUEST_COUNT).increment();
         List<String> workerNodes = mlTask.getWorkerNodes();
         if (modelCacheHelper.isModelDeployed(modelId)) {
             if (workerNodes != null && workerNodes.size() > 0) {
@@ -1210,6 +1211,7 @@ public synchronized Map<String, String> undeployModel(String[] modelIds) {
                     mlStats
                         .createCounterStatIfAbsent(getModelFunctionName(modelId), ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT)
                         .increment();
+                    mlStats.createModelCounterStatIfAbsent(modelId, ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT).increment();
                 } else {
                     modelUndeployStatus.put(modelId, NOT_FOUND);
                 }
@@ -1221,6 +1223,7 @@ public synchronized Map<String, String> undeployModel(String[] modelIds) {
                 modelUndeployStatus.put(modelId, UNDEPLOYED);
                 mlStats.getStat(MLNodeLevelStat.ML_DEPLOYED_MODEL_COUNT).decrement();
                 mlStats.createCounterStatIfAbsent(getModelFunctionName(modelId), ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT).increment();
+                mlStats.createModelCounterStatIfAbsent(modelId, ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT).increment();
                 removeModel(modelId);
             }
         }

diff --git a/plugin/src/main/java/org/opensearch/ml/stats/MLModelStats.java b/plugin/src/main/java/org/opensearch/ml/stats/MLModelStats.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.ml.stats;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.Map;
+
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+import org.opensearch.core.xcontent.ToXContentFragment;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+public class MLModelStats implements ToXContentFragment, Writeable {
+
+    /**
+     * Action level stats of a single model.
+     * Key: Action name.
+     * Value: MLActionStats which contains action stat/value map.
+     *
+     * Example: {predict: { request_count: 1}}
+     */
+    private Map<ActionName, MLActionStats> modelStats;
+
+    public MLModelStats(StreamInput in) throws IOException {
+        if (in.readBoolean()) {
+            this.modelStats = in.readMap(stream -> stream.readEnum(ActionName.class), MLActionStats::new);
+        }
+    }
+
+    public MLModelStats(Map<ActionName, MLActionStats> modelStats) {
+        this.modelStats = modelStats;
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        if (modelStats != null && modelStats.size() > 0) {
+            out.writeBoolean(true);
+            out.writeMap(modelStats, (stream, v) -> stream.writeEnum(v), (stream, stats) -> stats.writeTo(stream));
+        } else {
+            out.writeBoolean(false);
+        }
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        if (modelStats != null && modelStats.size() > 0) {
+            for (Map.Entry<ActionName, MLActionStats> entry : modelStats.entrySet()) {
+                builder.startObject(entry.getKey().name().toLowerCase(Locale.ROOT));
+                entry.getValue().toXContent(builder, params);
+                builder.endObject();
+            }
+        }
+        return builder;
+    }
+
+    public MLActionStats getActionStats(ActionName action) {
+        return modelStats == null ? null : modelStats.get(action);
+    }
+}
diff --git a/plugin/src/main/java/org/opensearch/ml/stats/MLStatLevel.java b/plugin/src/main/java/org/opensearch/ml/stats/MLStatLevel.java
@@ -9,6 +9,7 @@ public enum MLStatLevel {
     CLUSTER,
     NODE,
     ALGORITHM,
+    MODEL,
     ACTION;
 
     public static MLStatLevel from(String value) {

diff --git a/plugin/src/main/java/org/opensearch/ml/stats/MLStats.java b/plugin/src/main/java/org/opensearch/ml/stats/MLStats.java
@@ -22,6 +22,7 @@ public class MLStats {
     @Getter
     private Map<Enum, MLStat<?>> stats;
     private Map<FunctionName, Map<ActionName, Map<MLActionLevelStat, MLStat>>> algoStats;// {"kmeans":{"train":{"request_count":10}}}
+    private Map<String, Map<ActionName, Map<MLActionLevelStat, MLStat>>> modelStats;// {"model_id":{"train":{"request_count":10}}}
 
     /**
      * Constructor
@@ -31,6 +32,7 @@ public class MLStats {
     public MLStats(Map<Enum, MLStat<?>> stats) {
         this.stats = stats;
         this.algoStats = new ConcurrentHashMap<>();
+        this.modelStats = new ConcurrentHashMap<>();
     }
 
     /**
@@ -62,6 +64,12 @@ public MLStat<?> createCounterStatIfAbsent(FunctionName algoName, ActionName act
         return createAlgoStatIfAbsent(algoActionStats, stat, () -> new MLStat<>(false, new CounterSupplier()));
     }
 
+    public MLStat<?> createModelCounterStatIfAbsent(String modelId, ActionName action, MLActionLevelStat stat) {
+        Map<ActionName, Map<MLActionLevelStat, MLStat>> actionStats = modelStats.computeIfAbsent(modelId, it -> new ConcurrentHashMap<>());
+        Map<MLActionLevelStat, MLStat> algoActionStats = actionStats.computeIfAbsent(action, it -> new ConcurrentHashMap<>());
+        return createAlgoStatIfAbsent(algoActionStats, stat, () -> new MLStat<>(false, new CounterSupplier()));
+    }
+
     public synchronized MLStat<?> createAlgoStatIfAbsent(
         Map<MLActionLevelStat, MLStat> algoActionStats,
         MLActionLevelStat key,
@@ -130,7 +138,27 @@ public Map<ActionName, MLActionStats> getAlgorithmStats(FunctionName algoName) {
         return algoActionStats;
     }
 
+    public Map<ActionName, MLActionStats> getModelStats(String modelId) {
+        if (!modelStats.containsKey(modelId)) {
+            return null;
+        }
+        Map<ActionName, MLActionStats> modelActionStats = new HashMap<>();
+
+        for (Map.Entry<ActionName, Map<MLActionLevelStat, MLStat>> entry : modelStats.get(modelId).entrySet()) {
+            Map<MLActionLevelStat, Object> statsMap = new HashMap<>();
+            for (Map.Entry<MLActionLevelStat, MLStat> state : entry.getValue().entrySet()) {
+                statsMap.put(state.getKey(), state.getValue().getValue());
+            }
+            modelActionStats.put(entry.getKey(), new MLActionStats(statsMap));
+        }
+        return modelActionStats;
+    }
+
     public FunctionName[] getAllAlgorithms() {
         return algoStats.keySet().toArray(new FunctionName[0]);
     }
+
+    public String[] getAllModels() {
+        return modelStats.keySet().toArray(new String[0]);
+    }
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ public enum MLStatLevel { @@
         CLUSTER,
         NODE,
         ALGORITHM,
+        MODEL,
         ACTION;
         public static MLStatLevel from(String value) {
@@ Expand Down @@