Adding p99.9, p100 and num_of_segments metrics to perf-tool (opensear…

…ch-project#739) * Adding p99.9, p100 and num_of_segments metrics to perf-tool Signed-off-by: Martin Gaievski <gaievski@amazon.com>
msfroh · Jan 25, 2023 · 4de9f83 · 4de9f83
1 parent 46694f5
commit 4de9f83
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 8 deletions.
diff --git a/benchmarks/perf-tool/README.md b/benchmarks/perf-tool/README.md
@@ -272,12 +272,12 @@ Runs a set of queries against an index.
 
 ##### Metrics
 
-| Metric Name | Description | Unit | 
-| ----------- | ----------- | ----------- |
-| took | Took times returned per query aggregated as total, p50, p90 and p99 (when applicable) | ms |
-| memory_kb | Native memory k-NN is using at the end of the query workload | KB |
+| Metric Name | Description  | Unit | 
+| ----------- |---------------------------------------------------------------------------------------------------------| ----------- |
+| took | Took times returned per query aggregated as total, p50, p90, p99, p99.9 and p100 (when applicable)  | ms |
+| memory_kb | Native memory k-NN is using at the end of the query workload  | KB |
 | recall@R | ratio of top R results from the ground truth neighbors that are in the K results returned by the plugin | float 0.0-1.0 |
-| recall@K | ratio of results returned that were ground truth nearest neighbors | float 0.0-1.0 |
+| recall@K | ratio of results returned that were ground truth nearest neighbors  | float 0.0-1.0 |
 
 #### query_with_filter
 
@@ -311,6 +311,23 @@ Runs a set of queries with filter against an index.
 | recall@R | ratio of top R results from the ground truth neighbors that are in the K results returned by the plugin | float 0.0-1.0 |
 | recall@K | ratio of results returned that were ground truth nearest neighbors | float 0.0-1.0 |
 
+#### get_stats
+
+Gets the index stats.
+
+##### Parameters
+
+| Parameter Name | Description | Default | 
+| ----------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------| 
+| index_name | Name of index to search | No default |
+
+##### Metrics
+
+| Metric Name | Description | Unit | 
+| ----------- |-------------------------------------------------|------------|
+| num_of_committed_segments | Total number of commited segments in the index | integer >= 0 |
+| num_of_search_segments | Total number of search segments in the index | integer >= 0 |
+
 ### Data sets
 
 This benchmark tool uses pre-generated data sets to run indexing and query workload. For some benchmark types existing dataset need to be 

diff --git a/benchmarks/perf-tool/okpt/test/steps/factory.py b/benchmarks/perf-tool/okpt/test/steps/factory.py
@@ -9,7 +9,8 @@
 from okpt.test.steps.base import Step, StepConfig
 
 from okpt.test.steps.steps import CreateIndexStep, DisableRefreshStep, RefreshIndexStep, DeleteIndexStep, \
- TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, QueryStep, QueryWithFilterStep
+ TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, \
+ QueryStep, QueryWithFilterStep, GetStatsStep
 
 
 def create_step(step_config: StepConfig) -> Step:
@@ -37,5 +38,7 @@ def create_step(step_config: StepConfig) -> Step:
  return ForceMergeStep(step_config)
  elif step_config.step_name == ClearCacheStep.label:
  return ClearCacheStep(step_config)
+ elif step_config.step_name == GetStatsStep.label:
+ return GetStatsStep(step_config)
 
  raise ConfigurationError(f'Invalid step {step_config.step_name}')
diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -454,7 +454,8 @@ def _action(self):
  results['took'] = [
  float(query_response['took']) for query_response in query_responses
  ]
- results['memory_kb'] = get_cache_size_in_kb(self.endpoint, 80)
+ port = 9200 if self.endpoint == 'localhost' else 80
+ results['memory_kb'] = get_cache_size_in_kb(self.endpoint, port)
 
  if self.calculate_recall:
  ids = [[int(hit['_id'])
@@ -588,6 +589,41 @@ def get_body(self, vec):
  else:
  raise ConfigurationError('Not supported filter type {}'.format(self.filter_type))
 
+class GetStatsStep(OpenSearchStep):
+ """See base class."""
+
+ label = 'get_stats'
+
+ def __init__(self, step_config: StepConfig):
+ super().__init__(step_config)
+
+ self.index_name = parse_string_param('index_name', step_config.config,
+ {}, None)
+
+ def _action(self):
+ """Get stats for cluster/index etc.
+
+ Returns:
+ Stats with following info:
+ - number of committed and search segments in the index
+ """
+ results = {}
+ segment_stats = get_segment_stats(self.opensearch, self.index_name)
+ shards = segment_stats["indices"][self.index_name]["shards"]
+ num_of_committed_segments = 0
+ num_of_search_segments = 0;
+ for shard_key in shards.keys():
+ for segment in shards[shard_key]:
+
+ num_of_committed_segments += segment["num_committed_segments"]
+ num_of_search_segments += segment["num_search_segments"]
+
+ results['committed_segments'] = num_of_committed_segments
+ results['search_segments'] = num_of_search_segments
+ return results
+
+ def _get_measures(self) -> List[str]:
+ return ['committed_segments', 'search_segments']
 
 # Helper functions - (AKA not steps)
 def bulk_transform(partition: np.ndarray, field_name: str, action,
@@ -755,3 +791,6 @@ def query_index(opensearch: OpenSearch, index_name: str, body: dict,
 
 def bulk_index(opensearch: OpenSearch, index_name: str, body: List):
  return opensearch.bulk(index=index_name, body=body, timeout='5m')
+
+def get_segment_stats(opensearch: OpenSearch, index_name: str):
+ return opensearch.indices.segments(index=index_name)
diff --git a/benchmarks/perf-tool/okpt/test/test.py b/benchmarks/perf-tool/okpt/test/test.py
@@ -53,6 +53,8 @@ def _pxx(values: List[Any], p: float):
  if p < 0 or p > 1:
  return -1.0
  elif p < lowest_percentile or p > highest_percentile:
+ if p == 1.0 and len(values) > 1:
+ return float(values[len(values) - 1])
  return -1.0
  else:
  return float(values[floor(len(values) * p)])
@@ -105,7 +107,7 @@ def _aggregate_steps(step_results: List[Dict[str, Any]],
  for measure_label in step_measure_labels:
 
  step_measure = step[measure_label]
- step_measure_label = f'{step_label}_{measure_label}'
+ step_measure_label = f'{measure_label}' if step_label == 'get_stats' else f'{step_label}_{measure_label}'
 
  # Add cumulative test measures from steps to test measures
  if measure_label in measure_labels:
@@ -137,6 +139,12 @@ def _aggregate_steps(step_results: List[Dict[str, Any]],
  p99 = _pxx(step_measure, 0.99)
  if p99 != -1:
  aggregate[step_measure_label + '_p99'] = p99
+ p99_9 = _pxx(step_measure, 0.999)
+ if p99_9 != -1:
+ aggregate[step_measure_label + '_p99.9'] = p99_9
+ p100 = _pxx(step_measure, 1.00)
+ if p100 != -1:
+ aggregate[step_measure_label + '_p100'] = p100
 
  return aggregate