Skip to content

Commit a917f50

Browse files
committed
Search query failure and search star-tree query failure stats
Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com>
1 parent 5e6ee97 commit a917f50

File tree

9 files changed

+206
-19
lines changed

9 files changed

+206
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2929
- Add all-active ingestion as docrep equivalent in pull-based ingestion ([#19316](https://github.com/opensearch-project/OpenSearch/pull/19316))
3030
- Adding logic for histogram aggregation using skiplist ([#19130](https://github.com/opensearch-project/OpenSearch/pull/19130))
3131
- Add skip_list param for date, scaled float and token count fields ([#19142](https://github.com/opensearch-project/OpenSearch/pull/19142))
32+
- [Search Stats] Add search & star-tree search query failure count metrics ([#19210](https://github.com/opensearch-project/OpenSearch/issues/19210))
3233

3334
### Changed
3435
- Refactor `if-else` chains to use `Java 17 pattern matching switch expressions` ([#18965](https://github.com/opensearch-project/OpenSearch/pull/18965))

rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,113 @@
11
"Help":
22
- skip:
3-
version: " - 3.1.99"
3+
version: " - 3.2.99"
4+
reason: search query failure stats is added in 3.3.0
5+
features: node_selector
6+
- do:
7+
cat.shards:
8+
help: true
9+
node_selector:
10+
version: "3.3.0 - "
11+
12+
- match:
13+
$body: |
14+
/^ index .+ \n
15+
shard .+ \n
16+
prirep .+ \n
17+
state .+ \n
18+
docs .+ \n
19+
store .+ \n
20+
ip .+ \n
21+
id .+ \n
22+
node .+ \n
23+
sync_id .+ \n
24+
unassigned.reason .+ \n
25+
unassigned.at .+ \n
26+
unassigned.for .+ \n
27+
unassigned.details .+ \n
28+
recoverysource.type .+ \n
29+
completion.size .+ \n
30+
fielddata.memory_size .+ \n
31+
fielddata.evictions .+ \n
32+
query_cache.memory_size .+ \n
33+
query_cache.evictions .+ \n
34+
flush.total .+ \n
35+
flush.total_time .+ \n
36+
get.current .+ \n
37+
get.time .+ \n
38+
get.total .+ \n
39+
get.exists_time .+ \n
40+
get.exists_total .+ \n
41+
get.missing_time .+ \n
42+
get.missing_total .+ \n
43+
indexing.delete_current .+ \n
44+
indexing.delete_time .+ \n
45+
indexing.delete_total .+ \n
46+
indexing.index_current .+ \n
47+
indexing.index_time .+ \n
48+
indexing.index_total .+ \n
49+
indexing.index_failed .+ \n
50+
merges.current .+ \n
51+
merges.current_docs .+ \n
52+
merges.current_size .+ \n
53+
merges.total .+ \n
54+
merges.total_docs .+ \n
55+
merges.total_size .+ \n
56+
merges.total_time .+ \n
57+
refresh.total .+ \n
58+
refresh.time .+ \n
59+
refresh.external_total .+ \n
60+
refresh.external_time .+ \n
61+
refresh.listeners .+ \n
62+
search.fetch_current .+ \n
63+
search.fetch_time .+ \n
64+
search.fetch_total .+ \n
65+
search.open_contexts .+ \n
66+
search.query_current .+ \n
67+
search.query_time .+ \n
68+
search.query_total .+ \n
69+
search.query_failed .+ \n
70+
search.concurrent_query_current .+ \n
71+
search.concurrent_query_time .+ \n
72+
search.concurrent_query_total .+ \n
73+
search.concurrent_avg_slice_count .+ \n
74+
search.startree_query_current .+ \n
75+
search.startree_query_time .+ \n
76+
search.startree_query_total .+ \n
77+
search.startree_query_failed .+ \n
78+
search.scroll_current .+ \n
79+
search.scroll_time .+ \n
80+
search.scroll_total .+ \n
81+
search.point_in_time_current .+ \n
82+
search.point_in_time_time .+ \n
83+
search.point_in_time_total .+ \n
84+
search.search_idle_reactivate_count_total .+ \n
85+
segments.count .+ \n
86+
segments.memory .+ \n
87+
segments.index_writer_memory .+ \n
88+
segments.version_map_memory .+ \n
89+
segments.fixed_bitset_memory .+ \n
90+
seq_no.max .+ \n
91+
seq_no.local_checkpoint .+ \n
92+
seq_no.global_checkpoint .+ \n
93+
warmer.current .+ \n
94+
warmer.total .+ \n
95+
warmer.total_time .+ \n
96+
path.data .+ \n
97+
path.state .+ \n
98+
docs.deleted .+ \n
99+
$/
100+
---
101+
"Help from 3.2.0 to 3.2.99":
102+
- skip:
103+
version: " - 3.1.99, 3.3.0 - "
4104
reason: star-tree search stats is only added in 3.2.0
5105
features: node_selector
6106
- do:
7107
cat.shards:
8108
help: true
9109
node_selector:
10-
version: "3.2.0 - "
110+
version: "3.2.0 - 3.2.99"
11111

12112
- match:
13113
$body: |

server/src/main/java/org/opensearch/index/search/stats/SearchStats.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ public static class Stats implements Writeable, ToXContentFragment {
142142
private long queryCount;
143143
private long queryTimeInMillis;
144144
private long queryCurrent;
145+
private long queryFailedCount;
145146

146147
private long concurrentQueryCount;
147148
private long concurrentQueryTimeInMillis;
@@ -169,6 +170,7 @@ public static class Stats implements Writeable, ToXContentFragment {
169170
private long starTreeQueryCount;
170171
private long starTreeQueryTimeInMillis;
171172
private long starTreeQueryCurrent;
173+
private long starTreeQueryFailed;
172174

173175
@Nullable
174176
private RequestStatsLongHolder requestStatsLongHolder;
@@ -191,6 +193,7 @@ private Stats(Builder builder) {
191193
this.queryCount = builder.queryCount;
192194
this.queryTimeInMillis = builder.queryTimeInMillis;
193195
this.queryCurrent = builder.queryCurrent;
196+
this.queryFailedCount = builder.queryFailedCount;
194197

195198
this.concurrentQueryCount = builder.concurrentQueryCount;
196199
this.concurrentQueryTimeInMillis = builder.concurrentQueryTimeInMillis;
@@ -218,6 +221,7 @@ private Stats(Builder builder) {
218221
this.starTreeQueryCount = builder.starTreeQueryCount;
219222
this.starTreeQueryTimeInMillis = builder.starTreeQueryTimeInMillis;
220223
this.starTreeQueryCurrent = builder.starTreeQueryCurrent;
224+
this.starTreeQueryFailed = builder.starTreeQueryFailed;
221225
}
222226

223227
/**
@@ -319,12 +323,18 @@ private Stats(StreamInput in) throws IOException {
319323
starTreeQueryTimeInMillis = in.readVLong();
320324
starTreeQueryCurrent = in.readVLong();
321325
}
326+
327+
if (in.getVersion().onOrAfter(Version.V_3_3_0)) {
328+
queryFailedCount = in.readVLong();
329+
starTreeQueryFailed = in.readVLong();
330+
}
322331
}
323332

324333
public void add(Stats stats) {
325334
queryCount += stats.queryCount;
326335
queryTimeInMillis += stats.queryTimeInMillis;
327336
queryCurrent += stats.queryCurrent;
337+
queryFailedCount += stats.queryFailedCount;
328338

329339
concurrentQueryCount += stats.concurrentQueryCount;
330340
concurrentQueryTimeInMillis += stats.concurrentQueryTimeInMillis;
@@ -352,11 +362,13 @@ public void add(Stats stats) {
352362
starTreeQueryCount += stats.starTreeQueryCount;
353363
starTreeQueryTimeInMillis += stats.starTreeQueryTimeInMillis;
354364
starTreeQueryCurrent += stats.starTreeQueryCurrent;
365+
starTreeQueryFailed += stats.starTreeQueryFailed;
355366
}
356367

357368
public void addForClosingShard(Stats stats) {
358369
queryCount += stats.queryCount;
359370
queryTimeInMillis += stats.queryTimeInMillis;
371+
queryFailedCount += stats.queryFailedCount;
360372

361373
concurrentQueryCount += stats.concurrentQueryCount;
362374
concurrentQueryTimeInMillis += stats.concurrentQueryTimeInMillis;
@@ -381,6 +393,7 @@ public void addForClosingShard(Stats stats) {
381393

382394
starTreeQueryCount += stats.starTreeQueryCount;
383395
starTreeQueryTimeInMillis += stats.starTreeQueryTimeInMillis;
396+
starTreeQueryFailed += stats.starTreeQueryFailed;
384397
}
385398

386399
public long getQueryCount() {
@@ -399,6 +412,10 @@ public long getQueryCurrent() {
399412
return queryCurrent;
400413
}
401414

415+
public long getQueryFailedCount() {
416+
return queryFailedCount;
417+
}
418+
402419
public long getConcurrentQueryCount() {
403420
return concurrentQueryCount;
404421
}
@@ -507,6 +524,10 @@ public long getStarTreeQueryCurrent() {
507524
return starTreeQueryCurrent;
508525
}
509526

527+
public long getStarTreeQueryFailed() {
528+
return starTreeQueryFailed;
529+
}
530+
510531
public static Stats readStats(StreamInput in) throws IOException {
511532
return new Stats(in);
512533
}
@@ -562,13 +583,19 @@ public void writeTo(StreamOutput out) throws IOException {
562583
out.writeVLong(starTreeQueryTimeInMillis);
563584
out.writeVLong(starTreeQueryCurrent);
564585
}
586+
587+
if (out.getVersion().onOrAfter(Version.V_3_3_0)) {
588+
out.writeVLong(queryFailedCount);
589+
out.writeVLong(starTreeQueryFailed);
590+
}
565591
}
566592

567593
@Override
568594
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
569595
builder.field(Fields.QUERY_TOTAL, queryCount);
570596
builder.humanReadableField(Fields.QUERY_TIME_IN_MILLIS, Fields.QUERY_TIME, getQueryTime());
571597
builder.field(Fields.QUERY_CURRENT, queryCurrent);
598+
builder.field(Fields.QUERY_FAILED_TOTAL, queryFailedCount);
572599

573600
builder.field(Fields.CONCURRENT_QUERY_TOTAL, concurrentQueryCount);
574601
builder.humanReadableField(Fields.CONCURRENT_QUERY_TIME_IN_MILLIS, Fields.CONCURRENT_QUERY_TIME, getConcurrentQueryTime());
@@ -578,6 +605,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
578605
builder.field(Fields.STARTREE_QUERY_TOTAL, starTreeQueryCount);
579606
builder.humanReadableField(Fields.STARTREE_QUERY_TIME_IN_MILLIS, Fields.STARTREE_QUERY_TIME, getStarTreeQueryTime());
580607
builder.field(Fields.STARTREE_QUERY_CURRENT, getStarTreeQueryCurrent());
608+
builder.field(Fields.STARTREE_QUERY_FAILED, getStarTreeQueryFailed());
581609

582610
builder.field(Fields.FETCH_TOTAL, fetchCount);
583611
builder.humanReadableField(Fields.FETCH_TIME_IN_MILLIS, Fields.FETCH_TIME, getFetchTime());
@@ -633,6 +661,7 @@ public static class Builder {
633661
private long queryCount = 0;
634662
private long queryTimeInMillis = 0;
635663
private long queryCurrent = 0;
664+
private long queryFailedCount = 0;
636665
private long concurrentQueryCount = 0;
637666
private long concurrentQueryTimeInMillis = 0;
638667
private long concurrentQueryCurrent = 0;
@@ -653,6 +682,7 @@ public static class Builder {
653682
private long starTreeQueryCount = 0;
654683
private long starTreeQueryTimeInMillis = 0;
655684
private long starTreeQueryCurrent = 0;
685+
private long starTreeQueryFailed = 0;
656686
@Nullable
657687
private RequestStatsLongHolder requestStatsLongHolder = null;
658688

@@ -673,6 +703,11 @@ public Builder queryCurrent(long current) {
673703
return this;
674704
}
675705

706+
public Builder queryFailed(long count) {
707+
this.queryFailedCount = count;
708+
return this;
709+
}
710+
676711
public Builder concurrentQueryCount(long count) {
677712
this.concurrentQueryCount = count;
678713
return this;
@@ -773,6 +808,11 @@ public Builder starTreeQueryCurrent(long current) {
773808
return this;
774809
}
775810

811+
public Builder starTreeQueryFailed(long count) {
812+
this.starTreeQueryFailed = count;
813+
return this;
814+
}
815+
776816
/**
777817
* Creates a {@link Stats} object from the builder's current state.
778818
* @return A new Stats instance.
@@ -916,6 +956,7 @@ static final class Fields {
916956
static final String QUERY_TIME = "query_time";
917957
static final String QUERY_TIME_IN_MILLIS = "query_time_in_millis";
918958
static final String QUERY_CURRENT = "query_current";
959+
static final String QUERY_FAILED_TOTAL = "query_failed";
919960
static final String CONCURRENT_QUERY_TOTAL = "concurrent_query_total";
920961
static final String CONCURRENT_QUERY_TIME = "concurrent_query_time";
921962
static final String CONCURRENT_QUERY_TIME_IN_MILLIS = "concurrent_query_time_in_millis";
@@ -925,6 +966,7 @@ static final class Fields {
925966
static final String STARTREE_QUERY_TIME = "startree_query_time";
926967
static final String STARTREE_QUERY_TIME_IN_MILLIS = "startree_query_time_in_millis";
927968
static final String STARTREE_QUERY_CURRENT = "startree_query_current";
969+
static final String STARTREE_QUERY_FAILED = "startree_query_failed";
928970
static final String FETCH_TOTAL = "fetch_total";
929971
static final String FETCH_TIME = "fetch_time";
930972
static final String FETCH_TIME_IN_MILLIS = "fetch_time_in_millis";

server/src/main/java/org/opensearch/index/search/stats/ShardSearchStats.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ public void onPreQueryPhase(SearchContext searchContext) {
104104
@Override
105105
public void onFailedQueryPhase(SearchContext searchContext) {
106106
computeStats(searchContext, statsHolder -> {
107+
statsHolder.queryFailed.inc();
107108
if (searchContext.hasOnlySuggest()) {
108109
statsHolder.suggestCurrent.dec();
109110
assert statsHolder.suggestCurrent.count() >= 0;
@@ -115,6 +116,7 @@ public void onFailedQueryPhase(SearchContext searchContext) {
115116
assert statsHolder.concurrentQueryCurrent.count() >= 0;
116117
}
117118
if (searchContext.getQueryShardContext().getStarTreeQueryContext() != null) {
119+
statsHolder.starTreeQueryFailed.inc();
118120
statsHolder.starTreeCurrent.dec();
119121
assert statsHolder.starTreeCurrent.count() >= 0;
120122
}
@@ -237,6 +239,7 @@ public void onSearchIdleReactivation() {
237239
*/
238240
static final class StatsHolder {
239241
final MeanMetric queryMetric = new MeanMetric();
242+
final CounterMetric queryFailed = new CounterMetric();
240243
final MeanMetric concurrentQueryMetric = new MeanMetric();
241244
final CounterMetric queryConcurrencyMetric = new CounterMetric();
242245
final MeanMetric fetchMetric = new MeanMetric();
@@ -259,11 +262,13 @@ static final class StatsHolder {
259262
final CounterMetric searchIdleMetric = new CounterMetric();
260263
final MeanMetric starTreeQueryMetric = new MeanMetric();
261264
final CounterMetric starTreeCurrent = new CounterMetric();
265+
final CounterMetric starTreeQueryFailed = new CounterMetric();
262266

263267
SearchStats.Stats stats() {
264268
return new SearchStats.Stats.Builder().queryCount(queryMetric.count())
265269
.queryTimeInMillis(TimeUnit.NANOSECONDS.toMillis(queryMetric.sum()))
266270
.queryCurrent(queryCurrent.count())
271+
.queryFailed(queryFailed.count())
267272
.concurrentQueryCount(concurrentQueryMetric.count())
268273
.concurrentQueryTimeInMillis(TimeUnit.NANOSECONDS.toMillis(concurrentQueryMetric.sum()))
269274
.concurrentQueryCurrent(concurrentQueryCurrent.count())
@@ -284,6 +289,7 @@ SearchStats.Stats stats() {
284289
.starTreeQueryCount(starTreeQueryMetric.count())
285290
.starTreeQueryTimeInMillis(TimeUnit.NANOSECONDS.toMillis(starTreeQueryMetric.sum()))
286291
.starTreeQueryCurrent(starTreeCurrent.count())
292+
.starTreeQueryFailed(starTreeQueryFailed.count())
287293
.build();
288294
}
289295
}

0 commit comments

Comments
 (0)