Skip to content

Commit

Permalink
Merge branch 'quidem-record' into quidem-msq
Browse files Browse the repository at this point in the history
  • Loading branch information
kgyrtkirk committed Aug 5, 2024
2 parents 929e68c + fda0d63 commit 841ab46
Show file tree
Hide file tree
Showing 16 changed files with 507 additions and 44 deletions.
4 changes: 4 additions & 0 deletions docs/operations/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ Most metric values reset each emission period, as specified in `druid.monitoring
|`metadatacache/schemaPoll/time`|Time taken for coordinator polls to fetch datasource schema.|||
|`serverview/sync/healthy`|Sync status of the Broker with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. This metric can be used in conjunction with `serverview/sync/unstableTime` to debug slow startup of Brokers.|`server`, `tier`|1 for fully synced servers, 0 otherwise|
|`serverview/sync/unstableTime`|Time in milliseconds for which the Broker has been failing to sync with a segment-loading server. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.|
|`subquery/rows`|Number of rows materialized by the subquery's results. |`id`, `subqueryId`| Varies |
|`subquery/bytes`|Number of bytes materialized by the subquery's results. This metric is only emitted if the query uses [byte-based subquery guardrails](https://druid.apache.org/docs/latest/configuration/#guardrails-for-materialization-of-subqueries) |`id`, `subqueryId` | Varies |
|`subquery/rowLimit/count`|Number of subqueries whose results are materialized as rows (Java objects on heap).|This metric is only available if the `SubqueryCountStatsMonitor` module is included.| |
|`subquery/byteLimit/count`|Number of subqueries whose results are materialized as frames (Druid's internal byte representation of rows).|This metric is only available if the `SubqueryCountStatsMonitor` module is included.| |
|`subquery/fallback/count`|Number of subqueries which cannot be materialized as frames|This metric is only available if the `SubqueryCountStatsMonitor` module is included.| |
Expand Down Expand Up @@ -297,6 +299,8 @@ If the JVM does not support CPU time measurement for the current thread, `ingest
|`worker/taskSlot/used/count`|Number of busy task slots on the reporting worker per emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.| `category`, `workerVersion`|Varies|
|`worker/task/assigned/count`|Number of tasks assigned to an indexer per emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.|`dataSource`|Varies|
|`worker/task/completed/count`|Number of tasks completed by an indexer per emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.|`dataSource`|Varies|
|`worker/task/failed/count`|Number of tasks that failed on an indexer during the emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.|`dataSource`|Varies|
|`worker/task/success/count`|Number of tasks that succeeded on an indexer during the emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.|`dataSource`|Varies|
|`worker/task/running/count`|Number of tasks running on an indexer per emission period. This metric is only available if the `WorkerTaskCountStatsMonitor` module is included.|`dataSource`|Varies|

## Shuffle metrics (Native parallel task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@
"worker/task/assigned/count" : { "dimensions" : ["dataSource"], "type" : "count" },
"worker/task/running/count" : { "dimensions" : ["dataSource"], "type" : "count" },
"worker/task/completed/count" : { "dimensions" : ["dataSource"], "type" : "count" },
"worker/task/failed/count" : { "dimensions" : ["category", "workerVersion"], "type" : "count" },
"worker/task/success/count" : { "dimensions" : ["category", "workerVersion"], "type" : "count" },
"worker/task/failed/count" : { "dimensions" : ["category", "workerVersion", "dataSource"], "type" : "count" },
"worker/task/success/count" : { "dimensions" : ["category", "workerVersion", "dataSource"], "type" : "count" },
"worker/taskSlot/idle/count" : { "dimensions" : ["category", "workerVersion"], "type" : "gauge" },
"worker/taskSlot/total/count" : { "dimensions" : ["category", "workerVersion"], "type" : "gauge" },
"worker/taskSlot/used/count" : { "dimensions" : ["category", "workerVersion"], "type" : "gauge" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;


Expand Down Expand Up @@ -588,27 +587,27 @@ private Optional<SqlStatementResult> getStatementStatus(
MSQControllerTask msqControllerTask = getMSQControllerTaskAndCheckPermission(queryId, authenticationResult, forAction);
SqlStatementState sqlStatementState = SqlStatementResourceHelper.getSqlStatementState(statusPlus);

Supplier<Optional<MSQTaskReportPayload>> msqTaskReportPayloadSupplier = () -> {
MSQTaskReportPayload taskReportPayload = null;
if (detail || SqlStatementState.FAILED == sqlStatementState) {
try {
return Optional.ofNullable(SqlStatementResourceHelper.getPayload(
taskReportPayload = SqlStatementResourceHelper.getPayload(
contactOverlord(overlordClient.taskReportAsMap(queryId), queryId)
));
);
}
catch (DruidException e) {
if (e.getErrorCode().equals("notFound") || e.getMessage().contains("Unable to contact overlord")) {
return Optional.empty();
if (!e.getErrorCode().equals("notFound") && !e.getMessage().contains("Unable to contact overlord")) {
throw e;
}
throw e;
}
};
}

if (SqlStatementState.FAILED == sqlStatementState) {
return SqlStatementResourceHelper.getExceptionPayload(
queryId,
taskResponse,
statusPlus,
sqlStatementState,
msqTaskReportPayloadSupplier.get().orElse(null),
taskReportPayload,
jsonMapper,
detail
);
Expand All @@ -627,9 +626,9 @@ private Optional<SqlStatementResult> getStatementStatus(
msqControllerTask.getQuerySpec().getDestination()
).orElse(null) : null,
null,
detail ? SqlStatementResourceHelper.getQueryStagesReport(msqTaskReportPayloadSupplier.get().orElse(null)) : null,
detail ? SqlStatementResourceHelper.getQueryCounters(msqTaskReportPayloadSupplier.get().orElse(null)) : null,
detail ? SqlStatementResourceHelper.getQueryWarningDetails(msqTaskReportPayloadSupplier.get().orElse(null)) : null
SqlStatementResourceHelper.getQueryStagesReport(taskReportPayload),
SqlStatementResourceHelper.getQueryCounters(taskReportPayload),
SqlStatementResourceHelper.getQueryWarningDetails(taskReportPayload)
));
}
}
Expand Down
Loading

0 comments on commit 841ab46

Please sign in to comment.