Skip to content

Commit

Permalink
[Monitoring] Using primary average shard size (#96177)
Browse files Browse the repository at this point in the history
* Using shard size avg instead of primary total

* Added ui text

* Changed to primary average instead of total

* Addressed cr feedback

* Added zero check

* Fixed threshold checking

* Changed description

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
igoristic and kibanamachine authored Apr 13, 2021
1 parent 448562f commit 355c949
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 16 deletions.
4 changes: 2 additions & 2 deletions docs/user/monitoring/kibana-alerts.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ by running checks on a schedule time of 1 minute with a re-notify interval of 6
[[kibana-alerts-large-shard-size]]
== Large shard size

This alert is triggered if a large (primary) shard size is found on any of the
specified index patterns. The trigger condition is met if an index's shard size is
This alert is triggered if any index matching the specified index patterns has a large
average shard size, calculated as the index's total primary store size divided by its
number of primary shards. The trigger condition is met if an index's average shard size is
55gb or higher in the last 5 minutes. The alert is grouped across all indices that match
the default pattern of `*`. Checks run on a 1-minute schedule with a re-notify
interval of 12 hours.
Expand Down
4 changes: 2 additions & 2 deletions x-pack/plugins/monitoring/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ export const ALERT_DETAILS = {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', {
defaultMessage: `Notify when a shard exceeds this size`,
defaultMessage: `Notify when average shard size exceeds this value`,
}),
type: AlertParamType.Number,
append: 'GB',
Expand All @@ -477,7 +477,7 @@ export const ALERT_DETAILS = {
defaultMessage: 'Shard size',
}),
description: i18n.translate('xpack.monitoring.alerts.shardSize.description', {
defaultMessage: 'Alert if an index (primary) shard is oversize.',
defaultMessage: 'Alert if the average shard size is larger than the configured threshold.',
}),
},
};
Expand Down
3 changes: 3 additions & 0 deletions x-pack/plugins/monitoring/common/types/es.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ export interface ElasticsearchNodeStats {

export interface ElasticsearchIndexStats {
index?: string;
shards: {
primaries: number;
};
primaries?: {
docs?: {
count?: number;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ export class LargeShardSizeAlert extends BaseAlert {
description: i18n.translate(
'xpack.monitoring.alerts.shardSize.actionVariables.shardIndex',
{
defaultMessage: 'List of indices which are experiencing large shard size.',
defaultMessage: 'List of indices which are experiencing large average shard size.',
}
),
},
Expand Down Expand Up @@ -100,7 +100,7 @@ export class LargeShardSizeAlert extends BaseAlert {
const { shardIndex, shardSize } = item.meta as IndexShardSizeUIMeta;
return {
text: i18n.translate('xpack.monitoring.alerts.shardSize.ui.firingMessage', {
defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large shard size of: {shardSize}GB at #absolute`,
defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large average shard size of: {shardSize}GB at #absolute`,
values: {
shardIndex,
shardSize,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,6 @@ export async function fetchIndexShardSize(
},
aggs: {
over_threshold: {
filter: {
range: {
'index_stats.primaries.store.size_in_bytes': {
gt: threshold * gbMultiplier,
},
},
},
aggs: {
index: {
terms: {
Expand All @@ -96,6 +89,7 @@ export async function fetchIndexShardSize(
_source: {
includes: [
'_index',
'index_stats.shards.primaries',
'index_stats.primaries.store.size_in_bytes',
'source_node.name',
'source_node.uuid',
Expand Down Expand Up @@ -123,7 +117,7 @@ export async function fetchIndexShardSize(
if (!clusterBuckets.length) {
return stats;
}

const thresholdBytes = threshold * gbMultiplier;
for (const clusterBucket of clusterBuckets) {
const indexBuckets = clusterBucket.over_threshold.index.buckets;
const clusterUuid = clusterBucket.key;
Expand All @@ -143,9 +137,25 @@ export async function fetchIndexShardSize(
_source: { source_node: sourceNode, index_stats: indexStats },
} = topHit;

const { size_in_bytes: shardSizeBytes } = indexStats?.primaries?.store!;
if (!indexStats || !indexStats.primaries) {
continue;
}

const { primaries: totalPrimaryShards } = indexStats.shards;
const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store || {};
if (!primaryShardSizeBytes || !totalPrimaryShards) {
continue;
}
/**
* We can only calculate the average primary shard size at this point, since the data in the
* .monitoring-es* indices does not give us individual shard sizes. This might change in the future.
*/
const { name: nodeName, uuid: nodeId } = sourceNode;
const shardSize = +(shardSizeBytes! / gbMultiplier).toFixed(2);
const avgShardSize = primaryShardSizeBytes / totalPrimaryShards;
if (avgShardSize < thresholdBytes) {
continue;
}
const shardSize = +(avgShardSize / gbMultiplier).toFixed(2);
stats.push({
shardIndex,
shardSize,
Expand Down

0 comments on commit 355c949

Please sign in to comment.