Skip to content

Commit

Permalink
NETOBSERV-1649: Improve UX and cases managed with prometheus (#549)
Browse files Browse the repository at this point in the history
* NETOBSERV-1649: Improve UX and cases managed with prometheus

- Automatically switch to PktDropBytes/Packets metrics when there's a
  drop cause/state filter
- Make topology queries less strict on fetching drops for decoration: an error will not break the whole thing anymore
  - Display warning when that happens
- small refactoring related to warning message and details

* Fix error reporting on missing labels

* fix lint
  • Loading branch information
jotak authored Jul 1, 2024
1 parent 1a710ec commit 3d6a89a
Show file tree
Hide file tree
Showing 14 changed files with 171 additions and 138 deletions.
100 changes: 51 additions & 49 deletions pkg/model/fields/fields.go
Original file line number Diff line number Diff line change
@@ -1,55 +1,57 @@
package fields

const (
Src = "Src"
Dst = "Dst"
Namespace = "K8S_Namespace"
SrcNamespace = Src + Namespace
DstNamespace = Dst + Namespace
OwnerType = "K8S_OwnerType"
SrcOwnerType = Src + OwnerType
DstOwnerType = Dst + OwnerType
OwnerName = "K8S_OwnerName"
SrcOwnerName = Src + OwnerName
DstOwnerName = Dst + OwnerName
Type = "K8S_Type"
SrcType = Src + Type
DstType = Dst + Type
Name = "K8S_Name"
SrcName = Src + Name
DstName = Dst + Name
Addr = "Addr"
SrcAddr = Src + Addr
DstAddr = Dst + Addr
Port = "Port"
SrcPort = Src + Port
DstPort = Dst + Port
HostIP = "K8S_HostIP"
SrcHostIP = Src + HostIP
DstHostIP = Dst + HostIP
HostName = "K8S_HostName"
SrcHostName = Src + HostName
DstHostName = Dst + HostName
Zone = "K8S_Zone"
SrcZone = Src + Zone
DstZone = Dst + Zone
Cluster = "K8S_ClusterName"
Layer = "K8S_FlowLayer"
Packets = "Packets"
PktDropPackets = "PktDropPackets"
Proto = "Proto"
Bytes = "Bytes"
DSCP = "Dscp"
PktDropBytes = "PktDropBytes"
FlowDirection = "FlowDirection"
Interfaces = "Interfaces"
IfDirections = "IfDirections"
DNSID = "DnsId"
DNSLatency = "DnsLatencyMs"
DNSErrNo = "DnsErrno"
DNSCode = "DnsFlagsResponseCode"
Duplicate = "Duplicate"
TimeFlowRTT = "TimeFlowRttNs"
Src = "Src"
Dst = "Dst"
Namespace = "K8S_Namespace"
SrcNamespace = Src + Namespace
DstNamespace = Dst + Namespace
OwnerType = "K8S_OwnerType"
SrcOwnerType = Src + OwnerType
DstOwnerType = Dst + OwnerType
OwnerName = "K8S_OwnerName"
SrcOwnerName = Src + OwnerName
DstOwnerName = Dst + OwnerName
Type = "K8S_Type"
SrcType = Src + Type
DstType = Dst + Type
Name = "K8S_Name"
SrcName = Src + Name
DstName = Dst + Name
Addr = "Addr"
SrcAddr = Src + Addr
DstAddr = Dst + Addr
Port = "Port"
SrcPort = Src + Port
DstPort = Dst + Port
HostIP = "K8S_HostIP"
SrcHostIP = Src + HostIP
DstHostIP = Dst + HostIP
HostName = "K8S_HostName"
SrcHostName = Src + HostName
DstHostName = Dst + HostName
Zone = "K8S_Zone"
SrcZone = Src + Zone
DstZone = Dst + Zone
Cluster = "K8S_ClusterName"
Layer = "K8S_FlowLayer"
Packets = "Packets"
Proto = "Proto"
Bytes = "Bytes"
DSCP = "Dscp"
PktDropPackets = "PktDropPackets"
PktDropBytes = "PktDropBytes"
PktDropLatestState = "PktDropLatestState"
PktDropLatestDropCause = "PktDropLatestDropCause"
FlowDirection = "FlowDirection"
Interfaces = "Interfaces"
IfDirections = "IfDirections"
DNSID = "DnsId"
DNSLatency = "DnsLatencyMs"
DNSErrNo = "DnsErrno"
DNSCode = "DnsFlagsResponseCode"
Duplicate = "Duplicate"
TimeFlowRTT = "TimeFlowRttNs"
)

func IsNumeric(v string) bool {
Expand Down
14 changes: 12 additions & 2 deletions pkg/prometheus/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ func (i *Inventory) Search(neededLabels []string, valueField string) SearchResul

func (i *Inventory) searchWithDir(neededLabels []string, valueField string, dir config.FlowDirection) SearchResult {
sr := SearchResult{}
// Special case, when the query has a filter to PktDropState/Cause and value field is bytes/packets,
// we must consider value field is actually PktDropBytes/Packets
if slices.Contains(neededLabels, fields.PktDropLatestDropCause) || slices.Contains(neededLabels, fields.PktDropLatestState) {
switch valueField {
case fields.Bytes:
valueField = fields.PktDropBytes
case fields.Packets:
valueField = fields.PktDropPackets
}
}
for _, m := range i.metrics {
match, missingLabels := checkMatch(&m, neededLabels, valueField, dir)
if match {
Expand All @@ -84,12 +94,12 @@ func (i *Inventory) searchWithDir(neededLabels []string, valueField string, dir
return sr
}
sr.Candidates = append(sr.Candidates, m.Name)
} else if m.Enabled && (sr.MissingLabels == nil || len(missingLabels) < len(sr.MissingLabels)) {
} else if m.Enabled && len(missingLabels) > 0 && (sr.MissingLabels == nil || len(missingLabels) < len(sr.MissingLabels)) {
// Keep smaller possible set of missing labels
sr.MissingLabels = missingLabels
}
}
log.Debugf("No metric match for %v / %s (/ %s)", neededLabels, valueField, dir)
log.Debugf("No metric match for %v / %s / %s", neededLabels, valueField, dir)
return sr
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/prometheus/inventory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,9 @@ func TestInventory_Search_RTT_Candidate(t *testing.T) {
search := inv.Search([]string{"SrcK8S_Namespace", "DstK8S_Namespace", "K8S_FlowLayer", "DstK8S_Type", "SrcK8S_Type"}, "TimeFlowRttNs")
assert.Equal(t, []string{"netobserv_workload_rtt_seconds"}, search.Candidates)
}

func TestInventory_Search_MissingLabels(t *testing.T) {
inv := NewInventory(&config.Prometheus{Metrics: configuredMetrics})
search := inv.Search([]string{"SrcK8S_Namespace", "DstK8S_Namespace", "SrcK8S_HostName"}, "Bytes")
assert.Equal(t, []string{"SrcK8S_HostName"}, search.MissingLabels)
}
5 changes: 3 additions & 2 deletions web/locales/en/plugin__netobserv-plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@
"Configuration limits": "Configuration limits",
"Metrics": "Metrics",
"You may consider the following changes to avoid this error:": "You may consider the following changes to avoid this error:",
"Add missing metrics to prometheus using FlowMetric API": "Add missing metrics to prometheus using FlowMetric API",
"Enable Loki in FlowCollector API": "Enable Loki in FlowCollector API",
"Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)": "Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)",
"Enable Loki in the FlowCollector API (loki.enable)": "Enable Loki in the FlowCollector API (loki.enable)",
"Reduce the Query Options -> limit to reduce the number of results": "Reduce the Query Options -> limit to reduce the number of results",
"Increase Loki \"max_entries_limit_per_query\" entry in configuration file": "Increase Loki \"max_entries_limit_per_query\" entry in configuration file",
"Add Namespace, Owner or Resource filters (which use indexed fields) to improve the query performance": "Add Namespace, Owner or Resource filters (which use indexed fields) to improve the query performance",
Expand Down Expand Up @@ -318,6 +318,7 @@
"When in \"Match any\" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance": "When in \"Match any\" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance",
"Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance": "Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance",
"Add more filters or decrease limit / range to improve the query performance": "Add more filters or decrease limit / range to improve the query performance",
"Could not fetch drop information": "Could not fetch drop information",
"Overview": "Overview",
"Traffic flows": "Traffic flows",
"Topology": "Topology",
Expand Down
6 changes: 4 additions & 2 deletions web/src/components/messages/error.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,11 @@ export const Error: React.FC<ErrorProps> = ({ title, error, isLokiRelated }) =>
{error.includes('promUnsupported') && (
<>
<Text component={TextVariants.blockquote}>
{t('Add missing metrics to prometheus using FlowMetric API')}
{t('Add missing metrics to prometheus in the FlowCollector API (processor.metrics.includeList)')}
</Text>
<Text component={TextVariants.blockquote}>
{t('Enable Loki in the FlowCollector API (loki.enable)')}
</Text>
<Text component={TextVariants.blockquote}>{t('Enable Loki in FlowCollector API')}</Text>
</>
)}
{error.includes('max entries limit') && (
Expand Down
93 changes: 56 additions & 37 deletions web/src/components/netflow-traffic.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,14 @@ import {
TopologyGroupTypes,
TopologyOptions
} from '../model/topology';
import { Warning } from '../model/warnings';
import { getFetchFunctions as getBackAndForthFetch } from '../utils/back-and-forth';
import { Column, ColumnsId, ColumnSizeMap, getDefaultColumns } from '../utils/columns';
import { loadConfig } from '../utils/config';
import { ContextSingleton } from '../utils/context';
import { computeStepInterval, getTimeRangeOptions, TimeRange } from '../utils/datetime';
import { formatDuration, getDateMsInSeconds, getDateSInMiliseconds, parseDuration } from '../utils/duration';
import { getHTTPErrorDetails, isPromUnsupportedError } from '../utils/errors';
import { getHTTPErrorDetails, getPromUnsupportedError, isPromUnsupportedError } from '../utils/errors';
import { exportToPng } from '../utils/export';
import { checkFilterAvailable, getFilterDefinitions } from '../utils/filter-definitions';
import { mergeFlowReporters } from '../utils/flows';
Expand Down Expand Up @@ -199,7 +200,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
}

const [config, setConfig] = React.useState<Config>(defaultConfig);
const [warningMessage, setWarningMessage] = React.useState<string | undefined>();
const [warning, setWarning] = React.useState<Warning | undefined>();
const [showViewOptions, setShowViewOptions] = useLocalStorage<boolean>(localStorageShowOptionsKey, false);
const [showHistogram, setShowHistogram] = useLocalStorage<boolean>(localStorageShowHistogramKey, false);
const [isViewOptionOverflowMenuOpen, setViewOptionOverflowMenuOpen] = React.useState(false);
Expand Down Expand Up @@ -472,7 +473,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
setFilters(f);
setFlows([]);
setMetrics(defaultNetflowMetrics);
setWarningMessage(undefined);
setWarning(undefined);
},
[setFilters, setFlows]
);
Expand Down Expand Up @@ -611,12 +612,12 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
(query: Promise<unknown>) => {
setLastRefresh(undefined);
setLastDuration(undefined);
setWarningMessage(undefined);
setWarning(undefined);
Promise.race([query, new Promise((resolve, reject) => setTimeout(reject, 4000, 'slow'))]).then(
null,
(reason: string) => {
if (reason === 'slow') {
setWarningMessage(`${t('Query is slow')}`);
setWarning({ type: 'slow', summary: `${t('Query is slow')}` });
}
}
);
Expand All @@ -626,22 +627,30 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
[]
);

const slownessReason = React.useCallback((): string => {
if (match === 'any' && hasNonIndexFields(filters.list)) {
return t(
// eslint-disable-next-line max-len
'When in "Match any" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
);
}
if (match === 'all' && !hasIndexFields(filters.list)) {
return t(
// eslint-disable-next-line max-len
'Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
);
}
return t('Add more filters or decrease limit / range to improve the query performance');
const checkSlownessReason = React.useCallback(
(w: Warning | undefined): Warning | undefined => {
if (w?.type == 'slow') {
let reason = '';
if (match === 'any' && hasNonIndexFields(filters.list)) {
reason = t(
// eslint-disable-next-line max-len
'When in "Match any" mode, try using only Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
);
} else if (match === 'all' && !hasIndexFields(filters.list)) {
reason = t(
// eslint-disable-next-line max-len
'Add Namespace, Owner or Resource filters (which use indexed fields), or decrease limit / range, to improve the query performance'
);
} else {
reason = t('Add more filters or decrease limit / range to improve the query performance');
}
return { ...w, details: reason };
}
return w;
},
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [match, filters]);
[match, filters]
);

const fetchTable = React.useCallback(
(fq: FlowQuery) => {
Expand Down Expand Up @@ -1005,21 +1014,34 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i

if (droppedType) {
promises.push(
getMetrics({ ...fq, type: droppedType }, range).then(res => {
const droppedRateMetrics = {} as RateMetrics;
droppedRateMetrics[getRateMetricKey(topologyMetricType)] = res.metrics;
currentMetrics = { ...currentMetrics, droppedRateMetrics };
setMetrics(currentMetrics);
return res.stats;
})
getMetrics({ ...fq, type: droppedType }, range)
.then(res => {
const droppedRateMetrics = {} as RateMetrics;
droppedRateMetrics[getRateMetricKey(topologyMetricType)] = res.metrics;
currentMetrics = { ...currentMetrics, droppedRateMetrics };
setMetrics(currentMetrics);
return res.stats;
})
.catch(err => {
// Error might occur for instance when fetching node-based topology with drop feature enabled, and Loki disabled
// We don't want to break the whole topology due to missing drops enrichement
let strErr = getHTTPErrorDetails(err, true);
if (isPromUnsupportedError(strErr)) {
strErr = getPromUnsupportedError(strErr);
}
setWarning({ type: 'cantfetchdrops', summary: t('Could not fetch drop information'), details: strErr });
return { numQueries: 0, dataSources: [], limitReached: false };
})
);
} else if (!['PktDropBytes', 'PktDropPackets'].includes(topologyMetricType)) {
currentMetrics = { ...currentMetrics, droppedRateMetrics: undefined };
setMetrics(currentMetrics);
}
return Promise.all(promises);
},
[config.features, getFetchFunctions, topologyMetricType, topologyMetricFunction, range]
// "t" dependency kills jest
// eslint-disable-next-line react-hooks/exhaustive-deps
[config.features, getFetchFunctions, topologyMetricType, topologyMetricFunction, range, setWarning]
);

const tick = React.useCallback(() => {
Expand Down Expand Up @@ -1076,7 +1098,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
setFlows([]);
setMetrics(defaultNetflowMetrics);
setError(getHTTPErrorDetails(err, true));
setWarningMessage(undefined);
setWarning(undefined);
})
.finally(() => {
const endDate = new Date();
Expand Down Expand Up @@ -1536,8 +1558,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
limit={limit}
lastRefresh={lastRefresh}
lastDuration={lastDuration}
warningMessage={warningMessage}
slownessReason={slownessReason()}
warning={checkSlownessReason(warning)}
range={range}
showDNSLatency={isDNSTracking()}
showRTTLatency={isFlowRTT()}
Expand Down Expand Up @@ -1685,8 +1706,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
loading={loading}
lastRefresh={lastRefresh}
lastDuration={lastDuration}
warningMessage={warningMessage}
slownessReason={slownessReason()}
warning={checkSlownessReason(warning)}
isShowQuerySummary={isShowQuerySummary}
toggleQuerySummary={() => onToggleQuerySummary(!isShowQuerySummary)}
isDark={isDarkTheme}
Expand All @@ -1698,8 +1718,7 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
loading={loading}
lastRefresh={lastRefresh}
lastDuration={lastDuration}
warningMessage={warningMessage}
slownessReason={slownessReason()}
warning={checkSlownessReason(warning)}
range={range}
type={recordType}
isShowQuerySummary={isShowQuerySummary}
Expand Down Expand Up @@ -1749,13 +1768,13 @@ export const NetflowTraffic: React.FC<NetflowTrafficProps> = ({ forcedFilters, i
setOverviewFocus,
setTopologyOptions,
size,
slownessReason,
checkSlownessReason,
stats,
t,
topologyMetricFunction,
topologyMetricType,
topologyOptions,
warningMessage
warning
]);

//update data on filters changes
Expand Down
Loading

0 comments on commit 3d6a89a

Please sign in to comment.