Skip to content

Commit

Permalink
OCPBUGS-34784: Collect aggregated Prometheus Alertmanager instances (#…
Browse files Browse the repository at this point in the history
…941)

* Add draft gatherer for Prometheuses and Alertmanagers

* Add rule for GatherAggregatedInstances to access Prometheus and Alertmanagers

* Add sample file for GatherAggregatedInstances gatherer

* Refactor and Docs for GatherAggregatedInstances gatherer

* Add entry for GatherAggregatedInstances gatherer

* Fix file folder

* Fix linting issues

* Add unit tests for aggregatedInstances gatherer

* Add new unit tests for not found scenarios

* Refactor constant visibility

* Fix errcheck linting

* Refactor name of the gatherer
  • Loading branch information
ncaak authored Jun 3, 2024
1 parent e32ee76 commit 90d750b
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 0 deletions.
29 changes: 29 additions & 0 deletions docs/gathered-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,35 @@ None
None


## AggregatedMonitoringCRNames

Collects the names of instances, located outside of the `openshift-monitoring` namespace, of the following custom resources:
- Kind: `Prometheus` Group: `monitoring.coreos.com`
- Kind: `Alertmanager` Group: `monitoring.coreos.com`

### API Reference
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html

### Sample data
- [docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json](./insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json)

### Location in archive
- `aggregated/custom_prometheuses_alertmanagers.json`

### Config ID
`clusterconfig/aggregated_monitoring_cr_names`

### Released version
- 4.16

### Backported versions
TBD

### Changes
None


## CRD

Collects the specified Custom Resource Definitions.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"prometheuses": [
"example-prometheus-name"
],
"alertmanagers": [
"example-alertmanager-name"
]
}
8 changes: 8 additions & 0 deletions manifests/03-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ metadata:
include.release.openshift.io/single-node-developer: "true"
capability.openshift.io/name: Insights
rules:
- apiGroups:
- "monitoring.coreos.com"
resources:
- alertmanagers
- prometheuses
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
1 change: 1 addition & 0 deletions pkg/gatherers/clusterconfig/clusterconfig_gatherer.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []erro

var gatheringFunctions = map[string]gathererFuncPtr{
"active_alerts": (*Gatherer).GatherActiveAlerts,
"aggregated_monitoring_cr_names": (*Gatherer).GatherAggregatedMonitoringCRNames,
"authentication": (*Gatherer).GatherClusterAuthentication,
"certificate_signing_requests": (*Gatherer).GatherCertificateSigningRequests,
"ceph_cluster": (*Gatherer).GatherCephCluster,
Expand Down
5 changes: 5 additions & 0 deletions pkg/gatherers/clusterconfig/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

const (
// monitoringNamespace is the namespace whose Prometheus and Alertmanager instances are
// filtered out by the GatherAggregatedMonitoringCRNames gatherer
monitoringNamespace string = "openshift-monitoring"
)

var (
registryScheme = runtime.NewScheme()
// logMaxLines sets maximum number of lines of the log file
Expand Down
118 changes: 118 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package clusterconfig

import (
"context"

"github.com/openshift/insights-operator/pkg/record"

promcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// GatherAggregatedMonitoringCRNames Collects the names of instances, located outside of the
// `openshift-monitoring` namespace, of the following custom resources:
// - Kind: `Prometheus` Group: `monitoring.coreos.com`
// - Kind: `Alertmanager` Group: `monitoring.coreos.com`
//
// ### API Reference
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html
//
// ### Sample data
// - docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json
//
// ### Location in archive
// - `aggregated/custom_prometheuses_alertmanagers.json`
//
// ### Config ID
// `clusterconfig/aggregated_monitoring_cr_names`
//
// ### Released version
// - 4.16
//
// ### Backported versions
// TBD
//
// ### Changes
// None
func (g *Gatherer) GatherAggregatedMonitoringCRNames(ctx context.Context) ([]record.Record, []error) {
	monitoringClient, clientErr := promcli.NewForConfig(g.gatherKubeConfig)
	if clientErr != nil {
		// Without a client nothing can be listed; report the single failure.
		return nil, []error{clientErr}
	}

	// The zero value is enough: gather fills in the names it finds.
	var names monitoringCRNames
	return names.gather(ctx, monitoringClient)
}

// monitoringCRNames is the object serialized as JSON into the gatherer's record.
// Its gather method fills both fields with the names of the instances found
// outside of the openshift-monitoring namespace.
type monitoringCRNames struct {
// Prometheuses holds the names of the collected Prometheus instances
Prometheuses []string `json:"prometheuses"`
// Alertmanagers holds the names of the collected Alertmanager instances
Alertmanagers []string `json:"alertmanagers"`
}

// gather lists the Prometheus and Alertmanager instances that exist outside the openshift-monitoring
// namespace and packs their names into a single record.
//
// Both listings are always attempted. If either of them fails, no record is returned and every
// error encountered is reported. If both succeed but nothing was found, an empty (non-nil) slice
// of records is returned.
func (mn monitoringCRNames) gather(ctx context.Context, client promcli.Interface) ([]record.Record, []error) {
	const recordName = "aggregated/custom_prometheuses_alertmanagers"

	var failures []error

	promNames, promErr := mn.getOutcastedPrometheuses(ctx, client)
	if promErr != nil {
		failures = append(failures, promErr)
	}

	amNames, amErr := mn.getOutcastedAlertManagers(ctx, client)
	if amErr != nil {
		failures = append(failures, amErr)
	}

	switch {
	case len(failures) > 0:
		return nil, failures
	case len(promNames)+len(amNames) == 0:
		// Do not return an empty file if no Custom Resources were found
		return []record.Record{}, nil
	}

	found := monitoringCRNames{Prometheuses: promNames, Alertmanagers: amNames}
	return []record.Record{{Name: recordName, Item: record.JSONMarshaller{Object: found}}}, nil
}

// getOutcastedAlertManagers lists the Alertmanager resources of all namespaces and returns the names
// of those that do not live in the openshift-monitoring namespace. The returned slice is empty, never
// nil, when nothing matches. If the listing fails, the error is returned together with a nil slice.
func (mn monitoringCRNames) getOutcastedAlertManagers(ctx context.Context, client promcli.Interface) ([]string, error) {
	list, err := client.MonitoringV1().Alertmanagers(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := make([]string, 0, len(list.Items))
	for i := range list.Items {
		if list.Items[i].GetNamespace() == monitoringNamespace {
			// instances from the excluded namespace are not reported
			continue
		}
		names = append(names, list.Items[i].GetName())
	}

	return names, nil
}

// getOutcastedPrometheuses lists the Prometheus resources of all namespaces and returns the names
// of those that do not live in the openshift-monitoring namespace. The returned slice is empty, never
// nil, when nothing matches. If the listing fails, the error is returned together with a nil slice.
func (mn monitoringCRNames) getOutcastedPrometheuses(ctx context.Context, client promcli.Interface) ([]string, error) {
	list, err := client.MonitoringV1().Prometheuses(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := make([]string, 0, len(list.Items))
	for i := range list.Items {
		if list.Items[i].GetNamespace() == monitoringNamespace {
			// instances from the excluded namespace are not reported
			continue
		}
		names = append(names, list.Items[i].GetName())
	}

	return names, nil
}
194 changes: 194 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package clusterconfig

import (
"context"
"testing"

"github.com/openshift/insights-operator/pkg/record"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Test_GatherAggregatedInstances provides unit tests for the correct output file structure
func Test_GatherAggregatedInstances(t *testing.T) {
testCases := []struct {
name string
proms []*v1.Prometheus
alertMgrs []*v1.Alertmanager
expected []record.Record
}{
{
name: "The function returns the name of the Prometheus instance in the correct field",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Prometheuses: []string{"test"}, Alertmanagers: []string{},
}}},
},
}, {
name: "The function returns the name of the AlertManager instance in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Alertmanagers: []string{"test"}, Prometheuses: []string{},
}}},
},
}, {
name: "The function returns the names of the mixed instances in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test-alertmanager", Namespace: "test-namespace"}},
},
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test-prometheus", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Alertmanagers: []string{"test-alertmanager"}, Prometheuses: []string{"test-prometheus"},
}}},
},
}, {
name: "The function returns no records if no instances are found",
alertMgrs: []*v1.Alertmanager{},
proms: []*v1.Prometheus{},
expected: []record.Record{},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Given
clientset := fake.NewSimpleClientset()
for _, am := range tc.alertMgrs {
assert.NoError(t,
clientset.Tracker().Add(am))
}
for _, prom := range tc.proms {
assert.NoError(t,
clientset.Tracker().Add(prom))
}

// When
test, errs := monitoringCRNames{}.gather(context.Background(), clientset)

// Assert
assert.Empty(t, errs)
assert.EqualValues(t, tc.expected, test)
})
}
}

// Test_getOutcastedAlertManagers provides unit tests for the namespace filtering logic of AlertManager instances.
// Each case registers its fixtures in a fake clientset and checks the names returned by getOutcastedAlertManagers.
func Test_getOutcastedAlertManagers(t *testing.T) {
	testCases := []struct {
		name      string
		alertMgrs []*v1.Alertmanager
		expected  []string
	}{
		{
			// The subtest names refer to AlertManager, the resource actually under test here.
			name: "The function returns the name of the AlertManager outside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
			},
			expected: []string{"test"},
		}, {
			name: "The function ignores the name of the AlertManager inside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
			},
			expected: []string{},
		}, {
			name: "The function returns only items outside of the namespace on a mixed response from client",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
			},
			expected: []string{"test1", "test2"},
		}, {
			name:      "The function returns an empty slice if no instances are found",
			alertMgrs: []*v1.Alertmanager{},
			expected:  []string{},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Given
			clientset := fake.NewSimpleClientset()
			for _, am := range tc.alertMgrs {
				assert.NoError(t,
					clientset.Tracker().Add(am))
			}

			// When
			test, err := monitoringCRNames{}.getOutcastedAlertManagers(context.Background(), clientset)

			// Assert
			assert.NoError(t, err)
			assert.EqualValues(t, tc.expected, test)
		})
	}
}

// Test_getOutcastedPrometheuses exercises the namespace filtering of Prometheus instances:
// each case registers its fixtures in a fake clientset and checks the returned names.
func Test_getOutcastedPrometheuses(t *testing.T) {
	// newProm builds a Prometheus fixture carrying only a name and a namespace.
	newProm := func(name, namespace string) *v1.Prometheus {
		return &v1.Prometheus{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}}
	}

	tests := []struct {
		name  string
		given []*v1.Prometheus
		want  []string
	}{
		{
			name:  "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace",
			given: []*v1.Prometheus{newProm("test", "test-namespace")},
			want:  []string{"test"},
		},
		{
			name:  "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace",
			given: []*v1.Prometheus{newProm("test", "openshift-monitoring")},
			want:  []string{},
		},
		{
			name: "The function returns only items outside of the namespace on a mixed response from client",
			given: []*v1.Prometheus{
				newProm("test1", "test-namespace"),
				newProm("ignore", "openshift-monitoring"),
				newProm("test2", "test-namespace"),
			},
			want: []string{"test1", "test2"},
		},
		{
			name:  "The function returns an empty slice if no instances are found",
			given: []*v1.Prometheus{},
			want:  []string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Given
			fakeClient := fake.NewSimpleClientset()
			for _, prometheus := range tt.given {
				assert.NoError(t, fakeClient.Tracker().Add(prometheus))
			}

			// When
			got, err := monitoringCRNames{}.getOutcastedPrometheuses(context.Background(), fakeClient)

			// Assert
			assert.NoError(t, err)
			assert.EqualValues(t, tt.want, got)
		})
	}
}

0 comments on commit 90d750b

Please sign in to comment.