Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-34784: Collect aggregated Prometheus Alertmanager instances #941

29 changes: 29 additions & 0 deletions docs/gathered-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,35 @@ None
None


## AggregatedMonitoringCRNames

Collects instances outside of the `openshift-monitoring` namespace of the following custom resources:
- Kind: `Prometheus` Group: `monitoring.coreos.com`
- Kind: `Alertmanager` Group: `monitoring.coreos.com`

### API Reference
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html

### Sample data
- [docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json](./insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json)

### Location in archive
- `aggregated/custom_prometheuses_alertmanagers.json`

### Config ID
`clusterconfig/aggregated_monitoring_cr_names`

### Released version
- 4.16

### Backported versions
TBD

### Changes
None


## CRD

Collects the specified Custom Resource Definitions.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"prometheuses": [
"example-prometheus-name"
],
"alertmanagers": [
"example-alertmanager-name"
]
}
8 changes: 8 additions & 0 deletions manifests/03-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ metadata:
include.release.openshift.io/single-node-developer: "true"
capability.openshift.io/name: Insights
rules:
- apiGroups:
- "monitoring.coreos.com"
resources:
- alertmanagers
- prometheuses
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
1 change: 1 addition & 0 deletions pkg/gatherers/clusterconfig/clusterconfig_gatherer.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []erro

var gatheringFunctions = map[string]gathererFuncPtr{
"active_alerts": (*Gatherer).GatherActiveAlerts,
"aggregated_monitoring_cr_names": (*Gatherer).GatherAggregatedMonitoringCRNames,
"authentication": (*Gatherer).GatherClusterAuthentication,
"certificate_signing_requests": (*Gatherer).GatherCertificateSigningRequests,
"ceph_cluster": (*Gatherer).GatherCephCluster,
Expand Down
5 changes: 5 additions & 0 deletions pkg/gatherers/clusterconfig/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

const (
	// monitoringNamespace is the namespace whose Prometheus and Alertmanager
	// instances are skipped by the GatherAggregatedMonitoringCRNames gatherer.
	monitoringNamespace string = "openshift-monitoring"
)

var (
registryScheme = runtime.NewScheme()
// logMaxLines sets maximum number of lines of the log file
Expand Down
118 changes: 118 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package clusterconfig

import (
"context"

"github.com/openshift/insights-operator/pkg/record"

promcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// GatherAggregatedMonitoringCRNames Collects instances outside of the `openshift-monitoring` namespace of the following custom resources:
// - Kind: `Prometheus` Group: `monitoring.coreos.com`
// - Kind: `Alertmanager` Group: `monitoring.coreos.com`
//
// ### API Reference
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html
//
// ### Sample data
// - docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json
//
// ### Location in archive
// - `aggregated/custom_prometheuses_alertmanagers.json`
//
// ### Config ID
// `clusterconfig/aggregated_monitoring_cr_names`
//
// ### Released version
// - 4.16
//
// ### Backported versions
// TBD
//
// ### Changes
// None
func (g *Gatherer) GatherAggregatedMonitoringCRNames(ctx context.Context) ([]record.Record, []error) {
	// Build the prometheus-operator client from the gatherer's kube config;
	// a construction failure is reported as the only error of this gatherer.
	promClient, clientErr := promcli.NewForConfig(g.gatherKubeConfig)
	if clientErr != nil {
		return nil, []error{clientErr}
	}

	// The zero value is enough: gather fills in the name lists itself.
	var names monitoringCRNames
	return names.gather(ctx, promClient)
}

// monitoringCRNames is the JSON payload stored in the archive: the names of the
// Prometheus and Alertmanager custom resources found by the gatherer.
type monitoringCRNames struct {
	// Prometheuses lists the names of the collected Prometheus instances.
	Prometheuses []string `json:"prometheuses"`
	// Alertmanagers lists the names of the collected Alertmanager instances.
	Alertmanagers []string `json:"alertmanagers"`
}

// gather looks up the Prometheus and Alertmanager instances living outside the
// openshift-monitoring namespace and packs their names into a single record.
//
// Both lookups are always attempted. If either fails, no record is returned and
// every lookup error is reported. If both succeed but nothing was found, an
// empty (non-nil) record slice is returned so that no empty file is archived.
func (mn monitoringCRNames) gather(ctx context.Context, client promcli.Interface) ([]record.Record, []error) {
	const recordName = "aggregated/custom_prometheuses_alertmanagers"

	var failures []error

	promNames, promErr := mn.getOutcastedPrometheuses(ctx, client)
	if promErr != nil {
		failures = append(failures, promErr)
	}

	amNames, amErr := mn.getOutcastedAlertManagers(ctx, client)
	if amErr != nil {
		failures = append(failures, amErr)
	}

	switch {
	case len(failures) > 0:
		return nil, failures
	case len(promNames) == 0 && len(amNames) == 0:
		// Do not return an empty file if no Custom Resources were found
		return []record.Record{}, nil
	}

	payload := monitoringCRNames{
		Prometheuses:  promNames,
		Alertmanagers: amNames,
	}

	return []record.Record{{
		Name: recordName,
		Item: record.JSONMarshaller{Object: payload},
	}}, nil
}

// getOutcastedAlertManagers lists Alertmanager instances across all namespaces and
// returns the names of those that are not in the openshift-monitoring namespace.
// The result is an empty, non-nil slice when nothing matches; a failed list call
// is returned as the error.
func (mn monitoringCRNames) getOutcastedAlertManagers(ctx context.Context, client promcli.Interface) ([]string, error) {
	found, err := client.MonitoringV1().Alertmanagers(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := []string{}
	for idx := range found.Items {
		// Skip the instances that belong to the platform monitoring stack.
		if found.Items[idx].GetNamespace() == monitoringNamespace {
			continue
		}
		names = append(names, found.Items[idx].GetName())
	}

	return names, nil
}

// getOutcastedPrometheuses lists Prometheus instances across all namespaces and
// returns the names of those that are not in the openshift-monitoring namespace.
// The result is an empty, non-nil slice when nothing matches; a failed list call
// is returned as the error.
func (mn monitoringCRNames) getOutcastedPrometheuses(ctx context.Context, client promcli.Interface) ([]string, error) {
	found, err := client.MonitoringV1().Prometheuses(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := []string{}
	for idx := range found.Items {
		// Skip the instances that belong to the platform monitoring stack.
		if found.Items[idx].GetNamespace() == monitoringNamespace {
			continue
		}
		names = append(names, found.Items[idx].GetName())
	}

	return names, nil
}
194 changes: 194 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package clusterconfig

import (
"context"
"testing"

"github.com/openshift/insights-operator/pkg/record"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Test_GatherAggregatedInstances provides unit tests for the correct output file structure
func Test_GatherAggregatedInstances(t *testing.T) {
testCases := []struct {
name string
proms []*v1.Prometheus
alertMgrs []*v1.Alertmanager
expected []record.Record
}{
{
name: "The function returns the name of the Prometheus instance in the correct field",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Prometheuses: []string{"test"}, Alertmanagers: []string{},
}}},
},
}, {
name: "The function returns the name of the AlertManager instance in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Alertmanagers: []string{"test"}, Prometheuses: []string{},
}}},
},
}, {
name: "The function returns the names of the mixed instances in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test-alertmanager", Namespace: "test-namespace"}},
},
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test-prometheus", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: monitoringCRNames{
Alertmanagers: []string{"test-alertmanager"}, Prometheuses: []string{"test-prometheus"},
}}},
},
}, {
name: "The function returns no records if no instances are found",
alertMgrs: []*v1.Alertmanager{},
proms: []*v1.Prometheus{},
expected: []record.Record{},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Given
clientset := fake.NewSimpleClientset()
for _, am := range tc.alertMgrs {
assert.NoError(t,
clientset.Tracker().Add(am))
}
for _, prom := range tc.proms {
assert.NoError(t,
clientset.Tracker().Add(prom))
}

// When
test, errs := monitoringCRNames{}.gather(context.Background(), clientset)

// Assert
assert.Empty(t, errs)
assert.EqualValues(t, tc.expected, test)
})
}
}

// Test_getOutcastedAlertManagers provides unit tests for the namespace filtering logic of AlertManager instances.
// Each case seeds a fake clientset with Alertmanager objects and checks which names are returned.
func Test_getOutcastedAlertManagers(t *testing.T) {
	testCases := []struct {
		name      string
		alertMgrs []*v1.Alertmanager
		expected  []string
	}{
		{
			// The subtest names refer to AlertManager, the resource actually under test here.
			name: "The function returns the name of the AlertManager outside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
			},
			expected: []string{"test"},
		}, {
			name: "The function ignores the name of the AlertManager inside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
			},
			expected: []string{},
		}, {
			name: "The function returns only items outside of the namespace on a mixed response from client",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
			},
			expected: []string{"test1", "test2"},
		}, {
			name:      "The function returns an empty slice if no instances are found",
			alertMgrs: []*v1.Alertmanager{},
			expected:  []string{},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Given
			clientset := fake.NewSimpleClientset()
			for _, am := range tc.alertMgrs {
				assert.NoError(t,
					clientset.Tracker().Add(am))
			}

			// When
			test, err := monitoringCRNames{}.getOutcastedAlertManagers(context.Background(), clientset)

			// Assert
			assert.NoError(t, err)
			assert.EqualValues(t, tc.expected, test)
		})
	}
}

// Test_getOutcastedPrometheuses provides unit tests for the namespace filtering logic of Prometheus instances
func Test_getOutcastedPrometheuses(t *testing.T) {
testCases := []struct {
name string
proms []*v1.Prometheus
expected []string
}{
{
name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []string{"test"},
}, {
name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
},
expected: []string{},
}, {
name: "The function returns only items outside of the namespace on a mixed response from client",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
},
expected: []string{"test1", "test2"},
}, {
name: "The function returns an empty slice if no instances are found",
proms: []*v1.Prometheus{},
expected: []string{},
},
ncaak marked this conversation as resolved.
Show resolved Hide resolved
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Given
clientset := fake.NewSimpleClientset()
for _, prom := range tc.proms {
assert.NoError(t,
clientset.Tracker().Add(prom))
}

// When
test, err := monitoringCRNames{}.getOutcastedPrometheuses(context.Background(), clientset)

// Assert
assert.NoError(t, err)
assert.EqualValues(t, tc.expected, test)
})
}
}