Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-34784: Collect aggregated Prometheus Alertmanager instances #941

28 changes: 28 additions & 0 deletions docs/gathered-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,34 @@ None
None


## AggregatedInstances
ncaak marked this conversation as resolved.
Show resolved Hide resolved

Collects instances of `Prometheus` and `AlertManager` deployments
that are outside of the `openshift-monitoring` namespace
ncaak marked this conversation as resolved.
Show resolved Hide resolved

### API Reference
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html

### Sample data
- [docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json](./insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json)

### Location in archive
- `aggregated/custom_prometheuses_alertmanagers.json`

### Config ID
`clusterconfig/aggregated_instances`

### Released version
- 4.16

### Backported versions
TBD

### Changes
None


## CRD

Collects the specified Custom Resource Definitions.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"prometheuses": [
"example-prometheus-name"
],
"alertmanagers": [
"example-alertmanager-name"
]
}
8 changes: 8 additions & 0 deletions manifests/03-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ metadata:
include.release.openshift.io/single-node-developer: "true"
capability.openshift.io/name: Insights
rules:
- apiGroups:
- "monitoring.coreos.com"
resources:
- alertmanagers
- prometheuses
verbs:
- get
- list
- apiGroups:
- ""
resources:
Expand Down
1 change: 1 addition & 0 deletions pkg/gatherers/clusterconfig/clusterconfig_gatherer.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []erro

var gatheringFunctions = map[string]gathererFuncPtr{
"active_alerts": (*Gatherer).GatherActiveAlerts,
"aggregated_instances": (*Gatherer).GatherAggregatedInstances,
"authentication": (*Gatherer).GatherClusterAuthentication,
"certificate_signing_requests": (*Gatherer).GatherCertificateSigningRequests,
"ceph_cluster": (*Gatherer).GatherCephCluster,
Expand Down
5 changes: 5 additions & 0 deletions pkg/gatherers/clusterconfig/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

const (
	// MonitoringNamespace is the namespace whose Prometheus and Alertmanager instances
	// are excluded by the GatherAggregatedInstances gatherer.
	MonitoringNamespace string = "openshift-monitoring"
)

var (
registryScheme = runtime.NewScheme()
// logMaxLines sets maximum number of lines of the log file
Expand Down
112 changes: 112 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package clusterconfig

import (
"context"

"github.com/openshift/insights-operator/pkg/record"

promcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// GatherAggregatedInstances Collects instances of `Prometheus` and `AlertManager` deployments
// that are outside of the `openshift-monitoring` namespace
//
// ### API Reference
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html
//
// ### Sample data
// - docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json
//
// ### Location in archive
// - `aggregated/custom_prometheuses_alertmanagers.json`
//
// ### Config ID
// `clusterconfig/aggregated_instances`
//
// ### Released version
// - 4.16
//
// ### Backported versions
// TBD
//
// ### Changes
// None
func (g *Gatherer) GatherAggregatedInstances(ctx context.Context) ([]record.Record, []error) {
	// Build the Prometheus Operator clientset from the gatherer's kube config.
	monitoringClient, clientErr := promcli.NewForConfig(g.gatherKubeConfig)
	if clientErr != nil {
		// Without a client nothing can be listed, so report the failure as the only error.
		return nil, []error{clientErr}
	}

	var instances aggregatedInstances
	return instances.gather(ctx, monitoringClient)
}

// aggregatedInstances is the object marshaled to JSON for the gathered record. It carries
// the names of the Prometheus and Alertmanager instances found by the gatherer.
type aggregatedInstances struct {
// Prometheuses holds the names returned by getOutcastedPrometheuses.
Prometheuses []string `json:"prometheuses"`
// Alertmanagers holds the names returned by getOutcastedAlertManagers.
Alertmanagers []string `json:"alertmanagers"`
}

// gather collects the names of the Prometheus and Alertmanager instances that live outside the
// openshift-monitoring namespace and packs them into a single JSON record.
// Both listings are always attempted. If either one fails, no record is returned and every
// error encountered is reported instead.
func (ai aggregatedInstances) gather(ctx context.Context, client promcli.Interface) ([]record.Record, []error) {
	const recordName = "aggregated/custom_prometheuses_alertmanagers"

	promNames, promErr := ai.getOutcastedPrometheuses(ctx, client)
	amNames, amErr := ai.getOutcastedAlertManagers(ctx, client)

	// Keep the errors in call order: Prometheus first, then Alertmanager.
	failures := []error{}
	for _, e := range []error{promErr, amErr} {
		if e != nil {
			failures = append(failures, e)
		}
	}
	if len(failures) != 0 {
		return nil, failures
	}

	// ai is a value receiver, so these assignments only affect the copy stored in the record.
	ai.Prometheuses, ai.Alertmanagers = promNames, amNames
	item := record.JSONMarshaller{Object: ai}

	return []record.Record{{Name: recordName, Item: item}}, nil
}

// getOutcastedAlertManagers lists the Alertmanager resources of all namespaces and returns the names
// of those whose namespace is not openshift-monitoring. When nothing matches, the result is an
// empty, non-nil slice. If the list call fails, its error is returned together with a nil slice.
func (ai aggregatedInstances) getOutcastedAlertManagers(ctx context.Context, client promcli.Interface) ([]string, error) {
	alertmanagers := client.MonitoringV1().Alertmanagers(metav1.NamespaceAll)

	listed, err := alertmanagers.List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := []string{}
	for idx := range listed.Items {
		// Skip the instances living in the openshift-monitoring namespace.
		if listed.Items[idx].GetNamespace() == MonitoringNamespace {
			continue
		}
		names = append(names, listed.Items[idx].GetName())
	}

	return names, nil
}

// getOutcastedPrometheuses lists the Prometheus resources of all namespaces and returns the names
// of those whose namespace is not openshift-monitoring. When nothing matches, the result is an
// empty, non-nil slice. If the list call fails, its error is returned together with a nil slice.
func (ai aggregatedInstances) getOutcastedPrometheuses(ctx context.Context, client promcli.Interface) ([]string, error) {
	prometheuses := client.MonitoringV1().Prometheuses(metav1.NamespaceAll)

	listed, err := prometheuses.List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	names := []string{}
	for idx := range listed.Items {
		// Skip the instances living in the openshift-monitoring namespace.
		if listed.Items[idx].GetNamespace() == MonitoringNamespace {
			continue
		}
		names = append(names, listed.Items[idx].GetName())
	}

	return names, nil
}
183 changes: 183 additions & 0 deletions pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package clusterconfig

import (
"context"
"testing"

"github.com/openshift/insights-operator/pkg/record"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// Test_GatherAggregatedInstances provides unit tests for the correct output file structure
func Test_GatherAggregatedInstances(t *testing.T) {
testCases := []struct {
name string
proms []*v1.Prometheus
alertMgrs []*v1.Alertmanager
expected []record.Record
}{
{
name: "The function returns the name of the Prometheus instance in the correct field",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: aggregatedInstances{
Prometheuses: []string{"test"}, Alertmanagers: []string{},
}}},
},
}, {
name: "The function returns the name of the AlertManager instance in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: aggregatedInstances{
Alertmanagers: []string{"test"}, Prometheuses: []string{},
}}},
},
}, {
name: "The function returns the names of the mixed instances in the correct field",
alertMgrs: []*v1.Alertmanager{
{ObjectMeta: metav1.ObjectMeta{Name: "test-alertmanager", Namespace: "test-namespace"}},
},
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test-prometheus", Namespace: "test-namespace"}},
},
expected: []record.Record{{
Name: "aggregated/custom_prometheuses_alertmanagers",
Item: record.JSONMarshaller{Object: aggregatedInstances{
Alertmanagers: []string{"test-alertmanager"}, Prometheuses: []string{"test-prometheus"},
}}},
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Given
items := []runtime.Object{}
if len(tc.proms) > 0 || len(tc.alertMgrs) > 0 {
for i := range tc.proms {
items = append(items, tc.proms[i])
}
for i := range tc.alertMgrs {
items = append(items, tc.alertMgrs[i])
}
}
clientset := fake.NewSimpleClientset(items...)

// When
test, errs := aggregatedInstances{}.gather(context.Background(), clientset)

// Assert
assert.Empty(t, errs)
assert.EqualValues(t, tc.expected, test)
})
}
}

// Test_getOutcastedAlertManagers provides unit tests for the namespace filtering logic of AlertManager instances.
// Each case seeds a fake clientset with Alertmanager objects and checks which names are returned.
func Test_getOutcastedAlertManagers(t *testing.T) {
	testCases := []struct {
		name      string
		alertMgrs []*v1.Alertmanager
		expected  []string
	}{
		{
			// The sub-test names refer to AlertManager, the resource actually under test here.
			name: "The function returns the name of the AlertManager outside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
			},
			expected: []string{"test"},
		}, {
			name: "The function ignores the name of the AlertManager inside the 'openshift-monitoring' namespace",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
			},
			expected: []string{},
		}, {
			name: "The function returns only items outside of the namespace on a mixed response from client",
			alertMgrs: []*v1.Alertmanager{
				{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
				{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
			},
			expected: []string{"test1", "test2"},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Given: a fake clientset seeded with the Alertmanager objects of the test case
			items := make([]runtime.Object, len(tc.alertMgrs))
			for i := range tc.alertMgrs {
				items[i] = tc.alertMgrs[i]
			}
			clientset := fake.NewSimpleClientset(items...)

			// When
			test, err := aggregatedInstances{}.getOutcastedAlertManagers(context.Background(), clientset)

			// Assert
			assert.NoError(t, err)
			assert.EqualValues(t, tc.expected, test)
		})
	}
}

// Test_getOutcastedPrometheuses provides unit tests for the namespace filtering logic of Prometheus instances
func Test_getOutcastedPrometheuses(t *testing.T) {
testCases := []struct {
name string
proms []*v1.Prometheus
expected []string
}{
{
name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
},
expected: []string{"test"},
}, {
name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
},
expected: []string{},
}, {
name: "The function returns only items outside of the namespace on a mixed response from client",
proms: []*v1.Prometheus{
{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
},
expected: []string{"test1", "test2"},
},
ncaak marked this conversation as resolved.
Show resolved Hide resolved
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Given
items := make([]runtime.Object, len(tc.proms))
for i := range tc.proms {
items[i] = tc.proms[i]
}
clientset := fake.NewSimpleClientset(items...)
ncaak marked this conversation as resolved.
Show resolved Hide resolved

// When
test, err := aggregatedInstances{}.getOutcastedPrometheuses(context.Background(), clientset)

// Assert
assert.NoError(t, err)
assert.EqualValues(t, tc.expected, test)
})
}
}