diff --git a/docs/gathered-data.md b/docs/gathered-data.md index 8b1c8f904..585c0402f 100644 --- a/docs/gathered-data.md +++ b/docs/gathered-data.md @@ -69,6 +69,35 @@ None None +## AggregatedMonitoringCRNames + +Collects instances outside of the `openshift-monitoring` namespace of the following custom resources: +- Kind: `Prometheus` Group: `monitoring.coreos.com` +- Kind: `Alertmanager` Group: `monitoring.coreos.com` + +### API Reference +- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html +- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html + +### Sample data +- [docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json](./insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json) + +### Location in archive +- `aggregated/custom_prometheuses_alertmanagers.json` + +### Config ID +`clusterconfig/aggregated_monitoring_cr_names` + +### Released version +- 4.16 + +### Backported versions +TBD + +### Changes +None + + ## CRD Collects the specified Custom Resource Definitions. 
diff --git a/docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json b/docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json new file mode 100644 index 000000000..aa0cf89eb --- /dev/null +++ b/docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json @@ -0,0 +1,8 @@ +{ + "prometheuses": [ + "example-prometheus-name" + ], + "alertmanagers": [ + "example-alertmanager-name" + ] +} \ No newline at end of file diff --git a/manifests/03-clusterrole.yaml b/manifests/03-clusterrole.yaml index 8f891e186..be27fe8fc 100644 --- a/manifests/03-clusterrole.yaml +++ b/manifests/03-clusterrole.yaml @@ -168,6 +168,14 @@ metadata: include.release.openshift.io/single-node-developer: "true" capability.openshift.io/name: Insights rules: + - apiGroups: + - "monitoring.coreos.com" + resources: + - alertmanagers + - prometheuses + verbs: + - get + - list - apiGroups: - "" resources: diff --git a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go index ec8325344..4fb932c74 100644 --- a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go +++ b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go @@ -27,6 +27,7 @@ type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []erro var gatheringFunctions = map[string]gathererFuncPtr{ "active_alerts": (*Gatherer).GatherActiveAlerts, + "aggregated_monitoring_cr_names": (*Gatherer).GatherAggregatedMonitoringCRNames, "authentication": (*Gatherer).GatherClusterAuthentication, "certificate_signing_requests": (*Gatherer).GatherCertificateSigningRequests, "ceph_cluster": (*Gatherer).GatherCephCluster, diff --git a/pkg/gatherers/clusterconfig/const.go b/pkg/gatherers/clusterconfig/const.go index 2d6058a79..743488ff5 100644 --- a/pkg/gatherers/clusterconfig/const.go +++ b/pkg/gatherers/clusterconfig/const.go @@ -7,6 +7,11 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" ) +const ( + // introduced 
by GatherAggregatedMonitoringCRNames gatherer + monitoringNamespace string = "openshift-monitoring" +) + var ( registryScheme = runtime.NewScheme() // logMaxLines sets maximum number of lines of the log file diff --git a/pkg/gatherers/clusterconfig/gather_aggregated_instances.go b/pkg/gatherers/clusterconfig/gather_aggregated_instances.go new file mode 100644 index 000000000..14685bfb5 --- /dev/null +++ b/pkg/gatherers/clusterconfig/gather_aggregated_instances.go @@ -0,0 +1,118 @@ +package clusterconfig + +import ( + "context" + + "github.com/openshift/insights-operator/pkg/record" + + promcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// GatherAggregatedMonitoringCRNames Collects instances outside of the `openshift-monitoring` namespace of the following custom resources: +// - Kind: `Prometheus` Group: `monitoring.coreos.com` +// - Kind: `Alertmanager` Group: `monitoring.coreos.com` +// +// ### API Reference +// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html +// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html +// +// ### Sample data +// - docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json +// +// ### Location in archive +// - `aggregated/custom_prometheuses_alertmanagers.json` +// +// ### Config ID +// `clusterconfig/aggregated_monitoring_cr_names` +// +// ### Released version +// - 4.16 +// +// ### Backported versions +// TBD +// +// ### Changes +// None +func (g *Gatherer) GatherAggregatedMonitoringCRNames(ctx context.Context) ([]record.Record, []error) { + client, err := promcli.NewForConfig(g.gatherKubeConfig) + if err != nil { + return nil, []error{err} + } + + return monitoringCRNames{}.gather(ctx, client) +} + +type monitoringCRNames struct { + Prometheuses []string `json:"prometheuses"` + Alertmanagers []string 
`json:"alertmanagers"` +} + +// gather returns records for all Prometheus and Alertmanager instances that exist outside the openshift-monitoring namespace. +// If either listing fails, it returns no records and the collected errors instead. +func (mn monitoringCRNames) gather(ctx context.Context, client promcli.Interface) ([]record.Record, []error) { + const Filename = "aggregated/custom_prometheuses_alertmanagers" + + errs := []error{} + prometheusList, err := mn.getOutcastedPrometheuses(ctx, client) + if err != nil { + errs = append(errs, err) + } + + alertManagersList, err := mn.getOutcastedAlertManagers(ctx, client) + if err != nil { + errs = append(errs, err) + } + + if len(errs) > 0 { + return nil, errs + } + + // Do not return an empty file if no Custom Resources were found + if len(prometheusList) == 0 && len(alertManagersList) == 0 { + return []record.Record{}, nil + } + + mn.Prometheuses = prometheusList + mn.Alertmanagers = alertManagersList + + return []record.Record{{Name: Filename, Item: record.JSONMarshaller{Object: mn}}}, nil +} + +// getOutcastedAlertManagers returns a collection of Alertmanager names, if any, from namespaces other than openshift-monitoring +// or an error if it couldn't retrieve them +func (mn monitoringCRNames) getOutcastedAlertManagers(ctx context.Context, client promcli.Interface) ([]string, error) { + alertManagersList, err := client.MonitoringV1().Alertmanagers(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + amNames := []string{} + for i := range alertManagersList.Items { + alertMgr := alertManagersList.Items[i] + if alertMgr.GetNamespace() != monitoringNamespace { + amNames = append(amNames, alertMgr.GetName()) + } + } + + return amNames, nil +} + +// getOutcastedPrometheuses returns a collection of Prometheus names, if any, from namespaces other than openshift-monitoring +// or an error if it couldn't retrieve them +func (mn monitoringCRNames) 
getOutcastedPrometheuses(ctx context.Context, client promcli.Interface) ([]string, error) { + prometheusList, err := client.MonitoringV1().Prometheuses(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + promNames := []string{} + for i := range prometheusList.Items { + prom := prometheusList.Items[i] + if prom.GetNamespace() != monitoringNamespace { + promNames = append(promNames, prom.GetName()) + } + } + + return promNames, nil +} diff --git a/pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go b/pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go new file mode 100644 index 000000000..36ce8eda0 --- /dev/null +++ b/pkg/gatherers/clusterconfig/gather_aggregated_instances_test.go @@ -0,0 +1,194 @@ +package clusterconfig + +import ( + "context" + "testing" + + "github.com/openshift/insights-operator/pkg/record" + v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Test_GatherAggregatedInstances provides unit tests for the correct output file structure +func Test_GatherAggregatedInstances(t *testing.T) { + testCases := []struct { + name string + proms []*v1.Prometheus + alertMgrs []*v1.Alertmanager + expected []record.Record + }{ + { + name: "The function returns the name of the Prometheus instance in the correct field", + proms: []*v1.Prometheus{ + {ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}}, + }, + expected: []record.Record{{ + Name: "aggregated/custom_prometheuses_alertmanagers", + Item: record.JSONMarshaller{Object: monitoringCRNames{ + Prometheuses: []string{"test"}, Alertmanagers: []string{}, + }}}, + }, + }, { + name: "The function returns the name of the AlertManager instance in the correct field", + alertMgrs: []*v1.Alertmanager{ + {ObjectMeta: 
metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}}, + }, + expected: []record.Record{{ + Name: "aggregated/custom_prometheuses_alertmanagers", + Item: record.JSONMarshaller{Object: monitoringCRNames{ + Alertmanagers: []string{"test"}, Prometheuses: []string{}, + }}}, + }, + }, { + name: "The function returns the names of the mixed instances in the correct field", + alertMgrs: []*v1.Alertmanager{ + {ObjectMeta: metav1.ObjectMeta{Name: "test-alertmanager", Namespace: "test-namespace"}}, + }, + proms: []*v1.Prometheus{ + {ObjectMeta: metav1.ObjectMeta{Name: "test-prometheus", Namespace: "test-namespace"}}, + }, + expected: []record.Record{{ + Name: "aggregated/custom_prometheuses_alertmanagers", + Item: record.JSONMarshaller{Object: monitoringCRNames{ + Alertmanagers: []string{"test-alertmanager"}, Prometheuses: []string{"test-prometheus"}, + }}}, + }, + }, { + name: "The function returns no records if no instances are found", + alertMgrs: []*v1.Alertmanager{}, + proms: []*v1.Prometheus{}, + expected: []record.Record{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Given + clientset := fake.NewSimpleClientset() + for _, am := range tc.alertMgrs { + assert.NoError(t, + clientset.Tracker().Add(am)) + } + for _, prom := range tc.proms { + assert.NoError(t, + clientset.Tracker().Add(prom)) + } + + // When + test, errs := monitoringCRNames{}.gather(context.Background(), clientset) + + // Assert + assert.Empty(t, errs) + assert.EqualValues(t, tc.expected, test) + }) + } +} + +// Test_getOutcastedAlertManagers provides unit tests for the namespace filtering logic of AlertManager instances +func Test_getOutcastedAlertManagers(t *testing.T) { + testCases := []struct { + name string + alertMgrs []*v1.Alertmanager + expected []string + }{ + { + name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace", + alertMgrs: []*v1.Alertmanager{ + {ObjectMeta: metav1.ObjectMeta{Name: "test", 
Namespace: "test-namespace"}}, + }, + expected: []string{"test"}, + }, { + name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace", + alertMgrs: []*v1.Alertmanager{ + {ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}}, + }, + expected: []string{}, + }, { + name: "The function returns only items outside of the namespace on a mixed response from client", + alertMgrs: []*v1.Alertmanager{ + {ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}}, + }, + expected: []string{"test1", "test2"}, + }, { + name: "The function returns an empty slice if no instances are found", + alertMgrs: []*v1.Alertmanager{}, + expected: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Given + clientset := fake.NewSimpleClientset() + for _, am := range tc.alertMgrs { + assert.NoError(t, + clientset.Tracker().Add(am)) + } + + // When + test, err := monitoringCRNames{}.getOutcastedAlertManagers(context.Background(), clientset) + + // Assert + assert.NoError(t, err) + assert.EqualValues(t, tc.expected, test) + }) + } +} + +// Test_getOutcastedPrometheuses provides unit tests for the namespace filtering logic of Prometheus instances +func Test_getOutcastedPrometheuses(t *testing.T) { + testCases := []struct { + name string + proms []*v1.Prometheus + expected []string + }{ + { + name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace", + proms: []*v1.Prometheus{ + {ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}}, + }, + expected: []string{"test"}, + }, { + name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace", + proms: []*v1.Prometheus{ + {ObjectMeta: metav1.ObjectMeta{Name: 
"test", Namespace: "openshift-monitoring"}}, + }, + expected: []string{}, + }, { + name: "The function returns only items outside of the namespace on a mixed response from client", + proms: []*v1.Prometheus{ + {ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}}, + }, + expected: []string{"test1", "test2"}, + }, { + name: "The function returns an empty slice if no instances are found", + proms: []*v1.Prometheus{}, + expected: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Given + clientset := fake.NewSimpleClientset() + for _, prom := range tc.proms { + assert.NoError(t, + clientset.Tracker().Add(prom)) + } + + // When + test, err := monitoringCRNames{}.getOutcastedPrometheuses(context.Background(), clientset) + + // Assert + assert.NoError(t, err) + assert.EqualValues(t, tc.expected, test) + }) + } +}