Skip to content

Commit

Permalink
pkg/gather/clusterconfig/version: Walk openshift-cluster-version
Browse files Browse the repository at this point in the history
This uses logic similar to what gatherClusterOperators uses, but applies
it to the ClusterVersion operator's namespace.  Normally we would gather
the pod YAML only when the pod is failing, but to help audit tolerations
for [1], for now it is gathered every time.

[1]: https://bugzilla.redhat.com/show_bug.cgi?id=1941901
  • Loading branch information
wking committed Mar 24, 2021
1 parent bc39708 commit 8699181
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
1 change: 1 addition & 0 deletions pkg/gather/clusterconfig/operators.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type CompactedEventList struct {
// Location of operators in archive: config/clusteroperator/
// See: docs/insights-archive-sample/config/clusteroperator
// Location of pods in archive: config/pod/
// Location of events in archive: events/
// Id in config: operators
func GatherClusterOperators(g *Gatherer, c chan<- gatherResult) {
defer close(c)
Expand Down
55 changes: 53 additions & 2 deletions pkg/gather/clusterconfig/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@ package clusterconfig

import (
"context"
"fmt"
"time"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/klog/v2"

configv1 "github.com/openshift/api/config/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"

"github.com/openshift/insights-operator/pkg/record"
"github.com/openshift/insights-operator/pkg/utils/check"
"github.com/openshift/insights-operator/pkg/utils/marshal"
"github.com/openshift/insights-operator/pkg/utils/anonymize"
)
Expand All @@ -22,6 +29,8 @@ import (
// Location in archive: config/version/
// See: docs/insights-archive-sample/config/version
// Location of cluster ID: config/id/
// Location of pods in archive: config/pod/
// Location of events in archive: events/
// Id in config: version
func GatherClusterVersion(g *Gatherer, c chan<- gatherResult) {
// Close the result channel when this function returns.
defer close(c)
Expand All @@ -30,11 +39,16 @@ func GatherClusterVersion(g *Gatherer, c chan<- gatherResult) {
// Error path for a step hidden in the collapsed part of this diff
// (presumably the creation of gatherConfigClient — TODO confirm):
// send the error as the only result and stop.
c <- gatherResult{nil, []error{err}}
return
}
// Pre-change call (the line this commit removes); it is superseded by the
// three-argument call further down.
records, errors := getClusterVersion(g.ctx, gatherConfigClient)
// Build a Kubernetes clientset from g.gatherProtoKubeConfig; its CoreV1
// client is what getClusterVersion uses to list pods.
gatherKubeClient, err := kubernetes.NewForConfig(g.gatherProtoKubeConfig)
if err != nil {
// Same reporting style as above: the error is the only result sent.
c <- gatherResult{nil, []error{err}}
return
}
// NOTE(review): the local name "errors" shadows the imported
// k8s.io/apimachinery/pkg/api/errors package for the rest of this function.
records, errors := getClusterVersion(g.ctx, gatherConfigClient, gatherKubeClient.CoreV1())
// Send the gathered records and errors together as a single result.
c <- gatherResult{records, errors}
}

// getClusterVersion fetches the ClusterVersion object named "version" and, in
// the visible part of the body, records the cluster ID, every pod in the
// openshift-cluster-version namespace, and (only when at least one of those
// pods is unhealthy) whatever gatherNamespaceEvents returns for that namespace.
// It returns nil, nil when the ClusterVersion object is not found.
//
// The first signature below is the pre-change one removed by this commit; the
// second, which additionally takes coreClient, is its replacement.
func getClusterVersion(ctx context.Context, configClient configv1client.ConfigV1Interface) ([]record.Record, []error) {
func getClusterVersion(ctx context.Context, configClient configv1client.ConfigV1Interface, coreClient corev1client.CoreV1Interface) ([]record.Record, []error) {
config, err := configClient.ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
// A missing ClusterVersion is not treated as an error: nothing is gathered.
if errors.IsNotFound(err) {
return nil, nil
Expand All @@ -51,6 +65,43 @@ func getClusterVersion(ctx context.Context, configClient configv1client.ConfigV1
// Record the cluster ID as a raw string under config/id. The condition
// guarding this append is in the collapsed part of the diff.
records = append(records, record.Record{Name: "config/id", Item: marshal.Raw{Str: string(config.Spec.ClusterID)}})
}

// TODO: In the future, make this conditional on sad ClusterVersion conditions or ClusterVersionOperatorDown alerting, etc.
namespace := "openshift-cluster-version"
// Single timestamp passed to every check.IsHealthyPod call below.
now := time.Now()
unhealthyPods := []*corev1.Pod{}

pods, err := coreClient.Pods(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
// Best effort: a failed pod listing keeps the records gathered so far
// and reports no error to the caller.
// NOTE(review): err itself is neither included in the log message nor
// returned, so the cause of the failure is lost.
klog.V(2).Infof("Unable to find pods in namespace %s for cluster-version operator", namespace)
return records, nil
}
for i := range pods.Items {
// Take the address of the slice element so each stored pointer refers to
// a distinct pod rather than to a shared loop variable.
pod := &pods.Items[i]

// TODO: shift after IsHealthyPod
// Until then, every pod is recorded under config/pod/<namespace>/<name>,
// healthy or not.
records = append(records, record.Record{Name: fmt.Sprintf("config/pod/%s/%s", pod.Namespace, pod.Name), Item: record.JSONMarshaller{Object: pod}})

if check.IsHealthyPod(pod, now) {
continue
}

unhealthyPods = append(unhealthyPods, pod)

// TODO: gather container logs
}

// Exit early if no unhealthy pods found
if len(unhealthyPods) == 0 {
return records, nil
}
klog.V(2).Infof("Found %d unhealthy pods in %s", len(unhealthyPods), namespace)

// Gather events for the namespace only when something looks unhealthy.
// NOTE(review): errors from gatherNamespaceEvents are logged at V(2) but not
// returned; any records it did produce are still appended.
namespaceRecords, errs := gatherNamespaceEvents(ctx, coreClient, namespace)
if len(errs) > 0 {
klog.V(2).Infof("Unable to collect events for namespace %q: %#v", namespace, errs)
}
records = append(records, namespaceRecords...)

return records, nil
}

Expand Down

0 comments on commit 8699181

Please sign in to comment.