-
Notifications
You must be signed in to change notification settings - Fork 153
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ensure PodDisruptionBudgetAtLimit alert is silenced (#3020)
* Add observability controller
  Signed-off-by: machadovilaca <machadovilaca@gmail.com>
* Ensure PodDisruptionBudgetAtLimit alert is silenced
  Signed-off-by: machadovilaca <machadovilaca@gmail.com>
---------
Signed-off-by: machadovilaca <machadovilaca@gmail.com>
- Loading branch information
1 parent
b45fc6a
commit 2628100
Showing
11 changed files
with
491 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package observability | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/client-go/rest" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/event" | ||
"sigs.k8s.io/controller-runtime/pkg/handler" | ||
logf "sigs.k8s.io/controller-runtime/pkg/log" | ||
"sigs.k8s.io/controller-runtime/pkg/source" | ||
|
||
"github.com/kubevirt/hyperconverged-cluster-operator/pkg/alertmanager" | ||
) | ||
|
||
var ( | ||
log = logf.Log.WithName("controller_observability") | ||
periodicity = 1 * time.Hour | ||
) | ||
|
||
type Reconciler struct { | ||
config *rest.Config | ||
events chan event.GenericEvent | ||
|
||
amApi *alertmanager.Api | ||
} | ||
|
||
func (r *Reconciler) Reconcile(_ context.Context, _ ctrl.Request) (ctrl.Result, error) { | ||
log.Info("Reconciling Observability") | ||
|
||
if err := r.ensurePodDisruptionBudgetAtLimitIsSilenced(); err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
|
||
return ctrl.Result{}, nil | ||
} | ||
|
||
func NewReconciler(config *rest.Config) *Reconciler { | ||
return &Reconciler{ | ||
config: config, | ||
events: make(chan event.GenericEvent, 1), | ||
} | ||
} | ||
|
||
func SetupWithManager(mgr ctrl.Manager) error { | ||
log.Info("Setting up controller") | ||
|
||
r := NewReconciler(mgr.GetConfig()) | ||
r.startEventLoop() | ||
|
||
return ctrl.NewControllerManagedBy(mgr). | ||
Named("observability"). | ||
WatchesRawSource(source.Channel( | ||
r.events, | ||
&handler.EnqueueRequestForObject{}, | ||
)). | ||
Complete(r) | ||
} | ||
|
||
func (r *Reconciler) startEventLoop() { | ||
ticker := time.NewTicker(periodicity) | ||
|
||
go func() { | ||
r.events <- event.GenericEvent{ | ||
Object: &metav1.PartialObjectMetadata{}, | ||
} | ||
|
||
for range ticker.C { | ||
r.events <- event.GenericEvent{ | ||
Object: &metav1.PartialObjectMetadata{}, | ||
} | ||
} | ||
}() | ||
} |
107 changes: 107 additions & 0 deletions
107
controllers/observability/pod_disruption_budget_at_limit.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package observability | ||
|
||
import ( | ||
"crypto/tls" | ||
"crypto/x509" | ||
"fmt" | ||
"net/http" | ||
"os" | ||
"time" | ||
|
||
"github.com/kubevirt/hyperconverged-cluster-operator/pkg/alertmanager" | ||
) | ||
|
||
const (
	// alertmanagerSvcHost is the base URL handed to the Alertmanager client
	// in NewAlertmanagerApi.
	alertmanagerSvcHost = "https://alertmanager-main.openshift-monitoring.svc.cluster.local:9094"

	// tlsCertPath is the file NewAlertmanagerApi reads the CA certificate
	// from in order to verify the Alertmanager TLS endpoint.
	tlsCertPath = "/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
)
|
||
func (r *Reconciler) ensurePodDisruptionBudgetAtLimitIsSilenced() error { | ||
if r.amApi == nil { | ||
var err error | ||
r.amApi, err = r.NewAlertmanagerApi() | ||
if err != nil { | ||
return fmt.Errorf("failed to initialize alertmanager api: %w", err) | ||
} | ||
} | ||
|
||
amSilences, err := r.amApi.ListSilences() | ||
if err != nil { | ||
return fmt.Errorf("failed to list alertmanager silences: %w", err) | ||
} | ||
|
||
if FindPodDisruptionBudgetAtLimitSilence(amSilences) != nil { | ||
log.Info("KubeVirt PodDisruptionBudgetAtLimit alerts are already silenced") | ||
return nil | ||
} | ||
|
||
silence := alertmanager.Silence{ | ||
Comment: "Silence KubeVirt PodDisruptionBudgetAtLimit alerts", | ||
CreatedBy: "hyperconverged-cluster-operator", | ||
EndsAt: "3000-01-01T00:00:00Z", | ||
Matchers: []alertmanager.Matcher{ | ||
{ | ||
IsEqual: true, | ||
Name: "alertname", | ||
Value: "PodDisruptionBudgetAtLimit", | ||
}, | ||
{ | ||
IsRegex: true, | ||
Name: "poddisruptionbudget", | ||
Value: "kubevirt-disruption-budget-.*", | ||
}, | ||
}, | ||
StartsAt: time.Now().Format(time.RFC3339), | ||
} | ||
|
||
if err := r.amApi.CreateSilence(silence); err != nil { | ||
return fmt.Errorf("failed to create alertmanager silence: %w", err) | ||
} | ||
log.Info("Silenced PodDisruptionBudgetAtLimit alerts") | ||
|
||
return nil | ||
} | ||
|
||
func (r *Reconciler) NewAlertmanagerApi() (*alertmanager.Api, error) { | ||
caCert, err := os.ReadFile(tlsCertPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to read ca cert: %w", err) | ||
} | ||
|
||
caCertPool := x509.NewCertPool() | ||
caCertPool.AppendCertsFromPEM(caCert) | ||
|
||
httpClient := http.Client{} | ||
httpClient.Transport = &http.Transport{ | ||
TLSClientConfig: &tls.Config{RootCAs: caCertPool}, | ||
} | ||
|
||
return alertmanager.NewAPI(httpClient, alertmanagerSvcHost, r.config.BearerToken), nil | ||
} | ||
|
||
func FindPodDisruptionBudgetAtLimitSilence(amSilences []alertmanager.Silence) *alertmanager.Silence { | ||
for _, silence := range amSilences { | ||
if silence.Status.State != "active" { | ||
continue | ||
} | ||
|
||
var isPDBSilence bool | ||
var isKubeVirtPDBSilence bool | ||
|
||
for _, matcher := range silence.Matchers { | ||
if matcher.Name == "alertname" && matcher.Value == "PodDisruptionBudgetAtLimit" && matcher.IsEqual { | ||
isPDBSilence = true | ||
} | ||
|
||
if matcher.Name == "poddisruptionbudget" && matcher.IsRegex && matcher.Value == "kubevirt-disruption-budget-.*" { | ||
isKubeVirtPDBSilence = true | ||
} | ||
} | ||
|
||
if isPDBSilence && isKubeVirtPDBSilence { | ||
return &silence | ||
} | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package alertmanager_test | ||
|
||
import ( | ||
"testing" | ||
|
||
. "github.com/onsi/ginkgo/v2" | ||
. "github.com/onsi/gomega" | ||
) | ||
|
||
func TestAlertmanager(t *testing.T) { | ||
RegisterFailHandler(Fail) | ||
RunSpecs(t, "Alertmanager Suite") | ||
} |
Oops, something went wrong.