Skip to content

Commit

Permalink
pkg/monitoring/metrics: add alert for VMs using outdated machine type
Browse files Browse the repository at this point in the history
- Introduce new alert for VMs using an outdated machine type.

- Machine types are considered outdated if they are no longer compatible
  due to changes in the virt-launcher OS version. These VMs must be
  updated with supported machine types to ensure compatibility and avoid
  potential issues.

- Add a functional test to verify the alert is triggered when VMs with
  outdated machine types are detected.

Signed-off-by: Daniel Sionov <dsionov@redhat.com>
  • Loading branch information
dasionov committed Oct 6, 2024
1 parent e3b2b42 commit 82528f6
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 100 deletions.
49 changes: 25 additions & 24 deletions pkg/components/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,30 +62,31 @@ var deploymentType = metav1.TypeMeta{
}

// DeploymentOperatorParams carries the values handed to GetDeploymentOperator,
// which returns an appsv1.Deployment built from them.
type DeploymentOperatorParams struct {
Namespace string
Image string
WebhookImage string
CliDownloadsImage string
KVUIPluginImage string
KVUIProxyImage string
ImagePullPolicy string
ConversionContainer string
VmwareContainer string
VirtIOWinContainer string
Smbios string
Machinetype string
Amd64MachineType string
Arm64MachineType string
HcoKvIoVersion string
KubevirtVersion string
CdiVersion string
CnaoVersion string
SspVersion string
HppoVersion string
MtqVersion string
AaqVersion string
PrimaryUDNImage string
Env []corev1.EnvVar
Namespace string
Image string
WebhookImage string
CliDownloadsImage string
KVUIPluginImage string
KVUIProxyImage string
ImagePullPolicy string
ConversionContainer string
VmwareContainer string
VirtIOWinContainer string
Smbios string
Machinetype string
Amd64MachineType string
Arm64MachineType string
HcoKvIoVersion string
KubevirtVersion string
// NOTE(review): "Lancher" looks like a misspelling of "Launcher". The field is
// filled from the kubevirt-virt-launcher-os-version flag by the csv-merger and
// manifest-templator tools, so renaming it means updating those callers too.
KubevirtVirtLancherOsVersion string
CdiVersion string
CnaoVersion string
SspVersion string
HppoVersion string
MtqVersion string
AaqVersion string
PrimaryUDNImage string
Env []corev1.EnvVar
}

func GetDeploymentOperator(params *DeploymentOperatorParams) appsv1.Deployment {
Expand Down
53 changes: 45 additions & 8 deletions pkg/monitoring/rules/alerts/operator_alerts.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
package alerts

import (
"os"
"strconv"

promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
logf "sigs.k8s.io/controller-runtime/pkg/log"
)

// Alert names and label keys used by the operator alert rules.
const (
outOfBandUpdateAlert = "KubeVirtCRModified"
unsafeModificationAlert = "UnsupportedHCOModification"
installationNotCompletedAlert = "HCOInstallationIncomplete"
singleStackIPv6Alert = "SingleStackIPv6Unsupported"
MisconfiguredDeschedulerAlert = "HCOMisconfiguredDescheduler"
severityAlertLabelKey = "severity"
healthImpactAlertLabelKey = "operator_health_impact"
outOfBandUpdateAlert = "KubeVirtCRModified"
unsafeModificationAlert = "UnsupportedHCOModification"
installationNotCompletedAlert = "HCOInstallationIncomplete"
singleStackIPv6Alert = "SingleStackIPv6Unsupported"
MisconfiguredDeschedulerAlert = "HCOMisconfiguredDescheduler"
// VMOutdatedMachineTypeAlert is exported so the functional tests can look the
// alert up by name.
VMOutdatedMachineTypeAlert = "VMHasOutdatedMachineType"
// operatorAlerts adds the outdated-machine-type rule only when the integer in
// VIRT_LAUNCHER_OS_VERSION is strictly greater than this value.
minSupportedVirtLauncherOSVersion = 8
severityAlertLabelKey = "severity"
healthImpactAlertLabelKey = "operator_health_impact"
)

func operatorAlerts() []promv1.Rule {
return []promv1.Rule{
logger := logf.Log.WithName("operator-alerts")

rules := []promv1.Rule{
{
Alert: outOfBandUpdateAlert,
Expr: intstr.FromString("sum by(component_name) ((round(increase(kubevirt_hco_out_of_band_modifications_total[10m]))>0 and kubevirt_hco_out_of_band_modifications_total offset 10m) or (kubevirt_hco_out_of_band_modifications_total != 0 unless kubevirt_hco_out_of_band_modifications_total offset 10m))"),
Expand Down Expand Up @@ -80,4 +88,33 @@ func operatorAlerts() []promv1.Rule {
},
},
}

rhelVersion, exists := os.LookupEnv("VIRT_LAUNCHER_OS_VERSION")
if !exists {
return rules
}

virtLauncherOSVersion, err := strconv.Atoi(rhelVersion)
if err != nil {
logger.Error(err, "Error parsing VIRT_LAUNCHER_OS_VERSION")
return rules
}

if virtLauncherOSVersion > minSupportedVirtLauncherOSVersion {
rules = append(rules, promv1.Rule{
Alert: VMOutdatedMachineTypeAlert,
Expr: intstr.FromString(`count(kubevirt_vmi_info{guest_os_machine=~".*rhel8.*"}
and on(name, namespace) kubevirt_vm_info{status=~"Running|Stopped"}) > 0`),
Annotations: map[string]string{
"description": "There are virtual machines using an outdated machine type that need to be patched.",
"summary": "{{ $value }} virtual machines are using an outdated machine type.",
},
Labels: map[string]string{
severityAlertLabelKey: "warning",
healthImpactAlertLabelKey: "none",
},
})
}

return rules
}
77 changes: 77 additions & 0 deletions tests/func-tests/monitoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/tls"
"flag"
"fmt"
"k8s.io/utils/ptr"
"math"
"net/http"
"strconv"
Expand All @@ -20,8 +21,10 @@ import (
promConfig "github.com/prometheus/common/config"
promModel "github.com/prometheus/common/model"
authenticationv1 "k8s.io/api/authentication/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -192,6 +195,80 @@ var _ = Describe("[crit:high][vendor:cnv-qe@redhat.com][level:system]Monitoring"
verifyOperatorHealthMetricValue(ctx, promClient, hcoClient, initialOperatorHealthMetricValue, warningImpact)
})

// Functional coverage for the VMHasOutdatedMachineType alert. The operator adds
// that rule only when VIRT_LAUNCHER_OS_VERSION is set above the minimum
// supported version, so a cluster without the rule skips the spec instead of
// failing it.
Context("VMHasOutdatedMachineType alert", func() {
	const (
		vmName              = "test-vm-outdated-machine-type"
		outdatedMachineType = "pc-q35-rhel8.4.0"
		// PromQL label values must be fully quoted; the selector is derived from
		// the machine type so the query and the VM spec cannot drift apart.
		query = `kubevirt_vmi_info{guest_os_machine="` + outdatedMachineType + `"}`
	)

	var (
		ruleExists bool
		skipReason string
	)

	BeforeEach(func(ctx context.Context) {
		By("Checking if the VMHasOutdatedMachineType rule is registered in Prometheus")
		// Intercept the assertion failure: a bare Should(BeTrue()) would fail the
		// spec on a missing rule and the Skip in the spec body could never run.
		err := InterceptGomegaFailure(func() {
			Eventually(func(ctx context.Context) (bool, error) {
				rulesResult, err := promClient.Rules(ctx)
				if err != nil {
					return false, err
				}

				for _, group := range rulesResult.Groups {
					for _, rule := range group.Rules {
						alertingRule, ok := rule.(promApiv1.AlertingRule)
						if ok && alertingRule.Name == hcoalerts.VMOutdatedMachineTypeAlert {
							return true, nil
						}
					}
				}
				return false, nil
			}).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).Should(BeTrue())
		})

		ruleExists = err == nil
		skipReason = ""
		if err != nil {
			skipReason = err.Error()
		}
	})

	It("should fire the VMHasOutdatedMachineType alert when a VM is using an outdated machine type", func(ctx context.Context) {
		if !ruleExists {
			Skip("Skipping test because the VMHasOutdatedMachineType rule is not registered: " + skipReason)
		}

		By("Ensuring the VMHasOutdatedMachineType alert doesnt exist before creating the VM")
		// Assertions inside polled functions go through g so that a failure is
		// reported by the poller instead of aborting the spec mid-poll.
		Consistently(func(g Gomega, ctx context.Context) *promApiv1.Alert {
			alerts, err := promClient.Alerts(ctx)
			g.Expect(err).ToNot(HaveOccurred())
			return getAlertByName(alerts, hcoalerts.VMOutdatedMachineTypeAlert)
		}).WithPolling(time.Second).WithTimeout(15 * time.Second).WithContext(ctx).Should(BeNil())

		By("Creating a VM with an outdated machine type")
		vm := &kubevirtcorev1.VirtualMachine{
			ObjectMeta: metav1.ObjectMeta{
				Name:      vmName,
				Namespace: tests.TestNamespace,
			},
			Spec: kubevirtcorev1.VirtualMachineSpec{
				RunStrategy: ptr.To(kubevirtcorev1.RunStrategyOnce),
				// Template is a pointer in the KubeVirt API; allocate it before
				// touching its fields to avoid a nil dereference.
				Template: &kubevirtcorev1.VirtualMachineInstanceTemplateSpec{},
			},
		}
		vm.Spec.Template.Spec.Domain.Resources.Requests = corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("128Mi")}
		vm.Spec.Template.Spec.Domain.Machine = &kubevirtcorev1.Machine{Type: outdatedMachineType}
		Expect(cli.Create(ctx, vm)).To(Succeed())

		// Remove the VM afterwards so the alert does not keep firing for later
		// specs and a re-run does not hit AlreadyExists.
		DeferCleanup(func(ctx context.Context) {
			if err := cli.Delete(ctx, vm); err != nil && !apierrors.IsNotFound(err) {
				Expect(err).ToNot(HaveOccurred())
			}
		})

		By("Checking that the metric for outdated machine types is set to 1.0")
		// NOTE(review): kubevirt_vmi_info is read through hcoClient.GetHCOMetric;
		// confirm that helper can resolve KubeVirt metrics and not only HCO's own.
		Eventually(func(g Gomega, ctx context.Context) float64 {
			valueAfter, err := hcoClient.GetHCOMetric(ctx, query)
			g.Expect(err).NotTo(HaveOccurred())
			return valueAfter
		}).WithTimeout(60*time.Second).WithPolling(time.Second).WithContext(ctx).Should(
			Equal(float64(1)),
			"expected outdated machine type metric to be 1.0",
		)

		By("Checking the VMHasOutdatedMachineType alert")
		Eventually(func(g Gomega, ctx context.Context) *promApiv1.Alert {
			alerts, err := promClient.Alerts(ctx)
			g.Expect(err).ToNot(HaveOccurred())
			return getAlertByName(alerts, hcoalerts.VMOutdatedMachineTypeAlert)
		}).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).ShouldNot(BeNil())
	})
})

Describe("KubeDescheduler", Serial, Ordered, Label(tests.OpenshiftLabel, "monitoring"), func() {

var (
Expand Down
44 changes: 23 additions & 21 deletions tools/csv-merger/csv-merger.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ var (
crdDir = flag.String("crds-dir", "", "the directory containing the CRDs for apigroup validation. The validation will be performed if and only if the value is non-empty.")
hcoKvIoVersion = flag.String("hco-kv-io-version", "", "KubeVirt version")
kubevirtVersion = flag.String("kubevirt-version", "", "Kubevirt operator version")
kubevirtVirtLauncherOSVersion = flag.String("kubevirt-virt-launcher-os-version", "", "Kubevirt Virt launcher OS version")
cdiVersion = flag.String("cdi-version", "", "CDI operator version")
cnaoVersion = flag.String("cnao-version", "", "CNA operator version")
sspVersion = flag.String("ssp-version", "", "SSP operator version")
Expand Down Expand Up @@ -514,27 +515,28 @@ func getCsvBaseParams(replaces string, version semver.Version) *components.CSVBa

// getDeploymentParams assembles the components.DeploymentOperatorParams for the
// operator deployment by dereferencing the package-level flag pointers and
// attaching envVars. ImagePullPolicy is hard-coded to "IfNotPresent".
func getDeploymentParams() *components.DeploymentOperatorParams {
return &components.DeploymentOperatorParams{
Namespace: *namespace,
Image: *operatorImage,
WebhookImage: *webhookImage,
CliDownloadsImage: *cliDownloadsImage,
KVUIPluginImage: *kvUIPluginImage,
KVUIProxyImage: *kvUIProxyImage,
ImagePullPolicy: "IfNotPresent",
VirtIOWinContainer: *kvVirtIOWinImage,
Smbios: *smbios,
Machinetype: *machinetype,
Amd64MachineType: *amd64MachineType,
Arm64MachineType: *arm64MachineType,
HcoKvIoVersion: *hcoKvIoVersion,
KubevirtVersion: *kubevirtVersion,
CdiVersion: *cdiVersion,
CnaoVersion: *cnaoVersion,
SspVersion: *sspVersion,
HppoVersion: *hppoVersion,
AaqVersion: *aaqVersion,
PrimaryUDNImage: *primaryUDNImage,
Env: envVars,
Namespace: *namespace,
Image: *operatorImage,
WebhookImage: *webhookImage,
CliDownloadsImage: *cliDownloadsImage,
KVUIPluginImage: *kvUIPluginImage,
KVUIProxyImage: *kvUIProxyImage,
ImagePullPolicy: "IfNotPresent",
VirtIOWinContainer: *kvVirtIOWinImage,
Smbios: *smbios,
Machinetype: *machinetype,
Amd64MachineType: *amd64MachineType,
Arm64MachineType: *arm64MachineType,
HcoKvIoVersion: *hcoKvIoVersion,
KubevirtVersion: *kubevirtVersion,
// Value of the -kubevirt-virt-launcher-os-version flag.
KubevirtVirtLancherOsVersion: *kubevirtVirtLauncherOSVersion,
CdiVersion: *cdiVersion,
CnaoVersion: *cnaoVersion,
SspVersion: *sspVersion,
HppoVersion: *hppoVersion,
AaqVersion: *aaqVersion,
PrimaryUDNImage: *primaryUDNImage,
Env: envVars,
}
}

Expand Down
96 changes: 49 additions & 47 deletions tools/manifest-templator/manifest-templator.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,34 +50,35 @@ var (

// flags for the command line arguments we accept
var (
cwd, _ = os.Getwd()
deployDir = flag.String("deploy-dir", "deploy", "Directory where manifests should be written")
cnaCsv = flag.String("cna-csv", "", "Cluster Network Addons CSV string")
virtCsv = flag.String("virt-csv", "", "KubeVirt CSV string")
sspCsv = flag.String("ssp-csv", "", "Scheduling Scale Performance CSV string")
cdiCsv = flag.String("cdi-csv", "", "Containerized Data Importer CSV String")
hppCsv = flag.String("hpp-csv", "", "HostPath Provisioner Operator CSV String")
_ = flag.String("mtq-csv", "", "deprecated. This flag is ignored")
aaqCsv = flag.String("aaq-csv", "", "Applications Aware Quota Operator CSV String")
operatorNamespace = flag.String("operator-namespace", "kubevirt-hyperconverged", "Name of the Operator")
operatorImage = flag.String("operator-image", "", "HyperConverged Cluster Operator image")
webhookImage = flag.String("webhook-image", "", "HyperConverged Cluster Webhook image")
cliDownloadsImage = flag.String("cli-downloads-image", "", "Downloads Server image")
kvVirtIOWinImage = flag.String("kv-virtiowin-image-name", "", "KubeVirt VirtIO Win image")
smbios = flag.String("smbios", "", "Custom SMBIOS string for KubeVirt ConfigMap")
machinetype = flag.String("machinetype", "", "Custom MACHINETYPE string for KubeVirt ConfigMap (Deprecated, use amd64-machinetype)")
amd64MachineType = flag.String("amd64-machinetype", "", "Custom AMD64_MACHINETYPE string for KubeVirt ConfigMap")
arm64MachineType = flag.String("arm64-machinetype", "", "Custom ARM64_MACHINETYPE string for KubeVirt ConfigMap")
hcoKvIoVersion = flag.String("hco-kv-io-version", "", "KubeVirt version")
kubevirtVersion = flag.String("kubevirt-version", "", "Kubevirt operator version")
cdiVersion = flag.String("cdi-version", "", "CDI operator version")
cnaoVersion = flag.String("cnao-version", "", "CNA operator version")
sspVersion = flag.String("ssp-version", "", "SSP operator version")
hppoVersion = flag.String("hppo-version", "", "HPP operator version")
_ = flag.String("mtq-version", "", "deprecated. This flag is ignored")
aaqVersion = flag.String("aaq-version", "", "AAQ operator version")
primaryUDNImage = flag.String("primary-udn-binding-image-name", "", "Primary UDN binding image")
apiSources = flag.String("api-sources", cwd+"/...", "Project sources")
// The error from os.Getwd is discarded; cwd only feeds the default of -api-sources.
cwd, _ = os.Getwd()
deployDir = flag.String("deploy-dir", "deploy", "Directory where manifests should be written")
cnaCsv = flag.String("cna-csv", "", "Cluster Network Addons CSV string")
virtCsv = flag.String("virt-csv", "", "KubeVirt CSV string")
sspCsv = flag.String("ssp-csv", "", "Scheduling Scale Performance CSV string")
cdiCsv = flag.String("cdi-csv", "", "Containerized Data Importer CSV String")
hppCsv = flag.String("hpp-csv", "", "HostPath Provisioner Operator CSV String")
// Still registered so existing invocations that pass -mtq-csv keep parsing; the value is dropped.
_ = flag.String("mtq-csv", "", "deprecated. This flag is ignored")
aaqCsv = flag.String("aaq-csv", "", "Applications Aware Quota Operator CSV String")
operatorNamespace = flag.String("operator-namespace", "kubevirt-hyperconverged", "Name of the Operator")
operatorImage = flag.String("operator-image", "", "HyperConverged Cluster Operator image")
webhookImage = flag.String("webhook-image", "", "HyperConverged Cluster Webhook image")
cliDownloadsImage = flag.String("cli-downloads-image", "", "Downloads Server image")
kvVirtIOWinImage = flag.String("kv-virtiowin-image-name", "", "KubeVirt VirtIO Win image")
smbios = flag.String("smbios", "", "Custom SMBIOS string for KubeVirt ConfigMap")
machinetype = flag.String("machinetype", "", "Custom MACHINETYPE string for KubeVirt ConfigMap (Deprecated, use amd64-machinetype)")
amd64MachineType = flag.String("amd64-machinetype", "", "Custom AMD64_MACHINETYPE string for KubeVirt ConfigMap")
arm64MachineType = flag.String("arm64-machinetype", "", "Custom ARM64_MACHINETYPE string for KubeVirt ConfigMap")
hcoKvIoVersion = flag.String("hco-kv-io-version", "", "KubeVirt version")
kubevirtVersion = flag.String("kubevirt-version", "", "Kubevirt operator version")
// Copied into DeploymentOperatorParams.KubevirtVirtLancherOsVersion by getOperatorParameters.
kubevirtVirtLauncherOSVersion = flag.String("kubevirt-virt-launcher-os-version", "", "Kubevirt Virt launcher OS version")
cdiVersion = flag.String("cdi-version", "", "CDI operator version")
cnaoVersion = flag.String("cnao-version", "", "CNA operator version")
sspVersion = flag.String("ssp-version", "", "SSP operator version")
hppoVersion = flag.String("hppo-version", "", "HPP operator version")
// Still registered so existing invocations that pass -mtq-version keep parsing; the value is dropped.
_ = flag.String("mtq-version", "", "deprecated. This flag is ignored")
aaqVersion = flag.String("aaq-version", "", "AAQ operator version")
primaryUDNImage = flag.String("primary-udn-binding-image-name", "", "Primary UDN binding image")
apiSources = flag.String("api-sources", cwd+"/...", "Project sources")
)

// check handles errors
Expand Down Expand Up @@ -412,25 +413,26 @@ func getCsvWithComponent() []util.CsvWithComponent {

// getOperatorParameters builds the components.DeploymentOperatorParams from the
// command-line flags of this tool. ImagePullPolicy is hard-coded to
// "IfNotPresent" and Env starts as an empty, non-nil slice.
func getOperatorParameters() *components.DeploymentOperatorParams {
params := &components.DeploymentOperatorParams{
Namespace: *operatorNamespace,
Image: *operatorImage,
WebhookImage: *webhookImage,
CliDownloadsImage: *cliDownloadsImage,
ImagePullPolicy: "IfNotPresent",
VirtIOWinContainer: *kvVirtIOWinImage,
Smbios: *smbios,
Machinetype: *machinetype,
Amd64MachineType: *amd64MachineType,
Arm64MachineType: *arm64MachineType,
HcoKvIoVersion: *hcoKvIoVersion,
KubevirtVersion: *kubevirtVersion,
CdiVersion: *cdiVersion,
CnaoVersion: *cnaoVersion,
SspVersion: *sspVersion,
HppoVersion: *hppoVersion,
AaqVersion: *aaqVersion,
PrimaryUDNImage: *primaryUDNImage,
Env: []corev1.EnvVar{},
Namespace: *operatorNamespace,
Image: *operatorImage,
WebhookImage: *webhookImage,
CliDownloadsImage: *cliDownloadsImage,
ImagePullPolicy: "IfNotPresent",
VirtIOWinContainer: *kvVirtIOWinImage,
Smbios: *smbios,
Machinetype: *machinetype,
Amd64MachineType: *amd64MachineType,
Arm64MachineType: *arm64MachineType,
HcoKvIoVersion: *hcoKvIoVersion,
KubevirtVersion: *kubevirtVersion,
// Value of the -kubevirt-virt-launcher-os-version flag.
KubevirtVirtLancherOsVersion: *kubevirtVirtLauncherOSVersion,
CdiVersion: *cdiVersion,
CnaoVersion: *cnaoVersion,
SspVersion: *sspVersion,
HppoVersion: *hppoVersion,
AaqVersion: *aaqVersion,
PrimaryUDNImage: *primaryUDNImage,
Env: []corev1.EnvVar{},
}
return params
}
Expand Down

0 comments on commit 82528f6

Please sign in to comment.