internal/files/alarms/probable_causes.json

[
    {
        "probableCauseId": "Watchdog",
        "name": "An alert that should always be firing to certify that Alertmanager is working properly.",
        "description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n"
    },
    {
        "probableCauseId": "UpdateAvailable",
        "name": "Your upstream update recommendation service recommends you update your cluster.",
        "description": "For more information refer to 'oc adm upgrade'"
    },
    {
        "probableCauseId": "ClusterNotUpgradeable",
        "name": "One or more cluster operators have been blocking minor version cluster upgrades for at least an hour.",
        "description": "In most cases, you will still be able to apply patch releases. Reason AdminAckRequired."
    },
    {
        "probableCauseId": "AlertmanagerReceiversNotConfigured",
        "name": "Receivers (notification integrations) are not configured on Alertmanager",
        "description": "Alerts are not configured to be sent to a notification system, meaning that you may not be notified in a timely fashion when important failures occur."
    },
    {
        "probableCauseId": "HighOverallControlPlaneMemory",
        "name": "Memory utilization across all control plane nodes is high, and could impact responsiveness and stability.",
        "description": "Given three control plane nodes, the overall memory utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the kube-apiserver and etcd may be slow to respond."
    },
    {
        "probableCauseId": "NodeClockNotSynchronising",
        "name": "Clock not synchronising.",
        "description": "Clock on host is not synchronising. Ensure NTP is configured on this host."
    },
    {
        "probableCauseId": "NodeClockSkewDetected",
        "name": "Clock skew detected.",
        "description": "Clock is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host."
    },
    {
        "probableCauseId": "IngressWithoutClassName",
        "name": "Ingress without IngressClassName for 1 day",
        "description": "This alert fires when there is an Ingress with an unset IngressClassName for longer than one day."
    },
    {
        "probableCauseId": "NodeMemoryHighUtilization",
        "name": "Host is running out of memory.",
        "description": "Memory is filling up and has been above the memory high utilization threshold for the last 15 minutes."
    }
]