Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ATMOSPHERE-527] [stable/zed] Improve NeutronNetworkOutOfIPs alarm #2064

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions hack/promtool-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

import json
import os
import shutil
import subprocess
import tempfile

import rjsonnet
import yaml


def import_callback(base, rel):
    """
    Resolve and load a file imported from Jsonnet code.

    :param base: The directory containing the code that did the import.
    :param rel: The path imported by the code.
    :returns: A ``(path, content)`` tuple holding the joined path and the
        text read from that file.
    :raises OSError: If the file cannot be opened.
    """
    path = os.path.join(base, rel)
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine running the script (for example a CI node with LANG=C).
    with open(path, "r", encoding="utf-8") as f:
        return path, f.read()


def main():
    """
    Compile the Prometheus rules from Jsonnet and run their unit tests.

    Evaluates ``rules.jsonnet``, writes every top-level entry of the result
    to its own ``<name>.yml`` file in a temporary directory, writes a copy of
    ``tests.yml`` whose ``rule_files`` key lists those files, and then runs
    ``promtool test rules`` against that copy.  The temporary directory is
    removed afterwards, whether or not the run succeeded.

    Both input paths are relative, so the script has to be started from the
    repository root.

    :raises Exception: If a rule file with the same name was already written.
    :raises FileNotFoundError: If ``promtool`` is not installed.
    :raises SystemExit: With promtool's exit status when the tests fail.
    """
    compiled_string = rjsonnet.evaluate_file(
        "roles/kube_prometheus_stack/files/jsonnet/rules.jsonnet",
        import_callback=import_callback,
    )
    compiled = json.loads(compiled_string)

    # The context manager deletes the directory on every exit path, including
    # exceptions raised while writing files or running promtool.
    with tempfile.TemporaryDirectory() as tempdir:
        rule_files = []

        for rule_file, data in compiled.items():
            file_name = rule_file + ".yml"
            path = os.path.join(tempdir, file_name)

            if os.path.exists(path):
                raise Exception(f"File {path} already exists")
            with open(path, "w") as f:
                yaml.dump(data, f)

            rule_files.append(path)

        with open("roles/kube_prometheus_stack/files/jsonnet/tests.yml") as f:
            tests = yaml.safe_load(f)

        # Point the test definition at the freshly generated rule files.
        tests["rule_files"] = rule_files

        tests_file = os.path.join(tempdir, "tests.yml")
        with open(tests_file, "w") as f:
            yaml.dump(tests, f)

        # TODO(mnaser): Enable JUnit output
        # An argument list avoids the shell entirely; the result is kept so
        # that the exit status can be checked instead of silently dropped.
        result = subprocess.run(["promtool", "test", "rules", tests_file])

    # Propagate failures: without this the script (and therefore the tox and
    # CI job around it) would report success even when rule tests fail.
    if result.returncode != 0:
        raise SystemExit(result.returncode)


# Run the rule tests only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@
{
alert: 'NeutronNetworkOutOfIPs',
annotations: {
description: 'The subnet {{ $labels.subnet_name }} within {{ $labels.network_name }} is currently at {{ $value }}% utilization. If the IP addresses run out, it will impact the provisioning of new ports.',
summary: '[{{ $labels.network_name }}] {{ $labels.subnet_name }} running out of IPs',
description: 'The network {{ $labels.network_id }} is currently at {{ $value }}% utilization. If the IP addresses run out, it will impact the provisioning of new ports.',
summary: '[{{ $labels.network_id }}] Network running out of IPs',
},
expr: 'sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) / sum by (network_id) (openstack_neutron_network_ip_availabilities_total{project_id!=""}) * 100 > 80',
expr: '(sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) and on (network_id) label_replace(openstack_neutron_network{is_external="true", is_shared="true"}, "network_id", "$1", "id", "(.*)")) / (sum by (network_id) (openstack_neutron_network_ip_availabilities_total{project_id!=""}) and on (network_id) label_replace(openstack_neutron_network{is_external="true", is_shared="true"}, "network_id", "$1", "id", "(.*)")) * 100 > 80',
'for': '6h',
labels: {
severity: 'warning',
Expand Down
35 changes: 35 additions & 0 deletions roles/kube_prometheus_stack/files/jsonnet/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

tests:
- interval: 1m
input_series:
- series: 'openstack_neutron_network{id="4cf895c9-c3d1-489e-b02e-59b5c8976809",is_external="false",is_shared="false",name="public",provider_network_type="vlan",provider_physical_network="external",provider_segmentation_id="3",status="ACTIVE",subnets="54d6f61d-db07-451c-9ab3-b9609b6b6f0b",tags="tag1,tag2",tenant_id="4fd44f30292945e481c7b8a0c8908869"} 0'
values: '0x360'
- series: 'openstack_neutron_network_ip_availabilities_total{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '253x360'
- series: 'openstack_neutron_network_ip_availabilities_used{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '250x360'
alert_rule_test:
- eval_time: 6h
alertname: NeutronNetworkOutOfIPs
exp_alerts: []

- interval: 1m
input_series:
- series: 'openstack_neutron_network{id="4cf895c9-c3d1-489e-b02e-59b5c8976809",is_external="true",is_shared="true",name="public",provider_network_type="vlan",provider_physical_network="external",provider_segmentation_id="3",status="ACTIVE",subnets="54d6f61d-db07-451c-9ab3-b9609b6b6f0b",tags="tag1,tag2",tenant_id="4fd44f30292945e481c7b8a0c8908869"} 0'
values: '0x360'
- series: 'openstack_neutron_network_ip_availabilities_total{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '253x360'
- series: 'openstack_neutron_network_ip_availabilities_used{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '250x360'
alert_rule_test:
- eval_time: 6h
alertname: NeutronNetworkOutOfIPs
exp_alerts:
- exp_labels:
network_id: 4cf895c9-c3d1-489e-b02e-59b5c8976809
severity: P3
exp_annotations:
summary: "[4cf895c9-c3d1-489e-b02e-59b5c8976809] Network running out of IPs"
description: "The network 4cf895c9-c3d1-489e-b02e-59b5c8976809 is currently at 98.81422924901186% utilization. If the IP addresses run out, it will impact the provisioning of new ports."
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,11 @@ allowlist_externals =
bash
commands =
bash {toxinidir}/build/build-manila-image.sh

[testenv:promtool-test]
skip_install = true
deps =
PyYAML
rjsonnet
commands =
python3 {toxinidir}/hack/promtool-test.py
7 changes: 7 additions & 0 deletions zuul.d/jobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
parent: tox-linters
pre-run: zuul.d/playbooks/linters/pre.yml

- job:
name: atmosphere-tox-promtool-test
parent: tox
pre-run: zuul.d/playbooks/promtool/pre.yml
vars:
tox_envlist: promtool-test

- job:
name: atmosphere-tox-py3
parent: tox
Expand Down
34 changes: 34 additions & 0 deletions zuul.d/playbooks/promtool/pre.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

- hosts: all
tasks:
- name: Install promtool
block:
- name: Create temporary file to download
ansible.builtin.tempfile:
state: file
suffix: .tar.gz
register: promtool_file

- name: Download Prometheus
ansible.builtin.get_url:
url: https://github.com/prometheus/prometheus/releases/download/v2.55.0/prometheus-2.55.0.linux-amd64.tar.gz
dest: "{{ promtool_file.path }}"
checksum: sha256:7a6b6d5ea003e8d59def294392c64e28338da627bf760cf268e788d6a8832a23

- name: Extract Prometheus into /usr/local/bin
become: true
ansible.builtin.unarchive:
src: "{{ promtool_file.path }}"
dest: /usr/local/bin
remote_src: true
extra_opts:
- --strip-components=1
include:
- prometheus-2.55.0.linux-amd64/promtool
always:
- name: Remove temporary file
ansible.builtin.file:
path: "{{ promtool_file.path }}"
state: absent
1 change: 1 addition & 0 deletions zuul.d/project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
jobs:
- atmosphere-chart-vendor
- atmosphere-linters
- atmosphere-tox-promtool-test
- atmosphere-tox-py3
- atmosphere-build-collection:
dependencies: &molecule_check_dependencies
Expand Down