From 2c221324b3effb3b1a6d54d570e2164eda034a84 Mon Sep 17 00:00:00 2001 From: Enje Shakirova <166139731+enjenjenje@users.noreply.github.com> Date: Mon, 17 Feb 2025 16:50:27 +0100 Subject: [PATCH 01/10] chore: prometheus monitoring tune (#133) * increased metric retention up to a week * added lightmare monitoring by default * added specs to enable lightmare monitoring --- .../monitoring/configs/kustomization.yml | 1 + .../monitoring/configs/lightmare.yml | 16 ++++++++++++++++ .../configs/lightmare/kustomization.yml | 19 +++++++++++++++++++ .../configs/lightmare/service-monitor.yml | 17 +++++++++++++++++ .../kube-prometheus-stack/release.yaml | 3 ++- 5 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 flux/components/monitoring/configs/lightmare.yml create mode 100644 flux/components/monitoring/configs/lightmare/kustomization.yml create mode 100644 flux/components/monitoring/configs/lightmare/service-monitor.yml diff --git a/flux/components/monitoring/configs/kustomization.yml b/flux/components/monitoring/configs/kustomization.yml index 88ed8b03..99bda9a6 100644 --- a/flux/components/monitoring/configs/kustomization.yml +++ b/flux/components/monitoring/configs/kustomization.yml @@ -5,3 +5,4 @@ resources: - system - kubernetes - kubevirt.yml + - lightmare.yml diff --git a/flux/components/monitoring/configs/lightmare.yml b/flux/components/monitoring/configs/lightmare.yml new file mode 100644 index 00000000..2e49e81b --- /dev/null +++ b/flux/components/monitoring/configs/lightmare.yml @@ -0,0 +1,16 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: lightmare-monitoring + namespace: flux-system +spec: + interval: 1m0s + path: ./flux/components/monitoring/configs/lightmare + prune: true + sourceRef: + kind: GitRepository + name: spectrum + namespace: flux-system + dependsOn: + - name: lightmare + namespace: flux-system diff --git a/flux/components/monitoring/configs/lightmare/kustomization.yml b/flux/components/monitoring/configs/lightmare/kustomization.yml new file mode 100644 index 00000000..b9dec980 --- /dev/null +++ b/flux/components/monitoring/configs/lightmare/kustomization.yml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: monitoring +resources: + - service-monitor.yml +generatorOptions: + disableNameSuffixHash: true + labels: + grafana_dashboard: "1" + app.kubernetes.io/part-of: spectrum-monitoring + app.kubernetes.io/component: lightmare +commonAnnotations: + grafana_folder: "lightmare" + +# tbd +# configMapGenerator: +# - name: dashboards-k8s-lightmare +# files: +# - *.json diff --git a/flux/components/monitoring/configs/lightmare/service-monitor.yml b/flux/components/monitoring/configs/lightmare/service-monitor.yml new file mode 100644 index 00000000..f6f4e10a --- /dev/null +++ b/flux/components/monitoring/configs/lightmare/service-monitor.yml @@ -0,0 +1,17 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: lightmare + labels: + app.kubernetes.io/part-of: spectrum-monitoring + app.kubernetes.io/component: lightmare +spec: + selector: + matchLabels: + app: lightmare + namespaceSelector: + matchNames: + - lightmare + endpoints: + - port: metrics + scheme: http diff --git a/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml b/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml index 2a5c2be8..352a684d 100644 --- a/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml +++ b/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml @@ -32,7 +32,7 @@ spec: enabled: false prometheus: prometheusSpec: - retention: 24h + retention: 168h resources: requests: cpu: 200m @@ -48,6 +48,7 @@ spec: - "kube-state-metrics" - "prometheus-node-exporter" - "spectrum-monitoring" + - "lightmare" podMonitorNamespaceSelector: {} podMonitorSelector: From 74f6e407f486d057dd18307c03c9ac7fb0deb5db Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 18 Feb 2025 16:26:51 +0300 Subject: [PATCH 02/10] chore: update lightmare to 0.2.3 (#134) update lightmare --- flux/core/lightmare/app/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flux/core/lightmare/app/release.yml b/flux/core/lightmare/app/release.yml index c2c17405..edcd3802 100644 --- a/flux/core/lightmare/app/release.yml +++ b/flux/core/lightmare/app/release.yml @@ -17,7 +17,7 @@ spec: operator: image: repository: "fluencelabs/lightmare" - tag: "0.2.2@sha256:c388db52bd9e7533862c011864819bf1253e7633d13fed618597a34a54f8427f" + tag: "0.2.3@sha256:42ab72c54c359aca3219d774eeeac98de3f588c36505ceba0b4c787916d9e236" config: ccp: image: From 22725b3f92a1457369fbca22e52d0e3cd0e6b8f1 Mon Sep 17 00:00:00 2001 From: Enje Shakirova <166139731+enjenjenje@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:02:16 +0100 Subject: [PATCH 03/10] chore: enabled monitoring by default (#137) * enabled monitoring by def * upped log retention to 7d --- flux/clusters/default/kustomization.yml | 1 + flux/components/monitoring/controllers/loki-stack/release.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/flux/clusters/default/kustomization.yml b/flux/clusters/default/kustomization.yml index 42e85979..44bae8b8 100644 --- a/flux/clusters/default/kustomization.yml +++ b/flux/clusters/default/kustomization.yml @@ -4,3 +4,4 @@ resources: - ../../core/local-path-provisioner - ../../core/metrics-server - ../../core/kubelet-serving-cert-approver + - ../../components/monitoring diff --git a/flux/components/monitoring/controllers/loki-stack/release.yaml b/flux/components/monitoring/controllers/loki-stack/release.yaml index a28d396a..c63673e1 100644 --- a/flux/components/monitoring/controllers/loki-stack/release.yaml +++ b/flux/components/monitoring/controllers/loki-stack/release.yaml @@ -37,4 +37,4 @@ spec: max_look_back_period: 0s table_manager: retention_deletes_enabled: true - retention_period: 72h + retention_period: 168h From f43d5c4066a938050da8a654803c4777ecf5c138 Mon Sep 17 00:00:00 2001 From: Enje Shakirova <166139731+enjenjenje@users.noreply.github.com> Date: Wed, 19 Feb 2025 15:34:42 +0100 Subject: [PATCH 04/10] fix: spectrum add qps increase to cilium config (#140) --- terraform-modules/spectrum/templates/cilium.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/terraform-modules/spectrum/templates/cilium.yml b/terraform-modules/spectrum/templates/cilium.yml index 435d4e41..7daf509e 100644 --- a/terraform-modules/spectrum/templates/cilium.yml +++ b/terraform-modules/spectrum/templates/cilium.yml @@ -75,6 +75,13 @@ socketLB: k8sServiceHost: localhost +k8sClientRateLimit: + qps: 1000 + burst: 5000 + operator: + qps: 1000 + burst: 5000 + %{ if cluster_flavour == "talos" } k8sServicePort: 7445 %{ else } From f7f6ed2906f1b6a4453bbc88746cb1a7b2eba8ee Mon Sep 17 00:00:00 2001 From: fluencebot <116741523+fluencebot@users.noreply.github.com> Date: Wed, 19 Feb 2025 17:38:18 +0300 Subject: [PATCH 05/10] chore(main): release terraform-module-spectrum 0.1.2 (#141) --- .github/release-please/manifest.json | 2 +- examples/k3s/spectrum.tf | 2 +- examples/talos/spectrum.tf | 2 +- terraform-modules/spectrum/CHANGELOG.md | 7 +++++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/release-please/manifest.json b/.github/release-please/manifest.json index c3463109..c6294040 100644 --- a/.github/release-please/manifest.json +++ b/.github/release-please/manifest.json @@ -1,5 +1,5 @@ { "terraform-modules/talos": "0.2.1", - "terraform-modules/spectrum": "0.1.1", + "terraform-modules/spectrum": "0.1.2", "terraform-modules/k3s": "0.1.0" } \ No newline at end of file diff --git a/examples/k3s/spectrum.tf b/examples/k3s/spectrum.tf index 7c00a16d..9fa88e56 100644 --- a/examples/k3s/spectrum.tf +++ b/examples/k3s/spectrum.tf @@ -6,6 +6,6 @@ provider "helm" { module "spectrum" { depends_on = [module.k3s] - source = "git::https://github.com/fluencelabs/spectrum.git//terraform-modules/spectrum?ref=terraform-module-spectrum-v0.1.1" # x-release-please-version + source = "git::https://github.com/fluencelabs/spectrum.git//terraform-modules/spectrum?ref=terraform-module-spectrum-v0.1.2" # x-release-please-version cluster_flavour = "k3s" } diff --git a/examples/talos/spectrum.tf b/examples/talos/spectrum.tf index 28289daf..32f48036 100644 --- a/examples/talos/spectrum.tf +++ b/examples/talos/spectrum.tf @@ -1,6 +1,6 @@ module "spectrum" { depends_on = [module.talos] - source = "git::https://github.com/fluencelabs/spectrum.git//terraform-modules/spectrum?ref=terraform-module-spectrum-v0.1.1" # x-release-please-version + source = "git::https://github.com/fluencelabs/spectrum.git//terraform-modules/spectrum?ref=terraform-module-spectrum-v0.1.2" # x-release-please-version network = "main" cluster = "default" } diff --git a/terraform-modules/spectrum/CHANGELOG.md b/terraform-modules/spectrum/CHANGELOG.md index 369f2d10..ff702af7 100644 --- a/terraform-modules/spectrum/CHANGELOG.md +++ b/terraform-modules/spectrum/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.1.2](https://github.com/fluencelabs/spectrum/compare/terraform-module-spectrum-v0.1.1...terraform-module-spectrum-v0.1.2) (2025-02-19) + + +### Bug Fixes + +* spectrum add qps increase to cilium config ([#140](https://github.com/fluencelabs/spectrum/issues/140)) ([fa3df5a](https://github.com/fluencelabs/spectrum/commit/fa3df5a99d4f46cabbdb2036598e1ee18c868042)) + ## [0.1.1](https://github.com/fluencelabs/spectrum/compare/terraform-module-spectrum-v0.1.0...terraform-module-spectrum-v0.1.1) (2025-01-23) From 46e7529bf76b61e348f68c7dbb694b99f6f142bc Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 21 Feb 2025 18:51:20 +0300 Subject: [PATCH 06/10] chore: update lightmare + chart (#142) --- flux/core/lightmare/app/release.yml | 2 +- flux/core/lightmare/app/repository.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flux/core/lightmare/app/release.yml b/flux/core/lightmare/app/release.yml index edcd3802..cd712bc3 100644 --- a/flux/core/lightmare/app/release.yml +++ b/flux/core/lightmare/app/release.yml @@ -17,7 +17,7 @@ spec: operator: image: repository: "fluencelabs/lightmare" - tag: "0.2.3@sha256:42ab72c54c359aca3219d774eeeac98de3f588c36505ceba0b4c787916d9e236" + tag: "0.2.4@sha256:4fe2971e46c458265f0e7e4c101c475b1a0fffadd745d20df4ca4c7402f10b0c" config: ccp: image: diff --git a/flux/core/lightmare/app/repository.yml b/flux/core/lightmare/app/repository.yml index 5a71dd6d..0ae8857a 100644 --- a/flux/core/lightmare/app/repository.yml +++ b/flux/core/lightmare/app/repository.yml @@ -7,4 +7,4 @@ spec: interval: 5m0s url: oci://registry-1.docker.io/fluencelabs/lightmare-chart ref: - tag: 0.1.1 + tag: 0.1.2 From 13b8d731f5c689ca4e70ec70aa4b5defc10d3ee4 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 21 Feb 2025 20:19:19 +0300 Subject: [PATCH 07/10] chore: update lightmare chart (#144) chore: update ligthmare chart --- flux/core/lightmare/app/repository.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flux/core/lightmare/app/repository.yml b/flux/core/lightmare/app/repository.yml index 0ae8857a..e6821487 100644 --- a/flux/core/lightmare/app/repository.yml +++ b/flux/core/lightmare/app/repository.yml @@ -7,4 +7,4 @@ spec: interval: 5m0s url: oci://registry-1.docker.io/fluencelabs/lightmare-chart ref: - tag: 0.1.2 + tag: 0.1.3 From a67fdb1cd7807f60ec1865248a0050db01405b72 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 24 Feb 2025 11:05:46 +0100 Subject: [PATCH 08/10] chore(deps): update registry-1.docker.io/fluencelabs/lightmare-chart docker tag to v0.1.4 (#146) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- flux/core/lightmare/app/repository.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flux/core/lightmare/app/repository.yml b/flux/core/lightmare/app/repository.yml index e6821487..a01e58c6 100644 --- a/flux/core/lightmare/app/repository.yml +++ b/flux/core/lightmare/app/repository.yml @@ -7,4 +7,4 @@ spec: interval: 5m0s url: oci://registry-1.docker.io/fluencelabs/lightmare-chart ref: - tag: 0.1.3 + tag: 0.1.4 From fd92db86b7ccb579171dfaaad31bfa7c5082e672 Mon Sep 17 00:00:00 2001 From: Enje Shakirova <166139731+enjenjenje@users.noreply.github.com> Date: Mon, 24 Feb 2025 18:34:55 +0100 Subject: [PATCH 09/10] fix: Spectrum priority class tune (#150) --- flux/clusters/default/kustomization.yml | 1 + flux/clusters/ephemeral/kustomization.yml | 1 + .../kube-prometheus-stack/release.yaml | 9 +++++++++ .../controllers/loki-stack/release.yaml | 2 ++ .../local-path-provisioner/kustomization.yml | 8 ++++++++ .../monitoring-prerequisites/kustomization.yaml | 4 ++++ .../core/monitoring-prerequisites/manifests.yaml | 9 +++++++++ terraform-modules/spectrum/flux.tf | 16 ++++++++++++++++ 8 files changed, 50 insertions(+) create mode 100644 flux/core/monitoring-prerequisites/kustomization.yaml create mode 100644 flux/core/monitoring-prerequisites/manifests.yaml diff --git a/flux/clusters/default/kustomization.yml b/flux/clusters/default/kustomization.yml index 44bae8b8..ce1947ee 100644 --- a/flux/clusters/default/kustomization.yml +++ b/flux/clusters/default/kustomization.yml @@ -4,4 +4,5 @@ resources: - ../../core/local-path-provisioner - ../../core/metrics-server - ../../core/kubelet-serving-cert-approver + - ../../core/monitoring-prerequisites - ../../components/monitoring diff --git a/flux/clusters/ephemeral/kustomization.yml b/flux/clusters/ephemeral/kustomization.yml index d63b5510..4611c9b9 100644 --- a/flux/clusters/ephemeral/kustomization.yml +++ b/flux/clusters/ephemeral/kustomization.yml @@ -9,6 +9,7 @@ resources: - ../../core/local-path-provisioner - ../../core/metrics-server - ../../core/kubelet-serving-cert-approver + - ../../core/monitoring-prerequisites - ../../components/kubevirt - ../../components/monitoring - hubble-ingress.yml diff --git a/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml b/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml index 352a684d..95090863 100644 --- a/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml +++ b/flux/components/monitoring/controllers/kube-prometheus-stack/release.yaml @@ -30,8 +30,11 @@ spec: values: alertmanager: enabled: false + prometheusOperator: + priorityClassName: spectrum-monitoring prometheus: prometheusSpec: + priorityClassName: spectrum-monitoring retention: 168h resources: requests: @@ -56,6 +59,7 @@ spec: app.kubernetes.io/part-of: spectrum-monitoring grafana: + priorityClassName: spectrum-monitoring defaultDashboardsEnabled: false adminPassword: fluence sidecar: @@ -68,3 +72,8 @@ spec: provider: allowUiUpdates: true foldersFromFilesStructure: true + + kube-state-metrics: + priorityClassName: spectrum-monitoring + prometheus-node-exporter: + priorityClassName: spectrum-monitoring diff --git a/flux/components/monitoring/controllers/loki-stack/release.yaml b/flux/components/monitoring/controllers/loki-stack/release.yaml index c63673e1..8cfc4cc2 100644 --- a/flux/components/monitoring/controllers/loki-stack/release.yaml +++ b/flux/components/monitoring/controllers/loki-stack/release.yaml @@ -24,9 +24,11 @@ spec: values: promtail: enabled: true + priorityClassName: spectrum-monitoring loki: enabled: true isDefault: false + priorityClassName: spectrum-monitoring serviceMonitor: enabled: true additionalLabels: diff --git a/flux/core/local-path-provisioner/kustomization.yml b/flux/core/local-path-provisioner/kustomization.yml index b632a809..9cc8343b 100644 --- a/flux/core/local-path-provisioner/kustomization.yml +++ b/flux/core/local-path-provisioner/kustomization.yml @@ -33,3 +33,11 @@ patches: name: local-path-storage labels: pod-security.kubernetes.io/enforce: privileged + - patch: |- + - op: replace + path: /spec/template/spec/priorityClassName + value: system-cluster-critical + target: + kind: Deployment + name: local-path-provisioner + namespace: local-path-storage diff --git a/flux/core/monitoring-prerequisites/kustomization.yaml b/flux/core/monitoring-prerequisites/kustomization.yaml new file mode 100644 index 00000000..f81cecbb --- /dev/null +++ b/flux/core/monitoring-prerequisites/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./manifests.yaml diff --git a/flux/core/monitoring-prerequisites/manifests.yaml b/flux/core/monitoring-prerequisites/manifests.yaml new file mode 100644 index 00000000..483c73b7 --- /dev/null +++ b/flux/core/monitoring-prerequisites/manifests.yaml @@ -0,0 +1,9 @@ + +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: spectrum-monitoring +value: 100000 +globalDefault: false +description: "Spectrum monitoring priority class" diff --git a/terraform-modules/spectrum/flux.tf b/terraform-modules/spectrum/flux.tf index a5a006d7..3b7d5443 100644 --- a/terraform-modules/spectrum/flux.tf +++ b/terraform-modules/spectrum/flux.tf @@ -21,6 +21,22 @@ resource "helm_release" "flux" { name = "imageAutomationController.create" value = "false" } + set { + name = "helmController.priorityClassName" + value = "system-cluster-critical" + } + set { + name = "kustomizeController.priorityClassName" + value = "system-cluster-critical" + } + set { + name = "notificationController.priorityClassName" + value = "system-cluster-critical" + } + set { + name = "sourceController.priorityClassName" + value = "system-cluster-critical" + } } resource "helm_release" "flux-sync" { From 2384f0b3187457a67270abbf39521b2319293cb5 Mon Sep 17 00:00:00 2001 From: Nick Date: Mon, 24 Feb 2025 20:42:14 +0300 Subject: [PATCH 10/10] chore: update lightmare to 0.2.5 (#153) chore: update ligthmare to 0.2.5 --- flux/core/lightmare/app/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flux/core/lightmare/app/release.yml b/flux/core/lightmare/app/release.yml index cd712bc3..96c32478 100644 --- a/flux/core/lightmare/app/release.yml +++ b/flux/core/lightmare/app/release.yml @@ -17,7 +17,7 @@ spec: operator: image: repository: "fluencelabs/lightmare" - tag: "0.2.4@sha256:4fe2971e46c458265f0e7e4c101c475b1a0fffadd745d20df4ca4c7402f10b0c" + tag: "0.2.5@sha256:fa77297d4f369285e1ec9fea40217b2f5eeacf87fb993fcc38d9a7ac7fe5c398" config: ccp: image: