From 024bf73d765248140834fbe936f9e77162b33820 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Thu, 2 May 2024 04:20:46 +0000 Subject: [PATCH] fix(grafana): fix circular dependency between grafana <-> cockroachdb_managed --- infra/tf/cockroachdb_managed/main.tf | 5 +- infra/tf/cockroachdb_managed/vars.tf | 4 + infra/tf/grafana/grafana.tf | 141 ++++++++++++++++++ .../grafana_dashboards/cache.json | 0 .../grafana_dashboards/chirp-api.json | 0 .../grafana_dashboards/chirp-operation.json | 0 .../grafana_dashboards/chirp-perf-spans.json | 0 .../grafana_dashboards/chirp-service.json | 0 .../node-exporter-full.json | 0 .../node-exporter-multiple.json | 0 .../grafana_dashboards/provisioning.json | 0 .../resource-allocations.json | 0 .../grafana_dashboards/rivet-logs.json | 0 .../grafana_dashboards/rivet-sql.json | 0 .../grafana_dashboards/traefik-services.json | 0 infra/tf/grafana/providers.tf | 10 ++ infra/tf/grafana/vars.tf | 23 +++ infra/tf/k8s_infra/grafana.tf | 136 ----------------- infra/tf/k8s_infra/prometheus.tf | 2 - .../core/src/dep/terraform/remote_states.rs | 5 +- lib/bolt/core/src/tasks/infra/mod.rs | 25 ++-- 21 files changed, 202 insertions(+), 149 deletions(-) create mode 100644 infra/tf/grafana/grafana.tf rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/cache.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/chirp-api.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/chirp-operation.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/chirp-perf-spans.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/chirp-service.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/node-exporter-full.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/node-exporter-multiple.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/provisioning.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/resource-allocations.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/rivet-logs.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/rivet-sql.json (100%) rename infra/tf/{k8s_infra => grafana}/grafana_dashboards/traefik-services.json (100%) create mode 100644 infra/tf/grafana/providers.tf create mode 100644 infra/tf/grafana/vars.tf diff --git a/infra/tf/cockroachdb_managed/main.tf b/infra/tf/cockroachdb_managed/main.tf index 74a4155886..b41d59a767 100644 --- a/infra/tf/cockroachdb_managed/main.tf +++ b/infra/tf/cockroachdb_managed/main.tf @@ -54,7 +54,10 @@ data "cockroach_cluster_cert" "main" { } resource "kubernetes_config_map" "crdb_ca" { - for_each = toset(["rivet-service", "bolt"]) + for_each = toset(flatten([ + ["rivet-service", "bolt"], + var.prometheus_enabled ? ["grafana"] : [] + ])) metadata { name = "crdb-ca" diff --git a/infra/tf/cockroachdb_managed/vars.tf b/infra/tf/cockroachdb_managed/vars.tf index edc72063a9..3352f580d2 100644 --- a/infra/tf/cockroachdb_managed/vars.tf +++ b/infra/tf/cockroachdb_managed/vars.tf @@ -17,3 +17,7 @@ variable "cockroachdb_request_unit_limit" { variable "cockroachdb_storage_limit" { type = string } + +variable "prometheus_enabled" { + type = bool +} diff --git a/infra/tf/grafana/grafana.tf b/infra/tf/grafana/grafana.tf new file mode 100644 index 0000000000..cae1e286c3 --- /dev/null +++ b/infra/tf/grafana/grafana.tf @@ -0,0 +1,141 @@ +locals { + service_grafana = lookup(var.services, "grafana", { + count = 1 + resources = { + cpu = 500 + memory = 512 + } + }) + + grafana_dashboards = { + for f in fileset("${path.module}/grafana_dashboards/", "*.json"): + "${trimsuffix(f, ".json")}" => { + body = file("${path.module}/grafana_dashboards/${f}") + } + } + + crdb_host = "${try(data.terraform_remote_state.cockroachdb_k8s.outputs.host, data.terraform_remote_state.cockroachdb_managed.outputs.host)}:${try(data.terraform_remote_state.cockroachdb_k8s.outputs.port, data.terraform_remote_state.cockroachdb_managed.outputs.port)}" +} + +module "crdb_user_grafana_secrets" { + source = "../modules/secrets" + + keys = [ "crdb/user/grafana/username", "crdb/user/grafana/password" ] +} + +resource "helm_release" "grafana" { + name = "grafana" + namespace = "grafana" + repository = "https://grafana.github.io/helm-charts" + chart = "grafana" + version = "7.3.9" + values = [yamlencode({ + "grafana.ini" = { + auth = { + disable_login_form = true + } + "auth.anonymous" = { + enabled = true + org_role = "Admin" + } + } + + resources = var.limit_resources ? { + limits = { + memory = "${local.service_grafana.resources.memory}Mi" + cpu = "${local.service_grafana.resources.cpu}m" + } + } : null + + datasources = { + "datasources.yaml" = { + apiVersion = 1 + + datasources = [ + { + name = "Prometheus" + type = "prometheus" + uid = "prometheus" + url = "http://prometheus-kube-prometheus-prometheus.prometheus:9090/" + access = "proxy" + isDefault = true + jsonData = { + httpMethod = "POST" + # prometheus.prometheusSpec.scrapeInterval + timeInterval = "30s" + } + }, + { + name = "Loki" + type = "loki" + uid = "loki" + url = "http://loki-gateway.loki.svc.cluster.local:80/" + access = "proxy" + jsonData = {} + }, + { + name = "CockroachDB" + type = "postgres" + uid = "crdb" + url = local.crdb_host + user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] + secureJsonData = { + password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] + } + jsonData = { + sslmode = "verify-ca" + sslRootCertFile = "/local/crdb/ca.crt" + } + secret = true + } + ] + } + } + + extraConfigmapMounts = [ + # TLS Cert for postgres datasource + { + name = "crdb-ca" + configMap = "crdb-ca" + mountPath = "/local/crdb/ca.crt" + subPath = "ca.crt" + readOnly = true + } + ] + + sidecar = { + dashboards = { + enabled = true + } + } + + serviceMonitor = { + enabled = true + path = "/metrics" + labels = {} + + interval = "" + scheme = "http" + tlsConfig = {} + scrapeTimeout = "15s" + + relabelings = [] + } + })] +} + +resource "kubernetes_config_map" "grafana_dashboard" { + for_each = local.grafana_dashboards + + metadata { + namespace = "grafana" + name = "grafana-rivet-${each.key}" + labels = { + grafana_dashboard = "1" + } + } + + data = { + "${each.key}.json" = each.value.body + } +} diff --git a/infra/tf/k8s_infra/grafana_dashboards/cache.json b/infra/tf/grafana/grafana_dashboards/cache.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/cache.json rename to infra/tf/grafana/grafana_dashboards/cache.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-api.json b/infra/tf/grafana/grafana_dashboards/chirp-api.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-api.json rename to infra/tf/grafana/grafana_dashboards/chirp-api.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-operation.json b/infra/tf/grafana/grafana_dashboards/chirp-operation.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-operation.json rename to infra/tf/grafana/grafana_dashboards/chirp-operation.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-perf-spans.json b/infra/tf/grafana/grafana_dashboards/chirp-perf-spans.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-perf-spans.json rename to infra/tf/grafana/grafana_dashboards/chirp-perf-spans.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/chirp-service.json b/infra/tf/grafana/grafana_dashboards/chirp-service.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/chirp-service.json rename to infra/tf/grafana/grafana_dashboards/chirp-service.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/node-exporter-full.json b/infra/tf/grafana/grafana_dashboards/node-exporter-full.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/node-exporter-full.json rename to infra/tf/grafana/grafana_dashboards/node-exporter-full.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/node-exporter-multiple.json b/infra/tf/grafana/grafana_dashboards/node-exporter-multiple.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/node-exporter-multiple.json rename to infra/tf/grafana/grafana_dashboards/node-exporter-multiple.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/provisioning.json b/infra/tf/grafana/grafana_dashboards/provisioning.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/provisioning.json rename to infra/tf/grafana/grafana_dashboards/provisioning.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/resource-allocations.json b/infra/tf/grafana/grafana_dashboards/resource-allocations.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/resource-allocations.json rename to infra/tf/grafana/grafana_dashboards/resource-allocations.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/rivet-logs.json b/infra/tf/grafana/grafana_dashboards/rivet-logs.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/rivet-logs.json rename to infra/tf/grafana/grafana_dashboards/rivet-logs.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/rivet-sql.json b/infra/tf/grafana/grafana_dashboards/rivet-sql.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/rivet-sql.json rename to infra/tf/grafana/grafana_dashboards/rivet-sql.json diff --git a/infra/tf/k8s_infra/grafana_dashboards/traefik-services.json b/infra/tf/grafana/grafana_dashboards/traefik-services.json similarity index 100% rename from infra/tf/k8s_infra/grafana_dashboards/traefik-services.json rename to infra/tf/grafana/grafana_dashboards/traefik-services.json diff --git a/infra/tf/grafana/providers.tf b/infra/tf/grafana/providers.tf new file mode 100644 index 0000000000..f4a763006c --- /dev/null +++ b/infra/tf/grafana/providers.tf @@ -0,0 +1,10 @@ +provider "kubernetes" { + config_path = var.kubeconfig_path +} + +provider "helm" { + kubernetes { + config_path = var.kubeconfig_path + } +} + diff --git a/infra/tf/grafana/vars.tf b/infra/tf/grafana/vars.tf new file mode 100644 index 0000000000..e338af62c0 --- /dev/null +++ b/infra/tf/grafana/vars.tf @@ -0,0 +1,23 @@ +variable "namespace" { + type = string +} + +# MARK: Services +variable "services" { + type = map(object({ + count = number + resources = object({ + cpu = number + memory = number + }) + })) +} + +# MARK: K8s +variable "kubeconfig_path" { + type = string +} + +variable "limit_resources" { + type = bool +} diff --git a/infra/tf/k8s_infra/grafana.tf b/infra/tf/k8s_infra/grafana.tf index 248ba37fc1..2a3346ee3e 100644 --- a/infra/tf/k8s_infra/grafana.tf +++ b/infra/tf/k8s_infra/grafana.tf @@ -1,20 +1,3 @@ -locals { - service_grafana = lookup(var.services, "grafana", { - count = 1 - resources = { - cpu = 500 - memory = 512 - } - }) - - grafana_dashboards = { - for f in fileset("${path.module}/grafana_dashboards/", "*.json"): - "${trimsuffix(f, ".json")}" => { - body = file("${path.module}/grafana_dashboards/${f}") - } - } -} - resource "kubernetes_namespace" "grafana" { count = var.prometheus_enabled ? 1 : 0 @@ -23,122 +6,3 @@ resource "kubernetes_namespace" "grafana" { } } -resource "helm_release" "grafana" { - count = var.prometheus_enabled ? 1 : 0 - depends_on = [helm_release.vpa] - - name = "grafana" - namespace = kubernetes_namespace.grafana.0.metadata.0.name - repository = "https://grafana.github.io/helm-charts" - chart = "grafana" - version = "7.3.9" - values = [yamlencode({ - "grafana.ini" = { - auth = { - disable_login_form = true - } - "auth.anonymous" = { - enabled = true - org_role = "Admin" - } - } - - resources = var.limit_resources ? { - limits = { - memory = "${local.service_grafana.resources.memory}Mi" - cpu = "${local.service_grafana.resources.cpu}m" - } - } : null - - datasources = { - "datasources.yaml" = { - apiVersion = 1 - - datasources = [ - { - name = "Prometheus" - type = "prometheus" - uid = "prometheus" - url = "http://prometheus-kube-prometheus-prometheus.prometheus:9090/" - access = "proxy" - isDefault = true - jsonData = { - httpMethod = "POST" - # prometheus.prometheusSpec.scrapeInterval - timeInterval = "30s" - } - }, - { - name = "Loki" - type = "loki" - uid = "loki" - url = "http://loki-gateway.loki.svc.cluster.local:80/" - access = "proxy" - jsonData = {} - }, - { - name = "CockroachDB" - type = "postgres" - uid = "crdb" - url = local.crdb_host - user = module.crdb_user_grafana_secrets.values["crdb/user/grafana/username"] - secureJsonData = { - password = module.crdb_user_grafana_secrets.values["crdb/user/grafana/password"] - } - jsonData = { - sslmode = "verify-ca" - sslRootCertFile = "/local/crdb/ca.crt" - } - secret = true - } - ] - } - } - - extraConfigmapMounts = [ - # TLS Cert for postgres datasource - { - name = kubernetes_config_map.crdb_ca["grafana"].metadata.0.name - configMap = "crdb-ca" - mountPath = "/local/crdb/ca.crt" - subPath = "ca.crt" - readOnly = true - } - ] - - sidecar = { - dashboards = { - enabled = true - } - } - - serviceMonitor = { - enabled = true - path = "/metrics" - labels = {} - - interval = "" - scheme = "http" - tlsConfig = {} - scrapeTimeout = "15s" - - relabelings = [] - } - })] -} - -resource "kubernetes_config_map" "grafana_dashboard" { - for_each = var.prometheus_enabled ? local.grafana_dashboards : {} - - metadata { - namespace = kubernetes_namespace.grafana.0.metadata.0.name - name = "grafana-rivet-${each.key}" - labels = { - grafana_dashboard = "1" - } - } - - data = { - "${each.key}.json" = each.value.body - } -} diff --git a/infra/tf/k8s_infra/prometheus.tf b/infra/tf/k8s_infra/prometheus.tf index e8dd15b03f..1130cc18cd 100644 --- a/infra/tf/k8s_infra/prometheus.tf +++ b/infra/tf/k8s_infra/prometheus.tf @@ -63,8 +63,6 @@ locals { ] }] : [] ]) - - crdb_host = "${try(data.terraform_remote_state.cockroachdb_k8s.outputs.host, data.terraform_remote_state.cockroachdb_managed.outputs.host)}:${try(data.terraform_remote_state.cockroachdb_k8s.outputs.port, data.terraform_remote_state.cockroachdb_managed.outputs.port)}" } module "alertmanager_secrets" { diff --git a/lib/bolt/core/src/dep/terraform/remote_states.rs b/lib/bolt/core/src/dep/terraform/remote_states.rs index cad0450c9c..6676ab23ac 100644 --- a/lib/bolt/core/src/dep/terraform/remote_states.rs +++ b/lib/bolt/core/src/dep/terraform/remote_states.rs @@ -21,7 +21,6 @@ pub fn dependency_graph(_ctx: &ProjectContext) -> HashMap<&'static str, Vec vec![ RemoteStateBuilder::default().plan_id("cockroachdb_k8s").build().unwrap(), - RemoteStateBuilder::default().plan_id("cockroachdb_managed").build().unwrap(), ], "cockroachdb_managed" => vec![ RemoteStateBuilder::default().plan_id("k8s_cluster_aws").build().unwrap(), @@ -35,6 +34,10 @@ pub fn dependency_graph(_ctx: &ProjectContext) -> HashMap<&'static str, Vec vec![ RemoteStateBuilder::default().plan_id("dns").build().unwrap(), ], + "grafana" => vec![ + RemoteStateBuilder::default().plan_id("cockroachdb_k8s").build().unwrap(), + RemoteStateBuilder::default().plan_id("cockroachdb_managed").build().unwrap(), + ], } } diff --git a/lib/bolt/core/src/tasks/infra/mod.rs b/lib/bolt/core/src/tasks/infra/mod.rs index c78c5166a2..d5ee08ba88 100644 --- a/lib/bolt/core/src/tasks/infra/mod.rs +++ b/lib/bolt/core/src/tasks/infra/mod.rs @@ -126,6 +126,15 @@ pub fn build_plan( } } + // Kubernetes + plan.push(PlanStep { + name_id: "k8s-infra", + kind: PlanStepKind::Terraform { + plan_id: "k8s_infra".into(), + needs_destroy: false, + }, + }); + // CockroachDB match ctx.ns().cockroachdb.provider { ns::CockroachDBProvider::Kubernetes {} => { @@ -148,15 +157,6 @@ pub fn build_plan( } } - // Kubernetes - plan.push(PlanStep { - name_id: "k8s-infra", - kind: PlanStepKind::Terraform { - plan_id: "k8s_infra".into(), - needs_destroy: false, - }, - }); - if ctx.tls_enabled() { // TLS plan.push(PlanStep { @@ -225,6 +225,13 @@ pub fn build_plan( // Vector if ctx.ns().prometheus.is_some() { + plan.push(PlanStep { + name_id: "grafana", + kind: PlanStepKind::Terraform { + plan_id: "grafana".into(), + needs_destroy: false, + }, + }); plan.push(PlanStep { name_id: "vector", kind: PlanStepKind::Terraform {