diff --git a/README.md b/README.md
index 3ed7501..7293c8c 100644
--- a/README.md
+++ b/README.md
@@ -83,9 +83,29 @@ juju config blackbox-exporter-k8s \
   probes_file='@path/to/probes.yml'
 ```
 
-Note that the `relabel_configs` of each scrape job doesn't need to be specified, and will be
+Note that the `relabel_configs` of each scrape job doesn't need to be specified, and will be
 overridden by the charm with the needed labels and the correct Blackbox Exporter url.
 
+#### Dynamic Configuration
+
+The list of probes and the list of probing modules can also be changed dynamically by other charms.
+This charm offers a relation over which other charms can forward custom probe specs to Blackbox Exporter. They are exchanged over the `probes` relation, which uses the `blackbox_exporter_probes` interface:
+
+```yaml
+requires:
+  probes:
+    interface: blackbox_exporter_probes
+```
+
+The custom probes provided via relation data are merged with the probes defined in the configuration file; the same applies to the modules, which are merged into the Blackbox Exporter configuration file.
+For the probes defined by another charm to be probed by this charm, all that is required is to relate the two charms:
+
+```shell
+juju relate <your-charm> blackbox-exporter-k8s:probes
+```
+
+Charms that seek to provide probes for Blackbox Exporter can do so using the provided `blackbox_probes` charm library. This library ensures that probes and modules defined by a charm are forwarded correctly to Blackbox Exporter and Prometheus, and that the resulting metrics are displayed in the associated Grafana dashboard.
+
 ## OCI Images
 
 This charm is published on Charmhub with blackbox exporter images from the official
 [quay.io/prometheus/blackbox-exporter].
diff --git a/lib/charms/blackbox_k8s/v0/blackbox_probes.py b/lib/charms/blackbox_k8s/v0/blackbox_probes.py
new file mode 100644
index 0000000..abfb336
--- /dev/null
+++ b/lib/charms/blackbox_k8s/v0/blackbox_probes.py
@@ -0,0 +1,759 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+"""Blackbox Exporter Probes Library.
+
+## Overview
+
+This document explains how to integrate with the Blackbox Exporter charm
+for the purpose of providing a probes metrics endpoint to Prometheus.
+
+## Provider Library Usage
+
+The Blackbox Exporter charm interacts with its datasources using this charm
+library.
+Charms seeking to expose probes for Blackbox may do so
+using the `BlackboxProbesProvider` object from this charm library.
+For the simplest use cases, the BlackboxProbesProvider object must be
+instantiated with a list of jobs with the endpoints to monitor.
+A probe in Blackbox is defined by a module and a static_config target. These
+are then organised into a Prometheus job for proper scraping.
+The `BlackboxProbesProvider` constructor requires
+the name of the relation over which a probe target
+is exposed to the Blackbox Exporter charm. This relation must use the
+`blackbox_exporter_probes` interface.
+The default name for the relation is
+`probes`. It is strongly recommended to use the same
+relation name for consistency across charms and doing so obviates the
+need for an additional constructor argument. The
+`BlackboxProbesProvider` object may be instantiated as follows:
+
+    from charms.blackbox_k8s.v0.blackbox_probes import BlackboxProbesProvider
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        ...
+        self.probes_provider = BlackboxProbesProvider(
+            self,
+            probes=[{
+                'params': {'module': ['http_2xx']},
+                'static_configs': [
+                    {'targets': ['http://endpoint.com']}
+                ]
+            }]
+        )
+        ...
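+
+The provider charm must also declare the relation in its charm metadata.
+A minimal sketch, assuming the default `probes` relation name (mirroring
+the `requires` side declared by the Blackbox Exporter charm itself):
+
+    provides:
+      probes:
+        interface: blackbox_exporter_probes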
+
+Note that the first argument (`self`) to `BlackboxProbesProvider` is
+always a reference to the parent charm.
+
+A `BlackboxProbesProvider` object will ensure that the list of probes is
+provided to Blackbox Exporter, which will export the probe results to
+Prometheus for scraping.
+The list of probes is provided via the constructor argument `probes`.
+This argument represents the necessary subset (module and static_configs) of a
+Prometheus scrape job, using Python standard data structures.
+This job specification is a subset of Prometheus' own
+[scrape
+configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config)
+format. More than one probe job
+may be provided using the `probes` argument. Hence `probes` accepts a list
+of dictionaries where each dictionary represents a subset of a `scrape_config`
+object. The currently supported configuration subset is: `job_name`, `params`,
+`static_configs`.
+
+Suppose a client charm wants to monitor a particular service
+via the http_2xx module.
+This may be done by providing the following data
+structure as the value of `probes`:
+
+```
+[
+    {
+        'params': {
+            'module': ['http_2xx']
+        },
+        'static_configs': [
+            {
+                'targets': ['http://endpoint.com']
+            }
+        ]
+    }
+]
+```
+
+It is also possible to add labels to the given probes, as such:
+
+```
+[
+    {
+        'params': {
+            'module': ['http_2xx']
+        },
+        'static_configs': [
+            {
+                'targets': [address],
+                'labels': {'name': 'endpoint-a'}
+            }
+        ]
+    }
+]
+```
+
+Multiple jobs with different probes and labels are allowed, but
+each job must be given a unique name:
+
+```
+[
+    {
+        'job_name': 'blackbox-http-2xx-job',
+        'params': {
+            'module': ['http_2xx']
+        },
+        'static_configs': [
+            {
+                'targets': [address],
+                'labels': {'name': 'endpoint-a'}
+            }
+        ]
+    },
+    {
+        'job_name': 'blackbox-icmp-job',
+        'params': {
+            'module': ['icmp']
+        },
+        'static_configs': [
+            {
+                'targets': [address],
+                'labels': {'name': 'endpoint-a'}
+            }
+        ]
+    }
+]
+```
+
+It is also possible for the client charm to define new probing modules.
+This is achieved by passing the `BlackboxProbesProvider` constructor an
+optional argument (`modules`) that contains the Blackbox module definitions.
+For details on how to write a module, see the
+[docs upstream](https://github.com/prometheus/blackbox_exporter/blob/master/CONFIGURATION.md).
+Further examples are provided [upstream](https://github.com/prometheus/blackbox_exporter/blob/master/example.yml).
+An example of defining a module is:
+
+```
+modules={
+    "http_2xx_longer_timeout": {
+        "prober": "http",
+        "timeout": "30s"  # default is 5s
+    }
+}
+```
+
+## Consumer Library Usage
+
+The `BlackboxProbesRequirer` object may be used by the Blackbox Exporter
+charm to retrieve the probes to be monitored. For this
+purpose a Blackbox Exporter charm needs to do two things:
+
+1. Instantiate the `BlackboxProbesRequirer` object by providing it a
+reference to the parent (Blackbox Exporter) charm and, optionally, the name of
+the relation that the Blackbox Exporter charm uses to interact with probes
+targets. This relation must conform to the `blackbox_exporter_probes`
+interface and it is strongly recommended that this relation be named
+`probes`, which is its default value.
+
+For example, a Blackbox Exporter charm may instantiate the
+`BlackboxProbesRequirer` in its constructor as follows:
+
+    from charms.blackbox_k8s.v0.blackbox_probes import BlackboxProbesRequirer
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        ...
+        self.probes_requirer = BlackboxProbesRequirer(
+            charm=self,
+            relation_name="probes",
+        )
+        ...
+
+The probes requirer must be instantiated before the `prometheus_scrape`
+`MetricsEndpointProvider`, because Blackbox defines new metrics endpoints
+to send to Prometheus.
+
+2. A Blackbox Exporter charm also needs to respond to the
+`TargetsChangedEvent` event of the `BlackboxProbesRequirer` by adding itself as
+an observer for these events, as in:
+
+    self.framework.observe(
+        self.probes_requirer.on.targets_changed,
+        self._on_scrape_targets_changed,
+    )
+
+In responding to the `TargetsChangedEvent` event the Blackbox Exporter
+charm must update its configuration so that any new probe
+is added and/or old ones removed from the list.
+For this purpose the `BlackboxProbesRequirer` object
+exposes a `probes()` method that returns a list of probe jobs. Each
+element of this list is a probe configuration to be added to the list of
+jobs for Prometheus to monitor.
+The same goes for the client-charm-defined modules: the `BlackboxProbesRequirer`
+object exposes a `modules()` method that returns a dict of the new modules to be
+added to the Blackbox configuration file.
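+
+A handler might look as follows (a minimal sketch; the handler name and the
+way the charm rebuilds its configuration are illustrative, not part of this
+library):
+
+    def _on_scrape_targets_changed(self, event):
+        probes = self.probes_requirer.probes()    # list of probe scrape jobs
+        modules = self.probes_requirer.modules()  # dict of custom Blackbox modules
+        # Rebuild the Blackbox Exporter configuration and the Prometheus
+        # scrape jobs from the collected probes and modules.
+        ...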
+"""
+
+import copy
+import hashlib
+import json
+import logging
+from typing import Dict, List, MutableMapping, Optional, Union
+
+import pydantic
+from cosl import JujuTopology
+from ops.charm import CharmBase
+from ops.framework import (
+    BoundEvent,
+    EventBase,
+    EventSource,
+    Object,
+    ObjectEvents,
+    StoredDict,
+    StoredList,
+    StoredState,
+)
+from ops.model import (
+    ActiveStatus,
+    BlockedStatus,
+    ModelError,
+    StatusBase,
+    WaitingStatus,
+)
+from pydantic import BaseModel, ConfigDict, Field
+
+# The unique Charmhub library identifier, never change it
+LIBID = "857fd3ed0a414dc5a141b7d6818a883d"
+
+# Increment this major API version when introducing breaking changes
+LIBAPI = 0
+
+# Increment this PATCH version before using `charmcraft publish-lib` or reset
+# to 0 if you are raising the major API version
+LIBPATCH = 1
+
+PYDEPS = ["pydantic"]
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_RELATION_NAME = "probes"
+
+class DataValidationError(Exception):
+    """Raised when data validation fails on relation data."""
+
+class DatabagModel(BaseModel):
+    """Base databag model."""
+
+    model_config = ConfigDict(
+        # Ignore any extra fields in the databag
+        extra="ignore",
+        # Allow instantiating this class by field name (instead of forcing alias).
+        populate_by_name=True,
+        # Custom config key: whether to nest the whole datastructure (as json)
+        # under a field or spread it out at the toplevel.
+        _NEST_UNDER=None,  # type: ignore
+    )
+    """Pydantic config."""
+
+    @classmethod
+    def load(cls, databag: MutableMapping):
+        """Load this model from a Juju databag."""
+        nest_under = cls.model_config.get("_NEST_UNDER")  # type: ignore
+        if nest_under:
+            return cls.model_validate(json.loads(databag[nest_under]))  # type: ignore
+
+        try:
+            data = {
+                k: json.loads(v)
+                for k, v in databag.items()
+                # Don't attempt to parse model-external values
+                if k in {(f.alias or n) for n, f in cls.model_fields.items()}
+            }
+        except json.JSONDecodeError as e:
+            msg = f"invalid databag contents: expecting json. {databag}"
+            logger.error(msg)
+            raise DataValidationError(msg) from e
+
+        try:
+            return cls.model_validate_json(json.dumps(data))  # type: ignore
+        except pydantic.ValidationError as e:
+            msg = f"failed to validate databag: {databag}"
+            logger.debug(msg, exc_info=True)
+            raise DataValidationError(msg) from e
+
+    def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True):
+        """Write the contents of this model to a Juju databag.
+
+        :param databag: the databag to write the data to.
+        :param clear: ensure the databag is cleared before writing it.
+        """
+        if clear and databag:
+            databag.clear()
+
+        if databag is None:
+            databag = {}
+        nest_under = self.model_config.get("_NEST_UNDER")
+        if nest_under:
+            databag[nest_under] = self.model_dump_json(  # type: ignore
+                by_alias=True,
+                # Skip keys whose values are default
+                exclude_defaults=True,
+            )
+            return databag
+
+        dct = self.model_dump()  # type: ignore
+        for key, field in self.model_fields.items():  # type: ignore
+            value = dct[key]
+            if value == field.default:
+                continue
+            databag[field.alias or key] = json.dumps(value)
+
+        return databag
+
+class ProbesStaticConfigModel(BaseModel):
+    """Static config entry of a probe scrape job."""
+
+    model_config = ConfigDict(extra="allow")
+
+    targets: List[str] = Field(
+        description="List of probe targets."
+    )
+    labels: Optional[Dict[str, str]] = Field(
+        description="Optional labels for the scrape targets", default=None
+    )
+
+class ProbesJobModel(BaseModel):
+    """A single probe scrape job."""
+
+    model_config = ConfigDict(extra="allow")
+
+    job_name: Optional[str] = Field(
+        description="Name of the Prometheus scrape job. Each job must have a unique, fixed name (e.g. a hardcoded literal).",
+        default=None,
+    )
+    metrics_path: Optional[str] = Field(
+        description="Path for metrics scraping.", default=None
+    )
+    params: Dict[str, List[str]] = Field(
+        description="Probe parameters, including the Blackbox module to use."
+    )
+    static_configs: List[ProbesStaticConfigModel] = Field(
+        description="List of static configurations to probe."
+    )
+
+class ListProbesModel(BaseModel):
+    """A list of probe scrape jobs."""
+
+    probes: List[ProbesJobModel]
+
+class ModuleConfig(BaseModel):
+    """A custom Blackbox probing module."""
+
+    model_config = ConfigDict(extra="allow")
+
+    prober: str = Field(description="Module prober.")
+
+class ScrapeMetadataModel(BaseModel):
+    """Juju topology metadata of the probes provider."""
+
+    # `protected_namespaces=()` allows the `model_uuid` field name under pydantic v2
+    model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+    model: str = Field(description="Juju model name.")
+    model_uuid: str = Field(description="Juju model UUID.")
+    application: str = Field(description="Juju application name.")
+    unit: str = Field(description="Juju unit name.")
+
+class ApplicationDataModel(DatabagModel):
+    """Application databag model for the probes relation."""
+
+    scrape_metadata: ScrapeMetadataModel = Field(
+        description="Metadata providing information about the Juju topology."
+    )
+    scrape_probes: List[ProbesJobModel] = Field(
+        description="List of scrape job configurations specifying static probe targets."
+    )
+    scrape_modules: Optional[Dict[str, ModuleConfig]] = Field(
+        description="Dict of custom Blackbox probing modules.", default=None
+    )
+
+class InvalidProbeEvent(EventBase):
+    """Event emitted when probe configurations are not valid."""
+
+    def __init__(self, handle, errors: str = ""):
+        super().__init__(handle)
+        self.errors = errors
+
+    def snapshot(self) -> Dict:
+        """Save error information."""
+        return {"errors": self.errors}
+
+    def restore(self, snapshot):
+        """Restore error information."""
+        self.errors = snapshot["errors"]
+
+
+class BlackboxProbesProvider(Object):
+    """A provider object for Blackbox Exporter probes."""
+
+    _stored = StoredState()
+
+    def __init__(
+        self,
+        charm: CharmBase,
+        probes: List[Dict],
+        modules: Optional[Dict] = None,
+        refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None,
+        relation_name: str = DEFAULT_RELATION_NAME,
+    ):
+        """Construct a Blackbox Exporter probes provider.
+
+        Charms seeking to expose metric endpoints to be probed via Blackbox
+        must do so using the `BlackboxProbesProvider` object from this charm
+        library. For the simplest use cases, the BlackboxProbesProvider object
+        must be instantiated with a list of jobs with the endpoints to monitor.
+        A probe in Blackbox is defined by a module and a static_config target.
+        These are then organised into a Prometheus job for proper scraping.
+        The `BlackboxProbesProvider` constructor requires the name of the
+        relation over which a probe target is exposed to the Blackbox
+        Exporter charm.
+
+        Args:
+            charm: a `CharmBase` object which manages the
+                `BlackboxProbesProvider` object. Generally, this is `self` in
+                the instantiating class.
+            probes: the probes to configure in Blackbox Exporter, passed as a
+                list of probe configurations in Python data structures.
+            modules: an optional definition of modules for Blackbox Exporter to
+                use. For details on how to write a module, see the
+                [docs upstream](https://github.com/prometheus/blackbox_exporter/blob/master/CONFIGURATION.md).
+                Further examples are provided [upstream](https://github.com/prometheus/blackbox_exporter/blob/master/example.yml).
+            refresh_event: additional `CharmEvents` event (or a list of them)
+                on which the probes and modules should be updated.
+            relation_name: name of the relation providing the Blackbox Probes
+                service. It's recommended to not change it, to ensure a
+                consistent experience across all charms that use the library.
+        """
+        super().__init__(charm, relation_name)
+        self._stored.set_default(
+            errors=[],
+        )
+        self.topology = JujuTopology.from_charm(charm)
+        self._charm = charm
+        self._relation_name = relation_name
+
+        self._probes = [] if probes is None else copy.deepcopy(probes)
+        self._modules = {} if modules is None else copy.deepcopy(modules)
+
+        events = self._charm.on[self._relation_name]
+        self.framework.observe(events.relation_changed, self._set_probes_spec)
+
+        if not refresh_event:
+            if len(self._charm.meta.containers) == 1:
+                container = list(self._charm.meta.containers.values())[0]
+                refresh_event = [self._charm.on[container.name.replace("-", "_")].pebble_ready]
+            else:
+                refresh_event = []
+        elif not isinstance(refresh_event, list):
+            refresh_event = [refresh_event]
+
+        # Always include the leader-elected event, so that the probes are
+        # correctly updated on new leaders
+        refresh_event.append(self._charm.on.leader_elected)
+
+        module_name_prefix = f"juju_{self.topology.identifier}"
+        self._prefix_probes(module_name_prefix)
+        self._prefix_modules(module_name_prefix)
+
+        self.framework.observe(events.relation_joined, self._set_probes_spec)
+        for ev in refresh_event:
+            self.framework.observe(ev, self._set_probes_spec)
+
+    def _set_probes_spec(self, _=None):
+        """Ensure probes target information is made available to Blackbox Exporter.
+
+        When a probes provider charm is related to a Blackbox Exporter charm, the
+        probes provider sets the probe specification and related metadata.
+        This information is set using Juju application data, since probes are not
+        tied to a specific unit.
+        """
+        if not self._charm.unit.is_leader():
+            return
+
+        errors = []
+        for relation in self._charm.model.relations[self._relation_name]:
+            try:
+                ApplicationDataModel(
+                    scrape_metadata=self._scrape_metadata,
+                    scrape_probes=self._probes,
+                    scrape_modules=self._modules,
+                ).dump(relation.data[self._charm.app])
+            except ModelError as e:
+                # args are bytes
+                msg = e.args[0]
+                if isinstance(msg, bytes):
+                    if msg.startswith(
+                        b"ERROR cannot read relation application settings: permission denied"
+                    ):
+                        error_message = (
+                            f"encountered error {e} while attempting to update relation data. "
+                            f"The relation is likely gone."
+                        )
+                        errors.append(error_message)
+                        continue
+                raise
+            except pydantic.ValidationError as e:
+                logger.error("Invalid probes provided")
+                error_message = f"Invalid probes provided in relation {relation.id}: {e}"
+                errors.append(error_message)
+
+        self._stored.errors = errors
+
+    def _prefix_probes(self, prefix: str) -> None:
+        """Prefix the probe job_names and probe modules with the charm's Juju topology.
+
+        A probe's module is prefixed only if it is a custom module defined by the
+        provider, i.e. present in `self._modules`.
+
+        Args:
+            prefix: the prefix, a string derived from the Juju topology identifier.
+        """
+        for probe in self._probes:
+            probe["job_name"] = "_".join(filter(None, [prefix, probe.get("job_name")]))
+            probe_modules = probe.get("params", {}).get("module", [])
+            for i, module in enumerate(probe_modules):
+                # Only rename custom modules; built-in modules (e.g. http_2xx) keep their names
+                if module in self._modules:
+                    probe["params"]["module"][i] = f"{prefix}_{module}"
+
+    def _prefix_modules(self, prefix: str) -> None:
+        """Prefix the custom modules with the charm's Juju topology."""
+        self._modules = {f"{prefix}_{key}": value for key, value in self._modules.items()}
+
+    def get_status(self) -> StatusBase:
+        """Collect the status of probes and errors from stored state.
+
+        Returns:
+            StatusBase: Status representing the state of the probes collection.
+
+            - ActiveStatus: All probes are valid and ready to be used.
+            - BlockedStatus: Errors occurred during probes parsing and require intervention.
+        """
+        if self._stored.errors:
+            return BlockedStatus("Errors occurred in probe configuration")
+        return ActiveStatus()
+
+    @property
+    def _scrape_metadata(self) -> dict:
+        """Generate scrape metadata.
+
+        Returns:
+            Scrape configuration metadata for this probes provider charm.
+        """
+        return self.topology.as_dict()
+
+class TargetsChangedEvent(EventBase):
+    """Event emitted when Blackbox Exporter scrape targets change."""
+
+    def __init__(self, handle, relation_id):
+        super().__init__(handle)
+        self.relation_id = relation_id
+
+    def snapshot(self):
+        """Save scrape target relation information."""
+        return {"relation_id": self.relation_id}
+
+    def restore(self, snapshot):
+        """Restore scrape target relation information."""
+        self.relation_id = snapshot["relation_id"]
+
+class MonitoringEvents(ObjectEvents):
+    """Event descriptor for events raised by `BlackboxProbesRequirer`."""
+
+    targets_changed = EventSource(TargetsChangedEvent)
+
+def _type_convert_stored(obj):
+    """Convert Stored* objects to their appropriate types, recursively."""
+    if isinstance(obj, StoredList):
+        return list(map(_type_convert_stored, obj))
+    if isinstance(obj, StoredDict):
+        rdict = {}  # type: Dict[Any, Any]
+        for k in obj.keys():
+            rdict[k] = _type_convert_stored(obj[k])
+        return rdict
+    return obj
+
+
+class BlackboxProbesRequirer(Object):
+    """A requirer object for Blackbox Exporter probes."""
+
+    on = MonitoringEvents()  # pyright: ignore
+    _stored = StoredState()
+
+    def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME):
+        """Construct a requirer object for Blackbox Exporter probes.
+
+        Args:
+            charm: a `CharmBase` instance that manages this
+                instance of the Blackbox Exporter service.
+            relation_name: an optional string name of the relation between `charm`
+                and the Blackbox Exporter charmed service. The default is "probes".
+        """
+        super().__init__(charm, relation_name)
+        self._stored.set_default(
+            scrape_probes=[],
+            blackbox_scrape_modules={},
+            errors=[],
+            probes_need_update=True,
+            modules_need_update=True,
+        )
+        self._charm = charm
+        self._relation_name = relation_name
+        events = self._charm.on[relation_name]
+        self.framework.observe(events.relation_changed, self._on_probes_provider_relation_changed)
+        self.framework.observe(
+            events.relation_departed, self._on_probes_provider_relation_departed
+        )
+
+    def _on_probes_provider_relation_changed(self, event):
+        """Handle changes with related probes providers.
+
+        Anytime there are changes in relations between Blackbox Exporter
+        and probes provider charms, the Blackbox Exporter charm is informed
+        through a `TargetsChangedEvent` event. The Blackbox Exporter charm can
+        then choose to update its scrape configuration.
+
+        Args:
+            event: a `CharmEvent` in response to which the Blackbox Exporter
+                charm must update its scrape configuration.
+        """
+        rel_id = event.relation.id
+        self._stored.probes_need_update = True
+        self._stored.modules_need_update = True
+        self.on.targets_changed.emit(relation_id=rel_id)
+
+    def _on_probes_provider_relation_departed(self, event):
+        """Update job config when a probes provider departs.
+
+        When a probes provider departs, the Blackbox Exporter charm is informed
+        through a `TargetsChangedEvent` event so that it can update its
+        scrape configuration to ensure that the departed probes provider
+        is removed from the list of scrape jobs.
+
+        Args:
+            event: a `CharmEvent` that indicates a probes provider
+                unit has departed.
+        """
+        rel_id = event.relation.id
+        self._stored.probes_need_update = True
+        self._stored.modules_need_update = True
+        self.on.targets_changed.emit(relation_id=rel_id)
+
+    def get_status(self) -> StatusBase:
+        """Collect the status of probes and errors from stored state.
+
+        Returns:
+            StatusBase: Status representing the state of the probes collection.
+
+            - ActiveStatus: All probes are valid and ready to be used.
+            - BlockedStatus: Errors occurred during probes parsing and require intervention.
+            - WaitingStatus: Probes are still being fetched or processed.
+        """
+        if self._stored.errors:
+            error_messages = "; ".join(self._stored.errors)
+            return BlockedStatus(f"Errors occurred in probe configuration: {error_messages}")
+        if self._stored.probes_need_update or self._stored.modules_need_update:
+            return WaitingStatus("Probes are being updated, please wait.")
+        return ActiveStatus()
+
+    def _process_and_hash_probes(self, databag):
+        """Deduplicate probes and make their job names unique in one pass.
+
+        Each probe's job name is suffixed with a hash of the probe's content, so
+        that probes with identical job names (e.g. from different providers) do
+        not clash, while fully identical probes are kept only once.
+        """
+        scrape_probes_hashed = []
+        unique_hashes = set()
+        for probe in databag.scrape_probes:
+            probe_data = probe.model_dump()
+
+            probe_str = str(probe_data)
+            probe_hash = hashlib.sha256(probe_str.encode()).hexdigest()
+
+            job_name = probe_data.get("job_name", "")
+            probe_data["job_name"] = f"{job_name}_{probe_hash}"
+
+            if probe_hash not in unique_hashes:
+                scrape_probes_hashed.append(probe_data)
+                unique_hashes.add(probe_hash)
+
+        return scrape_probes_hashed
+
+    def _update_probes(self):
+        """Update the cache of probes and errors by iterating over relation data."""
+        scrape_probes = []
+        errors = []
+
+        for relation in self._charm.model.relations[self._relation_name]:
+            try:
+                if not relation.data[relation.app]:
+                    continue
+                databag = ApplicationDataModel.load(relation.data[relation.app])
+                # Accumulate probes across relations instead of overwriting them
+                scrape_probes.extend(self._process_and_hash_probes(databag))
+            except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError) as e:
+                error_message = f"Invalid probes provided in relation {relation.id}: {e}"
+                errors.append(error_message)
+
+        self._stored.scrape_probes = scrape_probes
+        self._stored.errors = errors
+        self._stored.probes_need_update = False
+
+        return scrape_probes
+
+    def probes(self) -> list:
+        """Fetch the list of probes to scrape, updating the cache first if needed.
+
+        Returns:
+            A list consisting of all the static probe configurations
+            for each related `BlackboxProbesProvider`.
+        """
+        if self._stored.probes_need_update:
+            self._update_probes()
+
+        probes = _type_convert_stored(
+            self._stored.scrape_probes  # pyright: ignore
+        )
+        return probes
+
+    def _update_modules(self) -> dict:
+        """Update the cache of Blackbox modules by iterating over relation data.
+
+        Returns:
+            A dict consisting of all the module configurations
+            for each related `BlackboxProbesProvider`.
+ """ + blackbox_scrape_modules = {} + errors = [] + + for relation in self._charm.model.relations[self._relation_name]: + try: + if not relation.data[relation.app]: + continue + databag = ApplicationDataModel.load(relation.data[relation.app]) + blackbox_scrape_modules = databag.dict(exclude_unset=True)["scrape_modules"] + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError) as e: + error_message = f"Invalid blackbox module provided in relation {relation.id}: {e}" + errors.append(error_message) + + self._stored.blackbox_scrape_modules = blackbox_scrape_modules + self._stored.errors = errors + self._stored.modules_need_update = False + + return blackbox_scrape_modules + + def modules(self) -> dict: + """Fetch the dict of blackbox modules to configure. + + Returns: + A dict consisting of all the modueles configurations + for each related `BlackboxExporterProvider`. + """ + + if self._stored.modules_need_update: + self._update_modules() + + modules = {} + modules.update(_type_convert_stored(self._stored.blackbox_scrape_modules)) + + return modules diff --git a/metadata.yaml b/metadata.yaml index 5ead2d1..47ccdea 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -47,3 +47,7 @@ requires: limit: 1 catalogue: interface: catalogue + probes: + interface: blackbox_exporter_probes + description: | + Receive a list of probes (server address and probing method) from the provider to query. diff --git a/src/charm.py b/src/charm.py index ff190f6..77de312 100755 --- a/src/charm.py +++ b/src/charm.py @@ -10,6 +10,7 @@ from urllib.parse import urlparse import yaml +from charms.blackbox_k8s.v0.blackbox_probes import BlackboxProbesRequirer from charms.catalogue_k8s.v1.catalogue import CatalogueConsumer, CatalogueItem from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider from charms.loki_k8s.v1.loki_push_api import LogForwarder @@ -32,6 +33,7 @@ from ops.pebble import PathError, ProtocolError from blackbox import ConfigUpdateFailure, WorkloadManager +from scrape_config_builder import ScrapeConfigBuilder logger = logging.getLogger(__name__) @@ -99,6 +101,15 @@ def __init__(self, *args): self._on_k8s_patch_failed, ) + self._probes_requirer = BlackboxProbesRequirer( + charm=self, + relation_name="probes", + ) + + self.framework.observe( + self._probes_requirer.on.targets_changed, self._on_probes_modules_config_changed + ) + # - Self monitoring and probes self._scraping = MetricsEndpointProvider( self, @@ -107,6 +118,7 @@ def __init__(self, *args): refresh_event=[ self.on.config_changed, self.on.update_status, + self._probes_requirer.on.targets_changed, ], ) self._grafana_dashboard_provider = GrafanaDashboardProvider(charm=self) @@ -114,6 +126,7 @@ def __init__(self, *args): self.framework.observe(self.ingress.on.ready, self._handle_ingress) self.framework.observe(self.ingress.on.revoked, self._handle_ingress) + self.catalog = CatalogueConsumer( charm=self, item=CatalogueItem( @@ -190,6 +203,9 @@ def _common_exit_hook(self) -> None: # Update config file try: self.blackbox_workload.update_config() + self._update_blackbox_config_yaml_given_dict_from_relation( + self._probes_requirer.modules() + ) except ConfigUpdateFailure as e: self.unit.status = BlockedStatus(str(e)) return @@ -230,32 +246,51 @@ def self_scraping_job(self): return [job] + def _update_blackbox_config_yaml_given_dict_from_relation(self, modules: dict) -> None: + """Update the blackbox config yaml given a dict of modules defined in relation. 
+
+        This method takes the dict of modules from the `BlackboxProbesRequirer` and
+        adds the required modules to the config file, but only if they do not
+        already exist there.
+
+        Raises:
+            yaml.YAMLError: If there is an error in the YAML formatting or parsing.
+            TypeError: If the pulled configuration content is not a string, since
+                `yaml.safe_load` requires a string input.
+        """
+        if not modules:
+            return
+
+        config_file_data = self.container.pull(self._config_path).read()
+
+        if not config_file_data:
+            return
+
+        config_data = yaml.safe_load(config_file_data)
+
+        if "modules" not in config_data:
+            config_data["modules"] = {}
+
+        for module_name, module_data in modules.items():
+            # Add modules from relation data only if they do not exist in the config file
+            if module_name not in config_data["modules"]:
+                config_data["modules"][module_name] = module_data
+
+        updated_config_data = yaml.safe_dump(config_data)
+        self.container.push(self._config_path, updated_config_data)
+        self.blackbox_workload.reload()
+
     @property
     def probes_scraping_jobs(self):
         """The scraping jobs to execute probes from Prometheus."""
-        jobs = []
-        external_url = urlparse(self._external_url)
-        probes_path = f"{external_url.path.rstrip('/')}/probe"
-        probes_scrape_jobs = cast(str, self.model.config.get("probes_file"))
-        if probes_scrape_jobs:
-            probes = yaml.safe_load(probes_scrape_jobs)
-            # Add the Blackbox Exporter's `relabel_configs` to each job
-            for probe in probes["scrape_configs"]:
-                # The relabel configs come from the official Blackbox Exporter docs; please refer
-                # to that for further information on what they do
-                probe["metrics_path"] = probes_path
-                probe["relabel_configs"] = [
-                    {"source_labels": ["__address__"], "target_label": "__param_target"},
-                    {"source_labels": ["__param_target"], "target_label": "instance"},
-                    # Copy the scrape job target to an extra label for dashboard usage
-                    {"source_labels": ["__param_target"], "target_label": "probe_target"},
-                    # Set the address to scrape to the blackbox exporter url
-                    {
-                        "target_label": "__address__",
-                        "replacement": f"{external_url.hostname}",
-                    },
-                ]
-                jobs.append(probe)
+        # Extract file and relation probes
+        file_probes_scrape_jobs = cast(str, self.model.config.get("probes_file"))
+        relation_probes_scrape_jobs = self._probes_requirer.probes()
+
+        builder = ScrapeConfigBuilder(self._external_url)
+        jobs = builder.build_probes_scraping_jobs(
+            file_probes=file_probes_scrape_jobs,
+            relation_probes=relation_probes_scrape_jobs,
+        )
 
         return jobs
 
@@ -277,6 +312,10 @@ def _on_upgrade_charm(self, _):
         # the config may need update. Calling the common hook to update.
         self._common_exit_hook()
 
+    def _on_probes_modules_config_changed(self, _):
+        """Event handler for probes and modules configuration changes."""
+        self._common_exit_hook()
+
 
 if __name__ == "__main__":
     main(BlackboxExporterCharm)
diff --git a/src/scrape_config_builder.py b/src/scrape_config_builder.py
new file mode 100644
index 0000000..4128ff6
--- /dev/null
+++ b/src/scrape_config_builder.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+"""Helper class to build scrape configurations for Blackbox Exporter."""
+
+from typing import Any, Dict, List
+from urllib.parse import urlparse
+
+import yaml
+
+
+class ScrapeConfigBuilder:
+    """Helper class to build scrape configurations for Blackbox Exporter.
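+
+    A short usage sketch (the URL and variable names are illustrative):
+
+        builder = ScrapeConfigBuilder("http://blackbox-exporter:9115")
+        jobs = builder.build_probes_scraping_jobs(
+            file_probes=probes_file_yaml,       # YAML string from the probes_file config option
+            relation_probes=requirer.probes(),  # probes collected by a BlackboxProbesRequirer
+        )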
+    """
+
+    def __init__(self, external_url: str):
+        """Initialize the ScrapeConfigBuilder.
+
+        Args:
+            external_url: the external URL used to construct the probes'
+                `metrics_path` and `relabel_configs`.
+        """
+        self.external_url = external_url
+
+    def merge_scrape_configs(
+        self, file_probes: Dict[str, Any], relation_probes: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Merge the scrape_configs from both file and relation.
+
+        Args:
+            file_probes: data parsed from the "probes_file" configuration, loaded as a dictionary.
+                Defaults to an empty dictionary if no valid YAML or config entry is found.
+            relation_probes: a list of probe dicts extracted from relations. Relation probe
+                job_names are hashed to ensure uniqueness and avoid conflicts.
+
+        Returns:
+            A list of dicts representing the merged probes from both file and relation data.
+        """
+        merged_scrape_configs = {
+            probe["job_name"]: probe for probe in file_probes.get("scrape_configs", [])
+        }
+
+        for probe in relation_probes:
+            job_name = probe["job_name"]
+            merged_scrape_configs[job_name] = probe
+
+        return list(merged_scrape_configs.values())
+
+    def build_probes_scraping_jobs(
+        self,
+        file_probes: str,
+        relation_probes: List[Dict[str, Any]],
+    ) -> List[Dict[str, Any]]:
+        """Build the list of probe scraping jobs.
+
+        Args:
+            file_probes: the raw YAML string from the "probes_file" configuration
+                option; may be empty if the option is unset.
+            relation_probes: a list of probe dicts extracted from relations.
+
+        Returns:
+            A list of scraping jobs with the Blackbox relabel configs applied.
+        """
+        external_url = urlparse(self.external_url)
+        probes_path = f"{external_url.path.rstrip('/')}/probe"
+
+        file_probes_scrape_jobs_dict = yaml.safe_load(file_probes) if file_probes else {}
+
+        merged_scrape_configs = self.merge_scrape_configs(
+            file_probes_scrape_jobs_dict, relation_probes
+        )
+
+        # Add the Blackbox Exporter's `relabel_configs` to each job
+        for probe in merged_scrape_configs:
+            probe["metrics_path"] = probes_path
+            probe["relabel_configs"] = [
+                {"source_labels": ["__address__"], "target_label": "__param_target"},
+                {"source_labels": ["__param_target"], "target_label": "instance"},
+                {"source_labels": ["__param_target"], "target_label": "probe_target"},
+                {"target_label": "__address__", "replacement": external_url.hostname},
+            ]
+
+        return merged_scrape_configs
diff --git a/tests/unit/test_probes_provider.py b/tests/unit/test_probes_provider.py
new file mode 100644
index 0000000..32140fe
--- /dev/null
+++ b/tests/unit/test_probes_provider.py
@@ -0,0 +1,195 @@
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
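+
+"""Unit tests for the provider side of the blackbox_probes charm library."""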
+ +import json +import unittest +from typing import List + +from charms.blackbox_k8s.v0.blackbox_probes import BlackboxProbesProvider +from cosl import JujuTopology +from ops.charm import CharmBase +from ops.framework import StoredState +from ops.model import ( + ActiveStatus, + BlockedStatus, +) +from ops.testing import Harness + +RELATION_NAME = "probes" + +PROVIDER_META = f""" +name: provider-tester +containers: + blackbox-tester: +provides: + {RELATION_NAME}: + interface: blackbox_exporter_probes +""" + + +PROBES: List[dict] = [ + { + "job_name": "my-first-job", + "params": {"module": ["http_2xx"]}, + "static_configs": [ + { + "targets": ["10.1.238.1"], + "labels": {"some_key": "some-value"}, + } + ], + }, + { + "job_name": "my-second-job", + "params": { + "module": ["icmp"], + }, + "static_configs": [ + {"targets": ["10.1.238.1"], "labels": {"some_other_key": "some-other-value"}} + ], + }, +] + +PROBES_NOT_VALID_MISSING_STATIC_CONFIG: List[dict] = [ + { + "job_name": "my-first-job", + "params": {"module": ["http_2xx"]}, + } +] + +PROBES_NOT_VALID_MISSING_MODULE: List[dict] = [ + { + "job_name": "my-first-job", + "static_configs": [ + { + "targets": ["10.1.238.1"], + "labels": {"some_key": "some-value"}, + } + ], + }, +] + +MODULES: dict = { + "http_2xx_longer_timeout": { + "prober": "http", + "timeout": "30s", + } +} + + +class BlackboxProbesProviderCharmWithModules(CharmBase): + def __init__(self, *args, **kwargs): + super().__init__(*args) + + self.provider = BlackboxProbesProvider(self, probes=PROBES, modules=MODULES) + + +class BlackboxProbesProviderTest(unittest.TestCase): + def setUp(self): + self.harness = Harness(BlackboxProbesProviderCharmWithModules, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") + self.addCleanup(self.harness.cleanup) + self.harness.set_leader(True) + self.harness.begin() + + def test_provider_sets_scrape_metadata(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + + self.harness.charm.provider._set_probes_spec() + + data = self.harness.get_relation_data(rel_id, self.harness.model.app.name) + self.assertIn("scrape_metadata", data) + scrape_metadata = data["scrape_metadata"] + self.assertIn("model", scrape_metadata) + self.assertIn("model_uuid", scrape_metadata) + self.assertIn("application", scrape_metadata) + self.assertIn("unit", scrape_metadata) + + def test_provider_sets_probes_on_relation_joined(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + + self.harness.charm.provider._set_probes_spec() + + data = self.harness.get_relation_data(rel_id, self.harness.model.app.name) + self.assertIn("scrape_probes", data) + scrape_data = json.loads(data["scrape_probes"]) + self.assertEqual(scrape_data[0]["static_configs"][0]["targets"], ["10.1.238.1"]) + self.assertEqual(scrape_data[0]["params"]["module"], ["http_2xx"]) + + def test_provider_sets_modules_with_prefix_on_relation_joined(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + self.harness.charm.provider._set_probes_spec() + + data = self.harness.get_relation_data(rel_id, self.harness.model.app.name) + self.assertIn("scrape_modules", data) + scrape_modules = json.loads(data["scrape_modules"]) + + topology = JujuTopology.from_dict(json.loads(data["scrape_metadata"])) + module_name_prefix = "juju_{}_".format(topology.identifier) + + 
self.assertIn(f"{module_name_prefix}http_2xx_longer_timeout", scrape_modules) + + def test_provider_prefixes_jobs(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + + self.harness.charm.provider._set_probes_spec() + + data = self.harness.get_relation_data(rel_id, self.harness.model.app.name) + scrape_data = json.loads(data["scrape_probes"]) + topology = JujuTopology.from_dict(json.loads(data["scrape_metadata"])) + module_name_prefix = "juju_{}_".format(topology.identifier) + + self.assertEqual(scrape_data[0]["job_name"], f"{module_name_prefix}my-first-job") + + def test_provider_prefixes_modules(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + + self.harness.charm.provider._set_probes_spec() + + data = self.harness.get_relation_data(rel_id, self.harness.model.app.name) + scrape_data = json.loads(data["scrape_modules"]) + topology = JujuTopology.from_dict(json.loads(data["scrape_metadata"])) + module_name_prefix = "juju_{}_".format(topology.identifier) + actual_key = next(iter(scrape_data.keys())) + expected_key = f"{module_name_prefix}http_2xx_longer_timeout" + self.assertEqual(actual_key, expected_key) + + def test_get_active_status(self): + self.addCleanup(self.harness.cleanup) + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + self.harness.charm.provider._set_probes_spec() + status = self.harness.charm.provider.get_status() + assert status == ActiveStatus() + + +class BlackboxProbesProviderCharmWithWrongProbe(CharmBase): + _stored = StoredState() + + def __init__(self, *args, **kwargs): + super().__init__(*args) + self._stored.set_default(num_events=0) + self.provider = BlackboxProbesProvider( + self, probes=PROBES_NOT_VALID_MISSING_MODULE, modules=MODULES + ) + + +class BlackboxProbesWrongProviderTest(unittest.TestCase): + def setUp(self): + self.harness = Harness(BlackboxProbesProviderCharmWithWrongProbe, meta=PROVIDER_META) + self.harness.set_model_name("MyUUID") + self.addCleanup(self.harness.cleanup) + self.harness.set_leader(True) + self.harness.begin() + + def test_get_blocked_status_on_invalid_probe(self): + self.assertEqual(self.harness.charm._stored.num_events, 0) + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + self.harness.charm.provider._set_probes_spec() + status = self.harness.charm.provider.get_status() + assert status == BlockedStatus("Errors occurred in probe configuration") diff --git a/tests/unit/test_probes_requirer.py b/tests/unit/test_probes_requirer.py new file mode 100644 index 0000000..45422ce --- /dev/null +++ b/tests/unit/test_probes_requirer.py @@ -0,0 +1,235 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. 
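+
+"""Unit tests for the requirer side of the blackbox_probes charm library."""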
+
+import json
+import unittest
+from typing import List
+
+from charms.blackbox_k8s.v0.blackbox_probes import BlackboxProbesRequirer
+from ops.charm import CharmBase
+from ops.framework import StoredState
+from ops.testing import Harness
+
+RELATION_NAME = "probes"
+
+REQUIRER_META = f"""
+name: requirer-tester
+containers:
+  blackbox-tester:
+requires:
+  {RELATION_NAME}:
+    interface: blackbox_exporter_probes
+"""
+
+PROBES: List[dict] = [
+    {
+        "job_name": "my-first-job",
+        "params": {"module": ["http_2xx"]},
+        "static_configs": [
+            {
+                "targets": ["10.1.238.1"],
+                "labels": {"some_key": "some-value"},
+            }
+        ],
+    },
+    {
+        "job_name": "my-second-job",
+        "params": {
+            "module": ["icmp"],
+        },
+        "static_configs": [
+            {"targets": ["10.1.238.1"], "labels": {"some_other_key": "some-other-value"}}
+        ],
+    },
+]
+
+MODULES: dict = {
+    "http_2xx_longer_timeout": {
+        "prober": "http",
+        "timeout": "30s",
+    }
+}
+
+SCRAPE_METADATA = {
+    "model": "requirer-model",
+    "model_uuid": "12de4fae-06cc-4ceb-9089-567be09fec78",
+    "application": "requirer",
+    "charm_name": "test-charm",
+    "unit": "test-unit",
+}
+
+PROBES_WITH_SAME_NAME: List[dict] = [
+    {
+        "job_name": "my-first-job",
+        "params": {"module": ["http_2xx"]},
+        "static_configs": [
+            {
+                "targets": ["10.1.238.1"],
+                "labels": {"some_key": "some-value"},
+            }
+        ],
+    },
+    {
+        "job_name": "my-first-job",
+        "params": {
+            "module": ["icmp"],
+        },
+        "static_configs": [
+            {"targets": ["10.1.238.1"], "labels": {"some_other_key": "some-other-value"}}
+        ],
+    },
+]
+
+IDENTICAL_PROBES: List[dict] = [
+    {
+        "job_name": "my-first-job",
+        "params": {"module": ["http_2xx"]},
+        "static_configs": [
+            {
+                "targets": ["10.1.238.1"],
+                "labels": {"some_key": "some-value"},
+            }
+        ],
+    },
+    {
+        "job_name": "my-first-job",
+        "params": {"module": ["http_2xx"]},
+        "static_configs": [
+            {
+                "targets": ["10.1.238.1"],
+                "labels": {"some_key": "some-value"},
+            }
+        ],
+    },
+]
+
+
+class BlackboxProbesRequirerCharm(CharmBase):
+    _stored = StoredState()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args)
+        self._stored.set_default(num_events=0)
+        self.probes_requirer = BlackboxProbesRequirer(self, RELATION_NAME)
+        self.framework.observe(self.probes_requirer.on.targets_changed, self.record_events)
+
+    def record_events(self, event):
+        self._stored.num_events += 1
+
+    @property
+    def version(self):
+        return "1.0.0"
+
+
+class BlackboxProbesRequirerTest(unittest.TestCase):
+    def setUp(self):
+        self.harness = Harness(BlackboxProbesRequirerCharm, meta=REQUIRER_META)
+
+        self.addCleanup(self.harness.cleanup)
+        self.harness.begin_with_initial_hooks()
+
+    def setup_charm_relations(self):
+        """Create relations used by test cases."""
+        rel_ids = []
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        rel_ids.append(rel_id)
+        self.harness.update_relation_data(
+            rel_id,
+            "requirer",
+            {
+                "scrape_metadata": json.dumps(SCRAPE_METADATA),
+                "scrape_probes": json.dumps(PROBES),
+                "scrape_modules": json.dumps(MODULES),
+            },
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_notifies_on_new_scrape_metadata_relation(self):
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        self.harness.update_relation_data(
+            rel_id, "requirer", {"scrape_metadata": json.dumps(SCRAPE_METADATA)}
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_notifies_on_new_probes_target(self):
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        self.harness.add_relation_unit(rel_id, "requirer/0")
+        self.harness.update_relation_data(
+            rel_id, "requirer/0", {"scrape_probes": json.dumps(PROBES)}
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_notifies_on_new_modules_target(self):
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        self.harness.add_relation_unit(rel_id, "requirer/0")
+        self.harness.update_relation_data(
+            rel_id, "requirer/0", {"scrape_modules": json.dumps(MODULES)}
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_returns_all_probes_targets(self):
+        self.setup_charm_relations()
+
+        probes = self.harness.charm.probes_requirer.probes()
+        self.assertEqual(len(probes), 2)
+        self.assertEqual(type(probes), list)
+
+    def test_requirer_returns_all_modules(self):
+        self.setup_charm_relations()
+
+        modules = self.harness.charm.probes_requirer.modules()
+        self.assertEqual(len(modules), 1)
+        self.assertEqual(type(modules), dict)
+
+    def setup_charm_relations_same_name(self):
+        """Create relations used by test cases."""
+        rel_ids = []
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        rel_ids.append(rel_id)
+        self.harness.update_relation_data(
+            rel_id,
+            "requirer",
+            {
+                "scrape_metadata": json.dumps(SCRAPE_METADATA),
+                "scrape_probes": json.dumps(PROBES_WITH_SAME_NAME),
+                "scrape_modules": json.dumps(MODULES),
+            },
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_returns_all_probes_targets_hashed(self):
+        self.setup_charm_relations_same_name()
+
+        probes = self.harness.charm.probes_requirer.probes()
+        self.assertEqual(len(probes), 2)
+        self.assertEqual(type(probes), list)
+
+    def setup_charm_relations_identical(self):
+        """Create relations used by test cases."""
+        rel_ids = []
+        self.assertEqual(self.harness.charm._stored.num_events, 0)
+        rel_id = self.harness.add_relation(RELATION_NAME, "requirer")
+        rel_ids.append(rel_id)
+        self.harness.update_relation_data(
+            rel_id,
+            "requirer",
+            {
+                "scrape_metadata": json.dumps(SCRAPE_METADATA),
+                "scrape_probes": json.dumps(IDENTICAL_PROBES),
+                "scrape_modules": json.dumps(MODULES),
+            },
+        )
+        self.assertEqual(self.harness.charm._stored.num_events, 1)
+
+    def test_requirer_discard_identical_probes(self):
+        self.setup_charm_relations_identical()
+
+        probes = self.harness.charm.probes_requirer.probes()
+        self.assertEqual(len(probes), 1)
+        self.assertEqual(type(probes), list)
diff --git a/tests/unit/test_scrape_config_builder.py b/tests/unit/test_scrape_config_builder.py
new file mode 100644
index 0000000..f39041f
--- /dev/null
+++ b/tests/unit/test_scrape_config_builder.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
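+
+"""Unit tests for the ScrapeConfigBuilder helper."""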
+ +import unittest + +import yaml + +from scrape_config_builder import ScrapeConfigBuilder + + +class TestScrapeConfigBuilder(unittest.TestCase): + def setUp(self): + """Set up the test case with a common builder instance and test data.""" + self.builder = ScrapeConfigBuilder("http://blackbox-exporter:9115") + self.file_probes = { + "scrape_configs": [ + {"job_name": "config_yaml_job", "static_configs": [{"targets": ["target1"]}]} + ] + } + self.relation_probes = [ + {"job_name": "relation_job", "static_configs": [{"targets": ["target2"]}]} + ] + + def test_merge_scrape_configs(self): + """Test that file and relation probes are merged correctly.""" + merged = self.builder.merge_scrape_configs(self.file_probes, self.relation_probes) + + self.assertEqual(len(merged), 2) + self.assertIn("config_yaml_job", [job["job_name"] for job in merged]) + self.assertIn("relation_job", [job["job_name"] for job in merged]) + + def test_build_scraping_jobs(self): + """Test that the scraping jobs are built correctly with relabel_configs.""" + scraping_jobs = self.builder.build_probes_scraping_jobs( + file_probes=yaml.safe_dump(self.file_probes), + relation_probes=self.relation_probes, + ) + + for job in scraping_jobs: + self.assertIn("metrics_path", job) + self.assertIn("relabel_configs", job) + self.assertEqual(job["metrics_path"], "/probe") + self.assertIsInstance(job["relabel_configs"], list) + self.assertGreater(len(job["relabel_configs"]), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tox.ini b/tox.ini index 340f07b..bd9e562 100644 --- a/tox.ini +++ b/tox.ini @@ -79,7 +79,7 @@ deps = hypothesis validators>=0.21.2 -r{toxinidir}/requirements.txt - pydantic < 2.0 # from traefik_k8s.v2.ingress + pydantic > 2.0 # from traefik_k8s.v2.ingress commands = coverage run \ --source={[vars]src_path} \ @@ -93,7 +93,7 @@ commands = description = Scenario tests deps = pytest - pydantic < 2 + pydantic > 2 ops-scenario ops < 2.5.0 # https://github.com/canonical/ops-scenario/issues/48 -r{toxinidir}/requirements.txt