From 752d2115df0885ce4bf536f39da69c3bea036e18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20S=C3=A1nchez-Gallego?= Date: Tue, 7 Jan 2025 09:08:01 -0300 Subject: [PATCH] Do not emit heartbeat if network is down --- CHANGELOG.md | 7 +++++++ src/lvmbeat/actor.py | 37 ++++++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12cc3ad..6f8625c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Next version + +### 🚀 New + +* Do not emit heartbeat to ECP if the network (LCO or internet) is down. + + ## 0.1.2 - December 27, 2024 ### ✨ Improved diff --git a/src/lvmbeat/actor.py b/src/lvmbeat/actor.py index bdd9f56..410ca71 100644 --- a/src/lvmbeat/actor.py +++ b/src/lvmbeat/actor.py @@ -12,10 +12,11 @@ import os from time import time -from typing import Annotated, TypedDict +from typing import Annotated import click import httpx +from lvmopstools.utils import Trigger from pydantic import BaseModel, Field from clu.actor import AMQPActor @@ -65,13 +66,6 @@ class BeatKeywordSchema(BaseModel): ] -class NetworkStatus(TypedDict): - """Network status.""" - - lco: bool - outside: bool - - class BeatActor(AMQPActor): """Heartbeat actor.""" @@ -98,9 +92,9 @@ def __init__(self, *args, heartbeats: list[HeartbeatData] = [], **kwargs): self._last_emitted_outside: float | None = None # Track network access to other LCO services and the outside world. - self.network_status: NetworkStatus = { - "lco": True, - "outside": True, + self.network_status: dict[str, Trigger] = { + "lco": Trigger(n=3), + "outside": Trigger(n=3), } self._emit_outside_task = asyncio.create_task(self.emit_outside()) @@ -137,6 +131,13 @@ async def update(self): "Skipping since it is not critical." ) + for label, trigger in self.network_status.items(): + if trigger.is_set(): + self.log.warning( + f"Network to {label!r} is down. Not emitting heartbeat to ECP." + ) + return + if self._last_emitted_ecp and time() - self._last_emitted_ecp < 10: # Prevent emitting the heartbeat too often. return @@ -193,8 +194,18 @@ async def update_network_status(self): internet = await is_host_up("8.8.8.8") # Google DNS lco = await is_host_up("10.8.8.46") # clima.lco.cl - self.network_status["outside"] = internet - self.network_status["lco"] = lco + # Network statuses are triggers. We require the connection to fail + # three consecutive times before we consider the network down. + + if internet: + self.network_status["outside"].reset() + else: + self.network_status["outside"].set() + + if lco: + self.network_status["lco"].reset() + else: + self.network_status["lco"].set() await asyncio.sleep(15)