Skip to content

Commit

Permalink
Do not emit heartbeat if network is down
Browse files Browse the repository at this point in the history
  • Loading branch information
albireox committed Jan 7, 2025
1 parent fab7da4 commit 752d211
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 13 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## Next version

### 🚀 New

* Do not emit the heartbeat to ECP if either network (LCO or internet) is down. A network is considered down only after three consecutive failed connectivity checks.


## 0.1.2 - December 27, 2024

### ✨ Improved
Expand Down
37 changes: 24 additions & 13 deletions src/lvmbeat/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
import os
from time import time

from typing import Annotated, TypedDict
from typing import Annotated

import click
import httpx
from lvmopstools.utils import Trigger
from pydantic import BaseModel, Field

from clu.actor import AMQPActor
Expand Down Expand Up @@ -65,13 +66,6 @@ class BeatKeywordSchema(BaseModel):
]


class NetworkStatus(TypedDict):
"""Network status."""

lco: bool
outside: bool


class BeatActor(AMQPActor):
"""Heartbeat actor."""

Expand All @@ -98,9 +92,9 @@ def __init__(self, *args, heartbeats: list[HeartbeatData] = [], **kwargs):
self._last_emitted_outside: float | None = None

# Track network access to other LCO services and the outside world.
self.network_status: NetworkStatus = {
"lco": True,
"outside": True,
self.network_status: dict[str, Trigger] = {
"lco": Trigger(n=3),
"outside": Trigger(n=3),
}

self._emit_outside_task = asyncio.create_task(self.emit_outside())
Expand Down Expand Up @@ -137,6 +131,13 @@ async def update(self):
"Skipping since it is not critical."
)

for label, trigger in self.network_status.items():
if trigger.is_set():
self.log.warning(
f"Network to {label!r} is down. Not emitting heartbeat to ECP."
)
return

if self._last_emitted_ecp and time() - self._last_emitted_ecp < 10:
# Prevent emitting the heartbeat too often.
return
Expand Down Expand Up @@ -193,8 +194,18 @@ async def update_network_status(self):
internet = await is_host_up("8.8.8.8") # Google DNS
lco = await is_host_up("10.8.8.46") # clima.lco.cl

self.network_status["outside"] = internet
self.network_status["lco"] = lco
# Each network status is a Trigger rather than a plain boolean. We require
# the connection to fail three consecutive times before we consider the
# network down; a single successful check resets the count.

if internet:
self.network_status["outside"].reset()
else:
self.network_status["outside"].set()

if lco:
self.network_status["lco"].reset()
else:
self.network_status["lco"].set()

await asyncio.sleep(15)

Expand Down

0 comments on commit 752d211

Please sign in to comment.