From 89b7d131252a67c5a9b808c983353df4799abe44 Mon Sep 17 00:00:00 2001 From: Cheick Keita Date: Fri, 9 Jul 2021 10:33:42 -0700 Subject: [PATCH] Fix get_dead_nodes query (#1054) --- src/api-service/__app__/onefuzzlib/workers/nodes.py | 6 ++++-- src/api-service/__app__/onefuzzlib/workers/scalesets.py | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/api-service/__app__/onefuzzlib/workers/nodes.py b/src/api-service/__app__/onefuzzlib/workers/nodes.py index 950f8e739e..4da60d36a1 100644 --- a/src/api-service/__app__/onefuzzlib/workers/nodes.py +++ b/src/api-service/__app__/onefuzzlib/workers/nodes.py @@ -434,8 +434,10 @@ def set_halt(self) -> None: def get_dead_nodes( cls, scaleset_id: UUID, expiration_period: datetime.timedelta ) -> List["Node"]: - time_filter = "heartbeat lt datetime'%s'" % ( - (datetime.datetime.utcnow() - expiration_period).isoformat() + min_date = (datetime.datetime.utcnow() - expiration_period).isoformat() + time_filter = "heartbeat lt datetime'%s' or Timestamp lt datetime'%s'" % ( + min_date, + min_date, ) return cls.search( query={"scaleset_id": [scaleset_id]}, diff --git a/src/api-service/__app__/onefuzzlib/workers/scalesets.py b/src/api-service/__app__/onefuzzlib/workers/scalesets.py index dfb02ab582..4aa20a5751 100644 --- a/src/api-service/__app__/onefuzzlib/workers/scalesets.py +++ b/src/api-service/__app__/onefuzzlib/workers/scalesets.py @@ -388,16 +388,20 @@ def cleanup_nodes(self) -> bool: if dead_nodes: logging.info( SCALESET_LOG_PREFIX - + "reimaging nodes with expired heartbeats. " + + "reimaging uninitialized nodes or nodes with expired heartbeats. " + "scaleset_id:%s nodes:%s", self.scaleset_id, ",".join(str(x.machine_id) for x in dead_nodes), ) for node in dead_nodes: + if node.heartbeat: + error_message = "node reimaged due to expired heartbeat" + else: + error_message = "node reimaged due to never receiving a heartbeat" error = Error( code=ErrorCode.TASK_FAILED, errors=[ - "node reimaged due to expired heartbeat", + error_message, f"scaleset_id:{node.scaleset_id} machine_id:{node.machine_id}", f"last heartbeat:{node.heartbeat}", ],