Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Commit

Permalink
Fix get_dead_nodes query (#1054)
Browse files: browse the repository at this point in the history
  • Loading branch information
chkeita authored Jul 9, 2021
1 parent 48978ce commit 89b7d13
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
6 changes: 4 additions & 2 deletions src/api-service/__app__/onefuzzlib/workers/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,10 @@ def set_halt(self) -> None:
def get_dead_nodes(
cls, scaleset_id: UUID, expiration_period: datetime.timedelta
) -> List["Node"]:
- time_filter = "heartbeat lt datetime'%s'" % (
-     (datetime.datetime.utcnow() - expiration_period).isoformat()
+ min_date = (datetime.datetime.utcnow() - expiration_period).isoformat()
+ time_filter = "heartbeat lt datetime'%s' or Timestamp lt datetime'%s'" % (
+     min_date,
+     min_date,
)
return cls.search(
query={"scaleset_id": [scaleset_id]},
Expand Down
8 changes: 6 additions & 2 deletions src/api-service/__app__/onefuzzlib/workers/scalesets.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,16 +388,20 @@ def cleanup_nodes(self) -> bool:
if dead_nodes:
logging.info(
SCALESET_LOG_PREFIX
- + "reimaging nodes with expired heartbeats. "
+ + "reimaging uninitialized nodes or nodes with expired heartbeats. "
+ "scaleset_id:%s nodes:%s",
self.scaleset_id,
",".join(str(x.machine_id) for x in dead_nodes),
)
for node in dead_nodes:
+ if node.heartbeat:
+     error_message = "node reimaged due to expired heartbeat"
+ else:
+     error_message = "node reimaged due to never receiving a heartbeat"
error = Error(
code=ErrorCode.TASK_FAILED,
errors=[
- "node reimaged due to expired heartbeat",
+ error_message,
f"scaleset_id:{node.scaleset_id} machine_id:{node.machine_id}",
f"last heartbeat:{node.heartbeat}",
],
Expand Down

0 comments on commit 89b7d13

Please sign in to comment.