Skip to content

Commit

Permalink
Add 'health checks' when cluster is in same status
Browse files: browse the repository at this point in the history
  • Loading branch information
luca3rd authored and nmiculinic committed Nov 4, 2022
1 parent 43a39cf commit b176c71
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 14 deletions.
15 changes: 6 additions & 9 deletions src/lightning_app/cli/cmd_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ def _wait_for_cluster_state(
"""
start = time.time()
elapsed = 0
current_state = V1ClusterState.UNSPECIFIED

while elapsed < max_wait_time:
cluster_resp = api_client.cluster_service_list_clusters()
Expand All @@ -228,14 +227,12 @@ def _wait_for_cluster_state(
new_cluster = clust
break
if new_cluster is not None:
if current_state != new_cluster.status.phase:
current_state = new_cluster.status.phase
echo_cluster_status_long(
cluster_id=cluster_id,
current_state=current_state,
current_reason=new_cluster.status.reason,
desired_state=target_state,
)
echo_cluster_status_long(
cluster_id=cluster_id,
current_state=new_cluster.status.phase,
current_reason=new_cluster.status.reason,
desired_state=target_state,
)
if new_cluster.status.phase == target_state:
break
time.sleep(check_timeout)
Expand Down
17 changes: 12 additions & 5 deletions tests/tests_app/cli/test_cmd_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,10 @@ def test_echo_state_change_on_desired_running(self, echo: MagicMock):
]
)
for state, reason in [
(V1ClusterState.UNSPECIFIED, ""),
(V1ClusterState.QUEUED, ""),
(V1ClusterState.PENDING, ""),
(V1ClusterState.PENDING, ""),
(V1ClusterState.PENDING, ""),
(V1ClusterState.FAILED, "some error"),
(V1ClusterState.PENDING, "retrying failure"),
(V1ClusterState.RUNNING, ""),
Expand All @@ -168,12 +169,14 @@ def test_echo_state_change_on_desired_running(self, echo: MagicMock):
check_timeout=0.1,
)

assert client.list_call_count == 6
assert echo.call_count == 5
assert client.list_call_count == 7
assert echo.call_count == 7
echo.assert_has_calls(
[
call("Cluster test-cluster is now queued"),
call("Cluster test-cluster is now pending"),
call("Cluster test-cluster is now pending"),
call("Cluster test-cluster is now pending"),
call(
"\n".join(
[
Expand Down Expand Up @@ -214,6 +217,8 @@ def test_echo_state_change_on_desired_deleted(self, echo: MagicMock):
]
)
for state in [
V1ClusterState.RUNNING,
V1ClusterState.RUNNING,
V1ClusterState.RUNNING,
V1ClusterState.DELETED,
]
Expand All @@ -228,10 +233,12 @@ def test_echo_state_change_on_desired_deleted(self, echo: MagicMock):
check_timeout=0.1,
)

assert client.list_call_count == 2
assert echo.call_count == 2
assert client.list_call_count == 4
assert echo.call_count == 4
echo.assert_has_calls(
[
call("Cluster test-cluster is terminating"),
call("Cluster test-cluster is terminating"),
call("Cluster test-cluster is terminating"),
call("Cluster test-cluster has been deleted."),
]
Expand Down

0 comments on commit b176c71

Please sign in to comment.