Skip to content

Commit

Permalink
[Serve] Set app_msg to empty string by default (ray-project#35646)
Browse files Browse the repository at this point in the history
The ApplicationState currently updates its _status directly without necessarily updating the corresponding message. This causes stale messages to sometimes appear with updated statuses. This change sets the app_msg to an empty string by default, so the message is cleared or updated whenever the status is changed.

Related issue number
Closes ray-project#35508

Signed-off-by: e428265 <arvind.chandramouli@lmco.com>
  • Loading branch information
shrekris-anyscale authored and arvind-chandra committed Aug 31, 2023
1 parent 681e411 commit 0a6f825
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 22 deletions.
64 changes: 43 additions & 21 deletions python/ray/serve/_private/application_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(

self._name = name
self._deploy_obj_ref = deploy_obj_ref
self._app_msg = ""
self._status_msg = ""
self._deployment_state_manager = deployment_state_manager
self._deployment_params: List[Dict] = []
# This set tracks old deployments that are being deleted
Expand Down Expand Up @@ -92,7 +92,7 @@ def deployments(self) -> List[str]:

def delete(self):
"""Delete the application"""
self._status = ApplicationStatus.DELETING
self._update_status(ApplicationStatus.DELETING)

def deploy(self, deployment_params: List[Dict]) -> List[str]:
"""Deploy the application.
Expand Down Expand Up @@ -141,13 +141,13 @@ def deploy(self, deployment_params: List[Dict]) -> List[str]:
"path in your application to avoid this issue."
)

self._status = ApplicationStatus.DEPLOYING
self._update_status(ApplicationStatus.DEPLOYING)
return cur_deployments_to_delete

def update_obj_ref(self, deploy_obj_ref: ObjectRef, deployment_time: int):
self._deploy_obj_ref = deploy_obj_ref
self._deployment_timestamp = deployment_time
self._status = ApplicationStatus.DEPLOYING
self._update_status(ApplicationStatus.DEPLOYING)

def _process_terminating_deployments(self):
"""Update the tracking for all deployments being deleted
Expand Down Expand Up @@ -202,42 +202,60 @@ def update(self):
ray.get(finished[0])
logger.info(f"Deploy task for app '{self._name}' ran successfully.")
except RayTaskError as e:
self._status = ApplicationStatus.DEPLOY_FAILED
# NOTE(zcin): we should use str(e) instead of traceback.format_exc()
# here because the full details of the error is not displayed
# properly with traceback.format_exc(). RayTaskError has its own
# custom __str__ function.
self._app_msg = f"Deploying app '{self._name}' failed:\n{str(e)}"
logger.warning(self._app_msg)
self._update_status(
ApplicationStatus.DEPLOY_FAILED,
status_msg=f"Deploying app '{self._name}' failed:\n{str(e)}",
)
logger.warning(self._status_msg)
return
except RuntimeEnvSetupError:
self._status = ApplicationStatus.DEPLOY_FAILED
self._app_msg = (
f"Runtime env setup for app '{self._name}' "
f"failed:\n{traceback.format_exc()}"
self._update_status(
ApplicationStatus.DEPLOY_FAILED,
status_msg=(
f"Runtime env setup for app '{self._name}' "
f"failed:\n{traceback.format_exc()}"
),
)
logger.warning(self._app_msg)
logger.warning(self._status_msg)
return
except Exception:
self._status = ApplicationStatus.DEPLOY_FAILED
self._app_msg = (
"Unexpected error occured while deploying application "
f"'{self._name}':\n{traceback.format_exc()}"
self._update_status(
ApplicationStatus.DEPLOY_FAILED,
status_msg=(
"Unexpected error occured while deploying "
f"application '{self._name}':"
f"\n{traceback.format_exc()}"
),
)
logger.warning(self._app_msg)
logger.warning(self._status_msg)
return
deployments_statuses = (
self._deployment_state_manager.get_deployment_statuses(self.deployments)
)
num_health_deployments = 0
unhealthy_deployment_names = []
for deployment_status in deployments_statuses:
if deployment_status.status == DeploymentStatus.UNHEALTHY:
self._status = ApplicationStatus.DEPLOY_FAILED
return
unhealthy_deployment_names.append(deployment_status.name)
if deployment_status.status == DeploymentStatus.HEALTHY:
num_health_deployments += 1

if len(unhealthy_deployment_names) != 0:
self._update_status(
ApplicationStatus.DEPLOY_FAILED,
status_msg=(
"The following deployments are UNHEALTHY: "
f"{unhealthy_deployment_names}"
),
)
return

if num_health_deployments == len(deployments_statuses):
self._status = ApplicationStatus.RUNNING
self._update_status(ApplicationStatus.RUNNING)

self._process_terminating_deployments()

Expand All @@ -249,7 +267,7 @@ def get_application_status_info(self) -> ApplicationStatusInfo:
"""Return the application status information"""
return ApplicationStatusInfo(
self._status,
message=self._app_msg,
message=self._status_msg,
deployment_timestamp=self._deployment_timestamp,
)

Expand All @@ -269,6 +287,10 @@ def list_deployment_details(self) -> Dict[str, DeploymentDetails]:
}
return {k: v for k, v in details.items() if v is not None}

def _update_status(self, status: ApplicationStatus, status_msg: str = ""):
self._status = status
self._status_msg = status_msg


class ApplicationStateManager:
def __init__(self, deployment_state_manager):
Expand Down
25 changes: 24 additions & 1 deletion python/ray/serve/tests/test_application_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_update_app_running(mocked_application_state_manager):


def test_update_app_deploy_failed(mocked_application_state_manager):
"""Test DEPLOYING -> DEPLOY_FAILED"""
"""Test DEPLOYING -> DEPLOY_FAILED -> DEPLOYING -> RUNNING"""
app_state_manager, deployment_state_manager = mocked_application_state_manager
app_state_manager.deploy_application("test_app", [{"name": "d1"}])
# Simulate controller
Expand All @@ -128,8 +128,31 @@ def test_update_app_deploy_failed(mocked_application_state_manager):
app_status = app_state_manager.get_app_status("test_app")
assert app_status.status == ApplicationStatus.DEPLOY_FAILED
# rerun update, application status should not make difference
deploy_failed_msg = app_status.message
assert len(deploy_failed_msg) != 0
app_state_manager.update()
assert app_status.status == ApplicationStatus.DEPLOY_FAILED
assert app_status.message == deploy_failed_msg

app_state_manager.deploy_application("test_app", [{"name": "d1"}, {"name": "d2"}])
# Simulate controller
deployment_state_manager.deploy("d1", None)
deployment_state_manager.deploy("d2", None)

app_status = app_state_manager.get_app_status("test_app")
assert app_status.status == ApplicationStatus.DEPLOYING
assert app_status.message != deploy_failed_msg
deployment_state_manager.set_deployment_statuses_healthy("d1")
deployment_state_manager.set_deployment_statuses_healthy("d2")
app_state_manager.update()
app_status = app_state_manager.get_app_status("test_app")
assert app_status.status == ApplicationStatus.RUNNING
running_msg = app_status.message
assert running_msg != deploy_failed_msg
# rerun update, application status should not make difference
app_state_manager.update()
assert app_status.status == ApplicationStatus.RUNNING
assert app_status.message == running_msg


@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
Expand Down

0 comments on commit 0a6f825

Please sign in to comment.