From a72d268a51534331d388b0dbd16a716c5805af0f Mon Sep 17 00:00:00 2001
From: Akihiro Nitta
Date: Fri, 9 Dec 2022 19:56:26 +0900
Subject: [PATCH] Avoid using the same port number for autoscaler works (#15966)

* dont hardcode port in python server

* add another chglog
---
 examples/app_server_with_auto_scaler/app.py  | 21 +++++++++++--------
 src/lightning_app/CHANGELOG.md               |  4 ++--
 src/lightning_app/components/auto_scaler.py  |  3 ++-
 .../components/serve/python_server.py        |  6 +-----
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/examples/app_server_with_auto_scaler/app.py b/examples/app_server_with_auto_scaler/app.py
index b713bd6d1dcfc..70799827776a8 100644
--- a/examples/app_server_with_auto_scaler/app.py
+++ b/examples/app_server_with_auto_scaler/app.py
@@ -1,3 +1,4 @@
+# ! pip install torch torchvision
 from typing import Any, List
 
 import torch
@@ -22,10 +23,10 @@ class BatchResponse(BaseModel):
 class PyTorchServer(L.app.components.PythonServer):
     def __init__(self, *args, **kwargs):
         super().__init__(
-            port=L.app.utilities.network.find_free_network_port(),
             input_type=BatchRequestModel,
             output_type=BatchResponse,
-            cloud_compute=L.CloudCompute("gpu"),
+            *args,
+            **kwargs,
         )
 
     def setup(self):
@@ -57,16 +58,14 @@ def scale(self, replicas: int, metrics: dict) -> int:
         """The default scaling logic that users can override."""
         # scale out if the number of pending requests exceeds max batch size.
         max_requests_per_work = self.max_batch_size
-        pending_requests_per_running_or_pending_work = metrics["pending_requests"] / (
-            replicas + metrics["pending_works"]
-        )
-        if pending_requests_per_running_or_pending_work >= max_requests_per_work:
+        pending_requests_per_work = metrics["pending_requests"] / (replicas + metrics["pending_works"])
+        if pending_requests_per_work >= max_requests_per_work:
             return replicas + 1
 
         # scale in if the number of pending requests is below 25% of max_requests_per_work
         min_requests_per_work = max_requests_per_work * 0.25
-        pending_requests_per_running_work = metrics["pending_requests"] / replicas
-        if pending_requests_per_running_work < min_requests_per_work:
+        pending_requests_per_work = metrics["pending_requests"] / replicas
+        if pending_requests_per_work < min_requests_per_work:
             return replicas - 1
 
         return replicas
@@ -74,13 +73,17 @@ def scale(self, replicas: int, metrics: dict) -> int:
 
 app = L.LightningApp(
     MyAutoScaler(
+        # work class and args
         PyTorchServer,
-        min_replicas=2,
+        cloud_compute=L.CloudCompute("gpu"),
+        # autoscaler specific args
+        min_replicas=1,
         max_replicas=4,
         autoscale_interval=10,
         endpoint="predict",
         input_type=RequestModel,
         output_type=Any,
         timeout_batching=1,
+        max_batch_size=8,
     )
 )
diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md
index 34f85fa568c20..3963de4787cf8 100644
--- a/src/lightning_app/CHANGELOG.md
+++ b/src/lightning_app/CHANGELOG.md
@@ -13,7 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
--
+- Changed the default port of `PythonServer` from `7777` to a free port at runtime ([#15966](https://github.com/Lightning-AI/lightning/pull/15966))
 
 - Remove the `AutoScaler` dependency `aiohttp` from the base requirements ([#15971](https://github.com/Lightning-AI/lightning/pull/15971))
 
@@ -30,7 +30,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
--
+- Fixed `AutoScaler` failing due to port collision across works ([#15966](https://github.com/Lightning-AI/lightning/pull/15966))
 
 
 ## [1.8.4] - 2022-12-08
diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py
index 629e771c50600..fc6a1a873769b 100644
--- a/src/lightning_app/components/auto_scaler.py
+++ b/src/lightning_app/components/auto_scaler.py
@@ -450,7 +450,8 @@ def workers(self) -> List[LightningWork]:
     def create_work(self) -> LightningWork:
         """Replicates a LightningWork instance with args and kwargs provided via ``__init__``."""
         # TODO: Remove `start_with_flow=False` for faster initialization on the cloud
-        return self._work_cls(*self._work_args, **self._work_kwargs, start_with_flow=False)
+        self._work_kwargs.update(dict(start_with_flow=False))
+        return self._work_cls(*self._work_args, **self._work_kwargs)
 
     def add_work(self, work) -> str:
         """Adds a new LightningWork instance.
diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py
index 1868b0b357fd3..c522a25eb3f3d 100644
--- a/src/lightning_app/components/serve/python_server.py
+++ b/src/lightning_app/components/serve/python_server.py
@@ -75,8 +75,6 @@ class PythonServer(LightningWork, abc.ABC):
     @requires(["torch", "lightning_api_access"])
     def __init__(  # type: ignore
         self,
-        host: str = "127.0.0.1",
-        port: int = 7777,
         input_type: type = _DefaultInputData,
         output_type: type = _DefaultOutputData,
         **kwargs,
@@ -84,8 +82,6 @@ def __init__(  # type: ignore
         """The PythonServer Class enables to easily get your machine learning server up and running.
 
         Arguments:
-            host: Address to be used for running the server.
-            port: Port to be used to running the server.
             input_type: Optional `input_type` to be provided. This needs to be a pydantic BaseModel class.
                 The default data type is good enough for the basic usecases and it expects the data
                 to be a json object that has one key called `payload`
@@ -129,7 +125,7 @@ def predict(self, request):
         ...
         >>> app = LightningApp(SimpleServer())
         """
-        super().__init__(parallel=True, host=host, port=port, **kwargs)
+        super().__init__(parallel=True, **kwargs)
         if not issubclass(input_type, BaseModel):
             raise TypeError("input_type must be a pydantic BaseModel class")
         if not issubclass(output_type, BaseModel):
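
For readers following the change: the collision happened because every replicated work inherited the same hardcoded default port (7777), so the patch drops that default and, per the changelog entry, `PythonServer` now uses a free port at runtime; the example app previously worked around this by passing `L.app.utilities.network.find_free_network_port()` explicitly, which is no longer needed. The snippet below is a minimal, standard-library-only sketch of the underlying idea, not Lightning's implementation; the helper name `pick_free_port` and the `Replica` stand-in class are made up for illustration.

import socket
from contextlib import closing


def pick_free_port() -> int:
    """Ask the OS for an unused TCP port by binding to port 0."""
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.bind(("", 0))  # port 0 means "let the kernel choose any free port"
        return sock.getsockname()[1]


class Replica:
    """Stand-in for a replicated server work: each instance resolves its own port."""

    def __init__(self) -> None:
        self.port = pick_free_port()


if __name__ == "__main__":
    # Each replica asks the OS again, so replicas started this way do not
    # fight over a single shared default such as 7777.
    for i in range(4):
        print(f"replica {i} would serve on port {Replica().port}")

There is a small unavoidable window between picking the port and the server actually binding it, which is an accepted trade-off for this kind of per-replica port assignment.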