From 81e19038efe6988ca4d3e78dfb24140c587fa32c Mon Sep 17 00:00:00 2001 From: Maksym Taran Date: Fri, 4 Oct 2024 17:35:30 -0700 Subject: [PATCH 1/5] Add a missing newline that was breaking formatting (#4037) What it says on the tin. --- docs/source/getting-started/installation.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 9f251a5aafe..cf6115ee9e8 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -302,6 +302,7 @@ Fluidstack ~~~~~~~~~~~~~~~~~~ `Fluidstack `__ is a cloud provider offering low-cost GPUs. To configure Fluidstack access, go to the `Home `__ page on your Fluidstack console to generate an API key and then add the :code:`API key` to :code:`~/.fluidstack/api_key` : + .. code-block:: shell mkdir -p ~/.fluidstack From 1efd48a4df350b54f1e6d2b28afff19391aec0b4 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Sat, 5 Oct 2024 14:18:19 -0700 Subject: [PATCH 2/5] Stop using deprecated `on_event()` decorator (#4033) * Stop using deprecated `on_event()` decorator Fixes #3997 Replace deprecated `@app.on_event('startup')` decorator with lifespan event handler in `sky/serve/controller.py`. * Remove the `@app.on_event('startup')` decorator. * Add a lifespan event handler to configure the logger. * Update the `SkyServeController` class to use the lifespan event handler. * format and add decorator * format --- sky/serve/controller.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sky/serve/controller.py b/sky/serve/controller.py index 361a1293d21..5d49c1aa307 100644 --- a/sky/serve/controller.py +++ b/sky/serve/controller.py @@ -2,6 +2,7 @@ Responsible for autoscaling and replica management. """ +import contextlib import logging import threading import time @@ -49,7 +50,14 @@ def __init__(self, service_name: str, service_spec: serve.SkyServiceSpec, autoscalers.Autoscaler.from_spec(service_name, service_spec)) self._host = host self._port = port - self._app = fastapi.FastAPI() + self._app = fastapi.FastAPI(lifespan=self.lifespan) + + @contextlib.asynccontextmanager + async def lifespan(self, _: fastapi.FastAPI): + uvicorn_access_logger = logging.getLogger('uvicorn.access') + for handler in uvicorn_access_logger.handlers: + handler.setFormatter(sky_logging.FORMATTER) + yield def _run_autoscaler(self): logger.info('Starting autoscaler.') @@ -142,12 +150,6 @@ async def update_service(request: fastapi.Request): f'{common_utils.format_exception(e)}') return {'message': 'Error'} - @self._app.on_event('startup') - def configure_logger(): - uvicorn_access_logger = logging.getLogger('uvicorn.access') - for handler in uvicorn_access_logger.handlers: - handler.setFormatter(sky_logging.FORMATTER) - threading.Thread(target=self._run_autoscaler).start() logger.info('SkyServe Controller started on ' From f4886bed755a3a6ba62554ef359fbe1dcd174d78 Mon Sep 17 00:00:00 2001 From: krishnived <81918756+krishnived@users.noreply.github.com> Date: Sat, 5 Oct 2024 17:31:34 -0500 Subject: [PATCH 3/5] Fixed Typo in Readme.md (#4039) Update README.md fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a5287dbb3cd..dc7de3ea574 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ ---- :fire: *News* :fire: -- [Sep, 2024] Point, Launch and Serve **Llama 3.2** on on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/) +- [Sep, 2024] Point, Launch and Serve **Llama 3.2** on Kubernetes or Any Cloud: [**example**](./llm/llama-3_2/) - [Sep, 2024] Run and deploy [**Pixtral**](./llm/pixtral), the first open-source multimodal model from Mistral AI. - [Jul, 2024] [**Finetune**](./llm/llama-3_1-finetuning/) and [**serve**](./llm/llama-3_1/) **Llama 3.1** on your infra - [Jun, 2024] Reproduce **GPT** with [llm.c](https://github.com/karpathy/llm.c/discussions/481) on any cloud: [**guide**](./llm/gpt-2/) From b0a1ea2c54612a17569f80560445336e64c6821f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 6 Oct 2024 10:44:33 -0700 Subject: [PATCH 4/5] [docs] Add docs for internal load balancers on k8s (#4028) * Add internal ports docs * Add internal ports docs * Add internal ports docs * Add internal ports docs --- .../reference/kubernetes/kubernetes-ports.rst | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/docs/source/reference/kubernetes/kubernetes-ports.rst b/docs/source/reference/kubernetes/kubernetes-ports.rst index 0f538363131..3824b651717 100644 --- a/docs/source/reference/kubernetes/kubernetes-ports.rst +++ b/docs/source/reference/kubernetes/kubernetes-ports.rst @@ -1,7 +1,7 @@ .. _kubernetes-ports: Exposing Services on Kubernetes -------------------------------- +=============================== .. note:: This is a guide on how to configure an existing Kubernetes cluster (along with the caveats involved) to successfully expose ports and services externally through SkyPilot. @@ -23,7 +23,7 @@ If your cluster does not support LoadBalancer services, SkyPilot can also use `a .. _kubernetes-loadbalancer: LoadBalancer Service -^^^^^^^^^^^^^^^^^^^^ +-------------------- This mode exposes ports through a Kubernetes `LoadBalancer Service `__. This is the default mode used by SkyPilot. @@ -52,11 +52,53 @@ These load balancers will be automatically terminated when the cluster is delete To work around this issue, make sure all your ports have services running behind them. +Internal Load Balancers +^^^^^^^^^^^^^^^^^^^^^^^ + +To restrict your services to be accessible only within the cluster, you can set all SkyPilot services to use `internal load balancers `_. + +Depending on your cloud, set the appropriate annotation in the SkyPilot config file (``~/.sky/config.yaml``): + +.. tab-set:: + + .. tab-item:: GCP + :sync: internal-lb-gke + + .. code-block:: yaml + + # ~/.sky/config.yaml + kubernetes: + custom_metadata: + annotations: + networking.gke.io/load-balancer-type: "Internal" + + .. tab-item:: AWS + :sync: internal-lb-aws + + .. code-block:: yaml + + # ~/.sky/config.yaml + kubernetes: + custom_metadata: + annotations: + service.beta.kubernetes.io/aws-load-balancer-internal: "true" + + .. tab-item:: Azure + :sync: internal-lb-azure + + .. code-block:: yaml + + # ~/.sky/config.yaml + kubernetes: + custom_metadata: + annotations: + service.beta.kubernetes.io/azure-load-balancer-internal: "true" + .. _kubernetes-ingress: Nginx Ingress -^^^^^^^^^^^^^ +------------- This mode exposes ports by creating a Kubernetes `Ingress `_ backed by an existing `Nginx Ingress Controller `_. From d5b6d89c83ea1ee7258f68314da4c6f8add83e04 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Sun, 6 Oct 2024 13:30:14 -0700 Subject: [PATCH 5/5] Fix error handling in service update process (#4034) * Fix error handling in service update process Fixes #4030 Address error handling inconsistency in service update process. * **sky/serve/controller.py** - Modify `/controller/update_service` endpoint to return appropriate HTTP status codes. - Return 400 for client errors and 500 for server errors. - Use `responses.JSONResponse` for returning responses. * **sky/serve/serve_utils.py** - Update `update_service_encoded` function to handle different status codes. - Raise exceptions based on the response body for 400 and 500 status codes. * **sky/utils/subprocess_utils.py** - Add `stream_logs` parameter in the comment to reflect the code. * format * apply to load_balancer_sync for consistency --- sky/serve/controller.py | 21 ++++++++++++++------- sky/serve/serve_utils.py | 4 ++++ sky/utils/subprocess_utils.py | 1 + 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sky/serve/controller.py b/sky/serve/controller.py index 5d49c1aa307..580964273ef 100644 --- a/sky/serve/controller.py +++ b/sky/serve/controller.py @@ -10,6 +10,7 @@ from typing import Any, Dict, List import fastapi +from fastapi import responses import uvicorn from sky import serve @@ -96,7 +97,8 @@ def _run_autoscaler(self): def run(self) -> None: @self._app.post('/controller/load_balancer_sync') - async def load_balancer_sync(request: fastapi.Request): + async def load_balancer_sync( + request: fastapi.Request) -> fastapi.Response: request_data = await request.json() # TODO(MaoZiming): Check aggregator type. request_aggregator: Dict[str, Any] = request_data.get( @@ -104,18 +106,21 @@ async def load_balancer_sync(request: fastapi.Request): timestamps: List[int] = request_aggregator.get('timestamps', []) logger.info(f'Received {len(timestamps)} inflight requests.') self._autoscaler.collect_request_information(request_aggregator) - return { + return responses.JSONResponse(content={ 'ready_replica_urls': self._replica_manager.get_active_replica_urls() - } + }, + status_code=200) @self._app.post('/controller/update_service') - async def update_service(request: fastapi.Request): + async def update_service(request: fastapi.Request) -> fastapi.Response: request_data = await request.json() try: version = request_data.get('version', None) if version is None: - return {'message': 'Error: version is not specified.'} + return responses.JSONResponse( + content={'message': 'Error: version is not specified.'}, + status_code=400) update_mode_str = request_data.get( 'mode', serve_utils.DEFAULT_UPDATE_MODE.value) update_mode = serve_utils.UpdateMode(update_mode_str) @@ -144,11 +149,13 @@ async def update_service(request: fastapi.Request): self._autoscaler.update_version(version, service, update_mode=update_mode) - return {'message': 'Success'} + return responses.JSONResponse(content={'message': 'Success'}, + status_code=200) except Exception as e: # pylint: disable=broad-except logger.error(f'Error in update_service: ' f'{common_utils.format_exception(e)}') - return {'message': 'Error'} + return responses.JSONResponse(content={'message': 'Error'}, + status_code=500) threading.Thread(target=self._run_autoscaler).start() diff --git a/sky/serve/serve_utils.py b/sky/serve/serve_utils.py index 4a6467a6a32..0ecf34135a7 100644 --- a/sky/serve/serve_utils.py +++ b/sky/serve/serve_utils.py @@ -302,6 +302,10 @@ def update_service_encoded(service_name: str, version: int, mode: str) -> str: raise ValueError('The service is up-ed in an old version and does not ' 'support update. Please `sky serve down` ' 'it first and relaunch the service. ') + elif resp.status_code == 400: + raise ValueError(f'Client error during service update: {resp.text}') + elif resp.status_code == 500: + raise RuntimeError(f'Server error during service update: {resp.text}') elif resp.status_code != 200: raise ValueError(f'Failed to update service: {resp.text}') diff --git a/sky/utils/subprocess_utils.py b/sky/utils/subprocess_utils.py index d1779352a81..303e3ddad99 100644 --- a/sky/utils/subprocess_utils.py +++ b/sky/utils/subprocess_utils.py @@ -77,6 +77,7 @@ def handle_returncode(returncode: int, command: The command that was run. error_msg: The error message to print. stderr: The stderr of the command. + stream_logs: Whether to stream logs. """ echo = logger.error if stream_logs else logger.debug if returncode != 0: