From d7ad9a5b346a25bc271b0818f4679990e49a849f Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Thu, 2 Jan 2025 11:11:26 -0600 Subject: [PATCH] [serve] Replace `uuid.uuid4()` with `getrandbits` (#49537) ## Why are these changes needed? Reduces CPU overhead (particularly on the proxy). This is less cryptographically secure but should be OK for our use case. App: ```python from ray import serve @serve.deployment( max_ongoing_requests=100, num_replicas=16, ray_actor_options={"num_cpus": 0}, ) class A: def __call__(self): return b"hi" app = A.bind() ``` Benchmark: ``` ab -n 10000 -c 100 http://127.0.0.1:8000/ ``` Before (~780 qps): ``` Concurrency Level: 100 Time taken for tests: 12.747 seconds Complete requests: 10000 Failed requests: 0 Total transferred: 1910000 bytes HTML transferred: 120000 bytes Requests per second: 784.47 [#/sec] (mean) Time per request: 127.475 [ms] (mean) Time per request: 1.275 [ms] (mean, across all concurrent requests) Transfer rate: 146.32 [Kbytes/sec] received Connection Times (ms) min mean[+/-sd] median max Connect: 0 0 0.6 0 21 Processing: 5 127 35.7 127 305 Waiting: 3 125 35.8 126 304 Total: 5 127 35.6 128 306 Percentage of the requests served within a certain time (ms) 50% 128 66% 138 75% 147 80% 153 90% 170 95% 188 98% 210 99% 224 100% 306 (longest request) ``` After (~820 qps): ``` Concurrency Level: 100 Time taken for tests: 12.130 seconds Complete requests: 10000 Failed requests: 0 Total transferred: 1910000 bytes HTML transferred: 120000 bytes Requests per second: 824.44 [#/sec] (mean) Time per request: 121.295 [ms] (mean) Time per request: 1.213 [ms] (mean, across all concurrent requests) Transfer rate: 153.78 [Kbytes/sec] received Connection Times (ms) min mean[+/-sd] median max Connect: 0 0 0.5 0 4 Processing: 6 121 30.1 124 230 Waiting: 4 119 30.2 123 228 Total: 7 121 30.0 124 230 Percentage of the requests served within a certain time (ms) 50% 124 66% 132 75% 138 80% 144 90% 157 95% 167 98% 181 99% 189 100% 230 (longest 
request) ``` ## Related issue number ## Checks - [ ] I've signed off every commit (by using the -s flag, i.e., `git commit -s`) in this PR. - [ ] I've run `scripts/format.sh` to lint the changes in this PR. - [ ] I've included any doc changes needed for https://docs.ray.io/en/master/. - [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file. - [ ] I've made sure the tests are passing. Note that there might be a few flaky tests; see the recent failures at https://flakey-tests.ray.io/ - Testing Strategy - [ ] Unit tests - [ ] Release tests - [ ] This PR is not tested :( --------- Signed-off-by: Edward Oakes --- python/ray/serve/_private/router.py | 5 ++--- python/ray/serve/_private/utils.py | 6 +++++- python/ray/serve/tests/test_http_headers.py | 5 +++-- .../serve/tests/unit/test_pow_2_replica_scheduler.py | 10 +++++----- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/python/ray/serve/_private/router.py b/python/ray/serve/_private/router.py index 85d391c95d52..38d08c8591c3 100644 --- a/python/ray/serve/_private/router.py +++ b/python/ray/serve/_private/router.py @@ -3,7 +3,6 @@ import logging import threading import time -import uuid from abc import ABC, abstractmethod from collections import defaultdict from contextlib import contextmanager @@ -31,7 +30,7 @@ from ray.serve._private.metrics_utils import InMemoryMetricsStore, MetricsPusher from ray.serve._private.replica_result import ReplicaResult from ray.serve._private.replica_scheduler import PendingRequest, ReplicaScheduler -from ray.serve._private.utils import resolve_deployment_response +from ray.serve._private.utils import generate_request_id, resolve_deployment_response from ray.serve.config import AutoscalingConfig from ray.serve.exceptions import BackPressureError from ray.util import metrics @@ -564,7 +563,7 @@ async def assign_request( ) -> ReplicaResult: """Assign a request to
a replica and return the resulting object_ref.""" - response_id = uuid.uuid4() + response_id = generate_request_id() assign_request_task = asyncio.current_task() ray.serve.context._add_request_pending_assignment( request_meta.internal_request_id, response_id, assign_request_task diff --git a/python/ray/serve/_private/utils.py b/python/ray/serve/_private/utils.py index 1193f7722b63..c742fe4aad5c 100644 --- a/python/ray/serve/_private/utils.py +++ b/python/ray/serve/_private/utils.py @@ -542,7 +542,11 @@ def get_capacity_adjusted_num_replicas( def generate_request_id() -> str: - return str(uuid.uuid4()) + # NOTE(edoakes): we use random.getrandbits because it reduces CPU overhead + # significantly. This is less cryptographically secure but should be ok for + # request ID generation. + # See https://bugs.python.org/issue45556 for discussion. + return str(uuid.UUID(int=random.getrandbits(128), version=4)) def inside_ray_client_context() -> bool: diff --git a/python/ray/serve/tests/test_http_headers.py b/python/ray/serve/tests/test_http_headers.py index 22ddb9c530fc..b85e9816264d 100644 --- a/python/ray/serve/tests/test_http_headers.py +++ b/python/ray/serve/tests/test_http_headers.py @@ -11,6 +11,7 @@ import ray from ray import serve +from ray.serve._private.utils import generate_request_id def test_request_id_header_by_default(serve_instance): @@ -154,10 +155,10 @@ async def main(): """Sending 20 requests in parallel all with the same request id, but with different request body. 
""" - bodies = [{"app_name": f"an_{uuid.uuid4()}"} for _ in range(20)] + bodies = [{"app_name": f"an_{generate_request_id()}"} for _ in range(20)] connector = TCPConnector(ssl=False) async with aiohttp.ClientSession(connector=connector) as session: - request_id = f"rid_{uuid.uuid4()}" + request_id = f"rid_{generate_request_id()}" tasks = [ send_request(session, body, request_id=request_id) for body in bodies ] diff --git a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py index ecb5efbdf6e6..348bb8377bb6 100644 --- a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py +++ b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py @@ -4,7 +4,6 @@ import random import sys import time -import uuid from typing import Optional, Set import pytest @@ -29,6 +28,7 @@ ) from ray.serve._private.replica_scheduler.pow_2_scheduler import ReplicaQueueLengthCache from ray.serve._private.test_utils import MockTimer +from ray.serve._private.utils import generate_request_id TIMER = MockTimer() @@ -184,8 +184,8 @@ def fake_pending_request( args=list(), kwargs=dict(), metadata=RequestMetadata( - request_id=str(uuid.uuid4()), - internal_request_id=str(uuid.uuid4()), + request_id=generate_request_id(), + internal_request_id=generate_request_id(), multiplexed_model_id=model_id, ), created_at=created_at, @@ -195,8 +195,8 @@ def fake_pending_request( args=list(), kwargs=dict(), metadata=RequestMetadata( - request_id=str(uuid.uuid4()), - internal_request_id=str(uuid.uuid4()), + request_id=generate_request_id(), + internal_request_id=generate_request_id(), multiplexed_model_id=model_id, ), )