[serve] Replace uuid.uuid4() with getrandbits (#49537)

## Why are these changes needed?

Reduces CPU overhead (particularly on the proxy). This is less cryptographically secure but should be OK for our use case.

App:

```python
from ray import serve


@serve.deployment(
    max_ongoing_requests=100,
    num_replicas=16,
    ray_actor_options={"num_cpus": 0},
)
class A:
    def __call__(self):
        return b"hi"


app = A.bind()
```

Benchmark:

```
ab -n 10000 -c 100 http://127.0.0.1:8000/
```

Before (~780 qps):

```
Concurrency Level:      100
Time taken for tests:   12.747 seconds
Complete requests:      10000
Failed requests:        0
Total transferred:      1910000 bytes
HTML transferred:       120000 bytes
Requests per second:    784.47 [#/sec] (mean)
Time per request:       127.475 [ms] (mean)
Time per request:       1.275 [ms] (mean, across all concurrent requests)
Transfer rate:          146.32 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.6      0      21
Processing:     5  127  35.7    127     305
Waiting:        3  125  35.8    126     304
Total:          5  127  35.6    128     306

Percentage of the requests served within a certain time (ms)
  50%    128
  66%    138
  75%    147
  80%    153
  90%    170
  95%    188
  98%    210
  99%    224
 100%    306 (longest request)
```

After (~820 qps):

```
Concurrency Level:      100
Time taken for tests:   12.130 seconds
Complete requests:      10000
Failed requests:        0
Total transferred:      1910000 bytes
HTML transferred:       120000 bytes
Requests per second:    824.44 [#/sec] (mean)
Time per request:       121.295 [ms] (mean)
Time per request:       1.213 [ms] (mean, across all concurrent requests)
Transfer rate:          153.78 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.5      0       4
Processing:     6  121  30.1    124     230
Waiting:        4  119  30.2    123     228
Total:          7  121  30.0    124     230

Percentage of the requests served within a certain time (ms)
  50%    124
  66%    132
  75%    138
  80%    144
  90%    157
  95%    167
  98%    181
  99%    189
 100%    230 (longest request)
```

## Related issue number

## Checks

- [ ] I've signed off every commit (by using the -s flag, i.e., `git commit -s`) in this PR.
- [ ] I've run `scripts/format.sh` to lint the changes in this PR.
- [ ] I've included any doc changes needed for https://docs.ray.io/en/master/.
- [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file.
- [ ] I've made sure the tests are passing. Note that there might be a few flaky tests, see the recent failures at https://flakey-tests.ray.io/
- Testing Strategy
   - [ ] Unit tests
   - [ ] Release tests
   - [ ] This PR is not tested :(

---------

Signed-off-by: Edward Oakes <ed.nmi.oakes@gmail.com>
ray-project · Jan 2, 2025 · d7ad9a5 · d7ad9a5
1 parent 3ffca28
commit d7ad9a5
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 11 deletions.
diff --git a/python/ray/serve/_private/router.py b/python/ray/serve/_private/router.py
@@ -3,7 +3,6 @@
 import logging
 import threading
 import time
-import uuid
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from contextlib import contextmanager
@@ -31,7 +30,7 @@
 from ray.serve._private.metrics_utils import InMemoryMetricsStore, MetricsPusher
 from ray.serve._private.replica_result import ReplicaResult
 from ray.serve._private.replica_scheduler import PendingRequest, ReplicaScheduler
-from ray.serve._private.utils import resolve_deployment_response
+from ray.serve._private.utils import generate_request_id, resolve_deployment_response
 from ray.serve.config import AutoscalingConfig
 from ray.serve.exceptions import BackPressureError
 from ray.util import metrics
@@ -564,7 +563,7 @@ async def assign_request(
     ) -> ReplicaResult:
         """Assign a request to a replica and return the resulting object_ref."""
 
-        response_id = uuid.uuid4()
+        response_id = generate_request_id()
         assign_request_task = asyncio.current_task()
         ray.serve.context._add_request_pending_assignment(
             request_meta.internal_request_id, response_id, assign_request_task

diff --git a/python/ray/serve/_private/utils.py b/python/ray/serve/_private/utils.py
@@ -542,7 +542,11 @@ def get_capacity_adjusted_num_replicas(
 
 
 def generate_request_id() -> str:
-    return str(uuid.uuid4())
+    # NOTE(edoakes): we use random.getrandbits because it reduces CPU overhead
+    # significantly. This is less cryptographically secure but should be ok for
+    # request ID generation.
+    # See https://bugs.python.org/issue45556 for discussion.
+    return str(uuid.UUID(int=random.getrandbits(128), version=4))
 
 
 def inside_ray_client_context() -> bool:

diff --git a/python/ray/serve/tests/test_http_headers.py b/python/ray/serve/tests/test_http_headers.py
@@ -11,6 +11,7 @@
 
 import ray
 from ray import serve
+from ray.serve._private.utils import generate_request_id
 
 
 def test_request_id_header_by_default(serve_instance):
@@ -154,10 +155,10 @@ async def main():
         """Sending 20 requests in parallel all with the same request id, but with
         different request body.
         """
-        bodies = [{"app_name": f"an_{uuid.uuid4()}"} for _ in range(20)]
+        bodies = [{"app_name": f"an_{generate_request_id()}"} for _ in range(20)]
         connector = TCPConnector(ssl=False)
         async with aiohttp.ClientSession(connector=connector) as session:
-            request_id = f"rid_{uuid.uuid4()}"
+            request_id = f"rid_{generate_request_id()}"
             tasks = [
                 send_request(session, body, request_id=request_id) for body in bodies
             ]

diff --git a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
@@ -4,7 +4,6 @@
 import random
 import sys
 import time
-import uuid
 from typing import Optional, Set
 
 import pytest
@@ -29,6 +28,7 @@
 )
 from ray.serve._private.replica_scheduler.pow_2_scheduler import ReplicaQueueLengthCache
 from ray.serve._private.test_utils import MockTimer
+from ray.serve._private.utils import generate_request_id
 
 TIMER = MockTimer()
 
@@ -184,8 +184,8 @@ def fake_pending_request(
             args=list(),
             kwargs=dict(),
             metadata=RequestMetadata(
-                request_id=str(uuid.uuid4()),
-                internal_request_id=str(uuid.uuid4()),
+                request_id=generate_request_id(),
+                internal_request_id=generate_request_id(),
                 multiplexed_model_id=model_id,
             ),
             created_at=created_at,
@@ -195,8 +195,8 @@ def fake_pending_request(
             args=list(),
             kwargs=dict(),
             metadata=RequestMetadata(
-                request_id=str(uuid.uuid4()),
-                internal_request_id=str(uuid.uuid4()),
+                request_id=generate_request_id(),
+                internal_request_id=generate_request_id(),
                 multiplexed_model_id=model_id,
             ),
         )