diff --git a/python/ray/serve/_private/router.py b/python/ray/serve/_private/router.py
index 85d391c95d52..38d08c8591c3 100644
--- a/python/ray/serve/_private/router.py
+++ b/python/ray/serve/_private/router.py
@@ -3,7 +3,6 @@
 import logging
 import threading
 import time
-import uuid
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from contextlib import contextmanager
@@ -31,7 +30,7 @@
 from ray.serve._private.metrics_utils import InMemoryMetricsStore, MetricsPusher
 from ray.serve._private.replica_result import ReplicaResult
 from ray.serve._private.replica_scheduler import PendingRequest, ReplicaScheduler
-from ray.serve._private.utils import resolve_deployment_response
+from ray.serve._private.utils import generate_request_id, resolve_deployment_response
 from ray.serve.config import AutoscalingConfig
 from ray.serve.exceptions import BackPressureError
 from ray.util import metrics
@@ -564,7 +563,7 @@ async def assign_request(
     ) -> ReplicaResult:
         """Assign a request to a replica and return the resulting object_ref."""
 
-        response_id = uuid.uuid4()
+        response_id = generate_request_id()
         assign_request_task = asyncio.current_task()
         ray.serve.context._add_request_pending_assignment(
             request_meta.internal_request_id, response_id, assign_request_task
diff --git a/python/ray/serve/_private/utils.py b/python/ray/serve/_private/utils.py
index 1193f7722b63..c742fe4aad5c 100644
--- a/python/ray/serve/_private/utils.py
+++ b/python/ray/serve/_private/utils.py
@@ -542,7 +542,11 @@ def get_capacity_adjusted_num_replicas(
 
 
 def generate_request_id() -> str:
-    return str(uuid.uuid4())
+    # NOTE(edoakes): we use random.getrandbits because it reduces CPU overhead
+    # significantly. This is less cryptographically secure but should be ok for
+    # request ID generation.
+    # See https://bugs.python.org/issue45556 for discussion.
+    return str(uuid.UUID(int=random.getrandbits(128), version=4))
 
 
 def inside_ray_client_context() -> bool:
diff --git a/python/ray/serve/tests/test_http_headers.py b/python/ray/serve/tests/test_http_headers.py
index 22ddb9c530fc..b85e9816264d 100644
--- a/python/ray/serve/tests/test_http_headers.py
+++ b/python/ray/serve/tests/test_http_headers.py
@@ -11,6 +11,7 @@
 
 import ray
 from ray import serve
+from ray.serve._private.utils import generate_request_id
 
 
 def test_request_id_header_by_default(serve_instance):
@@ -154,10 +155,10 @@ async def main():
         """Sending 20 requests in parallel all with the same request id, but with
         different request body.
         """
-        bodies = [{"app_name": f"an_{uuid.uuid4()}"} for _ in range(20)]
+        bodies = [{"app_name": f"an_{generate_request_id()}"} for _ in range(20)]
         connector = TCPConnector(ssl=False)
         async with aiohttp.ClientSession(connector=connector) as session:
-            request_id = f"rid_{uuid.uuid4()}"
+            request_id = f"rid_{generate_request_id()}"
             tasks = [
                 send_request(session, body, request_id=request_id) for body in bodies
             ]
diff --git a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
index ecb5efbdf6e6..348bb8377bb6 100644
--- a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
+++ b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
@@ -4,7 +4,6 @@
 import random
 import sys
 import time
-import uuid
 from typing import Optional, Set
 
 import pytest
@@ -29,6 +28,7 @@
 )
 from ray.serve._private.replica_scheduler.pow_2_scheduler import ReplicaQueueLengthCache
 from ray.serve._private.test_utils import MockTimer
+from ray.serve._private.utils import generate_request_id
 
 TIMER = MockTimer()
 
@@ -184,8 +184,8 @@ def fake_pending_request(
             args=list(),
             kwargs=dict(),
             metadata=RequestMetadata(
-                request_id=str(uuid.uuid4()),
-                internal_request_id=str(uuid.uuid4()),
+                request_id=generate_request_id(),
+                internal_request_id=generate_request_id(),
                 multiplexed_model_id=model_id,
             ),
             created_at=created_at,
@@ -195,8 +195,8 @@ def fake_pending_request(
             args=list(),
             kwargs=dict(),
             metadata=RequestMetadata(
-                request_id=str(uuid.uuid4()),
-                internal_request_id=str(uuid.uuid4()),
+                request_id=generate_request_id(),
+                internal_request_id=generate_request_id(),
                 multiplexed_model_id=model_id,
             ),
         )