Skip to content

Commit

Permalink
[serve] Replace uuid.uuid4() with getrandbits (#49537)
Browse files Browse the repository at this point in the history
<!-- Thank you for your contribution! Please review
https://github.com/ray-project/ray/blob/master/CONTRIBUTING.rst before
opening a pull request. -->

<!-- Please add a reviewer to the assignee section when you create a PR.
If you don't have the access to it, we will shortly find a reviewer and
assign them to your PR. -->

## Why are these changes needed?

Reduces CPU overhead (particularly on the proxy) by generating request IDs
from `random.getrandbits(128)` instead of calling `uuid.uuid4()`. This is
less cryptographically secure but should be OK for our use case.

App:
```python
from ray import serve

@serve.deployment(
    max_ongoing_requests=100,
    num_replicas=16,
    ray_actor_options={"num_cpus": 0},
)
class A:
    def __call__(self):
        return b"hi"

app = A.bind()
```

Benchmark:
```
ab -n 10000 -c 100 http://127.0.0.1:8000/
```

Before (~780 qps):
```
Concurrency Level:      100
Time taken for tests:   12.747 seconds
Complete requests:      10000
Failed requests:        0
Total transferred:      1910000 bytes
HTML transferred:       120000 bytes
Requests per second:    784.47 [#/sec] (mean)
Time per request:       127.475 [ms] (mean)
Time per request:       1.275 [ms] (mean, across all concurrent requests)
Transfer rate:          146.32 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.6      0      21
Processing:     5  127  35.7    127     305
Waiting:        3  125  35.8    126     304
Total:          5  127  35.6    128     306

Percentage of the requests served within a certain time (ms)
  50%    128
  66%    138
  75%    147
  80%    153
  90%    170
  95%    188
  98%    210
  99%    224
 100%    306 (longest request)
```

After (~820 qps):
```
Concurrency Level:      100
Time taken for tests:   12.130 seconds
Complete requests:      10000
Failed requests:        0
Total transferred:      1910000 bytes
HTML transferred:       120000 bytes
Requests per second:    824.44 [#/sec] (mean)
Time per request:       121.295 [ms] (mean)
Time per request:       1.213 [ms] (mean, across all concurrent requests)
Transfer rate:          153.78 [Kbytes/sec] received

Connection Times (ms)
              min  mean[+/-sd] median   max
Connect:        0    0   0.5      0       4
Processing:     6  121  30.1    124     230
Waiting:        4  119  30.2    123     228
Total:          7  121  30.0    124     230

Percentage of the requests served within a certain time (ms)
  50%    124
  66%    132
  75%    138
  80%    144
  90%    157
  95%    167
  98%    181
  99%    189
 100%    230 (longest request)
```

## Related issue number

<!-- For example: "Closes #1234" -->

## Checks

- [ ] I've signed off every commit (by using the -s flag, i.e., `git
commit -s`) in this PR.
- [ ] I've run `scripts/format.sh` to lint the changes in this PR.
- [ ] I've included any doc changes needed for
https://docs.ray.io/en/master/.
- [ ] I've added any new APIs to the API Reference. For example, if I
added a method in Tune, I've added it in `doc/source/tune/api/` under the
corresponding `.rst` file.
- [ ] I've made sure the tests are passing. Note that there might be a
few flaky tests, see the recent failures at https://flakey-tests.ray.io/
- Testing Strategy
   - [ ] Unit tests
   - [ ] Release tests
   - [ ] This PR is not tested :(

---------

Signed-off-by: Edward Oakes <ed.nmi.oakes@gmail.com>
  • Loading branch information
edoakes authored Jan 2, 2025
1 parent 3ffca28 commit d7ad9a5
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 11 deletions.
5 changes: 2 additions & 3 deletions python/ray/serve/_private/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
import threading
import time
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict
from contextlib import contextmanager
Expand Down Expand Up @@ -31,7 +30,7 @@
from ray.serve._private.metrics_utils import InMemoryMetricsStore, MetricsPusher
from ray.serve._private.replica_result import ReplicaResult
from ray.serve._private.replica_scheduler import PendingRequest, ReplicaScheduler
from ray.serve._private.utils import resolve_deployment_response
from ray.serve._private.utils import generate_request_id, resolve_deployment_response
from ray.serve.config import AutoscalingConfig
from ray.serve.exceptions import BackPressureError
from ray.util import metrics
Expand Down Expand Up @@ -564,7 +563,7 @@ async def assign_request(
) -> ReplicaResult:
"""Assign a request to a replica and return the resulting object_ref."""

response_id = uuid.uuid4()
response_id = generate_request_id()
assign_request_task = asyncio.current_task()
ray.serve.context._add_request_pending_assignment(
request_meta.internal_request_id, response_id, assign_request_task
Expand Down
6 changes: 5 additions & 1 deletion python/ray/serve/_private/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,11 @@ def get_capacity_adjusted_num_replicas(


def generate_request_id() -> str:
return str(uuid.uuid4())
# NOTE(edoakes): we use random.getrandbits because it reduces CPU overhead
# significantly. This is less cryptographically secure but should be ok for
# request ID generation.
# See https://bugs.python.org/issue45556 for discussion.
return str(uuid.UUID(int=random.getrandbits(128), version=4))


def inside_ray_client_context() -> bool:
Expand Down
5 changes: 3 additions & 2 deletions python/ray/serve/tests/test_http_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import ray
from ray import serve
from ray.serve._private.utils import generate_request_id


def test_request_id_header_by_default(serve_instance):
Expand Down Expand Up @@ -154,10 +155,10 @@ async def main():
"""Sending 20 requests in parallel all with the same request id, but with
different request body.
"""
bodies = [{"app_name": f"an_{uuid.uuid4()}"} for _ in range(20)]
bodies = [{"app_name": f"an_{generate_request_id()}"} for _ in range(20)]
connector = TCPConnector(ssl=False)
async with aiohttp.ClientSession(connector=connector) as session:
request_id = f"rid_{uuid.uuid4()}"
request_id = f"rid_{generate_request_id()}"
tasks = [
send_request(session, body, request_id=request_id) for body in bodies
]
Expand Down
10 changes: 5 additions & 5 deletions python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import random
import sys
import time
import uuid
from typing import Optional, Set

import pytest
Expand All @@ -29,6 +28,7 @@
)
from ray.serve._private.replica_scheduler.pow_2_scheduler import ReplicaQueueLengthCache
from ray.serve._private.test_utils import MockTimer
from ray.serve._private.utils import generate_request_id

TIMER = MockTimer()

Expand Down Expand Up @@ -184,8 +184,8 @@ def fake_pending_request(
args=list(),
kwargs=dict(),
metadata=RequestMetadata(
request_id=str(uuid.uuid4()),
internal_request_id=str(uuid.uuid4()),
request_id=generate_request_id(),
internal_request_id=generate_request_id(),
multiplexed_model_id=model_id,
),
created_at=created_at,
Expand All @@ -195,8 +195,8 @@ def fake_pending_request(
args=list(),
kwargs=dict(),
metadata=RequestMetadata(
request_id=str(uuid.uuid4()),
internal_request_id=str(uuid.uuid4()),
request_id=generate_request_id(),
internal_request_id=generate_request_id(),
multiplexed_model_id=model_id,
),
)
Expand Down

0 comments on commit d7ad9a5

Please sign in to comment.