Skip to content
Merged
2 changes: 1 addition & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ pip install -e .
Currently, this will install lightweight tools for:
- Analyzing prefix-structured data (`datagen analyze`)
- Synthesizing structured data customizable for testing purposes (`datagen synthesize`)
Detailed information are provided in the `data_generator` directory.
Detailed information is provided in the `prefix_data_generator` directory.

The benchmarking scripts for the core dynamo components are to come soon (e.g. routing, disagg, Planner).
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from data_generator.cli import main as cli_main
from prefix_data_generator.cli import main as cli_main


def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ def main():

if args.command == "analyze":
# Import and run the analyzer main
from data_generator import prefix_analyzer
from prefix_data_generator import prefix_analyzer

sys.argv = [sys.argv[0]] + remaining
prefix_analyzer.main()
elif args.command == "synthesize":
# Import and run the synthesizer main
from data_generator import synthesizer
from prefix_data_generator import synthesizer

sys.argv = [sys.argv[0]] + remaining
synthesizer.main()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
import tempfile

import requests
from data_generator.hasher import hashes_to_texts
from data_generator.synthesizer import Synthesizer
from prefix_data_generator.hasher import hashes_to_texts
from prefix_data_generator.synthesizer import Synthesizer

# download the mooncake trace file
mooncake_trace_permalink = "https://raw.githubusercontent.com/kvcache-ai/Mooncake/f09c501b2a5d73e4d60cdeb612d7d0d54e1ec228/mooncake_trace.jsonl"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

import networkx as nx
import numpy as np
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import get_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import get_cdf


def _verify_tree(G: nx.DiGraph) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import json
from collections import Counter

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics


class PrefixAnalyzer:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@
import networkx as nx
import numpy as np
import pandas as pd
from data_generator.graph_utils import (
from prefix_data_generator.graph_utils import (
_mark_visited,
_merge_chains,
_precompute_transition_cdfs,
_remove_leaves,
_verify_tree,
)
from data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from data_generator.sampler import EmpiricalSampler, sample_from_cdf
from prefix_data_generator.protocols import CACHE_END, END_NODE, SUPER_ROOT
from prefix_data_generator.sampler import EmpiricalSampler, sample_from_cdf


class Synthesizer:
Expand Down Expand Up @@ -334,7 +334,7 @@ def main():
import argparse
from pathlib import Path

from data_generator.logging_utils import calculate_and_print_statistics
from prefix_data_generator.logging_utils import calculate_and_print_statistics

parser = argparse.ArgumentParser(description="Synthesize Mooncake-Esque dataset")
parser.add_argument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import random

import pytest
from data_generator.hasher import hashes_to_texts, texts_to_hashes
from prefix_data_generator.hasher import hashes_to_texts, texts_to_hashes
from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers
from transformers import AutoTokenizer, PreTrainedTokenizerFast

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from collections import Counter

import numpy as np
from data_generator.sampler import EmpiricalSampler
from prefix_data_generator.sampler import EmpiricalSampler


def test_empirical_sampler_distribution():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import tempfile
import unittest

from data_generator.synthesizer import Synthesizer
from prefix_data_generator.synthesizer import Synthesizer


# Helper function to create and dump data
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ dependencies = [
]

[project.scripts]
datagen = "data_generator.cli:main"
datagen = "prefix_data_generator.cli:main"

[project.urls]
Repository = "https://github.com/ai-dynamo/dynamo.git"
Expand All @@ -59,10 +59,10 @@ requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["data_generator"]
packages = ["prefix_data_generator"]

[tool.setuptools.package-data]
data_generator = ["**/*.py"]
prefix_data_generator = ["**/*.py"]

[tool.mypy]
explicit_package_bases = true
Expand Down
123 changes: 123 additions & 0 deletions benchmarks/sin_load_generator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
-->

# Sinusoidal Load Generator

`sin_synth.py` is a simple script to generate synthetic load with sinusoidal request rate and isl/osl ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) jsonl format, which can be directly used in [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf/genai_perf).

## Usage

```bash
cd benchmarks/sin_load_generator
python sin_synth.py [OPTIONS]
```

### Basic Options

- `--block-size INT` (default: 512)
- Block size for hashing. Since there is no prefix caching, the block size does not need to match the engine's KV block size.

- `--total-blocks INT` (default: 10000)
- ISL prompt blocks are randomly sampled from this range. Use a larger number to reduce the chance of duplicated prompts.

- `--output-file STR` (default: auto-generated)
- Output file name (in jsonl format)
- If not specified, the script will generate a filename based on parameters

- `--time-duration INT` (default: 100)
- Total time duration of the dataset in seconds

- `--process-interval INT` (default: 1)
- Sampling interval used to generate the dataset
- Smaller interval leads to more precise changes in request rate and isl/osl ratio but longer generation time.

### Request Rate Parameters

The request rate follows a sinusoidal pattern:
```
request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Note that the phase shift of `-π/2` makes the request rate start from its minimum at `t = 0`.

- `--request-rate-min FLOAT` (default: 5)
- Minimum request rate in requests per second

- `--request-rate-max FLOAT` (default: 10)
- Maximum request rate in requests per second

- `--request-rate-period FLOAT` (default: 10)
- Period of the sinusoidal request rate in seconds

### Input/Output Sequence Length Parameters

The script will generate load with requests sampled from two preset ISL/OSL combinations.
The ISL/OSL ratio defines what fraction of requests follow the first preset ISL/OSL pattern. A ratio of 0 means all requests follow the second preset ISL/OSL pattern, while a ratio of 1 means all requests follow the first preset ISL/OSL pattern.

The ISL/OSL ratio follows a sinusoidal pattern:
```
isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * π / period * t - π / 2)
```

Similarly, the phase shift of `-π/2` is to make the ISL/OSL ratio start from the minimum at `t = 0`.

- `--isl1 INT` (default: 100)
- Input sequence length of the first ISL/OSL preset

- `--osl1 INT` (default: 2000)
- Output sequence length of the first ISL/OSL preset

- `--isl2 INT` (default: 5000)
- Input sequence length of the second ISL/OSL preset

- `--osl2 INT` (default: 100)
- Output sequence length of the second ISL/OSL preset

- `--isl-osl-ratio-min FLOAT` (default: 0.2)
- Minimum ratio of input sequence length to output sequence length

- `--isl-osl-ratio-max FLOAT` (default: 0.8)
- Maximum ratio of input sequence length to output sequence length

- `--isl-osl-ratio-period FLOAT` (default: 10)
- Period of the sinusoidal input/output sequence length ratio

### Examples

#### Varying Request Rate with Fixed ISL/OSL Ratio

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 2 \
--request-rate-max 8 \
--request-rate-period 20 \
--isl1 3000 \
--osl1 150 \
--isl2 3000 \
--osl2 150 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with request rates varying between 2-8 requests/second over a 20-second period, with 3000 ISL and 150 OSL. Since both presets are identical here, the ISL/OSL ratio has no effect.

#### Varying ISL/OSL Ratio with Fixed Request Rate

```bash
python sin_synth.py \
--time-duration 60 \
--request-rate-min 5 \
--request-rate-max 5 \
--isl1 3000 \
--osl1 150 \
--isl2 500 \
--osl2 2000 \
--isl-osl-ratio-min 0.2 \
--isl-osl-ratio-max 0.8 \
--isl-osl-ratio-period 20 \
--output-file dataset.jsonl
```

This generates a 60-second dataset with the request rate fixed at 5 requests/second and the ISL/OSL ratio varying between 0.2 and 0.8 (mixing I3000/O150 and I500/O2000 requests) over a 20-second period.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main(args):
def get_isl_osl(t):
isl_osl_ratio = (args.isl_osl_ratio_min + args.isl_osl_ratio_max) / 2 + (
args.isl_osl_ratio_max - args.isl_osl_ratio_min
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t)
) / 2 * np.sin(2 * np.pi / args.isl_osl_ratio_period * t - np.pi / 2)
logger.info(f"isl_osl_ratio at {t:.2f}: {isl_osl_ratio:.2f}")
if np.random.uniform(0, 1) < isl_osl_ratio:
return (args.isl1, args.osl1)
Expand All @@ -43,7 +43,7 @@ def get_isl_osl(t):
t_e = min(t + args.process_interval, args.time_duration)
request_rate = (args.request_rate_min + args.request_rate_max) / 2 + (
args.request_rate_max - args.request_rate_min
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t)
) / 2 * np.sin(2 * np.pi / args.request_rate_period * t - np.pi / 2)
logger.info(f"request_rate at {t:.2f}: {request_rate:.2f}")
num_requests = np.random.poisson(request_rate * (t_e - t))
for req_idx in range(num_requests):
Expand Down Expand Up @@ -100,7 +100,8 @@ def get_isl_osl(t):
# request rate parameters
# for the process interval at [t, t + process_interval), the number of requests to generate is sampled
# from a poison distribution with the following parameters:
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# request_rate(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# the phase shift is -pi / 2 to make the request rate start from the minimum at t = 0
# num_requests[t, t + process_interval) ~ Poisson(request_rate(t) * process_interval)
# requests are uniformly distributed in the interval [t, t + process_interval)
parser.add_argument(
Expand All @@ -125,7 +126,7 @@ def get_isl_osl(t):
# isl/osl parameters
# isl/osl is randomly sampled from two candidates following the isl-osl-ratio.
# at time t, the isl-osl-ratio is calculated as:
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t)
# isl-osl-ratio(t) = (min + max) / 2 + (max - min) / 2 * sin(2 * pi / period * t - pi / 2)
# Then, we sample [isl1/osl1, isl2/osl2] from the distribution [isl-osl-ratio(t), 1 - isl-osl-ratio(t)]
parser.add_argument(
"--isl1", type=int, default=100, help="Minimum input sequence length"
Expand Down
Loading
Loading