7 changes: 6 additions & 1 deletion README.md
@@ -145,7 +145,8 @@ The `guidellm benchmark` command is used to run benchmarks against a generative
 
 - `prompt_tokens`: Average number of tokens for prompts.
 - `output_tokens`: Average number of tokens for outputs.
-- `TYPE_stdev`, `TYPE_min`, `TYPE_max`: Standard deviation, minimum, and maximum values for the specified type (e.g., `prompt_tokens`, `output_tokens`). If not provided, will use the provided tokens value only.
+- `turns`: Average number of request-response pairs per sample. Values above `1` result in a multi-turn[^1] benchmark.
+- `TYPE_stdev`, `TYPE_min`, `TYPE_max`: Standard deviation, minimum, and maximum values for the specified type (e.g., `prompt_tokens`, `output_tokens`, `turns`). If not provided, only the average value is used.
 - `samples`: Number of samples to generate, defaults to 1000.
 - `source`: Source text data for generation, defaults to a local copy of Pride and Prejudice.
 
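Taken together, the new `turns` options make a multi-turn synthetic benchmark a small config change. A sketch using the `SyntheticDatasetConfig` fields added below in `src/guidellm/dataset/synthetic.py` (values are illustrative, not defaults):

```python
from guidellm.dataset.synthetic import SyntheticDatasetConfig

# Illustrative multi-turn config: each sample averages three
# request-response pairs, with per-turn prompt/output token counts
# sampled around the configured means.
config = SyntheticDatasetConfig(
    prompt_tokens=256,
    output_tokens=128,
    turns=3,        # values above 1 make the benchmark multi-turn
    turns_stdev=1,
    turns_min=1,
    turns_max=5,
    samples=500,
)
```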
@@ -261,3 +262,7 @@ If you find GuideLLM helpful in your research or projects, please consider citing
   howpublished={\url{https://github.com/vllm-project/guidellm}},
 }
 ```
+
+- - -
+
+[^1]: Multi-turn refers to a benchmark where each dataset row represents a series of sequential requests, with each subsequent request building upon the context of the previous ones.
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/aggregator.py
@@ -32,11 +32,11 @@
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorkerDescription,
-    RequestT,
-    ResponseT,
     SchedulerRequestResult,
     WorkerDescription,
 )
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/benchmarker.py
@@ -27,12 +27,12 @@
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorker,
     RequestsWorker,
-    RequestT,
-    ResponseT,
     Scheduler,
     SchedulerRequestResult,
     SchedulingStrategy,
8 changes: 4 additions & 4 deletions src/guidellm/benchmark/entrypoints.py
@@ -90,11 +90,11 @@ async def benchmark_generative_text(
         ),
         random_seed=random_seed,
     )
-    unique_requests = request_loader.num_unique_items(raise_err=False)
+    unique_samples = request_loader.num_unique_items(raise_err=False)
     console.print_line(
-        f"Created loader with {unique_requests} unique requests from {data}.\n\n"
-        if unique_requests > 0
-        else f"Created loader with unknown number unique requests from {data}.\n\n"
+        f"Created loader with {unique_samples} unique samples from {data}.\n\n"
+        if unique_samples > 0
+        else f"Created loader with an unknown number of unique samples from {data}.\n\n"
     )
 
     profile = create_profile(rate_type=rate_type, rate=rate)
1 change: 1 addition & 0 deletions src/guidellm/config.py
@@ -133,6 +133,7 @@ class Settings(BaseSettings):
     max_concurrency: int = 512
     max_worker_processes: int = 10
     max_add_requests_per_loop: int = 20
+    scheduler_start_delay: float = 5
 
     # Data settings
     dataset: DatasetSettings = DatasetSettings()
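The new `scheduler_start_delay` knob should be overridable like any other `Settings` field. A sketch, assuming the standard pydantic-settings environment mechanism with a `GUIDELLM__` prefix (the prefix is an assumption, not confirmed by this diff):

```python
import os

# Assumed env override: pydantic-settings reads the environment when
# Settings() is constructed; the GUIDELLM__ prefix is an assumption.
os.environ["GUIDELLM__SCHEDULER_START_DELAY"] = "10"

from guidellm.config import Settings  # noqa: E402

print(Settings().scheduler_start_delay)  # 10.0 after float coercion
```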
77 changes: 52 additions & 25 deletions src/guidellm/dataset/synthetic.py
@@ -2,7 +2,7 @@
 import random
 from collections.abc import Iterable, Iterator
 from pathlib import Path
-from typing import Any, Literal, Optional, Union
+from typing import Any, Optional, TypedDict, Union
 
 import yaml
 from datasets import (
@@ -63,6 +63,26 @@ class SyntheticDatasetConfig(BaseModel):
         gt=0,
         default=None,
     )
+    turns: int = Field(
+        description="The number of turns in the conversation.",
+        gt=0,
+        default=1,
+    )
+    turns_stdev: Optional[int] = Field(
+        description="The standard deviation of the number of turns.",
+        gt=0,
+        default=None,
+    )
+    turns_min: Optional[int] = Field(
+        description="The minimum number of turns in the conversation.",
+        gt=0,
+        default=None,
+    )
+    turns_max: Optional[int] = Field(
+        description="The maximum number of turns in the conversation.",
+        gt=0,
+        default=None,
+    )
     samples: int = Field(
         description="The number of samples to generate for the dataset.",
         gt=0,
@@ -118,14 +138,13 @@ def parse_config_file(data: Union[str, Path]) -> "SyntheticDatasetConfig":
         return SyntheticDatasetConfig(**config_dict)
 
 
-class SyntheticTextItemsGenerator(
-    Iterable[
-        dict[
-            Literal["prompt", "prompt_tokens_count", "output_tokens_count"],
-            Union[str, int],
-        ]
-    ]
-):
+class SyntheticDatasetRow(TypedDict):
+    prompt: list[str]
+    prompt_tokens_count: list[int]
+    output_tokens_count: list[int]
+
+
+class SyntheticTextItemsGenerator(Iterable[SyntheticDatasetRow]):
     def __init__(
         self,
         config: SyntheticDatasetConfig,
@@ -141,12 +160,7 @@ def __init__(
 
     def __iter__(
         self,
-    ) -> Iterator[
-        dict[
-            Literal["prompt", "prompt_tokens_count", "output_tokens_count"],
-            Union[str, int],
-        ]
-    ]:
+    ) -> Iterator[SyntheticDatasetRow]:
         prompt_tokens_sampler = IntegerRangeSampler(
             average=self.config.prompt_tokens,
             variance=self.config.prompt_tokens_stdev,
@@ -161,20 +175,33 @@ def __iter__(
             max_value=self.config.output_tokens_max,
             random_seed=self.random_seed + 1,  # ensure diff dist from prompts
         )
+        turns_sampler = IntegerRangeSampler(
+            average=self.config.turns,
+            variance=self.config.turns_stdev,
+            min_value=self.config.turns_min,
+            max_value=self.config.turns_max,
+            random_seed=self.random_seed + 7,  # ensure diff dist
+        )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
 
-        for _, prompt_tokens, output_tokens in zip(
-            range(self.config.samples),
-            prompt_tokens_sampler,
-            output_tokens_sampler,
-        ):
-            start_index = rand.randint(0, len(self.text_creator.words))
-            yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
-                "prompt_tokens_count": prompt_tokens,
-                "output_tokens_count": output_tokens,
+        for _, turns in zip(range(self.config.samples), turns_sampler):
+            row: SyntheticDatasetRow = {
+                "prompt": [],
+                "prompt_tokens_count": [],
+                "output_tokens_count": [],
             }
+            for _, prompt_tokens, output_tokens in zip(
+                range(turns),
+                prompt_tokens_sampler,
+                output_tokens_sampler,
+            ):
+                start_index = rand.randint(0, len(self.text_creator.words))
+                row["prompt"].append(self._create_prompt(prompt_tokens, start_index))
+                row["prompt_tokens_count"].append(prompt_tokens)
+                row["output_tokens_count"].append(output_tokens)
+
+            yield row
 
     def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
         if prompt_tokens <= 0:
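With `turns` in play, every generated row now carries index-aligned lists, one entry per turn, matching the `SyntheticDatasetRow` TypedDict above. A sketch of the resulting shape (values illustrative):

```python
from guidellm.dataset.synthetic import SyntheticDatasetRow

# Illustrative turns=2 sample: the three lists stay index-aligned,
# one entry per request-response turn.
row: SyntheticDatasetRow = {
    "prompt": ["It is a truth universally ...", "My dear Mr. Bennet ..."],
    "prompt_tokens_count": [251, 262],
    "output_tokens_count": [128, 131],
}
```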
47 changes: 47 additions & 0 deletions src/guidellm/preprocess/item.py
@@ -0,0 +1,47 @@
+from collections.abc import Sequence
+from typing import Generic, Optional, TypeVar
+
+from pydantic import Field
+
+from guidellm.objects.pydantic import StandardBaseModel
+
+PromptT = TypeVar("PromptT")
+
+
+class Item(StandardBaseModel, Generic[PromptT]):
+    """
+    Represents a single item in a dataset,
+    containing a prompt and its associated metadata.
+    """
+
+    value: PromptT = Field(
+        description="The prompt text or data for the item.",
+        examples=[
+            "What is the capital of France?",
+            "Explain quantum computing in simple terms.",
+        ],
+    )
+    prompt_tokens: Optional[int] = Field(
+        default=None, gt=0, description="Number of tokens in the prompt"
+    )
+    output_tokens: Optional[int] = Field(
+        default=None, gt=0, description="Number of tokens in the output"
+    )
+
+
+class ItemList(Sequence[Item[PromptT]]):
+    """
+    Represents a list of items, each containing a prompt and its metadata.
+    """
+
+    shared_prefix: Optional[PromptT]
+
+    def __init__(self, *items: Item[PromptT], shared_prefix: Optional[PromptT] = None):
+        self.shared_prefix = shared_prefix
+        self._items = list(items)
+
+    def __getitem__(self, key):
+        return self._items[key]
+
+    def __len__(self) -> int:
+        return len(self._items)
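A quick sketch of how the new container behaves, using only the constructor and fields defined above:

```python
from guidellm.preprocess.item import Item, ItemList

# One dataset row's worth of prompts, with optional token metadata per item.
items = ItemList(
    Item(value="What is the capital of France?", prompt_tokens=8, output_tokens=32),
    Item(value="Explain quantum computing in simple terms.", prompt_tokens=9, output_tokens=64),
)

assert len(items) == 2              # Sequence protocol via __len__
assert items[0].prompt_tokens == 8  # and __getitem__
assert items.shared_prefix is None  # no common prefix supplied
```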
6 changes: 6 additions & 0 deletions src/guidellm/request/__init__.py
@@ -5,11 +5,17 @@
     RequestLoaderDescription,
 )
 from .request import GenerationRequest
+from .session import GenerativeRequestSession, RequestSession
+from .types import RequestT, ResponseT
 
 __all__ = [
     "GenerationRequest",
     "GenerativeRequestLoader",
     "GenerativeRequestLoaderDescription",
+    "GenerativeRequestSession",
     "RequestLoader",
     "RequestLoaderDescription",
+    "RequestSession",
+    "RequestT",
+    "ResponseT",
 ]
32 changes: 16 additions & 16 deletions src/guidellm/request/loader.py
@@ -11,10 +11,10 @@
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]
 
-from guidellm.config import settings
 from guidellm.dataset import ColumnInputTypes, load_dataset
 from guidellm.objects import StandardBaseModel
-from guidellm.request.request import GenerationRequest
+from guidellm.preprocess.item import Item, ItemList
+from guidellm.request.session import GenerativeRequestSession
 
 __all__ = [
     "GenerativeRequestLoader",
@@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
 
 class RequestLoader(Iterable):
     @abstractmethod
-    def __iter__(self): ...
+    def __iter__(self) -> Iterator: ...
 
     @abstractmethod
-    def __len__(self): ...
+    def __len__(self) -> int: ...
 
     @property
     @abstractmethod
@@ -105,14 +105,14 @@ def __init__(
         self.preserve_iter_state = iter_type == "infinite"  # ensure no caching requests
         self._preserved_iter = None
 
-    def __iter__(self) -> Iterator[GenerationRequest]:
+    def __iter__(self) -> Iterator[GenerativeRequestSession]:
         scope_create_count = 0
 
         while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
             scope_create_count += 1
 
             for item in dataset_iter:
-                yield self._create_request(item)
+                yield GenerativeRequestSession(self._create_items(item))
 
         self._preserved_iter = None
 
@@ -260,7 +260,8 @@ def _get_dataset_iter(
 
         return dataset_iter
 
-    def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
+    def _create_items(self, item: dict[str, Any]) -> ItemList:
+        prompts = item[self.column_mappings["prompt_column"]]
         prompt_tokens = (
             item[self.column_mappings["prompt_tokens_count_column"]]
             if "prompt_tokens_count_column" in self.column_mappings
@@ -272,13 +273,12 @@ def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
             else None
         )
 
-        return GenerationRequest(
-            request_type=settings.preferred_route,
-            content=item[self.column_mappings["prompt_column"]],
-            stats=(
-                {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
-            ),
-            constraints=(
-                {"output_tokens": output_tokens} if output_tokens is not None else {}
-            ),
+        items = (
+            Item(value=prompt, output_tokens=out_t, prompt_tokens=in_t)
+            for prompt, in_t, out_t in zip(
+                prompts if isinstance(prompts, list) else [prompts],
+                prompt_tokens if isinstance(prompt_tokens, list) else [prompt_tokens] * len(prompts),
+                output_tokens if isinstance(output_tokens, list) else [output_tokens] * len(prompts),
+            )
         )
+        return ItemList(*items)
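The net effect for consumers: iterating the loader now yields one `GenerativeRequestSession` per dataset row (wrapping that row's `ItemList`, one `Item` per turn) instead of a bare `GenerationRequest`. A sketch against an already-constructed loader (construction arguments are unchanged by this PR and elided here; the session's API beyond construction is not shown in this diff):

```python
from guidellm.request import GenerativeRequestSession

# Hypothetical pre-built loader instance; only the element type changed.
for session in request_loader:
    assert isinstance(session, GenerativeRequestSession)
```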