diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py index 300b5bd2..0d17c2f3 100644 --- a/src/guidellm/executor/__init__.py +++ b/src/guidellm/executor/__init__.py @@ -1,17 +1,21 @@ from .executor import Executor from .profile_generator import ( + FixedRateProfileGenerator, Profile, ProfileGenerationMode, ProfileGenerator, - SingleProfileGenerator, SweepProfileGenerator, + rate_type_to_load_gen_mode, + rate_type_to_profile_mode, ) __all__ = [ + "rate_type_to_load_gen_mode", + "rate_type_to_profile_mode", "Executor", "ProfileGenerationMode", "Profile", "ProfileGenerator", - "SingleProfileGenerator", + "FixedRateProfileGenerator", "SweepProfileGenerator", ] diff --git a/src/guidellm/executor/executor.py b/src/guidellm/executor/executor.py index 28f86e18..c24010db 100644 --- a/src/guidellm/executor/executor.py +++ b/src/guidellm/executor/executor.py @@ -20,7 +20,7 @@ def __init__( self, backend: Backend, request_generator: RequestGenerator, - profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE, + profile_mode: ProfileGenerationMode = ProfileGenerationMode.SWEEP, profile_args: Optional[Dict[str, Any]] = None, max_requests: Optional[int] = None, max_duration: Optional[float] = None, diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index a37ad648..a6b38383 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional, Type, Union +from typing import Dict, List, Optional, Type, Union import numpy @@ -12,16 +12,30 @@ "ProfileGenerationMode", "Profile", "ProfileGenerator", - "SingleProfileGenerator", + "FixedRateProfileGenerator", "SweepProfileGenerator", ] +rate_type_to_load_gen_mode = { + "synchronous": LoadGenerationMode.SYNCHRONOUS, + "constant": LoadGenerationMode.CONSTANT, + "poisson": LoadGenerationMode.POISSON, +} + class ProfileGenerationMode(Enum): - SINGLE = "single" + FIXED_RATE = "fixed_rate" SWEEP = "sweep" +rate_type_to_profile_mode = { + "synchronous": ProfileGenerationMode.FIXED_RATE, + "constant": ProfileGenerationMode.FIXED_RATE, + "poisson": ProfileGenerationMode.FIXED_RATE, + "sweep": ProfileGenerationMode.SWEEP, +} + + @dataclass class Profile: load_gen_mode: LoadGenerationMode @@ -55,34 +69,44 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil pass -@ProfileGenerator.register(ProfileGenerationMode.SINGLE) -class SingleProfileGenerator(ProfileGenerator): - def __init__(self, rate: float, rate_type: LoadGenerationMode): - super().__init__(ProfileGenerationMode.SINGLE) - self._rate: float = rate - self._rate_type: LoadGenerationMode = rate_type +@ProfileGenerator.register(ProfileGenerationMode.FIXED_RATE) +class FixedRateProfileGenerator(ProfileGenerator): + def __init__( + self, + load_gen_mode: Optional[LoadGenerationMode], + rates: Optional[List[float]] = None, + **kwargs, + ): + super().__init__(ProfileGenerationMode.FIXED_RATE) + if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0: + raise ValueError("custom rates are not supported in synchronous mode") + self._rates: Optional[List[float]] = rates + self._load_gen_mode = load_gen_mode self._generated: bool = False + self._rate_index: int = 0 def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - if self._generated: - return None - - self._generated = True - - if self._rate_type == LoadGenerationMode.CONSTANT: - return Profile( - load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=self._rate - ) - elif self._rate_type == LoadGenerationMode.SYNCHRONOUS: + if self._load_gen_mode == LoadGenerationMode.SYNCHRONOUS: + if self._generated: + return None + self._generated = True return Profile( load_gen_mode=LoadGenerationMode.SYNCHRONOUS, load_gen_rate=None ) - elif self._rate_type == LoadGenerationMode.POISSON: - return Profile( - load_gen_mode=LoadGenerationMode.POISSON, load_gen_rate=self._rate - ) + elif self._load_gen_mode in { + LoadGenerationMode.CONSTANT, + LoadGenerationMode.POISSON, + }: + if self._rates: + if self._rate_index >= len(self._rates): + return None + current_rate = self._rates[self._rate_index] + self._rate_index += 1 + return Profile( + load_gen_mode=self._load_gen_mode, load_gen_rate=current_rate + ) - raise ValueError(f"Invalid rate type: {self._rate_type}") + raise ValueError(f"Invalid rate type: {self._load_gen_mode}") @ProfileGenerator.register(ProfileGenerationMode.SWEEP) diff --git a/src/guidellm/main.py b/src/guidellm/main.py index 0e7cc555..ad4a6742 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -2,7 +2,11 @@ from guidellm.backend import Backend from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor import Executor +from guidellm.executor import ( + Executor, + rate_type_to_load_gen_mode, + rate_type_to_profile_mode, +) from guidellm.request import ( EmulatedRequestGenerator, FileRequestGenerator, @@ -45,8 +49,9 @@ @click.option( "--rate", type=float, - default="1.0", + default=[1.0], help="Rate to use for constant and poisson rate types", + multiple=True, ) @click.option( "--num-seconds", @@ -106,12 +111,16 @@ def main( else: raise ValueError(f"Unknown data type: {data_type}") + profile_mode = rate_type_to_profile_mode.get(rate_type) + load_gen_mode = rate_type_to_load_gen_mode.get(rate_type, None) + if not profile_mode or not load_gen_mode: + raise ValueError("Invalid rate type") # Create executor executor = Executor( request_generator=request_generator, backend=backend, - profile_mode=rate_type, - profile_args={"rate_type": rate_type, "rate": rate}, + profile_mode=profile_mode, + profile_args={"load_gen_mode": load_gen_mode, "rates": rate}, max_requests=num_requests, max_duration=num_seconds, ) diff --git a/src/guidellm/scheduler/load_generator.py b/src/guidellm/scheduler/load_generator.py index 35779516..9ea51988 100644 --- a/src/guidellm/scheduler/load_generator.py +++ b/src/guidellm/scheduler/load_generator.py @@ -16,7 +16,7 @@ class LoadGenerationMode(str, Enum): """ - SYNCHRONOUS = "sync" + SYNCHRONOUS = "synchronous" CONSTANT = "constant" POISSON = "poisson" diff --git a/tests/integration/executor/test_report_generation.py b/tests/integration/executor/test_report_generation.py index 47d31305..b71410de 100644 --- a/tests/integration/executor/test_report_generation.py +++ b/tests/integration/executor/test_report_generation.py @@ -16,10 +16,9 @@ def test_executor_openai_single_report_generation_sync_mode( request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generation_mode = ProfileGenerationMode.SINGLE + profile_generation_mode = ProfileGenerationMode.FIXED_RATE profile_generator_kwargs = { - "rate_type": LoadGenerationMode.SYNCHRONOUS, - "rate": 1.0, + "load_gen_mode": LoadGenerationMode.SYNCHRONOUS, } executor = Executor( @@ -55,10 +54,10 @@ def test_executor_openai_single_report_generation_constant_mode_infinite( request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generation_mode = ProfileGenerationMode.SINGLE + profile_generation_mode = ProfileGenerationMode.FIXED_RATE profile_generator_kwargs = { - "rate_type": LoadGenerationMode.CONSTANT, - "rate": 1.0, + "load_gen_mode": LoadGenerationMode.CONSTANT, + "rates": [1.0], } executor = Executor( @@ -88,10 +87,10 @@ def test_executor_openai_single_report_generation_constant_mode_limited( request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generation_mode = ProfileGenerationMode.SINGLE + profile_generation_mode = ProfileGenerationMode.FIXED_RATE profile_generator_kwargs = { - "rate_type": LoadGenerationMode.CONSTANT, - "rate": 1.0, + "load_gen_mode": LoadGenerationMode.CONSTANT, + "rates": [1.0], } executor = Executor( @@ -124,10 +123,10 @@ def test_executor_openai_single_report_generation_constant_mode_failed( request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generation_mode = ProfileGenerationMode.SINGLE + profile_generation_mode = ProfileGenerationMode.FIXED_RATE profile_generator_kwargs = { - "rate_type": LoadGenerationMode.CONSTANT, - "rate": 1.0, + "load_gen_mode": LoadGenerationMode.CONSTANT, + "rates": [1.0], } executor = Executor( @@ -153,10 +152,10 @@ def test_executor_openai_single_report_generation_constant_mode_cancelled_report request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generation_mode = ProfileGenerationMode.SINGLE + profile_generation_mode = ProfileGenerationMode.FIXED_RATE profile_generator_kwargs = { - "rate_type": LoadGenerationMode.CONSTANT, - "rate": 1.0, + "load_gen_mode": LoadGenerationMode.CONSTANT, + "rates": [1.0], } executor = Executor( diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py new file mode 100644 index 00000000..11f701ef --- /dev/null +++ b/tests/unit/executor/test_executor.py @@ -0,0 +1,87 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from guidellm.backend.base import Backend +from guidellm.executor import Executor, Profile, ProfileGenerator +from guidellm.executor.profile_generator import ProfileGenerationMode +from guidellm.request.base import RequestGenerator +from guidellm.scheduler import LoadGenerationMode + + +def test_executor_creation(): + mock_request_generator = MagicMock(spec=RequestGenerator) + mock_backend = MagicMock(spec=Backend) + profile_mode = ProfileGenerationMode.SWEEP + profile_args = None + max_requests = None + max_duration = None + executor = Executor( + mock_backend, + mock_request_generator, + profile_mode, + profile_args, + max_requests, + max_duration, + ) + assert executor.request_generator == mock_request_generator + assert executor.backend == mock_backend + assert executor.max_requests == max_requests + assert executor.max_duration == max_duration + + +@pytest.fixture +def mock_request_generator(): + return MagicMock(spec=RequestGenerator) + + +@pytest.fixture +def mock_backend(): + return MagicMock(spec=Backend) + + +@pytest.fixture +def mock_scheduler(): + with patch("guidellm.executor.executor.Scheduler") as MockScheduler: + yield MockScheduler + + +def test_executor_run(mock_request_generator, mock_backend, mock_scheduler): + mock_profile_generator = MagicMock(spec=ProfileGenerator) + profiles = [ + Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=1.0), + Profile(load_gen_mode=LoadGenerationMode.CONSTANT, load_gen_rate=2.0), + None, + ] + mock_profile_generator.next.side_effect = profiles + + with patch( + "guidellm.executor.executor.ProfileGenerator.create", + return_value=mock_profile_generator, + ): + executor = Executor( + request_generator=mock_request_generator, + backend=mock_backend, + profile_mode=ProfileGenerationMode.FIXED_RATE, + profile_args={ + "load_gen_mode": LoadGenerationMode.CONSTANT, + "rates": [1.0, 2.0], + }, + max_requests=10, + max_duration=100, + ) + + mock_benchmark = MagicMock() + mock_scheduler.return_value.run.return_value = mock_benchmark + + report = executor.run() + + assert mock_scheduler.call_count == 2 + assert len(report.benchmarks) == 2 + assert report.benchmarks[0] == mock_benchmark + assert report.benchmarks[1] == mock_benchmark + calls = mock_scheduler.call_args_list + assert calls[0][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT + assert calls[0][1]["load_gen_rate"] == 1.0 + assert calls[1][1]["load_gen_mode"] == LoadGenerationMode.CONSTANT + assert calls[1][1]["load_gen_rate"] == 2.0 diff --git a/tests/unit/executor/test_single_profile_generation_mode.py b/tests/unit/executor/test_fixed_rate_profile_generation_mode.py similarity index 80% rename from tests/unit/executor/test_single_profile_generation_mode.py rename to tests/unit/executor/test_fixed_rate_profile_generation_mode.py index 380e3f49..07f5e453 100644 --- a/tests/unit/executor/test_single_profile_generation_mode.py +++ b/tests/unit/executor/test_fixed_rate_profile_generation_mode.py @@ -1,3 +1,5 @@ +from typing import List, Optional + import pytest from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport @@ -24,12 +26,15 @@ def test_executor_single_profile_generator_benchmark_report( request_genrator = dummy.services.TestRequestGenerator( tokenizer="bert-base-uncased" ) - profile_generator_kwargs = {"rate_type": load_gen_mode, "rate": 1.0} + rates: Optional[List[float]] = [1.0] + if load_gen_mode == LoadGenerationMode.SYNCHRONOUS: + rates = None + profile_generator_kwargs = {"load_gen_mode": load_gen_mode, "rates": rates} executor = Executor( backend=openai_backend_factory(), request_generator=request_genrator, - profile_mode=ProfileGenerationMode.SINGLE, + profile_mode=ProfileGenerationMode.FIXED_RATE, profile_args=profile_generator_kwargs, max_requests=1, max_duration=None, diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py new file mode 100644 index 00000000..f0aecf12 --- /dev/null +++ b/tests/unit/executor/test_profile_generator.py @@ -0,0 +1,150 @@ +from unittest.mock import MagicMock + +import numpy +import pytest + +from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport +from guidellm.executor import ( + FixedRateProfileGenerator, + ProfileGenerationMode, + ProfileGenerator, + SweepProfileGenerator, +) +from guidellm.scheduler import LoadGenerationMode + +# Fixed Rate Profile Generator + + +def test_fixed_rate_profile_generator_creation(): + rates = [1.0] + load_gen_mode = LoadGenerationMode.CONSTANT + test_profile_generator = ProfileGenerator.create( + ProfileGenerationMode.FIXED_RATE, + **({"rates": rates, "load_gen_mode": load_gen_mode}), + ) + assert isinstance(test_profile_generator, FixedRateProfileGenerator) + assert test_profile_generator._rates == rates + assert test_profile_generator._load_gen_mode == load_gen_mode + assert test_profile_generator._rate_index == 0 + + +def test_synchronous_mode_rate_list_error(): + rates = [1.0] + load_gen_mode = LoadGenerationMode.SYNCHRONOUS + with pytest.raises( + ValueError, match="custom rates are not supported in synchronous mode" + ): + ProfileGenerator.create( + ProfileGenerationMode.FIXED_RATE, + **({"rates": rates, "load_gen_mode": load_gen_mode}), + ) + + +def test_next_with_multiple_rates(): + rates = [1.0, 2.0] + load_gen_mode = LoadGenerationMode.CONSTANT + test_profile_generator = ProfileGenerator.create( + ProfileGenerationMode.FIXED_RATE, + **({"rates": rates, "load_gen_mode": load_gen_mode}), + ) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + for rate in rates: + current_profile = test_profile_generator.next(mock_report) + assert current_profile is not None + assert current_profile.load_gen_rate == rate + assert current_profile.load_gen_mode == LoadGenerationMode.CONSTANT + assert test_profile_generator.next(mock_report) is None + + +def test_next_with_sync_mode(): + load_gen_mode = LoadGenerationMode.SYNCHRONOUS + test_profile_generator = ProfileGenerator.create( + ProfileGenerationMode.FIXED_RATE, **({"load_gen_mode": load_gen_mode}) + ) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + current_profile = test_profile_generator.next(mock_report) + assert current_profile is not None + assert current_profile.load_gen_rate is None + assert current_profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS + assert test_profile_generator.next(mock_report) is None + + +# Sweep Profile Generator + + +def test_sweep_profile_generator_creation(): + test_profile_generator = ProfileGenerator.create( + ProfileGenerationMode.SWEEP, **({}) + ) + assert isinstance(test_profile_generator, SweepProfileGenerator) + assert not test_profile_generator._sync_run + assert not test_profile_generator._max_found + assert test_profile_generator._pending_rates is None + assert test_profile_generator._pending_rates is None + + +def test_first_profile_is_synchronous(): + test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + profile = test_profile_generator.next(mock_report) + assert profile is not None + assert profile.load_gen_rate is None + assert profile.load_gen_mode == LoadGenerationMode.SYNCHRONOUS + + +def test_rate_doubles(): + test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + mock_benchmark = MagicMock(spec=TextGenerationBenchmark) + mock_benchmark.overloaded = False + mock_benchmark.rate = 2.0 + mock_benchmark.request_rate = 2.0 + benchmarks = [mock_benchmark] + mock_report.benchmarks = benchmarks + test_profile_generator.next(mock_report) + + profile = test_profile_generator.next(mock_report) + assert profile is not None + assert profile.load_gen_rate == 4.0 + + +def test_max_found(): + test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + mock_benchmark = MagicMock(spec=TextGenerationBenchmark) + mock_benchmark.overloaded = False + mock_benchmark.rate = 2.0 + mock_benchmark.request_rate = 2.0 + mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) + mock_overloaded_benchmark.overloaded = True + mock_overloaded_benchmark.rate = 4.0 + mock_overloaded_benchmark.request_rate = 4.0 + benchmarks = [mock_benchmark, mock_overloaded_benchmark] + mock_report.benchmarks = benchmarks + + test_profile_generator.next(mock_report) + profile = test_profile_generator.next(mock_report) + assert profile is not None + + # if benchmark wasn't overloaded, rates would have doubled to 8 + assert profile.load_gen_rate == 2.0 + + +def test_pending_rates(): + test_profile_generator = ProfileGenerator.create(ProfileGenerationMode.SWEEP) + mock_report = MagicMock(spec=TextGenerationBenchmarkReport) + mock_benchmark = MagicMock(spec=TextGenerationBenchmark) + mock_benchmark.overloaded = False + mock_benchmark.rate = 2.0 + mock_benchmark.request_rate = 2.0 + mock_overloaded_benchmark = MagicMock(spec=TextGenerationBenchmark) + mock_overloaded_benchmark.overloaded = True + mock_overloaded_benchmark.rate = 8.0 + mock_overloaded_benchmark.request_rate = 8.0 + benchmarks = [mock_benchmark, mock_overloaded_benchmark] + mock_report.benchmarks = benchmarks + profile = test_profile_generator.next(mock_report) + for expected_rate in numpy.linspace(2.0, 8.0, 10): + profile = test_profile_generator.next(mock_report) + assert profile is not None + assert profile.load_gen_rate == expected_rate