Merged
Changes from 26 commits (27 commits total)
134 changes: 83 additions & 51 deletions .github/scripts/generate_vllm_benchmark_matrix.py
@@ -12,27 +12,42 @@
logging.basicConfig(level=logging.INFO)
# Those are H100 runners from https://github.com/pytorch-labs/pytorch-gha-infra/blob/main/multi-tenant/inventory/manual_inventory
# while ROCm runner are provided by AMD
RUNNERS_MAPPING = {
TP_TO_RUNNER_MAPPING = {
1: [
"linux.aws.h100",
"linux.rocm.gpu.mi300.2", # No single ROCm GPU?
"linux.24xl.spr-metal",
],
# NB: There is no 2xH100 runner at the moment, so let's use the next one
# in the list here which is 4xH100
2: [
"linux.aws.h100.4",
"linux.rocm.gpu.mi300.2",
"linux.24xl.spr-metal",
Review comment (Collaborator): 24xlarge has only 1 NUMA node, so we should not put it under TP=2 (a possible follow-up is sketched after this mapping).
],
4: [
"linux.aws.h100.4",
"linux.rocm.gpu.mi300.4",
# TODO (huydhn): Enable this when Intel's runners are ready
# "intel-cpu-emr",
],
8: [
"linux.aws.h100.8",
"linux.rocm.gpu.mi300.8",
],
}
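
An editorial sketch, not part of this diff, of what the reviewer's suggestion above could look like if applied: the single-NUMA-node CPU runner is dropped from the TP=2 bucket and kept only under TP=1. Whether the PR should make this change is left to the authors; the dict name and entries are copied from the diff.

# Hypothetical follow-up to the review comment above (not the change made in this PR):
# drop the single-NUMA-node CPU runner from TP=2 so it is only exercised at TP=1.
TP_TO_RUNNER_MAPPING = {
    1: [
        "linux.aws.h100",
        "linux.rocm.gpu.mi300.2",  # No single ROCm GPU?
        "linux.24xl.spr-metal",
    ],
    2: [
        "linux.aws.h100.4",
        "linux.rocm.gpu.mi300.2",
        # "linux.24xl.spr-metal" removed here: 24xlarge has only 1 NUMA node
    ],
    # 4 and 8 stay as in the diff above
}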

# This mapping is needed to find out the platform of the runner
RUNNER_TO_PLATFORM_MAPPING = {
"linux.aws.h100": "cuda",
"linux.aws.h100.4": "cuda",
"linux.aws.h100.8": "cuda",
"linux.rocm.gpu.mi300.2": "rocm",
"linux.rocm.gpu.mi300.4": "rocm",
"linux.rocm.gpu.mi300.8": "rocm",
"linux.24xl.spr-metal": "cpu",
}

# All the different names vLLM uses to refer to their benchmark configs
VLLM_BENCHMARK_CONFIGS_PARAMETER = set(
[
@@ -76,10 +91,11 @@ def parse_args() -> Any:
help="the comma-separated list of models to benchmark",
)
parser.add_argument(
"--gpus",
"--runners",
type=str,
default="",
help="the comma-separated list of GPUs to benchmark",
help="the comma-separated list of runners to run the benchmark",
required=True,
)

return parser.parse_args()
@@ -107,60 +123,76 @@ def set_output(name: str, val: Any) -> None:


def generate_benchmark_matrix(
benchmark_configs_dir: str, models: List[str], gpus: List[str]
benchmark_configs_dir: str, models: List[str], runners: List[str]
) -> Dict[str, Any]:
"""
Parse all the JSON files in vLLM benchmark configs directory to get the
model name and tensor parallel size (aka number of GPUs)
model name and tensor parallel size (aka number of GPUs or CPU NUMA nodes)
"""
use_all_gpus = True if not gpus else False
benchmark_matrix: Dict[str, Any] = {
"include": [],
}

selected_models = []
for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
with open(file) as f:
try:
configs = json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load {file}: {e}")
continue

for config in configs:
param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
assert len(param) == 1

benchmark_config = config[param[0]]
if "model" not in benchmark_config:
warning(f"Model name is not set in {benchmark_config}, skipping...")
continue
model = benchmark_config["model"].lower()

# Dedup
if model in selected_models:
continue
# and only choose the selected model:
if models and model not in models:
continue
selected_models.append(model)

if "tensor_parallel_size" in benchmark_config:
tp = benchmark_config["tensor_parallel_size"]
elif "tp" in benchmark_config:
tp = benchmark_config["tp"]
else:
tp = 8
assert tp in RUNNERS_MAPPING

for runner in RUNNERS_MAPPING[tp]:
found_runner = False
for gpu in gpus:
if gpu.lower() in runner:
found_runner = True
break

if found_runner or use_all_gpus:
platforms = set()
if not runners:
use_all_runners = True
platforms = set(v for v in RUNNER_TO_PLATFORM_MAPPING.values())
else:
use_all_runners = False
for k, v in RUNNER_TO_PLATFORM_MAPPING.items():
for r in runners:
if r.lower() in k:
platforms.add(v)

# Gather all possible benchmarks
for platform in sorted(platforms):
selected_models = []
for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
with open(file) as f:
try:
configs = json.load(f)
except json.JSONDecodeError as e:
warning(f"Fail to load {file}: {e}")
continue

for config in configs:
param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
assert len(param) == 1

benchmark_config = config[param[0]]
if "model" not in benchmark_config:
warning(f"Model name is not set in {benchmark_config}, skipping...")
continue
model = benchmark_config["model"].lower()

# Dedup
if model in selected_models:
continue
# and only choose the selected model:
if models and model not in models:
continue
selected_models.append(model)

if "tensor_parallel_size" in benchmark_config:
tp = benchmark_config["tensor_parallel_size"]
elif "tp" in benchmark_config:
tp = benchmark_config["tp"]
else:
tp = 8
assert tp in TP_TO_RUNNER_MAPPING

for runner in TP_TO_RUNNER_MAPPING[tp]:
# Wrong platform
if (
runner not in RUNNER_TO_PLATFORM_MAPPING
or RUNNER_TO_PLATFORM_MAPPING[runner] != platform
):
continue

found_runner = any([r and r.lower() in runner for r in runners])
if not found_runner and not use_all_runners:
continue

benchmark_matrix["include"].append(
{
"runner": runner,
@@ -176,11 +208,11 @@ def generate_benchmark_matrix(
def main() -> None:
args = parse_args()
models = [m.strip().lower() for m in args.models.split(",") if m.strip()]
gpus = [m.strip().lower() for m in args.gpus.split(",") if m.strip()]
runners = [m.strip().lower() for m in args.runners.split(",") if m.strip()]
benchmark_matrix = generate_benchmark_matrix(
args.benchmark_configs_dir,
models,
gpus,
runners,
)
set_output("benchmark_matrix", benchmark_matrix)

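
To make the new runner filtering concrete, here is a small self-contained sketch (an editorial addition, not part of the PR) that mirrors the substring matching used in generate_benchmark_matrix to turn a --runners list into a set of platforms. The mapping is copied from the diff above; resolve_platforms is a name introduced only for this illustration.

# Editorial sketch: mirrors the platform-resolution step added in this PR.
from typing import List, Set

RUNNER_TO_PLATFORM_MAPPING = {
    "linux.aws.h100": "cuda",
    "linux.aws.h100.4": "cuda",
    "linux.aws.h100.8": "cuda",
    "linux.rocm.gpu.mi300.2": "rocm",
    "linux.rocm.gpu.mi300.4": "rocm",
    "linux.rocm.gpu.mi300.8": "rocm",
    "linux.24xl.spr-metal": "cpu",
}

def resolve_platforms(runners: List[str]) -> Set[str]:
    # An empty runner list means "benchmark on every platform"
    if not runners:
        return set(RUNNER_TO_PLATFORM_MAPPING.values())
    # Otherwise keep any platform whose runner name contains one of the inputs
    return {
        platform
        for runner_name, platform in RUNNER_TO_PLATFORM_MAPPING.items()
        if any(r.lower() in runner_name for r in runners)
    }

print(resolve_platforms([]))         # {'cuda', 'rocm', 'cpu'}
print(resolve_platforms(["h100"]))   # {'cuda'}
print(resolve_platforms(["mi300"]))  # {'rocm'}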
15 changes: 13 additions & 2 deletions .github/scripts/setup_vllm_benchmark.py
@@ -61,17 +61,27 @@ def parse_args() -> Any:
help="the list of models to benchmark",
required=True,
)
parser.add_argument(
"--device",
type=str,
default="",
help="device for the runner",
required=True,
)

return parser.parse_args()


def setup_benchmark_configs(
from_benchmark_configs_dir: str, to_benchmark_configs_dir: str, models: List[str]
from_benchmark_configs_dir: str,
to_benchmark_configs_dir: str,
models: List[str],
device: str,
) -> None:
"""
Setup the benchmark configs to run on this runner
"""
for file in glob.glob(f"{from_benchmark_configs_dir}/*.json"):
for file in glob.glob(f"{from_benchmark_configs_dir}/{device}/*.json"):
filename = os.path.basename(file)
benchmark_configs = []

@@ -108,6 +118,7 @@ def main() -> None:
args.from_benchmark_configs_dir,
args.to_benchmark_configs_dir,
args.models.split(","),
args.device,
)


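
Finally, a small editorial sketch of the effect of the new --device argument on config discovery in setup_vllm_benchmark.py: only JSON configs under the per-device subdirectory are picked up. The base directory below is an assumed example path, not one taken from the PR.

# Editorial sketch of the device-scoped lookup added above.
import glob

from_benchmark_configs_dir = "vllm-benchmarks/benchmarks"  # assumed layout for illustration
device = "cuda"  # value passed via the new --device argument

# Before this PR: f"{from_benchmark_configs_dir}/*.json"
# After this PR:  f"{from_benchmark_configs_dir}/{device}/*.json"
for file in glob.glob(f"{from_benchmark_configs_dir}/{device}/*.json"):
    print(file)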