Automate performance benchmark (#2742)
* Add parameterized perf test template

* Split accuracy / perf tests

* Automate speed test setting

* Add benchmark summary fixture

* Add multi/h-label tests

* Add detection tests

* Add instance segmentation tests

* Add tiling tests

* Add semantic segmentation tests

* Add anomaly test
goodsong81 authored Dec 26, 2023
1 parent 922c9e1 commit 44e86bd
Showing 10 changed files with 988 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
### Enhancements

- Upgrade NNCF to 2.7 and OpenVINO to 2023.2 (<https://github.com/openvinotoolkit/training_extensions/pull/2656>)
- Automate performance benchmark (<https://github.com/openvinotoolkit/training_extensions/pull/2742>)

## \[v1.5.0\]

4 changes: 4 additions & 0 deletions tests/perf/__init__.py
@@ -0,0 +1,4 @@
"""OTX Perfomance tests."""

# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
202 changes: 202 additions & 0 deletions tests/perf/benchmark.py
@@ -0,0 +1,202 @@
"""OTX Benchmark based on tools/experiment.py."""

# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


import os
import glob
import pandas as pd
import yaml
from pathlib import Path
from typing import List, Optional

from tests.test_suite.run_test_command import check_run


class OTXBenchmark:
"""Benchmark runner based on tools/experiment.py in OTX1.x.
Example:
>>> bm = OTXBenchmark(['random_sample1', 'random_sample2'], data_root='./data/coco')
>>> atss_result = bm.run('MobileNetV2-ATSS')
>>> yolox_result = bm.run('YOLOX-TINY')
Args:
datasets (List[str]): Paths to datasets relative to the data_root.
Intended for, but not restricted to, different samplings of the same dataset.
data_root (str): Path to the root of dataset directories. Defaults to './data'.
num_epoch (int): Overrides the per-model default number of epochs.
Defaults to 0, which means no overriding.
num_repeat (int): Number of trials with different random seeds, which are set
as range(0, num_repeat). Defaults to 1.
train_params (dict, optional): Additional training parameters.
e.g. {'learning_parameters.num_iters': 2}. Defaults to {}.
track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False.
eval_upto (str): The last serial operation to evaluate. Choose one of ('train', 'export', 'optimize').
Operations include the preceding ones.
e.g. eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval
Defaults to 'train'.
output_root (str): Output root directory for logs and results. Defaults to './otx-benchmark'.
dry_run (bool): Whether to just print the OTX command without execution. Defaults to False.
tags (dict, optional): Key-value pair metadata for the experiment.
subset_dir_names (dict, optional): Specify dataset subset directory names, if any.
e.x) {"train": "train_10percent", "val": "val_all", "test": "test"}
"""

def __init__(
self,
datasets: List[str],
data_root: str = "data",
num_epoch: int = 0,
num_repeat: int = 1,
train_params: dict | None = None,
track_resources: bool = False,
eval_upto: str = "train",
output_root: str = "otx-benchmark",
dry_run: bool = False,
tags: dict | None = None,
subset_dir_names: dict | None = None,
):
self.datasets = datasets
self.data_root = data_root
self.num_epoch = num_epoch
self.num_repeat = num_repeat
self.train_params = train_params or {}
self.track_resources = track_resources
self.eval_upto = eval_upto
self.output_root = output_root
self.dry_run = dry_run
self.tags = tags or {}
self.subset_dir_names = subset_dir_names or {"train": "", "val": "", "test": ""}

def run(
self,
model_id: str,
train_params: dict = {},
tags: dict = {},
) -> pd.DataFrame | None:
"""Run configured benchmark with given model and return the result.
Args:
model_id (str): Target model identifier
train_params (dict): Overrides global benchmark train params
tags (dict): Overrides global benchmark tags
Returns:
pd.DataFrame | None: Table with benchmark metrics
"""

# Build config file
cfg = self._build_config(model_id, train_params, tags)
cfg_dir = Path(cfg["output_path"])
cfg_dir.mkdir(parents=True, exist_ok=True)
cfg_path = cfg_dir / "cfg.yaml"
with open(cfg_path, "w") as cfg_file:
yaml.dump(cfg, cfg_file, indent=2)
cmd = [
"python",
"tools/experiment.py",
"-f",
cfg_path,
]
if self.dry_run:
cmd.append("-d")
# Run benchmark
check_run(cmd)
# Load result
result = self.load_result(cfg_dir)
return result

@staticmethod
def load_result(result_path: str) -> pd.DataFrame | None:
"""Load benchmark results recursively and merge as pd.DataFrame.
Args:
result_path (str): Result directory or specific file.
Returns:
pd.DataFrame: Table with benchmark metrics & options
"""
# Search csv files
if os.path.isdir(result_path):
csv_file_paths = glob.glob(f"{result_path}/**/exp_summary.csv", recursive=True)
else:
csv_file_paths = [result_path]
results = []
# Load csv data
for csv_file_path in csv_file_paths:
result = pd.read_csv(csv_file_path)
# Append metadata if any
cfg_file_path = Path(csv_file_path).parent / "cfg.yaml"
if cfg_file_path.exists():
with cfg_file_path.open("r") as cfg_file:
tags = yaml.safe_load(cfg_file).get("tags", {})
for k, v in tags.items():
result[k] = v
results.append(result)
if len(results) > 0:
return pd.concat(results, ignore_index=True)
else:
return None

def _build_config(
self,
model_id: str,
train_params: dict = {},
tags: dict = {},
) -> dict:
"""Build config for tools/expeirment.py."""
all_train_params = self.train_params.copy()
all_train_params.update(train_params)
all_tags = self.tags.copy()
all_tags.update(tags)

cfg = {}
cfg["tags"] = all_tags # metadata
cfg["output_path"] = os.path.abspath(Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id]))
cfg["constants"] = {
"dataroot": os.path.abspath(self.data_root),
}
cfg["variables"] = {
"model": [model_id],
"data": self.datasets,
}
cfg["repeat"] = self.num_repeat
cfg["command"] = []
resource_param = ""
if self.track_resources:
resource_param = "--track-resource-usage all"
if self.num_epoch > 0:
self._set_num_epoch(model_id, all_train_params, self.num_epoch)
params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()])
cfg["command"].append(
"otx train ${model}"
" --train-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['train']}"
" --val-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['val']}"
" --deterministic"
f" {resource_param}"
f" params {params_str}"
)
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
if self.eval_upto == "train":
return cfg

cfg["command"].append("otx export")
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
if self.eval_upto == "export":
return cfg

cfg["command"].append("otx optimize")
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
return cfg

@staticmethod
def _set_num_epoch(model_id: str, train_params: dict, num_epoch: int):
"""Set model specific num_epoch parameter."""
if "padim" in model_id:
return # No configurable parameter for num_epoch
elif "stfpm" in model_id:
train_params["learning_parameters.max_epochs"] = num_epoch
else:
train_params["learning_parameters.num_iters"] = num_epoch
146 changes: 146 additions & 0 deletions tests/perf/conftest.py
@@ -0,0 +1,146 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


import pytest
import os
import subprocess
import yaml
from pathlib import Path
from typing import List
from datetime import datetime

from otx.api.entities.model_template import ModelTemplate, ModelCategory
from .benchmark import OTXBenchmark


def pytest_addoption(parser):
"""Add custom options for perf tests."""
parser.addoption(
"--model-type",
action="store",
default="all",
choices=("default", "all"),
help="Choose default|all. Defaults to all.",
)
parser.addoption(
"--data-size",
action="store",
default="all",
choices=("small", "medium", "large", "all"),
help="Choose small|medium|large|all. Defaults to all.",
)
parser.addoption(
"--num-repeat",
action="store",
default=0,
help="Overrides default per-data-size number of repeat setting. "
"Random seeds are set to 0 ~ num_repeat-1 for the trials. "
"Defaults to 0 (small=3, medium=3, large=1).",
)
parser.addoption(
"--num-epoch",
action="store",
default=0,
help="Overrides default per-model number of epoch setting. "
"Defaults to 0 (per-model epoch & early-stopping).",
)
parser.addoption(
"--eval-upto",
action="store",
default="train",
choices=("train", "export", "optimize"),
help="Choose train|export|optimize. Defaults to train.",
)
parser.addoption(
"--data-root",
action="store",
default="data",
help="Dataset root directory.",
)
parser.addoption(
"--output-root",
action="store",
default="exp/perf",
help="Output root directory.",
)
parser.addoption(
"--dry-run",
action="store_true",
default=False,
help="Print OTX commands without execution.",
)


@pytest.fixture(scope="session")
def fxt_output_root(request: pytest.FixtureRequest) -> Path:
"""Output root + date + short commit hash."""
output_root = request.config.getoption("--output-root")
date_str = datetime.now().strftime("%Y%m%d-%H%M%S")
commit_str = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
return Path(output_root) / (date_str + "-" + commit_str)


@pytest.fixture
def fxt_model_id(request: pytest.FixtureRequest) -> str:
"""Skip by model category."""
model_type: str = request.config.getoption("--model-type")
model_template: ModelTemplate = request.param
if model_type == "default":
if model_template.model_category == ModelCategory.OTHER:
pytest.skip(f"{model_template.model_category} category model")
return model_template.model_template_id


@pytest.fixture
def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXBenchmark:
"""Configure benchmark."""
# Skip by dataset size
data_size_option: str = request.config.getoption("--data-size")
data_size: str = request.param[0]
if data_size_option != "all":
if data_size_option != data_size:
pytest.skip(f"{data_size} datasets")

# Options
cfg: dict = request.param[1].copy()

tags = cfg.get("tags", {})
tags["data_size"] = data_size
cfg["tags"] = tags

num_epoch_override: int = int(request.config.getoption("--num-epoch"))
if num_epoch_override > 0: # 0: use default
cfg["num_epoch"] = num_epoch_override
if "test_speed" in request.node.name:
if cfg.get("num_epoch", 0) == 0: # No user options
cfg["num_epoch"] = 2

num_repeat_override: int = int(request.config.getoption("--num-repeat"))
if num_repeat_override > 0: # 0: use default
cfg["num_repeat"] = num_repeat_override

cfg["eval_upto"] = request.config.getoption("--eval-upto")
cfg["data_root"] = request.config.getoption("--data-root")
cfg["output_root"] = str(fxt_output_root)
cfg["dry_run"] = request.config.getoption("--dry-run")

# Create benchmark
benchmark = OTXBenchmark(
**cfg,
)

return benchmark


@pytest.fixture(scope="session", autouse=True)
def fxt_benchmark_summary(fxt_output_root: Path):
"""Summarize all results at the end of test session."""
yield
all_results = OTXBenchmark.load_result(fxt_output_root)
if all_results is not None:
print("=" * 20, "[Benchmark summary]")
print(all_results)
output_path = fxt_output_root / "benchmark-summary.csv"
all_results.to_csv(output_path, index=False)
print(f" -> Saved to {output_path}.")
