From e93d732219309c29035bf869d5058ef3573bd8c0 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 23 Nov 2023 11:41:09 +0900 Subject: [PATCH 01/25] Add parameterized perf test template --- tests/perf/__init__.py | 4 + tests/perf/conftest.py | 175 ++++++++++++++++++++++++++++++ tests/perf/test_classification.py | 47 ++++++++ 3 files changed, 226 insertions(+) create mode 100644 tests/perf/__init__.py create mode 100644 tests/perf/conftest.py create mode 100644 tests/perf/test_classification.py diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py new file mode 100644 index 00000000000..36a90a5e5f6 --- /dev/null +++ b/tests/perf/__init__.py @@ -0,0 +1,4 @@ +"""OTX Perfomance tests.""" + +# Copyright (C) 2021-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py new file mode 100644 index 00000000000..e950bd0bf29 --- /dev/null +++ b/tests/perf/conftest.py @@ -0,0 +1,175 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest +import os +import subprocess +import yaml +from typing import List + +from otx.api.entities.model_template import ModelTemplate, ModelCategory + + +def pytest_addoption(parser): + """Add custom options for perf tests.""" + parser.addoption( + "--model-type", + action="store", + default="all", + help="Choose default|all. Defaults to all." + ) + parser.addoption( + "--data-size", + action="store", + default="all", + help="Choose small|medium|large|all. Defaults to all." + ) + parser.addoption( + "--num-repeat", + action="store", + default=0, + help="Overrides default per-data-size settings. Defaults to 0, which means no override." + ) + parser.addoption( + "--eval-upto", + action="store", + default="all", + help="Choose train|export|optimize. Defaults to train." + ) + parser.addoption( + "--data-root", + action="store", + default="data", + help="Dataset root directory." + ) + parser.addoption( + "--output-dir", + action="store", + default="exp/perf", + help="Output directory to save outputs." 
+ ) + + +@pytest.fixture +def fxt_template(request: pytest.FixtureRequest): + """Skip by model template.""" + model_type: str = request.config.getoption("--model-type") + template: ModelTemplate = request.param + if model_type == "default": + if template.model_category == ModelCategory.OTHER: + pytest.skip(f"{template.model_category} model") + return template + + +@pytest.fixture +def fxt_data_setting(request: pytest.FixtureRequest): + """Skip by dataset size.""" + data_size_option: str = request.config.getoption("--data-size") + data_size: str = request.param[0] + datasets: List[str] = request.param[1]["datasets"] + num_repeat: int = request.param[1]["num_repeat"] + num_repeat_override: int = request.config.getoption("--num-repeat") + if num_repeat_override > 0: + num_repeat = num_repeat_override + + if data_size_option != "all": + if data_size_option != data_size: + pytest.skip(f"{data_size} datasets") + return data_size, datasets, num_repeat + + +@pytest.fixture +def fxt_commit_hash(): + """Short commit hash in short form.""" + return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() + + +@pytest.fixture +def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_path_factory): + """Research framework command builder.""" + eval_upto = request.config.getoption("--eval-upto") + data_root = request.config.getoption("--data-root") + data_root = os.path.abspath(data_root) + output_dir = request.config.getoption("--output-dir") + output_dir = os.path.abspath(output_dir + "-" + fxt_commit_hash) + + def build_config( + tag: str, + model_template: ModelTemplate, + datasets: List[str], + num_repeat: int, + params: str = "", + ) -> dict: + cfg = {} + cfg["output_path"] = output_dir + cfg["constants"] = { + "dataroot": data_root, + } + cfg["variables"] = { + "model": [model_template.model_template_id], + "data": datasets, + } + cfg["repeat"] = num_repeat + cfg["command"] = [] + cfg["command"].append( + "otx train ${model}" + " --train-data-roots ${dataroot}/${data}" + " --val-data-roots ${dataroot}/${data}" + " --track-resource-usage all" + " --deterministic" + f" params {params}" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + if eval_upto == "train": + return cfg + + cfg["command"].append( + "otx export" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + if eval_upto == "export": + return cfg + + cfg["command"].append( + "otx optimize" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + return cfg + + def build_command( + tag: str, + model_template: ModelTemplate, + datasets: List[str], + num_repeat: int, + params: str = "", + ) -> List[str]: + cfg = build_config(tag, model_template, datasets, num_repeat, params) + cfg_path = tmp_path_factory.mktemp("exp")/"cfg.yaml" + print(cfg_path) + with open(cfg_path, "w") as cfg_file: + yaml.dump(cfg, cfg_file, indent=2,) + cmd = [ + "python", + "tools/experiment.py", + "-d", + "-f", + cfg_path, + ] + return cmd + + return build_command + + +class OTXBenchmark: + def __init__(self): + pass diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py new file mode 100644 index 00000000000..2ddb411647c --- /dev/null +++ b/tests/perf/test_classification.py @@ -0,0 +1,47 @@ +"""OTX Classification Perfomance tests.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest + +from otx.cli.registry import Registry +from 
tests.test_suite.run_test_command import check_run + + +templates = Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates +templates_names = [template.name for template in templates] + + +class TestPerfMultiClassClassification: + data_settings = { + "small": { + "datasets": [ + "small_dataset/1", + "small_dataset/2", + "small_dataset/3", + ], + "num_repeat": 3, + }, + "medium": { + "datasets": [ + "medium_dataset", + ], + "num_repeat": 3, + }, + "large": { + "datasets": [ + "large_dataset", + ], + "num_repeat": 1, + }, + } + @pytest.mark.parametrize("fxt_template", templates, ids=templates_names, indirect=True) + @pytest.mark.parametrize("fxt_data_setting", data_settings.items(), ids=data_settings.keys(), indirect=True) + def test_benchmark(self, fxt_template, fxt_data_setting, fxt_build_command): + model_template = fxt_template + data_size, datasets, num_repeat = fxt_data_setting + tag = f"multiclass-classification-{data_size}" + command = fxt_build_command(tag, model_template, datasets, num_repeat) + check_run(command) From f17bf416a4519a29dfc62fdce401f988d687e53d Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 15 Dec 2023 11:58:21 +0900 Subject: [PATCH 02/25] Split acccuracy / perf tests --- tests/perf/conftest.py | 53 +++++++++++++++++++++------ tests/perf/test_classification.py | 61 +++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 26 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index e950bd0bf29..1c32a6f9019 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -29,7 +29,16 @@ def pytest_addoption(parser): "--num-repeat", action="store", default=0, - help="Overrides default per-data-size settings. Defaults to 0, which means no override." + help="Overrides default per-data-size number of repeat setting. " + "Random seeds are set to 0 ~ num_repeat-1 for the trials. " + "Defaults to 0 (small=3, medium=3, large=1)." + ) + parser.addoption( + "--num-epoch", + action="store", + default=0, + help="Overrides default per-model number of epoch setting. " + "Defaults to 0 (per-model epoch & early-stopping)." ) parser.addoption( "--eval-upto", @@ -49,6 +58,12 @@ def pytest_addoption(parser): default="exp/perf", help="Output directory to save outputs." ) + parser.addoption( + "--dry-run", + action="store_true", + default=False, + help="Print OTX commands without execution." 
+ ) @pytest.fixture @@ -63,20 +78,26 @@ def fxt_template(request: pytest.FixtureRequest): @pytest.fixture -def fxt_data_setting(request: pytest.FixtureRequest): - """Skip by dataset size.""" +def fxt_benchmark_config(request: pytest.FixtureRequest): + """Override benchmark config.""" data_size_option: str = request.config.getoption("--data-size") data_size: str = request.param[0] datasets: List[str] = request.param[1]["datasets"] - num_repeat: int = request.param[1]["num_repeat"] + if data_size_option != "all": + if data_size_option != data_size: + pytest.skip(f"{data_size} datasets") + + num_epoch: int = request.param[1].get("num_epoch", 0) # 0: per-model default + num_epoch_override: int = request.config.getoption("--num-epoch") + if num_epoch_override > 0: + num_epoch = num_epoch_override + + num_repeat: int = request.param[1].get("num_repeat", 1) num_repeat_override: int = request.config.getoption("--num-repeat") if num_repeat_override > 0: num_repeat = num_repeat_override - if data_size_option != "all": - if data_size_option != data_size: - pytest.skip(f"{data_size} datasets") - return data_size, datasets, num_repeat + return data_size, datasets, num_epoch, num_repeat @pytest.fixture @@ -93,12 +114,15 @@ def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_ data_root = os.path.abspath(data_root) output_dir = request.config.getoption("--output-dir") output_dir = os.path.abspath(output_dir + "-" + fxt_commit_hash) + dry_run = request.config.getoption("--dry-run") def build_config( tag: str, model_template: ModelTemplate, datasets: List[str], + num_epoch: int, num_repeat: int, + track_resources: bool = False, params: str = "", ) -> dict: cfg = {} @@ -112,12 +136,17 @@ def build_config( } cfg["repeat"] = num_repeat cfg["command"] = [] + if num_epoch > 0: + params = params + f" --learning_pararmeters.num_iters {num_epoch}" + resource_param = "" + if track_resources: + resource_param = " --track-resource-usage all" cfg["command"].append( "otx train ${model}" " --train-data-roots ${dataroot}/${data}" " --val-data-roots ${dataroot}/${data}" - " --track-resource-usage all" " --deterministic" + f"{resource_param}" f" params {params}" ) cfg["command"].append( @@ -150,10 +179,12 @@ def build_command( tag: str, model_template: ModelTemplate, datasets: List[str], + num_epoch: int, num_repeat: int, + track_resources: bool = False, params: str = "", ) -> List[str]: - cfg = build_config(tag, model_template, datasets, num_repeat, params) + cfg = build_config(tag, model_template, datasets, num_epoch, num_repeat, track_resources, params) cfg_path = tmp_path_factory.mktemp("exp")/"cfg.yaml" print(cfg_path) with open(cfg_path, "w") as cfg_file: @@ -161,9 +192,9 @@ def build_command( cmd = [ "python", "tools/experiment.py", - "-d", "-f", cfg_path, + "-d" if dry_run else "", ] return cmd diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index 2ddb411647c..21cd390644c 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -10,38 +10,69 @@ from tests.test_suite.run_test_command import check_run -templates = Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates -templates_names = [template.name for template in templates] +TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates +TEMPLATE_NAMES = [template.name for template in TEMPLATES] -class TestPerfMultiClassClassification: - data_settings = { +class TestPerfSingleLabelClassification: + BENCHMARK_CONFIGS = { 
"small": { "datasets": [ - "small_dataset/1", - "small_dataset/2", - "small_dataset/3", + "classification/single_label/multiclass_CUB_small/1", + "classification/single_label/multiclass_CUB_small/2", + "classification/single_label/multiclass_CUB_small/3", ], "num_repeat": 3, }, "medium": { "datasets": [ - "medium_dataset", + "classification/single_label/multiclass_CUB_medium", ], "num_repeat": 3, }, "large": { "datasets": [ - "large_dataset", + "classification/single_label/multiclass_food101_large", ], "num_repeat": 1, }, } - @pytest.mark.parametrize("fxt_template", templates, ids=templates_names, indirect=True) - @pytest.mark.parametrize("fxt_data_setting", data_settings.items(), ids=data_settings.keys(), indirect=True) - def test_benchmark(self, fxt_template, fxt_data_setting, fxt_build_command): + + @pytest.mark.parametrize("fxt_template", TEMPLATES, ids=TEMPLATE_NAMES, indirect=True) + @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuarcy(self, fxt_template, fxt_benchmark_config, fxt_build_command): + """Benchmark accruacy metrics.""" + model_template = fxt_template + data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config + tag = f"singlelabel-classification-accuracy-{data_size}" + command = fxt_build_command( + tag, + model_template, + datasets, + num_epoch, + num_repeat, + ) + check_run(command) + + @pytest.mark.parametrize("fxt_template", TEMPLATES, ids=TEMPLATE_NAMES, indirect=True) + @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_template, fxt_benchmark_config, fxt_build_command): + """Benchmark train time per iter / infer time per image.""" model_template = fxt_template - data_size, datasets, num_repeat = fxt_data_setting - tag = f"multiclass-classification-{data_size}" - command = fxt_build_command(tag, model_template, datasets, num_repeat) + data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config + # Override default iteration setting, in case there's no user input + # "--data-size large -k speed" is recommended. 
+ if num_epoch == 0: + num_epoch = 2 + if num_repeat == 0: + num_repeat = 1 + tag = f"singlelabel-classification-speed-{data_size}" + command = fxt_build_command( + tag, + model_template, + datasets, + num_epoch, + num_repeat, + track_resources=True, # Measure CPU/GPU usages + ) check_run(command) From 8b5bfadf09d41b19fe46b28c5312735d9a8af8c2 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 15 Dec 2023 14:00:31 +0900 Subject: [PATCH 03/25] Apply datetime-based output directoy --- tests/perf/conftest.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 1c32a6f9019..803fb9c382b 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -7,6 +7,7 @@ import subprocess import yaml from typing import List +from datetime import datetime from otx.api.entities.model_template import ModelTemplate, ModelCategory @@ -88,12 +89,12 @@ def fxt_benchmark_config(request: pytest.FixtureRequest): pytest.skip(f"{data_size} datasets") num_epoch: int = request.param[1].get("num_epoch", 0) # 0: per-model default - num_epoch_override: int = request.config.getoption("--num-epoch") + num_epoch_override: int = int(request.config.getoption("--num-epoch")) if num_epoch_override > 0: num_epoch = num_epoch_override num_repeat: int = request.param[1].get("num_repeat", 1) - num_repeat_override: int = request.config.getoption("--num-repeat") + num_repeat_override: int = int(request.config.getoption("--num-repeat")) if num_repeat_override > 0: num_repeat = num_repeat_override @@ -114,6 +115,7 @@ def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_ data_root = os.path.abspath(data_root) output_dir = request.config.getoption("--output-dir") output_dir = os.path.abspath(output_dir + "-" + fxt_commit_hash) + output_dir = output_dir + "/" + datetime.now().strftime("%Y%m%d_%H%M%S") dry_run = request.config.getoption("--dry-run") def build_config( @@ -137,7 +139,7 @@ def build_config( cfg["repeat"] = num_repeat cfg["command"] = [] if num_epoch > 0: - params = params + f" --learning_pararmeters.num_iters {num_epoch}" + params = params + f" --learning_parameters.num_iters {num_epoch}" resource_param = "" if track_resources: resource_param = " --track-resource-usage all" @@ -194,8 +196,10 @@ def build_command( "tools/experiment.py", "-f", cfg_path, - "-d" if dry_run else "", ] + if dry_run: + cmd.append("-d") + return cmd return build_command From 4debeeb62443d0cb160865b27a962a41b49c9f12 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 15 Dec 2023 14:19:04 +0900 Subject: [PATCH 04/25] Fix choice options --- tests/perf/conftest.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 803fb9c382b..dc4514a3d6c 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -18,13 +18,15 @@ def pytest_addoption(parser): "--model-type", action="store", default="all", - help="Choose default|all. Defaults to all." + choices=("default", "all"), + help="Choose default|all. Defaults to all.", ) parser.addoption( "--data-size", action="store", default="all", - help="Choose small|medium|large|all. Defaults to all." + choices=("small", "medium", "large", "all"), + help="Choose small|medium|large|all. Defaults to all.", ) parser.addoption( "--num-repeat", @@ -45,6 +47,7 @@ def pytest_addoption(parser): "--eval-upto", action="store", default="all", + choices=("train", "export", "optimize"), help="Choose train|export|optimize. 
Defaults to train." ) parser.addoption( @@ -54,10 +57,10 @@ def pytest_addoption(parser): help="Dataset root directory." ) parser.addoption( - "--output-dir", + "--output-root", action="store", default="exp/perf", - help="Output directory to save outputs." + help="Output root directory." ) parser.addoption( "--dry-run", @@ -113,7 +116,7 @@ def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_ eval_upto = request.config.getoption("--eval-upto") data_root = request.config.getoption("--data-root") data_root = os.path.abspath(data_root) - output_dir = request.config.getoption("--output-dir") + output_dir = request.config.getoption("--output-root") output_dir = os.path.abspath(output_dir + "-" + fxt_commit_hash) output_dir = output_dir + "/" + datetime.now().strftime("%Y%m%d_%H%M%S") dry_run = request.config.getoption("--dry-run") From b9b35661fcf3944553fca4fdad1a46d3431d2ba9 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 15 Dec 2023 14:54:52 +0900 Subject: [PATCH 05/25] Exec based on model ID --- tests/perf/conftest.py | 20 ++++++++++---------- tests/perf/test_classification.py | 18 ++++++++---------- tools/experiment.py | 2 +- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index dc4514a3d6c..23272797196 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -71,14 +71,14 @@ def pytest_addoption(parser): @pytest.fixture -def fxt_template(request: pytest.FixtureRequest): - """Skip by model template.""" +def fxt_model_id(request: pytest.FixtureRequest): + """Skip by model category.""" model_type: str = request.config.getoption("--model-type") - template: ModelTemplate = request.param + model_template: ModelTemplate = request.param if model_type == "default": - if template.model_category == ModelCategory.OTHER: - pytest.skip(f"{template.model_category} model") - return template + if model_template.model_category == ModelCategory.OTHER: + pytest.skip(f"{model_template.model_category} category model") + return model_template.model_template_id @pytest.fixture @@ -123,7 +123,7 @@ def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_ def build_config( tag: str, - model_template: ModelTemplate, + model_id: str, datasets: List[str], num_epoch: int, num_repeat: int, @@ -136,7 +136,7 @@ def build_config( "dataroot": data_root, } cfg["variables"] = { - "model": [model_template.model_template_id], + "model": [model_id], "data": datasets, } cfg["repeat"] = num_repeat @@ -182,14 +182,14 @@ def build_config( def build_command( tag: str, - model_template: ModelTemplate, + model_id: str, datasets: List[str], num_epoch: int, num_repeat: int, track_resources: bool = False, params: str = "", ) -> List[str]: - cfg = build_config(tag, model_template, datasets, num_epoch, num_repeat, track_resources, params) + cfg = build_config(tag, model_id, datasets, num_epoch, num_repeat, track_resources, params) cfg_path = tmp_path_factory.mktemp("exp")/"cfg.yaml" print(cfg_path) with open(cfg_path, "w") as cfg_file: diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index 21cd390644c..d350d18e9c1 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -10,8 +10,8 @@ from tests.test_suite.run_test_command import check_run -TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates -TEMPLATE_NAMES = [template.name for template in TEMPLATES] +MODEL_TEMPLATES = 
Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates +MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] class TestPerfSingleLabelClassification: @@ -38,27 +38,25 @@ class TestPerfSingleLabelClassification: }, } - @pytest.mark.parametrize("fxt_template", TEMPLATES, ids=TEMPLATE_NAMES, indirect=True) + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_accuarcy(self, fxt_template, fxt_benchmark_config, fxt_build_command): + def test_accuarcy(self, fxt_model_id, fxt_benchmark_config, fxt_build_command): """Benchmark accruacy metrics.""" - model_template = fxt_template data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config tag = f"singlelabel-classification-accuracy-{data_size}" command = fxt_build_command( tag, - model_template, + fxt_model_id, datasets, num_epoch, num_repeat, ) check_run(command) - @pytest.mark.parametrize("fxt_template", TEMPLATES, ids=TEMPLATE_NAMES, indirect=True) + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_speed(self, fxt_template, fxt_benchmark_config, fxt_build_command): + def test_speed(self, fxt_model_id, fxt_benchmark_config, fxt_build_command): """Benchmark train time per iter / infer time per image.""" - model_template = fxt_template data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config # Override default iteration setting, in case there's no user input # "--data-size large -k speed" is recommended. @@ -69,7 +67,7 @@ def test_speed(self, fxt_template, fxt_benchmark_config, fxt_build_command): tag = f"singlelabel-classification-speed-{data_size}" command = fxt_build_command( tag, - model_template, + fxt_model_id, datasets, num_epoch, num_repeat, diff --git a/tools/experiment.py b/tools/experiment.py index 6d9a271e547..7b2d2745064 100644 --- a/tools/experiment.py +++ b/tools/experiment.py @@ -790,7 +790,7 @@ def run_experiment_recipe(recipe_file: Union[str, Path], dryrun: bool = False): """ exp_recipe = ExpRecipeParser(recipe_file) output_path = exp_recipe.output_path - output_path.mkdir(exist_ok=True) + output_path.mkdir(parents=True, exist_ok=True) current_dir = os.getcwd() os.chdir(output_path) From c733b25fbff48c3fcd6a3bc49757f45d1be3da42 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Tue, 19 Dec 2023 17:31:09 +0900 Subject: [PATCH 06/25] Refactor out Benchmark class --- tests/perf/conftest.py | 180 +++++++++++++++++------------- tests/perf/test_classification.py | 49 ++++---- 2 files changed, 128 insertions(+), 101 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 23272797196..d6f076f722a 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -6,6 +6,7 @@ import os import subprocess import yaml +from pathlib import Path from typing import List from datetime import datetime @@ -70,6 +71,12 @@ def pytest_addoption(parser): ) +@pytest.fixture +def fxt_commit_hash(): + """Short commit hash.""" + return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() + + @pytest.fixture def fxt_model_id(request: pytest.FixtureRequest): """Skip by model category.""" @@ -82,83 +89,136 @@ def fxt_model_id(request: pytest.FixtureRequest): @pytest.fixture -def fxt_benchmark_config(request: 
pytest.FixtureRequest): - """Override benchmark config.""" +def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str): + """Configure benchmark.""" + # Skip by dataset size data_size_option: str = request.config.getoption("--data-size") data_size: str = request.param[0] - datasets: List[str] = request.param[1]["datasets"] if data_size_option != "all": if data_size_option != data_size: pytest.skip(f"{data_size} datasets") - num_epoch: int = request.param[1].get("num_epoch", 0) # 0: per-model default + # Options + cfg: dict = request.param[1].copy() + num_epoch_override: int = int(request.config.getoption("--num-epoch")) - if num_epoch_override > 0: - num_epoch = num_epoch_override + if num_epoch_override > 0: # 0: use default + cfg["num_epoch"] = num_epoch_override - num_repeat: int = request.param[1].get("num_repeat", 1) num_repeat_override: int = int(request.config.getoption("--num-repeat")) - if num_repeat_override > 0: - num_repeat = num_repeat_override + if num_repeat_override > 0: # 0: use default + cfg["num_repeat"] = num_repeat_override + + cfg["eval_upto"] = request.config.getoption("--eval-upto") + cfg["data_root"] = request.config.getoption("--data-root") + output_root = request.config.getoption("--output-root") + output_dir = fxt_commit_hash + "-" + datetime.now().strftime("%Y%m%d_%H%M%S") + cfg["output_root"] = str(Path(output_root) / output_dir) + cfg["dry_run"] = request.config.getoption("--dry-run") + + tags = cfg.get("tags", {}) + tags["data_size"] = data_size + cfg["tags"] = tags + + # Create benchmark + benchmark = OTXBenchmark( + **cfg, + ) - return data_size, datasets, num_epoch, num_repeat + return benchmark -@pytest.fixture -def fxt_commit_hash(): - """Short commit hash in short form.""" - return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() +class OTXBenchmark: + def __init__( + self, + datasets: List[str], + data_root: str = "data", + num_epoch: int = 0, + num_repeat: int = 0, + train_params: dict = {}, + track_resources: bool = False, + eval_upto: str = "train", + output_root: str = "otx-benchmark", + dry_run: bool = False, + tags: dict = {}, + ): + self.datasets = datasets + self.data_root = data_root + self.num_epoch = num_epoch + self.num_repeat = num_repeat + self.train_params = train_params + self.track_resources = track_resources + self.eval_upto = eval_upto + self.output_root = output_root + self.dry_run = dry_run + self.tags = tags + def build_command( + self, + model_id: str, + train_params: dict = {}, + tags: dict = {}, + ) -> List[str]: + cfg = self._build_config(model_id, tags, train_params) + cfg_dir = Path(self.output_root) + cfg_dir.mkdir(parents=True, exist_ok=True) + cfg_path = cfg_dir / "cfg.yaml" + print(cfg_path) + with open(cfg_path, "w") as cfg_file: + yaml.dump(cfg, cfg_file, indent=2,) + cmd = [ + "python", + "tools/experiment.py", + "-f", + cfg_path, + ] + if self.dry_run: + cmd.append("-d") + return cmd -@pytest.fixture -def fxt_build_command(request: pytest.FixtureRequest, fxt_commit_hash: str, tmp_path_factory): - """Research framework command builder.""" - eval_upto = request.config.getoption("--eval-upto") - data_root = request.config.getoption("--data-root") - data_root = os.path.abspath(data_root) - output_dir = request.config.getoption("--output-root") - output_dir = os.path.abspath(output_dir + "-" + fxt_commit_hash) - output_dir = output_dir + "/" + datetime.now().strftime("%Y%m%d_%H%M%S") - dry_run = request.config.getoption("--dry-run") - - def build_config( - tag: str, + 
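# --- Editor's note (illustrative, not part of the patch) ---------------------
# Minimal usage sketch for OTXBenchmark as defined at this point in the series,
# assuming a valid model template id is substituted for the placeholder:
#
#     benchmark = OTXBenchmark(
#         datasets=["classification/single_label/multiclass_CUB_medium"],
#         data_root="data",
#         num_repeat=1,
#     )
#     cmd = benchmark.build_command(model_id="<MODEL_TEMPLATE_ID>", tags={"benchmark": "accuracy"})
#     # -> ["python", "tools/experiment.py", "-f", <output_root>/cfg.yaml]  (+ "-d" when dry_run)
#
# build_command() only serializes the experiment recipe to cfg.yaml and builds the
# tools/experiment.py invocation; the tests execute the returned command via check_run().
# ------------------------------------------------------------------------------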
def _build_config( + self, model_id: str, - datasets: List[str], - num_epoch: int, - num_repeat: int, - track_resources: bool = False, - params: str = "", + train_params: dict = {}, + tags: dict = {}, ) -> dict: + all_train_params = self.train_params.copy() + all_train_params.update(train_params) + all_tags = self.tags.copy() + all_tags.update(tags) + cfg = {} - cfg["output_path"] = output_dir + cfg["tags"] = all_tags # metadata + cfg["output_path"] = os.path.abspath(self.output_root) cfg["constants"] = { - "dataroot": data_root, + "dataroot": os.path.abspath(self.data_root), } cfg["variables"] = { "model": [model_id], - "data": datasets, + "data": self.datasets, + **{k: [v] for k, v in all_tags.items()}, # To be shown in result file } - cfg["repeat"] = num_repeat + cfg["repeat"] = self.num_repeat cfg["command"] = [] - if num_epoch > 0: - params = params + f" --learning_parameters.num_iters {num_epoch}" resource_param = "" - if track_resources: - resource_param = " --track-resource-usage all" + if self.track_resources: + resource_param = "--track-resource-usage all" + if self.num_epoch > 0: + all_train_params["learning_parameters.num_iters"] = self.num_epoch + params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()]) cfg["command"].append( "otx train ${model}" " --train-data-roots ${dataroot}/${data}" " --val-data-roots ${dataroot}/${data}" " --deterministic" - f"{resource_param}" - f" params {params}" + f" {resource_param}" + f" params {params_str}" ) cfg["command"].append( "otx eval" " --test-data-roots ${dataroot}/${data}" ) - if eval_upto == "train": + if self.eval_upto == "train": return cfg cfg["command"].append( @@ -168,7 +228,7 @@ def build_config( "otx eval" " --test-data-roots ${dataroot}/${data}" ) - if eval_upto == "export": + if self.eval_upto == "export": return cfg cfg["command"].append( @@ -179,35 +239,3 @@ def build_config( " --test-data-roots ${dataroot}/${data}" ) return cfg - - def build_command( - tag: str, - model_id: str, - datasets: List[str], - num_epoch: int, - num_repeat: int, - track_resources: bool = False, - params: str = "", - ) -> List[str]: - cfg = build_config(tag, model_id, datasets, num_epoch, num_repeat, track_resources, params) - cfg_path = tmp_path_factory.mktemp("exp")/"cfg.yaml" - print(cfg_path) - with open(cfg_path, "w") as cfg_file: - yaml.dump(cfg, cfg_file, indent=2,) - cmd = [ - "python", - "tools/experiment.py", - "-f", - cfg_path, - ] - if dry_run: - cmd.append("-d") - - return cmd - - return build_command - - -class OTXBenchmark: - def __init__(self): - pass diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index d350d18e9c1..78861d5a05d 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -17,6 +17,9 @@ class TestPerfSingleLabelClassification: BENCHMARK_CONFIGS = { "small": { + "tags": { + "task": "single-label-classification", + }, "datasets": [ "classification/single_label/multiclass_CUB_small/1", "classification/single_label/multiclass_CUB_small/2", @@ -25,12 +28,18 @@ class TestPerfSingleLabelClassification: "num_repeat": 3, }, "medium": { + "tags": { + "task": "single-label-classification", + }, "datasets": [ "classification/single_label/multiclass_CUB_medium", ], "num_repeat": 3, }, "large": { + "tags": { + "task": "single-label-classification", + }, "datasets": [ "classification/single_label/multiclass_food101_large", ], @@ -39,38 +48,28 @@ class TestPerfSingleLabelClassification: } @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, 
ids=MODEL_IDS, indirect=True) - @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_accuarcy(self, fxt_model_id, fxt_benchmark_config, fxt_build_command): + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuarcy(self, fxt_model_id, fxt_benchmark): """Benchmark accruacy metrics.""" - data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config - tag = f"singlelabel-classification-accuracy-{data_size}" - command = fxt_build_command( - tag, - fxt_model_id, - datasets, - num_epoch, - num_repeat, + command = fxt_benchmark.build_command( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, ) check_run(command) @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) - @pytest.mark.parametrize("fxt_benchmark_config", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_speed(self, fxt_model_id, fxt_benchmark_config, fxt_build_command): + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id, fxt_benchmark): """Benchmark train time per iter / infer time per image.""" - data_size, datasets, num_epoch, num_repeat = fxt_benchmark_config # Override default iteration setting, in case there's no user input # "--data-size large -k speed" is recommended. - if num_epoch == 0: - num_epoch = 2 - if num_repeat == 0: - num_repeat = 1 - tag = f"singlelabel-classification-speed-{data_size}" - command = fxt_build_command( - tag, - fxt_model_id, - datasets, - num_epoch, - num_repeat, - track_resources=True, # Measure CPU/GPU usages + if fxt_benchmark.num_epoch == 0: + fxt_benchmark.num_epoch = 2 + if fxt_benchmark.num_repeat == 0: + fxt_benchmark.num_repeat = 1 + fxt_benchmark.track_resources = True + command = fxt_benchmark.build_command( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, ) check_run(command) From 3996eb376d79eec2622468b2eb0e21060c4672d7 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Tue, 19 Dec 2023 17:59:09 +0900 Subject: [PATCH 07/25] Automate speed test setting --- tests/perf/conftest.py | 5 ++++- tests/perf/test_classification.py | 6 ------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index d6f076f722a..6495ba32fb9 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -104,6 +104,9 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str): num_epoch_override: int = int(request.config.getoption("--num-epoch")) if num_epoch_override > 0: # 0: use default cfg["num_epoch"] = num_epoch_override + if "test_speed" in request.node.name: + if cfg.get("num_epoch", 0) == 0: # No user options + cfg["num_epoch"] = 2 num_repeat_override: int = int(request.config.getoption("--num-repeat")) if num_repeat_override > 0: # 0: use default @@ -159,7 +162,7 @@ def build_command( train_params: dict = {}, tags: dict = {}, ) -> List[str]: - cfg = self._build_config(model_id, tags, train_params) + cfg = self._build_config(model_id, train_params, tags) cfg_dir = Path(self.output_root) cfg_dir.mkdir(parents=True, exist_ok=True) cfg_path = cfg_dir / "cfg.yaml" diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index 78861d5a05d..88ca0767cd1 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -61,12 +61,6 @@ def 
test_accuarcy(self, fxt_model_id, fxt_benchmark): @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) def test_speed(self, fxt_model_id, fxt_benchmark): """Benchmark train time per iter / infer time per image.""" - # Override default iteration setting, in case there's no user input - # "--data-size large -k speed" is recommended. - if fxt_benchmark.num_epoch == 0: - fxt_benchmark.num_epoch = 2 - if fxt_benchmark.num_repeat == 0: - fxt_benchmark.num_repeat = 1 fxt_benchmark.track_resources = True command = fxt_benchmark.build_command( model_id=fxt_model_id, From 9d0e831eb06cdc588481498b6f27aab6a9d6f8ed Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Wed, 20 Dec 2023 09:50:22 +0900 Subject: [PATCH 08/25] Refacor OTXBenchmark --- tests/perf/benchmark.py | 127 ++++++++++++++++++++++++++++++ tests/perf/conftest.py | 120 +--------------------------- tests/perf/test_classification.py | 12 ++- 3 files changed, 136 insertions(+), 123 deletions(-) create mode 100644 tests/perf/benchmark.py diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py new file mode 100644 index 00000000000..b7326064d85 --- /dev/null +++ b/tests/perf/benchmark.py @@ -0,0 +1,127 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os +import yaml +from pathlib import Path +from typing import List + +from tests.test_suite.run_test_command import check_run + + +class OTXBenchmark: + def __init__( + self, + datasets: List[str], + data_root: str = "data", + num_epoch: int = 0, + num_repeat: int = 0, + train_params: dict = {}, + track_resources: bool = False, + eval_upto: str = "train", + output_root: str = "otx-benchmark", + dry_run: bool = False, + tags: dict = {}, + ): + self.datasets = datasets + self.data_root = data_root + self.num_epoch = num_epoch + self.num_repeat = num_repeat + self.train_params = train_params + self.track_resources = track_resources + self.eval_upto = eval_upto + self.output_root = output_root + self.dry_run = dry_run + self.tags = tags + + def run( + self, + model_id: str, + train_params: dict = {}, + tags: dict = {}, + ) -> List[str]: + # Build config file + cfg = self._build_config(model_id, train_params, tags) + cfg_dir = Path(self.output_root) + cfg_dir.mkdir(parents=True, exist_ok=True) + cfg_path = cfg_dir / "cfg.yaml" + with open(cfg_path, "w") as cfg_file: + yaml.dump(cfg, cfg_file, indent=2,) + cmd = [ + "python", + "tools/experiment.py", + "-f", + cfg_path, + ] + if self.dry_run: + cmd.append("-d") + # Run benchmark + check_run(cmd) + # Load result + result = None + return result + + def _build_config( + self, + model_id: str, + train_params: dict = {}, + tags: dict = {}, + ) -> dict: + all_train_params = self.train_params.copy() + all_train_params.update(train_params) + all_tags = self.tags.copy() + all_tags.update(tags) + + cfg = {} + cfg["tags"] = all_tags # metadata + cfg["output_path"] = os.path.abspath(self.output_root) + cfg["constants"] = { + "dataroot": os.path.abspath(self.data_root), + } + cfg["variables"] = { + "model": [model_id], + "data": self.datasets, + **{k: [v] for k, v in all_tags.items()}, # To be shown in result file + } + cfg["repeat"] = self.num_repeat + cfg["command"] = [] + resource_param = "" + if self.track_resources: + resource_param = "--track-resource-usage all" + if self.num_epoch > 0: + all_train_params["learning_parameters.num_iters"] = self.num_epoch + params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()]) + 
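# --- Editor's note (illustrative, not part of the patch) ---------------------
# Worked example of the parameter string built just above: with
#     all_train_params == {"learning_parameters.num_iters": 2}
# the join produces
#     params_str == "--learning_parameters.num_iters 2"
# so the templated train command below expands to something like
#     otx train ${model} --train-data-roots ${dataroot}/${data}
#         --val-data-roots ${dataroot}/${data} --deterministic
#         --track-resource-usage all params --learning_parameters.num_iters 2
# (the --track-resource-usage part only appears when track_resources is True).
# ------------------------------------------------------------------------------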
cfg["command"].append( + "otx train ${model}" + " --train-data-roots ${dataroot}/${data}" + " --val-data-roots ${dataroot}/${data}" + " --deterministic" + f" {resource_param}" + f" params {params_str}" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + if self.eval_upto == "train": + return cfg + + cfg["command"].append( + "otx export" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + if self.eval_upto == "export": + return cfg + + cfg["command"].append( + "otx optimize" + ) + cfg["command"].append( + "otx eval" + " --test-data-roots ${dataroot}/${data}" + ) + return cfg diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 6495ba32fb9..00c2f6c0a1c 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -11,6 +11,7 @@ from datetime import datetime from otx.api.entities.model_template import ModelTemplate, ModelCategory +from .benchmark import OTXBenchmark def pytest_addoption(parser): @@ -72,13 +73,13 @@ def pytest_addoption(parser): @pytest.fixture -def fxt_commit_hash(): +def fxt_commit_hash() -> str: """Short commit hash.""" return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() @pytest.fixture -def fxt_model_id(request: pytest.FixtureRequest): +def fxt_model_id(request: pytest.FixtureRequest) -> str: """Skip by model category.""" model_type: str = request.config.getoption("--model-type") model_template: ModelTemplate = request.param @@ -89,7 +90,7 @@ def fxt_model_id(request: pytest.FixtureRequest): @pytest.fixture -def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str): +def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str) -> OTXBenchmark: """Configure benchmark.""" # Skip by dataset size data_size_option: str = request.config.getoption("--data-size") @@ -129,116 +130,3 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str): ) return benchmark - - -class OTXBenchmark: - def __init__( - self, - datasets: List[str], - data_root: str = "data", - num_epoch: int = 0, - num_repeat: int = 0, - train_params: dict = {}, - track_resources: bool = False, - eval_upto: str = "train", - output_root: str = "otx-benchmark", - dry_run: bool = False, - tags: dict = {}, - ): - self.datasets = datasets - self.data_root = data_root - self.num_epoch = num_epoch - self.num_repeat = num_repeat - self.train_params = train_params - self.track_resources = track_resources - self.eval_upto = eval_upto - self.output_root = output_root - self.dry_run = dry_run - self.tags = tags - - def build_command( - self, - model_id: str, - train_params: dict = {}, - tags: dict = {}, - ) -> List[str]: - cfg = self._build_config(model_id, train_params, tags) - cfg_dir = Path(self.output_root) - cfg_dir.mkdir(parents=True, exist_ok=True) - cfg_path = cfg_dir / "cfg.yaml" - print(cfg_path) - with open(cfg_path, "w") as cfg_file: - yaml.dump(cfg, cfg_file, indent=2,) - cmd = [ - "python", - "tools/experiment.py", - "-f", - cfg_path, - ] - if self.dry_run: - cmd.append("-d") - return cmd - - def _build_config( - self, - model_id: str, - train_params: dict = {}, - tags: dict = {}, - ) -> dict: - all_train_params = self.train_params.copy() - all_train_params.update(train_params) - all_tags = self.tags.copy() - all_tags.update(tags) - - cfg = {} - cfg["tags"] = all_tags # metadata - cfg["output_path"] = os.path.abspath(self.output_root) - cfg["constants"] = { - "dataroot": os.path.abspath(self.data_root), - } - cfg["variables"] = { - 
"model": [model_id], - "data": self.datasets, - **{k: [v] for k, v in all_tags.items()}, # To be shown in result file - } - cfg["repeat"] = self.num_repeat - cfg["command"] = [] - resource_param = "" - if self.track_resources: - resource_param = "--track-resource-usage all" - if self.num_epoch > 0: - all_train_params["learning_parameters.num_iters"] = self.num_epoch - params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()]) - cfg["command"].append( - "otx train ${model}" - " --train-data-roots ${dataroot}/${data}" - " --val-data-roots ${dataroot}/${data}" - " --deterministic" - f" {resource_param}" - f" params {params_str}" - ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) - if self.eval_upto == "train": - return cfg - - cfg["command"].append( - "otx export" - ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) - if self.eval_upto == "export": - return cfg - - cfg["command"].append( - "otx optimize" - ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) - return cfg diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index 88ca0767cd1..4bc5ef6d4a8 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -7,7 +7,7 @@ import pytest from otx.cli.registry import Registry -from tests.test_suite.run_test_command import check_run +from .benchmark import OTXBenchmark MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="CLASSIFICATION").templates @@ -49,21 +49,19 @@ class TestPerfSingleLabelClassification: @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_accuarcy(self, fxt_model_id, fxt_benchmark): + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): """Benchmark accruacy metrics.""" - command = fxt_benchmark.build_command( + result = fxt_benchmark.run( model_id=fxt_model_id, tags={"benchmark": "accuracy"}, ) - check_run(command) @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) - def test_speed(self, fxt_model_id, fxt_benchmark): + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): """Benchmark train time per iter / infer time per image.""" fxt_benchmark.track_resources = True - command = fxt_benchmark.build_command( + result = fxt_benchmark.run( model_id=fxt_model_id, tags={"benchmark": "speed"}, ) - check_run(command) From 1abbca828bab3fde39e921dd5088b8224afb55ca Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Wed, 20 Dec 2023 10:43:50 +0900 Subject: [PATCH 09/25] Add API doc for OTXBenchmark --- tests/perf/benchmark.py | 56 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index b7326064d85..14f055cc80e 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -3,6 +3,7 @@ import os +import pandas as pd import yaml from pathlib import Path from typing import List @@ -11,12 +12,38 @@ class OTXBenchmark: + """Benchmark runner based on tools/experiment.py in OTX1.x. 
+ + Example: + >>> bm = OTXBenchmark(['random_sample1', 'random_sample'], data_root='./data/coco') + >>> atss_result = bm.run('MobileNetV2-ATSS') + >>> yolox_result = bm.run('YOLOX-TINY') + + Args: + datasets (List[str]): Paths to datasets relative to the data_root. + Intended for, but not restricted to different sampling based on same dataset. + data_root (str): Path to the root of dataset directories. Defaults to './data'. + num_epoch (int): Overrides the per-model default number of epoch settings. + Defaults to 0, which means no overriding. + num_repeat (int): Number for trials with different random seed, which would be set + as range(0, num_repeat). Defaults to 1. + train_params (dict): Additional training parameters. + e.x) {'learning_parameters.num_iters': 2}. Defaults to {}. + track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False. + eval_upto (str): The last serial operation to evaluate. Choose on of ('train', 'export', 'optimize'). + Operations include the preceeding ones. + e.x) Eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval + Default to 'train'. + output_root (str): Output path for logs and results. Defaults to './otx-benchmark'. + dry_run (bool): Whether to just print the OTX command without execution. Defaults to False. + tags (dict): Key-values pair metadata for the experiment. Defaults to {}. + """ def __init__( self, datasets: List[str], data_root: str = "data", num_epoch: int = 0, - num_repeat: int = 0, + num_repeat: int = 1, train_params: dict = {}, track_resources: bool = False, eval_upto: str = "train", @@ -40,7 +67,18 @@ def run( model_id: str, train_params: dict = {}, tags: dict = {}, - ) -> List[str]: + ) -> pd.DataFrame + """Run benchmark and return the result. + + Args: + model_id (str): Target model identifier + train_params (dict): Overrides global benchmark train params + tags (dict): Overrides global benchmark tags + + Retruns: + pd.DataFrame: Table with benchmark metrics + """ + # Build config file cfg = self._build_config(model_id, train_params, tags) cfg_dir = Path(self.output_root) @@ -59,9 +97,21 @@ def run( # Run benchmark check_run(cmd) # Load result - result = None + result = self.load_result() return result + def load_result(self, result_path: str = None) -> pd.DataFrame: + """Load result as pd.DataFrame format. + + Args: + result_path (str): Result directory or speicific file. + Defaults to None to search the benchmark output root. + + Retruns: + pd.DataFrame: Table with benchmark metrics + """ + return None + def _build_config( self, model_id: str, From 752928b13d2e67db44e232e896c0bf25a8c9e8e2 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Wed, 20 Dec 2023 11:23:52 +0900 Subject: [PATCH 10/25] Add csv loading --- tests/perf/benchmark.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 14f055cc80e..ecbc1c51978 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -6,7 +6,7 @@ import pandas as pd import yaml from pathlib import Path -from typing import List +from typing import List, Optional from tests.test_suite.run_test_command import check_run @@ -67,7 +67,7 @@ def run( model_id: str, train_params: dict = {}, tags: dict = {}, - ) -> pd.DataFrame + ) -> pd.DataFrame: """Run benchmark and return the result. 
Args: @@ -100,7 +100,7 @@ def run( result = self.load_result() return result - def load_result(self, result_path: str = None) -> pd.DataFrame: + def load_result(self, result_path: Optional[str] = None) -> pd.DataFrame: """Load result as pd.DataFrame format. Args: @@ -110,7 +110,11 @@ def load_result(self, result_path: str = None) -> pd.DataFrame: Retruns: pd.DataFrame: Table with benchmark metrics """ - return None + if result_path is None: + csv_file_path = Path(self.output_root) / "exp_summary.csv" + elif os.path.isdir(result_path): + csv_file_path = Path(result_path) / "exp_summary.csv" + return pd.read_csv(csv_file_path) def _build_config( self, From 046a88291ce1632482e17691f3c327ac352498cd Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Wed, 20 Dec 2023 13:44:45 +0900 Subject: [PATCH 11/25] Add tags to benchmark result --- tests/perf/benchmark.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index ecbc1c51978..4cfff421d2d 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -110,11 +110,22 @@ def load_result(self, result_path: Optional[str] = None) -> pd.DataFrame: Retruns: pd.DataFrame: Table with benchmark metrics """ + # Load csv data if result_path is None: csv_file_path = Path(self.output_root) / "exp_summary.csv" elif os.path.isdir(result_path): csv_file_path = Path(result_path) / "exp_summary.csv" - return pd.read_csv(csv_file_path) + result = pd.read_csv(csv_file_path) + + # Append metadata if any + cfg_file_path: Path = csv_file_path.parent / "cfg.yaml" + if cfg_file_path.exists(): + with cfg_file_path.open("r") as cfg_file: + tags = yaml.safe_load(cfg_file).get("tags", {}) + for k, v in tags.items(): + result[k] = v + + return result def _build_config( self, From c5ff73e77b95d99dbf683c0a6c2cfe0e4243cf5a Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Wed, 20 Dec 2023 18:33:12 +0900 Subject: [PATCH 12/25] Add benchmark summary fixture --- tests/perf/benchmark.py | 53 +++++++++++++++++++++++------------------ tests/perf/conftest.py | 37 ++++++++++++++++++---------- 2 files changed, 55 insertions(+), 35 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 4cfff421d2d..7bb3b41087e 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -3,6 +3,7 @@ import os +import glob import pandas as pd import yaml from pathlib import Path @@ -34,7 +35,7 @@ class OTXBenchmark: Operations include the preceeding ones. e.x) Eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval Default to 'train'. - output_root (str): Output path for logs and results. Defaults to './otx-benchmark'. + output_root (str): Output root dirctory for logs and results. Defaults to './otx-benchmark'. dry_run (bool): Whether to just print the OTX command without execution. Defaults to False. tags (dict): Key-values pair metadata for the experiment. Defaults to {}. """ @@ -81,7 +82,7 @@ def run( # Build config file cfg = self._build_config(model_id, train_params, tags) - cfg_dir = Path(self.output_root) + cfg_dir = Path(cfg["output_path"]) cfg_dir.mkdir(parents=True, exist_ok=True) cfg_path = cfg_dir / "cfg.yaml" with open(cfg_path, "w") as cfg_file: @@ -97,35 +98,39 @@ def run( # Run benchmark check_run(cmd) # Load result - result = self.load_result() + result = self.load_result(cfg_dir) return result - def load_result(self, result_path: Optional[str] = None) -> pd.DataFrame: - """Load result as pd.DataFrame format. 
+ @staticmethod + def load_result(result_path: str) -> pd.DataFrame: + """Load benchmark results recursively and merge as pd.DataFrame. Args: result_path (str): Result directory or speicific file. - Defaults to None to search the benchmark output root. Retruns: - pd.DataFrame: Table with benchmark metrics + pd.DataFrame: Table with benchmark metrics & options """ + # Search csv files + if os.path.isdir(result_path): + csv_file_paths = glob.glob(f"{result_path}/**/exp_summary.csv", recursive=True) + else: + csv_file_paths = [result_path] + results = [] # Load csv data - if result_path is None: - csv_file_path = Path(self.output_root) / "exp_summary.csv" - elif os.path.isdir(result_path): - csv_file_path = Path(result_path) / "exp_summary.csv" - result = pd.read_csv(csv_file_path) - - # Append metadata if any - cfg_file_path: Path = csv_file_path.parent / "cfg.yaml" - if cfg_file_path.exists(): - with cfg_file_path.open("r") as cfg_file: - tags = yaml.safe_load(cfg_file).get("tags", {}) - for k, v in tags.items(): - result[k] = v - - return result + for csv_file_path in csv_file_paths: + result = pd.read_csv(csv_file_path) + # Append metadata if any + cfg_file_path = Path(csv_file_path).parent / "cfg.yaml" + if cfg_file_path.exists(): + with cfg_file_path.open("r") as cfg_file: + tags = yaml.safe_load(cfg_file).get("tags", {}) + for k, v in tags.items(): + result[k] = v + results.append(result) + if len(results) > 0: + results = pd.concat(results, ignore_index=True) + return results def _build_config( self, @@ -140,7 +145,9 @@ def _build_config( cfg = {} cfg["tags"] = all_tags # metadata - cfg["output_path"] = os.path.abspath(self.output_root) + cfg["output_path"] = os.path.abspath( + Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id]) + ) cfg["constants"] = { "dataroot": os.path.abspath(self.data_root), } diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 00c2f6c0a1c..a505269c221 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -72,10 +72,13 @@ def pytest_addoption(parser): ) -@pytest.fixture -def fxt_commit_hash() -> str: - """Short commit hash.""" - return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() +@pytest.fixture(scope="session") +def fxt_output_root(request: pytest.FixtureRequest) -> Path: + """Output root + date + short commit hash.""" + output_root = request.config.getoption("--output-root") + data_str = datetime.now().strftime("%Y%m%d-%H%M%S") + commit_str = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() + return Path(output_root) / (data_str + "-" + commit_str) @pytest.fixture @@ -90,7 +93,7 @@ def fxt_model_id(request: pytest.FixtureRequest) -> str: @pytest.fixture -def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str) -> OTXBenchmark: +def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXBenchmark: """Configure benchmark.""" # Skip by dataset size data_size_option: str = request.config.getoption("--data-size") @@ -102,6 +105,10 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str) -> OTXBe # Options cfg: dict = request.param[1].copy() + tags = cfg.get("tags", {}) + tags["data_size"] = data_size + cfg["tags"] = tags + num_epoch_override: int = int(request.config.getoption("--num-epoch")) if num_epoch_override > 0: # 0: use default cfg["num_epoch"] = num_epoch_override @@ -115,18 +122,24 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_commit_hash: str) -> OTXBe 
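# --- Editor's note (illustrative, not part of the patch) ---------------------
# With this patch the output layout becomes hierarchical: fxt_output_root
# combines --output-root with a timestamp and short git hash, e.g.
#     exp/perf/20231215-140031-1abbca8/
# and each OTXBenchmark run then writes its cfg.yaml (and the exp_summary.csv
# files that load_result() later collects) underneath it; the session-scoped
# fxt_benchmark_summary fixture added below merges everything found there into
# benchmark-summary.csv at the end of the test session.
# ------------------------------------------------------------------------------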
cfg["eval_upto"] = request.config.getoption("--eval-upto") cfg["data_root"] = request.config.getoption("--data-root") - output_root = request.config.getoption("--output-root") - output_dir = fxt_commit_hash + "-" + datetime.now().strftime("%Y%m%d_%H%M%S") - cfg["output_root"] = str(Path(output_root) / output_dir) + cfg["output_root"] = str(fxt_output_root) cfg["dry_run"] = request.config.getoption("--dry-run") - tags = cfg.get("tags", {}) - tags["data_size"] = data_size - cfg["tags"] = tags - # Create benchmark benchmark = OTXBenchmark( **cfg, ) return benchmark + + +@pytest.fixture(scope="session", autouse=True) +def fxt_benchmark_summary(fxt_output_root: Path): + """Summarize all results at the end of test session.""" + yield + all_results = OTXBenchmark.load_result(fxt_output_root) + print("="*20, "[Benchmark summary]") + print(all_results) + output_path = fxt_output_root / "benchmark-summary.csv" + all_results.to_csv(output_path, index=False) + print(f" -> Saved to {output_path}.") From ed553dfa25e086026a151b9ee3430f5d2e995d01 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 10:10:56 +0900 Subject: [PATCH 13/25] Add multi/h-label tests --- tests/perf/benchmark.py | 10 ++- tests/perf/conftest.py | 11 ++-- tests/perf/test_classification.py | 106 ++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 8 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 7bb3b41087e..ce8c720d7ab 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -1,3 +1,5 @@ +"""OTX Benchmark based on tools/experiment.py.""" + # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -102,7 +104,7 @@ def run( return result @staticmethod - def load_result(result_path: str) -> pd.DataFrame: + def load_result(result_path: str) -> pd.DataFrame | None: """Load benchmark results recursively and merge as pd.DataFrame. 
Args: @@ -129,8 +131,9 @@ def load_result(result_path: str) -> pd.DataFrame: result[k] = v results.append(result) if len(results) > 0: - results = pd.concat(results, ignore_index=True) - return results + return pd.concat(results, ignore_index=True) + else: + return None def _build_config( self, @@ -138,6 +141,7 @@ def _build_config( train_params: dict = {}, tags: dict = {}, ) -> dict: + """Build config for tools/expeirment.py.""" all_train_params = self.train_params.copy() all_train_params.update(train_params) all_tags = self.tags.copy() diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index a505269c221..88bd53b1d67 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -138,8 +138,9 @@ def fxt_benchmark_summary(fxt_output_root: Path): """Summarize all results at the end of test session.""" yield all_results = OTXBenchmark.load_result(fxt_output_root) - print("="*20, "[Benchmark summary]") - print(all_results) - output_path = fxt_output_root / "benchmark-summary.csv" - all_results.to_csv(output_path, index=False) - print(f" -> Saved to {output_path}.") + if all_results: + print("="*20, "[Benchmark summary]") + print(all_results) + output_path = fxt_output_root / "benchmark-summary.csv" + all_results.to_csv(output_path, index=False) + print(f" -> Saved to {output_path}.") diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index 4bc5ef6d4a8..ba2ccc49e40 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -65,3 +65,109 @@ def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): model_id=fxt_model_id, tags={"benchmark": "speed"}, ) + + +class TestPerfMultiLabelClassification: + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "multi-label-classification", + }, + "datasets": [ + "classification/multi_label/multilabel_CUB_small/1", + "classification/multi_label/multilabel_CUB_small/2", + "classification/multi_label/multilabel_CUB_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "multi-label-classification", + }, + "datasets": [ + "classification/multi_label/multilabel_CUB_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "multi-label-classification", + }, + "datasets": [ + "classification/multi_label/multilabel_food101_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) + + +class TestPerfHierarchicalLabelClassification: + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "h-label-classification", + }, + "datasets": [ + "classification/h_label/h_label_CUB_small/1", + "classification/h_label/h_label_CUB_small/2", + "classification/h_label/h_label_CUB_small/3", + ], + 
"num_repeat": 3, + }, + "medium": { + "tags": { + "task": "h-label-classification", + }, + "datasets": [ + "classification/h_label/h_label_CUB_medium", + ], + "num_repeat": 3, + }, + # TODO: Add large dataset + # "large": { + # "tags": { + # "task": "h-label-classification", + # }, + # "datasets": [ + # ], + # "num_repeat": 1, + # }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) From d6609f24b77cb5c9deb64e566bacd6106c7d4293 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 10:16:01 +0900 Subject: [PATCH 14/25] Fix pre-commit --- tests/perf/benchmark.py | 30 ++++++++---------------------- tests/perf/conftest.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index ce8c720d7ab..169b2b01072 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -41,6 +41,7 @@ class OTXBenchmark: dry_run (bool): Whether to just print the OTX command without execution. Defaults to False. tags (dict): Key-values pair metadata for the experiment. Defaults to {}. 
""" + def __init__( self, datasets: List[str], @@ -88,7 +89,7 @@ def run( cfg_dir.mkdir(parents=True, exist_ok=True) cfg_path = cfg_dir / "cfg.yaml" with open(cfg_path, "w") as cfg_file: - yaml.dump(cfg, cfg_file, indent=2,) + yaml.dump(cfg, cfg_file, indent=2) cmd = [ "python", "tools/experiment.py", @@ -149,9 +150,7 @@ def _build_config( cfg = {} cfg["tags"] = all_tags # metadata - cfg["output_path"] = os.path.abspath( - Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id]) - ) + cfg["output_path"] = os.path.abspath(Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id])) cfg["constants"] = { "dataroot": os.path.abspath(self.data_root), } @@ -176,28 +175,15 @@ def _build_config( f" {resource_param}" f" params {params_str}" ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") if self.eval_upto == "train": return cfg - cfg["command"].append( - "otx export" - ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) + cfg["command"].append("otx export") + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") if self.eval_upto == "export": return cfg - cfg["command"].append( - "otx optimize" - ) - cfg["command"].append( - "otx eval" - " --test-data-roots ${dataroot}/${data}" - ) + cfg["command"].append("otx optimize") + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") return cfg diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 88bd53b1d67..5a16751842d 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -36,39 +36,39 @@ def pytest_addoption(parser): default=0, help="Overrides default per-data-size number of repeat setting. " "Random seeds are set to 0 ~ num_repeat-1 for the trials. " - "Defaults to 0 (small=3, medium=3, large=1)." + "Defaults to 0 (small=3, medium=3, large=1).", ) parser.addoption( "--num-epoch", action="store", default=0, help="Overrides default per-model number of epoch setting. " - "Defaults to 0 (per-model epoch & early-stopping)." + "Defaults to 0 (per-model epoch & early-stopping).", ) parser.addoption( "--eval-upto", action="store", default="all", choices=("train", "export", "optimize"), - help="Choose train|export|optimize. Defaults to train." + help="Choose train|export|optimize. Defaults to train.", ) parser.addoption( "--data-root", action="store", default="data", - help="Dataset root directory." + help="Dataset root directory.", ) parser.addoption( "--output-root", action="store", default="exp/perf", - help="Output root directory." + help="Output root directory.", ) parser.addoption( "--dry-run", action="store_true", default=False, - help="Print OTX commands without execution." 
+ help="Print OTX commands without execution.", ) @@ -77,7 +77,7 @@ def fxt_output_root(request: pytest.FixtureRequest) -> Path: """Output root + date + short commit hash.""" output_root = request.config.getoption("--output-root") data_str = datetime.now().strftime("%Y%m%d-%H%M%S") - commit_str = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip() + commit_str = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip() return Path(output_root) / (data_str + "-" + commit_str) @@ -139,7 +139,7 @@ def fxt_benchmark_summary(fxt_output_root: Path): yield all_results = OTXBenchmark.load_result(fxt_output_root) if all_results: - print("="*20, "[Benchmark summary]") + print("=" * 20, "[Benchmark summary]") print(all_results) output_path = fxt_output_root / "benchmark-summary.csv" all_results.to_csv(output_path, index=False) From 519de9d5497b9fa43d35bd893728ceee379bf8fe Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 10:54:20 +0900 Subject: [PATCH 15/25] Add detection tests --- tests/perf/benchmark.py | 3 +- tests/perf/test_classification.py | 2 +- tests/perf/test_detection.py | 68 +++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 tests/perf/test_detection.py diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 169b2b01072..853e7ee33eb 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -72,7 +72,7 @@ def run( train_params: dict = {}, tags: dict = {}, ) -> pd.DataFrame: - """Run benchmark and return the result. + """Run configured benchmark with given model and return the result. Args: model_id (str): Target model identifier @@ -157,7 +157,6 @@ def _build_config( cfg["variables"] = { "model": [model_id], "data": self.datasets, - **{k: [v] for k, v in all_tags.items()}, # To be shown in result file } cfg["repeat"] = self.num_repeat cfg["command"] = [] diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index ba2ccc49e40..eae5a982835 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -1,4 +1,4 @@ -"""OTX Classification Perfomance tests.""" +"""OTX Classification perfomance tests.""" # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/tests/perf/test_detection.py b/tests/perf/test_detection.py new file mode 100644 index 00000000000..45235f00ed2 --- /dev/null +++ b/tests/perf/test_detection.py @@ -0,0 +1,68 @@ +"""OTX Detection perfomance tests.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest + +from otx.cli.registry import Registry +from .benchmark import OTXBenchmark + + +MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="DETECTION").templates +MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + +class TestPerfDetection: + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "detection", + }, + "datasets": [ + "detection/pothole_small/1", + "detection/pothole_small/2", + "detection/pothole_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "detection", + }, + "datasets": [ + "detection/pothole_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "detection", + }, + "datasets": [ + "detection/vitens_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", 
BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) + From e447a3c8229cf27b03f31bdd67b69057d2759e2f Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 13:46:56 +0900 Subject: [PATCH 16/25] Add instance segmentationt tests --- tests/perf/test_classification.py | 24 +++++--- tests/perf/test_detection.py | 3 +- tests/perf/test_instance_segmenatation.py | 69 +++++++++++++++++++++++ 3 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 tests/perf/test_instance_segmenatation.py diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py index eae5a982835..15a9b4dd133 100644 --- a/tests/perf/test_classification.py +++ b/tests/perf/test_classification.py @@ -15,10 +15,12 @@ class TestPerfSingleLabelClassification: + """Benchmark single-label classification.""" + BENCHMARK_CONFIGS = { "small": { "tags": { - "task": "single-label-classification", + "task": "single_label_classification", }, "datasets": [ "classification/single_label/multiclass_CUB_small/1", @@ -29,7 +31,7 @@ class TestPerfSingleLabelClassification: }, "medium": { "tags": { - "task": "single-label-classification", + "task": "single_label_classification", }, "datasets": [ "classification/single_label/multiclass_CUB_medium", @@ -38,7 +40,7 @@ class TestPerfSingleLabelClassification: }, "large": { "tags": { - "task": "single-label-classification", + "task": "single_label_classification", }, "datasets": [ "classification/single_label/multiclass_food101_large", @@ -68,10 +70,12 @@ def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): class TestPerfMultiLabelClassification: + """Benchmark multi-label classification.""" + BENCHMARK_CONFIGS = { "small": { "tags": { - "task": "multi-label-classification", + "task": "multi_label_classification", }, "datasets": [ "classification/multi_label/multilabel_CUB_small/1", @@ -82,7 +86,7 @@ class TestPerfMultiLabelClassification: }, "medium": { "tags": { - "task": "multi-label-classification", + "task": "multi_label_classification", }, "datasets": [ "classification/multi_label/multilabel_CUB_medium", @@ -91,7 +95,7 @@ class TestPerfMultiLabelClassification: }, "large": { "tags": { - "task": "multi-label-classification", + "task": "multi_label_classification", }, "datasets": [ "classification/multi_label/multilabel_food101_large", @@ -121,10 +125,12 @@ def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): class TestPerfHierarchicalLabelClassification: + """Benchmark hierarchcial-label classification.""" + BENCHMARK_CONFIGS = { "small": { "tags": { - "task": "h-label-classification", + "task": "hierarchical_label_classification", }, "datasets": [ "classification/h_label/h_label_CUB_small/1", @@ -135,7 +141,7 @@ class TestPerfHierarchicalLabelClassification: }, "medium": { "tags": { - "task": "h-label-classification", + "task": 
"hierarchical_label_classification", }, "datasets": [ "classification/h_label/h_label_CUB_medium", @@ -145,7 +151,7 @@ class TestPerfHierarchicalLabelClassification: # TODO: Add large dataset # "large": { # "tags": { - # "task": "h-label-classification", + # "task": "hierarchical_label_classification", # }, # "datasets": [ # ], diff --git a/tests/perf/test_detection.py b/tests/perf/test_detection.py index 45235f00ed2..81ed71c0bac 100644 --- a/tests/perf/test_detection.py +++ b/tests/perf/test_detection.py @@ -15,6 +15,8 @@ class TestPerfDetection: + """Benchmark basic object detection.""" + BENCHMARK_CONFIGS = { "small": { "tags": { @@ -65,4 +67,3 @@ def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): model_id=fxt_model_id, tags={"benchmark": "speed"}, ) - diff --git a/tests/perf/test_instance_segmenatation.py b/tests/perf/test_instance_segmenatation.py new file mode 100644 index 00000000000..1c649dc2e74 --- /dev/null +++ b/tests/perf/test_instance_segmenatation.py @@ -0,0 +1,69 @@ +"""OTX Instance Segmentation perfomance tests.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest + +from otx.cli.registry import Registry +from .benchmark import OTXBenchmark + + +MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="INSTANCE_SEGMENTATION").templates +MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + +class TestPerfInstanceSegmentation: + """Benchmark basic instance segmentation.""" + + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "instance_seg/wgisd_small/1", + "instance_seg/wgisd_small/2", + "instance_seg/wgisd_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "instance_seg/coco_car_person_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "instance_seg/bdd_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) From 742e8cc489059f7930ba4ed42cceec0b4497ba41 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 14:06:01 +0900 Subject: [PATCH 17/25] Add tiling tests --- tests/perf/test_instance_segmenatation.py | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/perf/test_instance_segmenatation.py b/tests/perf/test_instance_segmenatation.py index 1c649dc2e74..3257f690139 100644 --- a/tests/perf/test_instance_segmenatation.py +++ b/tests/perf/test_instance_segmenatation.py @@ -67,3 +67,64 @@ def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): model_id=fxt_model_id, 
tags={"benchmark": "speed"}, ) + + +class TestPerfTilingInstanceSegmentation: + """Benchmark tiling instance segmentation.""" + + TILING_PARAMS = { + "tiling_parameters.enable_tiling": 1, + } + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "tiling_instance_seg/vitens_aeromonas_small/1", + "tiling_instance_seg/vitens_aeromonas_small/2", + "tiling_instance_seg/vitens_aeromonas_small/3", + ], + "num_repeat": 3, + "train_params": TILING_PARAMS, + }, + "medium": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "tiling_instance_seg/vitens_aeromonas_medium", + ], + "num_repeat": 3, + "train_params": TILING_PARAMS, + }, + "large": { + "tags": { + "task": "instance_segmentation", + }, + "datasets": [ + "tiling_instance_seg/bdd_large", + ], + "num_repeat": 1, + "train_params": TILING_PARAMS, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) From c37c1444a125d23ae70e76c05e0dc2e8e5601ad3 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 15:10:18 +0900 Subject: [PATCH 18/25] Add semantic segmenation tests --- tests/perf/benchmark.py | 2 +- tests/perf/test_instance_segmenatation.py | 6 +- tests/perf/test_semantic_segmentation.py | 69 +++++++++++++++++++++++ 3 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 tests/perf/test_semantic_segmentation.py diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 853e7ee33eb..36e0a39a0b4 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -18,7 +18,7 @@ class OTXBenchmark: """Benchmark runner based on tools/experiment.py in OTX1.x. 
Example: - >>> bm = OTXBenchmark(['random_sample1', 'random_sample'], data_root='./data/coco') + >>> bm = OTXBenchmark(['random_sample1', 'random_sample2'], data_root='./data/coco') >>> atss_result = bm.run('MobileNetV2-ATSS') >>> yolox_result = bm.run('YOLOX-TINY') diff --git a/tests/perf/test_instance_segmenatation.py b/tests/perf/test_instance_segmenatation.py index 3257f690139..6e4a1a9b275 100644 --- a/tests/perf/test_instance_segmenatation.py +++ b/tests/perf/test_instance_segmenatation.py @@ -78,7 +78,7 @@ class TestPerfTilingInstanceSegmentation: BENCHMARK_CONFIGS = { "small": { "tags": { - "task": "instance_segmentation", + "task": "tiling_instance_segmentation", }, "datasets": [ "tiling_instance_seg/vitens_aeromonas_small/1", @@ -90,7 +90,7 @@ class TestPerfTilingInstanceSegmentation: }, "medium": { "tags": { - "task": "instance_segmentation", + "task": "tiling_instance_segmentation", }, "datasets": [ "tiling_instance_seg/vitens_aeromonas_medium", @@ -100,7 +100,7 @@ class TestPerfTilingInstanceSegmentation: }, "large": { "tags": { - "task": "instance_segmentation", + "task": "tiling_instance_segmentation", }, "datasets": [ "tiling_instance_seg/bdd_large", diff --git a/tests/perf/test_semantic_segmentation.py b/tests/perf/test_semantic_segmentation.py new file mode 100644 index 00000000000..4ec28f6726c --- /dev/null +++ b/tests/perf/test_semantic_segmentation.py @@ -0,0 +1,69 @@ +"""OTX Semantic Segmentation perfomance tests.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest + +from otx.cli.registry import Registry +from .benchmark import OTXBenchmark + + +MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="SEGMENTATION").templates +MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + +class TestPerfSemanticSegmentation: + """Benchmark basic semantic segmentation.""" + + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "semantic_segmentation", + }, + "datasets": [ + "semantic_seg/kvasir_small/1", + "semantic_seg/kvasir_small/2", + "semantic_seg/kvasir_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "semantic_segmentation", + }, + "datasets": [ + "semantic_seg/kvasir_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "semantic_segmentation", + }, + "datasets": [ + "semantic_seg/kvasir_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) From 233b18cdbd170276a10306908835b366b35d4b1d Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 16:27:01 +0900 Subject: [PATCH 19/25] Add anomaly test --- tests/perf/test_anomaly.py | 184 +++++++++++++++++++++++++++++++++++++ 1 file 
changed, 184 insertions(+) create mode 100644 tests/perf/test_anomaly.py diff --git a/tests/perf/test_anomaly.py b/tests/perf/test_anomaly.py new file mode 100644 index 00000000000..db16f7172ea --- /dev/null +++ b/tests/perf/test_anomaly.py @@ -0,0 +1,184 @@ +"""OTX Anomaly perfomance tests.""" + +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import pytest + +from otx.cli.registry import Registry +from .benchmark import OTXBenchmark + + +class TestPerfAnomalyClassification: + """Benchmark anomaly classification.""" + + MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="ANOMALY_CLASSIFICATION").templates + MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "anomaly_classification", + }, + "datasets": [ + "anomaly/mvtec/bottle_small/1", + "anomaly/mvtec/bottle_small/2", + "anomaly/mvtec/bottle_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "anomaly_classification", + }, + "datasets": [ + "anomaly/mvtec/wood_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "anomaly_classification", + }, + "datasets": [ + "anomaly/mvtec/hazelnut_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) + + +class TestPerfAnomalyDetection: + """Benchmark anomaly detection.""" + + MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="ANOMALY_DETECTION").templates + MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "anomaly_detection", + }, + "datasets": [ + "anomaly/mvtec/bottle_small/1", + "anomaly/mvtec/bottle_small/2", + "anomaly/mvtec/bottle_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "anomaly_detection", + }, + "datasets": [ + "anomaly/mvtec/wood_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "anomaly_detection", + }, + "datasets": [ + "anomaly/mvtec/hazelnut_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def 
test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) + + +class TestPerfAnomalySegmentation: + """Benchmark anomaly segmentation.""" + + MODEL_TEMPLATES = Registry(f"src/otx/algorithms").filter(task_type="ANOMALY_SEGMENTATION").templates + MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES] + + BENCHMARK_CONFIGS = { + "small": { + "tags": { + "task": "anomaly_segmentation", + }, + "datasets": [ + "anomaly/mvtec/bottle_small/1", + "anomaly/mvtec/bottle_small/2", + "anomaly/mvtec/bottle_small/3", + ], + "num_repeat": 3, + }, + "medium": { + "tags": { + "task": "anomaly_segmentation", + }, + "datasets": [ + "anomaly/mvtec/wood_medium", + ], + "num_repeat": 3, + }, + "large": { + "tags": { + "task": "anomaly_segmentation", + }, + "datasets": [ + "anomaly/mvtec/hazelnut_large", + ], + "num_repeat": 1, + }, + } + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark accruacy metrics.""" + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "accuracy"}, + ) + + @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True) + @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True) + def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark): + """Benchmark train time per iter / infer time per image.""" + fxt_benchmark.track_resources = True + result = fxt_benchmark.run( + model_id=fxt_model_id, + tags={"benchmark": "speed"}, + ) From 8db6c60afacf71f902c898261ff270ac6b337469 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Thu, 21 Dec 2023 17:52:32 +0900 Subject: [PATCH 20/25] Fix anomaly max_epochs setting --- tests/perf/benchmark.py | 12 +++++++++++- tests/perf/conftest.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 36e0a39a0b4..bba5e36d55d 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -164,7 +164,7 @@ def _build_config( if self.track_resources: resource_param = "--track-resource-usage all" if self.num_epoch > 0: - all_train_params["learning_parameters.num_iters"] = self.num_epoch + self._set_num_epoch(model_id, all_train_params, self.num_epoch) params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()]) cfg["command"].append( "otx train ${model}" @@ -186,3 +186,13 @@ def _build_config( cfg["command"].append("otx optimize") cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") return cfg + + @staticmethod + def _set_num_epoch(model_id:str, train_params: dict, num_epoch: int): + """Set model specific num_epoch parameter.""" + if "padim" in model_id: + return # No configurable parameter for num_epoch + elif "stfpm" in model_id: + train_params["learning_parameters.max_epochs"] = num_epoch + else: + train_params["learning_parameters.num_iters"] = num_epoch diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index 5a16751842d..b85d3e2b869 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -138,7 +138,7 @@ def fxt_benchmark_summary(fxt_output_root: Path): """Summarize 
all results at the end of test session.""" yield all_results = OTXBenchmark.load_result(fxt_output_root) - if all_results: + if all_results is not None: print("=" * 20, "[Benchmark summary]") print(all_results) output_path = fxt_output_root / "benchmark-summary.csv" From e6cd073fb522f84d935984f471f6227f0d72c44e Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 22 Dec 2023 09:54:59 +0900 Subject: [PATCH 21/25] Fix pre-commit --- tests/perf/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index bba5e36d55d..79178e63f1f 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -188,7 +188,7 @@ def _build_config( return cfg @staticmethod - def _set_num_epoch(model_id:str, train_params: dict, num_epoch: int): + def _set_num_epoch(model_id: str, train_params: dict, num_epoch: int): """Set model specific num_epoch parameter.""" if "padim" in model_id: return # No configurable parameter for num_epoch From 94bdc0036cdca34044d241247f5cbca67a8ca823 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 22 Dec 2023 15:08:59 +0900 Subject: [PATCH 22/25] Add subset_dir_name cfg for seg datasets --- tests/perf/benchmark.py | 12 +++++++----- tests/perf/test_semantic_segmentation.py | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 79178e63f1f..18c94c0536d 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -54,6 +54,7 @@ def __init__( output_root: str = "otx-benchmark", dry_run: bool = False, tags: dict = {}, + **kwargs, ): self.datasets = datasets self.data_root = data_root @@ -65,6 +66,7 @@ def __init__( self.output_root = output_root self.dry_run = dry_run self.tags = tags + self.subset_dir_names = kwargs.get("subset_dir_names", {"train": "", "val": "", "test": ""}) def run( self, @@ -168,23 +170,23 @@ def _build_config( params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()]) cfg["command"].append( "otx train ${model}" - " --train-data-roots ${dataroot}/${data}" - " --val-data-roots ${dataroot}/${data}" + " --train-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['train']}" + " --val-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['val']}" " --deterministic" f" {resource_param}" f" params {params_str}" ) - cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}") if self.eval_upto == "train": return cfg cfg["command"].append("otx export") - cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}") if self.eval_upto == "export": return cfg cfg["command"].append("otx optimize") - cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}") + cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}") return cfg @staticmethod diff --git a/tests/perf/test_semantic_segmentation.py b/tests/perf/test_semantic_segmentation.py index 4ec28f6726c..a5ca4086f83 100644 --- a/tests/perf/test_semantic_segmentation.py +++ b/tests/perf/test_semantic_segmentation.py @@ -27,6 +27,7 @@ class TestPerfSemanticSegmentation: "semantic_seg/kvasir_small/2", "semantic_seg/kvasir_small/3", ], + "subset_dir_names": {"train": "train", "val": "val", "test": "test"}, "num_repeat": 3, }, "medium": { 
@@ -36,6 +37,7 @@ class TestPerfSemanticSegmentation: "datasets": [ "semantic_seg/kvasir_medium", ], + "subset_dir_names": {"train": "train", "val": "val", "test": "test"}, "num_repeat": 3, }, "large": { @@ -45,6 +47,7 @@ class TestPerfSemanticSegmentation: "datasets": [ "semantic_seg/kvasir_large", ], + "subset_dir_names": {"train": "train", "val": "val", "test": "test"}, "num_repeat": 1, }, } From fab92516888eb7a89042bcdb3a22eedd14c38a0f Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Fri, 22 Dec 2023 15:11:34 +0900 Subject: [PATCH 23/25] Update changelog.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68b075dca53..30554245175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,12 @@ All notable changes to this project will be documented in this file. ### New features -- Add zero-shot visual prompting (https://github.com/openvinotoolkit/training_extensions/pull/2616) +- Add zero-shot visual prompting () ### Enhancements - Upgrade NNCF to 2.7 and OpenVINO to 2023.2 () +- Automate performance benchmark () ## \[v1.5.0\] From 91bfec13541e70258511473e29b8b56dfed4c3d1 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Tue, 26 Dec 2023 12:09:29 +0900 Subject: [PATCH 24/25] Reflect review comments --- tests/perf/__init__.py | 2 +- tests/perf/benchmark.py | 18 +++++++++--------- tests/perf/conftest.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py index 36a90a5e5f6..9984d0cb25b 100644 --- a/tests/perf/__init__.py +++ b/tests/perf/__init__.py @@ -1,4 +1,4 @@ """OTX Perfomance tests.""" -# Copyright (C) 2021-2022 Intel Corporation +# Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 18c94c0536d..2a2c18a8146 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -33,7 +33,7 @@ class OTXBenchmark: train_params (dict): Additional training parameters. e.x) {'learning_parameters.num_iters': 2}. Defaults to {}. track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False. - eval_upto (str): The last serial operation to evaluate. Choose on of ('train', 'export', 'optimize'). + eval_upto (str): The last serial operation to evaluate. Choose one of ('train', 'export', 'optimize'). Operations include the preceeding ones. e.x) Eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval Default to 'train'. 
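In other words, each later stage re-uses everything before it; a small sketch of how the command list grows with the three eval_upto settings (command strings abbreviated here, the full templates are assembled in _build_config()):

    stages = {
        "train":    ["otx train", "otx eval"],
        "export":   ["otx train", "otx eval", "otx export", "otx eval"],
        "optimize": ["otx train", "otx eval", "otx export", "otx eval", "otx optimize", "otx eval"],
    }
    for eval_upto, commands in stages.items():
        print(f"--eval-upto {eval_upto}: {' -> '.join(commands)}")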
@@ -48,32 +48,32 @@ def __init__( data_root: str = "data", num_epoch: int = 0, num_repeat: int = 1, - train_params: dict = {}, + train_params: dict | None = None, track_resources: bool = False, eval_upto: str = "train", output_root: str = "otx-benchmark", dry_run: bool = False, - tags: dict = {}, - **kwargs, + tags: dict | None = None, + subset_dir_names: dict | None = None, ): self.datasets = datasets self.data_root = data_root self.num_epoch = num_epoch self.num_repeat = num_repeat - self.train_params = train_params + self.train_params = train_params or {} self.track_resources = track_resources self.eval_upto = eval_upto self.output_root = output_root self.dry_run = dry_run - self.tags = tags - self.subset_dir_names = kwargs.get("subset_dir_names", {"train": "", "val": "", "test": ""}) + self.tags = tags or {} + self.subset_dir_names = subset_dir_names or {"train": "", "val": "", "test": ""} def run( self, model_id: str, train_params: dict = {}, tags: dict = {}, - ) -> pd.DataFrame: + ) -> pd.DataFrame | None: """Run configured benchmark with given model and return the result. Args: @@ -82,7 +82,7 @@ def run( tags (dict): Overrides global benchmark tags Retruns: - pd.DataFrame: Table with benchmark metrics + pd.DataFrame | None: Table with benchmark metrics """ # Build config file diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py index b85d3e2b869..0d831d50dd1 100644 --- a/tests/perf/conftest.py +++ b/tests/perf/conftest.py @@ -48,7 +48,7 @@ def pytest_addoption(parser): parser.addoption( "--eval-upto", action="store", - default="all", + default="train", choices=("train", "export", "optimize"), help="Choose train|export|optimize. Defaults to train.", ) From 5e88a6664b8044e1e64a913d8bd6b254ab909387 Mon Sep 17 00:00:00 2001 From: Songki Choi Date: Tue, 26 Dec 2023 13:48:37 +0900 Subject: [PATCH 25/25] Refine doc string --- tests/perf/benchmark.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py index 2a2c18a8146..f39ed806731 100644 --- a/tests/perf/benchmark.py +++ b/tests/perf/benchmark.py @@ -30,7 +30,7 @@ class OTXBenchmark: Defaults to 0, which means no overriding. num_repeat (int): Number for trials with different random seed, which would be set as range(0, num_repeat). Defaults to 1. - train_params (dict): Additional training parameters. + train_params (dict, optional): Additional training parameters. e.x) {'learning_parameters.num_iters': 2}. Defaults to {}. track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False. eval_upto (str): The last serial operation to evaluate. Choose one of ('train', 'export', 'optimize'). @@ -39,7 +39,9 @@ class OTXBenchmark: Default to 'train'. output_root (str): Output root dirctory for logs and results. Defaults to './otx-benchmark'. dry_run (bool): Whether to just print the OTX command without execution. Defaults to False. - tags (dict): Key-values pair metadata for the experiment. Defaults to {}. + tags (dict, optional): Key-values pair metadata for the experiment. + subset_dir_names (dict, optional): Specify dataset subset directory names, if any. + e.x) {"train": "train_10percent", "val": "val_all", "test": "test"} """ def __init__(
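As a closing note on the new subset_dir_names option: when it is set (as in the semantic segmentation configs above), the subset directory is appended to each data-root argument, while the default empty strings leave the paths flat. A rough illustration of the resulting train command template, with placeholders still to be expanded by tools/experiment.py:

    subset_dir_names = {"train": "train", "val": "val", "test": "test"}  # illustrative values
    train_cmd = (
        "otx train ${model}"
        " --train-data-roots ${dataroot}/${data}/" + subset_dir_names["train"] +
        " --val-data-roots ${dataroot}/${data}/" + subset_dir_names["val"] +
        " --deterministic"
    )
    print(train_cmd)
    # -> otx train ${model} --train-data-roots ${dataroot}/${data}/train --val-data-roots ${dataroot}/${data}/val --deterministic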