diff --git a/graph_net/test/ast_graph_variable_rename_test.sh b/graph_net/test/ast_graph_variable_rename_test.sh
index 08990a5ae..5667837b1 100755
--- a/graph_net/test/ast_graph_variable_rename_test.sh
+++ b/graph_net/test/ast_graph_variable_rename_test.sh
@@ -28,7 +28,7 @@ EOF
 ) \
 2>&1 | tee "$RENAMED_PATH/graph_rename.log"
 
-python3 -m graph_net.torch.test_compiler \
+python3 -m graph_net_bench.torch.test_compiler \
     --model-path-prefix $GRAPH_NET_ROOT \
     --allow-list $model_list \
     --compiler graph_variable_renamer_validator \
diff --git a/graph_net/test/fx_graph_module_unserialize_test.sh b/graph_net/test/fx_graph_module_unserialize_test.sh
index 41a21aa92..171f3c625 100755
--- a/graph_net/test/fx_graph_module_unserialize_test.sh
+++ b/graph_net/test/fx_graph_module_unserialize_test.sh
@@ -55,7 +55,7 @@ EOF
 )
 
 
-python3 -m graph_net.torch.test_compiler \
+python3 -m graph_net_bench.torch.test_compiler \
    --model-path-prefix $GRAPH_NET_ROOT \
    --allow-list $model_list \
    --compiler range_decomposer_validator \
diff --git a/graph_net/test/graph_variable_rename_test.sh b/graph_net/test/graph_variable_rename_test.sh
index b58f34b88..1ceaa33bc 100755
--- a/graph_net/test/graph_variable_rename_test.sh
+++ b/graph_net/test/graph_variable_rename_test.sh
@@ -28,7 +28,7 @@ EOF
 ) \
 2>&1 | tee "$RENAMED_PATH/graph_rename.log"
 
-python3 -m graph_net.torch.test_compiler \
+python3 -m graph_net_bench.torch.test_compiler \
    --model-path-prefix $GRAPH_NET_ROOT \
    --allow-list $model_list \
    --compiler graph_variable_renamer_validator \
diff --git a/graph_net/test/typical_sequence_decomposer_test.sh b/graph_net/test/typical_sequence_decomposer_test.sh
index 59810756e..5f9b8c4a1 100755
--- a/graph_net/test/typical_sequence_decomposer_test.sh
+++ b/graph_net/test/typical_sequence_decomposer_test.sh
@@ -82,7 +82,7 @@ EOF
 )
 
 
-python3 -m graph_net.torch.test_compiler \
+python3 -m graph_net_bench.torch.test_compiler \
    --model-path-prefix $GRAPH_NET_ROOT \
    --allow-list $model_list \
    --compiler range_decomposer_validator \
diff --git a/graph_net/torch/backend/graph_compiler_backend.py b/graph_net/torch/backend/graph_compiler_backend.py
deleted file mode 100644
index 44fb9cc44..000000000
--- a/graph_net/torch/backend/graph_compiler_backend.py
+++ /dev/null
@@ -1,6 +0,0 @@
-class GraphCompilerBackend:
-    def __call__(self, model):
-        raise NotImplementedError()
-
-    def synchronize(self):
-        raise NotImplementedError()
diff --git a/graph_net/torch/backend/nope_backend.py b/graph_net/torch/backend/nope_backend.py
deleted file mode 100644
index 688fd8e12..000000000
--- a/graph_net/torch/backend/nope_backend.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import torch
-from .graph_compiler_backend import GraphCompilerBackend
-
-
-class NopeBackend(GraphCompilerBackend):
-    def __call__(self, model):
-        return model
-
-    def synchronize(self):
-        if torch.cuda.is_available():
-            torch.cuda.synchronize()
diff --git a/graph_net/torch/fx_graph_module_util.py b/graph_net/torch/fx_graph_module_util.py
index 3ad882f22..22baa34e1 100644
--- a/graph_net/torch/fx_graph_module_util.py
+++ b/graph_net/torch/fx_graph_module_util.py
@@ -1,5 +1,6 @@
 import os
 import inspect
+from graph_net.torch.utils import get_named_tensors
 from graph_net.tensor_meta import TensorMeta
 from graph_net.imp_util import load_module
 from dataclasses import asdict
@@ -38,8 +39,6 @@ def _get_tensor_metas(model_path):
 
 def _create_inputs_by_metas(module, tensor_metas, use_dummy_inputs):
     tensor_meta_attrs_list = [asdict(tensor_meta) for tensor_meta in tensor_metas]
-    from graph_net.torch.utils import get_named_tensors
-
     named_tensors = get_named_tensors(tensor_meta_attrs_list, use_dummy_inputs)
     name2tensor = {k: v for k, v in named_tensors}
     return tuple(
diff --git a/graph_net/torch/static_to_dynamic.py b/graph_net/torch/static_to_dynamic.py
index 54a4de3b7..eb4822a72 100644
--- a/graph_net/torch/static_to_dynamic.py
+++ b/graph_net/torch/static_to_dynamic.py
@@ -1,9 +1,11 @@
 import traceback
 import logging
 import torch
-from graph_net.torch.utils import get_named_tensors
+from graph_net.torch.utils import (
+    get_named_tensors,
+    apply_templates,
+)
 from torch.fx.passes.shape_prop import ShapeProp
-from graph_net.torch.utils import apply_templates
 from pathlib import Path
 import inspect
 from graph_net.torch.fx_graph_parse_util import parse_sole_graph_module
diff --git a/graph_net/torch/test_compiler.py b/graph_net/torch/test_compiler.py
deleted file mode 100755
index 5be2f5b61..000000000
--- a/graph_net/torch/test_compiler.py
+++ /dev/null
@@ -1,542 +0,0 @@
-from . import utils
-import subprocess
-import argparse
-import importlib.util
-import torch
-from pathlib import Path
-from typing import Type
-import sys
-import os
-import os.path
-import traceback
-import json
-import random
-import numpy as np
-import platform
-import base64
-from graph_net.torch.backend.graph_compiler_backend import GraphCompilerBackend
-from graph_net.torch.backend.tvm_backend import TvmBackend
-from graph_net.torch.backend.xla_backend import XlaBackend
-from graph_net.torch.backend.inductor_backend import InductorBackend
-from graph_net.torch.backend.tensorrt_backend import TensorRTBackend
-from graph_net.torch.backend.blade_disc_backend import BladeDISCBackend
-from graph_net.torch.backend.nope_backend import NopeBackend
-from graph_net.torch.backend.unstable_to_stable_backend import UnstableToStableBackend
-from graph_net.torch.backend.range_decomposer_validator_backend import (
-    RangeDecomposerValidatorBackend,
-)
-from graph_net.torch.backend.graph_variable_renamer_validator_backend import (
-    GraphVariableRenamerValidatorBackend,
-)
-from graph_net_bench import test_compiler_util
-from graph_net import model_path_util
-from graph_net_bench import path_utils
-
-
-registry_backend = {
-    "tvm": TvmBackend(),
-    "xla": XlaBackend(),
-    "inductor": InductorBackend(),
-    "tensorrt": TensorRTBackend(),
-    "bladedisc": BladeDISCBackend(),
-    "nope": NopeBackend(),
-    "unstable_to_stable": UnstableToStableBackend(),
-    "range_decomposer_validator": RangeDecomposerValidatorBackend(),
-    "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend(),
-}
-
-
-def set_seed(random_seed):
-    random.seed(random_seed)
-    np.random.seed(random_seed)
-    torch.manual_seed(random_seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(random_seed)
-        torch.cuda.manual_seed_all(random_seed)
-
-
-def get_hardward_name(args):
-    hardware_name = "unknown"
-    if "cuda" in args.device:
-        hardware_name = torch.cuda.get_device_name(args.device)
-    elif args.device == "cpu":
-        hardware_name = platform.processor()
-    return hardware_name
-
-
-def get_compile_framework_version(args):
-    if args.compiler in ["inductor", "nope", "unstable_to_stable"]:
-        return torch.__version__
-    elif args.compiler in ["tvm", "xla", "tensorrt", "bladedisc"]:
-        # Assuming compiler object has a version attribute
-        return f"{args.compiler.capitalize()} {args.compiler.version}"
-    return "unknown"
-
-
-def load_class_from_file(
-    args: argparse.Namespace, class_name: str, device: str
-) -> Type[torch.nn.Module]:
= f"{args.model_path}/model.py" - file = Path(file_path).resolve() - module_name = file.stem - - with open(file_path, "r", encoding="utf-8") as f: - model_code = f.read() - model_code = utils.modify_code_by_device(model_code, device) - spec = importlib.util.spec_from_loader(module_name, loader=None) - module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module - compiled_code = compile(model_code, filename=file, mode="exec") - exec(compiled_code, module.__dict__) - - model_class = getattr(module, class_name, None) - setattr(model_class, "__graph_net_file_path__", file_path) - setattr(model_class, "__graph_net_device__", device) - return model_class - - -def convert_to_dict(config_str): - if config_str is None or config_str == "None": - return {} - config_str = base64.b64decode(config_str).decode("utf-8") - config = json.loads(config_str) - assert isinstance(config, dict), f"config should be a dict. {config_str=}" - return config - - -def get_compiler_backend(args) -> GraphCompilerBackend: - assert args.compiler in registry_backend, f"Unknown compiler: {args.compiler}" - backend = registry_backend[args.compiler] - if args.config is not None: - backend.config = convert_to_dict(args.config) - return backend - - -def get_model(args): - device = "xla" if args.compiler == "xla" else args.device - - # device: Torch device object specifying the target device for model loading (e.g., 'cuda', 'cpu', 'xla') - model_class = load_class_from_file(args, class_name="GraphModule", device=device) - model = model_class().to(torch.device(args.device)) - return model - - -def get_input_dict(args): - inputs_params = utils.load_converted_from_text(f"{args.model_path}") - params = inputs_params["weight_info"] - for tensor_meta in params.values(): - if "device" in tensor_meta["info"]: - tensor_meta["info"]["device"] = args.device - return { - k: utils.replay_tensor(v).to(torch.device(args.device)) - for k, v in params.items() - } - - -def measure_performance(model_call, args, compiler): - stats = {} - outs = model_call() - - # Warmup runs - for _ in range(args.warmup): - model_call() - compiler.synchronize() - - hardware_name = get_hardward_name(args) - print( - f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}", - file=sys.stderr, - flush=True, - ) - - if "cuda" in args.device: - """ - Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings, - With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench - """ - - e2e_times = [] - gpu_times = [] - - for i in range(args.trials): - # End-to-end timing (naive_timer) - duration_box = test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - # GPU-only timing (CUDA Events) - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - start_event.record() - - model_call() - - end_event.record() - compiler.synchronize() - - gpu_time_ms = start_event.elapsed_time(end_event) - e2e_times.append(duration_box.value) - gpu_times.append(gpu_time_ms) - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms", - file=sys.stderr, - flush=True, - ) - - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times) - - else: # CPU or other devices - e2e_times = [] - for i in range(args.trials): - duration_box = 
test_compiler_util.DurationBox(-1) - with test_compiler_util.naive_timer(duration_box, compiler.synchronize): - model_call() - print( - f"Trial {i + 1}: e2e={duration_box.value:.5f} ms", - file=sys.stderr, - flush=True, - ) - e2e_times.append(duration_box.value) - stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) - - return outs, stats - - -def test_single_model(args): - compiler = get_compiler_backend(args) - input_dict = get_input_dict(args) - model = get_model(args) - model_path = os.path.normpath(args.model_path) - test_compiler_util.print_with_log_prompt( - "[Processing]", model_path, args.log_prompt - ) - test_compiler_util.print_basic_config( - args, get_hardward_name(args), get_compile_framework_version(args) - ) - - runtime_seed = 1024 - eager_failure = False - expected_out = None - eager_time_stats = {} - - try: - - def eager_model_call(): - return model(**input_dict) - - expected_out, eager_time_stats = measure_performance( - eager_model_call, args, compiler - ) - - torch.manual_seed(runtime_seed) - if not isinstance(expected_out, tuple): - expected_out = (expected_out,) - except (TypeError, RuntimeError) as e: - print(f"Eager model execution failed: {str(e)}", file=sys.stderr) - eager_failure = True - - compiled_failure = False - compiled_model = None - compiled_time_stats = {} - - try: - compiled_model = compiler(model) - torch.manual_seed(runtime_seed) - - def compiled_model_call(): - return compiled_model(**input_dict) - - compiled_out, compiled_time_stats = measure_performance( - compiled_model_call, args, compiler - ) - - if not isinstance(compiled_out, tuple): - compiled_out = (compiled_out,) - if args.compiler == "xla": - compiled_out = tuple(item.to("cpu").to("cuda") for item in compiled_out) - except (TypeError, RuntimeError) as e: - print(f"Compiled model execution failed: {str(e)}", file=sys.stderr) - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - except Exception as e: - compiled_failure = True - print("\n--- Full Traceback ---") - traceback.print_exc() - print(f"debug-model-execution {type(e).__name__} {args.model_path}", flush=True) - - if eager_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to eager model execution error.]", - file=sys.stderr, - flush=True, - ) - elif compiled_failure: - print(f"{args.log_prompt} [Result] status: failed", file=sys.stderr, flush=True) - print( - f"{args.log_prompt} [Fail due to compiled model execution error.]", - file=sys.stderr, - flush=True, - ) - else: - compare_correctness(expected_out, compiled_out, args) - - print( - f"{args.log_prompt} [Result] status: success", file=sys.stderr, flush=True - ) - - test_compiler_util.print_times_and_speedup( - args, eager_time_stats, compiled_time_stats - ) - - -def print_and_store_cmp(key, cmp_func, args, expected_out, compiled_out, **kwargs): - cmp_ret = cmp_func(expected_out, compiled_out, **kwargs) - print( - f"{args.log_prompt} [Correctness]{key}: {cmp_ret}", - file=sys.stderr, - flush=True, - ) - return cmp_ret - - -def compare_correctness(expected_out, compiled_out, args): - eager_dtypes = [ - ( - str(x.dtype).replace("torch.", "") - if isinstance(x, torch.Tensor) - else type(x).__name__ - ) - for x in expected_out - ] - compiled_dtypes = [ - ( - str(x.dtype).replace("torch.", "") - if isinstance(x, torch.Tensor) - else type(x).__name__ - ) - for x in compiled_out - ] 
- - # datatype check - type_match = test_compiler_util.check_output_datatype( - args, eager_dtypes, compiled_dtypes - ) - - if type_match: - test_compiler_util.check_equal( - args, - expected_out, - compiled_out, - cmp_equal_func=get_cmp_equal, - ) - - test_compiler_util.check_allclose( - args, - expected_out, - compiled_out, - cmp_all_close_func=get_cmp_all_close, - cmp_max_diff_func=get_cmp_max_diff, - cmp_mean_diff_func=get_cmp_mean_diff, - ) - - -def get_cmp_equal(expected_out, compiled_out): - return " ".join( - str(int(torch.equal(a, b))) for a, b in zip(expected_out, compiled_out) - ) - - -def get_cmp_all_close(expected_out, compiled_out, atol, rtol): - return " ".join( - str(int(torch.allclose(a, b, atol=atol, rtol=rtol))) - for a, b in zip(expected_out, compiled_out) - ) - - -def get_cmp_max_diff(expected_out, compiled_out): - return " ".join( - # Transform to float to handle LongTensor output of some models, which cannnot be processed with torch.max(). - str(torch.max(torch.abs(a.float() - b.float())).item()) - for a, b in zip(expected_out, compiled_out) - ) - - -def get_cmp_mean_diff(expected_out, compiled_out): - return " ".join( - # To handle LongTensor - str(torch.mean(torch.abs(a.float() - b.float())).item()) - for a, b in zip(expected_out, compiled_out) - ) - - -def get_cmp_diff_count(expected_out, compiled_out, atol, rtol): - results = [] - for a, b in zip(expected_out, compiled_out): - # To handle LongTensor - if a.is_floating_point() and b.is_floating_point(): - diff_count = torch.sum(~torch.isclose(a, b, atol=atol, rtol=rtol)).item() - else: - diff_count = torch.sum(a != b).item() - results.append(str(diff_count)) - return " ".join(results) - - -def test_multi_models(args): - test_samples = model_path_util.get_allow_samples(args.allow_list) - - sample_idx = 0 - failed_samples = [] - module_name = os.path.splitext(os.path.basename(__file__))[0] - for model_path in path_utils.get_recursively_model_path(args.model_path): - if test_samples is None or os.path.abspath(model_path) in test_samples: - print( - f"[{sample_idx}] {module_name}, model_path: {model_path}", - file=sys.stderr, - flush=True, - ) - cmd = " ".join( - [ - sys.executable, - f"-m graph_net.torch.{module_name}", - f"--model-path {model_path}", - f"--compiler {args.compiler}", - f"--device {args.device}", - f"--warmup {args.warmup}", - f"--trials {args.trials}", - f"--log-prompt {args.log_prompt}", - f"--config {args.config}", - ] - ) - try: - process = subprocess.Popen(cmd, shell=True) - cmd_ret = process.wait() - except KeyboardInterrupt: - print("KeyboardInterrupt") - sys.exit(1) - except Exception: - print("\n--- Full Traceback ---") - traceback.print_exc() - if cmd_ret != 0: - failed_samples.append(model_path) - sample_idx += 1 - - print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", - file=sys.stderr, - flush=True, - ) - for model_path in failed_samples: - print(f"- {model_path}", file=sys.stderr, flush=True) - - -def test_multi_models_with_prefix(args): - assert os.path.isdir(args.model_path_prefix) - assert os.path.isfile(args.allow_list) - test_samples = model_path_util.get_allow_samples(args.allow_list) - py_module_name = os.path.splitext(os.path.basename(__file__))[0] - for rel_model_path in test_samples: - model_path = os.path.join(args.model_path_prefix, rel_model_path) - if not os.path.exists(model_path): - continue - if not os.path.exists(os.path.join(model_path, "model.py")): - continue - cmd = " ".join( - [ - sys.executable, - f"-m 
graph_net.torch.{py_module_name}", - f"--model-path {model_path}", - f"--compiler {args.compiler}", - f"--device {args.device}", - f"--warmup {args.warmup}", - f"--trials {args.trials}", - f"--log-prompt {args.log_prompt}", - f"--config {args.config}", - ] - ) - try: - process = subprocess.Popen(cmd, shell=True) - process.wait() - except KeyboardInterrupt: - print("KeyboardInterrupt") - sys.exit(1) - except Exception: - print("\n--- Full Traceback ---") - traceback.print_exc() - - -def main(args): - if args.model_path_prefix is not None: - test_multi_models_with_prefix(args) - return - assert os.path.isdir(args.model_path) - - initalize_seed = 123 - set_seed(random_seed=initalize_seed) - - if path_utils.is_single_model_dir(args.model_path): - test_single_model(args) - else: - test_multi_models(args) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Test compiler performance.") - parser.add_argument( - "--model-path", - type=str, - required=False, - default=None, - help="Path to model file(s), each subdirectory containing graph_net.json will be regarded as a model", - ) - parser.add_argument( - "--compiler", - type=str, - required=False, - default="inductor", - help="Path to customized compiler python file", - ) - parser.add_argument( - "--device", - type=str, - required=False, - default="cuda", - help="Device for testing the compiler (e.g., 'cpu' or 'cuda')", - ) - parser.add_argument( - "--warmup", type=int, required=False, default=3, help="Number of warmup steps" - ) - parser.add_argument( - "--trials", type=int, required=False, default=5, help="Number of timing trials" - ) - parser.add_argument( - "--log-prompt", - type=str, - required=False, - default="graph-net-test-compiler-log", - help="Log prompt for performance log filtering.", - ) - parser.add_argument( - "--allow-list", - type=str, - required=False, - default=None, - help="Path to samples list, each line contains a sample path", - ) - parser.add_argument( - "--model-path-prefix", - type=str, - required=False, - default=None, - help="Prefix path to model path list", - ) - parser.add_argument( - "--config", - type=str, - required=False, - default=None, - help="base64 encode configuration json.", - ) - args = parser.parse_args() - main(args=args) diff --git a/graph_net/torch/test_reference_device.py b/graph_net/torch/test_reference_device.py index a6fe7e9e0..f022d2ba5 100644 --- a/graph_net/torch/test_reference_device.py +++ b/graph_net/torch/test_reference_device.py @@ -10,7 +10,7 @@ from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net.torch import test_compiler +from graph_net_bench.torch import test_compiler def get_reference_log_path(reference_dir, model_path): diff --git a/graph_net/torch/test_target_device.py b/graph_net/torch/test_target_device.py index 3e0a09daa..ec2085a32 100644 --- a/graph_net/torch/test_target_device.py +++ b/graph_net/torch/test_target_device.py @@ -8,7 +8,7 @@ from graph_net_bench import path_utils from graph_net_bench import test_compiler_util from graph_net import model_path_util -from graph_net.torch import test_compiler, test_reference_device +from graph_net_bench.torch import test_compiler, test_reference_device def parse_config_from_reference_log(log_path): diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py old mode 100755 new mode 100644 index 62837fc8c..b397a9b6d --- a/graph_net/torch/utils.py +++ b/graph_net/torch/utils.py @@ -221,21 +221,6 @@ def 
@@ -221,21 +221,6 @@ def load_converted_from_text(file_path):
     }
 
 
-def convert_tensor_meta_attrs_list_to_named_tensors(tensor_meta_attrs_list):
-    tensors_wrappers = convert_tensor_meta_attrs_list_to_tensors_wrappers(
-        tensor_meta_attrs_list
-    )
-    ret = []
-    for i, tensors_wrapper in enumerate(tensors_wrappers):
-        name = tensors_wrapper["name"]
-        # shape = tensors_wrapper["info"]['shape']
-        # logging.warning(f"before replay_tensor {i=} {shape=}")
-        tensor = replay_tensor(tensors_wrapper)
-        # logging.warning(f"after replay_tensor {i=} {shape=}")
-        ret.append((name, tensor))
-    return ret
-
-
 def get_named_tensors(tensor_meta_attrs_list, use_dummy_inputs):
     tensors_wrappers = convert_tensor_meta_attrs_list_to_tensors_wrappers(
         tensor_meta_attrs_list
@@ -324,10 +309,6 @@ def _get_classes(file_path):
     yield from inspect.getmembers(unnamed, inspect.isclass)
 
 
-def extract_dynamic_shapes(example_inputs):
-    pass
-
-
 def replay_tensor(info):
     device = info["info"]["device"]
     dtype = info["info"]["dtype"]
diff --git a/graph_net/torch/backend/blade_disc_backend.py b/graph_net_bench/torch/backend/blade_disc_backend.py
similarity index 94%
rename from graph_net/torch/backend/blade_disc_backend.py
rename to graph_net_bench/torch/backend/blade_disc_backend.py
index 5af7b8490..42803a12c 100644
--- a/graph_net/torch/backend/blade_disc_backend.py
+++ b/graph_net_bench/torch/backend/blade_disc_backend.py
@@ -28,6 +28,9 @@ def compile(self, module, *args, **kwargs):
 
 
 class BladeDISCBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model):
         return BladeDISCCompiledModule(model)
 
diff --git a/graph_net/torch/backend/graph_variable_renamer_validator_backend.py b/graph_net_bench/torch/backend/graph_variable_renamer_validator_backend.py
similarity index 95%
rename from graph_net/torch/backend/graph_variable_renamer_validator_backend.py
rename to graph_net_bench/torch/backend/graph_variable_renamer_validator_backend.py
index 902dd761c..ab2018b7d 100755
--- a/graph_net/torch/backend/graph_variable_renamer_validator_backend.py
+++ b/graph_net_bench/torch/backend/graph_variable_renamer_validator_backend.py
@@ -4,6 +4,7 @@
 from graph_net.tensor_meta import TensorMeta
 import os
 import importlib.util
+from .graph_compiler_backend import GraphCompilerBackend
 
 
 class RenamedModelAdapter(torch.nn.Module):
@@ -27,7 +28,10 @@ def _convert_by_name_mapping(self, kwargs):
         return new_kwargs
 
 
-class GraphVariableRenamerValidatorBackend:
+class GraphVariableRenamerValidatorBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def _get_rename_mapping(self, model_dir: Path):
         mapping = {}
         for meta_file in ["input_meta.py", "weight_meta.py"]:
diff --git a/graph_net/torch/backend/inductor_backend.py b/graph_net_bench/torch/backend/inductor_backend.py
similarity index 82%
rename from graph_net/torch/backend/inductor_backend.py
rename to graph_net_bench/torch/backend/inductor_backend.py
index e39a9d08f..5200e3032 100644
--- a/graph_net/torch/backend/inductor_backend.py
+++ b/graph_net_bench/torch/backend/inductor_backend.py
@@ -3,6 +3,9 @@
 
 
 class InductorBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model):
         return torch.compile(model, backend="inductor")
 
diff --git a/graph_net/torch/backend/range_decomposer_validator_backend.py b/graph_net_bench/torch/backend/range_decomposer_validator_backend.py
similarity index 95%
rename from graph_net/torch/backend/range_decomposer_validator_backend.py
rename to graph_net_bench/torch/backend/range_decomposer_validator_backend.py
index 375c562c9..c260f47b8 100644
--- a/graph_net/torch/backend/range_decomposer_validator_backend.py
+++ b/graph_net_bench/torch/backend/range_decomposer_validator_backend.py
@@ -4,6 +4,7 @@
 import os
 import importlib.util
 from typing import List
+from .graph_compiler_backend import GraphCompilerBackend
 
 
 class ComposedModel(nn.Module):
@@ -37,7 +38,10 @@ def _convert_inputs(self, subgraph, input_kwargs):
         }
 
 
-class RangeDecomposerValidatorBackend:
+class RangeDecomposerValidatorBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def _load_model_instance(self, path: str, device: str) -> torch.nn.Module:
         class_name = "GraphModule"
         model_file = os.path.join(path, "model.py")
diff --git a/graph_net/torch/backend/tensorrt_backend.py b/graph_net_bench/torch/backend/tensorrt_backend.py
similarity index 88%
rename from graph_net/torch/backend/tensorrt_backend.py
rename to graph_net_bench/torch/backend/tensorrt_backend.py
index 5fa8524b3..e0490c48e 100644
--- a/graph_net/torch/backend/tensorrt_backend.py
+++ b/graph_net_bench/torch/backend/tensorrt_backend.py
@@ -8,6 +8,9 @@
 
 
 class TensorRTBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model):
         if torch_tensorrt is None:
             raise ImportError("torch_tensorrt not installed")
diff --git a/graph_net/torch/backend/tvm_backend.py b/graph_net_bench/torch/backend/tvm_backend.py
similarity index 96%
rename from graph_net/torch/backend/tvm_backend.py
rename to graph_net_bench/torch/backend/tvm_backend.py
index 4577bfcae..435999f18 100644
--- a/graph_net/torch/backend/tvm_backend.py
+++ b/graph_net_bench/torch/backend/tvm_backend.py
@@ -1,6 +1,5 @@
 import torch
 import inspect
-import numpy as np
 from .graph_compiler_backend import GraphCompilerBackend
 
 try:
@@ -59,6 +58,9 @@ def compile(self, module, **kwargs):
 
 
 class TvmBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model, **kwargs):
         if torch.cuda.is_available():
             device = "cuda"
@@ -75,5 +77,5 @@ def version(self):
             from importlib.metadata import version
 
             return version("tvm")
-        except:
+        except ImportError:
             return "unknown"
diff --git a/graph_net/torch/backend/unstable_to_stable_backend.py b/graph_net_bench/torch/backend/unstable_to_stable_backend.py
similarity index 99%
rename from graph_net/torch/backend/unstable_to_stable_backend.py
rename to graph_net_bench/torch/backend/unstable_to_stable_backend.py
index c85497ab5..bca0792c7 100644
--- a/graph_net/torch/backend/unstable_to_stable_backend.py
+++ b/graph_net_bench/torch/backend/unstable_to_stable_backend.py
@@ -2,10 +2,13 @@
 import torch
 import sys
 from .graph_compiler_backend import GraphCompilerBackend
-from ..fx_graph_serialize_util import serialize_graph_module_to_str
+from graph_net.torch.fx_graph_serialize_util import serialize_graph_module_to_str
 
 
 class UnstableToStableBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model):
         # Perform unstable API check before running the model
         unstable_api = os.getenv("DISALLOWED_UNSTABLE_API", "").strip()
diff --git a/graph_net/torch/backend/xla_backend.py b/graph_net_bench/torch/backend/xla_backend.py
similarity index 93%
rename from graph_net/torch/backend/xla_backend.py
rename to graph_net_bench/torch/backend/xla_backend.py
index 2c4d253b1..4793d8687 100644
--- a/graph_net/torch/backend/xla_backend.py
+++ b/graph_net_bench/torch/backend/xla_backend.py
@@ -25,6 +25,9 @@ def forward(self, **kwargs):
 
 
 class XlaBackend(GraphCompilerBackend):
+    def __init__(self, config):
+        super().__init__(config)
+
     def __call__(self, model):
         if torch_xla is None:
             raise ImportError("torch_xla not installed")
diff --git a/graph_net_bench/torch/test_compiler.py b/graph_net_bench/torch/test_compiler.py
index cf801db12..08f0ac63d 100755
--- a/graph_net_bench/torch/test_compiler.py
+++ b/graph_net_bench/torch/test_compiler.py
@@ -15,15 +15,37 @@
 import platform
 import base64
 from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend
+from graph_net_bench.torch.backend.tvm_backend import TvmBackend
+from graph_net_bench.torch.backend.xla_backend import XlaBackend
+from graph_net_bench.torch.backend.inductor_backend import InductorBackend
+from graph_net_bench.torch.backend.tensorrt_backend import TensorRTBackend
+from graph_net_bench.torch.backend.blade_disc_backend import BladeDISCBackend
 from graph_net_bench.torch.backend.nope_backend import NopeBackend
 from graph_net_bench.torch.backend.pass_mgr_backend import PassMgrBackend
+from graph_net_bench.torch.backend.unstable_to_stable_backend import (
+    UnstableToStableBackend,
+)
+from graph_net_bench.torch.backend.range_decomposer_validator_backend import (
+    RangeDecomposerValidatorBackend,
+)
+from graph_net_bench.torch.backend.graph_variable_renamer_validator_backend import (
+    GraphVariableRenamerValidatorBackend,
+)
 from graph_net_bench import test_compiler_util
 from graph_net_bench import path_utils
 
 
 compiler_backend_name2class = {
+    "tvm": TvmBackend,
+    "xla": XlaBackend,
+    "inductor": InductorBackend,
+    "tensorrt": TensorRTBackend,
+    "bladedisc": BladeDISCBackend,
     "nope": NopeBackend,
     "pass_mgr": PassMgrBackend,
+    "unstable_to_stable": UnstableToStableBackend,
+    "range_decomposer_validator": RangeDecomposerValidatorBackend,
+    "graph_variable_renamer_validator": GraphVariableRenamerValidatorBackend,
 }
diff --git a/graph_net_bench/torch/utils.py b/graph_net_bench/torch/utils.py
index 62837fc8c..c937ff4de 100755
--- a/graph_net_bench/torch/utils.py
+++ b/graph_net_bench/torch/utils.py
@@ -7,17 +7,6 @@
 kLiteralTensorSize = 64
 
 
-def apply_templates(forward_code: str) -> str:
-    tab = "    "
-    forward_code = f"\n{tab}".join(forward_code.split("\n"))
-    imports = "import torch"
-    if "device" in forward_code:
-        imports += "\n\nfrom torch import device"
-    if "inf" in forward_code:
-        imports += "\n\nfrom torch import inf"
-    return f"{imports}\n\nclass GraphModule(torch.nn.Module):\n{tab}{forward_code}"
-
-
 def get_limited_precision_float_str(value):
     if not isinstance(value, float):
         return value
@@ -28,230 +17,6 @@ def get_limited_precision_float_str(value):
     return f"{value:.3f}"
 
 
-def convert_state_and_inputs_impl(state_dict, example_inputs):
-    def tensor_info(tensor):
-        is_float = tensor.dtype.is_floating_point
-        mean = float(tensor.mean().item()) if is_float else None
-        std = None
-        if is_float:
-            if tensor.numel() <= 1:
-                std = 0.0
-            else:
-                std = float(tensor.std().item())
-        return {
-            "shape": list(tensor.shape),
-            "dtype": str(tensor.dtype),
-            "device": str(tensor.device),
-            "mean": get_limited_precision_float_str(mean),
-            "std": get_limited_precision_float_str(std),
-        }
-
-    def process_tensor(tensor):
-        if not isinstance(tensor, torch.Tensor):
-            return {"type": "unknown", "value": tensor}
-
-        info = tensor_info(tensor)
-        if tensor.dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
-            if tensor.numel() < kLiteralTensorSize:
-                return {
"type": "small_int_tensor", - "data": tensor.clone(), - "info": info, - } - else: - return { - "type": "big_int_tensor_by_range", - "min_val": tensor.min().item(), - "max_val": tensor.max().item(), - "info": info, - } - elif tensor.numel() < kLiteralTensorSize: - return {"type": "small_tensor", "data": tensor.clone(), "info": info} - else: - return {"type": "random_tensor", "info": info} - - if isinstance(example_inputs, torch.Tensor): - processed_inputs = process_tensor(example_inputs) - elif isinstance(example_inputs, (list, tuple)): - processed_inputs = [process_tensor(t) for t in example_inputs] - else: - processed_inputs = {"type": "unknown", "value": example_inputs} - - def handle_named_tensors(tensor): - if not isinstance(tensor, torch.Tensor): - return {"type": "unknown", "value": tensor} - info = tensor_info(tensor) - if tensor.dtype in [torch.int8, torch.int16, torch.int32, torch.int64]: - if tensor.numel() < kLiteralTensorSize: - return { - "info": info, - "data": tensor.clone(), - "type": "small_int_tensor", - } - else: - return { - "info": info, - "min_val": tensor.min().item(), - "max_val": tensor.max().item(), - "type": "big_int_tensor_by_range", - } - if tensor.numel() < kLiteralTensorSize: - return {"info": info, "data": tensor.clone(), "type": "small_tensor"} - else: - return {"info": info, "data": None, "type": "random_tensor"} - - processed_weights = { - key: handle_named_tensors(tensor) for key, tensor in state_dict.items() - } - - # dynamic_shapes = extract_dynamic_shapes(example_inputs) - return { - "input_info": processed_inputs, - "weight_info": processed_weights, - "dynamic_shapes": None, - } - - -def convert_state_and_inputs(state_dict, example_inputs): - return convert_state_and_inputs_impl(state_dict, example_inputs) - - -def save_constraints_text(converted, file_path): - lines = [] - if converted["dynamic_shapes"] is not None: - raise NotImplementedError("Handling constraints is not implemented yet.") - with open(file_path, "w") as f: - f.write("\n".join(lines)) - - -def save_converted_to_text(converted, file_path): - def format_data(data): - if data is None: - return "None" - elif isinstance(data, torch.Tensor): - if data.dtype.is_floating_point: - - def float_to_str(x): - if math.isinf(x): - return "float('inf')" if x > 0 else "float('-inf')" - if math.isnan(x): - return "float('nan')" - return f"{x:.6f}" - - return "[{}]".format( - ", ".join(float_to_str(x) for x in data.flatten().tolist()) - ) - else: - return "[{}]".format(", ".join(f"{x}" for x in data.flatten().tolist())) - else: - return repr(data) - - def process_tensor_info(tensor_info, name_prefix="example_input"): - tensor_type = tensor_info.get("type") - info = tensor_info.get("info", {}) - dtype = info.get("dtype", "torch.float") - shape = info.get("shape", []) - device = info.get("device", "cpu") - mean = info.get("mean", 0.0) - std = info.get("std", 1.0) - uid = f"{name_prefix}_tensor_meta_{tensor_info.get('name', '')}" - - lines = [ - (f"class {uid}:"), - (f"\tname = \"{tensor_info.get('name', '')}\""), - (f"\tshape = {shape}"), - (f'\tdtype = "{dtype}"'), - (f'\tdevice = "{device}"'), - (f"\tmean = {get_limited_precision_float_str(mean)}"), - (f"\tstd = {get_limited_precision_float_str(std)}"), - ] - if tensor_type == "big_int_tensor_by_range": - lines.append(f"\tmin_val = {tensor_info['min_val']}") - lines.append(f"\tmax_val = {tensor_info['max_val']}") - elif "data" in tensor_info: - data_list = ( - tensor_info["data"].flatten() - if isinstance(tensor_info["data"], torch.Tensor) - else 
tensor_info["data"] - ) - lines.append(f"\tdata = {format_data(data_list)}") - - lines.append("") - return lines - - input_infos = converted["input_info"] - if isinstance(input_infos, dict): - input_infos = [input_infos] - - input_lines = [] - for idx, input_info in enumerate(input_infos): - input_info["name"] = f"input_{idx}" - input_lines.extend(process_tensor_info(input_info, name_prefix="Program_input")) - - with open(f"{file_path}/input_meta.py", "w") as f: - f.write("\n".join(input_lines)) - - weight_lines = [] - for name, weight_info in converted["weight_info"].items(): - weight_info["name"] = name - weight_lines.extend( - process_tensor_info(weight_info, name_prefix="Program_weight") - ) - - with open(f"{file_path}/weight_meta.py", "w") as f: - f.write("\n".join(weight_lines)) - - -def load_model_inputs_converted_from_text(file_path): - return load_converted_from_text(file_path) - - -def load_converted_from_text(file_path): - input_info = list(convert_meta_classes_to_tensors(f"{file_path}/input_meta.py")) - - weight_info = { - data["name"]: data - for data in convert_meta_classes_to_tensors(f"{file_path}/weight_meta.py") - } - - return { - "input_info": input_info, - "weight_info": weight_info, - "dynamic_shapes": None, - } - - -def convert_tensor_meta_attrs_list_to_named_tensors(tensor_meta_attrs_list): - tensors_wrappers = convert_tensor_meta_attrs_list_to_tensors_wrappers( - tensor_meta_attrs_list - ) - ret = [] - for i, tensors_wrapper in enumerate(tensors_wrappers): - name = tensors_wrapper["name"] - # shape = tensors_wrapper["info"]['shape'] - # logging.warning(f"before replay_tensor {i=} {shape=}") - tensor = replay_tensor(tensors_wrapper) - # logging.warning(f"after replay_tensor {i=} {shape=}") - ret.append((name, tensor)) - return ret - - -def get_named_tensors(tensor_meta_attrs_list, use_dummy_inputs): - tensors_wrappers = convert_tensor_meta_attrs_list_to_tensors_wrappers( - tensor_meta_attrs_list - ) - ret = [] - for i, tensors_wrapper in enumerate(tensors_wrappers): - name = tensors_wrapper["name"] - # shape = tensors_wrapper["info"]['shape'] - if use_dummy_inputs: - tensor = get_dummy_tensor(tensors_wrapper) - else: - tensor = replay_tensor(tensors_wrapper) - ret.append((name, tensor)) - return ret - - def convert_meta_classes_to_tensors(file_path): tensor_meta_attrs_list = [ { @@ -324,8 +89,19 @@ def _get_classes(file_path): yield from inspect.getmembers(unnamed, inspect.isclass) -def extract_dynamic_shapes(example_inputs): - pass +def load_converted_from_text(file_path): + input_info = list(convert_meta_classes_to_tensors(f"{file_path}/input_meta.py")) + + weight_info = { + data["name"]: data + for data in convert_meta_classes_to_tensors(f"{file_path}/weight_meta.py") + } + + return { + "input_info": input_info, + "weight_info": weight_info, + "dynamic_shapes": None, + } def replay_tensor(info):