diff --git a/graph_net/paddle/random_util.py b/graph_net/paddle/random_util.py
new file mode 100644
index 000000000..1661dbf87
--- /dev/null
+++ b/graph_net/paddle/random_util.py
@@ -0,0 +1,61 @@
+import os
+import pickle
+import numpy as np
+import random
+import re
+import paddle
+
+from graph_net.paddle import samples_util
+
+
+def set_seed(random_seed):
+    paddle.seed(random_seed)
+    random.seed(random_seed)
+    np.random.seed(random_seed)
+
+
+def _extract_model_name_for_original_sample(model_path):
+    fields = model_path.rstrip("/").split(os.sep)
+    pattern = r"^subgraph(_\d+)?$"
+    model_name = (
+        f"{fields[-2]}_{fields[-1]}" if re.match(pattern, fields[-1]) else fields[-1]
+    )
+    return model_name
+
+
+def _extract_model_name_for_decomposed_subgraph(model_path):
+    # Parse model name and subgraph index
+    model_name_with_subgraph_idx = model_path.rstrip("/").split(os.sep)[-1]
+    model_name = "_".join(model_name_with_subgraph_idx.split("_")[:-1])
+    return model_name
+
+
+def _generate_random_state_filename(model_path):
+    samples_dir = samples_util.get_default_samples_directory()
+    if os.path.abspath(model_path).startswith(samples_dir):
+        model_name = _extract_model_name_for_original_sample(model_path)
+    else:
+        model_name = _extract_model_name_for_decomposed_subgraph(model_path)
+    return f"{model_name}.random_states.pkl"
+
+
+def save_random_states(model_path, output_dir, random_state_dict):
+    filepath = os.path.join(output_dir, _generate_random_state_filename(model_path))
+    print(f"Write to {filepath}.", flush=True)
+    try:
+        with open(filepath, "wb") as f:
+            pickle.dump(random_state_dict, f)
+    except Exception:
+        print(f"Failed to open {filepath}.")
+
+
+def load_random_states(model_path, output_dir):
+    filepath = os.path.join(output_dir, _generate_random_state_filename(model_path))
+    print(f"Read from {filepath}.", flush=True)
+    random_states = None
+    try:
+        with open(filepath, "rb") as f:
+            random_states = pickle.load(f)
+    except Exception:
+        print(f"Failed to open {filepath}.")
+    return random_states
diff --git a/graph_net/paddle/run_model.py b/graph_net/paddle/run_model.py
index 376ff7220..bd546917c 100644
--- a/graph_net/paddle/run_model.py
+++ b/graph_net/paddle/run_model.py
@@ -3,19 +3,12 @@
 import base64
 import argparse
 import numpy as np
-import random
 
 os.environ["FLAGS_logging_pir_py_code_dir"] = "/tmp/dump"
 import paddle
 
 from graph_net import imp_util
-from graph_net.paddle import utils
-
-
-def set_seed(random_seed):
-    paddle.seed(random_seed)
-    random.seed(random_seed)
-    np.random.seed(random_seed)
+from graph_net.paddle import utils, random_util
 
 
 def load_class_from_file(file_path: str, class_name: str):
@@ -31,17 +24,20 @@ def get_input_dict(model_path):
     params = inputs_params["weight_info"]
     inputs = inputs_params["input_info"]
 
+    random_state_dict = {}
     input_dict = {}
     for name, meta in params.items():
         original_name = (
             meta["original_name"] if meta.get("original_name", None) else name
         )
+        random_state_dict[name] = np.random.get_state()
         input_dict[name] = paddle.nn.parameter.Parameter(
             utils.replay_tensor(meta), name=original_name
         )
     for name, meta in inputs.items():
+        random_state_dict[name] = np.random.get_state()
         input_dict[name] = utils.replay_tensor(meta)
-    return input_dict
+    return input_dict, random_state_dict
 
 
 def _convert_to_dict(config_str):
@@ -66,9 +62,6 @@ def main(args):
-    initalize_seed = 123
-    set_seed(random_seed=initalize_seed)
-
     model_path = args.model_path
     model_class = load_class_from_file(
         f"{model_path}/model.py", class_name="GraphModule"
class_name="GraphModule" @@ -77,7 +70,12 @@ def main(args): model = model_class() print(f"{model_path=}") - input_dict = get_input_dict(args.model_path) + initalize_seed = 123 + random_util.set_seed(random_seed=initalize_seed) + + input_dict, random_state_dict = get_input_dict(args.model_path) + output_dir = "/work/GraphNet/graph_net/test/outputs/random_states" + random_util.save_random_states(model_path, output_dir, random_state_dict) model = _get_decorator(args)(model) model(**input_dict) @@ -98,4 +96,5 @@ def main(args): help="decorator configuration string", ) args = parser.parse_args() + print(args) main(args=args) diff --git a/graph_net/paddle/test_compiler.py b/graph_net/paddle/test_compiler.py index 6a743f26f..2af2fef0f 100644 --- a/graph_net/paddle/test_compiler.py +++ b/graph_net/paddle/test_compiler.py @@ -5,16 +5,14 @@ import sys import os import numpy as np -import random import platform import traceback import subprocess import re -from graph_net.paddle import utils from graph_net import path_utils from graph_net import test_compiler_util - +from graph_net.paddle import utils, random_util from graph_net.paddle.backend.graph_compiler_backend import GraphCompilerBackend from graph_net.paddle.backend.cinn_backend import CinnBackend from graph_net.paddle.backend.nope_backend import NopeBackend @@ -31,12 +29,6 @@ def get_compiler_backend(args) -> GraphCompilerBackend: return registry_backend[args.compiler] -def set_seed(random_seed): - paddle.seed(random_seed) - random.seed(random_seed) - np.random.seed(random_seed) - - def init_env(args): if test_compiler_util.is_gpu_device(args.device): paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1}) @@ -96,14 +88,25 @@ def get_model(model_path): return model_class() -def get_input_dict(model_path): +def get_input_dict(model_path, random_states_path=None): inputs_params = utils.load_converted_from_text(f"{model_path}") params = inputs_params["weight_info"] inputs = inputs_params["input_info"] - params.update(inputs) - state_dict = {k: utils.replay_tensor(v) for k, v in params.items()} - return state_dict + + random_states = ( + random_util.load_random_states(model_path, random_states_path) + if random_states_path + else None + ) + + input_dict = {} + for name, meta in params.items(): + if random_states is not None and random_states.get(name, None) is not None: + np.random.set_state(random_states[name]) + tensor = utils.replay_tensor(meta) + input_dict[name] = tensor + return input_dict def get_input_spec(model_path): @@ -486,7 +489,7 @@ def main(args): assert args.device in ["cuda", "dcu", "xpu", "cpu"] initalize_seed = 123 - set_seed(random_seed=initalize_seed) + random_util.set_seed(random_seed=initalize_seed) if path_utils.is_single_model_dir(args.model_path): test_single_model(args) diff --git a/graph_net/paddle/test_reference_device.py b/graph_net/paddle/test_reference_device.py index 1f1591231..c2aa28fec 100644 --- a/graph_net/paddle/test_reference_device.py +++ b/graph_net/paddle/test_reference_device.py @@ -9,7 +9,7 @@ from graph_net import path_utils from graph_net import test_compiler_util -from graph_net.paddle import test_compiler +from graph_net.paddle import random_util, test_compiler def get_reference_log_path(reference_dir, model_path): @@ -38,8 +38,10 @@ def test_single_model(args): compiler = test_compiler.get_compiler_backend(args) test_compiler.check_and_print_gpu_utilization(compiler) - input_dict = test_compiler.get_input_dict(model_path) - model = test_compiler.get_model(model_path) + input_dict = 
+        args.model_path, args.random_states_path
+    )
+    model = test_compiler.get_model(args.model_path)
     model.eval()
 
     test_compiler_util.print_with_log_prompt(
@@ -107,6 +109,7 @@ def test_multi_models(args):
             f"--trials {args.trials}",
             f"--log-prompt {args.log_prompt}",
             f"--seed {args.seed}",
+            f"--random-states-path {args.random_states_path}",
             f"--reference-dir {args.reference_dir}",
         ]
     )
@@ -130,7 +133,7 @@ def main(args):
     assert args.compiler in {"cinn", "nope"}
     assert args.device in ["cuda"]
 
-    test_compiler.set_seed(random_seed=args.seed)
+    random_util.set_seed(random_seed=args.seed)
     test_compiler.init_env(args)
 
     ref_dump_dir = Path(args.reference_dir)
@@ -191,6 +194,12 @@ def main(args):
         default=123,
         help="Random seed (default: 123)",
     )
+    parser.add_argument(
+        "--random-states-path",
+        type=str,
+        required=False,
+        help="Path to the random states of the model(s)",
+    )
     parser.add_argument(
         "--reference-dir",
         type=str,
diff --git a/graph_net/paddle/test_target_device.py b/graph_net/paddle/test_target_device.py
index bce95ef9c..766ad9631 100644
--- a/graph_net/paddle/test_target_device.py
+++ b/graph_net/paddle/test_target_device.py
@@ -7,7 +7,7 @@ import paddle
 
 from graph_net import path_utils
 from graph_net import test_compiler_util
-from graph_net.paddle import test_compiler, test_reference_device
+from graph_net.paddle import random_util, test_compiler, test_reference_device
 
 
 def parse_config_from_reference_log(log_path):
@@ -53,7 +53,7 @@ def update_args_and_set_seed(args, model_path):
     vars(args)["compiler"] = config.get("compiler")
     vars(args)["trials"] = int(config.get("trials"))
     vars(args)["warmup"] = int(config.get("warmup"))
-    test_compiler.set_seed(random_seed=int(config.get("seed")))
+    random_util.set_seed(random_seed=int(config.get("seed")))
 
     return args
 
@@ -67,8 +67,8 @@ def test_single_model(args):
     compiler = test_compiler.get_compiler_backend(args)
     test_compiler.check_and_print_gpu_utilization(compiler)
 
-    input_dict = test_compiler.get_input_dict(model_path)
-    model = test_compiler.get_model(model_path)
+    input_dict = test_compiler.get_input_dict(args.model_path, args.random_states_path)
+    model = test_compiler.get_model(args.model_path)
     model.eval()
 
     test_compiler_util.print_basic_config(
@@ -146,6 +146,7 @@ def test_multi_models(args):
             f"--device {args.device}",
             f"--log-prompt {args.log_prompt}",
             f"--reference-dir {args.reference_dir}",
+            f"--random-states-path {args.random_states_path}",
         ]
     )
     cmd_ret = os.system(cmd)
@@ -210,5 +211,11 @@ def main(args):
         default=None,
         help="Path to samples list, each line contains a sample path",
     )
+    parser.add_argument(
+        "--random-states-path",
+        type=str,
+        required=False,
+        help="Path to the random states of the model(s)",
+    )
     args = parser.parse_args()
     main(args=args)
diff --git a/graph_net/subgraph_decompose_and_evaluation_step.py b/graph_net/subgraph_decompose_and_evaluation_step.py
index cb222c111..ab434089e 100755
--- a/graph_net/subgraph_decompose_and_evaluation_step.py
+++ b/graph_net/subgraph_decompose_and_evaluation_step.py
@@ -388,7 +388,11 @@ def run_decomposer_for_multi_models(
 
 
 def run_evaluation(
-    framework: str, test_cmd_b64: str, work_dir: str, log_path: str
+    framework: str,
+    test_cmd_b64: str,
+    work_dir: str,
+    random_states_dir: str,
+    log_path: str,
 ) -> int:
     """Executes the test command on the batch directory."""
 
@@ -400,6 +404,7 @@ def run_evaluation(
     test_module_arguments["reference-dir"] = os.path.join(
         work_dir, "reference_device_outputs"
     )
+    test_module_arguments["random-states-path"] = random_states_dir
 
     cmd = [sys.executable, "-m", f"graph_net.{framework}.{test_module_name}"] + [
         item
@@ -751,6 +756,7 @@ def main(args):
     # --- Step 1: Prepare Tasks and Workspace ---
     decompose_config = prepare_tasks_and_verify(args, current_pass_id, base_output_dir)
     work_dir = get_decompose_workspace_path(base_output_dir, current_pass_id)
+    random_states_dir = os.path.join(base_output_dir, "random_states")
 
     if not os.path.exists(work_dir):
         os.makedirs(work_dir, exist_ok=True)
@@ -768,7 +774,9 @@ def main(args):
     log_path = os.path.join(work_dir, f"log_{task_controller.test_module_name}.txt")
     if task_controller.task_scheduler["run_evaluation"]:
         print(f"\n--- Phase 2: Evaluation ({task_controller.test_module_name}) ---")
-        run_evaluation(args.framework, args.test_config, work_dir, log_path)
+        run_evaluation(
+            args.framework, args.test_config, work_dir, random_states_dir, log_path
+        )
 
     # --- Step 4: Analysis ---
     if task_controller.task_scheduler["post_analysis"]: