Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[tuner] Invoke iree-benchmark-module without calling bash scripts #99

Merged
merged 16 commits into from
Aug 22, 2024
105 changes: 51 additions & 54 deletions tuning/libtuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def generate_display_DBR(
candidate_id: int = 0, mean_time: float = random.uniform(100.0, 500.0)
) -> str:
"""Generate dispatch_benchmark_result string for displaying"""
return f"{candidate_id}\tMean Time: {mean_time:.1f}\n"
return f"{candidate_id}\tMean Time: {mean_time:.1f}"


def generate_display_MBR(
Expand All @@ -229,9 +229,11 @@ def generate_display_MBR(
if calibrated_diff:
percentage_change = calibrated_diff * 100
change_str = f"({percentage_change:+.3f}%)"
res_str = f"Benchmarking: {candidate_vmfb_path_str} on device {device_id}: {t1:.3g} {change_str}\n\n"
res_str = f"Benchmarking: {candidate_vmfb_path_str} on device {device_id}: {t1:.3g} {change_str}"
else:
res_str = f"Benchmarking: {candidate_vmfb_path_str} on device {device_id}: {t1:.3g}\n\n"
res_str = (
f"Benchmarking: {candidate_vmfb_path_str} on device {device_id}: {t1:.3g}"
)
return res_str


Expand Down Expand Up @@ -515,7 +517,7 @@ def run_command(
def run_command_wrapper(task_tuple: TaskPack) -> TaskResult:
    """pool.imap_unordered can't iterate an iterable of iterables as input; this function helps divide the arguments"""
if task_tuple.command_need_device_id:
# worker searches for special symbol and substitute to correct device_id
# Worker searches for the special symbol and substitutes it with the actual device_id
pattern = re.compile(re.escape(DEVICE_ID_PLACEHOLDER))
task_tuple.command = [
pattern.sub(str(device_id), s) for s in task_tuple.command
Expand All @@ -527,9 +529,9 @@ def run_command_wrapper(task_tuple: TaskPack) -> TaskResult:

task_result = TaskResult(
res, task_tuple.candidate_id, device_id=str(-1)
) # main process
) # Main process
if device_id:
task_result = TaskResult(res, task_tuple.candidate_id, device_id) # sub process
task_result = TaskResult(res, task_tuple.candidate_id, device_id) # Subprocess

time.sleep(task_tuple.cooling_time)

Expand Down Expand Up @@ -829,14 +831,16 @@ def parse_dispatch_benchmark_results(
) -> tuple[list[ParsedDisptachBenchmarkResult], list[str]]:
benchmark_result_configs = []
dump_list = []
incomplete_list = []

for benchmark_result in benchmark_results:
res_str = benchmark_result.result.stdout
candidate_id = benchmark_result.candidate_id
if res_str is None:
continue
res = IREEBenchmarkResult(candidate_id, res_str)
benchmark_time = res.get_mean_time()
if benchmark_time is None:
incomplete_list.append(candidate_id)
continue
assert benchmark_time is not None
candidate_trackers[candidate_id].first_benchmark_time = benchmark_time
candidate_trackers[candidate_id].spec_path = (
Expand All @@ -846,7 +850,7 @@ def parse_dispatch_benchmark_results(
mlir_path = candidate_trackers[candidate_id].dispatch_mlir_path
spec_path = candidate_trackers[candidate_id].spec_path
assert mlir_path is not None and spec_path is not None
dump_list.append(generate_display_DBR(candidate_id, benchmark_time))
dump_list.append(generate_display_DBR(candidate_id, benchmark_time) + "\n")

benchmark_result_configs.append(
(
Expand All @@ -858,6 +862,10 @@ def parse_dispatch_benchmark_results(
)
)
)

if incomplete_list:
        dump_list += [f"Candidate {i} is incomplete" for i in incomplete_list]

return benchmark_result_configs, dump_list


Expand Down Expand Up @@ -1053,22 +1061,6 @@ def compile_models(
)


def sort_candidates_by_first_benchmark_times(
candidate_indexes: list[int], candidate_trackers: list[CandidateTracker]
) -> list[int]:
"""Sorts candidate indexes based on their first benchmark times in ascending order"""
# Get the first benchmark times, defaulting to a large number if None
first_benchmark_times = [
candidate_trackers[index].first_benchmark_time or float("inf")
for index in candidate_indexes
]
combined = list(zip(candidate_indexes, first_benchmark_times))
combined_sorted = sorted(combined, key=lambda x: x[1])
sorted_indexes, _ = zip(*combined_sorted)

return list(sorted_indexes)


def group_benchmark_results_by_device_id(
benchmark_results: list[TaskResult],
) -> list[list[TaskResult]]:
Expand Down Expand Up @@ -1099,10 +1091,11 @@ def parse_model_benchmark_results(
candidate_results: list[TaskResult],
baseline_results: list[TaskResult],
):
"""Update candidate_tracker and format a list of result strings to be saved later."""
candidate_results = sorted(candidate_results, key=lambda br: br.device_id)
baseline_results = sorted(baseline_results, key=lambda tr: tr.device_id)

# Assign candidates to same groups by device_id
# Assign candidates to the same groups by device_id
grouped_candidate_results = group_benchmark_results_by_device_id(candidate_results)

# Insert baseline results to the head of each list
Expand All @@ -1113,27 +1106,25 @@ def parse_model_benchmark_results(
dump_list = []
incomplete_list: list[tuple[int, Optional[str]]] = (
[]
) # format: [(candidate_id, device_id)], baseline will have candidate_id=0
) # format: [(candidate_id, device_id)]

baseline_time = None
for same_device_results in grouped_benchmark_results:
dump_unsort_list: list[tuple[float, str]] = []
for task_result in same_device_results:
# Skip if benchmark failed.
result_str = task_result.result.stdout
candidate_id = task_result.candidate_id
device_id = task_result.device_id

# Check if benchmarking has completed
if result_str is None:
# TODO: change incomplete process detection
incomplete_list.append((candidate_id, device_id))
if candidate_id == 0:
baseline_time = None
continue

res = IREEBenchmarkResult(candidate_id, result_str)
benchmark_time = res.get_mean_time()

# Check completion
if benchmark_time == None:
incomplete_list.append((candidate_id, device_id))
continue
assert benchmark_time is not None

            # Record the baseline benchmarking result and skip the rest of the processing
Expand All @@ -1143,10 +1134,13 @@ def parse_model_benchmark_results(
candidate_id
].compiled_model_path
assert baseline_vmfb_path is not None
dump_str = generate_display_MBR(
candidate_vmfb_path_str=baseline_vmfb_path.as_posix(),
device_id=device_id,
t1=benchmark_time,
dump_str = (
generate_display_MBR(
candidate_vmfb_path_str=baseline_vmfb_path.as_posix(),
device_id=device_id,
t1=benchmark_time,
)
+ "\n\n"
)
dump_list.append(dump_str)
continue
Expand All @@ -1155,26 +1149,29 @@ def parse_model_benchmark_results(
candidate_trackers[candidate_id].model_benchmark_time = benchmark_time
candidate_trackers[candidate_id].model_benchmark_device_id = device_id

# Skip improvement calculation if no baseline data.
if baseline_time is None:
dump_unsort_list.append((benchmark_time, result_str))
continue

# Calculate candidate improvement based on baseline.
candidate_trackers[candidate_id].baseline_benchmark_time = baseline_time
calibrated_benchmark_diff = (benchmark_time - baseline_time) / baseline_time
candidate_trackers[candidate_id].calibrated_benchmark_diff = (
calibrated_benchmark_diff
)
if baseline_time:
candidate_trackers[candidate_id].baseline_benchmark_time = baseline_time
calibrated_benchmark_diff = (
benchmark_time - baseline_time
) / baseline_time
candidate_trackers[candidate_id].calibrated_benchmark_diff = (
calibrated_benchmark_diff
)
else:
calibrated_benchmark_diff = None

# Collect candidate dump str
candidate_vmfb_path = candidate_trackers[candidate_id].compiled_model_path
assert candidate_vmfb_path is not None
dump_str = generate_display_MBR(
candidate_vmfb_path_str=candidate_vmfb_path.as_posix(),
device_id=device_id,
t1=benchmark_time,
calibrated_diff=calibrated_benchmark_diff,
dump_str = (
generate_display_MBR(
candidate_vmfb_path_str=candidate_vmfb_path.as_posix(),
device_id=device_id,
t1=benchmark_time,
calibrated_diff=calibrated_benchmark_diff,
)
+ "\n\n"
)

dump_unsort_list.append((benchmark_time, dump_str))
Expand All @@ -1188,7 +1185,7 @@ def parse_model_benchmark_results(
for index, device in incomplete_list:
file_path = candidate_trackers[index].compiled_model_path
assert file_path is not None
error_msg = f"Benchmarking result of {file_path.as_posix()} on deivce {device} is incomplete"
error_msg = f"Benchmarking result of {file_path.as_posix()} on device {device} is incomplete"
handle_error(condition=True, msg=error_msg, level=logging.WARNING)
dump_list.append(error_msg + "\n")

Expand Down
3 changes: 2 additions & 1 deletion tuning/punet_autotune.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,13 @@ def get_model_compile_command(
) -> list[str]:
mlir_spec_path = candidate_tracker.spec_path
assert mlir_spec_path is not None
script_dir = Path(__file__).resolve().parent
target_dir = mlir_spec_path.resolve().parent.parent.parent
output_name = f"unet_candidate_{candidate_tracker.candidate_id}.vmfb"
command = [
"timeout",
"300s",
"../int8-model/compile-punet-base.sh",
(script_dir / "../int8-model/...").as_posix(),
RattataKing marked this conversation as resolved.
Show resolved Hide resolved
"./tools/iree-compile",
"gfx942",
f"{mlir_spec_path.resolve()}",
Expand Down
Loading
Loading