Wave scaling patch (#35)

johncalesp · web-flow · commit fd4d9ef8880a · 2023-08-25T15:33:11.000-04:00
* quick hotfix for transformer models

* added a warning log in resimplified wave scaling when the origin or destination wave size is 0

* changed spacing

* fixed division by zero: skip the `num_blocks // wave_size` check when the origin or destination wave size is 0
diff --git a/analyzer/habitat/analysis/wave_scaling/resimplified.py b/analyzer/habitat/analysis/wave_scaling/resimplified.py
@@ -2,7 +2,8 @@
 
 from habitat.analysis.kernels import PredictedKernel
 from habitat.analysis.wave_scaling.common import calculate_wave_info
-
+import logging
+logger = logging.getLogger(__name__)
 
 def resimplified_wave_scaling(
     kernel,
@@ -23,8 +24,9 @@ def resimplified_wave_scaling(
     # Check if the kernel is too "small" - if it doesn't fill a single wave
     # on the current device AND if it doesn't fill a single wave on the
     # destination device
-    if (kernel.num_blocks // origin_wave_size == 0 and
-            kernel.num_blocks // dest_wave_size == 0):
+    if (origin_wave_size == 0 or dest_wave_size == 0):
+        logger.warn(f"One or more invalid wave sizes: kernel: {kernel.name} origin: {origin_wave_size}, dest: {dest_wave_size}")
+    if ((origin_wave_size == 0 or dest_wave_size == 0) or (kernel.num_blocks // origin_wave_size == 0 and kernel.num_blocks // dest_wave_size == 0)):
         # We scale the run time by the compute factor only
         origin_max_occupancy = math.ceil(
             kernel.num_blocks / origin_device.num_sms
diff --git a/analyzer/habitat/analysis/wave_scaling/roofline.py b/analyzer/habitat/analysis/wave_scaling/roofline.py
@@ -26,8 +26,8 @@ def roofline_wave_scaling(
     # 1. Check if the kernel is too "small" - if it doesn't fill a single wave
     #    on the current device AND if it doesn't fill a single wave on the
     #    destination device
-    if (kernel.num_blocks // origin_wave_size == 0 and
-            kernel.num_blocks // dest_wave_size == 0):
+    if ((origin_wave_size == 0 or dest_wave_size == 0) or (kernel.num_blocks // origin_wave_size == 0 and
+            kernel.num_blocks // dest_wave_size == 0)):
         # We scale the run time by the compute factor only
         origin_max_occupancy = math.ceil(
             kernel.num_blocks / origin_device.num_sms