address review comments

triton-lang · Jan 3, 2025 · 7d2ea8c · 7d2ea8c
1 parent b02ebad
commit 7d2ea8c
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 18 deletions.
diff --git a/python/test/unit/test_address_sanitizer.py b/python/test/unit/test_address_sanitizer.py
@@ -3,24 +3,6 @@
 
 import triton
 
-# It is recommended to disable various memory caching strategies both within the ROCm stack and PyTorch
-# This will give the address sanitizer the best chance at finding the memory fault where it originates,
-# otherwise it could be masked by writing past the end of a cached block within a larger allocation.
-os.environ["HSA_DISABLE_FRAGMENT_ALLOCATOR"] = "1"
-os.environ["AMD_PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
-os.environ["PYTORCH_NO_HIP_MEMORY_CACHING"] = "1"
-os.environ["TRITON_ENABLE_ASAN"] = "1"
-
-# HSA_XNACK here is required to set the xnack+ setting for the GPU at runtime.
-# If it is not set and the default xnack setting of the system is xnack-
-# a runtime error something like "No kernel image found" will occur. The system
-# xnack setting can be found through rocminfo. xnack+ is required for ASAN.
-# More information about xnack in general can be found here:
-# https://llvm.org/docs/AMDGPUUsage.html#target-features
-# https://rocm.docs.amd.com/en/docs-6.1.0/conceptual/gpu-memory.html
-
-os.environ["HSA_XNACK"] = "1"
-
 
 def is_hip():
     return triton.runtime.driver.active.get_current_target().backend == "hip"
@@ -29,6 +11,24 @@ def is_hip():
 def test_address_sanitizer():
     if not is_hip():
         return  #not supported on NV backend
+
+    # It is recommended to disable various memory caching strategies both within the ROCm stack and PyTorch.
+    # This will give the address sanitizer the best chance at finding the memory fault where it originates;
+    # otherwise it could be masked by writing past the end of a cached block within a larger allocation.
+    os.environ["HSA_DISABLE_FRAGMENT_ALLOCATOR"] = "1"
+    os.environ["AMD_PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
+    os.environ["PYTORCH_NO_HIP_MEMORY_CACHING"] = "1"
+    os.environ["TRITON_ENABLE_ASAN"] = "1"
+
+    # HSA_XNACK here is required to set the xnack+ setting for the GPU at runtime.
+    # If it is not set and the default xnack setting of the system is xnack-,
+    # a runtime error such as "No kernel image found" will occur. The system
+    # xnack setting can be found through rocminfo. xnack+ is required for ASAN.
+    # More information about xnack in general can be found here:
+    # https://llvm.org/docs/AMDGPUUsage.html#target-features
+    # https://rocm.docs.amd.com/en/docs-6.1.0/conceptual/gpu-memory.html
+    os.environ["HSA_XNACK"] = "1"
+
     out = subprocess.Popen(["python", "address_sanitizer_helper.py"], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
     assert "Begin function __asan_report" in out.stdout.read().decode()
     assert "heap-buffer-overflow" in out.stderr.read().decode()
diff --git a/python/triton/compiler/compiler.py b/python/triton/compiler/compiler.py
@@ -304,6 +304,10 @@ def compile(src, target=None, options=None):
     # This is needed to safely finalize threads pool inside context: if current process forks before
     # python GC deletes context object, thread pool in child process will be invalid, which could
     # lead to child crash or hang.
+    # However, disabling multithreading causes the code to hang if the ASAN pass is enabled;
+    # this is likely due to the llvm-symbolizer forking a process.
+    # TODO: Reconcile the difference between the ASAN and non-ASAN paths with regard to
+    # enabling multithreading in the MLIR context.
     if not os.environ.get("TRITON_ENABLE_ASAN", "0") == "1":
         context.disable_multithreading()
     # return handle to compiled kernel

diff --git a/python/triton/tools/extra/cuda b/python/triton/tools/extra/cuda
@@ -0,0 +1 @@
+/var/lib/jenkins/triton/third_party/nvidia/tools/cuda
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		/var/lib/jenkins/triton/third_party/nvidia/tools/cuda