Skip to content

Commit

Permalink
address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
CRobeck committed Jan 3, 2025
1 parent b02ebad commit 7d2ea8c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 18 deletions.
36 changes: 18 additions & 18 deletions python/test/unit/test_address_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,6 @@

import triton

# Turn off the memory-caching layers in both the ROCm stack and PyTorch, and switch on
# Triton's ASAN instrumentation. With caching enabled, an out-of-bounds write can land inside
# a cached block of a larger allocation and go unnoticed; disabling it gives the address
# sanitizer the best chance of reporting the fault where it originates.
#
# HSA_XNACK selects the xnack+ setting for the GPU at runtime, which ASAN requires.
# If it is left unset on a system whose default is xnack-, a runtime error along the lines of
# "No kernel image found" will occur. The system default can be inspected with rocminfo.
# Background on xnack:
# https://llvm.org/docs/AMDGPUUsage.html#target-features
# https://rocm.docs.amd.com/en/docs-6.1.0/conceptual/gpu-memory.html
os.environ.update({
    "HSA_DISABLE_FRAGMENT_ALLOCATOR": "1",
    "AMD_PYTORCH_NO_CUDA_MEMORY_CACHING": "1",
    "PYTORCH_NO_HIP_MEMORY_CACHING": "1",
    "TRITON_ENABLE_ASAN": "1",
    "HSA_XNACK": "1",
})


def is_hip():
    """Return True when the active Triton driver's current target uses the "hip" backend."""
    current_target = triton.runtime.driver.active.get_current_target()
    return current_target.backend == "hip"
Expand All @@ -29,6 +11,24 @@ def is_hip():
def test_address_sanitizer():
    """Run the ASAN helper script in a child process and check that ASAN reports an overflow.

    The helper (``address_sanitizer_helper.py``, resolved relative to the current working
    directory) is launched with the ASAN-related environment variables set. The test passes
    when the child's stdout contains the ``__asan_report`` marker and its stderr contains a
    ``heap-buffer-overflow`` report. On non-HIP backends the test returns without doing anything.
    """
    if not is_hip():
        return  # not supported on NV backend

    # Build the child's environment from a copy instead of mutating os.environ, so that the
    # ASAN/ROCm settings do not leak into other tests running later in the same process.
    child_env = os.environ.copy()

    # It is recommended to disable various memory caching strategies both within the ROCm stack and PyTorch.
    # This will give the address sanitizer the best chance at finding the memory fault where it originates,
    # otherwise it could be masked by writing past the end of a cached block within a larger allocation.
    child_env["HSA_DISABLE_FRAGMENT_ALLOCATOR"] = "1"
    child_env["AMD_PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
    child_env["PYTORCH_NO_HIP_MEMORY_CACHING"] = "1"
    child_env["TRITON_ENABLE_ASAN"] = "1"

    # HSA_XNACK here is required to set the xnack+ setting for the GPU at runtime.
    # If it is not set and the default xnack setting of the system is xnack-,
    # a runtime error something like "No kernel image found" will occur. The system
    # xnack setting can be found through rocminfo. xnack+ is required for ASAN.
    # More information about xnack in general can be found here:
    # https://llvm.org/docs/AMDGPUUsage.html#target-features
    # https://rocm.docs.amd.com/en/docs-6.1.0/conceptual/gpu-memory.html
    child_env["HSA_XNACK"] = "1"

    # subprocess.run drains stdout and stderr together and waits for the child to exit.
    # Reading one pipe to EOF before the other can deadlock once the child fills the
    # unread pipe's buffer, which is plausible here given how verbose ASAN reports are.
    result = subprocess.run(
        ["python", "address_sanitizer_helper.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=child_env,
    )
    assert "Begin function __asan_report" in result.stdout.decode()
    assert "heap-buffer-overflow" in result.stderr.decode()
4 changes: 4 additions & 0 deletions python/triton/compiler/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,10 @@ def compile(src, target=None, options=None):
# This is needed to safely finalize threads pool inside context: if current process forks before
# python GC deletes context object, thread pool in child process will be invalid, which could
# lead to child crash or hang.
# However, disabling multithreading causes the code to hang if the ASAN pass is enabled;
# this is likely due to the llvm-symbolizer forking a process.
# TODO: Reconcile the difference here between the ASAN and non-ASAN path with enabling
# multithreading in the MLIR context.
if not os.environ.get("TRITON_ENABLE_ASAN", "0") == "1":
context.disable_multithreading()
# return handle to compiled kernel
Expand Down
1 change: 1 addition & 0 deletions python/triton/tools/extra/cuda

0 comments on commit 7d2ea8c

Please sign in to comment.