Skip to content

Commit

Permalink
Fix the double loading problem of libamdhip64.so.
Browse files Browse the repository at this point in the history
The PyTorch wheel bundles its own libamdhip64.so, and the original logic has trouble
detecting this shared object because it can be installed anywhere via
venv/DT_RUNPATH/LD_LIBRARY_PATH.

Triton now enumerates the objects loaded into the address space with dl_iterate_phdr,
and prefers a libamdhip64.so that is already loaded into the address space over
everything other than TRITON_LIBHIP_PATH.
  • Loading branch information
xinyazhang committed Jul 2, 2024
1 parent d9fc9fe commit a189c11
Showing 1 changed file with 54 additions and 0 deletions.
54 changes: 54 additions & 0 deletions third_party/amd/backend/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,53 @@
# Single-entry list holding the "include" directory located under `dirname`.
include_dir = [os.path.join(dirname, "include")]


def _linux_find_already_mmapped_dylib():
import platform;
if platform.system() != 'Linux':
return ''
from ctypes import (
Structure,
c_char,
c_void_p,
c_char_p,
c_int,
c_size_t,
POINTER,
CFUNCTYPE,
CDLL,
memmove,
)
from ctypes.util import find_library
import ctypes
class dl_phdr_info(Structure):
_fields_ = [
('dlpi_addr', c_void_p),
('dlpi_name', c_char_p),
# Don't care the remaining fields
]
# callback_t must use POINTER(c_char) to avoid copying
callback_t = CFUNCTYPE(c_int, POINTER(dl_phdr_info), POINTER(c_size_t), POINTER(c_char))
dl_iterate_phdr = CDLL('libc.so.6').dl_iterate_phdr
# argtypes must use c_char_p to accept ctypes.create_string_buffer
dl_iterate_phdr.argtypes = [callback_t, c_char_p]
dl_iterate_phdr.restype = c_int

MAX_PATH = 4096
path = ctypes.create_string_buffer(MAX_PATH + 1)
def callback(info, size, data):
dlpi_name = info.contents.dlpi_name
p = Path(os.fsdecode(dlpi_name))
if 'libamdhip64.so' in p.name:
memmove(data, dlpi_name, min(MAX_PATH, len(dlpi_name)))
return 1
return 0
found = dl_iterate_phdr(callback_t(callback), path)
if found:
filepath = os.fsdecode(ctypes.string_at(path))
print(f'_linux_find_already_mmapped_dylib {filepath=}')
return filepath
return ''

@functools.lru_cache()
def _get_path_to_hip_runtime_dylib():
lib_name = "libamdhip64.so"
Expand All @@ -24,6 +71,13 @@ def _get_path_to_hip_runtime_dylib():
return env_libhip_path
raise RuntimeError(f"TRITON_LIBHIP_PATH '{env_libhip_path}' does not point to a valid {lib_name}")

# If the shared object is already mmapped to address space, use it.
mmapped_libhip = _linux_find_already_mmapped_dylib()
if mmapped_libhip:
if os.path.exists(mmapped_libhip):
return mmapped_libhip
raise RuntimeError(f"Memory Mapped '{mmapped_libhip}' does not point to a valid {lib_name}")

paths = []

import site
Expand Down

0 comments on commit a189c11

Please sign in to comment.