Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR #17295: [ROCm] clang support #17945

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions build_tools/configure/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ class CudaCompiler(ArgparseableEnum):
NVCC = enum.auto()


class RocmCompiler(ArgparseableEnum):
  """Compiler choices for the ROCm backend.

  HIPCC is the only member defined here.
  """
  # NOTE(review): presumably parsed from a command-line flag via
  # ArgparseableEnum, like the sibling compiler enums -- confirm at the
  # argparse setup, which is not visible in this hunk.
  HIPCC = enum.auto()


class OS(ArgparseableEnum):
LINUX = enum.auto()
MACOS = enum.auto()
Expand Down Expand Up @@ -263,6 +267,9 @@ class XLAConfigOptions:
cuda_compiler: CudaCompiler
using_nccl: bool

# ROCM specific
rocm_compiler: RocmCompiler

def to_bazelrc_lines(
self,
dpav: DiscoverablePathsAndVersions,
Expand Down Expand Up @@ -352,6 +359,17 @@ def to_bazelrc_lines(
elif self.backend == Backend.ROCM:
build_and_test_tag_filters.append("-cuda-only")
build_and_test_tag_filters.append("-sycl-only")

compiler_pair = self.rocm_compiler, self.host_compiler

if compiler_pair == (RocmCompiler.HIPCC, HostCompiler.CLANG):
rc.append("build --config rocm")
# This is demanded by rocm_configure.bzl.
rc.append(f"build --action_env CLANG_COMPILER_PATH={dpav.clang_path}")
elif compiler_pair == (RocmCompiler.HIPCC, HostCompiler.GCC):
rc.append("build --config rocm")
else:
raise NotImplementedError("ROCm clang with host compiler not supported")
elif self.backend == Backend.SYCL:
build_and_test_tag_filters.append("-cuda-only")
build_and_test_tag_filters.append("-rocm-only")
Expand Down
7 changes: 7 additions & 0 deletions build_tools/configure/configure_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Backend = configure.Backend
HostCompiler = configure.HostCompiler
CudaCompiler = configure.CudaCompiler
RocmCompiler = configure.RocmCompiler
OS = configure.OS

_PYTHON_BIN_PATH = "/usr/bin/python3"
Expand Down Expand Up @@ -98,6 +99,7 @@ def test_clang_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.NVCC,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand All @@ -119,6 +121,7 @@ def test_gcc_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.NVCC,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand All @@ -139,6 +142,7 @@ def test_cuda_clang_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.CLANG,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand All @@ -160,6 +164,7 @@ def test_default_cuda_clang_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.CLANG,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand All @@ -181,6 +186,7 @@ def test_nvcc_clang_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.NVCC,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand All @@ -202,6 +208,7 @@ def test_nvcc_gcc_bazelrc(self):
compiler_options=list(_COMPILER_OPTIONS),
cuda_compiler=CudaCompiler.NVCC,
using_nccl=False,
rocm_compiler=RocmCompiler.HIPCC,
)

bazelrc_lines = config.to_bazelrc_lines(
Expand Down
7 changes: 3 additions & 4 deletions third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,18 @@ cc_toolchain_config(
"-fdata-sections",
],
dbg_compile_flags = ["-g"],
cxx_flags = ["-std=c++14"],
cxx_flags = ["-std=c++17"],
link_flags = [
"-fuse-ld=gold",
"-Wl,-no-as-needed",
"-Wl,-z,relro,-z,now",
"-pass-exit-codes",
],
link_libs = [
"-lstdc++",
"-lm",
],
link_libs = [],
opt_link_flags = [],
unfiltered_compile_flags = [
"-fno-canonical-system-headers",
"-Wno-builtin-macro-redefined",
"-D__DATE__=\"redacted\"",
"-D__TIMESTAMP__=\"redacted\"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ import pipes

# Template values set by rocm_configure.bzl.
CPU_COMPILER = ('%{cpu_compiler}')
HOST_COMPILER_PATH = ('%{host_compiler_path}')

HIPCC_PATH = '%{hipcc_path}'
PREFIX_DIR = os.path.dirname(HOST_COMPILER_PATH)
HIPCC_ENV = '%{hipcc_env}'
HIP_RUNTIME_PATH = '%{hip_runtime_path}'
HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
Expand Down Expand Up @@ -75,6 +77,7 @@ def GetHostCompilerOptions(argv):
parser.add_argument('--sysroot', nargs=1)
parser.add_argument('-g', nargs='*', action='append')
parser.add_argument('-fno-canonical-system-headers', action='store_true')
parser.add_argument('-no-canonical-prefixes', action='store_true')
parser.add_argument('--genco', action='store_true')

args, _ = parser.parse_known_args(argv)
Expand All @@ -87,7 +90,7 @@ def GetHostCompilerOptions(argv):
opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
if args.g:
opts += ' -g' + ' -g'.join(sum(args.g, []))
if args.fno_canonical_system_headers:
if args.fno_canonical_system_headers or args.no_canonical_prefixes:
opts += ' -no-canonical-prefixes'
if args.sysroot:
opts += ' --sysroot ' + args.sysroot[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,6 @@ def _impl(ctx):
flag_group(
flags = [
"-no-canonical-prefixes",
"-fno-canonical-system-headers",
]
),
],
Expand Down
97 changes: 73 additions & 24 deletions third_party/tsl/third_party/gpus/rocm_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
`rocm_configure` depends on the following environment variables:

* `TF_NEED_ROCM`: Whether to enable building with ROCm.
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path.
* `TF_ROCM_CLANG`: Whether to use clang for C++ and HIPCC for ROCm compilation.
* `TF_SYSROOT`: The sysroot to use when compiling.
* `CLANG_COMPILER_PATH`: The clang compiler path that will be used for
host code compilation if TF_ROCM_CLANG is 1.
* `ROCM_PATH`: The path to the ROCm toolkit. Default is `/opt/rocm`.
* `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets.
"""
Expand Down Expand Up @@ -39,6 +43,8 @@ load(

_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
_CLANG_COMPILER_PATH = "CLANG_COMPILER_PATH"
_TF_SYSROOT = "TF_SYSROOT"
_ROCM_TOOLKIT_PATH = "ROCM_PATH"
_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
Expand Down Expand Up @@ -72,12 +78,15 @@ def verify_build_defines(params):
".",
)

def find_cc(repository_ctx):
def find_cc(repository_ctx, use_rocm_clang):
"""Find the C++ compiler."""

# Return a dummy value for GCC detection here to avoid error
target_cc_name = "gcc"
cc_path_envvar = _GCC_HOST_COMPILER_PATH
if use_rocm_clang:
target_cc_name = "clang"
cc_path_envvar = _CLANG_COMPILER_PATH
else:
target_cc_name = "gcc"
cc_path_envvar = _GCC_HOST_COMPILER_PATH
cc_name = target_cc_name

cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar)
Expand All @@ -99,24 +108,26 @@ def _cxx_inc_convert(path):
path = path.strip()
return path

def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"
sysroot = []
if tf_sysroot:
sysroot += ["--sysroot", tf_sysroot]

# TODO: We pass -no-canonical-prefixes here to match the compiler flags,
# but in rocm_clang CROSSTOOL file that is a `feature` and we should
# handle the case when it's disabled and no flag is passed
result = raw_exec(repository_ctx, [
cc,
"-no-canonical-prefixes",
"-E",
"-x" + lang,
"-",
"-v",
])
] + sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
Expand All @@ -138,14 +149,24 @@ def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
for p in inc_dirs.split("\n")
]

def get_cxx_inc_directories(repository_ctx, cc):
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
"""Compute the list of default C and C++ include directories."""

# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sysroot,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sysroot,
)

includes_cpp_set = depset(includes_cpp)
return includes_cpp + [
Expand Down Expand Up @@ -207,6 +228,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin):
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/16.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17/include")
inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/17/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/18/include")
if int(rocm_config.rocm_version_number) >= 60200:
inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/18/include")
Expand Down Expand Up @@ -539,6 +561,16 @@ def _genrule(src_dir, genrule_name, command, outs):
")\n"
)

def _flag_enabled(repository_ctx, flag_name):
    """Tells whether a host environment flag is switched on.

    Args:
      repository_ctx: The repository context used to look up the variable.
      flag_name: Name of the environment variable to inspect.

    Returns:
      True only when the looked-up value is exactly the string "1".
    """
    flag_value = get_host_environ(repository_ctx, flag_name)
    return flag_value == "1"

def _use_rocm_clang(repository_ctx):
    """Reports whether clang should be used as the host compiler.

    Args:
      repository_ctx: The repository context used to look up the variable.

    Returns:
      True when the `TF_ROCM_CLANG` flag is enabled, False otherwise.
    """
    clang_requested = _flag_enabled(repository_ctx, "TF_ROCM_CLANG")
    return clang_requested

def _tf_sysroot(repository_ctx):
    """Looks up the sysroot configured through the `_TF_SYSROOT` variable.

    Args:
      repository_ctx: The repository context used to look up the variable.

    Returns:
      Whatever `get_host_environ` yields for `_TF_SYSROOT`; "" is passed as
      the fallback argument.
    """
    sysroot = get_host_environ(repository_ctx, _TF_SYSROOT, "")
    return sysroot

def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
amdgpu_target_flags = ["--amdgpu-target=" +
amdgpu_target for amdgpu_target in amdgpu_targets]
Expand Down Expand Up @@ -674,6 +706,10 @@ def _create_local_rocm_repository(repository_ctx):
hiprand_include +
rocrand_include),
}

is_rocm_clang = _use_rocm_clang(repository_ctx)
tf_sysroot = _tf_sysroot(repository_ctx)

if rocm_libs["hipblaslt"] != None:
repository_dict["%{hipblaslt_lib}"] = rocm_libs["hipblaslt"].file_name

Expand All @@ -689,24 +725,36 @@ def _create_local_rocm_repository(repository_ctx):

# Set up crosstool/

cc = find_cc(repository_ctx)
cc = find_cc(repository_ctx, is_rocm_clang)
host_compiler_includes = get_cxx_inc_directories(
repository_ctx,
cc,
tf_sysroot,
)

host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc)

host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin")
# host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc)

rocm_defines = {}

rocm_defines["%{builtin_sysroot}"] = tf_sysroot
rocm_defines["%{compiler}"] = "unknown"
if is_rocm_clang:
rocm_defines["%{compiler}"] = "clang"
host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin")
rocm_defines["%{host_compiler_prefix}"] = host_compiler_prefix
rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + host_compiler_prefix
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = ""
rocm_defines["%{unfiltered_compile_flags}"] = ""
rocm_defines["%{rocm_hipcc_files}"] = "[]"

rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + "/hcc/compiler/bin"

# For gcc, do not canonicalize system header paths; some versions of gcc
# pick the shortest possible path for system includes when creating the
# .d file - given that includes that are prefixed with "../" multiple
# time quickly grow longer than the root of the tree, this can lead to
# bazel's header check failing.
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""
if is_rocm_clang:
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-no-canonical-prefixes\""
else:
# For gcc, do not canonicalize system header paths; some versions of gcc
# pick the shortest possible path for system includes when creating the
# .d file - given that includes that are prefixed with "../" multiple
# time quickly grow longer than the root of the tree, this can lead to
# bazel's header check failing.
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""

rocm_defines["%{unfiltered_compile_flags}"] = to_list_of_strings([
"-DTENSORFLOW_USE_ROCM=1",
Expand Down Expand Up @@ -834,6 +882,7 @@ _ENVIRONS = [
_GCC_HOST_COMPILER_PATH,
_GCC_HOST_COMPILER_PREFIX,
"TF_NEED_ROCM",
"TF_ROCM_CLANG",
"TF_NEED_CUDA", # Needed by the `if_gpu_is_configured` macro
_ROCM_TOOLKIT_PATH,
_TF_ROCM_AMDGPU_TARGETS,
Expand Down
Loading
Loading