Skip to content

Commit

Permalink
Merge branch 'sycl' into sycl
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreiZibrov authored Jul 4, 2024
2 parents b73f1a5 + c844334 commit ef5a829
Show file tree
Hide file tree
Showing 31 changed files with 353 additions and 172 deletions.
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(LLVM_LINK_COMPONENTS
ScalarOpts
Support
SYCLLowerIR
SYCLNativeCPUUtils
Target
TargetParser
TransformUtils
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
<< MWGPCU << 0;
} else {
// The value is guaranteed to be > 0, pass it to the metadata.
addNVVMMetadata(F, "minnctapersm", attrValue(MWGPCU->getValue()));
addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue()));
HasMinWorkGroupPerCU = true;
}
}
Expand Down
14 changes: 7 additions & 7 deletions clang/test/CodeGenSYCL/launch_bounds_nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// compute unit and maximum work groups per multi-processor attributes, that
// correspond to CUDA's launch bounds. Expect max_work_group_size,
// min_work_groups_per_cu and max_work_groups_per_mp that are mapped to
// maxntidx, minnctapersm, maxclusterrank PTX directives respectively.
// maxntidx, minctasm, and maxclusterrank NVVM annotations respectively.

#include "sycl.hpp"

Expand Down Expand Up @@ -49,22 +49,22 @@ int main() {
// CHECK: define dso_local void @{{.*}}kernel_name3() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM]] !max_work_group_size ![[MWGS_2:[0-9]+]]

// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 512}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minnctapersm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minctasm", i32 2}
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minnctapersm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 384}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minnctapersm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minctasm", i32 6}
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}

// CHECK: ![[MWGPC]] = !{i32 2}
Expand Down
18 changes: 9 additions & 9 deletions devops/dependencies.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"linux": {
"compute_runtime": {
"github_tag": "24.13.29138.7",
"version": "24.13.29138.7",
"url": "https://github.com/intel/compute-runtime/releases/tag/24.13.29138.7",
"github_tag": "24.22.29735.20",
"version": "24.22.29735.20",
"url": "https://github.com/intel/compute-runtime/releases/tag/24.22.29735.20",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"igc": {
"github_tag": "igc-1.0.16510.2",
"version": "1.0.16510.2",
"url": "https://github.com/intel/intel-graphics-compiler/releases/tag/igc-1.0.16510.2",
"github_tag": "igc-1.0.16900.23",
"version": "1.0.16900.23",
"url": "https://github.com/intel/intel-graphics-compiler/releases/tag/igc-1.0.16900.23",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"cm": {
Expand All @@ -19,9 +19,9 @@
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"level_zero": {
"github_tag": "v1.16.14",
"version": "v1.16.14",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.16.14",
"github_tag": "v1.17.17",
"version": "v1.17.17",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.17.17",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"tbb": {
Expand Down
6 changes: 3 additions & 3 deletions llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class ModuleSplitterBase {
Module &getInputModule() { return Input.getModule(); }

std::unique_ptr<Module> releaseInputModule() {
return std::move(Input.releaseModulePtr());
return Input.releaseModulePtr();
}

public:
Expand Down Expand Up @@ -274,9 +274,9 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly,
bool EmitOnlyKernelsAsEntryPoints);

#ifndef NDEBUG
void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0);
void dumpEntryPoints(const EntryPointSet &C, const char *Msg = "", int Tab = 0);
void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false,
const char *msg = "", int Tab = 0);
const char *Msg = "", int Tab = 0);
#endif // NDEBUG

struct SplitModule {
Expand Down
9 changes: 3 additions & 6 deletions llvm/include/llvm/SYCLLowerIR/Support.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,9 @@
} while (false)

#define CHECK_AND_EXIT(E) \
{ \
Error LocE = std::move(E); \
if (LocE) { \
logAllUnhandledErrors(std::move(LocE), WithColor::error(errs())); \
exit(1); \
} \
if (Error LocE = E) { \
logAllUnhandledErrors(std::move(LocE), WithColor::error(errs())); \
exit(1); \
}

namespace llvm {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ add_subdirectory(LineEditor)
add_subdirectory(ProfileData)
add_subdirectory(Passes)
add_subdirectory(SYCLLowerIR)
add_subdirectory(SYCLNativeCPUUtils)
add_subdirectory(TargetParser)
add_subdirectory(TextAPI)
add_subdirectory(ToolDrivers)
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,6 @@ add_llvm_component_library(LLVMSYCLLowerIR
LocalAccessorToSharedMemory.cpp
GlobalOffset.cpp
TargetHelpers.cpp
PrepareSYCLNativeCPU.cpp
RenameKernelSYCLNativeCPU.cpp
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
PipelineSYCLNativeCPU.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
22 changes: 16 additions & 6 deletions llvm/lib/SYCLLowerIR/ModuleSplitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/Utils/Cloning.h"
Expand Down Expand Up @@ -643,13 +644,23 @@ void ModuleDesc::restoreLinkageOfDirectInvokeSimdTargets() {
}
}

// Predicate for Internalize pass.
bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
if (!canBeImportedFunction(*F))
return false;
return true;
}

// TODO: try to move all passes (cleanup, spec consts, compile time properties)
// in one place and execute MPM.run() only once.
void ModuleDesc::cleanup() {
ModuleAnalysisManager MAM;
MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
ModulePassManager MPM;
// Do cleanup.
if (SupportDynamicLinking)
MPM.addPass(InternalizePass(mustPreserveGV));
MPM.addPass(GlobalDCEPass()); // Delete unreachable globals.
MPM.addPass(StripDeadDebugInfoPass()); // Remove dead debug info.
MPM.addPass(StripDeadPrototypesPass()); // Remove dead func decls.
Expand Down Expand Up @@ -1143,8 +1154,8 @@ SmallVector<ModuleDesc, 2> splitByESIMD(ModuleDesc &&MD,
}

if (EntryPointGroups.size() == 1) {
Result.emplace_back(std::move(MD.releaseModulePtr()),
std::move(EntryPointGroups[0]), MD.Props);
Result.emplace_back(MD.releaseModulePtr(), std::move(EntryPointGroups[0]),
MD.Props);
return Result;
}

Expand All @@ -1153,18 +1164,17 @@ SmallVector<ModuleDesc, 2> splitByESIMD(ModuleDesc &&MD,
if (Group.isEsimd()) {
// For ESIMD module, we use full call graph of all entry points and all
// ESIMD functions.
Result.emplace_back(
std::move(extractESIMDSubModule(MD, std::move(Group), CG)));
Result.emplace_back(extractESIMDSubModule(MD, std::move(Group), CG));
} else {
// For non-ESIMD module we only use non-ESIMD functions. Additional filter
// is needed, because there could be uses of ESIMD functions from
// non-ESIMD functions through invoke_simd. If that is the case, both
// modules are expected to be linked back together after ESIMD functions
// were processed and therefore it is fine to return an "incomplete"
// module here.
Result.emplace_back(std::move(extractCallGraph(
Result.emplace_back(extractCallGraph(
MD, std::move(Group), CG,
[=](const Function *F) -> bool { return !isESIMDFunction(*F); })));
[=](const Function *F) -> bool { return !isESIMDFunction(*F); }));
}
}

Expand Down
98 changes: 98 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
add_llvm_component_library(LLVMSYCLNativeCPUUtils
PipelineSYCLNativeCPU.cpp
PrepareSYCLNativeCPU.cpp
RenameKernelSYCLNativeCPU.cpp
ConvertToMuxBuiltinsSYCLNativeCPU.cpp


ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR

LINK_COMPONENTS
Analysis
Core
Support
Passes
Target
TargetParser
TransformUtils
ipo
)

set(OCK_SOURCE_DIR "" CACHE PATH "Root of the local checkout of the oneAPI Construction Kit")
set(OCK_GIT_REPO "" CACHE STRING "Git repository for the oneAPI Construction Kit FetchContent")
set(OCK_GIT_TAG "" CACHE STRING "Git tag for the oneAPI Construction Kit FetchContent")
option(NATIVECPU_OCK_USE_FETCHCONTENT "Use FetchContent to acquire oneAPI Construction Kit source code" On)
option(NATIVECPU_USE_OCK "Use the oneAPI Construction Kit for Native CPU" ON)

# Don't fetch OCK if Native CPU is not enabled.
if(NOT "native_cpu" IN_LIST SYCL_ENABLE_PLUGINS)
set(NATIVECPU_USE_OCK Off CACHE BOOL "Use the oneAPI Construction Kit for Native CPU" FORCE)
endif()

if(NATIVECPU_USE_OCK)
if(NATIVECPU_OCK_USE_FETCHCONTENT)
set(OCK_GIT_INTERNAL_REPO "https://github.com/codeplaysoftware/oneapi-construction-kit.git")
# commit 05e6e1b211704224fbdc6394d85d637f57fafdaf
# Merge: 256027e8 fbc2e567
# Author: Pietro Ghiglio <pietro.ghiglio@codeplay.com>
# Date: Fri May 17 13:08:15 2024 +0200
# Merge pull request #458 from PietroGhg/pietro/link_aggressiveinstcombine
# Link vecz to aggressiveinstcombine
set(OCK_GIT_INTERNAL_TAG 05e6e1b211704224fbdc6394d85d637f57fafdaf)

# Overwrite OCK_GIT_INTERNAL_REPO/OCK_GIT_INTERNAL_TAG if the corresponding options are set
if(OCK_GIT_REPO)
set(OCK_GIT_INTERNAL_REPO "${OCK_GIT_REPO}")
endif()
if(OCK_GIT_TAG)
set(OCK_GIT_INTERNAL_TAG "${OCK_GIT_TAG}")
endif()
include(FetchContent)
FetchContent_Declare(oneapi-ck
GIT_REPOSITORY "${OCK_GIT_INTERNAL_REPO}"
GIT_TAG "${OCK_GIT_INTERNAL_TAG}"
)
FetchContent_GetProperties(oneapi-ck)
if(NOT oneapi-ck_POPULATED)
message(STATUS "Cloning oneAPI Construction Kit from ${OCK_GIT_INTERNAL_REPO}, tag ${OCK_GIT_INTERNAL_TAG}")
FetchContent_Populate(oneapi-ck)
message(STATUS "oneAPI Construction Kit cloned in ${oneapi-ck_SOURCE_DIR}")
set(OCK_SOURCE_DIR_INTERNAL ${oneapi-ck_SOURCE_DIR}/compiler_passes)
set(OCK_BINARY_DIR_INTERNAL ${oneapi-ck_BINARY_DIR})
endif()
elseif(OCK_SOURCE_DIR)
set(OCK_SOURCE_DIR_INTERNAL "${OCK_SOURCE_DIR}/compiler_passes")
set(OCK_BINARY_DIR_INTERNAL "${CMAKE_CURRENT_BINARY_DIR}/oneapi-construction-kit")
else()
message(FATAL_ERROR "NATIVECPU_OCK_USE_FETCHCONTENT is Off and OCK_SOURCE_DIR not set")
endif()

set(CA_ENABLE_API "cl" CACHE STRING "" FORCE)
add_subdirectory(
${OCK_SOURCE_DIR_INTERNAL}
${OCK_BINARY_DIR_INTERNAL} EXCLUDE_FROM_ALL)

install(TARGETS compiler-pipeline
EXPORT;LLVMExports
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT compiler-pipeline
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT compiler-pipeline
RUNTIME DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT compiler-pipeline)
set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS compiler-pipeline)
install(TARGETS vecz
EXPORT;LLVMExports
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT vecz
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT vecz
RUNTIME DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT vecz)
set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS vecz)
install(TARGETS multi_llvm EXPORT;LLVMExports)
set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS multi_llvm)
target_compile_definitions(LLVMSYCLNativeCPUUtils PRIVATE NATIVECPU_USE_OCK)
target_include_directories(LLVMSYCLNativeCPUUtils PRIVATE
${oneapi-ck_SOURCE_DIR}/modules/compiler/multi_llvm/include
${oneapi-ck_SOURCE_DIR}/modules/cargo/include
${oneapi-ck_SOURCE_DIR}/modules/compiler/vecz/include
${oneapi-ck_SOURCE_DIR}/modules/compiler/utils/include)
target_link_libraries(LLVMSYCLNativeCPUUtils PRIVATE compiler-pipeline vecz)

endif()
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
#include "compiler/utils/prepare_barriers_pass.h"
#include "compiler/utils/sub_group_analysis.h"
#include "compiler/utils/work_item_loops_pass.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "vecz/pass.h"
#include "vecz/vecz_target_info.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#endif

using namespace llvm;
Expand Down Expand Up @@ -85,5 +85,4 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
#endif
MPM.addPass(PrepareSYCLNativeCPUPass());
MPM.addPass(RenameKernelSYCLNativeCPUPass());

}
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6257,7 +6257,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
AddressSpace, CostKind);

unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
MVT VT =
MVT::getVectorVT(TLI->getSimpleValueType(DL, VecTy->getScalarType()), VF);

InstructionCost MaskCost;
if (UseMaskedMemOp) {
Expand Down
Loading

0 comments on commit ef5a829

Please sign in to comment.