Skip to content

[SYCL] WG-shared global variables must have external linkage #1279

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//

#include "clang/CodeGen/BackendUtil.h"
#include "SYCLLowerIR/LowerWGScope.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGSYCLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void CGSYCLRuntime::emitWorkGroupLocalVarDecl(CodeGenFunction &CGF,
#endif // NDEBUG
// generate global variable in the address space selected by the clang CodeGen
// (should be local)
CGF.EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
CGF.EmitStaticVarDecl(D, llvm::GlobalValue::ExternalLinkage);
}

bool CGSYCLRuntime::actOnAutoVarEmit(CodeGenFunction &CGF, const VarDecl &D,
Expand Down
4 changes: 1 addition & 3 deletions clang/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
add_subdirectory(SYCLLowerIR)

set(LLVM_LINK_COMPONENTS
Analysis
BitReader
Expand All @@ -23,6 +21,7 @@ set(LLVM_LINK_COMPONENTS
Remarks
ScalarOpts
Support
SYCLLowerIR
Target
TransformUtils
)
Expand Down Expand Up @@ -112,5 +111,4 @@ add_clang_library(clangCodeGen
clangFrontend
clangLex
clangSerialization
clangSYCLLowerIR
)
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CodeGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
#include "MacroPPCallbacks.h"
#include "SYCLLowerIR/LowerWGScope.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
Expand Down Expand Up @@ -39,6 +38,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Pass.h"
#include "llvm/SYCLLowerIR/LowerWGScope.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TimeProfiler.h"
Expand Down Expand Up @@ -335,7 +335,7 @@ namespace clang {
if (LangOpts.SYCLIsDevice) {
PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
legacy::PassManager PreLinkingSyclPasses;
PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
PreLinkingSyclPasses.add(llvm::createSYCLLowerWGScopePass());
PreLinkingSyclPasses.run(*getModule());
}

Expand Down
18 changes: 0 additions & 18 deletions clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt

This file was deleted.

43 changes: 0 additions & 43 deletions clang/test/CodeGenSYCL/hier_par.cpp

This file was deleted.

21 changes: 21 additions & 0 deletions clang/test/CodeGenSYCL/wg_scope_var.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl-is-device -disable-llvm-passes -I %S/Inputs -emit-llvm %s -o - | FileCheck %s

// Check that local variables declared by the user in PFWG scope are turned into globals in the local address space.
// CHECK: @{{.*myLocal.*}} = addrspace(3) global i32 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: I think the size of int depends on the host ABI, which is not set by the test, so on some platforms this check might fail due to sizeof(int) != i32.
It's probably better to set aux-target-triple.


#include "sycl.hpp"

using namespace cl::sycl;

// Test driver: submits a single command group that calls
// parallel_for_work_group with a lambda whose body only declares `myLocal`.
int main() {
queue myQueue;

myQueue.submit([&](handler &cgh) {
cgh.parallel_for_work_group<class kernel>(
range<3>(2, 2, 2), range<3>(2, 2, 2), [=](group<3> myGroup) {
// Declared directly in the work-group lambda. The FileCheck pattern near the
// top of this test matches a global whose name contains "myLocal", so this
// identifier must not be renamed.
int myLocal;
});
});

return 0;
}
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ void initializeStripNonDebugSymbolsPass(PassRegistry&);
void initializeStripNonLineTableDebugInfoPass(PassRegistry&);
void initializeStripSymbolsPass(PassRegistry&);
void initializeStructurizeCFGPass(PassRegistry&);
void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &);
void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/LinkAllPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/SYCLLowerIR/LowerWGScope.h"
#include "llvm/Support/Valgrind.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
Expand Down Expand Up @@ -199,6 +200,7 @@ namespace {
(void) llvm::createMergeFunctionsPass();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandMemCmpPass();
(void)llvm::createSYCLLowerWGScopePass();
std::string buf;
llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class SYCLLowerWGScopePass : public PassInfoMixin<SYCLLowerWGScopePass> {
};

FunctionPass *createSYCLLowerWGScopePass();
void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &);

} // namespace llvm

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ add_subdirectory(AsmParser)
add_subdirectory(LineEditor)
add_subdirectory(ProfileData)
add_subdirectory(Passes)
add_subdirectory(SYCLLowerIR)
add_subdirectory(TextAPI)
add_subdirectory(ToolDrivers)
add_subdirectory(XRay)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/LLVMBuild.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ subdirectories =
Passes
ProfileData
Support
SYCLLowerIR
TableGen
TextAPI
Target
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/SYCLLowerIR/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Defines the LLVMSYCLLowerIR component library, built from LowerWGScope.cpp.
# Its public headers are listed under llvm/SYCLLowerIR in the main include
# directory, and the target depends on intrinsics_gen.
add_llvm_component_library(LLVMSYCLLowerIR
LowerWGScope.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR

DEPENDS
intrinsics_gen
)
20 changes: 20 additions & 0 deletions llvm/lib/SYCLLowerIR/LLVMBuild.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
;===- ./lib/SYCLLowerIR/LLVMBuild.txt -----------------------------*- Conf -*--===;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;

[component_0]
type = Group
name = SYCLLowerIR
parent = Libraries
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,7 @@
// et al.
//===----------------------------------------------------------------------===//

#include "LowerWGScope.h"

#include "clang/Basic/AddressSpaces.h"

#include "llvm/SYCLLowerIR/LowerWGScope.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
Expand All @@ -85,6 +82,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"

Expand Down Expand Up @@ -840,7 +838,7 @@ GlobalVariable *spirv::createWGLocalVariable(Module &M, Type *T,
new GlobalVariable(M, // module
T, // type
false, // isConstant
GlobalValue::InternalLinkage, // Linkage
GlobalValue::ExternalLinkage, // Linkage
UndefValue::get(T), // Initializer
Name, // Name
nullptr, // InsertBefore
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/SYCLLowerIR/byval_arg.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; RUN: opt < %s -LowerWGScope -S | FileCheck %s

; Check that a byval argument of a function marked with !work_group_scope
; metadata is shared by the leader work item with all other work items
; via local memory

%struct.baz = type { i64 }

; CHECK: @[[SHADOW:[a-zA-Z0-9]+]] = unnamed_addr addrspace(3) global %struct.baz

; Input function: carries !work_group_scope metadata and takes one byval
; %struct.baz argument. Its body is only `ret void`, so every instruction the
; assertions below expect before the return must come from the pass invoked
; on the RUN line.
define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !work_group_scope !0 {
; CHECK-LABEL: @wibble(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex
; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[CMPZ]], label [[LEADER:%.*]], label [[MERGE:%.*]]
; CHECK: leader:
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.baz* [[ARG1:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 bitcast (%struct.baz addrspace(3)* @[[SHADOW]] to i8 addrspace(3)*), i8* [[TMP2]], i64 8, i1 false)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272)
; CHECK-NEXT: ret void
;
ret void
}

!0 = !{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is !0 for?

79 changes: 79 additions & 0 deletions llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -LowerWGScope -S | FileCheck %s

; Check that allocas which correspond to the PFWI lambda object and a local copy of the PFWG lambda object
; are properly handled by the LowerWGScope pass. Check that WG-shared local "shadow" variables are created
; and that, before each PFWI invocation, the leader WI stores its private copy of the variable into the shadow,
; then all WIs load the shadow value into their private copies ("materialize" the private copy).

%struct.bar = type { i8 }
%struct.zot = type { %struct.widget, %struct.widget, %struct.widget, %struct.foo }
%struct.widget = type { %struct.barney }
%struct.barney = type { [3 x i64] }
%struct.foo = type { %struct.barney }
%struct.foo.0 = type { i8 }

; CHECK: @[[PFWG_SHADOW:.*]] = unnamed_addr addrspace(3) global %struct.bar addrspace(4)*
; CHECK: @[[PFWI_SHADOW:.*]] = unnamed_addr addrspace(3) global %struct.foo.0
; CHECK: @[[GROUP_SHADOW:.*]] = unnamed_addr addrspace(3) global %struct.zot

; Input function tagged with !work_group_scope. The original body (after the
; assertion comments) allocates %tmp and %tmp2, round-trips %arg through %tmp,
; casts the byval %arg1 to addrspace(4), and calls @bar with that pointer and
; %tmp2 passed byval.
define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.zot* byval(%struct.zot) align 8 %arg1) align 2 !work_group_scope !0 {
; CHECK-LABEL: @wibble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex
; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]]
; CHECK: leader:
; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.zot* [[ARG1:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.zot addrspace(3)* @[[GROUP_SHADOW]] to i8 addrspace(3)*), i8* align 8 [[TMP1]], i64 96, i1 false)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272)
; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1
; CHECK-NEXT: [[ID:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex
; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[ID]], 0
; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]]
; CHECK: wg_leader:
; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[ARG:%.*]], [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast [[STRUCT_ZOT:%.*]] addrspace(3)* @[[GROUP_SHADOW]] to [[STRUCT_ZOT]] addrspace(4)*
; CHECK-NEXT: store [[STRUCT_ZOT]] addrspace(4)* [[TMP4]], [[STRUCT_ZOT]] addrspace(4)* addrspace(3)* @wibbleWG_tmp4
; CHECK-NEXT: br label [[WG_CF]]
; CHECK: wg_cf:
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex
; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP3]], 0
; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]]
; CHECK: TestMat:
; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.foo.0* [[TMP2]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i8* align 1 [[TMP4]], i64 1, i1 false)
; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]]
; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD]], [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW]]
; CHECK-NEXT: br label [[LEADERMAT]]
; CHECK: LeaderMat:
; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272)
; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW]]
; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD1]], [[STRUCT_BAR]] addrspace(4)** [[TMP]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.foo.0* [[TMP2]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 1 [[TMP5]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i64 1, i1 false)
; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272)
; CHECK-NEXT: [[WG_VAL_TMP4:%.*]] = load [[STRUCT_ZOT]] addrspace(4)*, [[STRUCT_ZOT]] addrspace(4)* addrspace(3)* @wibbleWG_tmp4
; CHECK-NEXT: call spir_func void @bar(%struct.zot addrspace(4)* [[WG_VAL_TMP4]], %struct.foo.0* byval(%struct.foo.0) align 1 [[TMP2]])
; CHECK-NEXT: ret void
;
bb:
%tmp = alloca %struct.bar addrspace(4)*, align 8
%tmp2 = alloca %struct.foo.0, align 1
store %struct.bar addrspace(4)* %arg, %struct.bar addrspace(4)** %tmp, align 8
%tmp3 = load %struct.bar addrspace(4)*, %struct.bar addrspace(4)** %tmp, align 8
%tmp4 = addrspacecast %struct.zot* %arg1 to %struct.zot addrspace(4)*
call spir_func void @bar(%struct.zot addrspace(4)* %tmp4, %struct.foo.0* byval(%struct.foo.0) align 1 %tmp2)
ret void
}

; Callee invoked from @wibble. It is tagged with both !work_item_scope and
; !parallel_for_work_item metadata and has an empty body; only the metadata
; attachments and the signature matter to this test.
define internal spir_func void @bar(%struct.zot addrspace(4)* %arg, %struct.foo.0* byval(%struct.foo.0) align 1 %arg1) align 2 !work_item_scope !0 !parallel_for_work_item !0 {
bb:
ret void
}

!0 = !{}
1 change: 1 addition & 0 deletions llvm/tools/bugpoint/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(LLVM_LINK_COMPONENTS
ObjCARCOpts
ScalarOpts
Support
SYCLLowerIR
Target
TransformUtils
Vectorize
Expand Down
1 change: 1 addition & 0 deletions llvm/tools/opt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(LLVM_LINK_COMPONENTS
Remarks
ScalarOpts
Support
SYCLLowerIR
Target
TransformUtils
Vectorize
Expand Down
1 change: 1 addition & 0 deletions llvm/tools/opt/opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ int main(int argc, char **argv) {
initializeWriteBitcodePassPass(Registry);
initializeHardwareLoopsPass(Registry);
initializeTypePromotionPass(Registry);
initializeSYCLLowerWGScopeLegacyPassPass(Registry);

#ifdef BUILD_EXAMPLES
initializeExampleIRTransforms(Registry);
Expand Down
3 changes: 0 additions & 3 deletions sycl/test/hier_par/hier_par_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// TODO: ptxas fatal : Unresolved extern function '__spirv_ControlBarrier'
// XFAIL: cuda

// This test checks hierarchical parallelism invocation APIs, but without any
// data or code with side-effects between the work group and work item scopes.

Expand Down
Loading