Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2680,6 +2680,11 @@ def fsycl_footer_path_EQ : Joined<["-"], "fsycl-footer-path=">,
// -fno-sycl-link-spirv: value-less driver flag (Flag kind, CoreOption).
// Its user-visible behavior is described by the HelpText below.
def fno_sycl_link_spirv : Flag<["-"], "fno-sycl-link-spirv">,
Flags<[CoreOption]>, HelpText<"Disable adding of the default (spir64) triple "
"when discovered in user specified objects and archives.">;
// -fsycl-max-parallel-link-jobs=<value>: experimental SYCL option (Joined
// kind, so the value is attached directly after the '=').
// The SYCL driver code reads the last occurrence via getLastArgValue() and,
// when the value is non-empty, forwards it verbatim to llvm-foreach as
// "--jobs=<value>".
// NOTE(review): the record name omits the "link" that appears in the flag
// spelling; C++ code refers to it as options::OPT_fsycl_max_parallel_jobs_EQ,
// so any rename must be done in both places.
// NOTE(review): no numeric validation of the value is visible in the driver
// code that consumes it -- confirm that llvm-foreach rejects malformed input.
def fsycl_max_parallel_jobs_EQ : Joined<["-"], "fsycl-max-parallel-link-jobs=">,
Flags<[CoreOption]>, Group<f_Group>,
HelpText<"Experimental feature. Controls the maximum parallelism of actions performed "
"on SYCL device code post-link, i.e. the generation of SPIR-V device images "
"or AOT compilation of each device image.">;
// -fsyntax-only: value-less flag placed in Action_Group; no HelpText is
// attached to this record.
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[NoXarchOption,CoreOption,CC1Option,FC1Option]>, Group<Action_Group>;
// -ftabstop=<value>: Joined option in f_Group; no HelpText is attached to
// this record.
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8803,6 +8803,11 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
TCArgs.MakeArgString("--out-file-list=" + OutputFileName));
ForeachArgs.push_back(
TCArgs.MakeArgString("--out-replace=" + OutputFileName));
StringRef ParallelJobs =
TCArgs.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
if (!ParallelJobs.empty())
ForeachArgs.push_back(TCArgs.MakeArgString("--jobs=" + ParallelJobs));

ForeachArgs.push_back(TCArgs.MakeArgString("--"));
ForeachArgs.push_back(TCArgs.MakeArgString(Cmd->getExecutable()));

Expand Down
39 changes: 25 additions & 14 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
std::unique_ptr<Command> InputCommand,
const InputInfoList &InputFiles,
const InputInfo &Output, const Tool *T,
StringRef Increment,
StringRef Ext = "out") {
StringRef Increment, StringRef Ext,
StringRef ParallelJobs) {
// Construct llvm-foreach command.
// The llvm-foreach command looks like this:
// llvm-foreach --in-file-list=a.list --in-replace='{}' -- echo '{}'
Expand All @@ -123,6 +123,9 @@ void SYCL::constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
if (!Increment.empty())
ForeachArgs.push_back(
C.getArgs().MakeArgString("--out-increment=" + Increment));
if (!ParallelJobs.empty())
ForeachArgs.push_back(C.getArgs().MakeArgString("--jobs=" + ParallelJobs));

ForeachArgs.push_back(C.getArgs().MakeArgString("--"));
ForeachArgs.push_back(
C.getArgs().MakeArgString(InputCommand->getExecutable()));
Expand Down Expand Up @@ -395,10 +398,12 @@ void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand(
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "", ForeachExt);
else
this, "", ForeachExt, ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -560,10 +565,12 @@ void SYCL::fpga::BackendCompiler::ConstructJob(
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
addFPGATimingDiagnostic(Cmd, C);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, ReportOptArg, ForeachExt);
else
this, ReportOptArg, ForeachExt, ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -599,10 +606,12 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "");
else
this, "", "out", ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down Expand Up @@ -632,10 +641,12 @@ void SYCL::x86_64::BackendCompiler::ConstructJob(
const char *Exec = C.getArgs().MakeArgString(ExecPath);
auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, None);
if (!ForeachInputs.empty())
if (!ForeachInputs.empty()) {
StringRef ParallelJobs =
Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
this, "");
else
this, "", "out", ParallelJobs);
} else
C.addCommand(std::move(Cmd));
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/SYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void constructLLVMForeachCommand(Compilation &C, const JobAction &JA,
std::unique_ptr<Command> InputCommand,
const InputInfoList &InputFiles,
const InputInfo &Output, const Tool *T,
StringRef Increment, StringRef Ext);
StringRef Increment, StringRef Ext = "out",
StringRef ParallelJobs = "");

// Runs llvm-spirv to convert spirv to bc, llvm-link, which links multiple LLVM
// bitcode. Converts generated bc back to spirv using llvm-spirv, wraps with
Expand Down
16 changes: 16 additions & 0 deletions clang/test/Driver/sycl-offload-with-split.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,22 @@

/// ###########################################################################

/// Check parallel compilation enforcement for split modules when running SPIR-V translation and AOT compilation
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fintelfpga -Xshardware %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=aoc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_gen-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=ocloc
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fsycl-max-parallel-link-jobs=4 -fsycl-targets=spir64_x86_64-unknown-unknown %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefixes=CHK-PARALLEL-JOBS,CHK-PARALLEL-JOBS-AOT -DBE_COMPILER=opencl-aot
// CHK-PARALLEL-JOBS: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}llvm-spirv{{.*}}"
// CHK-PARALLEL-JOBS-AOT: llvm-foreach{{.*}} "--jobs=4" "--" "{{.*}}[[BE_COMPILER]]{{.*}}

/// ###########################################################################

/// offload with multiple targets, including AOT
// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fsycl-device-code-split -fsycl-targets=spir64-unknown-unknown,spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown -ccc-print-phases %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG %s
Expand Down
9 changes: 9 additions & 0 deletions sycl/doc/UsersManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ and not recommended to use in production environment.
* auto - the compiler will use a heuristic to select the best way of
splitting device code. This is default mode.

**`-fsycl-max-parallel-link-jobs=<n>`**

Experimental feature. When specified, it informs the compiler
that it can simultaneously spawn up to `n` processes to perform
the actions required to link a DPC++ application. This option is
only useful in SYCL mode. It only takes effect if a link action
needs to be executed, i.e. it won't have any effect in the presence of
options like `-c` or `-E`. The default value of `n` is 1.

**`-f[no-]sycl-device-lib=<lib1>[,<lib2>,...]`**

Enables/disables linking of the device libraries. Supported libraries:
Expand Down