Skip to content

Commit e77143e

Browse files
authored
Merge pull request intel#162 from frasercrmck/masked-group-ops
[vecz] Don't mask work-group collective operations
2 parents 2d3a772 + fbd39f6 commit e77143e

File tree

2 files changed

+59
-1
lines changed

2 files changed

+59
-1
lines changed

llvm/lib/SYCLNativeCPUUtils/compiler_passes/vecz/source/transform/control_flow_conversion_pass.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,8 @@ bool ControlFlowConversionState::Impl::applyMaskToCall(CallInst *CI,
12921292
}
12931293

12941294
// Builtins without side effects do not need to be masked.
1295-
auto const props = Ctx.builtins().analyzeBuiltin(*callee).properties;
1295+
auto const builtin = Ctx.builtins().analyzeBuiltin(*callee);
1296+
auto const props = builtin.properties;
12961297
if (props & compiler::utils::eBuiltinPropertyNoSideEffects) {
12971298
LLVM_DEBUG(dbgs() << "vecz-cf: Called function is an pure builtin\n");
12981299
return true;
@@ -1313,6 +1314,18 @@ bool ControlFlowConversionState::Impl::applyMaskToCall(CallInst *CI,
13131314
dbgs() << "vecz-cf: Called function does not have any side-effects\n");
13141315
return true;
13151316
}
1317+
// We don't want to mask work-group collective builtins, because they are
1318+
// barriers (see above). This should actually be a rare situation, as these
1319+
// builtins are required to be uniform/convergent and so either all
1320+
// work-items or no work-items should hit them. Most of the time, this
1321+
// situation relies on the vectorizer failing to trace the branch flow and
1322+
// failing to realize the conditions are in fact uniform.
1323+
if (auto info = Ctx.builtins().isMuxGroupCollective(builtin.ID);
1324+
info && info->isWorkGroupScope()) {
1325+
LLVM_DEBUG(
1326+
dbgs() << "vecz-cf: Called function is a work-group collective\n");
1327+
return true;
1328+
}
13161329

13171330
// Create the new function and replace the old one with it
13181331
CallInst *newCI = emitMaskedVersion(CI, mask);
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; Copyright (C) Codeplay Software Limited
2+
;
3+
; Licensed under the Apache License, Version 2.0 (the "License") with LLVM
4+
; Exceptions; you may not use this file except in compliance with the License.
5+
; You may obtain a copy of the License at
6+
;
7+
; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
8+
;
9+
; Unless required by applicable law or agreed to in writing, software
10+
; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11+
; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12+
; License for the specific language governing permissions and limitations
13+
; under the License.
14+
;
15+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
16+
17+
; RUN: veczc -vecz-passes="cfg-convert" -vecz-simd-width=4 -S < %s | FileCheck %s
18+
19+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
20+
target triple = "spir64-unknown-unknown"
21+
22+
declare i64 @__mux_get_local_id()
23+
declare i32 @__mux_work_group_scan_inclusive_smax_i32(i32, i32)
24+
25+
; CHECK-LABEL: define spir_kernel void @__vecz_v4_foo()
26+
; CHECK-NOT: @__vecz_b_masked___mux_work_group_scan_inclusive_smax_i32
27+
define spir_kernel void @foo() {
28+
entry:
29+
%0 = call i64 @__mux_get_local_id()
30+
br i1 false, label %for.body.i11, label %if.end.i105.i
31+
32+
for.body.i11:
33+
%1 = icmp slt i64 %0, 0
34+
br i1 %1, label %if.end.i13, label %if.end.i13
35+
36+
if.end.i13:
37+
br i1 false, label %exit, label %if.end.i105.i
38+
39+
if.end.i105.i:
40+
%2 = call i32 @__mux_work_group_scan_inclusive_smax_i32(i32 0, i32 0)
41+
br label %exit
42+
43+
exit:
44+
ret void
45+
}

0 commit comments

Comments
 (0)