Skip to content

Commit 94385eb

Browse files
committed
Fix llvm#73. Store the translation result into task args. See more.
For now, we have decided that address translation will be performed only for reductions and device dependencies. This is because we support only one level of indirection when translating, so given `int **p;`, a dependency such as `in([10](p[3]))` will not work. In the future we may add translation for those expressions that can be translated, although it is unnecessary on SMP. Also fix the reduction init function. Closes llvm#73
1 parent aa297f0 commit 94385eb

File tree

6 files changed

+131
-91
lines changed

6 files changed

+131
-91
lines changed

Diff for: clang/lib/CodeGen/CGOmpSsRuntime.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1820,13 +1820,13 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
18201820
// initializer(omp_priv = ...)
18211821
// initializer(omp_priv(...))
18221822
CGF.EmitExprAsInit(PrivVD->getInit(), PrivVD,
1823-
CGF.MakeAddrLValue(PrivLV.getPointer(CGF), PrivLV.getType(), PrivLV.getAlignment()),
1823+
CGF.MakeAddrLValue(PrivCur, PrivLV.getType(), PrivLV.getAlignment()),
18241824
/*capturedByInit=*/false);
18251825
}
18261826
} else {
18271827
assert(RHSVD->hasInit() && "RHSVD has no initializer");
18281828
CGF.EmitExprAsInit(RHSVD->getInit(), RHSVD,
1829-
CGF.MakeAddrLValue(PrivLV.getPointer(CGF), PrivLV.getType(), PrivLV.getAlignment()),
1829+
CGF.MakeAddrLValue(PrivCur, PrivLV.getType(), PrivLV.getAlignment()),
18301830
/*capturedByInit=*/false);
18311831
}
18321832

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*--------------------------------------------------------------------
2+
(C) Copyright 2006-2013 Barcelona Supercomputing Center
3+
Centro Nacional de Supercomputacion
4+
5+
This file is part of Mercurium C/C++ source-to-source compiler.
6+
7+
See AUTHORS file in the top level directory for information
8+
regarding developers and contributors.
9+
10+
This library is free software; you can redistribute it and/or
11+
modify it under the terms of the GNU Lesser General Public
12+
License as published by the Free Software Foundation; either
13+
version 3 of the License, or (at your option) any later version.
14+
15+
Mercurium C/C++ source-to-source compiler is distributed in the hope
16+
that it will be useful, but WITHOUT ANY WARRANTY; without even the
17+
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18+
PURPOSE. See the GNU Lesser General Public License for more
19+
details.
20+
21+
You should have received a copy of the GNU Lesser General Public
22+
License along with Mercurium C/C++ source-to-source compiler; if
23+
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
24+
Cambridge, MA 02139, USA.
25+
--------------------------------------------------------------------*/
26+
27+
// RUN: %oss-cxx-compile-and-run
28+
// RUN: %oss-cxx-O2-compile-and-run
29+
// UNSUPPORTED: true
30+
31+
#include <cassert>
32+
33+
#define N 5000
34+
35+
int main() {
36+
int size = N;
37+
int vla[size];
38+
// VLA init
39+
for (int i = 0; i < N; ++i)
40+
vla[i] = 1;
41+
42+
for (int i = 0; i < N; ++i) {
43+
#pragma oss task reduction(+: [size]vla)
44+
{ vla[i]++; }
45+
}
46+
#pragma oss taskwait
47+
// Check
48+
for (int i = 0; i < N; ++i) {
49+
assert(vla[i] == 2);
50+
}
51+
}

Diff for: clang/test/OmpSs/IR/task_reduction.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ void foo(int x) {
1212
// CHECK: %1 = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.SHARED"(i32* %x.addr), "QUAL.OSS.DEP.WEAKREDUCTION"(i32 6000, i32* %x.addr, %struct._depend_unpack_t.0 (i32*)* @compute_dep.1, i32* %x.addr), "QUAL.OSS.DEP.REDUCTION.INIT"(i32* %x.addr, void (i32*, i32*, i64)* @red_init), "QUAL.OSS.DEP.REDUCTION.COMBINE"(i32* %x.addr, void (i32*, i32*, i64)* @red_comb) ]
1313

1414
// CHECK: define internal void @red_init(i32* %0, i32* %1, i64 %2)
15-
// CHECK: store i32 0, i32* %3, align 4
15+
// CHECK: store i32 0, i32* %arrayctor.dst.cur, align 4
1616

1717
// CHECK: define internal void @red_comb(i32* %0, i32* %1, i64 %2)
1818
// CHECK: %add = add nsw i32 %7, %8

Diff for: clang/test/OmpSs/IR/task_reduction.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ void foo(int &rx) {
99
// CHECK: %1 = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.SHARED"(i32* %0), "QUAL.OSS.DEP.REDUCTION"(i32 6000, i32* %0, %struct._depend_unpack_t (i32*)* @compute_dep, i32* %0), "QUAL.OSS.DEP.REDUCTION.INIT"(i32* %0, void (i32*, i32*, i64)* @red_init), "QUAL.OSS.DEP.REDUCTION.COMBINE"(i32* %0, void (i32*, i32*, i64)* @red_comb) ]
1010

1111
// CHECK: define internal void @red_init(i32* %0, i32* %1, i64 %2)
12-
// CHECK: store i32 0, i32* %3, align 4
12+
// CHECK: store i32 0, i32* %arrayctor.dst.cur, align 4
1313

1414
// CHECK: define internal void @red_comb(i32* %0, i32* %1, i64 %2)
1515
// CHECK: %add = add nsw i32 %7, %8

Diff for: llvm/lib/Transforms/OmpSs/OmpSsTransform.cpp

+26-33
Original file line numberDiff line numberDiff line change
@@ -1009,22 +1009,15 @@ struct OmpSs : public ModulePass {
10091009
return FuncVar;
10101010
}
10111011

1012-
// Build a new storage for the translated reduction
1013-
// returns the storage of the translated reduction
1014-
void translateReductionUnpackedDSA(IRBuilder<> &IRB, const DependInfo *DepInfo,
1015-
Value *DSA, Value *&UnpackedDSA,
1016-
Value *AddrTranslationTable,
1017-
const std::map<Value *, int> &DepSymToIdx) {
1012+
// Rewrites task_args using address_translation
1013+
void translateDep(
1014+
IRBuilder<> &IRB, const DependInfo *DepInfo, Value *DSA,
1015+
Value *&UnpackedDSA, Value *AddrTranslationTable,
1016+
const std::map<Value *, int> &DepSymToIdx) {
1017+
10181018
Function *ComputeDepFun = cast<Function>(DepInfo->ComputeDepFun);
10191019
CallInst *CallComputeDep = IRB.CreateCall(ComputeDepFun, DepInfo->Args);
1020-
llvm::Value *Base = IRB.CreateExtractValue(CallComputeDep, 0);
1021-
1022-
// Save the original type since we are going to cast...
1023-
Type *UnpackedDSAType = UnpackedDSA->getType();
1024-
Type *BaseType = Base->getType();
1025-
1026-
// Storage of the translated DSA
1027-
AllocaInst *UnpackedDSATranslated = IRB.CreateAlloca(BaseType);
1020+
llvm::Value *DepBase = IRB.CreateExtractValue(CallComputeDep, 0);
10281021

10291022
Value *Idx[2];
10301023
Idx[0] = ConstantInt::get(Type::getInt32Ty(IRB.getContext()), DepSymToIdx.at(DSA));
@@ -1039,21 +1032,20 @@ struct OmpSs : public ModulePass {
10391032
DeviceAddr = IRB.CreateLoad(DeviceAddr);
10401033

10411034
// Res = device_addr + (DSA_addr - local_addr)
1042-
Base = IRB.CreateBitCast(Base, Type::getInt8PtrTy(IRB.getContext()));
1043-
UnpackedDSA = IRB.CreateGEP(Base, IRB.CreateNeg(LocalAddr));
1044-
UnpackedDSA = IRB.CreateGEP(UnpackedDSA, DeviceAddr);
1045-
UnpackedDSA = IRB.CreateBitCast(UnpackedDSA, BaseType );
1046-
1047-
IRB.CreateStore(UnpackedDSA, UnpackedDSATranslated);
1048-
1049-
// FIXME: Since we have no info about if we have to pass to unpack a load of the alloca
1050-
// or not, check if the type has changed after call to compute_dep.
1051-
// Pointers -> no load
1052-
// basic types/structs/arrays/vla -> load
1053-
if (UnpackedDSAType == BaseType)
1054-
UnpackedDSA = IRB.CreateLoad(UnpackedDSATranslated);
1055-
else
1056-
UnpackedDSA = UnpackedDSATranslated;
1035+
Value *Translation = IRB.CreateBitCast(DepBase, Type::getInt8PtrTy(IRB.getContext()));
1036+
Translation = IRB.CreateGEP(Translation, IRB.CreateNeg(LocalAddr));
1037+
Translation = IRB.CreateGEP(Translation, DeviceAddr);
1038+
1039+
// Store the translation in task_args
1040+
if (auto *LUnpackedDSA = dyn_cast<LoadInst>(UnpackedDSA)) {
1041+
Translation = IRB.CreateBitCast(Translation, LUnpackedDSA->getType());
1042+
IRB.CreateStore(Translation, LUnpackedDSA->getPointerOperand());
1043+
// Reload what we have translated
1044+
UnpackedDSA = IRB.CreateLoad(LUnpackedDSA->getPointerOperand());
1045+
} else {
1046+
Translation = IRB.CreateBitCast(Translation, UnpackedDSA->getType()->getPointerElementType());
1047+
IRB.CreateStore(Translation, UnpackedDSA);
1048+
}
10571049
}
10581050

10591051
// Given a Outline Function assuming that task args are the first parameter, and
@@ -1147,10 +1139,11 @@ struct OmpSs : public ModulePass {
11471139
SmallVector<Value *, 4> UnpackParamsCopy(UnpackParams);
11481140
for (auto &DepInfo : DependsInfo.List) {
11491141
if (DepInfo->isReduction()) {
1150-
Value *DepBaseDSA = DepInfo->Args[0];
1151-
translateReductionUnpackedDSA(BBBuilder, DepInfo.get(), DepBaseDSA,
1152-
UnpackParams[StructToIdxMap.lookup(DepBaseDSA)],
1153-
AddrTranslationTable, DirInfo.DirEnv.DepSymToIdx);
1142+
Value *DepBaseDSA = DepInfo->Base;
1143+
translateDep(
1144+
BBBuilder, DepInfo.get(), DepBaseDSA,
1145+
UnpackParams[StructToIdxMap.lookup(DepBaseDSA)],
1146+
AddrTranslationTable, DirInfo.DirEnv.DepSymToIdx);
11541147
}
11551148
}
11561149
for (Instruction &I : *BBBuilder.GetInsertBlock()) {

Diff for: llvm/test/Transforms/OmpSs/task_reduction.ll

+50-54
Original file line numberDiff line numberDiff line change
@@ -51,40 +51,38 @@ entry:
5151
; CHECK: define internal void @nanos6_ol_task_region_foo0(%nanos6_task_args_foo0* %task_args, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table) {
5252
; CHECK-NEXT: entry:
5353
; CHECK-NEXT: %gep_n.addr = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 0
54-
; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr
54+
; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr, align 8
5555
; CHECK-NEXT: %gep_vla = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 1
56-
; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla
56+
; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla, align 8
5757
; CHECK-NEXT: %capt_gep = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 2
58-
; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep
58+
; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep, align 8
5959
; CHECK-NEXT: %0 = call %struct._depend_unpack_t @compute_dep(i32* %load_gep_n.addr)
6060
; CHECK-NEXT: %1 = extractvalue %struct._depend_unpack_t %0, 0
61-
; CHECK-NEXT: %2 = alloca i32*
6261
; CHECK-NEXT: %local_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 0
63-
; CHECK-NEXT: %3 = load i64, i64* %local_lookup_n.addr
62+
; CHECK-NEXT: %2 = load i64, i64* %local_lookup_n.addr, align 8
6463
; CHECK-NEXT: %device_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 1
65-
; CHECK-NEXT: %4 = load i64, i64* %device_lookup_n.addr
66-
; CHECK-NEXT: %5 = bitcast i32* %1 to i8*
67-
; CHECK-NEXT: %6 = sub i64 0, %3
68-
; CHECK-NEXT: %7 = getelementptr i8, i8* %5, i64 %6
69-
; CHECK-NEXT: %8 = getelementptr i8, i8* %7, i64 %4
70-
; CHECK-NEXT: %9 = bitcast i8* %8 to i32*
71-
; CHECK-NEXT: store i32* %9, i32** %2
72-
; CHECK-NEXT: %10 = load i32*, i32** %2
73-
; CHECK-NEXT: %11 = call %struct._depend_unpack_t.0 @compute_dep.1(i32* %load_gep_vla, i64 %load_capt_gep)
74-
; CHECK-NEXT: %12 = extractvalue %struct._depend_unpack_t.0 %11, 0
75-
; CHECK-NEXT: %13 = alloca i32*
64+
; CHECK-NEXT: %3 = load i64, i64* %device_lookup_n.addr, align 8
65+
; CHECK-NEXT: %4 = bitcast i32* %1 to i8*
66+
; CHECK-NEXT: %5 = sub i64 0, %2
67+
; CHECK-NEXT: %6 = getelementptr i8, i8* %4, i64 %5
68+
; CHECK-NEXT: %7 = getelementptr i8, i8* %6, i64 %3
69+
; CHECK-NEXT: %8 = bitcast i8* %7 to i32*
70+
; CHECK-NEXT: store i32* %8, i32** %gep_n.addr, align 8
71+
; CHECK-NEXT: %9 = load i32*, i32** %gep_n.addr, align 8
72+
; CHECK-NEXT: %10 = call %struct._depend_unpack_t.0 @compute_dep.1(i32* %load_gep_vla, i64 %load_capt_gep)
73+
; CHECK-NEXT: %11 = extractvalue %struct._depend_unpack_t.0 %10, 0
7674
; CHECK-NEXT: %local_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 0
77-
; CHECK-NEXT: %14 = load i64, i64* %local_lookup_vla
75+
; CHECK-NEXT: %12 = load i64, i64* %local_lookup_vla, align 8
7876
; CHECK-NEXT: %device_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 1
79-
; CHECK-NEXT: %15 = load i64, i64* %device_lookup_vla
80-
; CHECK-NEXT: %16 = bitcast i32* %12 to i8*
81-
; CHECK-NEXT: %17 = sub i64 0, %14
82-
; CHECK-NEXT: %18 = getelementptr i8, i8* %16, i64 %17
83-
; CHECK-NEXT: %19 = getelementptr i8, i8* %18, i64 %15
84-
; CHECK-NEXT: %20 = bitcast i8* %19 to i32*
85-
; CHECK-NEXT: store i32* %20, i32** %13
86-
; CHECK-NEXT: %21 = load i32*, i32** %13
87-
; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo0(i32* %10, i32* %21, i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
77+
; CHECK-NEXT: %13 = load i64, i64* %device_lookup_vla, align 8
78+
; CHECK-NEXT: %14 = bitcast i32* %11 to i8*
79+
; CHECK-NEXT: %15 = sub i64 0, %12
80+
; CHECK-NEXT: %16 = getelementptr i8, i8* %14, i64 %15
81+
; CHECK-NEXT: %17 = getelementptr i8, i8* %16, i64 %13
82+
; CHECK-NEXT: %18 = bitcast i8* %17 to i32*
83+
; CHECK-NEXT: store i32* %18, i32** %gep_vla, align 8
84+
; CHECK-NEXT: %19 = load i32*, i32** %gep_vla, align 8
85+
; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo0(i32* %9, i32* %19, i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
8886
; CHECK-NEXT: ret void
8987
; CHECK-NEXT: }
9088

@@ -137,7 +135,7 @@ entry:
137135
arrayctor.loop: ; preds = %arrayctor.loop, %entry
138136
%arrayctor.dst.cur = phi i32* [ %3, %entry ], [ %arrayctor.dst.next, %arrayctor.loop ]
139137
%arrayctor.src.cur = phi i32* [ %4, %entry ], [ %arrayctor.src.next, %arrayctor.loop ]
140-
store i32 0, i32* %3, align 4
138+
store i32 0, i32* %arrayctor.dst.cur, align 4
141139
%arrayctor.dst.next = getelementptr inbounds i32, i32* %arrayctor.dst.cur, i64 1
142140
%arrayctor.src.next = getelementptr inbounds i32, i32* %arrayctor.src.cur, i64 1
143141
%arrayctor.done = icmp eq i32* %arrayctor.dst.next, %arrayctor.dst.end
@@ -237,40 +235,38 @@ entry:
237235
; CHECK: define internal void @nanos6_ol_task_region_foo10(%nanos6_task_args_foo10* %task_args, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table) {
238236
; CHECK-NEXT: entry:
239237
; CHECK-NEXT: %gep_n.addr = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 0
240-
; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr
238+
; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr, align 8
241239
; CHECK-NEXT: %gep_vla = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 1
242-
; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla
240+
; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla, align 8
243241
; CHECK-NEXT: %capt_gep = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 2
244-
; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep
242+
; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep, align 8
245243
; CHECK-NEXT: %0 = call %struct._depend_unpack_t.1 @compute_dep.4(i32* %load_gep_n.addr)
246244
; CHECK-NEXT: %1 = extractvalue %struct._depend_unpack_t.1 %0, 0
247-
; CHECK-NEXT: %2 = alloca i32*
248245
; CHECK-NEXT: %local_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 0
249-
; CHECK-NEXT: %3 = load i64, i64* %local_lookup_n.addr
246+
; CHECK-NEXT: %2 = load i64, i64* %local_lookup_n.addr, align 8
250247
; CHECK-NEXT: %device_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 1
251-
; CHECK-NEXT: %4 = load i64, i64* %device_lookup_n.addr
252-
; CHECK-NEXT: %5 = bitcast i32* %1 to i8*
253-
; CHECK-NEXT: %6 = sub i64 0, %3
254-
; CHECK-NEXT: %7 = getelementptr i8, i8* %5, i64 %6
255-
; CHECK-NEXT: %8 = getelementptr i8, i8* %7, i64 %4
256-
; CHECK-NEXT: %9 = bitcast i8* %8 to i32*
257-
; CHECK-NEXT: store i32* %9, i32** %2
258-
; CHECK-NEXT: %10 = load i32*, i32** %2
259-
; CHECK-NEXT: %11 = call %struct._depend_unpack_t.2 @compute_dep.5(i32* %load_gep_vla, i64 %load_capt_gep)
260-
; CHECK-NEXT: %12 = extractvalue %struct._depend_unpack_t.2 %11, 0
261-
; CHECK-NEXT: %13 = alloca i32*
248+
; CHECK-NEXT: %3 = load i64, i64* %device_lookup_n.addr, align 8
249+
; CHECK-NEXT: %4 = bitcast i32* %1 to i8*
250+
; CHECK-NEXT: %5 = sub i64 0, %2
251+
; CHECK-NEXT: %6 = getelementptr i8, i8* %4, i64 %5
252+
; CHECK-NEXT: %7 = getelementptr i8, i8* %6, i64 %3
253+
; CHECK-NEXT: %8 = bitcast i8* %7 to i32*
254+
; CHECK-NEXT: store i32* %8, i32** %gep_n.addr, align 8
255+
; CHECK-NEXT: %9 = load i32*, i32** %gep_n.addr, align 8
256+
; CHECK-NEXT: %10 = call %struct._depend_unpack_t.2 @compute_dep.5(i32* %load_gep_vla, i64 %load_capt_gep)
257+
; CHECK-NEXT: %11 = extractvalue %struct._depend_unpack_t.2 %10, 0
262258
; CHECK-NEXT: %local_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 0
263-
; CHECK-NEXT: %14 = load i64, i64* %local_lookup_vla
259+
; CHECK-NEXT: %12 = load i64, i64* %local_lookup_vla, align 8
264260
; CHECK-NEXT: %device_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 1
265-
; CHECK-NEXT: %15 = load i64, i64* %device_lookup_vla
266-
; CHECK-NEXT: %16 = bitcast i32* %12 to i8*
267-
; CHECK-NEXT: %17 = sub i64 0, %14
268-
; CHECK-NEXT: %18 = getelementptr i8, i8* %16, i64 %17
269-
; CHECK-NEXT: %19 = getelementptr i8, i8* %18, i64 %15
270-
; CHECK-NEXT: %20 = bitcast i8* %19 to i32*
271-
; CHECK-NEXT: store i32* %20, i32** %13
272-
; CHECK-NEXT: %21 = load i32*, i32** %13
273-
; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo10(i32* %10, i32* %21, i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
261+
; CHECK-NEXT: %13 = load i64, i64* %device_lookup_vla, align 8
262+
; CHECK-NEXT: %14 = bitcast i32* %11 to i8*
263+
; CHECK-NEXT: %15 = sub i64 0, %12
264+
; CHECK-NEXT: %16 = getelementptr i8, i8* %14, i64 %15
265+
; CHECK-NEXT: %17 = getelementptr i8, i8* %16, i64 %13
266+
; CHECK-NEXT: %18 = bitcast i8* %17 to i32*
267+
; CHECK-NEXT: store i32* %18, i32** %gep_vla, align 8
268+
; CHECK-NEXT: %19 = load i32*, i32** %gep_vla, align 8
269+
; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo10(i32* %9, i32* %19, i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
274270
; CHECK-NEXT: ret void
275271
; CHECK-NEXT: }
276272

@@ -314,7 +310,7 @@ entry:
314310
arrayctor.loop: ; preds = %arrayctor.loop, %entry
315311
%arrayctor.dst.cur = phi i32* [ %3, %entry ], [ %arrayctor.dst.next, %arrayctor.loop ]
316312
%arrayctor.src.cur = phi i32* [ %4, %entry ], [ %arrayctor.src.next, %arrayctor.loop ]
317-
store i32 0, i32* %3, align 4
313+
store i32 0, i32* %arrayctor.dst.cur, align 4
318314
%arrayctor.dst.next = getelementptr inbounds i32, i32* %arrayctor.dst.cur, i64 1
319315
%arrayctor.src.next = getelementptr inbounds i32, i32* %arrayctor.src.cur, i64 1
320316
%arrayctor.done = icmp eq i32* %arrayctor.dst.next, %arrayctor.dst.end

0 commit comments

Comments
 (0)