@@ -51,40 +51,38 @@ entry:
51
51
; CHECK: define internal void @nanos6_ol_task_region_foo0(%nanos6_task_args_foo0* %task_args, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table) {
52
52
; CHECK-NEXT: entry:
53
53
; CHECK-NEXT: %gep_n.addr = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 0
54
- ; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr
54
+ ; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr, align 8
55
55
; CHECK-NEXT: %gep_vla = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 1
56
- ; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla
56
+ ; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla, align 8
57
57
; CHECK-NEXT: %capt_gep = getelementptr %nanos6_task_args_foo0, %nanos6_task_args_foo0* %task_args, i32 0, i32 2
58
- ; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep
58
+ ; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep, align 8
59
59
; CHECK-NEXT: %0 = call %struct._depend_unpack_t @compute_dep(i32* %load_gep_n.addr)
60
60
; CHECK-NEXT: %1 = extractvalue %struct._depend_unpack_t %0, 0
61
- ; CHECK-NEXT: %2 = alloca i32*
62
61
; CHECK-NEXT: %local_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 0
63
- ; CHECK-NEXT: %3 = load i64, i64* %local_lookup_n.addr
62
+ ; CHECK-NEXT: %2 = load i64, i64* %local_lookup_n.addr, align 8
64
63
; CHECK-NEXT: %device_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 1
65
- ; CHECK-NEXT: %4 = load i64, i64* %device_lookup_n.addr
66
- ; CHECK-NEXT: %5 = bitcast i32* %1 to i8*
67
- ; CHECK-NEXT: %6 = sub i64 0, %3
68
- ; CHECK-NEXT: %7 = getelementptr i8, i8* %5, i64 %6
69
- ; CHECK-NEXT: %8 = getelementptr i8, i8* %7, i64 %4
70
- ; CHECK-NEXT: %9 = bitcast i8* %8 to i32*
71
- ; CHECK-NEXT: store i32* %9, i32** %2
72
- ; CHECK-NEXT: %10 = load i32*, i32** %2
73
- ; CHECK-NEXT: %11 = call %struct._depend_unpack_t.0 @compute_dep.1(i32* %load_gep_vla, i64 %load_capt_gep)
74
- ; CHECK-NEXT: %12 = extractvalue %struct._depend_unpack_t.0 %11, 0
75
- ; CHECK-NEXT: %13 = alloca i32*
64
+ ; CHECK-NEXT: %3 = load i64, i64* %device_lookup_n.addr, align 8
65
+ ; CHECK-NEXT: %4 = bitcast i32* %1 to i8*
66
+ ; CHECK-NEXT: %5 = sub i64 0, %2
67
+ ; CHECK-NEXT: %6 = getelementptr i8, i8* %4, i64 %5
68
+ ; CHECK-NEXT: %7 = getelementptr i8, i8* %6, i64 %3
69
+ ; CHECK-NEXT: %8 = bitcast i8* %7 to i32*
70
+ ; CHECK-NEXT: store i32* %8, i32** %gep_n.addr, align 8
71
+ ; CHECK-NEXT: %9 = load i32*, i32** %gep_n.addr, align 8
72
+ ; CHECK-NEXT: %10 = call %struct._depend_unpack_t.0 @compute_dep.1(i32* %load_gep_vla, i64 %load_capt_gep)
73
+ ; CHECK-NEXT: %11 = extractvalue %struct._depend_unpack_t.0 %10, 0
76
74
; CHECK-NEXT: %local_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 0
77
- ; CHECK-NEXT: %14 = load i64, i64* %local_lookup_vla
75
+ ; CHECK-NEXT: %12 = load i64, i64* %local_lookup_vla, align 8
78
76
; CHECK-NEXT: %device_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 1
79
- ; CHECK-NEXT: %15 = load i64, i64* %device_lookup_vla
80
- ; CHECK-NEXT: %16 = bitcast i32* %12 to i8*
81
- ; CHECK-NEXT: %17 = sub i64 0, %14
82
- ; CHECK-NEXT: %18 = getelementptr i8, i8* %16 , i64 %17
83
- ; CHECK-NEXT: %19 = getelementptr i8, i8* %18 , i64 %15
84
- ; CHECK-NEXT: %20 = bitcast i8* %19 to i32*
85
- ; CHECK-NEXT: store i32* %20 , i32** %13
86
- ; CHECK-NEXT: %21 = load i32*, i32** %13
87
- ; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo0(i32* %10 , i32* %21 , i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
77
+ ; CHECK-NEXT: %13 = load i64, i64* %device_lookup_vla, align 8
78
+ ; CHECK-NEXT: %14 = bitcast i32* %11 to i8*
79
+ ; CHECK-NEXT: %15 = sub i64 0, %12
80
+ ; CHECK-NEXT: %16 = getelementptr i8, i8* %14 , i64 %15
81
+ ; CHECK-NEXT: %17 = getelementptr i8, i8* %16 , i64 %13
82
+ ; CHECK-NEXT: %18 = bitcast i8* %17 to i32*
83
+ ; CHECK-NEXT: store i32* %18 , i32** %gep_vla, align 8
84
+ ; CHECK-NEXT: %19 = load i32*, i32** %gep_vla, align 8
85
+ ; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo0(i32* %9 , i32* %19 , i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
88
86
; CHECK-NEXT: ret void
89
87
; CHECK-NEXT: }
90
88
@@ -137,7 +135,7 @@ entry:
137
135
arrayctor.loop: ; preds = %arrayctor.loop, %entry
138
136
%arrayctor.dst.cur = phi i32* [ %3 , %entry ], [ %arrayctor.dst.next , %arrayctor.loop ]
139
137
%arrayctor.src.cur = phi i32* [ %4 , %entry ], [ %arrayctor.src.next , %arrayctor.loop ]
140
- store i32 0 , i32* %3 , align 4
138
+ store i32 0 , i32* %arrayctor.dst.cur , align 4
141
139
%arrayctor.dst.next = getelementptr inbounds i32 , i32* %arrayctor.dst.cur , i64 1
142
140
%arrayctor.src.next = getelementptr inbounds i32 , i32* %arrayctor.src.cur , i64 1
143
141
%arrayctor.done = icmp eq i32* %arrayctor.dst.next , %arrayctor.dst.end
@@ -237,40 +235,38 @@ entry:
237
235
; CHECK: define internal void @nanos6_ol_task_region_foo10(%nanos6_task_args_foo10* %task_args, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table) {
238
236
; CHECK-NEXT: entry:
239
237
; CHECK-NEXT: %gep_n.addr = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 0
240
- ; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr
238
+ ; CHECK-NEXT: %load_gep_n.addr = load i32*, i32** %gep_n.addr, align 8
241
239
; CHECK-NEXT: %gep_vla = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 1
242
- ; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla
240
+ ; CHECK-NEXT: %load_gep_vla = load i32*, i32** %gep_vla, align 8
243
241
; CHECK-NEXT: %capt_gep = getelementptr %nanos6_task_args_foo10, %nanos6_task_args_foo10* %task_args, i32 0, i32 2
244
- ; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep
242
+ ; CHECK-NEXT: %load_capt_gep = load i64, i64* %capt_gep, align 8
245
243
; CHECK-NEXT: %0 = call %struct._depend_unpack_t.1 @compute_dep.4(i32* %load_gep_n.addr)
246
244
; CHECK-NEXT: %1 = extractvalue %struct._depend_unpack_t.1 %0, 0
247
- ; CHECK-NEXT: %2 = alloca i32*
248
245
; CHECK-NEXT: %local_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 0
249
- ; CHECK-NEXT: %3 = load i64, i64* %local_lookup_n.addr
246
+ ; CHECK-NEXT: %2 = load i64, i64* %local_lookup_n.addr, align 8
250
247
; CHECK-NEXT: %device_lookup_n.addr = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 0, i32 1
251
- ; CHECK-NEXT: %4 = load i64, i64* %device_lookup_n.addr
252
- ; CHECK-NEXT: %5 = bitcast i32* %1 to i8*
253
- ; CHECK-NEXT: %6 = sub i64 0, %3
254
- ; CHECK-NEXT: %7 = getelementptr i8, i8* %5, i64 %6
255
- ; CHECK-NEXT: %8 = getelementptr i8, i8* %7, i64 %4
256
- ; CHECK-NEXT: %9 = bitcast i8* %8 to i32*
257
- ; CHECK-NEXT: store i32* %9, i32** %2
258
- ; CHECK-NEXT: %10 = load i32*, i32** %2
259
- ; CHECK-NEXT: %11 = call %struct._depend_unpack_t.2 @compute_dep.5(i32* %load_gep_vla, i64 %load_capt_gep)
260
- ; CHECK-NEXT: %12 = extractvalue %struct._depend_unpack_t.2 %11, 0
261
- ; CHECK-NEXT: %13 = alloca i32*
248
+ ; CHECK-NEXT: %3 = load i64, i64* %device_lookup_n.addr, align 8
249
+ ; CHECK-NEXT: %4 = bitcast i32* %1 to i8*
250
+ ; CHECK-NEXT: %5 = sub i64 0, %2
251
+ ; CHECK-NEXT: %6 = getelementptr i8, i8* %4, i64 %5
252
+ ; CHECK-NEXT: %7 = getelementptr i8, i8* %6, i64 %3
253
+ ; CHECK-NEXT: %8 = bitcast i8* %7 to i32*
254
+ ; CHECK-NEXT: store i32* %8, i32** %gep_n.addr, align 8
255
+ ; CHECK-NEXT: %9 = load i32*, i32** %gep_n.addr, align 8
256
+ ; CHECK-NEXT: %10 = call %struct._depend_unpack_t.2 @compute_dep.5(i32* %load_gep_vla, i64 %load_capt_gep)
257
+ ; CHECK-NEXT: %11 = extractvalue %struct._depend_unpack_t.2 %10, 0
262
258
; CHECK-NEXT: %local_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 0
263
- ; CHECK-NEXT: %14 = load i64, i64* %local_lookup_vla
259
+ ; CHECK-NEXT: %12 = load i64, i64* %local_lookup_vla, align 8
264
260
; CHECK-NEXT: %device_lookup_vla = getelementptr %nanos6_address_translation_entry_t, %nanos6_address_translation_entry_t* %address_translation_table, i32 1, i32 1
265
- ; CHECK-NEXT: %15 = load i64, i64* %device_lookup_vla
266
- ; CHECK-NEXT: %16 = bitcast i32* %12 to i8*
267
- ; CHECK-NEXT: %17 = sub i64 0, %14
268
- ; CHECK-NEXT: %18 = getelementptr i8, i8* %16 , i64 %17
269
- ; CHECK-NEXT: %19 = getelementptr i8, i8* %18 , i64 %15
270
- ; CHECK-NEXT: %20 = bitcast i8* %19 to i32*
271
- ; CHECK-NEXT: store i32* %20 , i32** %13
272
- ; CHECK-NEXT: %21 = load i32*, i32** %13
273
- ; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo10(i32* %10 , i32* %21 , i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
261
+ ; CHECK-NEXT: %13 = load i64, i64* %device_lookup_vla, align 8
262
+ ; CHECK-NEXT: %14 = bitcast i32* %11 to i8*
263
+ ; CHECK-NEXT: %15 = sub i64 0, %12
264
+ ; CHECK-NEXT: %16 = getelementptr i8, i8* %14 , i64 %15
265
+ ; CHECK-NEXT: %17 = getelementptr i8, i8* %16 , i64 %13
266
+ ; CHECK-NEXT: %18 = bitcast i8* %17 to i32*
267
+ ; CHECK-NEXT: store i32* %18 , i32** %gep_vla, align 8
268
+ ; CHECK-NEXT: %19 = load i32*, i32** %gep_vla, align 8
269
+ ; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo10(i32* %9 , i32* %19 , i64 %load_capt_gep, i8* %device_env, %nanos6_address_translation_entry_t* %address_translation_table)
274
270
; CHECK-NEXT: ret void
275
271
; CHECK-NEXT: }
276
272
@@ -314,7 +310,7 @@ entry:
314
310
arrayctor.loop: ; preds = %arrayctor.loop, %entry
315
311
%arrayctor.dst.cur = phi i32* [ %3 , %entry ], [ %arrayctor.dst.next , %arrayctor.loop ]
316
312
%arrayctor.src.cur = phi i32* [ %4 , %entry ], [ %arrayctor.src.next , %arrayctor.loop ]
317
- store i32 0 , i32* %3 , align 4
313
+ store i32 0 , i32* %arrayctor.dst.cur , align 4
318
314
%arrayctor.dst.next = getelementptr inbounds i32 , i32* %arrayctor.dst.cur , i64 1
319
315
%arrayctor.src.next = getelementptr inbounds i32 , i32* %arrayctor.src.cur , i64 1
320
316
%arrayctor.done = icmp eq i32* %arrayctor.dst.next , %arrayctor.dst.end
0 commit comments