Skip to content

Commit 2651d09

Browse files
huaatiantru
authored and committed
[llvm][CodeGen] Resolve issues when updating live intervals in window scheduler (#101945)
Corrupted live interval information can cause window scheduling to crash in some cases. The ModuloScheduleExpander creates new kernel, prolog, and epilog MBBs without registering them in the live interval maps; adding the missing `LIS.insertMBBInMaps` calls for these MBBs allows the live interval information to be analyzed correctly in the window scheduler. (cherry picked from commit 43ba109)
1 parent f64404e commit 2651d09

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

llvm/lib/CodeGen/ModuloSchedule.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
130130
// Generate the prolog instructions that set up the pipeline.
131131
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
132132
MF.insert(BB->getIterator(), KernelBB);
133+
LIS.insertMBBInMaps(KernelBB);
133134

134135
// Rearrange the instructions to generate the new, pipelined loop,
135136
// and update register names as needed.
@@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
210211
NewBB->transferSuccessors(PredBB);
211212
PredBB->addSuccessor(NewBB);
212213
PredBB = NewBB;
214+
LIS.insertMBBInMaps(NewBB);
213215

214216
// Generate instructions for each appropriate stage. Process instructions
215217
// in original program order.
@@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
283285

284286
PredBB->replaceSuccessor(LoopExitBB, NewBB);
285287
NewBB->addSuccessor(LoopExitBB);
288+
LIS.insertMBBInMaps(NewBB);
286289

287290
if (EpilogStart == LoopExitBB)
288291
EpilogStart = NewBB;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,217 @@
1+
# REQUIRES: asserts
2+
#
3+
# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
4+
# RUN: -window-sched=force -filetype=null -window-search-num=100 \
5+
# RUN: -window-search-ratio=100 -window-diff-limit=0 -verify-machineinstrs \
6+
# RUN: 2>&1 | FileCheck %s
7+
8+
# The bug was reported at https://github.com/llvm/llvm-project/pull/99454.
9+
# It is caused by the corruption of live intervals in certain scenarios.
10+
#
11+
# We check the newly generated MBBs after successful scheduling here.
12+
# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
13+
# CHECK: prolog:
14+
# CHECK: bb.5:
15+
# CHECK: New block
16+
# CHECK: bb.6:
17+
# CHECK: epilog:
18+
# CHECK: bb.7:
19+
# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
20+
# CHECK: prolog:
21+
# CHECK: bb.8:
22+
# CHECK: New block
23+
# CHECK: bb.9:
24+
# CHECK: epilog:
25+
# CHECK: bb.10:
26+
27+
--- |
28+
target triple = "hexagon"
29+
30+
@_dp_ctrl_calc_tu_temp2_fp = global i64 0
31+
@_dp_ctrl_calc_tu_temp1_fp = global i32 0
32+
@dp_panel_update_tu_timings___trans_tmp_5 = global i64 0
33+
@_dp_ctrl_calc_tu___trans_tmp_8 = global i64 0
34+
35+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
36+
declare i8 @div64_u64_rem(i32, ptr)
37+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
38+
39+
define void @dp_ctrl_calc_tu_parameters() {
40+
if.end.i:
41+
%rem.i11.i = alloca i64, align 8
42+
%rem.i.i = alloca i64, align 8
43+
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i)
44+
%call.i.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i)
45+
%conv1.i.i = zext i8 %call.i.i to i64
46+
%rem.promoted.i.i = load i64, ptr %rem.i11.i, align 8
47+
br label %do.body.i.i
48+
49+
do.body.i.i:
50+
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %do.body.i.i ], [ -32, %if.end.i ]
51+
%sub9.i.i = phi i64 [ %rem.promoted.i.i, %if.end.i ], [ %sub8.i.i.7, %do.body.i.i ]
52+
%res_abs.0.i.i = phi i64 [ %conv1.i.i, %if.end.i ], [ %res_abs.1.i.i.7, %do.body.i.i ]
53+
%cmp.not.i.i = icmp ne i64 %sub9.i.i, 0
54+
%sub.i.neg.i = sext i1 %cmp.not.i.i to i64
55+
%sub8.i.i = add i64 %sub9.i.i, %sub.i.neg.i
56+
%0 = shl i64 %res_abs.0.i.i, 2
57+
%1 = select i1 %cmp.not.i.i, i64 2, i64 0
58+
%shl.i.i.5 = or disjoint i64 %0, %1
59+
%cmp.not.i.i.5 = icmp ne i64 %sub8.i.i, 0
60+
%sub.i.neg.i.5 = sext i1 %cmp.not.i.i.5 to i64
61+
%sub8.i.i.5 = add i64 %sub8.i.i, %sub.i.neg.i.5
62+
%or.i.i.5 = zext i1 %cmp.not.i.i.5 to i64
63+
%res_abs.1.i.i.5 = or disjoint i64 %shl.i.i.5, %or.i.i.5
64+
%cmp.not.i.i.6 = icmp ne i64 %sub8.i.i.5, 0
65+
%sub.i.neg.i.6 = sext i1 %cmp.not.i.i.6 to i64
66+
%sub8.i.i.6 = add i64 %sub8.i.i.5, %sub.i.neg.i.6
67+
%2 = shl i64 %res_abs.1.i.i.5, 2
68+
%3 = select i1 %cmp.not.i.i.6, i64 2, i64 0
69+
%shl.i.i.7 = or disjoint i64 %2, %3
70+
%cmp.not.i.i.7 = icmp ne i64 %sub8.i.i.6, 0
71+
%sub.i.neg.i.7 = sext i1 %cmp.not.i.i.7 to i64
72+
%sub8.i.i.7 = add i64 %sub8.i.i.6, %sub.i.neg.i.7
73+
%or.i.i.7 = zext i1 %cmp.not.i.i.7 to i64
74+
%res_abs.1.i.i.7 = or disjoint i64 %shl.i.i.7, %or.i.i.7
75+
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 8
76+
%tobool.not.i.i.7 = icmp eq i32 %lsr.iv.next2, 0
77+
br i1 %tobool.not.i.i.7, label %fec_check.i, label %do.body.i.i
78+
79+
fec_check.i:
80+
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i)
81+
store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu_temp2_fp, align 8
82+
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i)
83+
%call.i12.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i)
84+
%conv1.i13.i = zext i8 %call.i12.i to i64
85+
%rem.promoted.i14.i = load i64, ptr %rem.i11.i, align 8
86+
br label %do.body.i15.i
87+
88+
do.body.i15.i:
89+
%lsr.iv = phi i32 [ %lsr.iv.next, %do.body.i15.i ], [ -32, %fec_check.i ]
90+
%sub9.i16.i = phi i64 [ %rem.promoted.i14.i, %fec_check.i ], [ %sub8.i22.i.7, %do.body.i15.i ]
91+
%res_abs.0.i17.i = phi i64 [ %conv1.i13.i, %fec_check.i ], [ %res_abs.1.i24.i.7, %do.body.i15.i ]
92+
%cmp.not.i20.i = icmp ugt i64 %sub9.i16.i, 999
93+
%sub.i21.neg.i = select i1 %cmp.not.i20.i, i64 -1000, i64 0
94+
%sub8.i22.i = add i64 %sub.i21.neg.i, %sub9.i16.i
95+
%4 = shl i64 %res_abs.0.i17.i, 2
96+
%5 = select i1 %cmp.not.i20.i, i64 2, i64 0
97+
%shl.i19.i.7 = or disjoint i64 %4, %5
98+
%cmp.not.i20.i.7 = icmp ugt i64 %sub8.i22.i, 999
99+
%sub.i21.neg.i.7 = select i1 %cmp.not.i20.i.7, i64 -1000, i64 0
100+
%sub8.i22.i.7 = add i64 %sub.i21.neg.i.7, %sub8.i22.i
101+
%or.i23.i.7 = zext i1 %cmp.not.i20.i.7 to i64
102+
%res_abs.1.i24.i.7 = or disjoint i64 %shl.i19.i.7, %or.i23.i.7
103+
%lsr.iv.next = add nsw i32 %lsr.iv, 8
104+
%tobool.not.i26.i.7 = icmp eq i32 %lsr.iv.next, 0
105+
br i1 %tobool.not.i26.i.7, label %_dp_ctrl_calc_tu.exit, label %do.body.i15.i
106+
107+
_dp_ctrl_calc_tu.exit:
108+
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i)
109+
%conv.i = trunc i64 %res_abs.1.i24.i.7 to i32
110+
store i32 %conv.i, ptr @_dp_ctrl_calc_tu_temp1_fp, align 4
111+
%conv5.i = and i64 %res_abs.1.i24.i.7, 4294967295
112+
store i64 %conv5.i, ptr @dp_panel_update_tu_timings___trans_tmp_5, align 8
113+
store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu___trans_tmp_8, align 8
114+
ret void
115+
}
116+
117+
...
118+
---
119+
name: dp_ctrl_calc_tu_parameters
120+
tracksRegLiveness: true
121+
stack:
122+
- { id: 0, name: rem.i11.i, type: default, offset: 0, size: 8, alignment: 8}
123+
body: |
124+
bb.0:
125+
successors: %bb.1(0x80000000)
126+
127+
%0:intregs = A2_tfrsi 0
128+
%1:intregs = PS_fi %stack.0.rem.i11.i, 0
129+
%2:intregs = A2_tfrsi 0
130+
%3:doubleregs = A4_combineir 0, %2
131+
%4:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0
132+
%5:doubleregs = A2_tfrpi 0
133+
J2_loop0i %bb.1, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
134+
135+
bb.1 (machine-block-address-taken):
136+
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
137+
138+
%6:doubleregs = PHI %4, %bb.0, %7, %bb.1
139+
%8:doubleregs = PHI %3, %bb.0, %9, %bb.1
140+
%10:predregs = C2_cmpeqp %6, %5
141+
%11:intregs = C2_muxii %10, 0, -1
142+
%12:doubleregs = A2_addsp %11, %6
143+
%13:doubleregs = S2_asl_i_p %8, 2
144+
%14:intregs = S2_setbit_i %13.isub_lo, 1
145+
%15:intregs = C2_mux %10, %13.isub_lo, %14
146+
%16:predregs = C2_cmpeqp %12, %5
147+
%17:intregs = C2_muxii %16, 0, -1
148+
%18:doubleregs = A2_addsp %17, %12
149+
%19:intregs = S2_setbit_i %15, 0
150+
%20:intregs = C2_mux %16, %15, %19
151+
%21:predregs = C2_cmpeqp %18, %5
152+
%22:intregs = C2_muxii %21, 0, -1
153+
%23:doubleregs = A2_addsp %22, %18
154+
%24:intregs = S2_asl_i_r %20, 2
155+
%25:intregs = S2_extractu %8.isub_lo, 2, 28
156+
%26:intregs = S2_asl_i_r_or %25, %13.isub_hi, 2
157+
%27:intregs = S2_setbit_i %24, 1
158+
%28:intregs = C2_mux %21, %24, %27
159+
%29:predregs = C2_cmpeqp %23, %5
160+
%30:intregs = C2_muxii %29, 0, -1
161+
%7:doubleregs = A2_addsp %30, %23
162+
%31:intregs = S2_setbit_i %28, 0
163+
%32:intregs = C2_mux %29, %28, %31
164+
%9:doubleregs = REG_SEQUENCE %26, %subreg.isub_hi, %32, %subreg.isub_lo
165+
ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
166+
J2_jump %bb.2, implicit-def dead $pc
167+
168+
bb.2:
169+
successors: %bb.3(0x80000000)
170+
171+
S2_storerdgp @_dp_ctrl_calc_tu_temp2_fp, %9, implicit $gp
172+
%33:intregs = A2_tfrsi 0
173+
%34:intregs = PS_fi %stack.0.rem.i11.i, 0
174+
%35:intregs = A2_tfrsi 0
175+
%36:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0
176+
%37:doubleregs = A2_tfrpi 124
177+
%38:intregs = A2_tfrsi -1000
178+
%39:intregs = A2_tfrsi -1
179+
J2_loop0i %bb.3, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
180+
181+
bb.3 (machine-block-address-taken):
182+
successors: %bb.4(0x04000000), %bb.3(0x7c000000)
183+
184+
%40:doubleregs = PHI %36, %bb.2, %41, %bb.3
185+
%42:intregs = PHI %35, %bb.2, %43, %bb.3
186+
%44:intregs = PHI %33, %bb.2, %45, %bb.3
187+
%46:doubleregs = S2_lsr_i_p %40, 3
188+
%47:predregs = C2_cmpgtup %46, %37
189+
%48:intregs = C2_mux %47, %38, %33
190+
%49:intregs = C2_mux %47, %39, %33
191+
%50:doubleregs = REG_SEQUENCE %49, %subreg.isub_hi, %48, %subreg.isub_lo
192+
%51:doubleregs = A2_addp %50, %40
193+
%52:intregs = S2_asl_i_r %42, 2
194+
%53:intregs = S2_extractu %42, 2, 30
195+
%45:intregs = S2_asl_i_r_or %53, %44, 2
196+
%54:intregs = S2_setbit_i %52, 1
197+
%55:intregs = C2_mux %47, %54, %52
198+
%56:doubleregs = S2_lsr_i_p %51, 3
199+
%57:predregs = C2_cmpgtup %56, %37
200+
%58:intregs = C2_mux %57, %38, %33
201+
%59:intregs = C2_mux %57, %39, %33
202+
%60:doubleregs = REG_SEQUENCE %59, %subreg.isub_hi, %58, %subreg.isub_lo
203+
%41:doubleregs = A2_addp %60, %51
204+
%61:intregs = S2_setbit_i %55, 0
205+
%43:intregs = C2_mux %57, %61, %55
206+
ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
207+
J2_jump %bb.4, implicit-def dead $pc
208+
209+
bb.4:
210+
S2_storerigp @_dp_ctrl_calc_tu_temp1_fp, %43, implicit $gp
211+
%62:intregs = A2_tfrsi 0
212+
%63:doubleregs = REG_SEQUENCE %43, %subreg.isub_lo, %62, %subreg.isub_hi
213+
S2_storerdgp @dp_panel_update_tu_timings___trans_tmp_5, %63, implicit $gp
214+
S2_storerdgp @_dp_ctrl_calc_tu___trans_tmp_8, %9, implicit $gp
215+
PS_jmpret $r31, implicit-def dead $pc
216+
217+
...

0 commit comments

Comments
 (0)