@@ -52,3 +52,126 @@ define float @ff(ptr %p) {
5252 %sum = fadd float %sum3 , %v4
5353 ret float %sum
5454}
55+
56+ define void @combine_v16i8 (ptr noundef align 16 %ptr1 , ptr noundef align 16 %ptr2 ) {
57+ ; ENABLED-LABEL: combine_v16i8
58+ ; ENABLED: ld.v4.u32
59+ %val0 = load i8 , ptr %ptr1 , align 16
60+ %ptr1.1 = getelementptr inbounds i8 , ptr %ptr1 , i64 1
61+ %val1 = load i8 , ptr %ptr1.1 , align 1
62+ %ptr1.2 = getelementptr inbounds i8 , ptr %ptr1 , i64 2
63+ %val2 = load i8 , ptr %ptr1.2 , align 2
64+ %ptr1.3 = getelementptr inbounds i8 , ptr %ptr1 , i64 3
65+ %val3 = load i8 , ptr %ptr1.3 , align 1
66+ %ptr1.4 = getelementptr inbounds i8 , ptr %ptr1 , i64 4
67+ %val4 = load i8 , ptr %ptr1.4 , align 4
68+ %ptr1.5 = getelementptr inbounds i8 , ptr %ptr1 , i64 5
69+ %val5 = load i8 , ptr %ptr1.5 , align 1
70+ %ptr1.6 = getelementptr inbounds i8 , ptr %ptr1 , i64 6
71+ %val6 = load i8 , ptr %ptr1.6 , align 2
72+ %ptr1.7 = getelementptr inbounds i8 , ptr %ptr1 , i64 7
73+ %val7 = load i8 , ptr %ptr1.7 , align 1
74+ %ptr1.8 = getelementptr inbounds i8 , ptr %ptr1 , i64 8
75+ %val8 = load i8 , ptr %ptr1.8 , align 8
76+ %ptr1.9 = getelementptr inbounds i8 , ptr %ptr1 , i64 9
77+ %val9 = load i8 , ptr %ptr1.9 , align 1
78+ %ptr1.10 = getelementptr inbounds i8 , ptr %ptr1 , i64 10
79+ %val10 = load i8 , ptr %ptr1.10 , align 2
80+ %ptr1.11 = getelementptr inbounds i8 , ptr %ptr1 , i64 11
81+ %val11 = load i8 , ptr %ptr1.11 , align 1
82+ %ptr1.12 = getelementptr inbounds i8 , ptr %ptr1 , i64 12
83+ %val12 = load i8 , ptr %ptr1.12 , align 4
84+ %ptr1.13 = getelementptr inbounds i8 , ptr %ptr1 , i64 13
85+ %val13 = load i8 , ptr %ptr1.13 , align 1
86+ %ptr1.14 = getelementptr inbounds i8 , ptr %ptr1 , i64 14
87+ %val14 = load i8 , ptr %ptr1.14 , align 2
88+ %ptr1.15 = getelementptr inbounds i8 , ptr %ptr1 , i64 15
89+ %val15 = load i8 , ptr %ptr1.15 , align 1
90+ %lane0 = zext i8 %val0 to i32
91+ %lane1 = zext i8 %val1 to i32
92+ %lane2 = zext i8 %val2 to i32
93+ %lane3 = zext i8 %val3 to i32
94+ %lane4 = zext i8 %val4 to i32
95+ %lane5 = zext i8 %val5 to i32
96+ %lane6 = zext i8 %val6 to i32
97+ %lane7 = zext i8 %val7 to i32
98+ %lane8 = zext i8 %val8 to i32
99+ %lane9 = zext i8 %val9 to i32
100+ %lane10 = zext i8 %val10 to i32
101+ %lane11 = zext i8 %val11 to i32
102+ %lane12 = zext i8 %val12 to i32
103+ %lane13 = zext i8 %val13 to i32
104+ %lane14 = zext i8 %val14 to i32
105+ %lane15 = zext i8 %val15 to i32
106+ %red.1 = add i32 %lane0 , %lane1
107+ %red.2 = add i32 %red.1 , %lane2
108+ %red.3 = add i32 %red.2 , %lane3
109+ %red.4 = add i32 %red.3 , %lane4
110+ %red.5 = add i32 %red.4 , %lane5
111+ %red.6 = add i32 %red.5 , %lane6
112+ %red.7 = add i32 %red.6 , %lane7
113+ %red.8 = add i32 %red.7 , %lane8
114+ %red.9 = add i32 %red.8 , %lane9
115+ %red.10 = add i32 %red.9 , %lane10
116+ %red.11 = add i32 %red.10 , %lane11
117+ %red.12 = add i32 %red.11 , %lane12
118+ %red.13 = add i32 %red.12 , %lane13
119+ %red.14 = add i32 %red.13 , %lane14
120+ %red = add i32 %red.14 , %lane15
121+ store i32 %red , ptr %ptr2 , align 4
122+ ret void
123+ }
124+
125+ define void @combine_v8i16 (ptr noundef align 16 %ptr1 , ptr noundef align 16 %ptr2 ) {
126+ ; ENABLED-LABEL: combine_v8i16
127+ ; ENABLED: ld.v4.b32
128+ %val0 = load i16 , ptr %ptr1 , align 16
129+ %ptr1.1 = getelementptr inbounds i16 , ptr %ptr1 , i64 1
130+ %val1 = load i16 , ptr %ptr1.1 , align 2
131+ %ptr1.2 = getelementptr inbounds i16 , ptr %ptr1 , i64 2
132+ %val2 = load i16 , ptr %ptr1.2 , align 4
133+ %ptr1.3 = getelementptr inbounds i16 , ptr %ptr1 , i64 3
134+ %val3 = load i16 , ptr %ptr1.3 , align 2
135+ %ptr1.4 = getelementptr inbounds i16 , ptr %ptr1 , i64 4
136+ %val4 = load i16 , ptr %ptr1.4 , align 4
137+ %ptr1.5 = getelementptr inbounds i16 , ptr %ptr1 , i64 5
138+ %val5 = load i16 , ptr %ptr1.5 , align 2
139+ %ptr1.6 = getelementptr inbounds i16 , ptr %ptr1 , i64 6
140+ %val6 = load i16 , ptr %ptr1.6 , align 4
141+ %ptr1.7 = getelementptr inbounds i16 , ptr %ptr1 , i64 7
142+ %val7 = load i16 , ptr %ptr1.7 , align 2
143+ %lane0 = zext i16 %val0 to i32
144+ %lane1 = zext i16 %val1 to i32
145+ %lane2 = zext i16 %val2 to i32
146+ %lane3 = zext i16 %val3 to i32
147+ %lane4 = zext i16 %val4 to i32
148+ %lane5 = zext i16 %val5 to i32
149+ %lane6 = zext i16 %val6 to i32
150+ %lane7 = zext i16 %val7 to i32
151+ %red.1 = add i32 %lane0 , %lane1
152+ %red.2 = add i32 %red.1 , %lane2
153+ %red.3 = add i32 %red.2 , %lane3
154+ %red.4 = add i32 %red.3 , %lane4
155+ %red.5 = add i32 %red.4 , %lane5
156+ %red.6 = add i32 %red.5 , %lane6
157+ %red = add i32 %red.6 , %lane7
158+ store i32 %red , ptr %ptr2 , align 4
159+ ret void
160+ }
161+
162+ define void @combine_v4i32 (ptr noundef align 16 %ptr1 , ptr noundef align 16 %ptr2 ) {
163+ ; ENABLED-LABEL: combine_v4i32
164+ ; ENABLED: ld.v4.u32
165+ %val0 = load i32 , ptr %ptr1 , align 16
166+ %ptr1.1 = getelementptr inbounds i32 , ptr %ptr1 , i64 1
167+ %val1 = load i32 , ptr %ptr1.1 , align 4
168+ %ptr1.2 = getelementptr inbounds i32 , ptr %ptr1 , i64 2
169+ %val2 = load i32 , ptr %ptr1.2 , align 8
170+ %ptr1.3 = getelementptr inbounds i32 , ptr %ptr1 , i64 3
171+ %val3 = load i32 , ptr %ptr1.3 , align 4
172+ %red.1 = add i32 %val0 , %val1
173+ %red.2 = add i32 %red.1 , %val2
174+ %red = add i32 %red.2 , %val3
175+ store i32 %red , ptr %ptr2 , align 4
176+ ret void
177+ }
0 commit comments