@@ -64,11 +64,23 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) {
 
 void test_builtin_reduce_addf(float4 vf4, double4 vd4) {
   // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
-  // CHECK-NEXT: call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[VF4]])
+  // CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0
+  // CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1
+  // CHECK-NEXT: [[ADDF1:%.+]] = fadd float [[ARRF1]], [[ARRF2]]
+  // CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2
+  // CHECK-NEXT: [[ADDF2:%.+]] = fadd float [[ADDF1]], [[ARRF3]]
+  // CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3
+  // CHECK-NEXT: [[ADDF3:%.+]] = fadd float [[ADDF2]], [[ARRF4]]
   float r2 = __builtin_reduce_add(vf4);
 
   // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
-  // CHECK-NEXT: call double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[VD4]])
+  // CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0
+  // CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1
+  // CHECK-NEXT: [[ADD1:%.+]] = fadd double [[ARR1]], [[ARR2]]
+  // CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2
+  // CHECK-NEXT: [[ADD2:%.+]] = fadd double [[ADD1]], [[ARR3]]
+  // CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3
+  // CHECK-NEXT: [[ADD3:%.+]] = fadd double [[ADD2]], [[ARR4]]
   double r3 = __builtin_reduce_add(vd4);
 }
 
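The hunk above changes what the test expects for __builtin_reduce_add on four-element float and double vectors: a left-to-right chain of extractelement and fadd instructions rather than a single call to @llvm.vector.reduce.fadd. A minimal C sketch of the evaluation order those CHECK lines encode, assuming Clang's ext_vector_type extension; reduce_add_ref is a hypothetical reference helper, not part of the test file:

typedef float float4 __attribute__((ext_vector_type(4)));

static float reduce_add_ref(float4 v) {
  /* Accumulate strictly left to right, matching the ARRF/ADDF chain:
     ((v[0] + v[1]) + v[2]) + v[3]. */
  float acc = v[0] + v[1]; /* [[ADDF1]] = fadd [[ARRF1]], [[ARRF2]] */
  acc = acc + v[2];        /* [[ADDF2]] = fadd [[ADDF1]], [[ARRF3]] */
  acc = acc + v[3];        /* [[ADDF3]] = fadd [[ADDF2]], [[ARRF4]] */
  return acc;
}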
@@ -96,11 +108,23 @@ void test_builtin_reduce_add(si8 vi1, u4 vu1) {
 
 void test_builtin_reduce_mulf(float4 vf4, double4 vd4) {
   // CHECK: [[VF4:%.+]] = load <4 x float>, ptr %vf4.addr, align 16
-  // CHECK-NEXT: call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[VF4]])
+  // CHECK-NEXT: [[ARRF1:%.+]] = extractelement <4 x float> [[VF4]], i64 0
+  // CHECK-NEXT: [[ARRF2:%.+]] = extractelement <4 x float> [[VF4]], i64 1
+  // CHECK-NEXT: [[MULF1:%.+]] = fmul float [[ARRF1]], [[ARRF2]]
+  // CHECK-NEXT: [[ARRF3:%.+]] = extractelement <4 x float> [[VF4]], i64 2
+  // CHECK-NEXT: [[MULF2:%.+]] = fmul float [[MULF1]], [[ARRF3]]
+  // CHECK-NEXT: [[ARRF4:%.+]] = extractelement <4 x float> [[VF4]], i64 3
+  // CHECK-NEXT: [[MULF3:%.+]] = fmul float [[MULF2]], [[ARRF4]]
   float r2 = __builtin_reduce_mul(vf4);
 
   // CHECK: [[VD4:%.+]] = load <4 x double>, ptr %vd4.addr, align 16
-  // CHECK-NEXT: call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[VD4]])
+  // CHECK-NEXT: [[ARR1:%.+]] = extractelement <4 x double> [[VD4]], i64 0
+  // CHECK-NEXT: [[ARR2:%.+]] = extractelement <4 x double> [[VD4]], i64 1
+  // CHECK-NEXT: [[MUL1:%.+]] = fmul double [[ARR1]], [[ARR2]]
+  // CHECK-NEXT: [[ARR3:%.+]] = extractelement <4 x double> [[VD4]], i64 2
+  // CHECK-NEXT: [[MUL2:%.+]] = fmul double [[MUL1]], [[ARR3]]
+  // CHECK-NEXT: [[ARR4:%.+]] = extractelement <4 x double> [[VD4]], i64 3
+  // CHECK-NEXT: [[MUL3:%.+]] = fmul double [[MUL2]], [[ARR4]]
   double r3 = __builtin_reduce_mul(vd4);
 }
 
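This hunk makes the same change for __builtin_reduce_mul: a left-to-right chain of extractelement and fmul instructions in place of a single @llvm.vector.reduce.fmul call. The corresponding sketch under the same assumptions; reduce_mul_ref is likewise a hypothetical helper, not part of the test file:

typedef double double4 __attribute__((ext_vector_type(4)));

static double reduce_mul_ref(double4 v) {
  /* Accumulate strictly left to right, matching the ARR/MUL chain:
     ((v[0] * v[1]) * v[2]) * v[3]. */
  double acc = v[0] * v[1]; /* [[MUL1]] = fmul [[ARR1]], [[ARR2]] */
  acc = acc * v[2];         /* [[MUL2]] = fmul [[MUL1]], [[ARR3]] */
  acc = acc * v[3];         /* [[MUL3]] = fmul [[MUL2]], [[ARR4]] */
  return acc;
}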