@@ -17,7 +17,7 @@ pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
17
17
#[ allow( improper_ctypes) ]
18
18
extern "C" {
19
19
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fabd.v1f64" ) ]
20
- fn vabd_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
20
+ fn vabd_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
21
21
}
22
22
vabd_f64_ ( a, b)
23
23
}
@@ -30,7 +30,7 @@ pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
30
30
#[ allow( improper_ctypes) ]
31
31
extern "C" {
32
32
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fabd.v2f64" ) ]
33
- fn vabdq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
33
+ fn vabdq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
34
34
}
35
35
vabdq_f64_ ( a, b)
36
36
}
@@ -1087,7 +1087,7 @@ pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
1087
1087
#[ allow( improper_ctypes) ]
1088
1088
extern "C" {
1089
1089
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmax.v1f64" ) ]
1090
- fn vmax_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
1090
+ fn vmax_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
1091
1091
}
1092
1092
vmax_f64_ ( a, b)
1093
1093
}
@@ -1100,7 +1100,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1100
1100
#[ allow( improper_ctypes) ]
1101
1101
extern "C" {
1102
1102
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmax.v2f64" ) ]
1103
- fn vmaxq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
1103
+ fn vmaxq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
1104
1104
}
1105
1105
vmaxq_f64_ ( a, b)
1106
1106
}
@@ -1113,7 +1113,7 @@ pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
1113
1113
#[ allow( improper_ctypes) ]
1114
1114
extern "C" {
1115
1115
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmin.v1f64" ) ]
1116
- fn vmin_f64_ ( a : float64x1_t , a : float64x1_t ) -> float64x1_t ;
1116
+ fn vmin_f64_ ( a : float64x1_t , b : float64x1_t ) -> float64x1_t ;
1117
1117
}
1118
1118
vmin_f64_ ( a, b)
1119
1119
}
@@ -1126,11 +1126,69 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1126
1126
#[ allow( improper_ctypes) ]
1127
1127
extern "C" {
1128
1128
#[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.fmin.v2f64" ) ]
1129
- fn vminq_f64_ ( a : float64x2_t , a : float64x2_t ) -> float64x2_t ;
1129
+ fn vminq_f64_ ( a : float64x2_t , b : float64x2_t ) -> float64x2_t ;
1130
1130
}
1131
1131
vminq_f64_ ( a, b)
1132
1132
}
1133
1133
1134
+ /// Calculates the square root of each lane.
1135
+ #[ inline]
1136
+ #[ target_feature( enable = "neon" ) ]
1137
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ]
1138
+ pub unsafe fn vsqrt_f32 ( a : float32x2_t ) -> float32x2_t {
1139
+ simd_fsqrt ( a)
1140
+ }
1141
+
1142
+ /// Calculates the square root of each lane.
1143
+ #[ inline]
1144
+ #[ target_feature( enable = "neon" ) ]
1145
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ]
1146
+ pub unsafe fn vsqrtq_f32 ( a : float32x4_t ) -> float32x4_t {
1147
+ simd_fsqrt ( a)
1148
+ }
1149
+
1150
+ /// Calculates the square root of each lane.
1151
+ #[ inline]
1152
+ #[ target_feature( enable = "neon" ) ]
1153
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ]
1154
+ pub unsafe fn vsqrt_f64 ( a : float64x1_t ) -> float64x1_t {
1155
+ simd_fsqrt ( a)
1156
+ }
1157
+
1158
+ /// Calculates the square root of each lane.
1159
+ #[ inline]
1160
+ #[ target_feature( enable = "neon" ) ]
1161
+ #[ cfg_attr( test, assert_instr( fsqrt) ) ]
1162
+ pub unsafe fn vsqrtq_f64 ( a : float64x2_t ) -> float64x2_t {
1163
+ simd_fsqrt ( a)
1164
+ }
1165
+
1166
+ /// Reciprocal square-root estimate.
1167
+ #[ inline]
1168
+ #[ target_feature( enable = "neon" ) ]
1169
+ #[ cfg_attr( test, assert_instr( frsqrte) ) ]
1170
+ pub unsafe fn vrsqrte_f64 ( a : float64x1_t ) -> float64x1_t {
1171
+ #[ allow( improper_ctypes) ]
1172
+ extern "C" {
1173
+ #[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.frsqrte.v1f64" ) ]
1174
+ fn vrsqrte_f64_ ( a : float64x1_t ) -> float64x1_t ;
1175
+ }
1176
+ vrsqrte_f64_ ( a)
1177
+ }
1178
+
1179
+ /// Reciprocal square-root estimate.
1180
+ #[ inline]
1181
+ #[ target_feature( enable = "neon" ) ]
1182
+ #[ cfg_attr( test, assert_instr( frsqrte) ) ]
1183
+ pub unsafe fn vrsqrteq_f64 ( a : float64x2_t ) -> float64x2_t {
1184
+ #[ allow( improper_ctypes) ]
1185
+ extern "C" {
1186
+ #[ cfg_attr( target_arch = "aarch64" , link_name = "llvm.aarch64.neon.frsqrte.v2f64" ) ]
1187
+ fn vrsqrteq_f64_ ( a : float64x2_t ) -> float64x2_t ;
1188
+ }
1189
+ vrsqrteq_f64_ ( a)
1190
+ }
1191
+
1134
1192
#[ cfg( test) ]
1135
1193
mod test {
1136
1194
use super :: * ;
@@ -2233,4 +2291,52 @@ mod test {
2233
2291
let r: f64x2 = transmute ( vminq_f64 ( transmute ( a) , transmute ( b) ) ) ;
2234
2292
assert_eq ! ( r, e) ;
2235
2293
}
2294
+
2295
+ #[ simd_test( enable = "neon" ) ]
2296
+ unsafe fn test_vsqrt_f32 ( ) {
2297
+ let a: f32x2 = f32x2:: new ( 4.0 , 9.0 ) ;
2298
+ let e: f32x2 = f32x2:: new ( 2.0 , 3.0 ) ;
2299
+ let r: f32x2 = transmute ( vsqrt_f32 ( transmute ( a) ) ) ;
2300
+ assert_eq ! ( r, e) ;
2301
+ }
2302
+
2303
+ #[ simd_test( enable = "neon" ) ]
2304
+ unsafe fn test_vsqrtq_f32 ( ) {
2305
+ let a: f32x4 = f32x4:: new ( 4.0 , 9.0 , 16.0 , 25.0 ) ;
2306
+ let e: f32x4 = f32x4:: new ( 2.0 , 3.0 , 4.0 , 5.0 ) ;
2307
+ let r: f32x4 = transmute ( vsqrtq_f32 ( transmute ( a) ) ) ;
2308
+ assert_eq ! ( r, e) ;
2309
+ }
2310
+
2311
+ #[ simd_test( enable = "neon" ) ]
2312
+ unsafe fn test_vsqrt_f64 ( ) {
2313
+ let a: f64 = 4.0 ;
2314
+ let e: f64 = 2.0 ;
2315
+ let r: f64 = transmute ( vsqrt_f64 ( transmute ( a) ) ) ;
2316
+ assert_eq ! ( r, e) ;
2317
+ }
2318
+
2319
+ #[ simd_test( enable = "neon" ) ]
2320
+ unsafe fn test_vsqrtq_f64 ( ) {
2321
+ let a: f64x2 = f64x2:: new ( 4.0 , 9.0 ) ;
2322
+ let e: f64x2 = f64x2:: new ( 2.0 , 3.0 ) ;
2323
+ let r: f64x2 = transmute ( vsqrtq_f64 ( transmute ( a) ) ) ;
2324
+ assert_eq ! ( r, e) ;
2325
+ }
2326
+
2327
+ #[ simd_test( enable = "neon" ) ]
2328
+ unsafe fn test_vrsqrte_f64 ( ) {
2329
+ let a: f64 = 1.0 ;
2330
+ let e: f64 = 0.998046875 ;
2331
+ let r: f64 = transmute ( vrsqrte_f64 ( transmute ( a) ) ) ;
2332
+ assert_eq ! ( r, e) ;
2333
+ }
2334
+
2335
+ #[ simd_test( enable = "neon" ) ]
2336
+ unsafe fn test_vrsqrteq_f64 ( ) {
2337
+ let a: f64x2 = f64x2:: new ( 1.0 , 2.0 ) ;
2338
+ let e: f64x2 = f64x2:: new ( 0.998046875 , 0.705078125 ) ;
2339
+ let r: f64x2 = transmute ( vrsqrteq_f64 ( transmute ( a) ) ) ;
2340
+ assert_eq ! ( r, e) ;
2341
+ }
2236
2342
}
0 commit comments