@@ -587,13 +587,11 @@ define <4 x float> @asin_4x(ptr %a) {
587587; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
588588; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) 
589589; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
590- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
591- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) 
592- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
593- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
594- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) 
595- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
596- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
590+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
591+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]]) 
592+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
593+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
594+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
597595; 
598596entry:
599597  %0  = load  <4  x float >, ptr  %a , align  16 
@@ -666,13 +664,11 @@ define <4 x float> @acos_4x(ptr %a) {
666664; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
667665; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) 
668666; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
669- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
670- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) 
671- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
672- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
673- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) 
674- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
675- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
667+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
668+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]]) 
669+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
670+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
671+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
676672; 
677673entry:
678674  %0  = load  <4  x float >, ptr  %a , align  16 
@@ -745,13 +741,11 @@ define <4 x float> @atan_4x(ptr %a) {
745741; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
746742; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) 
747743; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
748- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
749- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) 
750- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
751- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
752- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) 
753- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
754- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
744+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
745+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]]) 
746+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
747+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
748+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
755749; 
756750entry:
757751  %0  = load  <4  x float >, ptr  %a , align  16 
@@ -824,13 +818,11 @@ define <4 x float> @sinh_4x(ptr %a) {
824818; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
825819; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) 
826820; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
827- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
828- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) 
829- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
830- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
831- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) 
832- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
833- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
821+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
822+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]]) 
823+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
824+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
825+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
834826; 
835827entry:
836828  %0  = load  <4  x float >, ptr  %a , align  16 
@@ -903,13 +895,11 @@ define <4 x float> @cosh_4x(ptr %a) {
903895; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
904896; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]]) 
905897; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
906- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
907- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]]) 
908- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
909- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
910- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]]) 
911- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
912- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
898+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
899+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]]) 
900+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
901+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
902+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
913903; 
914904entry:
915905  %0  = load  <4  x float >, ptr  %a , align  16 
@@ -982,13 +972,11 @@ define <4 x float> @tanh_4x(ptr %a) {
982972; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 
983973; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) 
984974; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 
985- ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 
986- ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) 
987- ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 
988- ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
989- ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) 
990- ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 
991- ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]] 
975+ ; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> 
976+ ; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]]) 
977+ ; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 
978+ ; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> 
979+ ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]] 
992980; 
993981entry:
994982  %0  = load  <4  x float >, ptr  %a , align  16 
0 commit comments