@@ -739,3 +739,248 @@ if.else:
739739 %r.4 = tail call fast <5 x float > @llvm.fma.v5f32 (<5 x float > %r.3 , <5 x float > %s4 , <5 x float > %b )
740740 ret <5 x float > %r.4
741741}
742+
743+ declare <8 x half > @llvm.fmuladd.v8f16 (<8 x half >, <8 x half >, <8 x half >)
744+
; Test: eight lane-splat shuffles of %a (lanes 0 through 7) are defined in the
; entry block; each one is used exactly once, as the second operand of an
; llvm.fmuladd.v8f16 call, in if.then (lanes 0-3) or if.else (lanes 4-7).
;   * NOFP16 checks expect all eight shuffles to stay in the entry block.
;   * FULLFP16 checks expect each shuffle to be re-created immediately before
;     the fmuladd call that consumes it, leaving entry with only the branch.
; NOTE(review): the NOFP16 / FULLFP16 prefixes presumably correspond to RUN
; lines without / with half-precision arithmetic support; the RUN lines are
; not visible in this hunk, so confirm against the top of the file.
745+ define <8 x half > @sink_shufflevector_fmuladd_v8f16 (i1 %c , <8 x half > %a , <8 x half > %b ) {
746+ ; NOFP16-LABEL: @sink_shufflevector_fmuladd_v8f16(
747+ ; NOFP16-NEXT: entry:
748+ ; NOFP16-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
749+ ; NOFP16-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
750+ ; NOFP16-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
751+ ; NOFP16-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
752+ ; NOFP16-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
753+ ; NOFP16-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
754+ ; NOFP16-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
755+ ; NOFP16-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
756+ ; NOFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
757+ ; NOFP16: if.then:
758+ ; NOFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
759+ ; NOFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
760+ ; NOFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
761+ ; NOFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
762+ ; NOFP16-NEXT: ret <8 x half> [[R_3]]
763+ ; NOFP16: if.else:
764+ ; NOFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
765+ ; NOFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
766+ ; NOFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
767+ ; NOFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
768+ ; NOFP16-NEXT: ret <8 x half> [[R_7]]
769+ ;
770+ ; FULLFP16-LABEL: @sink_shufflevector_fmuladd_v8f16(
771+ ; FULLFP16-NEXT: entry:
772+ ; FULLFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
773+ ; FULLFP16: if.then:
774+ ; FULLFP16-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
775+ ; FULLFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[B]])
776+ ; FULLFP16-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
777+ ; FULLFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_0]], <8 x half> [[TMP1]], <8 x half> [[B]])
778+ ; FULLFP16-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
779+ ; FULLFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_1]], <8 x half> [[TMP2]], <8 x half> [[B]])
780+ ; FULLFP16-NEXT: [[TMP3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
781+ ; FULLFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_2]], <8 x half> [[TMP3]], <8 x half> [[B]])
782+ ; FULLFP16-NEXT: ret <8 x half> [[R_3]]
783+ ; FULLFP16: if.else:
784+ ; FULLFP16-NEXT: [[TMP4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
785+ ; FULLFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B]], <8 x half> [[TMP4]], <8 x half> [[B]])
786+ ; FULLFP16-NEXT: [[TMP5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
787+ ; FULLFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_4]], <8 x half> [[TMP5]], <8 x half> [[B]])
788+ ; FULLFP16-NEXT: [[TMP6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
789+ ; FULLFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_5]], <8 x half> [[TMP6]], <8 x half> [[B]])
790+ ; FULLFP16-NEXT: [[TMP7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
791+ ; FULLFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_6]], <8 x half> [[TMP7]], <8 x half> [[B]])
792+ ; FULLFP16-NEXT: ret <8 x half> [[R_7]]
793+ ;
794+ entry:
795+ %s0 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > zeroinitializer
796+ %s1 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >
797+ %s2 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >
798+ %s3 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 , i32 3 >
799+ %s4 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
800+ %s5 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 , i32 5 >
801+ %s6 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 , i32 6 >
802+ %s7 = shufflevector <8 x half > %a , <8 x half > poison, <8 x i32 > <i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 , i32 7 >
803+ br i1 %c , label %if.then , label %if.else
804+
805+ if.then:
806+ %r.0 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %b , <8 x half > %s0 , <8 x half > %b )
807+ %r.1 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.0 , <8 x half > %s1 , <8 x half > %b )
808+ %r.2 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.1 , <8 x half > %s2 , <8 x half > %b )
809+ %r.3 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.2 , <8 x half > %s3 , <8 x half > %b )
810+ ret <8 x half > %r.3
811+
812+ if.else:
813+ %r.4 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %b , <8 x half > %s4 , <8 x half > %b )
814+ %r.5 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.4 , <8 x half > %s5 , <8 x half > %b )
815+ %r.6 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.5 , <8 x half > %s6 , <8 x half > %b )
816+ %r.7 = tail call fast <8 x half > @llvm.fmuladd.v8f16 (<8 x half > %r.6 , <8 x half > %s7 , <8 x half > %b )
817+ ret <8 x half > %r.7
818+ }
819+
820+ declare <4 x float > @llvm.fmuladd.v4f32 (<4 x float >, <4 x float >, <4 x float >)
821+
; Test: four shuffles in the entry block each splat one lane (0 through 3) of
; the <8 x float> argument %a into a <4 x float> value. Every splat is used
; once as the second operand of an llvm.fmuladd.v4f32 call: lanes 0-1 in
; if.then, lanes 2-3 in if.else.
; The CHECK lines expect entry to contain only the branch, and each shuffle to
; appear directly before the fmuladd call that uses it.
822+ define <4 x float > @sink_shufflevector_fmuladd_v4f32 (i1 %c , <8 x float > %a , <4 x float > %b ) {
823+ ; CHECK-LABEL: @sink_shufflevector_fmuladd_v4f32(
824+ ; CHECK-NEXT: entry:
825+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
826+ ; CHECK: if.then:
827+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
828+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
829+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
830+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[R_0]], <4 x float> [[TMP1]], <4 x float> [[B]])
831+ ; CHECK-NEXT: ret <4 x float> [[R_1]]
832+ ; CHECK: if.else:
833+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
834+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B]], <4 x float> [[TMP2]], <4 x float> [[B]])
835+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
836+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[R_2]], <4 x float> [[TMP3]], <4 x float> [[B]])
837+ ; CHECK-NEXT: ret <4 x float> [[R_3]]
838+ ;
839+ entry:
840+ %s0 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > zeroinitializer
841+ %s1 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
842+ %s2 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 >
843+ %s3 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 >
844+ br i1 %c , label %if.then , label %if.else
845+
846+ if.then:
847+ %r.0 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %b , <4 x float > %s0 , <4 x float > %b )
848+ %r.1 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %r.0 , <4 x float > %s1 , <4 x float > %b )
849+ ret <4 x float > %r.1
850+
851+ if.else:
852+ %r.2 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %b , <4 x float > %s2 , <4 x float > %b )
853+ %r.3 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %r.2 , <4 x float > %s3 , <4 x float > %b )
854+ ret <4 x float > %r.3
855+ }
856+
; Test: four shuffles in the entry block each splat one lane (0 through 3) of
; the <8 x float> argument %a into a <4 x float> value. Here every splat is
; passed as the FIRST operand of an llvm.fmuladd.v4f32 call (lanes 0-1 in
; if.then, lanes 2-3 in if.else).
; The CHECK lines expect entry to contain only the branch, and each shuffle to
; appear directly before the fmuladd call that uses it.
; NOTE(review): the function name ends in "v4f3" although the vectors are
; <4 x float>; this looks like a truncated "v4f32". The name is also matched
; by the CHECK-LABEL line, so both would need to change together.
857+ define <4 x float > @sink_shufflevector_first_arg_fmuladd_v4f3 (i1 %c , <8 x float > %a , <4 x float > %b ) {
858+ ; CHECK-LABEL: @sink_shufflevector_first_arg_fmuladd_v4f3(
859+ ; CHECK-NEXT: entry:
860+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
861+ ; CHECK: if.then:
862+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
863+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP0]], <4 x float> [[B:%.*]], <4 x float> [[B]])
864+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
865+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> [[R_0]], <4 x float> [[B]])
866+ ; CHECK-NEXT: ret <4 x float> [[R_1]]
867+ ; CHECK: if.else:
868+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
869+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> [[B]], <4 x float> [[B]])
870+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
871+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP3]], <4 x float> [[R_2]], <4 x float> [[B]])
872+ ; CHECK-NEXT: ret <4 x float> [[R_3]]
873+ ;
874+ entry:
875+ %s0 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > zeroinitializer
876+ %s1 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 >
877+ %s2 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 >
878+ %s3 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 >
879+ br i1 %c , label %if.then , label %if.else
880+
881+ if.then:
882+ %r.0 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %s0 , <4 x float > %b , <4 x float > %b )
883+ %r.1 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %s1 , <4 x float > %r.0 , <4 x float > %b )
884+ ret <4 x float > %r.1
885+
886+ if.else:
887+ %r.2 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %s2 , <4 x float > %b , <4 x float > %b )
888+ %r.3 = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %s3 , <4 x float > %r.2 , <4 x float > %b )
889+ ret <4 x float > %r.3
890+ }
891+
892+
893+
894+ declare <2 x double > @llvm.fmuladd.v2f64 (<2 x double >, <2 x double >, <2 x double >)
895+
; Test: two shuffles in the entry block splat lane 0 and lane 1 of the
; <2 x double> argument %a. Each splat is the second operand of a single
; llvm.fmuladd.v2f64 call: lane 0 in if.then, lane 1 in if.else.
; The CHECK lines expect entry to contain only the branch, and each shuffle to
; appear directly before the fmuladd call that uses it.
896+ define <2 x double > @sink_shufflevector_fmuladd_v2f64 (i1 %c , <2 x double > %a , <2 x double > %b ) {
897+ ; CHECK-LABEL: @sink_shufflevector_fmuladd_v2f64(
898+ ; CHECK-NEXT: entry:
899+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
900+ ; CHECK: if.then:
901+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
902+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B:%.*]], <2 x double> [[TMP0]], <2 x double> [[B]])
903+ ; CHECK-NEXT: ret <2 x double> [[R_0]]
904+ ; CHECK: if.else:
905+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
906+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B]], <2 x double> [[TMP1]], <2 x double> [[B]])
907+ ; CHECK-NEXT: ret <2 x double> [[R_1]]
908+ ;
909+ entry:
910+ %s0 = shufflevector <2 x double > %a , <2 x double > poison, <2 x i32 > zeroinitializer
911+ %s1 = shufflevector <2 x double > %a , <2 x double > poison, <2 x i32 > <i32 1 , i32 1 >
912+ br i1 %c , label %if.then , label %if.else
913+
914+ if.then:
915+ %r.0 = tail call fast <2 x double > @llvm.fmuladd.v2f64 (<2 x double > %b , <2 x double > %s0 , <2 x double > %b )
916+ ret <2 x double > %r.0
917+
918+ if.else:
919+ %r.1 = tail call fast <2 x double > @llvm.fmuladd.v2f64 (<2 x double > %b , <2 x double > %s1 , <2 x double > %b )
920+ ret <2 x double > %r.1
921+ }
922+
; Test: a single shuffle in the entry block splats lane 4 of the <8 x float>
; argument %a into a <4 x float> value, i.e. the selected source lane index (4)
; is not below the result's element count (4). The splat is used only in
; if.then, as the second operand of llvm.fmuladd.v4f32; if.else returns
; zeroinitializer without using it.
; NOTE(review): despite the "do_not_sink" name, the CHECK lines expect entry to
; hold only the branch and the shuffle to appear inside if.then, directly before
; the fmuladd call. Confirm whether the name or the expectations reflect the
; intended behavior.
923+ define <4 x float > @do_not_sink_out_of_range_shufflevector_fmuladd_v4f32 (i1 %c , <8 x float > %a , <4 x float > %b ) {
924+ ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fmuladd_v4f32(
925+ ; CHECK-NEXT: entry:
926+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
927+ ; CHECK: if.then:
928+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
929+ ; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
930+ ; CHECK-NEXT: ret <4 x float> [[R]]
931+ ; CHECK: if.else:
932+ ; CHECK-NEXT: ret <4 x float> zeroinitializer
933+ ;
934+ entry:
935+ %s4 = shufflevector <8 x float > %a , <8 x float > poison, <4 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 >
936+ br i1 %c , label %if.then , label %if.else
937+
938+ if.then:
939+ %r = tail call fast <4 x float > @llvm.fmuladd.v4f32 (<4 x float > %b , <4 x float > %s4 , <4 x float > %b )
940+ ret <4 x float > %r
941+
942+ if.else:
943+ ret <4 x float > zeroinitializer
944+ }
945+
946+ declare <5 x float > @llvm.fmuladd.v5f32 (<5 x float >, <5 x float >, <5 x float >)
947+
; Test: five shuffles in the entry block build <5 x float> values from the
; <8 x float> argument %a. %s0 (all lanes 0) and %s4 (all lanes 4) use a single
; source lane for every element; %s1, %s2 and %s3 use lane 1, 2 or 3 for the
; first four elements but lane 4 for the last, so their masks are not uniform.
; Each shuffle feeds the second operand of one llvm.fmuladd.v5f32 call
; (%s0, %s1 in if.then; %s2, %s3, %s4 in if.else).
; The CHECK lines expect the three non-uniform shuffles (S1-S3) to stay in the
; entry block, while the two uniform splats appear directly before the fmuladd
; calls that use them (TMP0 in if.then, TMP1 in if.else).
948+ define <5 x float > @sink_shufflevector_fmuladd_v5f32 (i1 %c , <8 x float > %a , <5 x float > %b ) {
949+ ; CHECK-LABEL: @sink_shufflevector_fmuladd_v5f32(
950+ ; CHECK-NEXT: entry:
951+ ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
952+ ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
953+ ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
954+ ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
955+ ; CHECK: if.then:
956+ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> zeroinitializer
957+ ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[B:%.*]], <5 x float> [[TMP0]], <5 x float> [[B]])
958+ ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
959+ ; CHECK-NEXT: ret <5 x float> [[R_1]]
960+ ; CHECK: if.else:
961+ ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
962+ ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
963+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
964+ ; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_3]], <5 x float> [[TMP1]], <5 x float> [[B]])
965+ ; CHECK-NEXT: ret <5 x float> [[R_4]]
966+ ;
967+ entry:
968+ %s0 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > zeroinitializer
969+ %s1 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 4 >
970+ %s2 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 4 >
971+ %s3 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 3 , i32 3 , i32 3 , i32 3 , i32 4 >
972+ %s4 = shufflevector <8 x float > %a , <8 x float > poison, <5 x i32 > <i32 4 , i32 4 , i32 4 , i32 4 , i32 4 >
973+ br i1 %c , label %if.then , label %if.else
974+
975+ if.then:
976+ %r.0 = tail call fast <5 x float > @llvm.fmuladd.v5f32 (<5 x float > %b , <5 x float > %s0 , <5 x float > %b )
977+ %r.1 = tail call fast <5 x float > @llvm.fmuladd.v5f32 (<5 x float > %r.0 , <5 x float > %s1 , <5 x float > %b )
978+ ret <5 x float > %r.1
979+
980+ if.else:
981+ %r.2 = tail call fast <5 x float > @llvm.fmuladd.v5f32 (<5 x float > %b , <5 x float > %s2 , <5 x float > %b )
982+ %r.3 = tail call fast <5 x float > @llvm.fmuladd.v5f32 (<5 x float > %r.2 , <5 x float > %s3 , <5 x float > %b )
983+ %r.4 = tail call fast <5 x float > @llvm.fmuladd.v5f32 (<5 x float > %r.3 , <5 x float > %s4 , <5 x float > %b )
984+ ret <5 x float > %r.4
985+ }
986+
0 commit comments