@@ -763,6 +763,75 @@ define <16 x i8> @combine_and_pshufb_or_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
763763 ret <16 x i8 > %4
764764}
765765
; Per-lane logical right shift by a whole number of bytes (24, 0, 8 and 16
; bits), followed by a byte shuffle of the bitcast result. The shuffle rotates
; the bytes inside each of the first three dwords (the [1,2,3,0] pattern) and
; leaves the last dword in place. Every shift amount is a multiple of 8, so the
; shift is itself a byte permutation with zero fill; the expected SSE and AVX1
; output below is accordingly a single (v)pshufb with zeroed bytes. The
; expected AVX2 and AVX512F output still has a separate vpsrlvd in front of
; the vpshufb.
define <16 x i8> @combine_lshr_pshufb(<4 x i32> %a0) {
; SSE-LABEL: combine_lshr_pshufb:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_lshr_pshufb:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_lshr_pshufb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: combine_lshr_pshufb:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX512F-NEXT:    retq
  %shifted = lshr <4 x i32> %a0, <i32 24, i32 0, i32 8, i32 16>
  %bytes = bitcast <4 x i32> %shifted to <16 x i8>
  %perm = shufflevector <16 x i8> %bytes, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %perm
}
793+
; Per-lane left shift by a whole number of bytes (0, 8, 16 and 16 bits),
; followed by a byte shuffle of the bitcast result that rotates the bytes
; inside each of the first three dwords (the [1,2,3,0] pattern) and leaves the
; last dword in place. None of the expected outputs below fold the shift into
; the shuffle: every target keeps a multiply or a variable shift followed by a
; separate (v)pshufb.
define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) {
; SSSE3-LABEL: combine_shl_pshufb:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: combine_shl_pshufb:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: combine_shl_pshufb:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_shl_pshufb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: combine_shl_pshufb:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX512F-NEXT:    retq
  ; Named %shl to match the operation (left shift).
  %shl = shl <4 x i32> %a0, <i32 0, i32 8, i32 16, i32 16>
  %bc = bitcast <4 x i32> %shl to <16 x i8>
  %shuffle = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle
}
834+
766835define <16 x i8 > @constant_fold_pshufb () {
767836; SSE-LABEL: constant_fold_pshufb:
768837; SSE: # %bb.0:
0 commit comments