Skip to content

Commit 4b835da

Browse files
committed
[AArch64] Detect flipped TRN shuffle masks miscategorised as SK_InsertSubvector in getShuffleCost
1 parent ece0baf commit 4b835da

File tree

3 files changed: +8 -9 lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6113,7 +6113,7 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
61136113
unsigned Unused;
61146114
if (LT.second.isFixedLengthVector() &&
61156115
LT.second.getVectorNumElements() == Mask.size() &&
6116-
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
6116+
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_InsertSubvector) &&
61176117
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
61186118
isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
61196119
isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||

llvm/test/Analysis/CostModel/AArch64/shuffle-transpose.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
158158

159159
define <2 x i32> @trn1.v2i32_flipped(<2 x i32> %v0, <2 x i32> %v1) {
160160
; CHECK-LABEL: 'trn1.v2i32_flipped'
161-
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 0>
161+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 0>
162162
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %tmp0
163163
;
164164
%tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 2, i32 0>
@@ -266,7 +266,7 @@ define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
266266

267267
define <2 x float> @trn1.v2f32_flipped(<2 x float> %v0, <2 x float> %v1) {
268268
; CHECK-LABEL: 'trn1.v2f32_flipped'
269-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 2, i32 0>
269+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 2, i32 0>
270270
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %tmp0
271271
;
272272
%tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 2, i32 0>

llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -645,19 +645,18 @@ define i1 @tryMapToRange(ptr %values, ptr %result, <2 x i64> %hi, <2 x i64> %lo)
645645
; CHECK-NEXT: [[S1:%.*]] = sext <2 x i1> [[C1]] to <2 x i64>
646646
; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i64> [[S1]] to <16 x i8>
647647
; CHECK-NEXT: [[A1:%.*]] = and <16 x i8> [[BC1]], <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
648-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i8> [[A1]], i64 0
649-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i8> [[A1]], i64 8
650648
; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i64> [[L]], [[LO:%.*]]
651649
; CHECK-NEXT: [[S2:%.*]] = sext <2 x i1> [[C2]] to <2 x i64>
652650
; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[S2]] to <16 x i8>
653651
; CHECK-NEXT: [[A2:%.*]] = and <16 x i8> [[BC2]], <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
654-
; CHECK-NEXT: [[E3:%.*]] = extractelement <16 x i8> [[A2]], i64 0
655-
; CHECK-NEXT: [[E4:%.*]] = extractelement <16 x i8> [[A2]], i64 8
656652
; CHECK-NEXT: [[REASS_SUB:%.*]] = sub <2 x i64> [[L]], [[LO]]
657653
; CHECK-NEXT: [[ADD_I_I_I_I_I_I:%.*]] = add <2 x i64> [[REASS_SUB]], splat (i64 1)
658654
; CHECK-NEXT: store <2 x i64> [[ADD_I_I_I_I_I_I]], ptr [[RESULT:%.*]], align 8
659-
; CHECK-NEXT: [[O3:%.*]] = or i8 [[TMP4]], [[TMP5]]
660-
; CHECK-NEXT: [[O2:%.*]] = or i8 [[E4]], [[E3]]
655+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A1]], <16 x i8> [[A2]], <2 x i32> <i32 8, i32 24>
656+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A1]], <16 x i8> [[A2]], <2 x i32> <i32 0, i32 16>
657+
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i8> [[TMP1]], [[TMP2]]
658+
; CHECK-NEXT: [[O3:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
659+
; CHECK-NEXT: [[O2:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
661660
; CHECK-NEXT: [[O4:%.*]] = or i8 [[O3]], [[O2]]
662661
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[O4]], 0
663662
; CHECK-NEXT: ret i1 [[C]]

0 commit comments

Comments
 (0)