Skip to content

Commit 0353c2c

Browse files
committed
[InstCombine] fold shuffles with FP<->Int cast operands
shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask). This is similar to a recent transform with fneg (b331a7e), but this is intentionally the most conservative first step to try to avoid regressions in codegen. There are several restrictions that could be removed as follow-up enhancements. Note that a cast with a unary shuffle is currently canonicalized in the other direction (shuffle after cast - D103038). We might want to invert that to be consistent with this patch.
1 parent aca3329 commit 0353c2c

File tree

2 files changed

+75
-12
lines changed

2 files changed

+75
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

+51
Original file line numberDiff line numberDiff line change
@@ -2275,6 +2275,54 @@ static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
22752275
return nullptr;
22762276
}
22772277

2278+
/// Canonicalize casts after shuffle.
2279+
static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2280+
InstCombiner::BuilderTy &Builder) {
2281+
// Do we have 2 matching cast operands?
2282+
auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2283+
auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2284+
if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2285+
Cast0->getSrcTy() != Cast1->getSrcTy())
2286+
return nullptr;
2287+
2288+
// TODO: Allow other opcodes? That would require easing the type restrictions
2289+
// below here.
2290+
CastInst::CastOps CastOpcode = Cast0->getOpcode();
2291+
switch (CastOpcode) {
2292+
case Instruction::FPToSI:
2293+
case Instruction::FPToUI:
2294+
case Instruction::SIToFP:
2295+
case Instruction::UIToFP:
2296+
break;
2297+
default:
2298+
return nullptr;
2299+
}
2300+
2301+
VectorType *ShufTy = Shuf.getType();
2302+
VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2303+
VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2304+
2305+
// TODO: Allow length-changing shuffles?
2306+
if (ShufTy != ShufOpTy)
2307+
return nullptr;
2308+
2309+
// TODO: Allow element-size-changing casts?
2310+
assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2311+
"Expected fixed vector operands for casts and binary shuffle");
2312+
if (CastSrcTy->getPrimitiveSizeInBits() != ShufOpTy->getPrimitiveSizeInBits())
2313+
return nullptr;
2314+
2315+
// At least one of the operands must have only one use (the shuffle).
2316+
if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2317+
return nullptr;
2318+
2319+
// shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2320+
Value *X = Cast0->getOperand(0);
2321+
Value *Y = Cast1->getOperand(0);
2322+
Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2323+
return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2324+
}
2325+
22782326
/// Try to fold an extract subvector operation.
22792327
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
22802328
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
@@ -2573,6 +2621,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
25732621
if (Instruction *I = foldFNegShuffle(SVI, Builder))
25742622
return I;
25752623

2624+
if (Instruction *I = foldCastShuffle(SVI, Builder))
2625+
return I;
2626+
25762627
APInt UndefElts(VWidth, 0);
25772628
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
25782629
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {

llvm/test/Transforms/InstCombine/vec_shuffle.ll

+24-12
Original file line numberDiff line numberDiff line change
@@ -2038,9 +2038,8 @@ define <2 x half> @uitofp_unary_shuf_narrow_narrow_elts(<4 x i32> %x) {
20382038

20392039
define <4 x i32> @fptosi_shuf(<4 x float> %x, <4 x float> %y) {
20402040
; CHECK-LABEL: @fptosi_shuf(
2041-
; CHECK-NEXT: [[NX:%.*]] = fptosi <4 x float> [[X:%.*]] to <4 x i32>
2042-
; CHECK-NEXT: [[NY:%.*]] = fptosi <4 x float> [[Y:%.*]] to <4 x i32>
2043-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[NX]], <4 x i32> [[NY]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
2041+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
2042+
; CHECK-NEXT: [[R:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
20442043
; CHECK-NEXT: ret <4 x i32> [[R]]
20452044
;
20462045
%nx = fptosi <4 x float> %x to <4 x i32>
@@ -2051,9 +2050,8 @@ define <4 x i32> @fptosi_shuf(<4 x float> %x, <4 x float> %y) {
20512050

20522051
define <3 x i16> @fptoui_shuf(<3 x half> %x, <3 x half> %y) {
20532052
; CHECK-LABEL: @fptoui_shuf(
2054-
; CHECK-NEXT: [[NX:%.*]] = fptoui <3 x half> [[X:%.*]] to <3 x i16>
2055-
; CHECK-NEXT: [[NY:%.*]] = fptoui <3 x half> [[Y:%.*]] to <3 x i16>
2056-
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i16> [[NX]], <3 x i16> [[NY]], <3 x i32> <i32 0, i32 undef, i32 4>
2053+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x i32> <i32 0, i32 undef, i32 4>
2054+
; CHECK-NEXT: [[R:%.*]] = fptoui <3 x half> [[TMP1]] to <3 x i16>
20572055
; CHECK-NEXT: ret <3 x i16> [[R]]
20582056
;
20592057
%nx = fptoui <3 x half> %x to <3 x i16>
@@ -2062,6 +2060,8 @@ define <3 x i16> @fptoui_shuf(<3 x half> %x, <3 x half> %y) {
20622060
ret <3 x i16> %r
20632061
}
20642062

2063+
; negative test - must have same source types
2064+
20652065
define <3 x i16> @fptoui_shuf_different_source_types(<3 x float> %x, <3 x half> %y) {
20662066
; CHECK-LABEL: @fptoui_shuf_different_source_types(
20672067
; CHECK-NEXT: [[NX:%.*]] = fptoui <3 x float> [[X:%.*]] to <3 x i16>
@@ -2075,6 +2075,8 @@ define <3 x i16> @fptoui_shuf_different_source_types(<3 x float> %x, <3 x half>
20752075
ret <3 x i16> %r
20762076
}
20772077

2078+
; negative test - must have same size elements
2079+
20782080
define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) {
20792081
; CHECK-LABEL: @fptoui_shuf_widen_elts(
20802082
; CHECK-NEXT: [[NX:%.*]] = fptosi <4 x half> [[X:%.*]] to <4 x i32>
@@ -2088,6 +2090,8 @@ define <4 x i32> @fptoui_shuf_widen_elts(<4 x half> %x, <4 x half> %y) {
20882090
ret <4 x i32> %r
20892091
}
20902092

2093+
; negative test - must have same size elements
2094+
20912095
define <4 x float> @sitofp_shuf_narrow_elts(<4 x i64> %x, <4 x i64> %y) {
20922096
; CHECK-LABEL: @sitofp_shuf_narrow_elts(
20932097
; CHECK-NEXT: [[NX:%.*]] = sitofp <4 x i64> [[X:%.*]] to <4 x float>
@@ -2101,12 +2105,14 @@ define <4 x float> @sitofp_shuf_narrow_elts(<4 x i64> %x, <4 x i64> %y) {
21012105
ret <4 x float> %r
21022106
}
21032107

2108+
; one extra use is ok
2109+
21042110
define <4 x float> @uitofp_shuf_extra_use1(<4 x i32> %x, <4 x i32> %y) {
21052111
; CHECK-LABEL: @uitofp_shuf_extra_use1(
21062112
; CHECK-NEXT: [[NX:%.*]] = uitofp <4 x i32> [[X:%.*]] to <4 x float>
21072113
; CHECK-NEXT: call void @use4(<4 x float> [[NX]])
2108-
; CHECK-NEXT: [[NY:%.*]] = uitofp <4 x i32> [[Y:%.*]] to <4 x float>
2109-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 0, i32 0, i32 4, i32 5>
2114+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 0, i32 4, i32 5>
2115+
; CHECK-NEXT: [[R:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
21102116
; CHECK-NEXT: ret <4 x float> [[R]]
21112117
;
21122118
%nx = uitofp <4 x i32> %x to <4 x float>
@@ -2116,12 +2122,14 @@ define <4 x float> @uitofp_shuf_extra_use1(<4 x i32> %x, <4 x i32> %y) {
21162122
ret <4 x float> %r
21172123
}
21182124

2125+
; one extra use is ok
2126+
21192127
define <4 x float> @sitofp_shuf_extra_use2(<4 x i32> %x, <4 x i32> %y) {
21202128
; CHECK-LABEL: @sitofp_shuf_extra_use2(
2121-
; CHECK-NEXT: [[NX:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float>
21222129
; CHECK-NEXT: [[NY:%.*]] = sitofp <4 x i32> [[Y:%.*]] to <4 x float>
21232130
; CHECK-NEXT: call void @use4(<4 x float> [[NY]])
2124-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[NX]], <4 x float> [[NY]], <4 x i32> <i32 7, i32 1, i32 4, i32 0>
2131+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y]], <4 x i32> <i32 7, i32 1, i32 4, i32 0>
2132+
; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
21252133
; CHECK-NEXT: ret <4 x float> [[R]]
21262134
;
21272135
%nx = sitofp <4 x i32> %x to <4 x float>
@@ -2131,6 +2139,8 @@ define <4 x float> @sitofp_shuf_extra_use2(<4 x i32> %x, <4 x i32> %y) {
21312139
ret <4 x float> %r
21322140
}
21332141

2142+
; negative test - both ops have extra uses
2143+
21342144
define <2 x float> @sitofp_shuf_extra_use3(<2 x i32> %x, <2 x i32> %y) {
21352145
; CHECK-LABEL: @sitofp_shuf_extra_use3(
21362146
; CHECK-NEXT: [[NX:%.*]] = sitofp <2 x i32> [[X:%.*]] to <2 x float>
@@ -2148,6 +2158,8 @@ define <2 x float> @sitofp_shuf_extra_use3(<2 x i32> %x, <2 x i32> %y) {
21482158
ret <2 x float> %r
21492159
}
21502160

2161+
; negative test - mismatched casts
2162+
21512163
define <4 x i32> @fptoi_shuf(<4 x float> %x, <4 x float> %y) {
21522164
; CHECK-LABEL: @fptoi_shuf(
21532165
; CHECK-NEXT: [[NX:%.*]] = fptoui <4 x float> [[X:%.*]] to <4 x i32>
@@ -2161,7 +2173,7 @@ define <4 x i32> @fptoi_shuf(<4 x float> %x, <4 x float> %y) {
21612173
ret <4 x i32> %r
21622174
}
21632175

2164-
; length-changing shuffle
2176+
; negative test - length-changing shuffle
21652177

21662178
define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) {
21672179
; CHECK-LABEL: @sitofp_shuf_widen(
@@ -2176,7 +2188,7 @@ define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) {
21762188
ret <4 x float> %r
21772189
}
21782190

2179-
; length-changing shuffle
2191+
; negative test - length-changing shuffle
21802192

21812193
define <2 x float> @uitofp_shuf_narrow(<4 x i32> %x, <4 x i32> %y) {
21822194
; CHECK-LABEL: @uitofp_shuf_narrow(

0 commit comments

Comments
 (0)