diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 0e6cf59e25750..e6fe74891f97a 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -42,7 +42,7 @@ static bool findAndReplaceVectors(Module &M);
 class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
 public:
   DataScalarizerVisitor() : GlobalMap() {}
-  bool visit(Function &F);
+  bool visit(Instruction &I);
   // InstVisitor methods. They return true if the instruction was scalarized,
   // false if nothing changed.
   bool visitInstruction(Instruction &I) { return false; }
@@ -67,28 +67,11 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
 private:
   GlobalVariable *lookupReplacementGlobal(Value *CurrOperand);
   DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
-  SmallVector<WeakTrackingVH> PotentiallyDeadInstrs;
-  bool finish();
 };
 
-bool DataScalarizerVisitor::visit(Function &F) {
+bool DataScalarizerVisitor::visit(Instruction &I) {
   assert(!GlobalMap.empty());
-  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
-  for (BasicBlock *BB : RPOT) {
-    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
-      Instruction *I = &*II;
-      bool Done = InstVisitor::visit(I);
-      ++II;
-      if (Done && I->getType()->isVoidTy())
-        I->eraseFromParent();
-    }
-  }
-  return finish();
-}
-
-bool DataScalarizerVisitor::finish() {
-  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
-  return true;
+  return InstVisitor::visit(I);
 }
 
 GlobalVariable *
@@ -106,6 +89,20 @@ bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) {
   unsigned NumOperands = LI.getNumOperands();
   for (unsigned I = 0; I < NumOperands; ++I) {
     Value *CurrOpperand = LI.getOperand(I);
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
+    if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+      GetElementPtrInst *OldGEP =
+          cast<GetElementPtrInst>(CE->getAsInstruction());
+      OldGEP->insertBefore(&LI);
+      IRBuilder<> Builder(&LI);
+      LoadInst *NewLoad =
+          Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
+      NewLoad->setAlignment(LI.getAlign());
+      LI.replaceAllUsesWith(NewLoad);
+      LI.eraseFromParent();
+      visitGetElementPtrInst(*OldGEP);
+      return true;
+    }
     if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
       LI.setOperand(I, NewGlobal);
   }
@@ -116,32 +113,48 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
   unsigned NumOperands = SI.getNumOperands();
   for (unsigned I = 0; I < NumOperands; ++I) {
     Value *CurrOpperand = SI.getOperand(I);
-    if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) {
-      SI.setOperand(I, NewGlobal);
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
+    if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+      GetElementPtrInst *OldGEP =
+          cast<GetElementPtrInst>(CE->getAsInstruction());
+      OldGEP->insertBefore(&SI);
+      IRBuilder<> Builder(&SI);
+      StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
+      NewStore->setAlignment(SI.getAlign());
+      SI.replaceAllUsesWith(NewStore);
+      SI.eraseFromParent();
+      visitGetElementPtrInst(*OldGEP);
+      return true;
     }
+    if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
+      SI.setOperand(I, NewGlobal);
   }
   return false;
 }
 
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+
   unsigned NumOperands = GEPI.getNumOperands();
+  GlobalVariable *NewGlobal = nullptr;
   for (unsigned I = 0; I < NumOperands; ++I) {
     Value *CurrOpperand = GEPI.getOperand(I);
-    GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand);
-    if (!NewGlobal)
-      continue;
-    IRBuilder<> Builder(&GEPI);
-    SmallVector<Value *> Indices;
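Note: both DXIL passes touched by this patch now share the same rewrite pattern for load/store operands that are constant-expression GEPs. A minimal standalone sketch of that pattern follows, using the LLVM C++ API; the helper name materializeGEPOperand is hypothetical, not part of the patch:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Turn a GEP constant expression feeding a memory instruction into a real
    // GetElementPtrInst, so the visitor's ordinary visitGetElementPtrInst
    // logic can afterwards rewrite it against the replacement global.
    static GetElementPtrInst *materializeGEPOperand(ConstantExpr *CE,
                                                    Instruction *UsePoint) {
      assert(CE->getOpcode() == Instruction::GetElementPtr && "expected a GEP");
      // getAsInstruction() builds an equivalent but unlinked instruction; it
      // must be inserted into a basic block before it can be used.
      auto *GEP = cast<GetElementPtrInst>(CE->getAsInstruction());
      GEP->insertBefore(UsePoint);
      return GEP;
    }

The load or store is then recreated with IRBuilder against the materialized GEP, the original instruction is erased, and visitGetElementPtrInst is invoked directly; this is what lets both visitors drop the earlier convertUsersOfConstantsToInstructions round-trip.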
-    for (auto &Index : GEPI.indices())
-      Indices.push_back(Index);
-
-    Value *NewGEP =
-        Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices);
-
-    GEPI.replaceAllUsesWith(NewGEP);
-    PotentiallyDeadInstrs.emplace_back(&GEPI);
+    NewGlobal = lookupReplacementGlobal(CurrOpperand);
+    if (NewGlobal)
+      break;
   }
+  if (!NewGlobal)
+    return false;
+
+  IRBuilder<> Builder(&GEPI);
+  SmallVector<Value *> Indices;
+  for (auto &Index : GEPI.indices())
+    Indices.push_back(Index);
+
+  Value *NewGEP =
+      Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices,
+                        GEPI.getName(), GEPI.getNoWrapFlags());
+  GEPI.replaceAllUsesWith(NewGEP);
+  GEPI.eraseFromParent();
   return true;
 }
 
@@ -247,17 +260,13 @@ static bool findAndReplaceVectors(Module &M) {
       for (User *U : make_early_inc_range(G.users())) {
         if (isa<ConstantExpr>(U) && isa<GEPOperator>(U)) {
           ConstantExpr *CE = cast<ConstantExpr>(U);
-          convertUsersOfConstantsToInstructions(CE,
-                                                /*RestrictToFunc=*/nullptr,
-                                                /*RemoveDeadConstants=*/false,
-                                                /*IncludeSelf=*/true);
-        }
-        if (isa<Instruction>(U)) {
-          Instruction *Inst = cast<Instruction>(U);
-          Function *F = Inst->getFunction();
-          if (F)
-            Impl.visit(*F);
+          for (User *UCE : make_early_inc_range(CE->users())) {
+            if (Instruction *Inst = dyn_cast<Instruction>(UCE))
+              Impl.visit(*Inst);
+          }
         }
+        if (Instruction *Inst = dyn_cast<Instruction>(U))
+          Impl.visit(*Inst);
       }
     }
   }
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
index e4a3bc76eeacd..293fd4974e1fe 100644
--- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -164,11 +164,18 @@ bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) {
     Value *CurrOpperand = LI.getOperand(I);
     ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
     if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
-      convertUsersOfConstantsToInstructions(CE,
-                                            /*RestrictToFunc=*/nullptr,
-                                            /*RemoveDeadConstants=*/false,
-                                            /*IncludeSelf=*/true);
-      return false;
+      GetElementPtrInst *OldGEP =
+          cast<GetElementPtrInst>(CE->getAsInstruction());
+      OldGEP->insertBefore(&LI);
+
+      IRBuilder<> Builder(&LI);
+      LoadInst *NewLoad =
+          Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
+      NewLoad->setAlignment(LI.getAlign());
+      LI.replaceAllUsesWith(NewLoad);
+      LI.eraseFromParent();
+      visitGetElementPtrInst(*OldGEP);
+      return true;
     }
   }
   return false;
@@ -180,11 +187,17 @@ bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) {
     Value *CurrOpperand = SI.getOperand(I);
     ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
     if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
-      convertUsersOfConstantsToInstructions(CE,
-                                            /*RestrictToFunc=*/nullptr,
-                                            /*RemoveDeadConstants=*/false,
-                                            /*IncludeSelf=*/true);
-      return false;
+      GetElementPtrInst *OldGEP =
+          cast<GetElementPtrInst>(CE->getAsInstruction());
+      OldGEP->insertBefore(&SI);
+
+      IRBuilder<> Builder(&SI);
+      StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
+      NewStore->setAlignment(SI.getAlign());
+      SI.replaceAllUsesWith(NewStore);
+      SI.eraseFromParent();
+      visitGetElementPtrInst(*OldGEP);
+      return true;
     }
   }
   return false;
@@ -317,10 +330,17 @@ bool DXILFlattenArraysVisitor::visit(Function &F) {
 static void collectElements(Constant *Init,
                             SmallVectorImpl<Constant *> &Elements) {
   // Base case: If Init is not an array, add it directly to the vector.
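Note: the new collectElements() base case below is needed because a zeroinitializer array is a ConstantAggregateZero, which stores no element operands to recurse into. A minimal sketch of the same expansion in isolation (function name hypothetical):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // Synthesize one explicit null element per slot of a zero-initialized
    // array so downstream flattening sees a full element list.
    static void expandZeroInitArray(ArrayType *ArrayTy,
                                    SmallVectorImpl<Constant *> &Elements) {
      Constant *Zero = Constant::getNullValue(ArrayTy->getElementType());
      for (unsigned I = 0, E = ArrayTy->getNumElements(); I != E; ++I)
        Elements.push_back(Zero);
    }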
-  if (!isa<ArrayType>(Init->getType())) {
+  auto *ArrayTy = dyn_cast<ArrayType>(Init->getType());
+  if (!ArrayTy) {
     Elements.push_back(Init);
     return;
   }
+  unsigned ArrSize = ArrayTy->getNumElements();
+  if (isa<ConstantAggregateZero>(Init)) {
+    for (unsigned I = 0; I < ArrSize; ++I)
+      Elements.push_back(Constant::getNullValue(ArrayTy->getElementType()));
+    return;
+  }
 
   // Recursive case: Process each element in the array.
   if (auto *ArrayConstant = dyn_cast<ConstantArray>(Init)) {
diff --git a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
new file mode 100644
index 0000000000000..3ae5832ce8322
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-flatten-arrays,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+@ZerroInitArr = internal constant [2 x [3 x float]] [[3 x float] zeroinitializer, [3 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00]], align 16
+
+
+define internal void @main() {
+; CHECK-LABEL: define internal void @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 1
+; CHECK-NEXT:    [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 2
+; CHECK-NEXT:    [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 1
+  %.i0 = load float, ptr %0, align 16
+  %1 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 2
+  %.i03 = load float, ptr %1, align 16
+  ret void
+}
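Note: the regression test above (for issue #117273) deliberately uses GEPs whose source element type, [8 x [3 x float]], does not match the type of @ZerroInitArr itself, [2 x [3 x float]]. With opaque pointers that mismatch is valid IR, so the flattening pass derives the flattened type from the GEP's own source element type (8 x 3 = 24 floats, hence [24 x float] in the CHECK lines) rather than assuming it matches the global's type.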
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 5972520383650..4e522c6ef5da7 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -21,7 +21,6 @@
 ; CHECK-NOT: @groushared2dArrayofVectors
 ; CHECK-NOT: @groushared2dArrayofVectors.scalarized
 
-
 define <4 x i32> @load_array_vec_test() #0 {
 ; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -33,18 +32,13 @@ define <4 x i32> @load_array_vec_test() #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
+; CHECK-NEXT:    [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
+; CHECK-NEXT:    [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
+; CHECK-NEXT:    [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
 ; CHECK-NEXT:    [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
 ; CHECK-NEXT:    [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
@@ -87,7 +81,7 @@ define <4 x i32> @load_vec_test() #0 {
 define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
 ; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
 ; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
+; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
@@ -121,18 +115,13 @@ define <4 x i32> @multid_load_test() #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
+; CHECK-NEXT:    [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
+; CHECK-NEXT:    [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT:    [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
+; CHECK-NEXT:    [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
 ; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
 ; CHECK-NEXT:    [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
 ; CHECK-NEXT:    [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
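Note: the CHECK-line churn in this test comes from IRBuilder's default constant folding. Once the rewritten GEP is built directly over the flattened global with all-constant operands, CreateGEP returns an inline ConstantExpr instead of emitting separate %TMP instructions. A minimal sketch, assuming an already-flattened global (function name hypothetical):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static Value *gepIntoFlattenedGlobal(GlobalVariable *GV, unsigned Idx) {
      IRBuilder<> Builder(GV->getContext());
      // Base pointer and index are both constants, so the default
      // ConstantFolder folds this to a "getelementptr (...)" ConstantExpr;
      // no instruction is inserted.
      return Builder.CreateGEP(GV->getValueType(), GV, {Builder.getInt32(Idx)});
    }

This is why the expected IR now shows getelementptr expressions spliced directly into the bitcast and load operands.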
diff --git a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
new file mode 100644
index 0000000000000..25dc2c36b4e1f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays,function(scalarizer),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+@StaticArr = internal constant [8 x <3 x float>] [<3 x float> zeroinitializer, <3 x float> splat (float 5.000000e-01), <3 x float> , <3 x float> , <3 x float> , <3 x float> , <3 x float> , <3 x float> ], align 16
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define internal void @main() #1 {
+; CHECK-LABEL: define internal void @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DOTI0:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), align 16
+; CHECK-NEXT:    [[DOTI1:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 1), align 4
+; CHECK-NEXT:    [[DOTI2:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 2), align 8
+; CHECK-NEXT:    [[DOTI01:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), align 16
+; CHECK-NEXT:    [[DOTI12:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 1), align 4
+; CHECK-NEXT:    [[DOTI23:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 2), align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 1
+  %2 = load <3 x float>, ptr %arrayidx, align 16
+  %arrayidx2 = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 2
+  %3 = load <3 x float>, ptr %arrayidx2, align 16
+  ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll
index a32db8b8e3995..ed1e9109b7b18 100644
--- a/llvm/test/CodeGen/DirectX/scalar-load.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-load.ll
@@ -2,10 +2,10 @@
 
 ; Make sure we can load groupshared, static vectors and arrays of vectors
 
-@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
-@"vecData" = external addrspace(3) global <4 x i32>, align 4
+@arrayofVecData = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@vecData = external addrspace(3) global <4 x i32>, align 4
 @staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> , <4 x i32> , <4 x i32> ], align 4
-@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
+@groushared2dArrayofVectors = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
 
 ; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
 ; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
@@ -19,12 +19,12 @@
 
 ; CHECK-LABEL: load_array_vec_test
-define <4 x i32> @load_array_vec_test() #0 {
-  ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
-  %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
-  %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
-  %3 = add <4 x i32> %1, %2
-  ret <4 x i32> %3
+define <3 x float> @load_array_vec_test() #0 {
+  ; CHECK-COUNT-6: load float, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
+  %1 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
+  %2 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
+  %3 = fadd <3 x float> %1, %2
+  ret <3 x float> %3
 }
 
 ; CHECK-LABEL: load_vec_test
@@ -36,8 +36,14 @@ define <4 x i32> @load_vec_test() #0 {
 
 ; CHECK-LABEL: load_static_array_of_vec_test
 define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
-  ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
-  ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
+  ; CHECK: getelementptr inbounds [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
+  ; CHECK: load i32, ptr {{.*}}, align 4
+  ; CHECK: getelementptr i32, ptr {{.*}}, i32 1
+  ; CHECK: load i32, ptr {{.*}}, align 4
+  ; CHECK: getelementptr i32, ptr {{.*}}, i32 2
+  ; CHECK: load i32, ptr {{.*}}, align 4
+  ; CHECK: getelementptr i32, ptr {{.*}}, i32 3
+  ; CHECK: load i32, ptr {{.*}}, align 4
   %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
   %4 = load <4 x i32>, <4 x i32>* %3, align 4
   ret <4 x i32> %4