-
Notifications
You must be signed in to change notification settings - Fork 0
[MLIR][OpenMP] Updates to initial Taskloop Bounds Implementation #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: kaviya_taskloop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -395,18 +395,19 @@ Value *createFakeIntVal(IRBuilderBase &Builder, | |||||||
| OpenMPIRBuilder::InsertPointTy OuterAllocaIP, | ||||||||
| llvm::SmallVectorImpl<Instruction *> &ToBeDeleted, | ||||||||
| OpenMPIRBuilder::InsertPointTy InnerAllocaIP, | ||||||||
| const Twine &Name = "", bool AsPtr = true) { | ||||||||
| const Twine &Name = "", bool AsPtr = true, IntegerType *IntTy = nullptr) { | ||||||||
| Builder.restoreIP(OuterAllocaIP); | ||||||||
| IntTy = IntTy ? IntTy : Builder.getInt32Ty(); | ||||||||
| Instruction *FakeVal; | ||||||||
| AllocaInst *FakeValAddr = | ||||||||
| Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr"); | ||||||||
| Builder.CreateAlloca(IntTy, nullptr, Name + ".addr"); | ||||||||
| ToBeDeleted.push_back(FakeValAddr); | ||||||||
|
|
||||||||
| if (AsPtr) { | ||||||||
| FakeVal = FakeValAddr; | ||||||||
| } else { | ||||||||
| FakeVal = | ||||||||
| Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val"); | ||||||||
| Builder.CreateLoad(IntTy, FakeValAddr, Name + ".val"); | ||||||||
| ToBeDeleted.push_back(FakeVal); | ||||||||
| } | ||||||||
|
|
||||||||
|
|
@@ -415,10 +416,10 @@ Value *createFakeIntVal(IRBuilderBase &Builder, | |||||||
| Instruction *UseFakeVal; | ||||||||
| if (AsPtr) { | ||||||||
| UseFakeVal = | ||||||||
| Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use"); | ||||||||
| Builder.CreateLoad(IntTy, FakeVal, Name + ".use"); | ||||||||
| } else { | ||||||||
| UseFakeVal = | ||||||||
| cast<BinaryOperator>(Builder.CreateAdd(FakeVal, Builder.getInt32(10))); | ||||||||
| cast<BinaryOperator>(Builder.CreateAdd(FakeVal, ConstantInt::get(IntTy, 10))); | ||||||||
| } | ||||||||
| ToBeDeleted.push_back(UseFakeVal); | ||||||||
| return FakeVal; | ||||||||
|
|
@@ -751,7 +752,8 @@ void OpenMPIRBuilder::finalize(Function *Fn) { | |||||||
| for (auto *V : OI.ExcludeArgsFromAggregate) | ||||||||
| Extractor.excludeArgFromAggregate(V); | ||||||||
|
|
||||||||
| Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); | ||||||||
| SetVector<Value *> Outputs; | ||||||||
| Function *OutlinedFn = Extractor.extractCodeRegion(CEAC, OI.Inputs, Outputs); | ||||||||
|
|
||||||||
| // Forward target-cpu, target-features attributes to the outlined function. | ||||||||
| auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu"); | ||||||||
|
|
@@ -1979,22 +1981,38 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
| OI.ExitBB = TaskloopExitBB; | ||||||||
|
|
||||||||
| // Add the thread ID argument. | ||||||||
| SmallVector<Instruction *, 4> ToBeDeleted; | ||||||||
| SmallVector<Instruction *> ToBeDeleted; | ||||||||
| // dummy instruction to be used as a fake argument | ||||||||
| OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( | ||||||||
| Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false)); | ||||||||
| Value *FakeLB = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, | ||||||||
| "lb", /*AsPtr=*/false, Builder.getInt64Ty()); | ||||||||
| Value *FakeUB = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, | ||||||||
| "ub", /*AsPtr=*/false, Builder.getInt64Ty()); | ||||||||
| Value *FakeStep = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, | ||||||||
| "step", /*AsPtr=*/false, Builder.getInt64Ty()); | ||||||||
| // For Taskloop, we want to force the bounds to be the first three inputs in the aggregate struct. | ||||||||
| OI.Inputs.insert(FakeLB); | ||||||||
| OI.Inputs.insert(FakeUB); | ||||||||
| OI.Inputs.insert(FakeStep); | ||||||||
|
|
||||||||
| OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied, | ||||||||
| TaskloopAllocaBB, CLI, Loc, | ||||||||
| ToBeDeleted](Function &OutlinedFn) mutable { | ||||||||
| ToBeDeleted, FakeLB, FakeUB, FakeStep](Function &OutlinedFn) mutable { | ||||||||
| // Replace the Stale CI by appropriate RTL function call. | ||||||||
| assert(OutlinedFn.hasOneUse() && | ||||||||
| "there must be a single user for the outlined function"); | ||||||||
| CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back()); | ||||||||
|
|
||||||||
| // HasShareds is true if any variables are captured in the outlined region, | ||||||||
| // false otherwise. | ||||||||
| bool HasShareds = StaleCI->arg_size() > 1; | ||||||||
| // Create casts of the bounds values that can be used when outlining to replace the uses of the fake values with the real values. | ||||||||
| BasicBlock *CodeReplBB = StaleCI->getParent(); | ||||||||
| IRBuilderBase::InsertPoint CurrentIp = Builder.saveIP(); | ||||||||
| Builder.SetInsertPoint(CodeReplBB->getFirstInsertionPt()); | ||||||||
| Value *CastedLBVal = Builder.CreateIntCast(LBVal, Builder.getInt64Ty(), true, "lb64"); | ||||||||
| Value *CastedUBVal = Builder.CreateIntCast(UBVal, Builder.getInt64Ty(), true, "ub64"); | ||||||||
| Value *CastedStepVal = Builder.CreateIntCast(StepVal, Builder.getInt64Ty(), true, "step64"); | ||||||||
| Builder.restoreIP(CurrentIp); | ||||||||
|
|
||||||||
| Builder.SetInsertPoint(StaleCI); | ||||||||
|
|
||||||||
| // Gather the arguments for emitting the runtime call for | ||||||||
|
|
@@ -2015,20 +2033,18 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
| Value *TaskSize = Builder.getInt64( | ||||||||
| divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8)); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we are utilizing The required size for storing loop bounds can be reserved in |
||||||||
|
|
||||||||
| Value *SharedsSize = Builder.getInt64(0); | ||||||||
| if (HasShareds) { | ||||||||
| AllocaInst *ArgStructAlloca = | ||||||||
| dyn_cast<AllocaInst>(StaleCI->getArgOperand(1)); | ||||||||
| assert(ArgStructAlloca && | ||||||||
| "Unable to find the alloca instruction corresponding to arguments " | ||||||||
| "for extracted function"); | ||||||||
| StructType *ArgStructType = | ||||||||
| dyn_cast<StructType>(ArgStructAlloca->getAllocatedType()); | ||||||||
| assert(ArgStructType && "Unable to find struct type corresponding to " | ||||||||
| "arguments for extracted function"); | ||||||||
| SharedsSize = | ||||||||
| Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); | ||||||||
| } | ||||||||
| Value *Shareds = StaleCI->getArgOperand(1); | ||||||||
| AllocaInst *ArgStructAlloca = | ||||||||
| dyn_cast<AllocaInst>(Shareds); | ||||||||
| assert(ArgStructAlloca && | ||||||||
| "Unable to find the alloca instruction corresponding to arguments " | ||||||||
| "for extracted function"); | ||||||||
| StructType *ArgStructType = | ||||||||
| dyn_cast<StructType>(ArgStructAlloca->getAllocatedType()); | ||||||||
| assert(ArgStructType && "Unable to find struct type corresponding to " | ||||||||
| "arguments for extracted function"); | ||||||||
| Value *SharedsSize = | ||||||||
| Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType)); | ||||||||
|
|
||||||||
| // Emit the @__kmpc_omp_task_alloc runtime call | ||||||||
| // The runtime call returns a pointer to an area where the task captured | ||||||||
|
|
@@ -2038,31 +2054,21 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
| /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize, | ||||||||
| /*task_func=*/&OutlinedFn}); | ||||||||
|
|
||||||||
| Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); | ||||||||
| Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); | ||||||||
| Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, | ||||||||
| SharedsSize); | ||||||||
| // Get the pointer to loop lb, ub, step from task ptr | ||||||||
| // and set up the lowerbound,upperbound and step values | ||||||||
| llvm::Value *lb = | ||||||||
| Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5); | ||||||||
| Value *LbVal_ext = Builder.CreateSExt(LBVal, Builder.getInt64Ty()); | ||||||||
| Builder.CreateStore(LbVal_ext, lb); | ||||||||
|
|
||||||||
| llvm::Value *ub = | ||||||||
| Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6); | ||||||||
| Value *UbVal_ext = Builder.CreateSExt(UBVal, Builder.getInt64Ty()); | ||||||||
| Builder.CreateStore(UbVal_ext, ub); | ||||||||
|
|
||||||||
| llvm::Value *step = | ||||||||
| Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7); | ||||||||
| Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty()); | ||||||||
| Builder.CreateStore(Step_ext, step); | ||||||||
| llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step); | ||||||||
| llvm::Value *Lb = Builder.CreateStructGEP(ArgStructType, TaskShareds, 0); | ||||||||
| Builder.CreateStore(CastedLBVal, Lb); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
The values of lb, ub, and step are already populated in StructArg. You can access them directly and pass the pointers to the runtime call. |
||||||||
|
|
||||||||
| if (HasShareds) { | ||||||||
| Value *Shareds = StaleCI->getArgOperand(1); | ||||||||
| Align Alignment = TaskData->getPointerAlignment(M.getDataLayout()); | ||||||||
| Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData); | ||||||||
| Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment, | ||||||||
| SharedsSize); | ||||||||
| } | ||||||||
| llvm::Value *Ub = Builder.CreateStructGEP(ArgStructType, TaskShareds, 1); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. GEP to StructArg and get the upper bound value. |
||||||||
| Builder.CreateStore(CastedUBVal, Ub); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. Remove the store instruction. |
||||||||
|
|
||||||||
| llvm::Value *Step = Builder.CreateStructGEP(ArgStructType, TaskShareds, 2); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| Builder.CreateStore(CastedStepVal, Step); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove the store. |
||||||||
| llvm::Value *Loadstep = Builder.CreateLoad(Builder.getInt64Ty(), Step); | ||||||||
|
|
||||||||
| // set up the arguments for emitting kmpc_taskloop runtime call | ||||||||
| // setting default values for ifval, nogroup, sched, grainsize, task_dup | ||||||||
|
|
@@ -2074,8 +2080,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
| // TODO: Handle the case when TaskDup pointer isn't empty | ||||||||
| Value *TaskDup = Constant::getNullValue(Builder.getPtrTy()); | ||||||||
|
|
||||||||
| Value *Args[] = {Ident, ThreadID, TaskData, IfVal, lb, ub, | ||||||||
| loadstep, NoGroup, Sched, GrainSize, TaskDup}; | ||||||||
| Value *Args[] = {Ident, ThreadID, TaskData, IfVal, Lb, Ub, | ||||||||
| Loadstep, NoGroup, Sched, GrainSize, TaskDup}; | ||||||||
|
|
||||||||
| // taskloop runtime call | ||||||||
| Function *TaskloopFn = | ||||||||
|
|
@@ -2091,32 +2097,60 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
|
|
||||||||
| Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin()); | ||||||||
|
|
||||||||
| if (HasShareds) { | ||||||||
| LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1)); | ||||||||
| OutlinedFn.getArg(1)->replaceUsesWithIf( | ||||||||
| Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); | ||||||||
| } | ||||||||
| LoadInst *SharedsOutlined = | ||||||||
| Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1)); | ||||||||
| OutlinedFn.getArg(1)->replaceUsesWithIf( | ||||||||
| SharedsOutlined, | ||||||||
| [SharedsOutlined](Use &U) { return U.getUser() != SharedsOutlined; }); | ||||||||
|
|
||||||||
| Value *IV = CLI->getIndVar(); | ||||||||
| Type *IVTy = IV->getType(); | ||||||||
| Constant *One = ConstantInt::get(IVTy, 1); | ||||||||
|
|
||||||||
| Value *TaskLB = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, | ||||||||
| OutlinedFn.getArg(1), 5, "gep_lb"); | ||||||||
| Value *LoadTaskLB = Builder.CreateLoad(Builder.getInt64Ty(), TaskLB); | ||||||||
| Value *LowerBound = Builder.CreateTrunc(LoadTaskLB, IVTy, "lb"); | ||||||||
|
|
||||||||
| Value *TaskUB = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, | ||||||||
| OutlinedFn.getArg(1), 6, "gep_ub"); | ||||||||
| Value *LoadTaskUB = Builder.CreateLoad(Builder.getInt64Ty(), TaskUB); | ||||||||
| Value *UpperBound = Builder.CreateTrunc(LoadTaskUB, IVTy, "ub"); | ||||||||
| Constant *One = ConstantInt::get(Builder.getInt64Ty(), 1); | ||||||||
|
|
||||||||
| // When outlining, CodeExtractor will create GEP's to the LowerBound and | ||||||||
| // UpperBound. These GEP's can be reused for loading the tasks respective | ||||||||
| // bounds. | ||||||||
| Value *TaskLB = nullptr; | ||||||||
| Value *TaskUB = nullptr; | ||||||||
| Value *LoadTaskLB = nullptr; | ||||||||
| Value *LoadTaskUB = nullptr; | ||||||||
| for (Instruction &I : *TaskloopAllocaBB) { | ||||||||
| if (I.getOpcode() == Instruction::GetElementPtr) { | ||||||||
| GetElementPtrInst &Gep = cast<GetElementPtrInst>(I); | ||||||||
| if (Gep.getOperand(0) != SharedsOutlined) | ||||||||
| continue; | ||||||||
| if (ConstantInt *CI = dyn_cast<ConstantInt>(Gep.getOperand(2))) { | ||||||||
| switch (CI->getZExtValue()) { | ||||||||
| case 0: | ||||||||
| TaskLB = &I; | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would also be good to check that the value being indexed is the right one, not just the numeric value of the index.
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a check to make sure the GEP instruction being checked uses the Shareds pointer as its first operand. |
||||||||
| break; | ||||||||
| case 1: | ||||||||
| TaskUB = &I; | ||||||||
| break; | ||||||||
| } | ||||||||
| } | ||||||||
| } else if (I.getOpcode() == Instruction::Load) { | ||||||||
| LoadInst &Load = cast<LoadInst>(I); | ||||||||
| if (Load.getPointerOperand() == TaskLB) { | ||||||||
| assert(TaskLB != nullptr && "Expected value for TaskLB"); | ||||||||
| LoadTaskLB = &I; | ||||||||
| } else if (Load.getPointerOperand() == TaskUB) { | ||||||||
| assert(TaskUB != nullptr && "Expected value for TaskUB"); | ||||||||
| LoadTaskUB = &I; | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| Builder.SetInsertPoint(CLI->getPreheader()->getTerminator()); | ||||||||
|
|
||||||||
| Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound); | ||||||||
| assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB"); | ||||||||
| assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB"); | ||||||||
| Value *TripCountMinusOne = Builder.CreateSub(LoadTaskUB, LoadTaskLB); | ||||||||
| Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt"); | ||||||||
| Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true); | ||||||||
| Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true); | ||||||||
| // set the trip count in the CLI | ||||||||
| CLI->setTripCount(TripCount); | ||||||||
| CLI->setTripCount(CastedTripCount); | ||||||||
|
|
||||||||
| Builder.SetInsertPoint(CLI->getBody(), | ||||||||
| CLI->getBody()->getFirstInsertionPt()); | ||||||||
|
|
@@ -2127,12 +2161,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( | |||||||
| if (Add->getOpcode() == llvm::Instruction::Add) { | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tom raised a concern that this add instruction pattern might also match other unrelated add instructions, and we discussed this in my PR: llvm#166903 (comment). He suggested looking at the wsloop and distribute implementations for guidance on how this is handled there. I have not had a chance to dig into that yet. Could you please check this once? |
||||||||
| if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) { | ||||||||
| // update the starting index of the loop | ||||||||
| Add->setOperand(1, LowerBound); | ||||||||
| Add->setOperand(1, CastedTaskLB); | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| FakeLB->replaceAllUsesWith(CastedLBVal); | ||||||||
| FakeUB->replaceAllUsesWith(CastedUBVal); | ||||||||
| FakeStep->replaceAllUsesWith(CastedStepVal); | ||||||||
| for (Instruction *I : llvm::reverse(ToBeDeleted)) { | ||||||||
| I->eraseFromParent(); | ||||||||
| } | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.