Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Refactor Loop TapirTarget / Add GPU Backend #73

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
128 changes: 128 additions & 0 deletions include/llvm/Transforms/Tapir/PTXABI.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
***************************************************************************
* Copyright (c) 2017, Los Alamos National Security, LLC.
* All rights reserved.
*
* Copyright 2010. Los Alamos National Security, LLC. This software was
* produced under U.S. Government contract DE-AC52-06NA25396 for Los
* Alamos National Laboratory (LANL), which is operated by Los Alamos
* National Security, LLC for the U.S. Department of Energy. The
* U.S. Government has rights to use, reproduce, and distribute this
* software. NEITHER THE GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY,
* LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY
* FOR THE USE OF THIS SOFTWARE. If software is modified to produce
* derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms,
* with or without modification, are permitted provided that the
* following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* * Neither the name of Los Alamos National Security, LLC, Los
* Alamos National Laboratory, LANL, the U.S. Government, nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
***************************************************************************/

#ifndef PTX_ABI_H_
#define PTX_ABI_H_

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeBuilder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Transforms/Tapir/LoopSpawning.h"
#include "llvm/Transforms/Tapir/TapirUtils.h"
#include <deque>

namespace llvm {

/// PTXABILoopSpawning handles Tapir loops for the PTX target.
///
/// The class adds no analysis state of its own: every constructor argument is
/// forwarded unchanged to the LoopOutline base class, and the PTX-specific
/// work is performed by processLoop(), which is defined out of line.
class PTXABILoopSpawning : public LoopOutline {
public:
  /// Forwards the loop and the analyses it was found with to LoopOutline.
  PTXABILoopSpawning(Loop *OrigLoop, ScalarEvolution &SE,
                     LoopInfo *LI, DominatorTree *DT,
                     AssumptionCache *AC,
                     OptimizationRemarkEmitter &ORE)
      : LoopOutline(OrigLoop, SE, LI, DT, AC, ORE) {}

  /// Attempt to transform the loop.
  ///
  /// \returns true when the loop was transformed; the loop-spawning pass
  /// treats a false result as a failure and emits a missed-optimization
  /// remark for the loop.
  bool processLoop();

  virtual ~PTXABILoopSpawning() {}

private:
  // Starts at zero for every PTXABILoopSpawning instance.
  // NOTE(review): presumably the id given to the next generated kernel --
  // confirm against the implementation in PTXABI.cpp.
  uint32_t nextKernelId_ = 0;
};

/// PTXABI is the TapirTarget implementation for the PTX backend.
///
/// Each hook below overrides the corresponding TapirTarget virtual and is
/// marked final; all of them are defined out of line.
class PTXABI : public TapirTarget {
public:
  PTXABI();

  /// For use in the loop-spawning grainsize calculation (see TapirTarget).
  Value *GetOrCreateWorker8(Function &F) override final;

  /// Hook invoked for a sync instruction.
  /// NOTE(review): presumably lowers \p inst for this target -- confirm.
  void createSync(SyncInst &inst,
                  ValueToValueMapTy &DetachCtxToStackFrame) override final;

  /// Hook invoked for a detach instruction.
  /// NOTE(review): presumably returns the function outlined from the detached
  /// body -- confirm against the implementation.
  Function *createDetach(DetachInst &Detach,
                         ValueToValueMapTy &DetachCtxToStackFrame,
                         DominatorTree &DT,
                         AssumptionCache &AC) override final;

  // Per-function hooks run around the lowering of \p F.
  void preProcessFunction(Function &F) override final;
  void postProcessFunction(Function &F) override final;
  void postProcessHelper(Function &F) override final;

  /// NOTE(review): the meaning of the boolean result is not visible from this
  /// header -- confirm against the TapirTarget contract.
  bool processMain(Function &F) override final;
};

} // end of llvm namespace

#endif
3 changes: 2 additions & 1 deletion include/llvm/Transforms/Tapir/TapirTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ enum class TapirTargetType {
Cilk = 2,
OpenMP = 3,
CilkR = 4,
Qthreads = 5
Qthreads = 5,
PTX = 6
};

} // end namespace llvm
Expand Down
1 change: 1 addition & 0 deletions include/llvm/Transforms/Tapir/TapirUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Function *extractDetachBodyToFunction(DetachInst &Detach,

class TapirTarget {
public:
virtual ~TapirTarget() {};
//! For use in loopspawning grainsize calculation
virtual Value *GetOrCreateWorker8(Function &F) = 0;
virtual void createSync(SyncInst &inst,
Expand Down
8 changes: 5 additions & 3 deletions include/llvm/Transforms/Utils/TapirUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class LoopSpawningHints {
enum SpawningStrategy {
ST_SEQ,
ST_DAC,
ST_GPU,
ST_END,
};

Expand Down Expand Up @@ -93,7 +94,8 @@ class LoopSpawningHints {
return "Spawn iterations sequentially";
case LoopSpawningHints::ST_DAC:
return "Use divide-and-conquer";
case LoopSpawningHints::ST_END:
case LoopSpawningHints::ST_GPU:
return "Use gpu";
default:
return "Unknown";
}
Expand Down Expand Up @@ -142,8 +144,8 @@ class LoopSpawningHints {
/// 4) The loop only branches to the exit block from the header or the latch.
bool isCanonicalTapirLoop(const Loop *L, bool print = false);

//! Identify if a loop could be a DAC loop
bool isDACFor(Loop* L);
//! Identify if a loop should be handled manually by a parallel loop backend
bool isBackendParallelFor(Loop* L);

/// canDetach - Return true if the given function can perform a detach, false
/// otherwise.
Expand Down
1 change: 1 addition & 0 deletions lib/Transforms/Tapir/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_llvm_library(LLVMTapirOpts
CilkABI.cpp
OpenMPABI.cpp
PTXABI.cpp
QthreadsABI.cpp
SmallBlock.cpp
RedundantSpawn.cpp
Expand Down
41 changes: 40 additions & 1 deletion lib/Transforms/Tapir/LoopSpawning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Tapir.h"
#include "llvm/Transforms/Tapir/Outline.h"
#include "llvm/Transforms/Tapir/PTXABI.h"
#include "llvm/Transforms/Tapir/TapirUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
Expand Down Expand Up @@ -75,7 +76,9 @@ static cl::opt<TapirTargetType> ClTapirTarget(
clEnumValN(TapirTargetType::OpenMP,
"openmp", "OpenMP"),
clEnumValN(TapirTargetType::Qthreads,
"qthreads", "Qthreads")));
"qthreads", "Qthreads"),
clEnumValN(TapirTargetType::PTX,
"ptx", "PTX")));

namespace {
// /// \brief This modifies LoopAccessReport to initialize message with
Expand Down Expand Up @@ -115,6 +118,13 @@ static void emitMissedWarning(Function *F, Loop *L,
<< "Tapir loop not transformed: "
<< "failed to use divide-and-conquer loop spawning");
break;
case LoopSpawningHints::ST_GPU:
ORE->emit(DiagnosticInfoOptimizationFailure(
DEBUG_TYPE, "FailedRequestedSpawning",
L->getStartLoc(), L->getHeader())
<< "Tapir loop not transformed: "
<< "failed to use GPU loop spawning");
break;
case LoopSpawningHints::ST_SEQ:
ORE->emit(DiagnosticInfoOptimizationFailure(
DEBUG_TYPE, "SpawningDisabled",
Expand Down Expand Up @@ -1417,6 +1427,35 @@ bool LoopSpawningImpl::processLoop(Loop *L) {
case LoopSpawningHints::ST_SEQ:
DEBUG(dbgs() << "LS: Hints dictate sequential spawning.\n");
break;
case LoopSpawningHints::ST_GPU:
DEBUG(dbgs() << "LS: Hints dictate DAC spawning.\n");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This debug statement will print the wrong message.

{
DebugLoc DLoc = L->getStartLoc();
BasicBlock *Header = L->getHeader();
PTXABILoopSpawning DLS(L, SE, &LI, &DT, &AC, ORE);
// CilkABILoopSpawning DLS(L, SE, &LI, &DT, &AC, ORE);
// DACLoopSpawning DLS(L, SE, LI, DT, TLI, TTI, ORE);
if (DLS.processLoop()) {
DEBUG({
if (verifyFunction(*L->getHeader()->getParent())) {
dbgs() << "Transformed function is invalid.\n";
return false;
}
});
// Report success.
ORE.emit(OptimizationRemark(LS_NAME, "DACSpawning", DLoc, Header)
<< "spawning iterations using divide-and-conquer");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line looks like it reports the wrong message.

return true;
} else {
// Report failure.
ORE.emit(OptimizationRemarkMissed(LS_NAME, "NoDACSpawning", DLoc,
Header)
<< "cannot spawn iterations using divide-and-conquer");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line looks like it reports the wrong message.

emitMissedWarning(F, L, Hints, &ORE);
return false;
}
}
break;
case LoopSpawningHints::ST_DAC:
DEBUG(dbgs() << "LS: Hints dictate DAC spawning.\n");
{
Expand Down
16 changes: 9 additions & 7 deletions lib/Transforms/Tapir/OpenMPABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,19 +488,19 @@ Function* formatFunctionToTask(Function* extracted, CallInst* cal) {
IRBuilder<> CallerIRBuilder(cal);
auto *SharedsTySize =
CallerIRBuilder.getInt64(DL.getTypeAllocSize(SharedsTy));
auto *KmpTaskTTy = createKmpTaskTTy(C);
//unused -- auto *KmpTaskTTy = createKmpTaskTTy(C);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just delete this line and the other //unused dead lines in this file? Are we worried we'll need them later?

auto *KmpTaskTWithPrivatesTy = createKmpTaskTWithPrivatesTy(SharedsTy);//KmpTaskTTy);
auto *KmpTaskTWithPrivatesPtrTy =
PointerType::getUnqual(KmpTaskTWithPrivatesTy);
auto *KmpTaskTWithPrivatesTySize =
CallerIRBuilder.getInt64(DL.getTypeAllocSize(KmpTaskTWithPrivatesTy));

auto *VoidTy = Type::getVoidTy(C);
auto *Int8PtrTy = Type::getInt8PtrTy(C);
// unused -- auto *Int8PtrTy = Type::getInt8PtrTy(C);
auto *Int32Ty = Type::getInt32Ty(C);

auto *CopyFnTy = FunctionType::get(VoidTy, {Int8PtrTy}, true);
auto *CopyFnPtrTy = PointerType::getUnqual(CopyFnTy);
// unused -- auto *CopyFnTy = FunctionType::get(VoidTy, {Int8PtrTy}, true);
// unused -- auto *CopyFnPtrTy = PointerType::getUnqual(CopyFnTy);

auto *OutlinedFnTy = FunctionType::get(
VoidTy,
Expand Down Expand Up @@ -593,12 +593,12 @@ Function *llvm::OpenMPABI::createDetach(DetachInst &detach,
ValueToValueMapTy &DetachCtxToStackFrame,
DominatorTree &DT, AssumptionCache &AC) {
BasicBlock *detB = detach.getParent();
Function &F = *(detB->getParent());
// unused -- Function &F = *(detB->getParent());

BasicBlock *Spawned = detach.getDetached();
BasicBlock *Continue = detach.getContinue();

Module *M = F.getParent();
// unused -- Module *M = F.getParent();

CallInst *cal = nullptr;
Function *extracted = extractDetachBodyToFunction(detach, DT, AC, &cal);
Expand Down Expand Up @@ -676,7 +676,7 @@ void llvm::OpenMPABI::postProcessFunction(Function &F) {
}
}

for(int i=1; i<VisitedVec.size(); i++) {
for(unsigned int i=1; i<VisitedVec.size(); i++) {
for (auto P : predecessors(VisitedVec[i])) {
if (Visited.count(P) == 0) {
std::swap(VisitedVec[0], VisitedVec[i]);
Expand Down Expand Up @@ -803,6 +803,8 @@ void llvm::OpenMPABI::postProcessFunction(Function &F) {
OpenMPRuntimeFunction::OMPRTL__kmpc_fork_call, F.getParent());
// Replace the old call with __kmpc_fork_call
auto *ForkCall = emitRuntimeCall(ForkRTFn, OMPRegionFnArgs, "", b);
assert(ForkCall != 0); // play it safe -- something better to do here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm being nit-picky, but for matching code style, I think we would write assert(ForkCall && "Some helpful error message."); or assert(ForkCall != nullptr && "Some helpful error message.");

ExtractedFnCI->eraseFromParent();
RegionFn->eraseFromParent();
}
Expand Down
Loading