Skip to content

Commit

Permalink
add ptx backend
Browse files Browse the repository at this point in the history
  • Loading branch information
wsmoses committed Jul 23, 2018
1 parent 2031611 commit 439e83f
Show file tree
Hide file tree
Showing 13 changed files with 924 additions and 18 deletions.
128 changes: 128 additions & 0 deletions include/llvm/Transforms/Tapir/PTXABI.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
***************************************************************************
* Copyright (c) 2017, Los Alamos National Security, LLC.
* All rights reserved.
*
* Copyright 2010. Los Alamos National Security, LLC. This software was
* produced under U.S. Government contract DE-AC52-06NA25396 for Los
* Alamos National Laboratory (LANL), which is operated by Los Alamos
* National Security, LLC for the U.S. Department of Energy. The
* U.S. Government has rights to use, reproduce, and distribute this
* software. NEITHER THE GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY,
* LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY
* FOR THE USE OF THIS SOFTWARE. If software is modified to produce
* derivative works, such modified software should be clearly marked,
* so as not to confuse it with the version available from LANL.
*
* Additionally, redistribution and use in source and binary forms,
* with or without modification, are permitted provided that the
* following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* * Neither the name of Los Alamos National Security, LLC, Los
* Alamos National Laboratory, LANL, the U.S. Government, nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
***************************************************************************/

#ifndef PTX_ABI_H_
#define PTX_ABI_H_

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeBuilder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Transforms/Tapir/LoopSpawning.h"
#include "llvm/Transforms/Tapir/TapirUtils.h"
#include <deque>

namespace llvm {

/// PTXABILoopSpawning is the loop-outlining strategy used for Tapir loops
/// that are handed to the PTX (GPU) backend.
///
/// The loop-spawning pass constructs one instance per loop when the loop's
/// hints select GPU spawning, then calls processLoop() and reports success or
/// failure based on the returned value.
class PTXABILoopSpawning : public LoopOutline {
public:
  /// Construct the outliner for \p OrigLoop.
  ///
  /// All arguments are forwarded unchanged to the LoopOutline base class;
  /// this class adds no construction logic of its own.
  ///
  /// \param OrigLoop the Tapir loop to transform.
  /// \param SE       scalar-evolution analysis for the enclosing function.
  /// \param LI       loop info for the enclosing function.
  /// \param DT       dominator tree for the enclosing function.
  /// \param AC       assumption cache for the enclosing function.
  /// \param ORE      emitter used for optimization remarks.
  PTXABILoopSpawning(Loop *OrigLoop, ScalarEvolution &SE,
                     LoopInfo *LI, DominatorTree *DT,
                     AssumptionCache *AC,
                     OptimizationRemarkEmitter &ORE)
      : LoopOutline(OrigLoop, SE, LI, DT, AC, ORE)
  {}

  /// Attempt to transform the loop passed to the constructor.
  ///
  /// \returns true if the loop was transformed, false otherwise (the caller
  /// treats false as "GPU spawning failed" and emits a missed-optimization
  /// remark).
  bool processLoop();

  virtual ~PTXABILoopSpawning() {}

private:
  /// Counter that starts at 0.
  /// NOTE(review): presumably used to give each generated kernel a distinct
  /// id -- confirm against the implementation in PTXABI.cpp. Being a
  /// non-static member, it restarts at 0 for every instance.
  uint32_t nextKernelId_ = 0;
};

/// TapirTarget implementation for the PTX backend.
///
/// Every member function below overrides the pure-virtual hook of the same
/// name declared on TapirTarget and is marked final, so classes derived from
/// PTXABI cannot override them again. The definitions are not in this header.
class PTXABI : public TapirTarget {
public:
PTXABI();
/// For use in the loop-spawning grainsize calculation (as documented on the
/// TapirTarget declaration of this hook).
Value *GetOrCreateWorker8(Function &F) override final;
/// Hook invoked for a Tapir sync instruction \p inst.
/// NOTE(review): presumably lowers the sync for this target and may consult
/// or update \p DetachCtxToStackFrame -- confirm against the implementation.
void createSync(SyncInst &inst, ValueToValueMapTy &DetachCtxToStackFrame)
override final;

/// Hook invoked for a Tapir detach instruction \p Detach.
/// NOTE(review): the returned Function is presumably the outlined detached
/// body -- confirm against the implementation and the other targets.
Function *createDetach(DetachInst &Detach,
ValueToValueMapTy &DetachCtxToStackFrame,
DominatorTree &DT, AssumptionCache &AC) override final;
/// Per-function hook; by its name it runs before the function's Tapir
/// constructs are lowered -- TODO confirm ordering against the caller.
void preProcessFunction(Function &F) override final;
/// Per-function hook; by its name it runs after lowering -- TODO confirm.
void postProcessFunction(Function &F) override final;
/// Hook applied to a helper function \p F.
/// NOTE(review): presumably the helpers produced by createDetach -- confirm.
void postProcessHelper(Function &F) override final;
/// Hook applied to the program's main function \p F.
/// NOTE(review): the meaning of the bool result is not visible here;
/// presumably "function was modified" -- confirm against the caller.
bool processMain(Function &F) override final;

};

} // end of llvm namespace

#endif
3 changes: 2 additions & 1 deletion include/llvm/Transforms/Tapir/TapirTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ enum class TapirTargetType {
Cilk = 2,
OpenMP = 3,
CilkR = 4,
Qthreads = 5
Qthreads = 5,
PTX = 6
};

} // end namespace llvm
Expand Down
1 change: 1 addition & 0 deletions include/llvm/Transforms/Tapir/TapirUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Function *extractDetachBodyToFunction(DetachInst &Detach,

class TapirTarget {
public:
virtual ~TapirTarget() {};
//! For use in loopspawning grainsize calculation
virtual Value *GetOrCreateWorker8(Function &F) = 0;
virtual void createSync(SyncInst &inst,
Expand Down
8 changes: 5 additions & 3 deletions include/llvm/Transforms/Utils/TapirUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class LoopSpawningHints {
enum SpawningStrategy {
ST_SEQ,
ST_DAC,
ST_GPU,
ST_END,
};

Expand Down Expand Up @@ -93,7 +94,8 @@ class LoopSpawningHints {
return "Spawn iterations sequentially";
case LoopSpawningHints::ST_DAC:
return "Use divide-and-conquer";
case LoopSpawningHints::ST_END:
case LoopSpawningHints::ST_GPU:
return "Use gpu";
default:
return "Unknown";
}
Expand Down Expand Up @@ -142,8 +144,8 @@ class LoopSpawningHints {
/// 4) The loop only branches to the exit block from the header or the latch.
bool isCanonicalTapirLoop(const Loop *L, bool print = false);

//! Identify if a loop could be a DAC loop
bool isDACFor(Loop* L);
//! Identify if a loop should be handled manually by a parallel loop backend
bool isBackendParallelFor(Loop* L);

/// canDetach - Return true if the given function can perform a detach, false
/// otherwise.
Expand Down
1 change: 1 addition & 0 deletions lib/Transforms/Tapir/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_llvm_library(LLVMTapirOpts
CilkABI.cpp
OpenMPABI.cpp
PTXABI.cpp
QthreadsABI.cpp
SmallBlock.cpp
RedundantSpawn.cpp
Expand Down
41 changes: 40 additions & 1 deletion lib/Transforms/Tapir/LoopSpawning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Tapir.h"
#include "llvm/Transforms/Tapir/Outline.h"
#include "llvm/Transforms/Tapir/PTXABI.h"
#include "llvm/Transforms/Tapir/TapirUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
Expand Down Expand Up @@ -75,7 +76,9 @@ static cl::opt<TapirTargetType> ClTapirTarget(
clEnumValN(TapirTargetType::OpenMP,
"openmp", "OpenMP"),
clEnumValN(TapirTargetType::Qthreads,
"qthreads", "Qthreads")));
"qthreads", "Qthreads"),
clEnumValN(TapirTargetType::PTX,
"ptx", "PTX")));

namespace {
// /// \brief This modifies LoopAccessReport to initialize message with
Expand Down Expand Up @@ -115,6 +118,13 @@ static void emitMissedWarning(Function *F, Loop *L,
<< "Tapir loop not transformed: "
<< "failed to use divide-and-conquer loop spawning");
break;
case LoopSpawningHints::ST_GPU:
ORE->emit(DiagnosticInfoOptimizationFailure(
DEBUG_TYPE, "FailedRequestedSpawning",
L->getStartLoc(), L->getHeader())
<< "Tapir loop not transformed: "
<< "failed to use GPU loop spawning");
break;
case LoopSpawningHints::ST_SEQ:
ORE->emit(DiagnosticInfoOptimizationFailure(
DEBUG_TYPE, "SpawningDisabled",
Expand Down Expand Up @@ -1417,6 +1427,35 @@ bool LoopSpawningImpl::processLoop(Loop *L) {
case LoopSpawningHints::ST_SEQ:
DEBUG(dbgs() << "LS: Hints dictate sequential spawning.\n");
break;
case LoopSpawningHints::ST_GPU:
DEBUG(dbgs() << "LS: Hints dictate DAC spawning.\n");
{
DebugLoc DLoc = L->getStartLoc();
BasicBlock *Header = L->getHeader();
PTXABILoopSpawning DLS(L, SE, &LI, &DT, &AC, ORE);
// CilkABILoopSpawning DLS(L, SE, &LI, &DT, &AC, ORE);
// DACLoopSpawning DLS(L, SE, LI, DT, TLI, TTI, ORE);
if (DLS.processLoop()) {
DEBUG({
if (verifyFunction(*L->getHeader()->getParent())) {
dbgs() << "Transformed function is invalid.\n";
return false;
}
});
// Report success.
ORE.emit(OptimizationRemark(LS_NAME, "DACSpawning", DLoc, Header)
<< "spawning iterations using divide-and-conquer");
return true;
} else {
// Report failure.
ORE.emit(OptimizationRemarkMissed(LS_NAME, "NoDACSpawning", DLoc,
Header)
<< "cannot spawn iterations using divide-and-conquer");
emitMissedWarning(F, L, Hints, &ORE);
return false;
}
}
break;
case LoopSpawningHints::ST_DAC:
DEBUG(dbgs() << "LS: Hints dictate DAC spawning.\n");
{
Expand Down
16 changes: 9 additions & 7 deletions lib/Transforms/Tapir/OpenMPABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,19 +488,19 @@ Function* formatFunctionToTask(Function* extracted, CallInst* cal) {
IRBuilder<> CallerIRBuilder(cal);
auto *SharedsTySize =
CallerIRBuilder.getInt64(DL.getTypeAllocSize(SharedsTy));
auto *KmpTaskTTy = createKmpTaskTTy(C);
//unused -- auto *KmpTaskTTy = createKmpTaskTTy(C);
auto *KmpTaskTWithPrivatesTy = createKmpTaskTWithPrivatesTy(SharedsTy);//KmpTaskTTy);
auto *KmpTaskTWithPrivatesPtrTy =
PointerType::getUnqual(KmpTaskTWithPrivatesTy);
auto *KmpTaskTWithPrivatesTySize =
CallerIRBuilder.getInt64(DL.getTypeAllocSize(KmpTaskTWithPrivatesTy));

auto *VoidTy = Type::getVoidTy(C);
auto *Int8PtrTy = Type::getInt8PtrTy(C);
// unused -- auto *Int8PtrTy = Type::getInt8PtrTy(C);
auto *Int32Ty = Type::getInt32Ty(C);

auto *CopyFnTy = FunctionType::get(VoidTy, {Int8PtrTy}, true);
auto *CopyFnPtrTy = PointerType::getUnqual(CopyFnTy);
// unused -- auto *CopyFnTy = FunctionType::get(VoidTy, {Int8PtrTy}, true);
// unused -- auto *CopyFnPtrTy = PointerType::getUnqual(CopyFnTy);

auto *OutlinedFnTy = FunctionType::get(
VoidTy,
Expand Down Expand Up @@ -593,12 +593,12 @@ Function *llvm::OpenMPABI::createDetach(DetachInst &detach,
ValueToValueMapTy &DetachCtxToStackFrame,
DominatorTree &DT, AssumptionCache &AC) {
BasicBlock *detB = detach.getParent();
Function &F = *(detB->getParent());
// unused -- Function &F = *(detB->getParent());

BasicBlock *Spawned = detach.getDetached();
BasicBlock *Continue = detach.getContinue();

Module *M = F.getParent();
// unused -- Module *M = F.getParent();

CallInst *cal = nullptr;
Function *extracted = extractDetachBodyToFunction(detach, DT, AC, &cal);
Expand Down Expand Up @@ -676,7 +676,7 @@ void llvm::OpenMPABI::postProcessFunction(Function &F) {
}
}

for(int i=1; i<VisitedVec.size(); i++) {
for(unsigned int i=1; i<VisitedVec.size(); i++) {
for (auto P : predecessors(VisitedVec[i])) {
if (Visited.count(P) == 0) {
std::swap(VisitedVec[0], VisitedVec[i]);
Expand Down Expand Up @@ -803,6 +803,8 @@ void llvm::OpenMPABI::postProcessFunction(Function &F) {
OpenMPRuntimeFunction::OMPRTL__kmpc_fork_call, F.getParent());
// Replace the old call with __kmpc_fork_call
auto *ForkCall = emitRuntimeCall(ForkRTFn, OMPRegionFnArgs, "", b);
assert(ForkCall != 0); // play it safe -- something better to do here?

ExtractedFnCI->eraseFromParent();
RegionFn->eraseFromParent();
}
Expand Down
Loading

0 comments on commit 439e83f

Please sign in to comment.