Skip to content

Commit

Permalink
Add a load of comments
Browse files Browse the repository at this point in the history
  • Loading branch information
MacDue committed Feb 12, 2025
1 parent cc6fcd3 commit 8065001
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1473,11 +1473,13 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind,
unsigned Index = -1) const;

/* Downstream change: #87 (sincos vectorization)*/
/// \return The expected cost of aggregate inserts and extracts. This is
/// used when the instruction is not available; a typical use case is to
/// provision the cost of vectorization/scalarization in vectorizer passes.
InstructionCost getInsertExtractValueCost(unsigned Opcode,
TTI::TargetCostKind CostKind) const;
/* End downstream change: #87 */

/// \return The cost of replication shuffle of \p VF elements typed \p EltTy
/// \p ReplicationFactor times.
Expand Down Expand Up @@ -2211,8 +2213,10 @@ class TargetTransformInfo::Concept {
const APInt &DemandedDstElts,
TTI::TargetCostKind CostKind) = 0;

/* Downstream change: #87 (sincos vectorization)*/
virtual InstructionCost
getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) = 0;
/* End downstream change: #87 */

virtual InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
Expand Down Expand Up @@ -2935,11 +2939,13 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
DemandedDstElts, CostKind);
}
/* Downstream change: #87 (sincos vectorization)*/
/// Model-side hook for the cost of aggregate inserts and extracts. Hands
/// \p Opcode and \p CostKind to the wrapped implementation \c Impl and returns
/// whatever it reports, unmodified.
InstructionCost
getInsertExtractValueCost(unsigned Opcode,
                          TTI::TargetCostKind CostKind) override {
  InstructionCost Cost = Impl.getInsertExtractValueCost(Opcode, CostKind);
  return Cost;
}
/* End downstream change: #87 */
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,7 @@ class TargetTransformInfoImplBase {
return 1;
}

/* Downstream change: #87 (sincos vectorization)*/
InstructionCost
getInsertExtractValueCost(unsigned Opcode,
TTI::TargetCostKind CostKind) const {
Expand All @@ -755,6 +756,7 @@ class TargetTransformInfoImplBase {
return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
return TTI::TCC_Free;
}
/* End downstream change: #87 */

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
Expand Down Expand Up @@ -1309,9 +1311,11 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
case Instruction::Freeze:
return TTI::TCC_Free;
/* Downstream change: #87 (sincos vectorization)*/
case Instruction::ExtractValue:
case Instruction::InsertValue:
return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
/* End downstream change: #87 */
case Instruction::Alloca:
if (cast<AllocaInst>(U)->isStaticAlloca())
return TTI::TCC_Free;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,10 @@ class LoopVectorizationLegality {
/// has a vectorized variant available.
bool hasVectorCallVariants() const { return VecCallVariantsFound; }

/* Downstream change: #87 (sincos vectorization)*/
// Removed hasStructVectorCall()
/* End downstream change: #87 */

/// \return The value of \c LAI->getNumStores(). \c LAI is dereferenced
/// without a null check, so it must be set before this is called.
unsigned getNumStores() const { return LAI->getNumStores(); }
/// \return The value of \c LAI->getNumLoads(). \c LAI is dereferenced
/// without a null check, so it must be set before this is called.
unsigned getNumLoads() const { return LAI->getNumLoads(); }

Expand Down Expand Up @@ -635,6 +639,10 @@ class LoopVectorizationLegality {
/// the use of those function variants.
bool VecCallVariantsFound = false;

/* Downstream change: #87 (sincos vectorization)*/
// Removed StructVecCallFound
/* End downstream change: #87 */

/// Keep track of all the countable and uncountable exiting blocks if
/// the exact backedge taken count is not computable.
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,7 @@ TargetTransformInfo::getVectorInstrCost(const Instruction &I, Type *Val,
return Cost;
}

/* Downstream change: #87 (sincos vectorization)*/
InstructionCost TargetTransformInfo::getInsertExtractValueCost(
unsigned Opcode, TTI::TargetCostKind CostKind) const {
assert((Opcode == Instruction::InsertValue ||
Expand All @@ -1122,6 +1123,7 @@ InstructionCost TargetTransformInfo::getInsertExtractValueCost(
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
/* End downstream change: #87 */

InstructionCost TargetTransformInfo::getReplicationShuffleCost(
Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI && !VFDatabase::getMappings(*CI).empty())
VecCallVariantsFound = true;

/* Downstream change: #87 (sincos vectorization)*/
auto CanWidenInstructionTy = [](Instruction const &Inst) {
Type *InstTy = Inst.getType();
if (!isa<StructType>(InstTy))
Expand All @@ -965,6 +966,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
all_of(Inst.users(), IsaPred<ExtractValueInst>);
};
/* End downstream change: #87 */

// Check that the instruction return type is vectorizable.
// We can't vectorize casts from vector type to scalar type.
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2350,9 +2350,11 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
const VPLane &Lane,
VPTransformState &State) {
/* Downstream change: #87 (sincos vectorization)*/
assert((!Instr->getType()->isAggregateType() ||
canVectorizeTy(Instr->getType())) &&
"Expected vectorizable or non-aggregate type.");
/* End downstream change: #87 */

// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
Expand Down Expand Up @@ -2857,11 +2859,13 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
return ScalarCallCost;
}

/* Downstream change: #87 (sincos vectorization)*/
/// Widen \p Ty for \p VF when that is possible.
///
/// \return \p Ty itself when \p VF is scalar or canVectorizeTy(\p Ty) is
/// false; otherwise the result of toVectorizedTy(\p Ty, \p VF).
static Type *maybeVectorizeType(Type *Ty, ElementCount VF) {
  // The scalar-VF check comes first so canVectorizeTy() is only queried for
  // non-scalar factors, as before.
  const bool ShouldWiden = !VF.isScalar() && canVectorizeTy(Ty);
  return ShouldWiden ? toVectorizedTy(Ty, VF) : Ty;
}
/* End downstream change: #87 */

InstructionCost
LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
Expand Down Expand Up @@ -3607,6 +3611,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
}
}

/* Downstream change: #87 (sincos vectorization)*/
if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
if (IsOutOfScope(EVI->getAggregateOperand())) {
AddToWorklistIfAllowed(EVI);
Expand All @@ -3617,6 +3622,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
assert(isa<CallInst>(EVI->getAggregateOperand()) &&
"Expected aggregate value to be call return value");
}
/* End downstream change: #87 */

// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
Expand Down Expand Up @@ -4496,6 +4502,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
llvm_unreachable("unhandled recipe");
}

/* Downstream change: #87 (sincos vectorization)*/
auto WillGenerateTargetVectors = [&TTI, VF](Type *VectorTy) {
unsigned NumLegalParts = TTI.getNumberOfParts(VectorTy);
if (!NumLegalParts)
Expand All @@ -4511,6 +4518,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
// Two or more elements that share a register - are vectorized.
return NumLegalParts < VF.getKnownMinValue();
};
/* End downstream change: #87 */

// If no def nor is a store, e.g., branches, continue - no value to check.
if (R.getNumDefinedValues() == 0 &&
Expand All @@ -4528,8 +4536,10 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
if (!Visited.insert({ScalarTy}).second)
continue;
Type *WideTy = toVectorizedTy(ScalarTy, VF);
/* Downstream change: #87 (sincos vectorization)*/
if (any_of(getContainedTypes(WideTy), WillGenerateTargetVectors))
return true;
/* End downstream change: #87 */
}
}

Expand Down Expand Up @@ -5485,13 +5495,15 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
/* Downstream change: #87 (sincos vectorization)*/
Type *WideTy = toVectorizedTy(I->getType(), VF);
for (Type *VectorTy : getContainedTypes(WideTy)) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
/*Insert=*/true,
/*Extract=*/false, CostKind);
}
/* End downstream change: #87 */
ScalarCost +=
VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
}
Expand All @@ -5502,6 +5514,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
// overhead.
for (Use &U : I->operands())
if (auto *J = dyn_cast<Instruction>(U.get())) {
/* Downstream change: #87 (sincos vectorization)*/
assert(canVectorizeTy(J->getType()) &&
"Instruction has non-scalar type");
if (CanBeScalarized(J))
Expand All @@ -5515,6 +5528,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
/*Extract*/ true, CostKind);
}
}
/* End downstream change: #87 */
}

// Scale the total scalar cost by block probability.
Expand Down Expand Up @@ -5992,6 +6006,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
return 0;

InstructionCost Cost = 0;
/* Downstream change: #87 (sincos vectorization)*/
Type *RetTy = toVectorizedTy(I->getType(), VF);
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore())) {
Expand All @@ -6003,6 +6018,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
/*Extract=*/false, CostKind);
}
}
/* End downstream change: #87 */

// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
Expand Down Expand Up @@ -6260,9 +6276,11 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {

bool MaskRequired = Legal->isMaskRequired(CI);
// Compute corresponding vector type for return value and arguments.
/* Downstream change: #87 (sincos vectorization)*/
Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
for (Type *ScalarTy : ScalarTys)
Tys.push_back(toVectorizedTy(ScalarTy, VF));
/* End downstream change: #87 */

// An in-loop reduction using an fmuladd intrinsic is a special case;
// we don't want the normal cost for that intrinsic.
Expand Down Expand Up @@ -6452,6 +6470,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
HasSingleCopyAfterVectorization(I, VF));
VectorTy = RetTy;
} else
// Downstream change: #87 (sincos vectorization)
VectorTy = toVectorizedTy(RetTy, VF);

if (VF.isVector() && VectorTy->isVectorTy() &&
Expand Down Expand Up @@ -8600,6 +8619,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
}
return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
}
/* Downstream change: #87 (sincos vectorization)*/
case Instruction::ExtractValue: {
SmallVector<VPValue *> NewOps(Operands);
Type *I32Ty = IntegerType::getInt32Ty(I->getContext());
Expand All @@ -8609,6 +8629,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
NewOps.push_back(Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
}
/* End downstream change: #87 */
};
}

Expand Down Expand Up @@ -9889,6 +9910,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
VectorType::get(UI->getType(), State.VF));
State.set(this, Poison);
}
// Downstream change: #87 (sincos vectorization)
State.packScalarIntoVectorizedValue(this, *State.Lane);
}
return;
Expand Down Expand Up @@ -10406,6 +10428,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

/* Downstream change: #87 (sincos vectorization)*/
// Removed the StructCallVectorizationUnsupported failure.
/* End downstream change: #87 */

// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,12 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
} else {
// Initialize packing with insertelements to start from poison.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
/* Downstream change: #87 (sincos vectorization)*/
Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorizedValue(Def, Lane);
/* End downstream change: #87 */
VectorValue = get(Def);
}
Builder.restoreIP(OldIP);
Expand Down Expand Up @@ -390,6 +392,7 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
Builder.SetCurrentDebugLocation(DIL);
}

/* Downstream change: #87 (sincos vectorization)*/
void VPTransformState::packScalarIntoVectorizedValue(VPValue *Def,
const VPLane &Lane) {
Value *ScalarInst = get(Def, Lane);
Expand All @@ -409,6 +412,7 @@ void VPTransformState::packScalarIntoVectorizedValue(VPValue *Def,
}
set(Def, WideValue);
}
/* End downstream change: #87 */

BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ struct VPTransformState {
set(Def, V, VPLane(0));
return;
}
// Downstream change: #87 (sincos vectorization)
assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
Expand Down Expand Up @@ -325,9 +326,11 @@ struct VPTransformState {
/// Set the debug location in the builder using the debug location \p DL.
void setDebugLocFrom(DebugLoc DL);

/* Downstream change: #87 (sincos vectorization)*/
/// Construct the vectorized value of a scalarized value \p V one lane at a
/// time.
void packScalarIntoVectorizedValue(VPValue *Def, const VPLane &Lane);
/* End downstream change: #87 */

/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,14 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
case Instruction::FNeg:
case Instruction::Freeze:
return inferScalarType(R->getOperand(0));
/* Downstream change: #87 (sincos vectorization)*/
case Instruction::ExtractValue: {
assert(R->getNumOperands() == 2 && "expected single level extractvalue");
auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
return StructTy->getTypeAtIndex(CI->getZExtValue());
}
/* End downstream change: #87 */
default:
break;
}
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
Arguments.push_back(V);
}

// Downstream change: #87 (sincos vectorization)
Type *RetTy = toVectorizedTy(Ctx.Types.inferScalarType(this), VF);
SmallVector<Type *> ParamTys;
for (unsigned I = 0; I != getNumOperands(); ++I)
Expand Down Expand Up @@ -1422,6 +1423,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
break;
}
/* Downstream change: #87 (sincos vectorization)*/
case Instruction::ExtractValue: {
assert(getNumOperands() == 2 && "expected single level extractvalue");
Value *Op = State.get(getOperand(0));
Expand All @@ -1430,6 +1432,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
State.set(this, Extract);
break;
}
/* End downstream change: #87 */
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));

Expand Down Expand Up @@ -1531,10 +1534,12 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
Ctx.CostKind);
}
/* Downstream change: #87 (sincos vectorization)*/
case Instruction::ExtractValue: {
return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
Ctx.CostKind);
}
/* End downstream change: #87 */
case Instruction::ICmp:
case Instruction::FCmp: {
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
Expand Down

0 comments on commit 8065001

Please sign in to comment.