[VP] IR support for vector-predicated integer operations
This patch is part of the integer patch set of the Vector Predication
extension (D57504).

VP / integer slice / patch #1
simoll committed Nov 18, 2019
1 parent eedb964 commit 62c4a45
Showing 11 changed files with 1,187 additions and 4 deletions.
668 changes: 668 additions & 0 deletions llvm/docs/LangRef.rst

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions llvm/include/llvm/IR/IntrinsicInst.h
@@ -206,6 +206,48 @@ namespace llvm {
/// @}
};

/// This is the common base class for vector predication intrinsics.
class VPIntrinsic : public IntrinsicInst {
public:
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);

/// \return the llvm.vp.* intrinsic for the instruction opcode \p OC.
static Intrinsic::ID GetForOpcode(unsigned OC);

// Whether \p ID is a VP intrinsic ID.
static bool IsVPIntrinsic(Intrinsic::ID);

/// \return the mask parameter or nullptr.
Value *getMaskParam() const;

/// \return the vector length parameter or nullptr.
Value *getVectorLengthParam() const;

/// \return whether the vector length param can be ignored.
bool canIgnoreVectorLengthParam() const;

/// \return the static element count (number of vector elements) that the
/// vector length parameter applies to.
ElementCount getVectorLength() const;

// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return IsVPIntrinsic(I->getIntrinsicID());
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}

// Equivalent non-predicated opcode
unsigned getFunctionalOpcode() const {
return GetFunctionalOpcodeForVP(getIntrinsicID());
}

// Equivalent non-predicated opcode
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
};

/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
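For context, a minimal usage sketch of the new VPIntrinsic accessors from a caller that already holds a CallInst. The helper name inspectVPCall and the printing are illustrative only, not part of this patch:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: print the predication operands of an llvm.vp.* call.
static void inspectVPCall(CallInst &CI) {
  auto *VPI = dyn_cast<VPIntrinsic>(&CI);
  if (!VPI)
    return; // Not a vector-predicated intrinsic.

  if (Value *Mask = VPI->getMaskParam())
    errs() << "mask: " << *Mask << "\n";
  if (Value *EVL = VPI->getVectorLengthParam())
    errs() << "vlen: " << *EVL << "\n";

  // When the vector length cannot mask off any lanes, the call is equivalent
  // to its unpredicated opcode applied under the mask alone.
  if (VPI->canIgnoreVectorLengthParam())
    errs() << Instruction::getOpcodeName(VPI->getFunctionalOpcode()) << "\n";
}

The dyn_cast works because the classof overloads above dispatch on IsVPIntrinsic.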
77 changes: 77 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
@@ -27,6 +27,10 @@ class IntrinsicProperty;
// effects. It may be CSE'd, deleted if dead, etc.
def IntrNoMem : IntrinsicProperty;

// IntrNoSync - Threads executing the intrinsic will not synchronize using
// memory or other means.
def IntrNoSync : IntrinsicProperty;

// IntrReadMem - This intrinsic only reads from memory. It does not write to
// memory and has no other side effects. Therefore, it cannot be moved across
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -1099,6 +1103,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===---------------- Vector Predication Intrinsics --------------===//

// Binary operators
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;

}


//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
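Since each definition is overloaded only on the vector operand type (the mask and vector-length types are derived from it), a declaration can be requested through the usual intrinsic machinery. A sketch under that assumption; emitVPAdd is a hypothetical helper, not part of this patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: emit %r = call @llvm.vp.add.*(%lhs, %rhs, %mask, %evl).
static Value *emitVPAdd(IRBuilder<> &Builder, Module &M, Value *LHS,
                        Value *RHS, Value *Mask, Value *EVL) {
  // Only the vector result/operand type is overloaded; the i1 mask width and
  // the i32 vector length follow from the definitions above.
  Function *VPAdd =
      Intrinsic::getDeclaration(&M, Intrinsic::vp_add, {LHS->getType()});
  return Builder.CreateCall(VPAdd, {LHS, RHS, Mask, EVL});
}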
84 changes: 84 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
@@ -0,0 +1,84 @@
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains descriptions of the various Vector Predication intrinsics.
// This is used as a central place for enumerating the different instructions
// and should eventually be the place to put comments about the instructions.
//
//===----------------------------------------------------------------------===//

// NOTE: NO INCLUDE GUARD DESIRED!

// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
//
#ifndef REGISTER_VP_INTRINSIC
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
#endif

// Map this VP intrinsic to its functional Opcode
#ifndef HANDLE_VP_TO_OC
#define HANDLE_VP_TO_OC(VPID, OC)
#endif

///// Integer Arithmetic /////

// llvm.vp.add(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
HANDLE_VP_TO_OC(vp_add, Add)

// llvm.vp.and(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
HANDLE_VP_TO_OC(vp_and, And)

// llvm.vp.ashr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
HANDLE_VP_TO_OC(vp_ashr, AShr)

// llvm.vp.lshr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
HANDLE_VP_TO_OC(vp_lshr, LShr)

// llvm.vp.mul(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
HANDLE_VP_TO_OC(vp_mul, Mul)

// llvm.vp.or(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
HANDLE_VP_TO_OC(vp_or, Or)

// llvm.vp.sdiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
HANDLE_VP_TO_OC(vp_sdiv, SDiv)

// llvm.vp.shl(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
HANDLE_VP_TO_OC(vp_shl, Shl)

// llvm.vp.srem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
HANDLE_VP_TO_OC(vp_srem, SRem)

// llvm.vp.sub(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
HANDLE_VP_TO_OC(vp_sub, Sub)

// llvm.vp.udiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
HANDLE_VP_TO_OC(vp_udiv, UDiv)

// llvm.vp.urem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
HANDLE_VP_TO_OC(vp_urem, URem)

// llvm.vp.xor(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
HANDLE_VP_TO_OC(vp_xor, Xor)

#undef REGISTER_VP_INTRINSIC
#undef HANDLE_VP_TO_OC
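The file follows the usual LLVM X-macro pattern, so clients define the macros they care about before including it. A sketch of enumerating every registered VP intrinsic into a table; the VPEntry struct is illustrative, not part of this patch:

#include "llvm/IR/Intrinsics.h"

namespace {
// Hypothetical table: intrinsic ID plus the operand positions of its mask
// and explicit vector length, one row per REGISTER_VP_INTRINSIC entry.
struct VPEntry {
  llvm::Intrinsic::ID ID;
  int MaskPos;
  int VLenPos;
};

static const VPEntry VPTable[] = {
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  {llvm::Intrinsic::VPID, MASKPOS, VLENPOS},
#include "llvm/IR/VPIntrinsics.def"
};
} // anonymous namespace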
113 changes: 113 additions & 0 deletions llvm/lib/IR/IntrinsicInst.cpp
@@ -160,6 +160,119 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
}
}

ElementCount VPIntrinsic::getVectorLength() const {
auto GetVectorLengthOfType = [](const Type *T) -> ElementCount {
auto VT = cast<VectorType>(T);
auto ElemCount = VT->getElementCount();
return ElemCount;
};

auto VPMask = getMaskParam();
return GetVectorLengthOfType(VPMask->getType());
}

Value *VPIntrinsic::getMaskParam() const {
auto maskPos = GetMaskParamPos(getIntrinsicID());
if (maskPos)
return getArgOperand(maskPos.getValue());
return nullptr;
}

Value *VPIntrinsic::getVectorLengthParam() const {
auto vlenPos = GetVectorLengthParamPos(getIntrinsicID());
if (vlenPos)
return getArgOperand(vlenPos.getValue());
return nullptr;
}

Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
default:
return None;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
}
}

Optional<int> VPIntrinsic::GetVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
default:
return None;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return VLENPOS;
#include "llvm/IR/VPIntrinsics.def"
}
}

bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
switch (ID) {
default:
return false;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
break;
#include "llvm/IR/VPIntrinsics.def"
}
return true;
}

// Equivalent non-predicated opcode
unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) {
switch (ID) {
default:
return Instruction::Call;

#define HANDLE_VP_TO_OC(VPID, OC) \
case Intrinsic::VPID: \
return Instruction::OC;
#include "llvm/IR/VPIntrinsics.def"
}
}

Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned OC) {
switch (OC) {
default:
return Intrinsic::not_intrinsic;

#define HANDLE_VP_TO_OC(VPID, OC) \
case Instruction::OC: \
return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"
}
}

bool VPIntrinsic::canIgnoreVectorLengthParam() const {
// No vlen param - no lanes masked-off by it.
auto *VLParam = getVectorLengthParam();
if (!VLParam)
return true;

// Can ignore if MSB of vlen is set.
auto VLConst = dyn_cast<ConstantInt>(VLParam);
if (VLConst && VLConst->getSExtValue() < 0)
return true;

// Vlen param greater-equal type vlen - no lanes masked-off.
if (VLConst) {
auto ElemCount = getVectorLength();
if (ElemCount.Scalable)
return false;

uint64_t VLNum = VLConst->getZExtValue();
if (VLNum >= ElemCount.Min)
return true;
}

// Cannot ignore vlen param by default.
return false;
}

Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
switch (getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
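A sketch of how an optimization might combine these helpers: when the vector length operand cannot mask off any lanes, the call can be reasoned about as the plain binary operator guarded only by its mask. The predicate below is hypothetical, not part of this patch:

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical predicate: the %evl operand masks off no lanes (e.g. it is -1,
// or a constant >= the static element count), so only the mask matters.
static bool behavesLikeMaskedBinOp(const VPIntrinsic &VPI) {
  if (!VPI.canIgnoreVectorLengthParam())
    return false;
  return Instruction::isBinaryOp(VPI.getFunctionalOpcode());
}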
34 changes: 34 additions & 0 deletions llvm/test/Verifier/vp-intrinsics.ll
@@ -0,0 +1,34 @@
; RUN: opt --verify %s

define void @test_vp_int(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) {
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
ret void
}

; integer arith
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
; bit arith
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
1 change: 1 addition & 0 deletions llvm/unittests/IR/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_unittest(IRTests
ValueTest.cpp
VectorTypesTest.cpp
VerifierTest.cpp
VPIntrinsicTest.cpp
WaymarkTest.cpp
)

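The new VPIntrinsicTest.cpp is not rendered above. As a rough sketch of the kind of property such a unit test could check against the API added in this patch (hypothetical contents, not the actual test):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "gtest/gtest.h"

using namespace llvm;

// Hypothetical check: vp.add reports its mask/vlen positions and maps to Add.
TEST(VPIntrinsicSketch, ParamPositions) {
  EXPECT_TRUE(VPIntrinsic::IsVPIntrinsic(Intrinsic::vp_add));
  EXPECT_TRUE(VPIntrinsic::GetMaskParamPos(Intrinsic::vp_add).hasValue());
  EXPECT_TRUE(VPIntrinsic::GetVectorLengthParamPos(Intrinsic::vp_add).hasValue());
  EXPECT_EQ(VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::vp_add),
            unsigned(Instruction::Add));
  EXPECT_EQ(VPIntrinsic::GetForOpcode(Instruction::Add), Intrinsic::vp_add);
}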