[VP] IR support for vector-predicated integer operations
This patch is part of the integer patch set of the Vector Predication
extension (D57504).

VP / integer slice / patch #1
simoll committed Nov 18, 2019
1 parent eedb964 commit 62c4a45
Showing 11 changed files with 1,187 additions and 4 deletions.
668 changes: 668 additions & 0 deletions llvm/docs/LangRef.rst

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions llvm/include/llvm/IR/IntrinsicInst.h
@@ -206,6 +206,48 @@ namespace llvm {
/// @}
};

/// This is the common base class for vector predication intrinsics.
class VPIntrinsic : public IntrinsicInst {
public:
static Optional<int> GetMaskParamPos(Intrinsic::ID IntrinsicID);
static Optional<int> GetVectorLengthParamPos(Intrinsic::ID IntrinsicID);

/// \return the llvm.vp.* intrinsic for the instruction opcode \p OC.
static Intrinsic::ID GetForOpcode(unsigned OC);

// Whether \p ID is a VP intrinsic ID.
static bool IsVPIntrinsic(Intrinsic::ID);

/// \return the mask parameter or nullptr.
Value *getMaskParam() const;

/// \return the vector length parameter or nullptr.
Value *getVectorLengthParam() const;

/// \return whether the vector length param can be ignored.
bool canIgnoreVectorLengthParam() const;

/// \return the static element count (number of vector elements) that the
/// vector length parameter applies to.
ElementCount getVectorLength() const;

// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return IsVPIntrinsic(I->getIntrinsicID());
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}

// Equivalent non-predicated opcode
unsigned getFunctionalOpcode() const {
return GetFunctionalOpcodeForVP(getIntrinsicID());
}

// Equivalent non-predicated opcode
static unsigned GetFunctionalOpcodeForVP(Intrinsic::ID ID);
};

/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
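For context, a minimal usage sketch of the new VPIntrinsic accessors from a caller that already holds a CallInst. The helper name inspectVPCall and the printing are illustrative only, not part of this patch:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: print the predication operands of an llvm.vp.* call.
static void inspectVPCall(CallInst &CI) {
  auto *VPI = dyn_cast<VPIntrinsic>(&CI);
  if (!VPI)
    return; // Not a vector-predicated intrinsic.

  if (Value *Mask = VPI->getMaskParam())
    errs() << "mask: " << *Mask << "\n";
  if (Value *EVL = VPI->getVectorLengthParam())
    errs() << "vlen: " << *EVL << "\n";

  // When the vector length cannot mask off any lanes, the call is equivalent
  // to its unpredicated opcode applied under the mask alone.
  if (VPI->canIgnoreVectorLengthParam())
    errs() << Instruction::getOpcodeName(VPI->getFunctionalOpcode()) << "\n";
}

The dyn_cast works because the classof overloads above dispatch on IsVPIntrinsic.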
77 changes: 77 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
@@ -27,6 +27,10 @@ class IntrinsicProperty;
// effects. It may be CSE'd, deleted if dead, etc.
def IntrNoMem : IntrinsicProperty;

// IntrNoSync - Threads executing the intrinsic will not synchronize using
// memory or other means.
def IntrNoSync : IntrinsicProperty;

// IntrReadMem - This intrinsic only reads from memory. It does not write to
// memory and has no other side effects. Therefore, it cannot be moved across
// potentially aliasing stores. However, it can be reordered otherwise and can
@@ -1099,6 +1103,79 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWil
def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

//===---------------- Vector Predication Intrinsics --------------===//

// Binary operators
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_add : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_or : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_and : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;

}


//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
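Since each definition is overloaded only on the vector operand type (the mask and vector-length types are derived from it), a declaration can be requested through the usual intrinsic machinery. A sketch under that assumption; emitVPAdd is a hypothetical helper, not part of this patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: emit %r = call @llvm.vp.add.*(%lhs, %rhs, %mask, %evl).
static Value *emitVPAdd(IRBuilder<> &Builder, Module &M, Value *LHS,
                        Value *RHS, Value *Mask, Value *EVL) {
  // Only the vector result/operand type is overloaded; the i1 mask width and
  // the i32 vector length follow from the definitions above.
  Function *VPAdd =
      Intrinsic::getDeclaration(&M, Intrinsic::vp_add, {LHS->getType()});
  return Builder.CreateCall(VPAdd, {LHS, RHS, Mask, EVL});
}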
84 changes: 84 additions & 0 deletions llvm/include/llvm/IR/VPIntrinsics.def
@@ -0,0 +1,84 @@
//===-- IR/VPIntrinsics.def - Describes llvm.vp.* Intrinsics -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains descriptions of the various Vector Predication intrinsics.
// This is used as a central place for enumerating the different instructions
// and should eventually be the place to put comments about the instructions.
//
//===----------------------------------------------------------------------===//

// NOTE: NO INCLUDE GUARD DESIRED!

// Provide definitions of macros so that users of this file do not have to
// define everything to use it...
//
#ifndef REGISTER_VP_INTRINSIC
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)
#endif

// Map this VP intrinsic to its functional Opcode
#ifndef HANDLE_VP_TO_OC
#define HANDLE_VP_TO_OC(VPID, OC)
#endif

///// Integer Arithmetic /////

// llvm.vp.add(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_add, 2, 3)
HANDLE_VP_TO_OC(vp_add, Add)

// llvm.vp.and(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_and, 2, 3)
HANDLE_VP_TO_OC(vp_and, And)

// llvm.vp.ashr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_ashr, 2, 3)
HANDLE_VP_TO_OC(vp_ashr, AShr)

// llvm.vp.lshr(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_lshr, 2, 3)
HANDLE_VP_TO_OC(vp_lshr, LShr)

// llvm.vp.mul(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_mul, 2, 3)
HANDLE_VP_TO_OC(vp_mul, Mul)

// llvm.vp.or(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_or, 2, 3)
HANDLE_VP_TO_OC(vp_or, Or)

// llvm.vp.sdiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3)
HANDLE_VP_TO_OC(vp_sdiv, SDiv)

// llvm.vp.shl(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_shl, 2, 3)
HANDLE_VP_TO_OC(vp_shl, Shl)

// llvm.vp.srem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_srem, 2, 3)
HANDLE_VP_TO_OC(vp_srem, SRem)

// llvm.vp.sub(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_sub, 2, 3)
HANDLE_VP_TO_OC(vp_sub, Sub)

// llvm.vp.udiv(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_udiv, 2, 3)
HANDLE_VP_TO_OC(vp_udiv, UDiv)

// llvm.vp.urem(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_urem, 2, 3)
HANDLE_VP_TO_OC(vp_urem, URem)

// llvm.vp.xor(x,y,mask,vlen)
REGISTER_VP_INTRINSIC(vp_xor, 2, 3)
HANDLE_VP_TO_OC(vp_xor, Xor)

#undef REGISTER_VP_INTRINSIC
#undef HANDLE_VP_TO_OC
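The file follows the usual LLVM X-macro pattern, so clients define the macros they care about before including it. A sketch of enumerating every registered VP intrinsic into a table; the VPEntry struct is illustrative, not part of this patch:

#include "llvm/IR/Intrinsics.h"

namespace {
// Hypothetical table: intrinsic ID plus the operand positions of its mask
// and explicit vector length, one row per REGISTER_VP_INTRINSIC entry.
struct VPEntry {
  llvm::Intrinsic::ID ID;
  int MaskPos;
  int VLenPos;
};

static const VPEntry VPTable[] = {
#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                          \
  {llvm::Intrinsic::VPID, MASKPOS, VLENPOS},
#include "llvm/IR/VPIntrinsics.def"
};
} // anonymous namespace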
113 changes: 113 additions & 0 deletions llvm/lib/IR/IntrinsicInst.cpp
@@ -160,6 +160,119 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
}
}

ElementCount VPIntrinsic::getVectorLength() const {
auto GetVectorLengthOfType = [](const Type *T) -> ElementCount {
auto VT = cast<VectorType>(T);
auto ElemCount = VT->getElementCount();
return ElemCount;
};

auto VPMask = getMaskParam();
return GetVectorLengthOfType(VPMask->getType());
}

Value *VPIntrinsic::getMaskParam() const {
auto maskPos = GetMaskParamPos(getIntrinsicID());
if (maskPos)
return getArgOperand(maskPos.getValue());
return nullptr;
}

Value *VPIntrinsic::getVectorLengthParam() const {
auto vlenPos = GetVectorLengthParamPos(getIntrinsicID());
if (vlenPos)
return getArgOperand(vlenPos.getValue());
return nullptr;
}

Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
default:
return None;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
}
}

Optional<int> VPIntrinsic::GetVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
default:
return None;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
return VLENPOS;
#include "llvm/IR/VPIntrinsics.def"
}
}

bool VPIntrinsic::IsVPIntrinsic(Intrinsic::ID ID) {
switch (ID) {
default:
return false;

#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
break;
#include "llvm/IR/VPIntrinsics.def"
}
return true;
}

// Equivalent non-predicated opcode
unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) {
switch (ID) {
default:
return Instruction::Call;

#define HANDLE_VP_TO_OC(VPID, OC) \
case Intrinsic::VPID: \
return Instruction::OC;
#include "llvm/IR/VPIntrinsics.def"
}
}

Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned OC) {
switch (OC) {
default:
return Intrinsic::not_intrinsic;

#define HANDLE_VP_TO_OC(VPID, OC) \
case Instruction::OC: \
return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"
}
}

bool VPIntrinsic::canIgnoreVectorLengthParam() const {
// No vlen param - no lanes masked-off by it.
auto *VLParam = getVectorLengthParam();
if (!VLParam)
return true;

// Can ignore if MSB of vlen is set.
auto VLConst = dyn_cast<ConstantInt>(VLParam);
if (VLConst && VLConst->getSExtValue() < 0)
return true;

// Vlen param greater-equal type vlen - no lanes masked-off.
if (VLConst) {
auto ElemCount = getVectorLength();
if (ElemCount.Scalable)
return false;

uint64_t VLNum = VLConst->getZExtValue();
if (VLNum >= ElemCount.Min)
return true;
}

// Cannot ignore vlen param by default.
return false;
}

Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
switch (getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
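A sketch of how an optimization might combine these helpers: when the vector length operand cannot mask off any lanes, the call can be reasoned about as the plain binary operator guarded only by its mask. The predicate below is hypothetical, not part of this patch:

#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical predicate: the %evl operand masks off no lanes (e.g. it is -1,
// or a constant >= the static element count), so only the mask matters.
static bool behavesLikeMaskedBinOp(const VPIntrinsic &VPI) {
  if (!VPI.canIgnoreVectorLengthParam())
    return false;
  return Instruction::isBinaryOp(VPI.getFunctionalOpcode());
}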
34 changes: 34 additions & 0 deletions llvm/test/Verifier/vp-intrinsics.ll
@@ -0,0 +1,34 @@
; RUN: opt --verify %s

define void @test_vp_int(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) {
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
ret void
}

; integer arith
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
; bit arith
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
1 change: 1 addition & 0 deletions llvm/unittests/IR/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_unittest(IRTests
ValueTest.cpp
VectorTypesTest.cpp
VerifierTest.cpp
VPIntrinsicTest.cpp
WaymarkTest.cpp
)

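The new VPIntrinsicTest.cpp is not rendered above. As a rough sketch of the kind of property such a unit test could check against the API added in this patch (hypothetical contents, not the actual test):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "gtest/gtest.h"

using namespace llvm;

// Hypothetical check: vp.add reports its mask/vlen positions and maps to Add.
TEST(VPIntrinsicSketch, ParamPositions) {
  EXPECT_TRUE(VPIntrinsic::IsVPIntrinsic(Intrinsic::vp_add));
  EXPECT_TRUE(VPIntrinsic::GetMaskParamPos(Intrinsic::vp_add).hasValue());
  EXPECT_TRUE(VPIntrinsic::GetVectorLengthParamPos(Intrinsic::vp_add).hasValue());
  EXPECT_EQ(VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::vp_add),
            unsigned(Instruction::Add));
  EXPECT_EQ(VPIntrinsic::GetForOpcode(Instruction::Add), Intrinsic::vp_add);
}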