Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

End to end support for bfp16 scl2vec intrinsics #278

Open
wants to merge 2 commits into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/BuiltinsAIE2P.def
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,6 @@ BUILTIN(__builtin_aie2p_tanh, "V16yV16g", "nc")

//division/mod
BUILTIN(__builtin_aie2p_divstep, "vUi&Ui&Ui", "nc")

// SHUFFLE
BUILTIN(__builtin_aie2p_vshuffle_576_bfp16, "vV64cV8cV64cV8ciV64c&V8c&", "nc")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as for the llvm intrinsic, we don't need the 576 in the name

29 changes: 28 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22048,6 +22048,8 @@ static llvm::Intrinsic::ID getAIE2PIntrinsicFunction(unsigned BuiltinID) {
return Intrinsic::aie2p_vsub_lt32;
case AIE::BI__builtin_aie2p_divstep:
return Intrinsic::aie2p_divs;
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16:
return Intrinsic::aie2p_vshuffle_576_bfp16;
default:
break;
}
Expand Down Expand Up @@ -22274,6 +22276,30 @@ Value *CodeGenFunction::EmitAIEBuiltinExpr(unsigned BuiltinID,
Value *DivAddr = EmitLValue(E->getArg(1)).getPointer(*this);
return Builder.CreateDefaultAlignedStore(Div, DivAddr);
}
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16: {
SmallVector<Value *, 3> Ops;
for (unsigned I = 0; I < E->getNumArgs() - 2; I++)
Ops.push_back(EmitScalarExpr(E->getArg(I)));

llvm::Intrinsic::ID IntrinsicID = getAIEIntrinsicFunction(BuiltinID, Arch);
assert(IntrinsicID != Intrinsic::not_intrinsic);
Function *F = CGM.getIntrinsic(IntrinsicID);
Value *Val = Builder.CreateCall(F, Ops);

// The first member of the returned struct is the mantissa part of bfp16,
// store it to the first input reference
Value *Mant = Builder.CreateExtractValue(Val, 0);
Value *MantAddr =
EmitLValue(E->getArg(E->getNumArgs() - 2)).getPointer(*this);
Builder.CreateDefaultAlignedStore(Mant, MantAddr);

// The second member of the returned struct is the exponent part of bfp16
// store it to the second input reference
Value *Exp = Builder.CreateExtractValue(Val, 1);
Value *ExpAddr =
EmitLValue(E->getArg(E->getNumArgs() - 1)).getPointer(*this);
return Builder.CreateDefaultAlignedStore(Exp, ExpAddr);
}
default:
break;
}
Expand Down Expand Up @@ -22452,7 +22478,8 @@ Value *CodeGenFunction::EmitAIE2PBuiltinExpr(unsigned BuiltinID,
case AIE::BI__builtin_aie2p_vsub_lt8:
case AIE::BI__builtin_aie2p_vsub_lt16:
case AIE::BI__builtin_aie2p_vsub_lt32:
case AIE::BI__builtin_aie2p_divstep: {
case AIE::BI__builtin_aie2p_divstep:
case AIE::BI__builtin_aie2p_vshuffle_576_bfp16: {
return this->EmitAIEBuiltinExpr(BuiltinID, E, Arch);
}
default:
Expand Down
4 changes: 0 additions & 4 deletions clang/lib/Headers/aie2p_aie_api_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,10 +812,6 @@ inline __attribute__((always_inline)) v256uint8_sparse shuffle(v256uint8_sparse
inline __attribute__((always_inline)) v128uint16_sparse shuffle(v128uint16_sparse , int );
inline __attribute__((always_inline)) v128uint8_sparse shuffle(v128uint8_sparse , int );
inline __attribute__((always_inline)) v64uint16_sparse shuffle(v64uint16_sparse , int );
inline __attribute__((always_inline)) v64bfp16ebs8 shuffle(v64bfp16ebs8 , v64bfp16ebs8 , unsigned int );
inline __attribute__((always_inline)) v64bfp16ebs16 shuffle(v64bfp16ebs16 , v64bfp16ebs16 , unsigned int );
inline __attribute__((always_inline)) v64bfp16ebs8 shuffle(v64bfp16ebs8 , unsigned int );
inline __attribute__((always_inline)) v64bfp16ebs16 shuffle(v64bfp16ebs16 , unsigned int );
inline __attribute__((always_inline)) v128bfp16ebs8 shuffle(v128bfp16ebs8 , unsigned int );
inline __attribute__((always_inline)) v128bfp16ebs16_sparse shuffle(v128bfp16ebs16_sparse , unsigned int );
inline __attribute__((always_inline)) v256bfp16ebs16_sparse shuffle(v256bfp16ebs16_sparse , unsigned int );
Expand Down
28 changes: 28 additions & 0 deletions clang/lib/Headers/aie2p_scl2vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -1806,4 +1806,32 @@ INTRINSIC(v64accfloat) broadcast_zero_to_v64accfloat() {
return __builtin_bit_cast(v64accfloat, (float)0 - v64float{0});
}

/// Two-source shuffle of v64bfp16ebs8 values.
///
/// Forwards the mantissa and exponent members of \p a and \p b, together with
/// \p mode, to __builtin_aie2p_vshuffle_576_bfp16. The builtin writes its two
/// results through the trailing reference arguments, which are bound to the
/// mantissa and exponent members of the value returned here.
/// NOTE(review): the meaning of the individual \p mode values is not visible
/// from this header - confirm against the ISA documentation.
INTRINSIC(v64bfp16ebs8)
shuffle(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) {
  v64bfp16ebs8 shuffled;
  // The output members are viewed through the char-vector reference types
  // used for the builtin's two out-parameters.
  __builtin_aie2p_vshuffle_576_bfp16(a.mantissa,
                                     a.exponent,
                                     b.mantissa,
                                     b.exponent,
                                     mode,
                                     (v64char &)shuffled.mantissa,
                                     (v8char &)shuffled.exponent);
  return shuffled;
}

/// Two-source shuffle of v64bfp16ebs16 values.
///
/// Forwards the mantissa and exponent members of \p a and \p b, together with
/// \p mode, to __builtin_aie2p_vshuffle_576_bfp16 (the same builtin the
/// v64bfp16ebs8 overload uses). The builtin writes its two results through the
/// trailing reference arguments, which are bound to the mantissa and exponent
/// members of the value returned here.
/// NOTE(review): the meaning of the individual \p mode values is not visible
/// from this header - confirm against the ISA documentation.
INTRINSIC(v64bfp16ebs16)
shuffle(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) {
  v64bfp16ebs16 shuffled;
  // The output members are viewed through the char-vector reference types
  // used for the builtin's two out-parameters.
  __builtin_aie2p_vshuffle_576_bfp16(a.mantissa,
                                     a.exponent,
                                     b.mantissa,
                                     b.exponent,
                                     mode,
                                     (v64char &)shuffled.mantissa,
                                     (v8char &)shuffled.exponent);
  return shuffled;
}

/// Single-source shuffle of a v64bfp16ebs8 value.
///
/// Delegates to the two-source overload, passing a default-initialized
/// (i.e. uninitialized) v64bfp16ebs8 as the second source.
/// NOTE(review): presumably only modes that ignore the second source are
/// valid here - confirm against the ISA documentation.
INTRINSIC(v64bfp16ebs8) shuffle(v64bfp16ebs8 a, unsigned mode) {
  // Left without an initializer on purpose of keeping the original behavior:
  // no value is ever assigned to the second source.
  v64bfp16ebs8 unsetSecondSource;
  return shuffle(a, unsetSecondSource, mode);
}

/// Single-source shuffle of a v64bfp16ebs16 value.
///
/// Delegates to the two-source overload, passing a default-initialized
/// (i.e. uninitialized) v64bfp16ebs16 as the second source.
/// NOTE(review): presumably only modes that ignore the second source are
/// valid here - confirm against the ISA documentation.
INTRINSIC(v64bfp16ebs16) shuffle(v64bfp16ebs16 a, unsigned mode) {
  // Left without an initializer on purpose of keeping the original behavior:
  // no value is ever assigned to the second source.
  v64bfp16ebs16 unsetSecondSource;
  return shuffle(a, unsetSecondSource, mode);
}

#endif /*__AIEV2_SCL2VEC_H__*/
3 changes: 3 additions & 0 deletions clang/lib/Headers/aiebase_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ typedef int16_t v32int16 __attribute__((__vector_size__(64)))
__attribute__((aligned(__MIN_ALIGNMENT)));
typedef int8_t v64int8 __attribute__((__vector_size__(64)))
__attribute__((aligned(__MIN_ALIGNMENT)));
typedef char v64char __attribute__((__vector_size__(64)))
__attribute__((aligned(__MIN_ALIGNMENT)));
typedef uint32_t v8uint64 __attribute__((__vector_size__(64)))
__attribute__((aligned(__MIN_ALIGNMENT)));
typedef uint32_t v16uint32 __attribute__((__vector_size__(64)))
Expand Down Expand Up @@ -246,6 +248,7 @@ typedef int16_t v4int16 __attribute__((__vector_size__(8)));
typedef uint16_t v4uint16 __attribute__((__vector_size__(8)));
typedef uint8_t v8uint8 __attribute__((__vector_size__(8)));
typedef int8_t v8int8 __attribute__((__vector_size__(8)));
typedef char v8char __attribute__((__vector_size__(8)));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for these two types, just use v64int8 and v8int8?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are defined in the global header now as V8c and V64c. Could you use just one or the other after rebasing?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And they are needed so that we can cast, since the builtins take a v64c (a vector of char) while we define v64int8/v8int8 as vectors of int8_t

typedef buint8_t v16uint4 __attribute__((__vector_size__(8)));
typedef bint8_t v16int4 __attribute__((__vector_size__(8)));
/* vector types */
Expand Down
75 changes: 70 additions & 5 deletions clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -212,7 +212,6 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){
v128uint4 test_upd_elem(v128uint4 v, int idx, v2uint4 b) {
return upd_elem(v, idx, b);
}
//
// AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_DU8_iDv2_S_(
// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIE2P-NEXT: entry:
Expand All @@ -233,7 +232,6 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){
v128uint4 test_upd_elem(v128uint4 v, int idx, v8uint4 b) {
return upd_elem(v, idx, b);
}
//
// AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_DU8_iDv8_S_(
// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIE2P-NEXT: entry:
Expand All @@ -260,7 +258,6 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){
v64uint8 test_upd_elem(v64uint8 v, int idx, unsigned char b) {
return upd_elem(v, idx, b);
}
//
// AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_hiDv2_h(
// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIE2P-NEXT: entry:
Expand Down Expand Up @@ -332,7 +329,6 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){
v32uint16 test_upd_elem(v32uint16 v, int idx, v4uint16 b) {
return upd_elem(v, idx, b);
}
//
// AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_jij(
// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIE2P-NEXT: entry:
Expand Down Expand Up @@ -624,6 +620,7 @@ v16int32 test_shuffle_u64(mask64 b, unsigned int m) {
return shuffle_u64(b, m);
}

//
// AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z11test_insertDv16_iiDv2_j(
// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
// AIE2P-NEXT: entry:
Expand Down Expand Up @@ -729,3 +726,71 @@ v64accfloat
test_broadcast_zero_to_v64accfloat() {
return broadcast_zero_to_v64accfloat();
}

// AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8S_j(
// AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS8]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR1]] {
// AIE2P-NEXT: entry:
// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0
// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1
// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 0
// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 1
// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]])
// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0
// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1
// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0
// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1
// AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I]]
//
v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) {
return shuffle(a, b, mode);
}

// AIE2P-LABEL: define dso_local %struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16S_j(
// AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS16]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR1]] {
// AIE2P-NEXT: entry:
// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0
// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1
// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 0
// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 1
// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]])
// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0
// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1
// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0
// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1
// AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I]]
//
v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) {
return shuffle(a, b, mode);
}

// AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8j(
// AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR1]] {
// AIE2P-NEXT: entry:
// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0
// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1
// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]])
// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0
// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1
// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0
// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1
// AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I_I]]
//
v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, unsigned mode) {
return shuffle(a ,mode);
}

// AIE2P-LABEL: define dso_local %struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16j(
// AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR1]] {
// AIE2P-NEXT: entry:
// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0
// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1
// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]])
// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0
// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1
// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0
// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1
// AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I_I]]
//
v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, unsigned mode) {
return shuffle(a, mode);
}
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAIE2P.td
Original file line number Diff line number Diff line change
Expand Up @@ -572,4 +572,10 @@ def int_aie2p_sqrtf : ClangBuiltin<"__builtin_aie2p_sqrtf">, AIE2PNLF;
// DIVS
def int_aie2p_divs : AIE2PDIVS;

// BFP16 SHUFFLE
class AIE2PSHUFFLEBFP16
: Intrinsic<[llvm_v64i8_ty, llvm_v8i8_ty], [llvm_v64i8_ty, llvm_v8i8_ty, llvm_v64i8_ty, llvm_v8i8_ty, llvm_i32_ty],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should use DefaultAttrsIntrinsic instead of Intrinsic

[IntrNoMem]>;
def int_aie2p_vshuffle_576_bfp16 : AIE2PSHUFFLEBFP16;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This selects to the same instruction whether we come from v64bfp16ebs8 (aka 576 size) or v64bfp16ebs16 (aka 544 size) so I would just name it aie2p_vshuffle_bfp16


} // TargetPrefix = "aie2p"
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIE2RegisterBanks.td
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@
//===----------------------------------------------------------------------===//

include "AIEBaseRegisterBanks.td"
def GPRRegBank : RegisterBank<"GPRRegBank", [eR, eL]>;
def AccRegBank : RegisterBank<"AccRegBank", [ACC256, ACC512, ACC1024]>;
14 changes: 9 additions & 5 deletions llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,15 @@ bool AIEBaseInstructionSelector::selectG_IMPLICIT_DEF(
// Make sure no input operands are passed to IMPLICIT_DEF
while (I.getNumOperands() > 1)
I.removeOperand(1);
const MachineOperand &DstOp = I.getOperand(0);
const RegisterBank &RB = *RBI.getRegBank(DstOp.getReg(), MRI, TRI);
const TargetRegisterClass &RC =
TRI.getMinClassForRegBank(RB, MRI.getType(DstOp.getReg()));
return RBI.constrainGenericRegister(DstOp.getReg(), RC, MRI);
const Register DstReg = I.getOperand(0).getReg();
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(DstReg);
const TargetRegisterClass *DstRC =
RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
if (!DstRC) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, if this is needed, could you put it in its own commit along with the tests that it affects?

const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
DstRC = &TRI.getMinClassForRegBank(RB, MRI.getType(DstReg));
}
return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

bool AIEBaseInstructionSelector::selectG_PHI(MachineInstr &I,
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AIE/AIEBaseRegisterBanks.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//

def GPRRegBank : RegisterBank<"GPRRegBank", [eR, eL]>;
def PTRRegBank : RegisterBank<"PTRRegBank", [eP]>;
def MODRegBank : RegisterBank<"MODRegBank", [mDm]>;
def VRegBank : RegisterBank<"VRegBank", [VEC128, VEC256, VEC512, VEC1024]>;
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ unsigned AIE2PInstrInfo::getOpCode(MachineInstr &I) const {
return isSigned ? AIE2P::VUNPACK_mv_unpack_x_unpackSign1
: AIE2P::VUNPACK_mv_unpack_x_unpackSign0;
}
case Intrinsic::aie2p_vshuffle_576_bfp16:
return AIE2P::VSHUFFLE_vec_shuffle_ex;
default:
llvm_unreachable("Unexpected Intrinsic ID");
}
Expand Down
55 changes: 55 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class AIE2PInstructionSelector : public AIEBaseInstructionSelector {
bool selectReadTM(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVUNPACK(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVPACK(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVSHUFFLE_BFP(MachineInstr &I, MachineRegisterInfo &MRI);

private:
bool selectImpl(MachineInstr &I,
Expand Down Expand Up @@ -253,6 +254,8 @@ bool AIE2PInstructionSelector::select(MachineInstr &I) {
case Intrinsic::aie2p_v16bf16_to_v16accfloat:
case Intrinsic::aie2p_v32bf16_to_v32accfloat:
return selectVCONV(I, MRI);
case Intrinsic::aie2p_vshuffle_576_bfp16:
return selectVSHUFFLE_BFP(I, MRI);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down Expand Up @@ -2910,6 +2913,58 @@ AIE2PInstructionSelector::getCombinedOpcodeSRSUPS(
return {};
}

/// Select the aie2p_vshuffle_576_bfp16 intrinsic.
///
/// The generic instruction \p I carries the mantissa and exponent of the
/// result and of both sources as separate registers. This routine packs each
/// source pair into one virtual register with REG_SEQUENCE, emits the target
/// instruction returned by TII.getOpCode(I) on the packed registers, and then
/// copies the two sub-registers of the packed result back into the original
/// destination registers. \p I is erased afterwards.
///
/// \returns the result of constrainSelectedInstRegOperands on the newly built
/// target instruction.
bool AIE2PInstructionSelector ::selectVSHUFFLE_BFP(MachineInstr &I,
MachineRegisterInfo &MRI) {
// Operands 0 and 1 are the two results; operands 3..7 are the inputs.
// NOTE(review): operand 2 is skipped - presumably it holds the intrinsic ID;
// confirm.
Register DstMant = I.getOperand(0).getReg();
Register DstExp = I.getOperand(1).getReg();
Register Src1Mant = I.getOperand(3).getReg();
Register Src1Exp = I.getOperand(4).getReg();
Register Src2Mant = I.getOperand(5).getReg();
Register Src2Exp = I.getOperand(6).getReg();
Register Mode = I.getOperand(7).getReg();

// Must be queried before I is erased at the end of this function.
unsigned OpCode = TII.getOpCode(I);
// Pack (mantissa, exponent) of the first source into one mEXm register.
Register Src1Reg = MRI.createVirtualRegister(&AIE2P::mEXmRegClass);
MachineInstrBuilder RegSeq1 =
MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {Src1Reg}, {})
.addReg(Src1Mant)
.addImm(AIE2P::sub_bfp16_x)
.addReg(Src1Exp)
.addImm(AIE2P::sub_bfp16_e);
// Pack the second source the same way; note it uses mEXn, not mEXm.
Register Src2Reg = MRI.createVirtualRegister(&AIE2P::mEXnRegClass);
MachineInstrBuilder RegSeq2 =
MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {Src2Reg}, {})
.addReg(Src2Mant)
.addImm(AIE2P::sub_bfp16_x)
.addReg(Src2Exp)
.addImm(AIE2P::sub_bfp16_e);
// Constrain the REG_SEQUENCE inputs: operand 1 is the mantissa (VEC512) and
// operand 3 is the exponent (EXPVEC64). The return values of these calls are
// not checked.
constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *RegSeq1,
AIE2P::VEC512RegClass, RegSeq1->getOperand(1));
constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *RegSeq1,
AIE2P::EXPVEC64RegClass, RegSeq1->getOperand(3));
constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *RegSeq2,
AIE2P::VEC512RegClass, RegSeq2->getOperand(1));
constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *RegSeq2,
AIE2P::EXPVEC64RegClass, RegSeq2->getOperand(3));

// Emit the target instruction on the packed registers.
Register DstReg = MRI.createVirtualRegister(&AIE2P::mEXmRegClass);
MachineInstrBuilder MI =
MIB.buildInstr(OpCode, {DstReg}, {Src1Reg, Src2Reg, Mode});

// Split the packed result back into the original mantissa and exponent
// destination registers via sub-register copies.
auto MantCopyMI = MIB.buildInstr(TargetOpcode::COPY, {DstMant}, {})
.addReg(DstReg, 0, AIE2P::sub_bfp16_x);
auto ExpCopyMI = MIB.buildInstr(TargetOpcode::COPY, {DstExp}, {})
.addReg(DstReg, 0, AIE2P::sub_bfp16_e);
// Constrain the copy destinations (operand 0 of each COPY).
constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *MantCopyMI,
AIE2P::VEC512RegClass, MantCopyMI->getOperand(0));

constrainOperandRegClass(*MF, TRI, MRI, TII, RBI, *ExpCopyMI,
AIE2P::EXPVEC64RegClass, ExpCopyMI->getOperand(0));

// The generic instruction is fully replaced; remove it and constrain the
// operands of the new target instruction.
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
}

namespace llvm {
InstructionSelector *
createAIE2PInstructionSelector(const AIE2PTargetMachine &TM,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,9 @@ AIE2PRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AIE2P::eSRegClassID:
case AIE2P::mS2RegClassID:
case AIE2P::mS3RegClassID:
case AIE2P::EXPVEC64RegClassID:
case AIE2P::EXPVEC64_with_sub_hi_exp_in_eEheRegClassID:
case AIE2P::EXPVEC64_with_sub_hi_exp_in_eEhoRegClassID:
return GPRs;
case AIE2P::ePRegClassID:
case AIE2P::eSpecial20RegClassID:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PRegisterBanks.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@

include "AIEBaseRegisterBanks.td"
def AccRegBank : RegisterBank<"AccRegBank", [ACC512, ACC1024, ACC2048]>;
def GPRRegBank : RegisterBank<"GPRRegBank", [eR, eL, eE, EXPVEC64]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a newline at the end of the file.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should not be needed after rebase

Loading
Loading