-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Support fptoi-like ops for fp16 vector inputs when we only have Zvfhmin #67532
Conversation
…Zvfhmin This patch supports FP_TO_SINT, FP_TO_UINT, VP_FP_TO_SINT and VP_FP_TO_UINT for fp16 vector inputs when we only have Zvfhmin but no Zvfh.
@llvm/pr-subscribers-backend-risc-v ChangesThis patch supports FP_TO_SINT, FP_TO_UINT, VP_FP_TO_SINT and VP_FP_TO_UINT for fp16 vector inputs when we only have Zvfhmin but no Zvfh. Patch is 113.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67532.diff 11 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b8e5aeeb86405a..e16c4ec8e3d472c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5732,6 +5732,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
[[fallthrough]];
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVectorOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
+ }
+ [[fallthrough]];
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_SINT_TO_FP:
@@ -6278,6 +6294,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
[[fallthrough]];
case ISD::VP_FP_TO_SINT:
case ISD::VP_FP_TO_UINT:
+ if (SDValue Op1 = Op.getOperand(0);
+ Op1.getValueType().isVector() &&
+ Op1.getValueType().getScalarType() == MVT::f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16())) {
+ if (Op1.getValueType() == MVT::nxv32f16)
+ return SplitVPOp(Op, DAG);
+ // f16 -> f32
+ SDLoc DL(Op);
+ MVT NVT = MVT::getVectorVT(MVT::f32,
+ Op1.getValueType().getVectorElementCount());
+ SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
+ // f32 -> int
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ {WidenVec, Op.getOperand(1), Op.getOperand(2)});
+ }
return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 4e44abc2c0b7fa4..44b96d076df4552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -1,8 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV32,LMULMAX8RV32ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8RV64,LMULMAX8RV64ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV32,LMULMAX1RV32ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1RV64,LMULMAX1RV64ZVFHMIN
define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f32_v2i32:
@@ -589,25 +593,145 @@ define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) {
}
define <2 x i1> @fp2si_v2f16_v2i1(<2 x half> %x) {
-; CHECK-LABEL: fp2si_v2f16_v2i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT: vand.vi v8, v9, 1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: ret
+; LMULMAX8RV32ZVFH-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX8RV32ZVFH: # %bb.0:
+; LMULMAX8RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX8RV32ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8
+; LMULMAX8RV32ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX8RV32ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV32ZVFH-NEXT: ret
+;
+; LMULMAX8RV64ZVFH-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX8RV64ZVFH: # %bb.0:
+; LMULMAX8RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX8RV64ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8
+; LMULMAX8RV64ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX8RV64ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV64ZVFH-NEXT: ret
+;
+; LMULMAX1RV32ZVFH-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX1RV32ZVFH: # %bb.0:
+; LMULMAX1RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX1RV32ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8
+; LMULMAX1RV32ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX1RV32ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV32ZVFH-NEXT: ret
+;
+; LMULMAX1RV64ZVFH-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX1RV64ZVFH: # %bb.0:
+; LMULMAX1RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX1RV64ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8
+; LMULMAX1RV64ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX1RV64ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV64ZVFH-NEXT: ret
+;
+; LMULMAX8RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX8RV32ZVFHMIN: # %bb.0:
+; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX8RV32ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9
+; LMULMAX8RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX8RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV32ZVFHMIN-NEXT: ret
+;
+; LMULMAX8RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX8RV64ZVFHMIN: # %bb.0:
+; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX8RV64ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9
+; LMULMAX8RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX8RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV64ZVFHMIN-NEXT: ret
+;
+; LMULMAX1RV32ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX1RV32ZVFHMIN: # %bb.0:
+; LMULMAX1RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX1RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX1RV32ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9
+; LMULMAX1RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX1RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV32ZVFHMIN-NEXT: ret
+;
+; LMULMAX1RV64ZVFHMIN-LABEL: fp2si_v2f16_v2i1:
+; LMULMAX1RV64ZVFHMIN: # %bb.0:
+; LMULMAX1RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX1RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX1RV64ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9
+; LMULMAX1RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX1RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV64ZVFHMIN-NEXT: ret
%z = fptosi <2 x half> %x to <2 x i1>
ret <2 x i1> %z
}
define <2 x i1> @fp2ui_v2f16_v2i1(<2 x half> %x) {
-; CHECK-LABEL: fp2ui_v2f16_v2i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT: vand.vi v8, v9, 1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: ret
+; LMULMAX8RV32ZVFH-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX8RV32ZVFH: # %bb.0:
+; LMULMAX8RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX8RV32ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; LMULMAX8RV32ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX8RV32ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV32ZVFH-NEXT: ret
+;
+; LMULMAX8RV64ZVFH-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX8RV64ZVFH: # %bb.0:
+; LMULMAX8RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX8RV64ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; LMULMAX8RV64ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX8RV64ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV64ZVFH-NEXT: ret
+;
+; LMULMAX1RV32ZVFH-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX1RV32ZVFH: # %bb.0:
+; LMULMAX1RV32ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX1RV32ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; LMULMAX1RV32ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX1RV32ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV32ZVFH-NEXT: ret
+;
+; LMULMAX1RV64ZVFH-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX1RV64ZVFH: # %bb.0:
+; LMULMAX1RV64ZVFH-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; LMULMAX1RV64ZVFH-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; LMULMAX1RV64ZVFH-NEXT: vand.vi v8, v9, 1
+; LMULMAX1RV64ZVFH-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV64ZVFH-NEXT: ret
+;
+; LMULMAX8RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX8RV32ZVFHMIN: # %bb.0:
+; LMULMAX8RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX8RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX8RV32ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9
+; LMULMAX8RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX8RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV32ZVFHMIN-NEXT: ret
+;
+; LMULMAX8RV64ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX8RV64ZVFHMIN: # %bb.0:
+; LMULMAX8RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX8RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX8RV64ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9
+; LMULMAX8RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX8RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX8RV64ZVFHMIN-NEXT: ret
+;
+; LMULMAX1RV32ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX1RV32ZVFHMIN: # %bb.0:
+; LMULMAX1RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX1RV32ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX1RV32ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9
+; LMULMAX1RV32ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX1RV32ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV32ZVFHMIN-NEXT: ret
+;
+; LMULMAX1RV64ZVFHMIN-LABEL: fp2ui_v2f16_v2i1:
+; LMULMAX1RV64ZVFHMIN: # %bb.0:
+; LMULMAX1RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; LMULMAX1RV64ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; LMULMAX1RV64ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9
+; LMULMAX1RV64ZVFHMIN-NEXT: vand.vi v8, v8, 1
+; LMULMAX1RV64ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; LMULMAX1RV64ZVFHMIN-NEXT: ret
%z = fptoui <2 x half> %x to <2 x i1>
ret <2 x i1> %z
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
index 92799ea39136562..dab4b4d9926e1db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll
@@ -1,27 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x i1> @vfptosi_v4i1_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i1_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
-; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i1_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
+; ZVFH-NEXT: vmsne.vi v0, v8, 0, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i1_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0, v0.t
+; ZVFHMIN-NEXT: ret
%v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x i1> %v
}
define <4 x i1> @vfptosi_v4i1_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i1_v4f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i1_v4f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v8, v8
+; ZVFH-NEXT: vmsne.vi v0, v8, 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i1_v4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9
+; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0
+; ZVFHMIN-NEXT: ret
%v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
ret <4 x i1> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
index 9a78c4daeb88f92..c673e396914bf39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
@@ -1,16 +1,28 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <4 x i7> @llvm.vp.fptosi.v4i7.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x i7> @vfptosi_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i7_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i7_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i7_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVFHMIN-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; ZVFHMIN-NEXT: ret
%v = call <4 x i7> @llvm.vp.fptosi.v4i7.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x i7> %v
}
@@ -18,23 +30,43 @@ define <4 x i7> @vfptosi_v4i7_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %ev
declare <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x i8> @vfptosi_v4i8_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i8_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i8_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8, v0.t
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i8_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVFHMIN-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; ZVFHMIN-NEXT: ret
%v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x i8> %v
}
define <4 x i8> @vfptosi_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i8_v4f16_unmasked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i8_v4f16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; ZVFH-NEXT: vfncvt.rtz.x.f.w v9, v8
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i8_v4f16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZVFHMIN-NEXT: vnsrl.wi v8, v8, 0
+; ZVFHMIN-NEXT: ret
%v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
ret <4 x i8> %v
}
@@ -42,21 +74,37 @@ define <4 x i8> @vfptosi_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
declare <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x i16> @vfptosi_v4i16_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfptosi_v4i16_v4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfptosi_v4i16_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfptosi_v4i16_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
%v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x i16> %v
}...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch supports FP_TO_SINT, FP_TO_UINT, VP_FP_TO_SINT and VP_FP_TO_UINT for fp16 vector inputs when we only have Zvfhmin but no Zvfh.