-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoongArch] Custom legalize vector_shuffle to xvinsve0.{w/d} when possible
#161156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) Changes — Full diff: https://github.com/llvm/llvm-project/pull/161156.diff — 4 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 098bcfa67d1d3..4cfbfca45d359 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2319,6 +2319,53 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}
+/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
+///
+/// xvinsve0.{w/d} inserts the lowest element (lane 0) of the source vector
+/// into a single chosen lane of the destination vector. A shuffle matches
+/// when every lane but one is the identity of one operand (or undef), and
+/// the remaining lane selects element 0 of the other operand.
+///
+/// \param Mask  The shuffle mask; size must equal VT's element count.
+/// \param V1    First shuffle operand (mask indices [0, MaskSize)).
+/// \param V2    Second shuffle operand (mask indices [MaskSize, 2*MaskSize)).
+/// \return The XVINSVE0 node on a match, otherwise an empty SDValue so the
+///         caller falls through to the next lowering strategy.
+static SDValue
+lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // LoongArch LASX only supports xvinsve0.{w/d}.
+ if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
+ VT != MVT::v4f64)
+ return SDValue();
+
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ int MaskSize = Mask.size();
+ assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Check if exactly one element of the Mask is replaced by 'Replaced', while
+ // all other elements are either 'Base + i' or undef (-1). On success, return
+ // the index of the replaced element. Otherwise, just return -1.
+ auto checkReplaceOne = [&](int Base, int Replaced) -> int {
+ int Idx = -1;
+ for (int i = 0; i < MaskSize; ++i) {
+ if (Mask[i] == Base + i || Mask[i] == -1)
+ continue;
+ if (Mask[i] != Replaced)
+ return -1;
+ if (Idx == -1)
+ Idx = i;
+ else
+ return -1;
+ }
+ return Idx;
+ };
+
+ // Case 1: the lowest element of V2 replaces one element in V1.
+ int Idx = checkReplaceOne(0, MaskSize);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ // Case 2: the lowest element of V1 replaces one element in V2.
+ Idx = checkReplaceOne(MaskSize, 0);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ // Neither operand's lane 0 can produce this shuffle; let callers try other
+ // shuffle lowerings.
+ return SDValue();
+}
+
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
@@ -2595,6 +2642,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
@@ -7453,6 +7503,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XVPERM)
NODE_NAME_CASE(XVREPLVE0)
NODE_NAME_CASE(XVREPLVE0Q)
+ NODE_NAME_CASE(XVINSVE0)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9b60a9fd53726..8a4d7748467c7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -151,6 +151,7 @@ enum NodeType : unsigned {
XVPERM,
XVREPLVE0,
XVREPLVE0Q,
+ XVINSVE0,
// Extended vector element extraction
VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index bbc0489620193..5143d53bad719 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
+def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
(XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;
// XVINSVE0_{W/D}
+def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>;
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
(XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
uimm3:$imm)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
index b5d5c9c15d7c8..e1784f81c2a07 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
;; xvinsve0.w
define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind {
@@ -8,10 +8,8 @@ define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -22,52 +20,13 @@ entry:
}
define void @xvinsve0_v8i32_l_4(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_l_4:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ld.w $a2, $a2, 0
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_l_4:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 5
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpermi.q $xr2, $xr0, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_l_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 4
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -81,10 +40,8 @@ define void @xvinsve0_v8f32_l(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
@@ -99,10 +56,8 @@ define void @xvinsve0_v8i32_h_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 1
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -113,52 +68,13 @@ entry:
}
define void @xvinsve0_v8i32_h_6(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v8i32_h_6:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4
-; LA32-NEXT: ld.w $a1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 5
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v8i32_h_6:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
-; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
-; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
-; LA64-NEXT: xvst $xr1, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v8i32_h_6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 6
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
%vb = load <8 x i32>, ptr %b
@@ -172,10 +88,8 @@ define void @xvinsve0_v8f32_h(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
@@ -191,10 +105,8 @@ define void @xvinsve0_v4i64_l_1(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -205,44 +117,13 @@ entry:
}
define void @xvinsve0_v4i64_l_2(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_l_2:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0
-; LA32-NEXT: xvld $xr1, $a1, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_l_2:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a2, $a2, 0
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_l_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -256,10 +137,8 @@ define void @xvinsve0_v4f64_l(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x double>, ptr %a
@@ -274,10 +153,8 @@ define void @xvinsve0_v4i64_h_0(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
@@ -288,44 +165,13 @@ entry:
}
define void @xvinsve0_v4i64_h_2(ptr %d, ptr %a, ptr %b) nounwind {
-; LA32-LABEL: xvinsve0_v4i64_h_2:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7
-; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
-; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3
-; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3
-; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: xvinsve0_v4i64_h_2:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ld.d $a1, $a1, 0
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
-; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: xvinsve0_v4i64_h_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2
+; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: ret
entry:
%va = load <4 x i64>, ptr %a
%vb = load <4 x i64>, ptr %b
@@ -339,10 +185,8 @@ define void @xvinsve0_v4f64_h(ptr %d, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <4 x double>, ptr %a
|
Same as #160857 which has been closed because of my silly mistake. |
heiher
approved these changes
Sep 29, 2025
2e1e086
to
92a3130
Compare
mahesh-attarde
pushed a commit
to mahesh-attarde/llvm-project
that referenced
this pull request
Oct 3, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.