From 38de37840c4707c0699e3a7dcfec4748d8077832 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 26 Sep 2025 19:07:22 +0800 Subject: [PATCH 1/5] [LoongArch] Custom legalize vector_shuffle to `xvinsve0.{w/d}` when possible --- .../LoongArch/LoongArchISelLowering.cpp | 52 +++++++++++++++++++ .../Target/LoongArch/LoongArchISelLowering.h | 1 + .../LoongArch/LoongArchLASXInstrInfo.td | 9 ++++ .../ir-instruction/shuffle-as-xvinsve0.ll | 4 +- 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 098bcfa67d1d3..5c6a4a33f4caa 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2319,6 +2319,54 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1); } +// Check if exactly one element of the Mask is replaced by 'Replaced', while +// all other elements are either 'Base + i' or undef (-1). On success, return +// the index of the replaced element. Otherwise, just return -1. +static int checkReplaceOne(ArrayRef Mask, int Base, int Replaced) { + int MaskSize = Mask.size(); + int Idx = -1; + for (int i = 0; i < MaskSize; ++i) { + if (Mask[i] == Base + i || Mask[i] == -1) + continue; + if (Mask[i] != Replaced) + return -1; + if (Idx == -1) + Idx = i; + else + return -1; + } + return Idx; +} + +/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible). +static SDValue +lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, SelectionDAG &DAG, + const LoongArchSubtarget &Subtarget) { + // LoongArch LASX only supports xvinsve0.{w/d}. + if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 && + VT != MVT::v4f64) + return SDValue(); + + MVT GRLenVT = Subtarget.getGRLenVT(); + int MaskSize = Mask.size(); + assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size"); + + // Case 1: the lowest element of V2 replaces one element in V1. + int Idx = checkReplaceOne(Mask, 0, MaskSize); + if (Idx != -1) + return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2, + DAG.getConstant(Idx, DL, GRLenVT)); + + // Case 2: the lowest element of V1 replaces one element in V2. + Idx = checkReplaceOne(Mask, MaskSize, 0); + if (Idx != -1) + return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1, + DAG.getConstant(Idx, DL, GRLenVT)); + + return SDValue(); +} + /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). 
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, @@ -2595,6 +2643,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget, Zeroable))) return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget))) + return Result; if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; @@ -7453,6 +7504,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(XVPERM) NODE_NAME_CASE(XVREPLVE0) NODE_NAME_CASE(XVREPLVE0Q) + NODE_NAME_CASE(XVINSVE0) NODE_NAME_CASE(VPICK_SEXT_ELT) NODE_NAME_CASE(VPICK_ZEXT_ELT) NODE_NAME_CASE(VREPLVE) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 9b60a9fd53726..8a4d7748467c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -151,6 +151,7 @@ enum NodeType : unsigned { XVPERM, XVREPLVE0, XVREPLVE0Q, + XVINSVE0, // Extended vector element extraction VPICK_SEXT_ELT, diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index bbc0489620193..5143d53bad719 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>; def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>; def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>; def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>; +def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>; def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>; def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>; def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>; @@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm), (XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>; // XVINSVE0_{W/D} +def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm), + (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>; +def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm), + (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>; +def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm), + (XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>; +def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm), + (XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>; def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm), (XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32), uimm3:$imm)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll index b5d5c9c15d7c8..54c7420854317 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | 
FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ;; xvinsve0.w define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind { From 38b87f6a8a7cdaeb1f180035df6c757636c6b859 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 26 Sep 2025 20:20:12 +0800 Subject: [PATCH 2/5] update tests --- .../ir-instruction/shuffle-as-xvinsve0.ll | 244 ++++-------------- 1 file changed, 44 insertions(+), 200 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll index 54c7420854317..e1784f81c2a07 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll @@ -8,10 +8,8 @@ define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI0_0) -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x i32>, ptr %a @@ -22,52 +20,13 @@ entry: } define void @xvinsve0_v8i32_l_4(ptr %d, ptr %a, ptr %b) nounwind { -; LA32-LABEL: xvinsve0_v8i32_l_4: -; LA32: # %bb.0: # %entry -; LA32-NEXT: ld.w $a2, $a2, 0 -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA32-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA32-NEXT: xvst $xr2, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvinsve0_v8i32_l_4: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a2, 0 -; LA64-NEXT: xvld $xr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 5 -; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 6 -; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 2 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 7 -; LA64-NEXT: vinsgr2vr.w $vr0, $a1, 3 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 1 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 2 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 3 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA64-NEXT: xvpermi.q $xr2, $xr0, 2 -; LA64-NEXT: xvst $xr2, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvinsve0_v8i32_l_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 4 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <8 x i32>, ptr %a %vb = load <8 x i32>, ptr %b @@ -81,10 +40,8 @@ define void @xvinsve0_v8f32_l(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld 
$xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0) -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x float>, ptr %a @@ -99,10 +56,8 @@ define void @xvinsve0_v8i32_h_1(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 1 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x i32>, ptr %a @@ -113,52 +68,13 @@ entry: } define void @xvinsve0_v8i32_h_6(ptr %d, ptr %a, ptr %b) nounwind { -; LA32-LABEL: xvinsve0_v8i32_h_6: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a2, 0 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 4 -; LA32-NEXT: ld.w $a1, $a1, 0 -; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 5 -; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA32-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA32-NEXT: xvst $xr2, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvinsve0_v8i32_h_6: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xvld $xr0, $a2, 0 -; LA64-NEXT: xvld $xr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 5 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA64-NEXT: xvpickve2gr.w $a1, $xr1, 0 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 7 -; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2 -; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3 -; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; LA64-NEXT: xvpermi.q $xr1, $xr2, 2 -; LA64-NEXT: xvst $xr1, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvinsve0_v8i32_h_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 6 +; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <8 x i32>, ptr %a %vb = load <8 x i32>, ptr %b @@ -172,10 +88,8 @@ define void @xvinsve0_v8f32_h(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0) -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.w $xr1, $xr0, 0 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x float>, ptr %a @@ -191,10 +105,8 @@ define void @xvinsve0_v4i64_l_1(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: 
pcalau12i $a1, %pc_hi20(.LCPI6_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x i64>, ptr %a @@ -205,44 +117,13 @@ entry: } define void @xvinsve0_v4i64_l_2(ptr %d, ptr %a, ptr %b) nounwind { -; LA32-LABEL: xvinsve0_v4i64_l_2: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a2, 0 -; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 0 -; LA32-NEXT: xvld $xr1, $a1, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3 -; LA32-NEXT: xvpermi.q $xr0, $xr2, 2 -; LA32-NEXT: xvst $xr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvinsve0_v4i64_l_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: ld.d $a2, $a2, 0 -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3 -; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: xvst $xr2, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvinsve0_v4i64_l_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -256,10 +137,8 @@ define void @xvinsve0_v4f64_l(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x double>, ptr %a @@ -274,10 +153,8 @@ define void @xvinsve0_v4i64_h_0(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x i64>, ptr %a @@ -288,44 +165,13 @@ entry: } define void @xvinsve0_v4i64_h_2(ptr %d, ptr %a, ptr %b) nounwind { -; LA32-LABEL: xvinsve0_v4i64_h_2: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0 -; LA32-NEXT: xvld $xr1, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 1 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 6 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 7 -; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3 -; LA32-NEXT: xvpickve2gr.w $a1, 
$xr1, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 1 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 2 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 -; LA32-NEXT: xvpickve2gr.w $a1, $xr1, 3 -; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 3 -; LA32-NEXT: xvpermi.q $xr0, $xr2, 2 -; LA32-NEXT: xvst $xr0, $a0, 0 -; LA32-NEXT: ret -; -; LA64-LABEL: xvinsve0_v4i64_h_2: -; LA64: # %bb.0: # %entry -; LA64-NEXT: ld.d $a1, $a1, 0 -; LA64-NEXT: xvld $xr0, $a2, 0 -; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3 -; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0 -; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0 -; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1 -; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1 -; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 -; LA64-NEXT: xvst $xr2, $a0, 0 -; LA64-NEXT: ret +; CHECK-LABEL: xvinsve0_v4i64_h_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 2 +; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -339,10 +185,8 @@ define void @xvinsve0_v4f64_h(ptr %d, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvinsve0.d $xr1, $xr0, 0 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x double>, ptr %a From ee46ce71467ec8017bd14bdbc79dfc175f213934 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Mon, 29 Sep 2025 10:51:51 +0800 Subject: [PATCH 3/5] address heiher's comment --- .../LoongArch/LoongArchISelLowering.cpp | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 5c6a4a33f4caa..de53b322eb418 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2319,25 +2319,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1); } -// Check if exactly one element of the Mask is replaced by 'Replaced', while -// all other elements are either 'Base + i' or undef (-1). On success, return -// the index of the replaced element. Otherwise, just return -1. -static int checkReplaceOne(ArrayRef Mask, int Base, int Replaced) { - int MaskSize = Mask.size(); - int Idx = -1; - for (int i = 0; i < MaskSize; ++i) { - if (Mask[i] == Base + i || Mask[i] == -1) - continue; - if (Mask[i] != Replaced) - return -1; - if (Idx == -1) - Idx = i; - else - return -1; - } - return Idx; -} - /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible). static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, @@ -2348,21 +2329,49 @@ lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, VT != MVT::v4f64) return SDValue(); - MVT GRLenVT = Subtarget.getGRLenVT(); int MaskSize = Mask.size(); assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size"); - // Case 1: the lowest element of V2 replaces one element in V1. 
- int Idx = checkReplaceOne(Mask, 0, MaskSize); - if (Idx != -1) - return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2, - DAG.getConstant(Idx, DL, GRLenVT)); + // Check if exactly one element of the Mask is replaced by the lowest element + // of the other vector, while all other elements are either its index or + // undef (-1). On success, return the index of the replaced element and the + // two result vectors. Otherwise, just return -1 and empty. + auto DetectReplaced = [&](ArrayRef Mask, SDValue V1, + SDValue V2) -> std::tuple { + int Idx1 = -1, Idx2 = -1; + bool Found1 = true, Found2 = true; + + for (int i = 0; i < MaskSize; ++i) { + int Cur = Mask[i]; + // Case 1: the lowest element of V2 replaces one element in V1. + bool Match1 = (Cur == i || Cur == -1 || (Cur == MaskSize && Idx1 == -1)); + // Case 2: the lowest element of V1 replaces one element in V2. + bool Match2 = + (Cur == MaskSize + i || Cur == -1 || (Cur == 0 && Idx2 == -1)); + + // Record the index of the replaced element. + if (Match1 && Cur == MaskSize && Idx1 == -1) + Idx1 = i; + if (Match2 && Cur == 0 && Idx2 == -1) + Idx2 = i; + + Found1 &= Match1; + Found2 &= Match2; + if (!Found1 && !Found2) + return {SDValue(), SDValue(), -1}; + } + + if (Found1 && Idx1 != -1) + return {V1, V2, Idx1}; + if (Found2 && Idx2 != -1) + return {V2, V1, Idx2}; + return {SDValue(), SDValue(), -1}; + }; - // Case 2: the lowest element of V1 replaces one element in V2. - Idx = checkReplaceOne(Mask, MaskSize, 0); + auto [Src, Ins, Idx] = DetectReplaced(Mask, V1, V2); if (Idx != -1) - return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1, - DAG.getConstant(Idx, DL, GRLenVT)); + return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, Src, Ins, + DAG.getConstant(Idx, DL, Subtarget.getGRLenVT())); return SDValue(); } From e4fdb633f6397b2446b9934d545d1de432cc9821 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Mon, 29 Sep 2025 15:26:35 +0800 Subject: [PATCH 4/5] Revert "address heiher's comment" This reverts commit ce39fc0ccd48ff376b42d3325a1fdb3febcd7372. --- .../LoongArch/LoongArchISelLowering.cpp | 67 ++++++++----------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index de53b322eb418..5c6a4a33f4caa 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2319,6 +2319,25 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1); } +// Check if exactly one element of the Mask is replaced by 'Replaced', while +// all other elements are either 'Base + i' or undef (-1). On success, return +// the index of the replaced element. Otherwise, just return -1. +static int checkReplaceOne(ArrayRef Mask, int Base, int Replaced) { + int MaskSize = Mask.size(); + int Idx = -1; + for (int i = 0; i < MaskSize; ++i) { + if (Mask[i] == Base + i || Mask[i] == -1) + continue; + if (Mask[i] != Replaced) + return -1; + if (Idx == -1) + Idx = i; + else + return -1; + } + return Idx; +} + /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible). 
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, @@ -2329,49 +2348,21 @@ lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, VT != MVT::v4f64) return SDValue(); + MVT GRLenVT = Subtarget.getGRLenVT(); int MaskSize = Mask.size(); assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size"); - // Check if exactly one element of the Mask is replaced by the lowest element - // of the other vector, while all other elements are either its index or - // undef (-1). On success, return the index of the replaced element and the - // two result vectors. Otherwise, just return -1 and empty. - auto DetectReplaced = [&](ArrayRef Mask, SDValue V1, - SDValue V2) -> std::tuple { - int Idx1 = -1, Idx2 = -1; - bool Found1 = true, Found2 = true; - - for (int i = 0; i < MaskSize; ++i) { - int Cur = Mask[i]; - // Case 1: the lowest element of V2 replaces one element in V1. - bool Match1 = (Cur == i || Cur == -1 || (Cur == MaskSize && Idx1 == -1)); - // Case 2: the lowest element of V1 replaces one element in V2. - bool Match2 = - (Cur == MaskSize + i || Cur == -1 || (Cur == 0 && Idx2 == -1)); - - // Record the index of the replaced element. - if (Match1 && Cur == MaskSize && Idx1 == -1) - Idx1 = i; - if (Match2 && Cur == 0 && Idx2 == -1) - Idx2 = i; - - Found1 &= Match1; - Found2 &= Match2; - if (!Found1 && !Found2) - return {SDValue(), SDValue(), -1}; - } - - if (Found1 && Idx1 != -1) - return {V1, V2, Idx1}; - if (Found2 && Idx2 != -1) - return {V2, V1, Idx2}; - return {SDValue(), SDValue(), -1}; - }; + // Case 1: the lowest element of V2 replaces one element in V1. + int Idx = checkReplaceOne(Mask, 0, MaskSize); + if (Idx != -1) + return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2, + DAG.getConstant(Idx, DL, GRLenVT)); - auto [Src, Ins, Idx] = DetectReplaced(Mask, V1, V2); + // Case 2: the lowest element of V1 replaces one element in V2. + Idx = checkReplaceOne(Mask, MaskSize, 0); if (Idx != -1) - return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, Src, Ins, - DAG.getConstant(Idx, DL, Subtarget.getGRLenVT())); + return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1, + DAG.getConstant(Idx, DL, GRLenVT)); return SDValue(); } From 92a313058b24b1942c78d282951ecd36fc9e6b25 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Mon, 29 Sep 2025 17:06:47 +0800 Subject: [PATCH 5/5] using lambda --- .../LoongArch/LoongArchISelLowering.cpp | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 5c6a4a33f4caa..4cfbfca45d359 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2319,25 +2319,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1); } -// Check if exactly one element of the Mask is replaced by 'Replaced', while -// all other elements are either 'Base + i' or undef (-1). On success, return -// the index of the replaced element. Otherwise, just return -1. -static int checkReplaceOne(ArrayRef Mask, int Base, int Replaced) { - int MaskSize = Mask.size(); - int Idx = -1; - for (int i = 0; i < MaskSize; ++i) { - if (Mask[i] == Base + i || Mask[i] == -1) - continue; - if (Mask[i] != Replaced) - return -1; - if (Idx == -1) - Idx = i; - else - return -1; - } - return Idx; -} - /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible). 
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, @@ -2352,14 +2333,32 @@ lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef Mask, MVT VT, int MaskSize = Mask.size(); assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size"); + // Check if exactly one element of the Mask is replaced by 'Replaced', while + // all other elements are either 'Base + i' or undef (-1). On success, return + // the index of the replaced element. Otherwise, just return -1. + auto checkReplaceOne = [&](int Base, int Replaced) -> int { + int Idx = -1; + for (int i = 0; i < MaskSize; ++i) { + if (Mask[i] == Base + i || Mask[i] == -1) + continue; + if (Mask[i] != Replaced) + return -1; + if (Idx == -1) + Idx = i; + else + return -1; + } + return Idx; + }; + // Case 1: the lowest element of V2 replaces one element in V1. - int Idx = checkReplaceOne(Mask, 0, MaskSize); + int Idx = checkReplaceOne(0, MaskSize); if (Idx != -1) return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2, DAG.getConstant(Idx, DL, GRLenVT)); // Case 2: the lowest element of V1 replaces one element in V2. - Idx = checkReplaceOne(Mask, MaskSize, 0); + Idx = checkReplaceOne(MaskSize, 0); if (Idx != -1) return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1, DAG.getConstant(Idx, DL, GRLenVT));
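
Appendix (reviewer note, not part of the patches): the mask-matching rule implemented by checkReplaceOne — and by the equivalent lambda introduced in PATCH 5/5 — can be exercised outside of LLVM. Below is a minimal standalone C++ sketch of that check on plain std::vector<int> masks instead of ArrayRef<int>; the function name maskReplacedIndex, the example masks, and the printed output are illustrative assumptions only and do not appear in the patches.

// Standalone illustration of the check behind lowerVECTOR_SHUFFLE_XVINSVE0.
// Plain C++17, no LLVM headers; names and test masks are illustrative only.
#include <cstdio>
#include <vector>

// Return the index of the single mask element equal to `Replaced` when every
// other element is either `Base + i` (element keeps its place) or undef (-1);
// otherwise return -1, meaning the shuffle is not an insert-element-0 pattern.
static int maskReplacedIndex(const std::vector<int> &Mask, int Base,
                             int Replaced) {
  int Idx = -1;
  for (int i = 0, e = (int)Mask.size(); i < e; ++i) {
    if (Mask[i] == Base + i || Mask[i] == -1)
      continue; // element stays in place or is undef
    if (Mask[i] != Replaced || Idx != -1)
      return -1; // unexpected value, or a second replaced position
    Idx = i; // first (and only) replaced position
  }
  return Idx;
}

int main() {
  // v8i32 mask <0,1,2,8,4,5,6,7>: element 3 of V1 is replaced by element 0 of
  // V2, so the lowering would emit xvinsve0.w with V1 as the destination and
  // immediate 3 (case 1 in the patch).
  std::vector<int> M1 = {0, 1, 2, 8, 4, 5, 6, 7};
  std::printf("case 1 index: %d\n",
              maskReplacedIndex(M1, /*Base=*/0, /*Replaced=*/8)); // prints 3

  // Mask <8,9,0,11,12,13,14,15>: element 2 of V2 is replaced by element 0 of
  // V1, so the operands are swapped before emitting XVINSVE0 (case 2).
  std::vector<int> M2 = {8, 9, 0, 11, 12, 13, 14, 15};
  std::printf("case 2 index: %d\n",
              maskReplacedIndex(M2, /*Base=*/8, /*Replaced=*/0)); // prints 2

  // Mask <0,8,2,8,4,5,6,7>: two positions take V2's element 0, so the pattern
  // does not match and the generic shuffle lowering is kept.
  std::vector<int> M3 = {0, 8, 2, 8, 4, 5, 6, 7};
  std::printf("no match: %d\n",
              maskReplacedIndex(M3, /*Base=*/0, /*Replaced=*/8)); // prints -1
  return 0;
}

The two matching masks mirror the kinds of shuffles covered by the shuffle-as-xvinsve0.ll tests above; in the DAG, the matched index becomes the GRLenVT constant operand of LoongArchISD::XVINSVE0, which the TableGen patterns then match as the uimm3/uimm2 operand of XVINSVE0_W/XVINSVE0_D.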