Skip to content

Conversation

@heiher
Copy link
Member

@heiher heiher commented Jul 22, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Jul 22, 2025

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/149953.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+20-4)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+204)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a31fa57fcd8c6..cc8b853432118 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4700,13 +4700,29 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
     UseLASX = true;
     break;
   };
-  if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
-    return SDValue();
   Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
                       : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
-  Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
 
-  SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src);
+  SDValue V;
+  if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
+    if (Src.getSimpleValueType() == MVT::v32i8) {
+      SDValue Lo, Hi;
+      std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
+      Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
+      Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
+      Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+                       DAG.getConstant(16, DL, MVT::i8));
+      V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+    } else if (UseLASX) {
+      return SDValue();
+    }
+  }
+
+  if (!V) {
+    Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
+    V = DAG.getNode(Opc, DL, MVT::i64, Src);
+  }
+
   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
   V = DAG.getZExtOrTrunc(V, DL, T);
   return DAG.getBitcast(VT, V);
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index ad57bbf9ee5c0..7fa591db5d1fa 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -603,3 +603,207 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) {
   %2 = bitcast <4 x i1> %1 to i4
   ret i4 %2
 }
+
+define i32 @vmsk2_eq_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_eq_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vseqi.b $vr0, $vr1, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp eq <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sgt_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sgt_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sgt <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sgt_allones_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sgt_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrepli.b $vr2, -1
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sgt <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sge_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vsle.b $vr0, $vr2, $vr0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vsle.b $vr0, $vr2, $vr1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sge <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_slt_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_slt_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr1
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sle_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sle_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.b $vr0, $vr0, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslei.b $vr0, $vr1, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sle_allones_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_sle_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.b $vr0, $vr0, -1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslei.b $vr0, $vr1, -1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_ne_allzeros_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_ne_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vseqi.b $vr0, $vr1, 0
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp ne <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @vmsk2_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: vmsk2_sgt_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslt.b $vr0, $vr3, $vr1
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+  %x = icmp sgt <32 x i8> %a, %b
+  %res = bitcast <32 x i1> %x to i32
+  ret i32 %res
+}
+
+define i32 @vmsk2_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
+; CHECK-LABEL: vmsk2_sgt_and_sgt_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vslt.b $vr0, $vr2, $vr0
+; CHECK-NEXT:    vslt.b $vr1, $vr3, $vr1
+; CHECK-NEXT:    vslt.b $vr2, $vr6, $vr4
+; CHECK-NEXT:    vslt.b $vr3, $vr7, $vr5
+; CHECK-NEXT:    vand.v $vr1, $vr1, $vr3
+; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vmskltz.b $vr0, $vr1
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+  %x0 = icmp sgt <32 x i8> %a, %b
+  %x1 = icmp sgt <32 x i8> %c, %d
+  %y = and <32 x i1> %x0, %x1
+  %res = bitcast <32 x i1> %y to i32
+  ret i32 %res
+}
+
+define i32 @vmsk2_trunc_i8(<32 x i8> %a) {
+; CHECK-LABEL: vmsk2_trunc_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vslli.b $vr0, $vr0, 7
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a0, $vr0, 0
+; CHECK-NEXT:    vslli.b $vr0, $vr1, 7
+; CHECK-NEXT:    vmskltz.b $vr0, $vr0
+; CHECK-NEXT:    vpickve2gr.hu $a1, $vr0, 0
+; CHECK-NEXT:    slli.d $a1, $a1, 16
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ret
+  %y = trunc <32 x i8> %a to <32 x i1>
+  %res = bitcast <32 x i1> %y to i32
+  ret i32 %res
+}

@heiher heiher requested review from tangaac and wangleiat July 23, 2025 03:17
@heiher
Copy link
Member Author

heiher commented Aug 22, 2025

ping.

@heiher heiher merged commit 78b45b4 into llvm:main Aug 28, 2025
9 checks passed
@heiher heiher deleted the opt-vmsk branch August 28, 2025 11:04
t-a-james pushed a commit to t-a-james/llvm-project that referenced this pull request Aug 28, 2025
…oject into bugprone-method-hiding

* 'bugprone-method-hiding' of github.com:t-a-james/llvm-project: (230 commits)
  [SimplifyCFG] Move token type check into canReplaceOperandWithVariable()
  [ADT] Fix signed integer overflow (llvm#155826)
  [Offload] Update LIBOMPTARGET_INFO text for `attach` map-type. (llvm#155509)
  [CMake][AIX] Enable CMP0182: Create shared library archives by default (llvm#155686)
  AMDGPU: Add tests for atomics with AGPR operands (llvm#155820)
  [AArch64] Split zero cycle zeoring per register class (llvm#154561)
  [gn build] Port fa883e1
  [mlir][tosa] Allow shift operand of tosa::MulOp as non-constant (llvm#155197)
  [AArch64][NFC] Add MCInstrAnalysis unittests (llvm#155609)
  [Offload][OpenMP] Tests require libc on GPU for printf (llvm#155785)
  AMDGPU: Add missing verifier tests for load/store AGPR case (llvm#155815)
  [lldb-mcp] Fix building for Windows
  Revert "[lldb] Correct a usage after a rename was merged. (llvm#155720)"
  Revert "[lldb] NFC Moving mcp::Transport into its own file. (llvm#155711)"
  [lldb][test] Run ranges::ref_vew test only for libc++ (llvm#155813)
  [SCCP][FuncSpec] Poison unreachable constant global variable user (llvm#155753)
  [LoongArch] Lowering v32i8 vector mask generation to `VMSKLTZ` (llvm#149953)
  [flang][docs][NFC] Remove stray backtick (llvm#154974)
  [MLIR] Apply clang-tidy fixes for misc-use-internal-linkage in LinalgOps.cpp (NFC)
  [MLIR] Apply clang-tidy fixes for performance-move-const-arg in VariantValue.cpp (NFC)
  ...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants