[AMDGPU] Folding imm offset in more cases for scratch access #70634

Merged: 1 commit merged into llvm:main on Nov 29, 2023

Conversation

@ruiling (Contributor) commented Oct 30, 2023

For scratch load/store, our hardware only accepts a non-negative value in the SGPR/VGPR address operand. Besides the cases that we can prove from known bits, we can also prove that the value in the base will be non-negative: 1.) when the ADD for the address calculation has the NoUnsignedWrap (`nuw`) flag; 2.) when the immediate offset is already negative.
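
For reference, a condensed sketch of the legality check this describes. It restates the SelectionDAG hunk in the diff below (the selector's member-function context is omitted, so this is illustrative rather than standalone-compilable):

// Returns true when the base operand of Addr can be proven non-negative, so
// the constant part of the address may be folded into the scratch
// instruction's immediate offset field.
static bool baseIsProvablyNonNegative(SelectionDAG &DAG, SDValue Addr,
                                      SDValue Base) {
  if (Addr.getOpcode() == ISD::ADD) {
    // Case 1: `nuw` addition cannot wrap, so a valid (small) result implies
    // the base does not have its sign bit set.
    if (Addr->getFlags().hasNoUnsignedWrap())
      return true;
    // Case 2: with a negative immediate offset, a negative base as well could
    // not produce a valid scratch address (isLegalFLATOffset() filters the
    // remaining wrap-around cases later).
    if (auto *RHS = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      if (RHS->getSExtValue() < 0)
        return true;
  }
  // Otherwise fall back to the existing known-bits proof.
  return DAG.SignBitIsZero(Base);
}

The review discussion below also covers the equivalent GlobalISel-side check in AMDGPUInstructionSelector.cpp.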

@llvmbot (Member) commented Oct 30, 2023

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Ruiling, Song (ruiling)

Changes

For scratch load/store, our hardware only accepts a non-negative value in the SGPR/VGPR address operand. Besides the cases that we can prove from known bits, we can also prove that the value in the base will be non-negative: 1.) when the ADD for the address calculation has the NoUnsignedWrap (`nuw`) flag; 2.) when the immediate offset is already negative.


Patch is 105.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70634.diff

8 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+19-5)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (+2-1)
  • (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll (+32-60)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+192-226)
  • (modified) llvm/test/CodeGen/AMDGPU/function-returns.ll (+21-30)
  • (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll (+284-308)
  • (modified) llvm/test/CodeGen/AMDGPU/memory_clause.ll (+6-12)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b5ceaaa14b4fd5e..691d644badd24b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1146,10 +1146,23 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
   return CurDAG->SignBitIsZero(Base);
 }
 
-bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
+bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr, SDValue Base,
                                                 uint64_t FlatVariant) const {
   if (FlatVariant != SIInstrFlags::FlatScratch)
     return true;
+
+  if (Addr.getOpcode() == ISD::ADD) {
+    // For `nuw` addition, we should not have negative base address.
+    if (Addr->getFlags().hasNoUnsignedWrap())
+      return true;
+
+    auto *RHS = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    // If the immediate offset is negative, we should not have the base being
+    // negative as well.
+    if (RHS && RHS->getSExtValue() < 0)
+      return true;
+  }
+
   // When value in 32-bit Base can be negative calculate scratch offset using
   // 32-bit add instruction, otherwise use Base(unsigned) + offset.
   return CurDAG->SignBitIsZero(Base);
@@ -1549,7 +1562,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
   if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
     SDValue N0, N1;
     if (isBaseWithConstantOffset64(Addr, N0, N1) &&
-        isFlatScratchBaseLegal(N0, FlatVariant)) {
+        isFlatScratchBaseLegal(Addr, N0, FlatVariant)) {
       int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
 
       const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1782,7 +1795,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
   int64_t COffsetVal = 0;
 
   if (CurDAG->isBaseWithConstantOffset(Addr) &&
-      isFlatScratchBaseLegal(Addr.getOperand(0))) {
+      isFlatScratchBaseLegal(Addr, Addr.getOperand(0))) {
     COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
     SAddr = Addr.getOperand(0);
   } else {
@@ -1860,7 +1873,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
           CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
         VAddr = SDValue(VMov, 0);
         SAddr = LHS;
-        if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
+        if (!isFlatScratchBaseLegal(Addr, SAddr))
           return false;
         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
           return false;
@@ -1886,7 +1899,8 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
     return false;
   }
 
-  if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
+  if (!isFlatScratchBaseLegal(Addr, SAddr) ||
+      !isFlatScratchBaseLegal(Addr, VAddr))
     return false;
 
   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index a8a606f60a3faee..8a47757f70bbfbc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -155,7 +155,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
                         unsigned Size) const;
   bool isFlatScratchBaseLegal(
-      SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
+      SDValue Addr, SDValue Base,
+      uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
 
   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index af023835c529776..329f0a2068cb072 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -704,11 +704,11 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
 ; FLATSCR:       ; %bb.0: ; %bb
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; FLATSCR-NEXT:    scratch_load_short_d16_hi v1, v0, off
-; FLATSCR-NEXT:    v_add_u32_e32 v2, 2, v0
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR-NEXT:    v_pk_sub_u16 v0, v1, -12 op_sel_hi:[1,0]
-; FLATSCR-NEXT:    scratch_load_short_d16 v0, v2, off
+; FLATSCR-NEXT:    v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT:    scratch_load_short_d16 v1, v0, off offset:2
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR-NEXT:    v_mov_b32_e32 v0, v1
 ; FLATSCR-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_other_dep:
@@ -726,22 +726,22 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
 ; FLATSCR_GFX10:       ; %bb.0: ; %bb
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; FLATSCR_GFX10-NEXT:    scratch_load_short_d16_hi v1, v0, off
-; FLATSCR_GFX10-NEXT:    v_add_nc_u32_e32 v2, 2, v0
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT:    v_pk_sub_u16 v0, v1, -12 op_sel_hi:[1,0]
-; FLATSCR_GFX10-NEXT:    scratch_load_short_d16 v0, v2, off
+; FLATSCR_GFX10-NEXT:    v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR_GFX10-NEXT:    scratch_load_short_d16 v1, v0, off offset:2
 ; FLATSCR_GFX10-NEXT:    s_waitcnt vmcnt(0)
+; FLATSCR_GFX10-NEXT:    v_mov_b32_e32 v0, v1
 ; FLATSCR_GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: chain_hi_to_lo_private_other_dep:
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    scratch_load_d16_hi_b16 v1, v0, off
-; GFX11-NEXT:    v_add_nc_u32_e32 v2, 2, v0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_pk_sub_u16 v0, v1, -12 op_sel_hi:[1,0]
-; GFX11-NEXT:    scratch_load_d16_b16 v0, v2, off
+; GFX11-NEXT:    v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX11-NEXT:    scratch_load_d16_b16 v1, v0, off offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %gep_lo = getelementptr inbounds i16, ptr addrspace(5) %ptr, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll
index c849cf08094e718..ad4d4a4a30fc6d0 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-i8-i16.ll
@@ -11,16 +11,14 @@ define amdgpu_ps void @test_scratch_load_i8_zext_v(ptr addrspace(5) %in, ptr %ou
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, 1, v0
-; GFX10-NEXT:    scratch_load_ubyte v0, v0, off
+; GFX10-NEXT:    scratch_load_ubyte v0, v0, off offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_zext_v:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0
-; GFX11-NEXT:    scratch_load_u8 v0, v0, off
+; GFX11-NEXT:    scratch_load_u8 v0, v0, off offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -38,16 +36,14 @@ define amdgpu_ps void @test_scratch_load_i8_sext_v(ptr addrspace(5) %in, ptr %ou
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, 1, v0
-; GFX10-NEXT:    scratch_load_sbyte v0, v0, off
+; GFX10-NEXT:    scratch_load_sbyte v0, v0, off offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_sext_v:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0
-; GFX11-NEXT:    scratch_load_i8 v0, v0, off
+; GFX11-NEXT:    scratch_load_i8 v0, v0, off offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -65,16 +61,14 @@ define amdgpu_ps void @test_scratch_load_i16_zext_v(ptr addrspace(5) %in, ptr %o
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, 2, v0
-; GFX10-NEXT:    scratch_load_ushort v0, v0, off
+; GFX10-NEXT:    scratch_load_ushort v0, v0, off offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_zext_v:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_add_nc_u32_e32 v0, 2, v0
-; GFX11-NEXT:    scratch_load_u16 v0, v0, off
+; GFX11-NEXT:    scratch_load_u16 v0, v0, off offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -92,16 +86,14 @@ define amdgpu_ps void @test_scratch_load_i16_sext_v(ptr addrspace(5) %in, ptr %o
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, 2, v0
-; GFX10-NEXT:    scratch_load_sshort v0, v0, off
+; GFX10-NEXT:    scratch_load_sshort v0, v0, off offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_sext_v:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_add_nc_u32_e32 v0, 2, v0
-; GFX11-NEXT:    scratch_load_i16 v0, v0, off
+; GFX11-NEXT:    scratch_load_i16 v0, v0, off offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -359,16 +351,14 @@ define amdgpu_ps void @test_scratch_load_i8_zext_s(ptr addrspace(5) inreg %in, p
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    s_add_i32 s2, s2, 1
-; GFX10-NEXT:    scratch_load_ubyte v2, off, s2
+; GFX10-NEXT:    scratch_load_ubyte v2, off, s2 offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_zext_s:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_add_i32 s0, s0, 1
-; GFX11-NEXT:    scratch_load_u8 v2, off, s0
+; GFX11-NEXT:    scratch_load_u8 v2, off, s0 offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
@@ -386,16 +376,14 @@ define amdgpu_ps void @test_scratch_load_i8_sext_s(ptr addrspace(5) inreg %in, p
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    s_add_i32 s2, s2, 1
-; GFX10-NEXT:    scratch_load_sbyte v2, off, s2
+; GFX10-NEXT:    scratch_load_sbyte v2, off, s2 offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_sext_s:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_add_i32 s0, s0, 1
-; GFX11-NEXT:    scratch_load_i8 v2, off, s0
+; GFX11-NEXT:    scratch_load_i8 v2, off, s0 offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
@@ -413,16 +401,14 @@ define amdgpu_ps void @test_scratch_load_i16_zext_s(ptr addrspace(5) inreg %in,
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    s_add_i32 s2, s2, 2
-; GFX10-NEXT:    scratch_load_ushort v2, off, s2
+; GFX10-NEXT:    scratch_load_ushort v2, off, s2 offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_zext_s:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_add_i32 s0, s0, 2
-; GFX11-NEXT:    scratch_load_u16 v2, off, s0
+; GFX11-NEXT:    scratch_load_u16 v2, off, s0 offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
@@ -440,16 +426,14 @@ define amdgpu_ps void @test_scratch_load_i16_sext_s(ptr addrspace(5) inreg %in,
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    s_add_i32 s2, s2, 2
-; GFX10-NEXT:    scratch_load_sshort v2, off, s2
+; GFX10-NEXT:    scratch_load_sshort v2, off, s2 offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_sext_s:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_add_i32 s0, s0, 2
-; GFX11-NEXT:    scratch_load_i16 v2, off, s0
+; GFX11-NEXT:    scratch_load_i16 v2, off, s0 offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[0:1], v2
 ; GFX11-NEXT:    s_endpgm
@@ -713,19 +697,16 @@ define amdgpu_ps void @test_scratch_load_i8_zext_svs(ptr addrspace(5) inreg %in,
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 1
-; GFX10-NEXT:    scratch_load_ubyte v0, v0, off
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s2
+; GFX10-NEXT:    scratch_load_ubyte v0, v0, off offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_zext_svs:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 1
-; GFX11-NEXT:    scratch_load_u8 v0, v0, off
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-NEXT:    scratch_load_u8 v0, v0, off offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -745,19 +726,16 @@ define amdgpu_ps void @test_scratch_load_i8_sext_svs(ptr addrspace(5) inreg %in,
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 1
-; GFX10-NEXT:    scratch_load_sbyte v0, v0, off
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s2
+; GFX10-NEXT:    scratch_load_sbyte v0, v0, off offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i8_sext_svs:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 1
-; GFX11-NEXT:    scratch_load_i8 v0, v0, off
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-NEXT:    scratch_load_i8 v0, v0, off offset:1
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -777,19 +755,16 @@ define amdgpu_ps void @test_scratch_load_i16_zext_svs(ptr addrspace(5) inreg %in
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 2
-; GFX10-NEXT:    scratch_load_ushort v0, v0, off
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s2
+; GFX10-NEXT:    scratch_load_ushort v0, v0, off offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_zext_svs:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 2
-; GFX11-NEXT:    scratch_load_u16 v0, v0, off
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-NEXT:    scratch_load_u16 v0, v0, off offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
@@ -809,19 +784,16 @@ define amdgpu_ps void @test_scratch_load_i16_sext_svs(ptr addrspace(5) inreg %in
 ; GFX10-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 2
-; GFX10-NEXT:    scratch_load_sshort v0, v0, off
+; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 2, s2
+; GFX10-NEXT:    scratch_load_sshort v0, v0, off offset:2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    flat_store_dword v[1:2], v0
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: test_scratch_load_i16_sext_svs:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 2
-; GFX11-NEXT:    scratch_load_i16 v0, v0, off
+; GFX11-NEXT:    v_lshl_add_u32 v0, v0, 2, s0
+; GFX11-NEXT:    scratch_load_i16 v0, v0, off offset:2
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    flat_store_b32 v[1:2], v0
 ; GFX11-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 07b3df2a8520aae..fe984ffa653a5ef 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -576,11 +576,10 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
 ; GFX9-NEXT:    v_add_u32_e32 v1, 4, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 15
-; GFX9-NEXT:    v_sub_u32_e32 v0, 4, v0
 ; GFX9-NEXT:    scratch_store_dword v1, v2, off
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u32_e32 v0, 0x7c, v0
-; GFX9-NEXT:    scratch_load_dword v0, v0, off glc
+; GFX9-NEXT:    v_sub_u32_e32 v0, 4, v0
+; GFX9-NEXT:    scratch_load_dword v0, v0, off offset:124 glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -592,24 +591,22 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 15
-; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 4, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, 4, v0
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0x7c, v1
-; GFX10-NEXT:    scratch_store_dword v0, v2, off
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, 4, v0
+; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 4, v0
+; GFX10-NEXT:    scratch_store_dword v1, v2, off
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    scratch_load_dword v0, v1, off glc dlc
+; GFX10-NEXT:    scratch_load_dword v0, v0, off offset:124 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_endpgm
 ;
 ; GFX11-LABEL: store_load_vindex_kernel:
 ; GFX11:       ; %bb.0: ; %bb
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 4, v0
-; GFX11-NEXT:    v_dual_mov_b32 v2, 15 :: v_dual_add_nc_u32 v1, 0x7c, v1
-; GFX11-NEXT:    scratch_store_b32 v0, v2, off offset:4 dlc
+; GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 4, v0
+; GFX11-NEXT:    scratch_store_b32 v0, v1, off offset:4 dlc
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    scratch_load_b32 v0, v1, off glc dlc
+; GFX11-NEXT:    scratch_load_b32 v0, v2, off offset:124 glc dlc
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_endpgm
 ;
@@ -628,8 +625,7 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
 ; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
 ; GFX9-PAL-NEXT:    scratch_store_dword v1, v2, off
 ; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT:    v_add_u32_e32 v0, 0x7c, v0
-; GFX9-PAL-NEXT:    scratch_lo...
[truncated]

@arsenm (Contributor) commented Oct 30, 2023

> For scratch load/store, our hardware only accepts a non-negative value in the SGPR/VGPR address operand.

Should refine this to specify that this is for MUBUF; scratch instructions do have signed offsets (we also still need to implement stack support using negative offsets).

@ruiling (Contributor Author) commented Oct 31, 2023

> For scratch load/store, our hardware only accepts a non-negative value in the SGPR/VGPR address operand.
>
> Should refine this to specify that this is for MUBUF; scratch instructions do have signed offsets (we also still need to implement stack support using negative offsets).

I don't quite get your point here; the change is for scratch_load/scratch_store. The commit message says that the address offset in SGPR/VGPR should be non-negative per the hardware requirement. If I understand correctly, only the immediate offset in scratch_load/store can be negative, right?

@@ -1146,10 +1146,23 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr, SDValue Base,
Contributor:

I don't think you can implement this in isFlatScratchBaseLegal. If the address is saddr+ioffset or vaddr+ioffset then this function is only called on the saddr or vaddr part, so this is not the right place to check for nuw addition.

Contributor Author (ruiling):

I think the whole address will be passed in as Addr, instead of just the base, in the saddr+ioffset and vaddr+ioffset cases? See AMDGPUDAGToDAGISel::SelectFlatOffsetImpl() and AMDGPUDAGToDAGISel::SelectScratchSAddr(). I am trying to add some comments locally explaining the function:

// This is used to check whether the address of scratch_load/store in the
// form of `base + immediate offset` is legal with respect to the hardware's
// requirement that the SGPR/VGPR address offset in the flat scratch instruction
// should be unsigned.

Contributor Author (ruiling):

BTW, the function was previously only passed the base part; I have now passed in the whole address as well as the base.

Contributor:

OK, I see. I think this patch is probably OK as-is but the code that it touches could really benefit from some refactoring. For example isFlatScratchBaseLegal no longer just checks the "base" part of the address, and it has to look for ADD nodes that have already been discovered by its callers. In the case of SGPR+VGPR+offset addressing, we could check that both ADDs have the nuw flag set.

Maybe selectScratchOffset, selectScratchSAddr and selectScratchSVAddr could be combined into one function with flags saying whether it should match saddr or vaddr or both.
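
A rough sketch of that suggestion for the SGPR+VGPR+offset form (a hypothetical helper, not code from this patch): once the selector has matched Addr as (add (add SAddr, VAddr), imm), require nuw on both additions before treating SAddr and VAddr as non-negative.

// Hypothetical check for the SGPR+VGPR+offset case: Addr is assumed to have
// the shape (add (add SAddr, VAddr), imm); both additions must carry nuw.
static bool bothAddsAreNUW(SDValue Addr) {
  if (Addr.getOpcode() != ISD::ADD || !Addr->getFlags().hasNoUnsignedWrap())
    return false;
  SDValue Inner = Addr.getOperand(0);
  return Inner.getOpcode() == ISD::ADD &&
         Inner->getFlags().hasNoUnsignedWrap();
}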

Contributor Author (ruiling):

I would argue that the job of isFlatScratchBaseLegal() does not change too much. Its job is still to prove that the base part of the address can be put in an SGPR/VGPR, but with the additional context we can now do better. So far, the extra checking that happens inside it is trivial. Maybe there should be a better name for it :)

I feel we might need more code to check both ADDs in the SGPR+VGPR+Imm case, because passing in two bases does not mean that the SGPR and VGPR come from two different ADDs. So I just left a TODO in case someone comes up with a simple implementation later.

I am not sure what the ideal refactoring of the three select* functions would look like. I would really prefer that someone else help with that later as a separate NFC change.

Contributor Author (ruiling):

I have added the check for the two ADDs and refined the code to be more reasonable. Please help take a look. Thanks!

ruiling force-pushed the fold-more-scratch-imm branch from d676da0 to b6c8367 on November 1, 2023 03:26
@ruiling (Contributor Author) commented Nov 3, 2023

Hi @arsenm @jayfoad, do you still have concerns about this PR?

github-actions bot commented Nov 6, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@ruiling (Contributor Author) commented Nov 17, 2023

ping

@ruiling (Contributor Author) commented Nov 22, 2023

ping

@nhaehnle (Collaborator) left a comment:

I don't know. The whole CheckBothOperands thing seems fragile to me and as if something wasn't quite right yet. See also the inline comments.

Comment on lines 1175 to 1178
// If the immediate offset is negative, the base address cannot also be
// negative. Although in theory we could get positive value by adding two
// negative values (like in the case: 0x80000010 + 0x80000020 = 0x30), but
// isLegalFLATOffset() will help filter out such values later.
Collaborator:

This reliance on interactions between multiple methods seems fragile :(

Contributor Author (ruiling):

But it may not be as fragile as you think. Do you see a situation in which this might break? Or do you think adding a check here would be helpful, like checking that Imm > -0x40000000? That should be enough: the base address would then have to be non-negative, because otherwise the resulting address would be either negative or so large that it falls outside the region a thread can access.

@ruiling (Contributor Author) commented Nov 23, 2023

> I don't know. The whole CheckBothOperands thing seems fragile to me and as if something wasn't quite right yet. See also the inline comments.

CheckBothOperands is for the case where we put the two operands into an SGPR and a VGPR separately, so we need to check both operands. If the flag is false, we only need to check the first operand, which will be put in the SGPR/VGPR. Feel free to propose a better way to do this :)

@ruiling (Contributor Author) commented Nov 27, 2023

> I don't know. The whole CheckBothOperands thing seems fragile to me and as if something wasn't quite right yet. See also the inline comments.
>
> CheckBothOperands is for the case where we put the two operands into an SGPR and a VGPR separately, so we need to check both operands. If the flag is false, we only need to check the first operand, which will be put in the SGPR/VGPR. Feel free to propose a better way to do this :)

CheckBothOperands has been removed in the latest commit.

@arsenm (Contributor) left a comment:

Really wish we could get Alive2 or something to prove this works.

Comment on lines 1161 to 1162
if (FlatVariant != SIInstrFlags::FlatScratch)
return true;
Contributor:

Pre-existing issue, but I'd expect this case not to be called in the first place?

Contributor Author (ruiling):

Yes, agreed. I did not think carefully about this. I think the caller could write something like `if (FlatVariant != SIInstrFlags::FlatScratch || isFlatScratchBaseLegal(...))`.

// When value in 32-bit Base can be negative calculate scratch offset using
// 32-bit add instruction, otherwise use Base(unsigned) + offset.
return KB->signBitIsZero(Base);
auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
Contributor:

I think auto hurts here

Contributor Author (ruiling):

Agreed. But it is also awkward to have a type name take up almost an extra line :( I have changed it to getDefIgnoringCopies(), which matches the need here exactly.
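
For reference, a hedged sketch of how the GlobalISel side can look through copies with getDefIgnoringCopies() from llvm/CodeGen/GlobalISel/Utils.h. This paraphrases the quoted line above and the commit message's "OR, or ADD with nuw" condition; it is not the patch's final code, and the helper name is hypothetical:

// Look through COPYs to the instruction defining the address register and ask
// whether it is an OR or an ADD that carries the NoUnsignedWrap flag (case 1
// of the commit message below).
static bool addrHasNoUnsignedWrap(Register Addr,
                                  const MachineRegisterInfo &MRI) {
  const MachineInstr *Def = getDefIgnoringCopies(Addr, MRI);
  if (!Def)
    return false;
  return Def->getOpcode() == TargetOpcode::G_OR ||
         (Def->getOpcode() == TargetOpcode::G_ADD &&
          Def->getFlag(MachineInstr::NoUWrap));
}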

For flat scratch load/store, our hardware only accepts non-negative values
in SGPR/VGPR. Besides the cases that we can prove from known bits,
we can also prove that the value in `base` will be non-negative:
1.) When the address calculation has the NoUnsignedWrap property (either
    an OR instruction or an ADD with the NoUnsignedWrap flag).
2.) When the immediate offset is already negative and within a specific
    range.
ruiling force-pushed the fold-more-scratch-imm branch from 5f67952 to 1e8d9dd on November 29, 2023 04:34
@ruiling (Contributor Author) commented Nov 29, 2023

The failure in `Flang :: Driver/ctofortran.f90` is unrelated. I will merge the change.

ruiling merged commit c1511a6 into llvm:main on Nov 29, 2023 (2 of 3 checks passed)
Register Base, uint64_t FlatVariant) const {
if (FlatVariant != SIInstrFlags::FlatScratch)
// Return whether the operation has NoUnsignedWrap property.
bool isNoUnsignedWrap(MachineInstr *Addr) {
Member:

I added static to make this internal linkage and changed a `};` to `}` in commit 9535e01.

jayfoad added a commit that referenced this pull request Jan 18, 2024
…8193)

#70634 has disabled use
of potentially negative scratch offsets, but we can use it on GFX12.

---------

Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
ampandey-1995 pushed a commit to ampandey-1995/llvm-project that referenced this pull request Jan 19, 2024
…vm#78193)

llvm#70634 has disabled use
of potentially negative scratch offsets, but we can use it on GFX12.

---------

Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>