Skip to content

Commit

Permalink
[AMDGPU] Disable inline constants for pseudo scalar transcendentals (llvm#104395)
Browse files Browse the repository at this point in the history

Prevent operand folding from inlining constants into pseudo scalar
transcendental f16 instructions.
Literal constants, however, are still allowed.

(cherry picked from commit fc6300a)

Change-Id: I5cd412741939cc812150dbb24bd2735a64573b70
  • Loading branch information
perlfu authored and shiltian committed Sep 4, 2024
1 parent d0eeb21 commit 27734be
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const {
  // True when the subtarget generation compares at or above GFX12.
  const bool IsGFX12OrAbove = getGeneration() >= GFX12;
  return IsGFX12OrAbove;
}

/// Reports whether F16 pseudo scalar transcendental instructions on this
/// subtarget cannot take inline constant operands.
///
/// \returns true only when the subtarget generation is exactly GFX12.
bool hasNoF16PseudoScalarTransInlineConstants() const {
  const bool IsExactlyGFX12 = getGeneration() == GFX12;
  return IsExactlyGFX12;
}

/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5804,6 +5804,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
isF16PseudoScalarTrans(MI.getOpcode()) &&
isInlineConstant(*MO, OpInfo)) {
return false;
}

if (MO->isReg()) {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
}

/// Tells whether \p Opcode is one of the F16 pseudo scalar transcendental
/// instructions (the e64 forms of V_S_EXP, V_S_LOG, V_S_RCP, V_S_RSQ and
/// V_S_SQRT).
///
/// \returns true for exactly those five opcodes, false for anything else.
static bool isF16PseudoScalarTrans(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_S_EXP_F16_e64:
  case AMDGPU::V_S_LOG_F16_e64:
  case AMDGPU::V_S_RCP_F16_e64:
  case AMDGPU::V_S_RSQ_F16_e64:
  case AMDGPU::V_S_SQRT_F16_e64:
    return true;
  default:
    return false;
  }
}

/// Checks the TiedSourceNotRead bit in the TSFlags of \p MI's instruction
/// descriptor.
///
/// \returns true when that bit is set, false otherwise.
static bool doesNotReadTiedSource(const MachineInstr &MI) {
  const auto &Desc = MI.getDesc();
  return (Desc.TSFlags & SIInstrFlags::TiedSourceNotRead) != 0;
}
Expand Down
120 changes: 120 additions & 0 deletions llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s

# Do not use inline constants for f16 pseudo scalar transcendentals.
# But allow literal constants.

# exp_f16_imm: the source operand comes from S_MOV_B32 15360 (0x3C00, the IEEE
# half-precision bit pattern of 1.0). The checks expect si-fold-operands to
# keep the S_MOV_B32 and leave V_S_EXP_F16_e64 reading the register, i.e. the
# constant is not folded into the instruction.
---
name: exp_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: exp_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# exp_f16_literal: the source operand comes from S_MOV_B32 16960 (0x4240, the
# IEEE half-precision bit pattern of 3.125). The check expects
# si-fold-operands to place the value directly into V_S_EXP_F16_e64 as an
# immediate operand.
---
name: exp_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: exp_f16_literal
; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# log_f16_imm: the source operand comes from S_MOV_B32 15360 (0x3C00, the IEEE
# half-precision bit pattern of 1.0). The checks expect si-fold-operands to
# keep the S_MOV_B32 and leave V_S_LOG_F16_e64 reading the register, i.e. the
# constant is not folded into the instruction.
---
name: log_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: log_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# log_f16_literal: the source operand comes from S_MOV_B32 16960 (0x4240, the
# IEEE half-precision bit pattern of 3.125). The check expects
# si-fold-operands to place the value directly into V_S_LOG_F16_e64 as an
# immediate operand.
---
name: log_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: log_f16_literal
; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# rcp_f16_imm: the source operand comes from S_MOV_B32 15360 (0x3C00, the IEEE
# half-precision bit pattern of 1.0). The checks expect si-fold-operands to
# keep the S_MOV_B32 and leave V_S_RCP_F16_e64 reading the register, i.e. the
# constant is not folded into the instruction.
---
name: rcp_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rcp_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# rcp_f16_literal: the source operand comes from S_MOV_B32 16960 (0x4240, the
# IEEE half-precision bit pattern of 3.125). The check expects
# si-fold-operands to place the value directly into V_S_RCP_F16_e64 as an
# immediate operand.
---
name: rcp_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rcp_f16_literal
; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# rsq_f16_imm: the source operand comes from S_MOV_B32 15360 (0x3C00, the IEEE
# half-precision bit pattern of 1.0). The checks expect si-fold-operands to
# keep the S_MOV_B32 and leave V_S_RSQ_F16_e64 reading the register, i.e. the
# constant is not folded into the instruction.
---
name: rsq_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rsq_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# rsq_f16_literal: the source operand comes from S_MOV_B32 16960 (0x4240, the
# IEEE half-precision bit pattern of 3.125). The check expects
# si-fold-operands to place the value directly into V_S_RSQ_F16_e64 as an
# immediate operand.
---
name: rsq_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: rsq_f16_literal
; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# sqrt_f16_imm: the source operand comes from S_MOV_B32 15360 (0x3C00, the
# IEEE half-precision bit pattern of 1.0). The checks expect si-fold-operands
# to keep the S_MOV_B32 and leave V_S_SQRT_F16_e64 reading the register, i.e.
# the constant is not folded into the instruction.
---
name: sqrt_f16_imm
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: sqrt_f16_imm
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 15360
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

# sqrt_f16_literal: the source operand comes from S_MOV_B32 16960 (0x4240, the
# IEEE half-precision bit pattern of 3.125). The check expects
# si-fold-operands to place the value directly into V_S_SQRT_F16_e64 as an
# immediate operand.
---
name: sqrt_f16_literal
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: sqrt_f16_literal
; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
%0:sgpr_32 = S_MOV_B32 16960
%1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
...

0 comments on commit 27734be

Please sign in to comment.