Skip to content

Commit

Permalink
[AMDGPU][ISel] Add more trunc store actions regarding bf16
Browse files: browse the repository at this point in the history
  • Loading branch information
shiltian committed Apr 29, 2024
1 parent 11f4f45 commit ccc29eb
Show file tree
Hide file tree
Showing 3 changed files with 672 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3bf16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
Expand All @@ -330,6 +331,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);

setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);

setTruncStoreAction(MVT::v3i32, MVT::v3i8, Expand);
Expand All @@ -339,17 +341,21 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v3i64, MVT::v3i8, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i1, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3bf16, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand);

setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);

setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);

setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
setTruncStoreAction(MVT::v16f64, MVT::v16bf16, Expand);
setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,40 @@ entry:
ret void
}

; Test: truncating store of a <3 x float> value as <3 x bfloat> to an
; addrspace(1) pointer. The assertions below pin the expected llc output:
;   * each of the three input lanes (v0, v1, v2) goes through the same
;     v_bfe_u32 / v_add3_u32 / v_or_b32 / v_cmp_u_f32 / v_cndmask_b32 sequence;
;   * the results for lanes 0 and 1 are packed into one register by v_perm_b32
;     and written with a single global_store_dword;
;   * the result for lane 2 is written separately with
;     global_store_short_d16_hi at offset 4.
; NOTE(review): the assertion lines look autogenerated (presumably by
; utils/update_llc_test_checks.py) -- regenerate them rather than hand-editing.
define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v3:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v5, v4
; CHECK-NEXT: v_mov_b32_e32 v4, v3
; CHECK-NEXT: v_bfe_u32 v3, v0, 16, 1
; CHECK-NEXT: s_movk_i32 s4, 0x7fff
; CHECK-NEXT: v_add3_u32 v3, v3, v0, s4
; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v0
; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT: v_bfe_u32 v3, v1, 16, 1
; CHECK-NEXT: v_add3_u32 v3, v3, v1, s4
; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v1
; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
; CHECK-NEXT: s_mov_b32 s5, 0x7060302
; CHECK-NEXT: v_perm_b32 v0, v1, v0, s5
; CHECK-NEXT: v_bfe_u32 v1, v2, 16, 1
; CHECK-NEXT: v_add3_u32 v1, v1, v2, s4
; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v2
; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT: global_store_short_d16_hi v[4:5], v1, off offset:4
; CHECK-NEXT: global_store_dword v[4:5], v0, off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
; Narrow all three lanes from float to bfloat, then store the 3-element
; vector through %p with 8-byte alignment.
%conv = fptrunc <3 x float> %num to <3 x bfloat>
store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
ret void
}

define void @v4(<4 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v4:
; CHECK: ; %bb.0: ; %entry
Expand Down
Loading

0 comments on commit ccc29eb

Please sign in to comment.