Skip to content

Commit

Permalink
[AMDGPU][SplitModule] Allow non-kernels to be treated as roots (#95902)
Browse files Browse the repository at this point in the history
I initially assumed only kernels could be roots, but that is wrong. A
function with no callers also needs to be a root to ensure it is
correctly handled.
Such functions are very rare because we usually internalize everything, and
internal functions with no callers would be deleted.

When they are present, we also need to consider their dependencies and
act accordingly. Previously, such a function could be put in P0 "by default"
even though it called a function with internal linkage that was defined in
another module, which was of course incorrect.

Fixes SWDEV-467695
  • Loading branch information
Pierre-vh authored Jun 24, 2024
1 parent 9e8ccf6 commit 1c025fb
Show file tree
Hide file tree
Showing 7 changed files with 294 additions and 112 deletions.
246 changes: 138 additions & 108 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s

Expand Down
36 changes: 36 additions & 0 deletions llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -debug 2>&1 | FileCheck %s --implicit-check-not="[root]"
; REQUIRES: asserts

; func_3 is never directly called, it needs to be considered
; as a root to handle this module correctly.

; CHECK: [root] kernel_1
; CHECK-NEXT: [dependency] func_1
; CHECK-NEXT: [dependency] func_2
; CHECK-NEXT: [root] func_3
; CHECK-NEXT: [dependency] func_2

; Kernel entry point (amdgpu_kernel calling convention); its only call is a
; direct call to @func_1.
define amdgpu_kernel void @kernel_1() {
entry:
call void @func_1()
ret void
}

; Called directly by @kernel_1; calls @func_2 and ignores the returned value.
define linkonce_odr hidden void @func_1() {
entry:
%call = call i32 @func_2()
ret void
}

; Leaf function called by both @func_1 and @func_3; always returns 0.
; Uses attribute group #0 (noinline optnone).
define linkonce_odr hidden i32 @func_2() #0 {
entry:
ret i32 0
}

; Non-kernel function that nothing in this module calls; it calls @func_2 and
; ignores the returned value.
define void @func_3() {
entry:
%call = call i32 @func_2()
ret void
}

attributes #0 = { noinline optnone }
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=1.2 -amdgpu-module-splitting-large-kernel-merge-overlap=0.5
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=1.2 -amdgpu-module-splitting-large-function-merge-overlap=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 %s
; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 %s
; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 %s
Expand Down
44 changes: 44 additions & 0 deletions llvm/test/tools/llvm-split/AMDGPU/non-kernels-dependencies.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=DEFINE %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=DEFINE %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=DEFINE %s

; 3 functions, each with its own dependencies, should go into 3
; distinct partitions.

; CHECK0: define void @C
; CHECK0: define internal void @HelperC

; CHECK1: define void @B
; CHECK1: define internal void @HelperB

; CHECK2: define void @A
; CHECK2: define internal void @HelperA


; Non-kernel function with no callers in this module; its only call is to the
; internal helper @HelperA.
define void @A() {
call void @HelperA()
ret void
}

; Internal-linkage helper called only by @A; returns immediately.
define internal void @HelperA() {
ret void
}

; Non-kernel function with no callers in this module; its only call is to the
; internal helper @HelperB.
define void @B() {
call void @HelperB()
ret void
}

; Internal-linkage helper called only by @B; returns immediately.
define internal void @HelperB() {
ret void
}

; Non-kernel function with no callers in this module; its only call is to the
; internal helper @HelperC.
define void @C() {
call void @HelperC()
ret void
}

; Internal-linkage helper called only by @C; returns immediately.
define internal void @HelperC() {
ret void
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=DEFINE %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=DEFINE %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=DEFINE %s

; We have 4 functions:
; - Each function has an internal helper.
; - @A's and @B's helpers each do an indirect call.
;
; For non-kernels, indirect calls shouldn't matter, so
; @CallCandidate doesn't have to be in A/B's partition, unlike
; in the corresponding tests for kernels where it has to.

; CHECK0: define hidden void @HelperA
; CHECK0: define hidden void @HelperB
; CHECK0: define internal void @HelperC
; CHECK0: define internal void @HelperD
; CHECK0: define void @A
; CHECK0: define void @B

; CHECK1: define internal void @HelperD
; CHECK1: define void @D

; CHECK2: define hidden void @CallCandidate
; CHECK2: define internal void @HelperC
; CHECK2: define void @C

; Global array holding the addresses of @HelperA, @HelperB and @CallCandidate,
; so each of those three functions has its address taken.
@addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate]

; Internal helper called directly by @A and also referenced from @addrthief.
; Performs a single indirect call through the %call pointer argument.
define internal void @HelperA(ptr %call) {
call void %call()
ret void
}

; Internal helper called directly by @B and also referenced from @addrthief.
; Calls @HelperC, then makes an indirect call through %call, then calls
; @HelperD.
define internal void @HelperB(ptr %call) {
call void @HelperC()
call void %call()
call void @HelperD()
ret void
}

; Internal function that is never called directly in this module; its address
; is only stored in @addrthief. Returns immediately.
define internal void @CallCandidate() {
ret void
}

; Internal helper called directly by both @HelperB and @C; returns immediately.
define internal void @HelperC() {
ret void
}

; Internal helper called directly by both @HelperB and @D; returns immediately.
define internal void @HelperD() {
ret void
}

; Non-kernel function with no direct callers in this module; forwards its
; pointer argument to @HelperA, which calls through it.
define void @A(ptr %call) {
call void @HelperA(ptr %call)
ret void
}

; Non-kernel function with no direct callers in this module; forwards its
; pointer argument to @HelperB, which calls through it.
define void @B(ptr %call) {
call void @HelperB(ptr %call)
ret void
}

; Non-kernel function with no direct callers in this module; its only call is
; to @HelperC (which @HelperB also calls).
define void @C() {
call void @HelperC()
ret void
}

; Non-kernel function with no direct callers in this module; its only call is
; to @HelperD (which @HelperB also calls).
define void @D() {
call void @HelperD()
ret void
}

0 comments on commit 1c025fb

Please sign in to comment.