forked from kokkos/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CSSPGO] Unblock optimizations with pseudo probe instrumentation.
The IR/MIR pseudo probe intrinsics don't get materialized into real machine instructions and therefore don't incur runtime cost directly. However, they come with an indirect cost by blocking certain optimizations. Some of the blocking is intentional (such as blocking code merge) for better counts quality, while the rest is accidental. This change unblocks perf-critical optimizations that do not affect counts quality. They include: 1. IR InstCombine, sinking load operations to shorten lifetimes. 2. MIR LiveRangeShrink, similar to item 1. 3. MIR TwoAddressInstructionPass, i.e., the opeq transform. 4. MIR function argument copy elision. 5. IR stack protection (not perf-critical, but nice to have). Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D95982
- Loading branch information
Showing
15 changed files
with
209 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
; RUN: opt -passes=instcombine -S < %s | FileCheck %s | ||
|
||
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 } | ||
%struct.CompAtomExt = type { i32 } | ||
%struct.CompAtom = type { %class.Vector, float, i16, i8, i8 } | ||
%class.Vector = type { double, double, double } | ||
%class.ComputeNonbondedWorkArrays = type { %class.ResizeArray, %class.ResizeArray.0, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray.2, %class.ResizeArray.2 } | ||
%class.ResizeArray.0 = type { i32 (...)**, %class.ResizeArrayRaw.1* } | ||
%class.ResizeArrayRaw.1 = type <{ double*, i8*, i32, i32, i32, float, i32, [4 x i8] }> | ||
%class.ResizeArray = type { i32 (...)**, %class.ResizeArrayRaw* } | ||
%class.ResizeArrayRaw = type <{ i16*, i8*, i32, i32, i32, float, i32, [4 x i8] }> | ||
%class.ResizeArray.2 = type { i32 (...)**, %class.ResizeArrayRaw.3* } | ||
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }> | ||
%class.Pairlists = type { i16*, i32, i32 } | ||
|
||
;; Check the minPart4 and minPart assignments are merged. | ||
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 | ||
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 | ||
|
||
;; Test input: the entry block reads fields 11 and 12 of %params and branches so | ||
;; that both if.end109.thread and if.end109 load field 16 of %params. Each of | ||
;; those loads is followed by a pseudo probe call, and the two loaded values are | ||
;; joined by the phi in if.then138. The expectations at the top of the file | ||
;; require that only one GEP of field 16 remains after instcombine. | ||
define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 { | ||
entry: | ||
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11 | ||
%0 = load i32, i32* %savePairlists3, align 8 | ||
%usePairlists4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 12 | ||
%1 = load i32, i32* %usePairlists4, align 4 | ||
%tobool54.not = icmp eq i32 %0, 0 | ||
br i1 %tobool54.not, label %lor.lhs.false55, label %if.end109 | ||
|
||
lor.lhs.false55: ; preds = %entry | ||
%tobool56.not = icmp eq i32 %1, 0 | ||
br i1 %tobool56.not, label %if.end109, label %if.end109.thread | ||
|
||
if.end109.thread: ; preds = %lor.lhs.false55 | ||
; First load of field 16; the pseudo probe for this block follows the load. | ||
%minPart4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 | ||
%2 = load i32, i32* %minPart4, align 4 | ||
call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 2, i32 0, i64 -1) | ||
br label %if.then138 | ||
|
||
if.end109: ; preds = %lor.lhs.false55, %entry | ||
; Second load of field 16, using the same address expression as %minPart4. | ||
%minPart = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 | ||
%3 = load i32, i32* %minPart, align 4 | ||
call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 3, i32 0, i64 -1) | ||
%tobool116.not = icmp eq i32 %1, 0 | ||
br i1 %tobool116.not, label %if.then117, label %if.then138 | ||
|
||
if.then117: ; preds = %if.end109 | ||
ret void | ||
|
||
if.then138: ; preds = %if.end109.thread, %if.end109 | ||
; Join point for the two field-16 loads. | ||
%4 = phi i32 [ %2, %if.end109.thread ], [ %3, %if.end109 ] | ||
%tobool139.not = icmp eq i32 %4, 0 | ||
br i1 %tobool139.not, label %if.else147, label %if.then140 | ||
|
||
if.then140: ; preds = %if.then138 | ||
ret void | ||
|
||
if.else147: ; preds = %if.then138 | ||
ret void | ||
} | ||
|
||
declare dso_local void @_ZN9Pairlists8addIndexEv() align 2 | ||
|
||
; Function Attrs: inaccessiblememonly nounwind willreturn | ||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 | ||
|
||
attributes #0 = { inaccessiblememonly nounwind willreturn } |
33 changes: 33 additions & 0 deletions
33
llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
; PR1075 | ||
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -pseudo-probe-for-profiling -O3 | FileCheck %s | ||
|
||
;; Test input: multiplies %x by four constants, then sums the four products in | ||
;; a chain of three fadds. A pseudo probe call sits between the multiplies and | ||
;; the adds. The expected assembly listed after the ret interleaves mulss and | ||
;; addss rather than emitting all four multiplies before the first add. | ||
define float @foo(float %x) #0 { | ||
%tmp1 = fmul float %x, 3.000000e+00 | ||
%tmp3 = fmul float %x, 5.000000e+00 | ||
%tmp5 = fmul float %x, 7.000000e+00 | ||
%tmp7 = fmul float %x, 1.100000e+01 | ||
; The probe separates the four multiplies from the adds that consume them. | ||
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1) | ||
%tmp10 = fadd float %tmp1, %tmp3 | ||
%tmp12 = fadd float %tmp10, %tmp5 | ||
%tmp14 = fadd float %tmp12, %tmp7 | ||
ret float %tmp14 | ||
; CHECK: mulss | ||
; CHECK: mulss | ||
; CHECK: addss | ||
; CHECK: mulss | ||
; CHECK: addss | ||
; CHECK: mulss | ||
; CHECK: addss | ||
; CHECK: ret | ||
} | ||
|
||
; Function Attrs: inaccessiblememonly nounwind willreturn | ||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 | ||
|
||
attributes #0 = { nounwind } | ||
attributes #1 = { inaccessiblememonly nounwind willreturn } | ||
|
||
!llvm.pseudo_probe_desc = !{!0} | ||
|
||
!0 = !{i64 6699318081062747564, i64 4294967295, !"foo", null} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
; RUN: llc -mtriple=x86_64-- -stop-after=peephole-opt -o - %s | FileCheck %s | ||
|
||
;; Test input: loads an i64 in the entry block and compares it in if.end, right | ||
;; after a pseudo probe call. The expectations in if.end require that no | ||
;; MOV64rm from the tracked register appears before the PSEUDO_PROBE and that a | ||
;; CMP64mr with a memory operand follows it. | ||
define internal i32 @arc_compare() { | ||
entry: | ||
%0 = load i64, i64* undef, align 8 | ||
br i1 undef, label %return, label %if.end | ||
|
||
if.end: ; preds = %entry | ||
; Check that a register copy has been sunk into the compare instruction. | ||
; CHECK: %[[#REG:]]:gr64 = IMPLICIT_DEF | ||
; CHECK-NOT: %[[#]]:gr64 = MOV64rm %[[#REG]] | ||
; CHECK: PSEUDO_PROBE 5116412291814990879, 3, 0, 0 | ||
; CHECK: CMP64mr %[[#REG]], 1 | ||
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1) | ||
%cmp4 = icmp slt i64 %0, undef | ||
br i1 %cmp4, label %return, label %if.end6 | ||
|
||
if.end6: ; preds = %if.end | ||
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1) | ||
br label %return | ||
|
||
return: ; preds = %if.end6, %if.end, %entry | ||
ret i32 undef | ||
} | ||
|
||
; Function Attrs: inaccessiblememonly nounwind willreturn | ||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 | ||
|
||
attributes #0 = { inaccessiblememonly nounwind willreturn } |
37 changes: 37 additions & 0 deletions
37
llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
; RUN: llc -stop-after=twoaddressinstruction -mtriple=x86_64-- -o - %s | FileCheck %s | ||
|
||
|
||
;; Test input: the loop in for.body14 advances %indvars.iv127 by 4 and | ||
;; decrements %niter137 by 4 on each iteration, with a pseudo probe call between | ||
;; the two adds. The expectations inside the loop require a COPY followed by an | ||
;; ADD64ri8 whose destination and first source are the same register. | ||
define dso_local double @twoaddressinstruction() local_unnamed_addr { | ||
for.end: | ||
%0 = load i64, i64* undef, align 8 | ||
br label %for.body14.preheader | ||
|
||
for.body14.preheader: ; preds = %for.end | ||
br i1 undef, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14.preheader.new | ||
|
||
for.body14.preheader.new: ; preds = %for.body14.preheader | ||
%unroll_iter136 = and i64 %0, -4 | ||
br label %for.body14 | ||
|
||
for.cond25.preheader.loopexit.unr-lcssa: ; preds = %for.body14, %for.body14.preheader | ||
%indvars.iv127.unr = phi i64 [ 1, %for.body14.preheader ], [ %indvars.iv.next128.3, %for.body14 ] | ||
ret double undef | ||
|
||
for.body14: ; preds = %for.body14, %for.body14.preheader.new | ||
%indvars.iv127 = phi i64 [ 1, %for.body14.preheader.new ], [ %indvars.iv.next128.3, %for.body14 ] | ||
%niter137 = phi i64 [ %unroll_iter136, %for.body14.preheader.new ], [ %niter137.nsub.3, %for.body14 ] | ||
%indvars.iv.next128.3 = add nuw nsw i64 %indvars.iv127, 4 | ||
; CHECK: PSEUDO_PROBE -6878943695821059507, 9, 0, 0 | ||
call void @llvm.pseudoprobe(i64 -6878943695821059507, i64 9, i32 0, i64 -1) | ||
;; Check an opeq form of instruction is created. | ||
; CHECK: %[[#REG:]]:gr64_nosp = COPY killed %[[#]] | ||
; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri8 %[[#REG]], 4, implicit-def dead $eflags | ||
; Remaining-iteration counter: the loop exits once it reaches zero. | ||
%niter137.nsub.3 = add i64 %niter137, -4 | ||
%niter137.ncmp.3 = icmp eq i64 %niter137.nsub.3, 0 | ||
br i1 %niter137.ncmp.3, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14 | ||
} | ||
|
||
; Function Attrs: inaccessiblememonly nounwind willreturn | ||
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 | ||
|
||
attributes #0 = { inaccessiblememonly nounwind willreturn } |