Skip to content

Commit bb15861

Browse files
committed
[MemCpyOpt] Relax libcall checks
Rather than blocking the whole MemCpyOpt pass if the libcalls are not available, only disable creation of new memset/memcpy intrinsics where only loads/stores were used previously. This only affects the store merging and load-store conversion optimizations. Other optimizations are derived from existing intrinsics, which are well-defined in the absence of libcalls -- not having the libcalls just means that call simplification won't convert them to intrinsics. This is a weaker variation of D104801, which dropped these checks entirely. Ideally we would not couple emission of intrinsics to libcall availability at all, but as the intrinsics may be legalized to libcalls we need to be a bit careful right now. Differential Revision: https://reviews.llvm.org/D106769
1 parent fb27e8c commit bb15861

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

+13-9
Original file line numberDiff line numberDiff line change
@@ -673,7 +673,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
673673
LI->getParent() == SI->getParent()) {
674674

675675
auto *T = LI->getType();
676-
if (T->isAggregateType()) {
676+
// Don't introduce calls to memcpy/memmove intrinsics out of thin air if
677+
// the corresponding libcalls are not available.
678+
// TODO: We should really distinguish between libcall availability and
679+
// our ability to introduce intrinsics.
680+
if (T->isAggregateType() && TLI->has(LibFunc_memcpy) &&
681+
TLI->has(LibFunc_memmove)) {
677682
MemoryLocation LoadLoc = MemoryLocation::get(LI);
678683

679684
// We use alias analysis to check if an instruction may store to
@@ -796,6 +801,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
796801
}
797802
}
798803

804+
// The following code creates memset intrinsics out of thin air. Don't do
805+
// this if the corresponding libfunc is not available.
806+
// TODO: We should really distinguish between libcall availability and
807+
// our ability to introduce intrinsics.
808+
if (!TLI->has(LibFunc_memset))
809+
return false;
810+
799811
// There are two cases that are interesting for this code to handle: memcpy
800812
// and memset. Right now we only handle memset.
801813

@@ -1548,9 +1560,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
15481560
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
15491561
/// not to alias.
15501562
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1551-
if (!TLI->has(LibFunc_memmove))
1552-
return false;
1553-
15541563
// See if the pointers alias.
15551564
if (!AA->isNoAlias(MemoryLocation::getForDest(M),
15561565
MemoryLocation::getForSource(M)))
@@ -1754,11 +1763,6 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
17541763
MSSA = MSSA_;
17551764
MemorySSAUpdater MSSAU_(MSSA_);
17561765
MSSAU = MSSA_ ? &MSSAU_ : nullptr;
1757-
// If we don't have at least memset and memcpy, there is little point of doing
1758-
// anything here. These are required by a freestanding implementation, so if
1759-
// even they are disabled, there is no point in trying hard.
1760-
if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
1761-
return false;
17621766

17631767
while (true) {
17641768
if (!iterateOnFunction(F))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -memcpyopt < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS
3+
; RUN: opt -S -memcpyopt -mtriple=amdgcn-- < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS
4+
5+
; REQUIRES: amdgpu-registered-target
6+
7+
define void @dont_create_memset(ptr %p) {
8+
; LIBCALLS-LABEL: @dont_create_memset(
9+
; LIBCALLS-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
10+
; LIBCALLS-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i64 2
11+
; LIBCALLS-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i64 3
12+
; LIBCALLS-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P]], i8 0, i64 16, i1 false)
13+
; LIBCALLS-NEXT: ret void
14+
;
15+
; NO-LIBCALLS-LABEL: @dont_create_memset(
16+
; NO-LIBCALLS-NEXT: store i32 0, ptr [[P:%.*]], align 4
17+
; NO-LIBCALLS-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[P]], i64 1
18+
; NO-LIBCALLS-NEXT: store i32 0, ptr [[P1]], align 4
19+
; NO-LIBCALLS-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i64 2
20+
; NO-LIBCALLS-NEXT: store i32 0, ptr [[P2]], align 4
21+
; NO-LIBCALLS-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i64 3
22+
; NO-LIBCALLS-NEXT: store i32 0, ptr [[P3]], align 4
23+
; NO-LIBCALLS-NEXT: ret void
24+
;
25+
store i32 0, ptr %p
26+
%p1 = getelementptr i32, ptr %p, i64 1
27+
store i32 0, ptr %p1
28+
%p2 = getelementptr i32, ptr %p, i64 2
29+
store i32 0, ptr %p2
30+
%p3 = getelementptr i32, ptr %p, i64 3
31+
store i32 0, ptr %p3
32+
ret void
33+
}
34+
35+
%ty = type { i64 }
36+
37+
define void @dont_create_memcpy(ptr %p1, ptr %p2) {
38+
; LIBCALLS-LABEL: @dont_create_memcpy(
39+
; LIBCALLS-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 [[P1:%.*]], i64 8, i1 false)
40+
; LIBCALLS-NEXT: ret void
41+
;
42+
; NO-LIBCALLS-LABEL: @dont_create_memcpy(
43+
; NO-LIBCALLS-NEXT: [[V:%.*]] = load [[TY:%.*]], ptr [[P1:%.*]], align 4
44+
; NO-LIBCALLS-NEXT: store [[TY]] [[V]], ptr [[P2:%.*]], align 4
45+
; NO-LIBCALLS-NEXT: ret void
46+
;
47+
%v = load %ty, ptr %p1
48+
store %ty %v, ptr %p2
49+
ret void
50+
}
51+
52+
define void @forward_memcpy(ptr noalias %p1, ptr noalias %p2, ptr noalias %p3) {
53+
; CHECK-LABEL: @forward_memcpy(
54+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P2:%.*]], ptr [[P1:%.*]], i64 16, i1 false)
55+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P3:%.*]], ptr [[P1]], i64 16, i1 false)
56+
; CHECK-NEXT: ret void
57+
;
58+
call void @llvm.memcpy.p0.p0.i64(ptr %p2, ptr %p1, i64 16, i1 false)
59+
call void @llvm.memcpy.p0.p0.i64(ptr %p3, ptr %p2, i64 16, i1 false)
60+
ret void
61+
}
62+
63+
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

0 commit comments

Comments
 (0)