Skip to content

Commit 1ddf180

Browse files
authored
[flang] introduce fir.copy to avoid load store of aggregates (#130289)
Introduce a FIR operation to do memcpy/memmove of compile time constant size types. This is to avoid requiring derived type copies to be done with load/store, which is badly supported in LLVM when the aggregate type is "big" (no threshold can easily be defined here, better to always avoid them for fir.type). This was the root cause of the regressions caused by #114002, which introduced a load/store of fir.type<> that caused hangs/asserts to fire in LLVM on several benchmarks. See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type
1 parent cb7298f commit 1ddf180

File tree

7 files changed

+187
-5
lines changed

7 files changed

+187
-5
lines changed

flang/include/flang/Optimizer/Dialect/FIROps.td

+44
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ def IsBoxAddressOrValueTypePred
6868
def fir_BoxAddressOrValueType : Type<IsBoxAddressOrValueTypePred,
6969
"fir.box or fir.class type or reference">;
7070

71+
// Predicate that defers to the C++ helper
// ::fir::isRefOfConstantSizeAggregateType, applied to the constrained type.
def RefOfConstantSizeAggregateTypePred
72+
: CPred<"::fir::isRefOfConstantSizeAggregateType($_self)">;
73+
// Type constraint built on the predicate above. The string is the constraint
// summary; the invalid.fir expectations show it quoted in the
// "operand #N must be ..." diagnostics.
def AnyRefOfConstantSizeAggregateType : TypeConstraint<
74+
RefOfConstantSizeAggregateTypePred,
75+
"a reference type to a constant size fir.array, fir.char, or fir.type">;
76+
7177
//===----------------------------------------------------------------------===//
7278
// Memory SSA operations
7379
//===----------------------------------------------------------------------===//
@@ -342,6 +348,44 @@ def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
342348
}];
343349
}
344350

351+
// fir.copy: copies a constant-size aggregate from $source to $destination.
// Both operands must satisfy AnyRefOfConstantSizeAggregateType; the optional
// `no_overlap` unit attribute records that the two do not overlap.
def fir_CopyOp : fir_Op<"copy", []> {
352+
let summary = "copy constant size memory";
353+
354+
let description = [{
355+
Copy the memory from a source with compile time constant size to
356+
a destination of the same type.
357+
358+
This is meant to be used for aggregate types where load and store
359+
are not appropriate to make a copy because LLVM is not meant to
360+
handle load and store of "big" aggregates.
361+
362+
Its "no_overlap" attribute allows indicating that the source and destination
363+
are known to not overlap at compile time.
364+
365+
```
366+
!t =!fir.type<t{x:!fir.array<1000xi32>}>
367+
fir.copy %x to %y : !fir.ref<!t>, !fir.ref<!t>
368+
```
369+
TODO: add FirAliasTagOpInterface to carry TBAA.
370+
}];
371+
372+
// $source carries the MemRead effect and $destination the MemWrite effect.
let arguments = (ins Arg<AnyRefOfConstantSizeAggregateType, "", [MemRead]>:$source,
373+
Arg<AnyRefOfConstantSizeAggregateType, "", [MemWrite]>:$destination,
374+
OptionalAttr<UnitAttr>:$no_overlap);
375+
376+
// Convenience builder taking `no_overlap` as a bool that defaults to false.
let builders = [OpBuilder<(ins "mlir::Value":$source,
377+
"mlir::Value":$destination,
378+
CArg<"bool", "false">:$no_overlap)>];
379+
380+
// Textual form: `fir.copy %src to %dst [no_overlap] : type(src), type(dst)`.
let assemblyFormat = [{
381+
$source `to` $destination (`no_overlap` $no_overlap^)?
382+
attr-dict `:` type(operands)
383+
}];
384+
385+
// Requests a hand-written verifier (fir::CopyOp::verify in FIROps.cpp).
let hasVerifier = 1;
386+
}
387+
388+
345389
def fir_SaveResultOp : fir_Op<"save_result", [AttrSizedOperandSegments]> {
346390
let summary = [{
347391
save an array, box, or record function result SSA-value to a memory location

flang/include/flang/Optimizer/Dialect/FIRType.h

+7
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,13 @@ inline bool isBoxProcAddressType(mlir::Type t) {
498498
return t && mlir::isa<fir::BoxProcType>(t);
499499
}
500500

501+
/// Return true when `fir::dyn_cast_ptrEleTy(t)` yields a non-null type that
/// is a fir::CharacterType, fir::RecordType, or fir::SequenceType and for
/// which `hasDynamicSize` is false.
inline bool isRefOfConstantSizeAggregateType(mlir::Type t) {
502+
// Replace `t` by its element type; presumably this is a null type when `t`
// is not a pointer-like type, which the `t &&` check below rejects.
t = fir::dyn_cast_ptrEleTy(t);
503+
return t &&
504+
mlir::isa<fir::CharacterType, fir::RecordType, fir::SequenceType>(t) &&
505+
!hasDynamicSize(t);
506+
}
507+
501508
/// Return a string representation of `ty`.
502509
///
503510
/// fir.array<10x10xf32> -> prefix_10x10xf32

flang/lib/Optimizer/CodeGen/CodeGen.cpp

+35-5
Original file line numberDiff line numberDiff line change
@@ -3545,6 +3545,36 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
35453545
}
35463546
};
35473547

3548+
/// `fir.copy` --> `llvm.memcpy` or `llvm.memmove`
///
/// `llvm.memcpy` is created when the op carries `no_overlap`, `llvm.memmove`
/// otherwise. The size operand is an i64 produced by `genTypeStrideInBytes`
/// on the converted form of the source type with its reference wrapper
/// removed (`fir::unwrapRefType`).
3549+
struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
3550+
using FIROpConversion::FIROpConversion;
3551+
3552+
llvm::LogicalResult
3553+
matchAndRewrite(fir::CopyOp copy, OpAdaptor adaptor,
3554+
mlir::ConversionPatternRewriter &rewriter) const override {
3555+
mlir::Location loc = copy.getLoc();
3556+
// The adaptor provides the already-converted operands.
mlir::Value llvmSource = adaptor.getSource();
3557+
mlir::Value llvmDestination = adaptor.getDestination();
3558+
mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
3559+
// The FIR type being copied is taken from the original (unconverted) op.
mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
3560+
mlir::Value copySize =
3561+
genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
3562+
3563+
mlir::LLVM::AliasAnalysisOpInterface newOp;
3564+
// Destination comes first, then source, in both intrinsics.
if (copy.getNoOverlap())
3565+
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3566+
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
3567+
else
3568+
newOp = rewriter.create<mlir::LLVM::MemmoveOp>(
3569+
loc, llvmDestination, llvmSource, copySize, /*isVolatile=*/false);
3570+
3571+
// TODO: propagate TBAA once FirAliasTagOpInterface added to CopyOp.
3572+
attachTBAATag(newOp, copyTy, copyTy, nullptr);
3573+
// fir.copy has no results, so the op is erased rather than replaced.
rewriter.eraseOp(copy);
3574+
return mlir::success();
3575+
}
3576+
};
3577+
35483578
namespace {
35493579

35503580
/// Convert `fir.unboxchar` into two `llvm.extractvalue` instructions. One for
@@ -4148,11 +4178,11 @@ void fir::populateFIRToLLVMConversionPatterns(
41484178
BoxOffsetOpConversion, BoxProcHostOpConversion, BoxRankOpConversion,
41494179
BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion,
41504180
CmpcOpConversion, ConvertOpConversion, CoordinateOpConversion,
4151-
DTEntryOpConversion, DeclareOpConversion, DivcOpConversion,
4152-
EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion,
4153-
ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion,
4154-
FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion,
4155-
InsertOnRangeOpConversion, IsPresentOpConversion,
4181+
CopyOpConversion, DTEntryOpConversion, DeclareOpConversion,
4182+
DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion,
4183+
EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion,
4184+
FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion,
4185+
GlobalOpConversion, InsertOnRangeOpConversion, IsPresentOpConversion,
41564186
LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion,
41574187
NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion,
41584188
SelectOpConversion, SelectRankOpConversion, SelectTypeOpConversion,

flang/lib/Optimizer/Dialect/FIROps.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,26 @@ void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
39403940
build(builder, result, value, memref, {});
39413941
}
39423942

3943+
//===----------------------------------------------------------------------===//
3944+
// CopyOp
3945+
//===----------------------------------------------------------------------===//
3946+
3947+
/// Builder taking `noOverlap` as a bool: it is turned into a UnitAttr when
/// true, or a default-constructed (null) UnitAttr when false, and forwarded
/// to the attribute-taking `build` overload.
void fir::CopyOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
3948+
mlir::Value source, mlir::Value destination,
3949+
bool noOverlap) {
3950+
mlir::UnitAttr noOverlapAttr =
3951+
noOverlap ? builder.getUnitAttr() : mlir::UnitAttr{};
3952+
build(builder, result, source, destination, noOverlapAttr);
3953+
}
3954+
3955+
/// Verify that source and destination agree once `fir::unwrapRefType` has
/// been applied to each operand type; emit an op error otherwise.
llvm::LogicalResult fir::CopyOp::verify() {
3956+
// Only the unwrapped types are compared, so the wrappers themselves may
// differ (the fir-ops.fir test copies from a !fir.ref to a !fir.ptr).
mlir::Type sourceType = fir::unwrapRefType(getSource().getType());
3957+
mlir::Type destinationType = fir::unwrapRefType(getDestination().getType());
3958+
if (sourceType != destinationType)
3959+
return emitOpError("source and destination must have the same value type");
3960+
return mlir::success();
3961+
}
3962+
39433963
//===----------------------------------------------------------------------===//
39443964
// StringLitOp
39453965
//===----------------------------------------------------------------------===//

flang/test/Fir/copy-codegen.fir

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Test fir.copy codegen.
2+
// RUN: fir-opt --fir-to-llvm-ir %s -o - | FileCheck %s
3+
4+
!t=!fir.type<sometype{i:!fir.array<9xi32>}>
5+
6+
module attributes {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} {
7+
8+
func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
9+
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
10+
return
11+
}
12+
// CHECK-LABEL: llvm.func @test_copy_1(
13+
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
14+
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
15+
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
16+
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
17+
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
18+
// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
19+
// CHECK: llvm.return
20+
// CHECK: }
21+
22+
func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
23+
fir.copy %arg0 to %arg1 : !fir.ref<!t>, !fir.ref<!t>
24+
return
25+
}
26+
// CHECK-LABEL: llvm.func @test_copy_2(
27+
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
28+
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
29+
// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
30+
// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
31+
// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
32+
// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
33+
// CHECK: llvm.return
34+
// CHECK: }
35+
}

flang/test/Fir/fir-ops.fir

+9
Original file line numberDiff line numberDiff line change
@@ -933,3 +933,12 @@ func.func @test_call_arg_attrs_indirect(%arg0: i16, %arg1: (i16)-> i16) -> i16 {
933933
%0 = fir.call %arg1(%arg0) : (i16 {llvm.noundef, llvm.signext}) -> (i16 {llvm.signext})
934934
return %0 : i16
935935
}
936+
937+
// CHECK-LABEL: @test_copy(
938+
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.type<sometype{i:i32}>>,
939+
// CHECK-SAME: %[[VAL_1:.*]]: !fir.ptr<!fir.type<sometype{i:i32}>>
940+
func.func @test_copy(%arg0: !fir.ref<!fir.type<sometype{i:i32}>>, %arg1: !fir.ptr<!fir.type<sometype{i:i32}>>) {
941+
// CHECK: fir.copy %[[VAL_0]] to %[[VAL_1]] no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
942+
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!fir.type<sometype{i:i32}>>, !fir.ptr<!fir.type<sometype{i:i32}>>
943+
return
944+
}

flang/test/Fir/invalid.fir

+37
Original file line numberDiff line numberDiff line change
@@ -1018,3 +1018,40 @@ func.func @bad_is_assumed_size(%arg0: !fir.ref<!fir.array<*:none>>) {
10181018
%1 = fir.is_assumed_size %arg0 : (!fir.ref<!fir.array<*:none>>) -> i1
10191019
return
10201020
}
1021+
1022+
// -----
1023+
1024+
!t=!fir.type<sometype{i:i32}>
1025+
!t2=!fir.type<sometype2{j:i32}>
1026+
func.func @bad_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t2>) {
1027+
// expected-error@+1{{'fir.copy' op source and destination must have the same value type}}
1028+
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t2>
1029+
return
1030+
}
1031+
1032+
// -----
1033+
1034+
!t=!fir.type<sometype{i:i32}>
1035+
func.func @bad_copy_2(%arg0: !fir.ref<!t>, %arg1: !t) {
1036+
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.type<sometype{i:i32}>'}}
1037+
fir.copy %arg1 to %arg0 no_overlap : !t, !fir.ref<!t>
1038+
return
1039+
}
1040+
1041+
// -----
1042+
1043+
!t=!fir.array<?xi32>
1044+
func.func @bad_copy_3(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
1045+
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<!fir.array<?xi32>>'}}
1046+
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
1047+
return
1048+
}
1049+
1050+
// -----
1051+
1052+
!t=f32
1053+
func.func @bad_copy_4(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
1054+
// expected-error@+1{{'fir.copy' op operand #0 must be a reference type to a constant size fir.array, fir.char, or fir.type, but got '!fir.ref<f32>'}}
1055+
fir.copy %arg0 to %arg1 no_overlap : !fir.ref<!t>, !fir.ref<!t>
1056+
return
1057+
}

0 commit comments

Comments
 (0)