-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang] Rely on global initialization for simpler derived types #114002
Conversation
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-flang-fir-hlfir Author: None (NimishMishra) ChangesCurrently, all derived types are initialized through Full diff: https://github.com/llvm/llvm-project/pull/114002.diff 6 Files Affected:
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index de394a39e112ed..ac285c846fc7d1 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -67,7 +67,7 @@ bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
/// Call default initialization runtime routine to initialize \p var.
void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::pft::Variable &var,
Fortran::lower::SymMap &symMap);
/// Create a fir::GlobalOp given a module variable definition. This is intended
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index cc51d5a9bb8daf..c261d3b6c10fb8 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -776,9 +776,10 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
/// Call default initialization runtime routine to initialize \p var.
void Fortran::lower::defaultInitializeAtRuntime(
Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
+ const Fortran::lower::pft::Variable &var, Fortran::lower::SymMap &symMap) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
+ const Fortran::semantics::Symbol &sym = var.getSymbol();
fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
if (Fortran::semantics::IsOptional(sym)) {
// 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -793,11 +794,35 @@ void Fortran::lower::defaultInitializeAtRuntime(
})
.end();
} else {
- mlir::Value box = builder.createBox(loc, exv);
- fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ /// For "simpler" types, relying on "_FortranAInitialize"
+ /// leads to poor runtime performance. Hence optimize
+ /// the same.
+ const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
+ mlir::Type symTy = converter.genType(var);
+ if (!var.isAlias() && !hasAllocatableDirectComponent(sym) &&
+ declTy->category() ==
+ Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
+ !mlir::isa<fir::SequenceType>(symTy) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+ std::string globalName = converter.mangleName(sym) + "_globalinit";
+ mlir::Location loc = genLocation(converter, sym);
+ mlir::StringAttr linkage = getLinkageAttribute(builder, var);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
+ fir::GlobalOp global =
+ defineGlobal(converter, var, globalName, linkage, dataAttr);
+ auto addrOf = builder.create<fir::AddrOfOp>(loc, global.resultType(),
+ global.getSymbol());
+ fir::LoadOp load = builder.create<fir::LoadOp>(loc, addrOf.getResult());
+ builder.create<fir::StoreOp>(loc, load, fir::getBase(exv));
+ } else {
+ mlir::Value box = builder.createBox(loc, exv);
+ fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ }
}
}
-
enum class VariableCleanUp { Finalize, Deallocate };
/// Check whether a local variable needs to be finalized according to clause
/// 7.5.6.3 point 3 or if it is an allocatable that must be deallocated. Note
@@ -943,8 +968,7 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
if (needDummyIntentoutFinalization(var))
finalizeAtRuntime(converter, var, symMap);
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
auto *builder = &converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
@@ -1185,8 +1209,7 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
// do not try optimizing this to single default initializations of
// the equivalenced storages. Keep lowering simple.
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
}
//===--------------------------------------------------------------===//
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 709ac402cc702d..ba8b7177953bb5 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -118,7 +118,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
if (!isFirstPrivate &&
Fortran::lower::hasDefaultInitialization(sym->GetUltimate()))
- Fortran::lower::defaultInitializeAtRuntime(converter, *sym, *symTable);
+ Fortran::lower::defaultInitializeAtRuntime(converter, pft::Variable{*sym},
+ *symTable);
}
void DataSharingProcessor::copyFirstPrivateSymbol(
diff --git a/flang/test/Lower/HLFIR/structure-constructor.f90 b/flang/test/Lower/HLFIR/structure-constructor.f90
index 41d08c14f5fa98..68a29015f60177 100644
--- a/flang/test/Lower/HLFIR/structure-constructor.f90
+++ b/flang/test/Lower/HLFIR/structure-constructor.f90
@@ -98,12 +98,9 @@ end subroutine test3
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}> {bindc_name = "res", uniq_name = "_QFtest3Eres"}
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtest3Eres"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
-! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
-! CHECK: %[[VAL_5:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref<!fir.char<1,{{[0-9]*}}>>
-! CHECK: %[[VAL_6:.*]] = arith.constant {{[0-9]*}} : i32
-! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_4]] : (!fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<none>
-! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.char<1,{{[0-9]*}}>>) -> !fir.ref<i8>
-! CHECK: %[[VAL_9:.*]] = fir.call @_FortranAInitialize(%[[VAL_7]], %[[VAL_8]], %[[VAL_6]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest3Eres_globalinit) : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOADED_VAL]] to %[[VAL_3]]#1 : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest3Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "ctor.temp"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
! CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_11]]#0 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
diff --git a/flang/test/Lower/default-initialization.f90 b/flang/test/Lower/default-initialization.f90
index 7a6133452b3a25..f6e37d57f19eb4 100644
--- a/flang/test/Lower/default-initialization.f90
+++ b/flang/test/Lower/default-initialization.f90
@@ -22,9 +22,9 @@ module test_dinit
! CHECK-LABEL: func @_QMtest_dinitPlocal()
subroutine local
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFlocalEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: x
print *, x%i
end subroutine
@@ -56,9 +56,9 @@ subroutine local_alloc_comp
! CHECK-LABEL: func @_QMtest_dinitPresult() -> !fir.type<_QMtest_dinitTt{i:i32}>
function result()
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFresultEresult_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: result
end function
@@ -66,9 +66,9 @@ function result()
! CHECK-LABEL: func @_QMtest_dinitPintent_out(
! CHECK-SAME: %[[x:.*]]: !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
subroutine intent_out(x)
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFintent_outEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t), intent(out) :: x
end subroutine
diff --git a/flang/test/Lower/pointer-default-init.f90 b/flang/test/Lower/pointer-default-init.f90
index 0fb42683a3486b..0e97f3bea90024 100644
--- a/flang/test/Lower/pointer-default-init.f90
+++ b/flang/test/Lower/pointer-default-init.f90
@@ -38,7 +38,9 @@ subroutine test_local()
type(t) :: x
end subroutine
! CHECK-LABEL: func.func @_QPtest_local() {
-! CHECK: fir.call @_FortranAInitialize(
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest_localEx_globalinit) : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOAD]] to {{.*}} : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
subroutine test_saved()
use test, only : t
|
For the following test case, here are the runtimes with/without this patch: Without patch: With patch: About 9 seconds of improvement in runtime.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall approach looks good to me, some comments inlined.
Can you also give some performance improvement number to backup/document this PR? [edit: just saw your comment above, thanks]
7e1f567
to
1b3ab16
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the update, few comments, the direction looks good to me.
@@ -67,7 +67,7 @@ bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym); | |||
|
|||
/// Call default initialization runtime routine to initialize \p var. | |||
void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, | |||
const Fortran::semantics::Symbol &sym, | |||
const Fortran::lower::pft::Variable &var, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can switch to Symbol here now and have the caller do the var.getSymbol()
.
converter.genType()
accepts Symbol too, and var.isTarget
can be replaced by symbol.getUltimate().attrs().test(Fortran::semantics::Attr::TARGET)
Using Symbol for helpers is better when that is sufficient because that enables using them in places where pft::Variable not accessible (in general, one should not create a "pft::Variable" by simply wrapping a Symbol, some analysis/invariants are needed to rule out it is an alias for instance).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation; yeah we would ideally try to avoid pft::Variable{sym}
.
flang/lib/Lower/ConvertVariable.cpp
Outdated
mlir::StringAttr linkage = builder.createInternalLinkage(); | ||
cuf::DataAttributeAttr dataAttr = | ||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), | ||
sym); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@clementval. I am not sure copying the CUDA data attributes from the local/dummy symbol that must be default initialized to the global makes sense. Are some attributes needed for the global if the dynamic initialization happens on the device, or will the global with the initial image be automatically cloned/mapped to the device if needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need some clarification. In my opinion the read only global relates more to the type than the symbol, and it is weird that it could carry the symbol attribute that may be meaningless/wrong for a global (given the symbol could be a dummy/local).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jeanPerier is right, we do not want the cuda attribute to be set on the init global. The attribute would be set on an instance of the derived type but it is not required on the init global itself. Can you remove it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation. I have removed the cuda attribute.
flang/lib/Lower/ConvertVariable.cpp
Outdated
}); | ||
} else if (!global) { | ||
global = builder.createGlobal(loc, symTy, globalName, linkage, | ||
mlir::Attribute{}, isConstant(sym), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isConstant(sym)
is what should be swapped with true
to get read only memory.
flang/lib/Lower/ConvertVariable.cpp
Outdated
!mlir::isa<fir::SequenceType>(symTy) && | ||
!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) && | ||
!sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { | ||
std::string globalName = converter.mangleName(*declTy->AsDerived()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using the derived makes sense to me, but some prefix/suffix would be nice (fir.TypeInfoOp ops already uses that has MLIR symbol name, better avoid conflicts here), as well as using the compiler generated name prefix (doGenerated()).
I would go for fir::NameUniquer::doGenerated(mangledName + kNameSeparator + kDerivedTypeInitSuffix)
where kDerivedTypeInitSuffix
can be defined as init
in Optimizer/Support/InternalNames.h.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. I've pushed a new commit with these changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jeanPerier Gentle ping for review. Are you okay with the latest changes?
1b3ab16
to
d0f986a
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks on the right path, thanks.
A few more points/questions about the attributes taken from to symbol (outside from the fact that the global should be accessible on the device if the copy from it is happening on the device, I do not think any attributes from the symbol should be propagated to this compiler generated global that is more related to the type than it is to the symbol).
flang/lib/Lower/ConvertVariable.cpp
Outdated
global = builder.createGlobal( | ||
loc, symTy, globalName, linkage, mlir::Attribute{}, | ||
/*isConst=*/true, | ||
sym.GetUltimate().attrs().test(Fortran::semantics::Attr::TARGET), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are you setting the TARGET attribute here?
I do not think that is relevant for the read only global with the initial value, it is not being used in a Fortran pointer assignment as far as I can tell.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah you are right, we are not using a Fortran pointer assignment here, so TARGET doesn't really make sense.
I have removed it.
flang/lib/Lower/ConvertVariable.cpp
Outdated
mlir::StringAttr linkage = builder.createInternalLinkage(); | ||
cuf::DataAttributeAttr dataAttr = | ||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), | ||
sym); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need some clarification. In my opinion the read only global relates more to the type than the symbol, and it is weird that it could carry the symbol attribute that may be meaningless/wrong for a global (given the symbol could be a dummy/local).
d0f986a
to
9b76593
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
9b76593
to
6cf6403
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for addressing my comments!
Since this commit I am having trouble building cam4_r from spec2017. The compiler hangs building cosp.fppized.f90 (with this commit reverted it takes about 1 second to compile to llvm bytecode. The flags I am using are Do you have a spec license or will you need a reproducer? |
… derived types" (#130278) Reverts llvm/llvm-project#114002 This causes a regression building cam4_r from spec2017
I also saw issues with this patch today (asserts hit in LLVM). |
Introduce a FIR operation to do memcopy/memmove of compile time constant size types. This is to avoid requiring derived type copies to done with load/store which is badly supported in LLVM when the aggregate type is "big" (no threshold can easily be defined here, better to always avoid them for fir.type). This was the root cause of the regressions caused by #114002 which introduced a load/store of fir.type<> which caused hand/asserts to fire in LLVM on several benchmarks. See https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-creating-values-of-aggregate-type
…pes" (#130290) Currently, all derived types are initialized through `_FortranAInitialize`, which is functionally correct, but bears poor runtime performance. This patch falls back on global initialization for "simpler" derived types to speed up the initialization. Note: this relands #114002 with the fix for the LLVM timeout regressions that have been seen. The fix is to use the added fir.copy to avoid aggregate load/store. Co-authored-by: NimishMishra <42909663+NimishMishra@users.noreply.github.com>
…m#114002) Currently, all derived types are initialized through `_FortranAInitialize`, which is functionally correct, but bears poor runtime performance. This patch falls back on global initialization for "simpler" derived types to speed up the initialization.
…pes" (llvm#130278) Reverts llvm#114002 This causes a regression building cam4_r from spec2017
Currently, all derived types are initialized through
_FortranAInitialize
, which is functionally correct, but bears poor runtime performance. This patch falls back on global initialization for "simpler" derived types to speed up the initialization.