Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance gf checkpointing: faster MPIO writing, checkpoint file backup, and code refinement #82

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 119 additions & 150 deletions app/form/SeasQDDiscreteGreenOperator.cpp

Large diffs are not rendered by default.

19 changes: 18 additions & 1 deletion app/form/SeasQDDiscreteGreenOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@

namespace tndm {

class SeasQDDiscreteGreenOperator;

// Public bundle of PetscInt sizes/offsets plus an MPI rank and communicator,
// constructed from a SeasQDDiscreteGreenOperator. Only the constructor
// declaration is visible here; how each field is derived from the operator is
// defined with the constructor body.
// NOTE(review): judging by the names, these describe the layout of the
// discrete Green's function matrix -- confirm against the constructor.
struct GreensFunctionIndices {
PetscInt slip_block_size; // NOTE(review): presumably block size of the slip vector -- confirm
PetscInt num_local_elements; // NOTE(review): presumably number of elements on this rank -- confirm
PetscInt m_bs; // traction block size
PetscInt n_bs; // always 1
PetscInt m; // NOTE(review): presumably local row count (paired with m_bs) -- confirm
PetscInt n; // NOTE(review): presumably local column count (paired with n_bs) -- confirm
PetscInt mb_offset; // NOTE(review): presumably this rank's offset in the m direction -- confirm
PetscInt nb_offset; // NOTE(review): presumably this rank's offset in the n direction -- confirm
int rank; // NOTE(review): presumably the rank of this process within comm -- confirm
MPI_Comm comm; // MPI communicator handle stored alongside rank

// Builds the index bundle from an existing operator; op is taken by const
// reference.
GreensFunctionIndices(SeasQDDiscreteGreenOperator const& op);
};

class SeasQDDiscreteGreenOperator : public SeasQDOperator {
public:
using base = SeasQDOperator;
Expand Down Expand Up @@ -59,7 +76,6 @@ class SeasQDDiscreteGreenOperator : public SeasQDOperator {
void update_traction(double time, BlockVector const& state);

private:
void compute_discrete_greens_function();
void compute_boundary_traction();
PetscInt create_discrete_greens_function();
void partial_assemble_discrete_greens_function(LocalSimplexMesh<DomainDimension> const& mesh,
Expand All @@ -70,6 +86,7 @@ class SeasQDDiscreteGreenOperator : public SeasQDOperator {
PetscInt n_gf_);
// all logic associated with matrix creation, loading / partial assembly is done here
void get_discrete_greens_function(LocalSimplexMesh<DomainDimension> const& mesh);
void back_up_file(std::string file_to_backup);
void write_discrete_greens_traction();
void load_discrete_greens_traction();
void get_boundary_traction();
Expand Down
17 changes: 14 additions & 3 deletions app/tandem/SEAS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,21 @@ struct operator_specifics<SeasQDDiscreteGreenOperator>

static auto make(LocalSimplexMesh<DomainDimension> const& mesh, Config const& cfg,
seas::ContextBase& ctx) {
auto const& cfgcp = cfg.gf_checkpoint_config;

std::optional<std::string> prefix;
double freq_cputime;
if (!cfgcp) {
prefix = std::nullopt;
freq_cputime = 1e10;
} else {
prefix = cfgcp->prefix;
freq_cputime = cfgcp->frequency_cputime_minutes;
}

auto seasop = std::make_shared<SeasQDDiscreteGreenOperator>(
std::move(ctx.dg()), std::move(ctx.adapter()), std::move(ctx.friction()), mesh,
cfg.gf_checkpoint_prefix, cfg.gf_checkpoint_every_nmins, cfg.matrix_free,
MGConfig(cfg.mg_coarse_level, cfg.mg_strategy));
std::move(ctx.dg()), std::move(ctx.adapter()), std::move(ctx.friction()), mesh, prefix,
freq_cputime, cfg.matrix_free, MGConfig(cfg.mg_coarse_level, cfg.mg_strategy));
ctx.setup_seasop(*seasop);
seasop->warmup();
return seasop;
Expand Down
21 changes: 14 additions & 7 deletions app/tandem/SeasConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ template <typename Derived> void setOutputConfigSchema(TableSchema<Derived>& out
.help("Maximum time difference between samples.");
}

/**
 * Registers the entries of a Green's function checkpoint table.
 *
 * Two values are added to the given schema:
 *  - "prefix" (default "gf_checkpoint"), bound to Derived::prefix
 *  - "freq_cputime" (default 30.0), bound to Derived::frequency_cputime_minutes
 *
 * @tparam Derived configuration type that provides both members
 * @param gfCheckpointSchema schema that receives the two entries
 */
template <typename Derived>
void setGfCheckpointConfigSchema(TableSchema<Derived>& gfCheckpointSchema) {
    // Resolve the member pointers up front so the registrations below read uniformly.
    auto const prefixMember = up_cast<Derived>(&Derived::prefix);
    auto const cputimeMember = up_cast<Derived>(&Derived::frequency_cputime_minutes);

    gfCheckpointSchema.add_value("prefix", prefixMember)
        .default_value("gf_checkpoint")
        .help("Path where Green's function operator and RHS will be checkpointed.");

    gfCheckpointSchema.add_value("freq_cputime", cputimeMember)
        .default_value(30.0)
        .help("CPU time (minutes) frequency between Green's function operator checkpoints");
}

template <typename Derived> void setDomainOutputConfigSchema(TableSchema<Derived>& outputSchema) {
setOutputConfigSchema(outputSchema);

Expand Down Expand Up @@ -124,13 +135,6 @@ void setConfigSchema(TableSchema<Config>& schema,
.default_value(false)
.help("Assert that boundary is a linear function of time (i.e. boundary(x, t) = f(x) t).");

schema.add_value("gf_checkpoint_prefix", &Config::gf_checkpoint_prefix)
.help("Path where Green's function operator and RHS will be checkpointed.");
schema.add_value("gf_checkpoint_every_nmins", &Config::gf_checkpoint_every_nmins)
.default_value(30.0)
.help("time interval, in minutes, at which the Green's function operator data is saved to "
"disk.");

schema.add_value("matrix_free", &Config::matrix_free)
.default_value(false)
.help("Use matrix-free operators.");
Expand Down Expand Up @@ -169,6 +173,9 @@ void setConfigSchema(TableSchema<Config>& schema,
auto& domainProbeOutputSchema =
schema.add_table("domain_probe_output", &Config::domain_probe_output);
detail::setProbeOutputConfigSchema(domainProbeOutputSchema);

auto& gfCheckpointSchema = schema.add_table("gf_checkpoint", &Config::gf_checkpoint_config);
detail::setGfCheckpointConfigSchema(gfCheckpointSchema);
}

} // namespace tndm
9 changes: 6 additions & 3 deletions app/tandem/SeasConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ struct ProbeOutputConfig : TabularOutputConfig {
std::vector<Probe<DomainDimension>> probes;
};

/**
 * Settings for checkpointing the Green's function operator, i.e. the contents
 * of the "gf_checkpoint" configuration table.
 */
struct GfCheckpointConfig {
    /// Path where the Green's function operator and RHS will be checkpointed.
    std::string prefix;
    /// CPU time, in minutes, between two Green's function operator checkpoints.
    /// Initialised to the schema default (30.0) so that a default-constructed
    /// object never carries an indeterminate value.
    double frequency_cputime_minutes = 30.0;
};

struct Config {
std::optional<double> resolution;
double final_time;
Expand All @@ -75,15 +80,13 @@ struct Config {
MGStrategy mg_strategy;
unsigned mg_coarse_level;

std::optional<std::string> gf_checkpoint_prefix;
double gf_checkpoint_every_nmins;

std::optional<GenMeshConfig<DomainDimension>> generate_mesh;
std::optional<OutputConfig> fault_output;
std::optional<TabularOutputConfig> fault_scalar_output;
std::optional<DomainOutputConfig> domain_output;
std::optional<ProbeOutputConfig> fault_probe_output;
std::optional<ProbeOutputConfig> domain_probe_output;
std::optional<GfCheckpointConfig> gf_checkpoint_config;
};

void setConfigSchema(TableSchema<Config>& schema,
Expand Down
11 changes: 7 additions & 4 deletions docs/first-model/parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ Tandem simulation parameters are written in the toml script. Here are the key pa
- **mg_coarse_level**: Polynomial degree of coarsest MG level. Default = 1.
- **mg_strategy**: MG level selection strategy. Available options: [TwoLevel | Logarithmic | Full]. Default = TwoLevel.

When using :code:`mode=QDGreen`, you can use Green's function checkpointing feature by defining the following parameters:

- **gf_checkpoint_prefix**: Path where Green's function operator and RHS will be checkpointed.
- **gf_checkpoint_every_nmins**: Time interval, in minutes, at which the Green's function operator data is saved to disk. Default = 30.0.
Green's function operator checkpointing
---------------------------------------

When using :code:`mode=QDGreen`, Green's function checkpointing is parameterized by the **[gf_checkpoint]** section. Here are the key parameters and their descriptions:

- **prefix**: Path where Green's function operator and RHS will be checkpointed. Default = gf_checkpoint.
- **freq_cputime**: Interval of CPU time, in minutes, between two Green's function operator checkpoints. Default = 30.0.

Output configurations
---------------------
Expand Down Expand Up @@ -86,4 +89,4 @@ Commented parameter file:
# Domain outputs
[domain_output]
prefix = "output/domain"
rtol = 0.1
rtol = 0.1
5 changes: 3 additions & 2 deletions examples/tandem/2d/BP6/bp6_A.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ scenario = "bp6"
ref_normal = [-1, 0]
boundary_linear = true

gf_checkpoint_prefix = "GreensFunctions/bp6_hf250"
[gf_checkpoint]
prefix = "GreensFunctions/bp6_hf250"

[fault_output]
prefix = "outputs_A_250m/fault"
Expand Down Expand Up @@ -420,4 +421,4 @@ probes = [
{ name = "+198", x = [0.0, 19.8] },
{ name = "+199", x = [0.0, 19.9] },
{ name = "+200", x = [0.0, 20.0] }
]
]
5 changes: 3 additions & 2 deletions examples/tandem/2d/BP6/bp6_S.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ scenario = "bp6"
ref_normal = [-1, 0]
boundary_linear = true

gf_checkpoint_prefix = "GreensFunctions/bp6_hf50"
[gf_checkpoint]
prefix = "GreensFunctions/bp6_hf50"

[fault_output]
prefix = "outputs_S_50m/fault"
Expand Down Expand Up @@ -420,4 +421,4 @@ probes = [
{ name = "+198", x = [0.0, 19.8] },
{ name = "+199", x = [0.0, 19.9] },
{ name = "+200", x = [0.0, 20.0] }
]
]