From ecea0edd72bacb65f37516e1f6a2976e5b23818b Mon Sep 17 00:00:00 2001 From: jeremylt Date: Mon, 28 Jun 2021 10:12:32 -0600 Subject: [PATCH] bddc - initial version of bddc example --- examples/petsc/.gitignore | 2 + examples/petsc/Makefile | 12 +- examples/petsc/bddc.c | 572 ++++++++++++++++++++++++++ examples/petsc/bddc.h | 56 +++ examples/petsc/include/libceedsetup.h | 4 + examples/petsc/include/matops.h | 6 + examples/petsc/include/petscutils.h | 1 + examples/petsc/include/structs.h | 77 ++-- examples/petsc/src/libceedsetup.c | 255 +++++++++++- examples/petsc/src/matops.c | 282 ++++++++++++- examples/petsc/src/petscutils.c | 13 + 11 files changed, 1248 insertions(+), 32 deletions(-) create mode 100644 examples/petsc/bddc.c create mode 100644 examples/petsc/bddc.h diff --git a/examples/petsc/.gitignore b/examples/petsc/.gitignore index b3720a421f..39ef027ba6 100644 --- a/examples/petsc/.gitignore +++ b/examples/petsc/.gitignore @@ -2,6 +2,7 @@ *.vtu *.a /area +/bddc /bps /bpsraw /bpssphere @@ -9,6 +10,7 @@ /dmswarm /multigrid /petsc-area +/petsc-bddc /petsc-bps /petsc-bpsraw /petsc-bpssphere diff --git a/examples/petsc/Makefile b/examples/petsc/Makefile index 3a3aee1d19..c0ffabacec 100644 --- a/examples/petsc/Makefile +++ b/examples/petsc/Makefile @@ -28,7 +28,7 @@ LDLIBS = $(call pkgconf, --libs-only-l $(PETSc.pc) $(ceed.pc)) -lm OBJDIR := build SRCDIR := src -all: area bps bpsraw bpssphere dmswarm multigrid +all: area bddc bps bpsraw bpssphere dmswarm multigrid utils.c := $(sort $(wildcard $(SRCDIR)/*.c)) utils.o = $(utils.c:%.c=$(OBJDIR)/%.o) @@ -40,7 +40,13 @@ area.o = $(area.c:%.c=$(OBJDIR)/%.o) area: $(area.o) libutils.a | $(PETSc.pc) $(ceed.pc) $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ -bps.c := bps.c +bddc.c := bddc.c +bddc.c := bddc.c $(utils.c) +bddc.o = $(bddc.c:%.c=$(OBJDIR)/%.o) +bddc: $(bddc.o) libutils.a | $(PETSc.pc) $(ceed.pc) + $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ + +bps.c := bps.c $(utils.c) bps.o = $(bps.c:%.c=$(OBJDIR)/%.o) bps: $(bps.o) libutils.a | $(PETSc.pc) $(ceed.pc) $(call quiet,LINK.o) $(CEED_LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ @@ -94,7 +100,7 @@ print: $(PETSc.pc) $(ceed.pc) @true clean: - $(RM) -r $(OBJDIR) *.vtu area bps bpsraw bpssphere dmswarm multigrid libutils.a + $(RM) -r $(OBJDIR) *.vtu area bddc bps bpsraw bpssphere dmswarm multigrid libutils.a $(PETSc.pc): $(if $(wildcard $@),,$(error \ diff --git a/examples/petsc/bddc.c b/examples/petsc/bddc.c new file mode 100644 index 0000000000..f3327c5210 --- /dev/null +++ b/examples/petsc/bddc.c @@ -0,0 +1,572 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +// libCEED + PETSc Example: CEED BPs 3-6 with BDDC +// +// This example demonstrates a simple usage of libCEED with PETSc to solve the +// CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. +// +// The code uses higher level communication protocols in DMPlex. +// +// Build with: +// +// make bddc [PETSC_DIR=] [CEED_DIR=] +// +// Sample runs: +// +// bddc -problem bp3 +// bddc -problem bp4 +// bddc -problem bp5 -ceed /cpu/self +// bddc -problem bp6 -ceed /gpu/cuda +// +//TESTARGS -ceed {ceed_resource} -test -problem bp3 -degree 3 + +/// @file +/// CEED BPs 1-6 BDDC example using PETSc +const char help[] = "Solve CEED BPs using BDDC with PETSc and DMPlex\n"; + +#include "bddc.h" + +// The BDDC example uses vectors in three spaces +// +// Fine mesh: Broken mesh: Vertex mesh: Broken vertex mesh: +// x----x----x x----x x----x x x x x x x x +// | | | | | | | +// | | | | | | | +// x----x----x x----x x----x x x x x x x x +// +// Vectors are organized as follows +// - *_Pi : vector on the vertex mesh +// - *_Pi_r : vector on the broken vertex mesh +// - *_r : vector on the broken mesh, all points but vertices +// - *_Gamma : vector on the broken mesh, face/vertex/edge points +// - *_I : vector on the broken mesh, interior points +// - * : all other vectors are on the fine mesh + +int main(int argc, char **argv) { + MPI_Comm comm; + char filename[PETSC_MAX_PATH_LEN], ceed_resource[PETSC_MAX_PATH_LEN] = "/cpu/self"; + double my_rt_start, my_rt, rt_min, rt_max; + PetscInt degree = 3, q_extra, l_size, xl_size, g_size, l_Pi_size, xl_Pi_size, g_Pi_size, dim = 3, mesh_elem[3] = {3, 3, 3}, num_comp_u = 1; + PetscScalar *r; + PetscScalar eps = 1.0; + PetscBool test_mode, benchmark_mode, read_mesh, write_solution; + PetscLogStage solve_stage; + DM dm, dm_Pi; + SNES snes_Pi, snes_Pi_r; + KSP ksp, ksp_S_Pi, ksp_S_Pi_r; + PC pc; + Mat mat_O, mat_S_Pi, mat_S_Pi_r; + Vec X, X_loc, X_Pi, X_Pi_loc, X_Pi_r_loc, rhs, rhs_loc; + PetscMemType mem_type; + OperatorApplyContext apply_ctx, error_ctx; + BDDCApplyContext bddc_ctx; + Ceed ceed; + CeedData ceed_data; + CeedDataBDDC ceed_data_bddc; + CeedVector rhs_ceed, target; + CeedQFunction qf_error, qf_restrict, qf_prolong; + CeedOperator op_error; + BPType bp_choice; + InjectionType injection; + + PetscCall(PetscInitialize(&argc, &argv, NULL, help)); + comm = PETSC_COMM_WORLD; + + // Parse command line options + PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); + bp_choice = CEED_BP3; + PetscCall(PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bp_types, (PetscEnum)bp_choice, (PetscEnum *)&bp_choice, NULL)); + num_comp_u = bp_options[bp_choice].num_comp_u; + test_mode = PETSC_FALSE; + PetscCall(PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL)); + benchmark_mode = PETSC_FALSE; + PetscCall(PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL)); + write_solution = PETSC_FALSE; + PetscCall(PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL)); + PetscCall(PetscOptionsScalar("-eps", "Epsilon parameter for Kershaw mesh transformation", NULL, eps, &eps, NULL)); + if (eps > 1 || eps <= 0) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "-eps %g must be (0,1]", (double)PetscRealPart(eps)); + degree = test_mode ? 3 : 2; + PetscCall(PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL)); + if (degree < 2) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "-degree %" PetscInt_FMT " must be at least 2", degree); + q_extra = bp_options[bp_choice].q_extra; + PetscCall(PetscOptionsInt("-q_extra", "Number of extra quadrature points", NULL, q_extra, &q_extra, NULL)); + PetscCall(PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceed_resource, ceed_resource, sizeof(ceed_resource), NULL)); + injection = INJECTION_SCALED; + PetscCall(PetscOptionsEnum("-injection", "Injection strategy to use", NULL, injection_types, (PetscEnum)injection, (PetscEnum *)&injection, NULL)); + read_mesh = PETSC_FALSE; + PetscCall(PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, filename, sizeof(filename), &read_mesh)); + if (!read_mesh) { + PetscInt tmp = dim; + PetscCall(PetscOptionsIntArray("-cells", "Number of cells per dimension", NULL, mesh_elem, &tmp, NULL)); + } + PetscOptionsEnd(); + + // Set up libCEED + CeedInit(ceed_resource, &ceed); + CeedMemType mem_type_backend; + CeedGetPreferredMemType(ceed, &mem_type_backend); + + // Setup DMs + if (read_mesh) { + PetscCall(DMPlexCreateFromFile(PETSC_COMM_WORLD, filename, NULL, PETSC_TRUE, &dm)); + } else { + PetscCall(DMPlexCreateBoxMesh(PETSC_COMM_WORLD, dim, PETSC_FALSE, mesh_elem, NULL, NULL, NULL, PETSC_TRUE, &dm)); + } + + { + DM dm_dist = NULL; + PetscPartitioner part; + + PetscCall(DMPlexGetPartitioner(dm, &part)); + PetscCall(PetscPartitionerSetFromOptions(part)); + PetscCall(DMPlexDistribute(dm, 0, NULL, &dm_dist)); + if (dm_dist) { + PetscCall(DMDestroy(&dm)); + dm = dm_dist; + } + } + + // Apply Kershaw mesh transformation + PetscCall(Kershaw(dm, eps)); + + VecType vec_type; + switch (mem_type_backend) { + case CEED_MEM_HOST: + vec_type = VECSTANDARD; + break; + case CEED_MEM_DEVICE: { + const char *resolved; + CeedGetResource(ceed, &resolved); + if (strstr(resolved, "/gpu/cuda")) vec_type = VECCUDA; + else if (strstr(resolved, "/gpu/hip/occa")) vec_type = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 + else if (strstr(resolved, "/gpu/hip")) vec_type = VECHIP; + else vec_type = VECSTANDARD; + } + } + PetscCall(DMSetVecType(dm, vec_type)); + + // Setup DM + PetscCall(DMSetFromOptions(dm)); + PetscCall(DMGetDimension(dm, &dim)); + PetscCall(SetupDMByDegree(dm, degree, bp_options[bp_choice].q_extra, num_comp_u, dim, bp_options[bp_choice].enforce_bc)); + + // Set up subdomain vertex DM + PetscCall(DMClone(dm, &dm_Pi)); + PetscCall(DMSetVecType(dm_Pi, vec_type)); + PetscCall(SetupVertexDMFromDM(dm, dm_Pi, degree + bp_options[bp_choice].q_extra, num_comp_u, bp_options[bp_choice].enforce_bc)); + + // Create vectors + // -- Fine mesh + PetscCall(DMCreateGlobalVector(dm, &X)); + PetscCall(VecGetLocalSize(X, &l_size)); + PetscCall(VecGetSize(X, &g_size)); + PetscCall(DMCreateLocalVector(dm, &X_loc)); + PetscCall(VecGetSize(X_loc, &xl_size)); + // -- Vertex mesh + PetscCall(DMCreateGlobalVector(dm_Pi, &X_Pi)); + PetscCall(VecGetLocalSize(X_Pi, &l_Pi_size)); + PetscCall(VecGetSize(X_Pi, &g_Pi_size)); + PetscCall(DMCreateLocalVector(dm_Pi, &X_Pi_loc)); + PetscCall(VecGetSize(X_Pi_loc, &xl_Pi_size)); + + // Operator + PetscCall(PetscNew(&apply_ctx)); + PetscCall(MatCreateShell(comm, l_size, l_size, g_size, g_size, apply_ctx, &mat_O)); + PetscCall(MatShellSetOperation(mat_O, MATOP_MULT, (void (*)(void))MatMult_Ceed)); + PetscCall(MatShellSetOperation(mat_O, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiag)); + PetscCall(MatShellSetVecType(mat_O, vec_type)); + + // Print global grid information + if (!test_mode) { + PetscInt P = degree + 1, Q = P + q_extra; + + const char *used_resource; + CeedGetResource(ceed, &used_resource); + + PetscCall(VecGetType(X, &vec_type)); + + PetscCall(PetscPrintf(comm, + "\n-- CEED Benchmark Problem %" PetscInt_FMT " -- libCEED + PETSc + BDDC --\n" + " PETSc:\n" + " PETSc Vec Type : %s\n" + " libCEED:\n" + " libCEED Backend : %s\n" + " libCEED Backend MemType : %s\n" + " Mesh:\n" + " Number of 1D Basis Nodes (p) : %" PetscInt_FMT "\n" + " Number of 1D Quadrature Points (q) : %" PetscInt_FMT "\n" + " Global Nodes : %" PetscInt_FMT "\n" + " Owned Nodes : %" PetscInt_FMT "\n" + " DoF per node : %" PetscInt_FMT "\n" + " BDDC:\n" + " Injection : %s\n" + " Global Interface Nodes : %" PetscInt_FMT "\n" + " Owned Interface Nodes : %" PetscInt_FMT "\n", + bp_choice + 1, vec_type, used_resource, CeedMemTypes[mem_type_backend], P, Q, g_size / num_comp_u, l_size / num_comp_u, + num_comp_u, injection_types[injection], g_Pi_size, l_Pi_size)); + } + + // Create RHS vector + PetscCall(VecDuplicate(X, &rhs)); + PetscCall(VecDuplicate(X_loc, &rhs_loc)); + PetscCall(VecZeroEntries(rhs_loc)); + PetscCall(VecGetArrayAndMemType(rhs_loc, &r, &mem_type)); + CeedVectorCreate(ceed, xl_size, &rhs_ceed); + CeedVectorSetArray(rhs_ceed, MemTypeP2C(mem_type), CEED_USE_POINTER, r); + + // Set up libCEED operator + PetscCall(PetscCalloc1(1, &ceed_data)); + PetscCall(SetupLibceedByDegree(dm, ceed, degree, dim, q_extra, dim, num_comp_u, g_size, xl_size, bp_options[bp_choice], ceed_data, true, rhs_ceed, + &target)); + + // Gather RHS + CeedVectorTakeArray(rhs_ceed, MemTypeP2C(mem_type), NULL); + PetscCall(VecRestoreArrayAndMemType(rhs_loc, &r)); + PetscCall(VecZeroEntries(rhs)); + PetscCall(DMLocalToGlobal(dm, rhs_loc, ADD_VALUES, rhs)); + CeedVectorDestroy(&rhs_ceed); + + // Set up libCEED operator on interface vertices + PetscCall(PetscNew(&ceed_data_bddc)); + PetscCall(SetupLibceedBDDC(dm_Pi, ceed_data, ceed_data_bddc, g_Pi_size, xl_Pi_size, bp_options[bp_choice])); + + // Create the injection/restriction QFunction + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_INTERP, &qf_restrict); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_INTERP, CEED_EVAL_NONE, &qf_prolong); + + // Create the error QFunction + CeedQFunctionCreateInterior(ceed, 1, bp_options[bp_choice].error, bp_options[bp_choice].error_loc, &qf_error); + CeedQFunctionAddInput(qf_error, "u", num_comp_u, CEED_EVAL_INTERP); + CeedQFunctionAddInput(qf_error, "true_soln", num_comp_u, CEED_EVAL_NONE); + CeedQFunctionAddInput(qf_error, "qdata", ceed_data->q_data_size, CEED_EVAL_NONE); + CeedQFunctionAddOutput(qf_error, "error", num_comp_u, CEED_EVAL_INTERP); + + // Create the error operator + CeedOperatorCreate(ceed, qf_error, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_error); + CeedOperatorSetField(op_error, "u", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_error, "true_soln", ceed_data->elem_restr_u_i, CEED_BASIS_COLLOCATED, target); + CeedOperatorSetField(op_error, "qdata", ceed_data->elem_restr_qd_i, CEED_BASIS_COLLOCATED, ceed_data->q_data); + CeedOperatorSetField(op_error, "error", ceed_data->elem_restr_u, ceed_data->basis_u, CEED_VECTOR_ACTIVE); + + // Calculate multiplicity + { + PetscScalar *x; + + // CEED vector + PetscCall(VecZeroEntries(X_loc)); + PetscCall(VecGetArray(X_loc, &x)); + CeedVectorSetArray(ceed_data->x_ceed, CEED_MEM_HOST, CEED_USE_POINTER, x); + + // Multiplicity + CeedElemRestrictionGetMultiplicity(ceed_data->elem_restr_u, ceed_data->x_ceed); + + // Restore vector + CeedVectorTakeArray(ceed_data->x_ceed, CEED_MEM_HOST, &x); + PetscCall(VecRestoreArray(X_loc, &x)); + + // Local-to-global + PetscCall(VecZeroEntries(X)); + PetscCall(DMLocalToGlobal(dm, X_loc, ADD_VALUES, X)); + + // Global-to-local + PetscCall(DMGlobalToLocal(dm, X, INSERT_VALUES, X_loc)); + PetscCall(VecZeroEntries(X)); + + // Multiplicity scaling + PetscCall(VecReciprocal(X_loc)); + + // CEED vector + PetscCall(VecGetArray(X_loc, &x)); + CeedVectorSetArray(ceed_data->x_ceed, CEED_MEM_HOST, CEED_USE_POINTER, x); + + // Inject multiplicity + CeedOperatorApply(ceed_data_bddc->op_inject_r, ceed_data->x_ceed, ceed_data_bddc->mult_ceed, CEED_REQUEST_IMMEDIATE); + // Restore vector + CeedVectorTakeArray(ceed_data->x_ceed, CEED_MEM_HOST, &x); + PetscCall(VecRestoreArray(X_loc, &x)); + PetscCall(VecZeroEntries(X_loc)); + } + + // Setup dummy SNESs + { + // Schur compliment operator + // -- Jacobian Matrix + PetscCall(DMSetMatType(dm_Pi, MATAIJ)); + PetscCall(DMCreateMatrix(dm_Pi, &mat_S_Pi)); + + // -- Dummy SNES + PetscCall(SNESCreate(comm, &snes_Pi)); + PetscCall(SNESSetDM(snes_Pi, dm_Pi)); + PetscCall(SNESSetSolution(snes_Pi, X_Pi)); + + // -- Jacobian function + PetscCall(SNESSetJacobian(snes_Pi, mat_S_Pi, mat_S_Pi, NULL, NULL)); + + // -- Residual evaluation function + PetscCall(PetscNew(&bddc_ctx)); + PetscCall(SNESSetFunction(snes_Pi, X_Pi, FormResidual_BDDCSchur, bddc_ctx)); + } + { + // Element Schur compliment operator + // -- Vectors + PetscInt num_elem, elem_size; + + CeedElemRestrictionGetNumElements(ceed_data_bddc->elem_restr_Pi, &num_elem); + CeedElemRestrictionGetElementSize(ceed_data_bddc->elem_restr_Pi, &elem_size); + PetscCall(VecCreate(comm, &X_Pi_r_loc)); + PetscCall(VecSetSizes(X_Pi_r_loc, num_elem * elem_size, PETSC_DECIDE)); + PetscCall(VecSetType(X_Pi_r_loc, vec_type)); + + // -- Jacobian Matrix + PetscCall(MatCreateSeqAIJ(comm, elem_size * num_elem, elem_size * num_elem, elem_size, NULL, &mat_S_Pi_r)); + for (PetscInt e = 0; e < num_elem; e++) { + for (PetscInt i = 0; i < elem_size; i++) { + for (PetscInt j = 0; j < elem_size; j++) { + PetscInt row = e * elem_size + i; + PetscInt col = e * elem_size + j; + PetscScalar value = i + j; + PetscCall(MatSetValues(mat_S_Pi_r, 1, &row, 1, &col, &value, INSERT_VALUES)); + } + } + } + PetscCall(MatAssemblyBegin(mat_S_Pi_r, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(mat_S_Pi_r, MAT_FINAL_ASSEMBLY)); + + // -- Dummy SNES + PetscCall(SNESCreate(comm, &snes_Pi_r)); + PetscCall(SNESSetSolution(snes_Pi_r, X_Pi_r_loc)); + + // -- Jacobian function + PetscCall(SNESSetJacobian(snes_Pi_r, mat_S_Pi_r, mat_S_Pi_r, NULL, NULL)); + + // -- Residual evaluation function + PetscCall(SNESSetFunction(snes_Pi_r, X_Pi_r_loc, FormResidual_BDDCElementSchur, bddc_ctx)); + } + + // Set up MatShell user data + { + apply_ctx->comm = comm; + apply_ctx->dm = dm; + apply_ctx->X_loc = X_loc; + PetscCall(VecDuplicate(X_loc, &apply_ctx->Y_loc)); + apply_ctx->x_ceed = ceed_data->x_ceed; + apply_ctx->y_ceed = ceed_data->y_ceed; + apply_ctx->op = ceed_data->op_apply; + apply_ctx->ceed = ceed; + } + + // Set up PCShell user data (also used for Schur operators) + { + bddc_ctx->comm = comm; + bddc_ctx->dm = dm; + bddc_ctx->dm_Pi = dm_Pi; + bddc_ctx->X_loc = X_loc; + bddc_ctx->Y_loc = apply_ctx->Y_loc; + bddc_ctx->X_Pi = X_Pi; + PetscCall(VecDuplicate(X_Pi, &bddc_ctx->Y_Pi)); + bddc_ctx->X_Pi_loc = X_Pi_loc; + PetscCall(VecDuplicate(X_Pi_loc, &bddc_ctx->Y_Pi_loc)); + bddc_ctx->X_Pi_r_loc = X_Pi_r_loc; + PetscCall(VecDuplicate(X_Pi_r_loc, &bddc_ctx->Y_Pi_r_loc)); + bddc_ctx->ceed_data_bddc = ceed_data_bddc; + bddc_ctx->mat_S_Pi = mat_S_Pi; + bddc_ctx->mat_S_Pi_r = mat_S_Pi_r; + PetscCall(KSPCreate(comm, &ksp_S_Pi)); + PetscCall(KSPCreate(comm, &ksp_S_Pi_r)); + bddc_ctx->ksp_S_Pi = ksp_S_Pi; + bddc_ctx->ksp_S_Pi_r = ksp_S_Pi_r; + bddc_ctx->snes_Pi = snes_Pi; + bddc_ctx->snes_Pi_r = snes_Pi_r; + bddc_ctx->is_harmonic = injection == INJECTION_HARMONIC; + } + + // Set up KSP + PetscCall(KSPCreate(comm, &ksp)); + { + PetscCall(KSPSetType(ksp, KSPCG)); + PetscCall(KSPSetNormType(ksp, KSP_NORM_NATURAL)); + PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); + } + PetscCall(KSPSetFromOptions(ksp)); + PetscCall(KSPSetOperators(ksp, mat_O, mat_O)); + + // Set up PCShell + PetscCall(KSPGetPC(ksp, &pc)); + { + PetscCall(PCSetType(pc, PCSHELL)); + PetscCall(PCShellSetContext(pc, bddc_ctx)); + PetscCall(PCShellSetApply(pc, PCShellApply_BDDC)); + PetscCall(PCShellSetSetUp(pc, PCShellSetup_BDDC)); + + // Set up Schur compilemnt solvers + { + // -- Vertex mesh AMG + PC pc_S_Pi; + PetscCall(KSPSetType(ksp_S_Pi, KSPPREONLY)); + PetscCall(KSPSetOperators(ksp_S_Pi, mat_S_Pi, mat_S_Pi)); + + PetscCall(KSPGetPC(ksp_S_Pi, &pc_S_Pi)); + PetscCall(PCSetType(pc_S_Pi, PCGAMG)); + + PetscCall(KSPSetOptionsPrefix(ksp_S_Pi, "S_Pi_")); + PetscCall(PCSetOptionsPrefix(pc_S_Pi, "S_Pi_")); + PetscCall(KSPSetFromOptions(ksp_S_Pi)); + PetscCall(PCSetFromOptions(pc_S_Pi)); + } + { + // -- Broken mesh AMG + PC pc_S_Pi_r; + PetscCall(KSPSetType(ksp_S_Pi_r, KSPPREONLY)); + PetscCall(KSPSetOperators(ksp_S_Pi_r, mat_S_Pi_r, mat_S_Pi_r)); + + PetscCall(KSPGetPC(ksp_S_Pi_r, &pc_S_Pi_r)); + PetscCall(PCSetType(pc_S_Pi_r, PCGAMG)); + + PetscCall(KSPSetOptionsPrefix(ksp_S_Pi_r, "S_Pi_r_")); + PetscCall(PCSetOptionsPrefix(pc_S_Pi_r, "S_Pi_r_")); + PetscCall(KSPSetFromOptions(ksp_S_Pi_r)); + PetscCall(PCSetFromOptions(pc_S_Pi_r)); + } + } + + // First run, if benchmarking + if (benchmark_mode) { + PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1)); + PetscCall(VecZeroEntries(X)); + my_rt_start = MPI_Wtime(); + PetscCall(KSPSolve(ksp, rhs, X)); + my_rt = MPI_Wtime() - my_rt_start; + PetscCall(MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm)); + // Set maxits based on first iteration timing + if (my_rt > 0.02) { + PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5)); + } else { + PetscCall(KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20)); + } + } + + // Timed solve + PetscCall(VecZeroEntries(X)); + PetscCall(PetscBarrier((PetscObject)ksp)); + + // -- Performance logging + PetscCall(PetscLogStageRegister("Solve Stage", &solve_stage)); + PetscCall(PetscLogStagePush(solve_stage)); + + // -- Solve + my_rt_start = MPI_Wtime(); + PetscCall(KSPSolve(ksp, rhs, X)); + my_rt = MPI_Wtime() - my_rt_start; + + // -- Performance logging + PetscCall(PetscLogStagePop()); + + // Output results + { + KSPType ksp_type; + KSPConvergedReason reason; + PCType pc_type; + PetscReal rnorm; + PetscInt its; + PetscCall(KSPGetType(ksp, &ksp_type)); + PetscCall(KSPGetConvergedReason(ksp, &reason)); + PetscCall(KSPGetIterationNumber(ksp, &its)); + PetscCall(KSPGetResidualNorm(ksp, &rnorm)); + PetscCall(PCGetType(pc, &pc_type)); + if (!test_mode || reason < 0 || rnorm > 1e-8) { + PetscCall(PetscPrintf(comm, + " KSP:\n" + " KSP Type : %s\n" + " KSP Convergence : %s\n" + " Total KSP Iterations : %" PetscInt_FMT "\n" + " Final rnorm : %e\n", + ksp_type, KSPConvergedReasons[reason], its, (double)rnorm)); + PetscCall(PetscPrintf(comm, + " BDDC:\n" + " PC Type : %s\n", + pc_type)); + } + if (!test_mode) { + PetscCall(PetscPrintf(comm, " Performance:\n")); + } + { + // Set up error operator context + PetscCall(PetscNew(&error_ctx)); + PetscCall(SetupErrorOperatorCtx(comm, dm, ceed, ceed_data, X_loc, op_error, error_ctx)); + PetscScalar l2_error; + PetscCall(ComputeL2Error(X, &l2_error, error_ctx)); + PetscReal tol = 5e-2; + if (!test_mode || l2_error > tol) { + PetscCall(MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm)); + PetscCall(MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm)); + PetscCall(PetscPrintf(comm, + " L2 Error : %e\n" + " CG Solve Time : %g (%g) sec\n", + (double)l2_error, rt_max, rt_min)); + } + } + if (benchmark_mode && (!test_mode)) { + PetscCall(PetscPrintf(comm, " DoFs/Sec in CG : %g (%g) million\n", 1e-6 * g_size * its / rt_max, + 1e-6 * g_size * its / rt_min)); + } + } + + if (write_solution) { + PetscViewer vtk_viewer_soln; + + PetscCall(PetscViewerCreate(comm, &vtk_viewer_soln)); + PetscCall(PetscViewerSetType(vtk_viewer_soln, PETSCVIEWERVTK)); + PetscCall(PetscViewerFileSetName(vtk_viewer_soln, "solution.vtu")); + PetscCall(VecView(X, vtk_viewer_soln)); + PetscCall(PetscViewerDestroy(&vtk_viewer_soln)); + } + + // Cleanup + PetscCall(VecDestroy(&X)); + PetscCall(VecDestroy(&X_loc)); + PetscCall(VecDestroy(&apply_ctx->Y_loc)); + PetscCall(VecDestroy(&X_Pi)); + PetscCall(VecDestroy(&bddc_ctx->Y_Pi)); + PetscCall(VecDestroy(&X_Pi_loc)); + PetscCall(VecDestroy(&bddc_ctx->Y_Pi_loc)); + PetscCall(VecDestroy(&X_Pi_r_loc)); + PetscCall(VecDestroy(&bddc_ctx->Y_Pi_r_loc)); + PetscCall(VecDestroy(&rhs)); + PetscCall(VecDestroy(&rhs_loc)); + PetscCall(MatDestroy(&mat_O)); + PetscCall(MatDestroy(&mat_S_Pi)); + PetscCall(MatDestroy(&mat_S_Pi_r)); + PetscCall(PetscFree(apply_ctx)); + PetscCall(PetscFree(error_ctx)); + PetscCall(PetscFree(bddc_ctx)); + PetscCall(CeedDataDestroy(0, ceed_data)); + PetscCall(CeedDataBDDCDestroy(ceed_data_bddc)); + PetscCall(DMDestroy(&dm)); + PetscCall(DMDestroy(&dm_Pi)); + PetscCall(KSPDestroy(&ksp)); + PetscCall(KSPDestroy(&ksp_S_Pi)); + PetscCall(KSPDestroy(&ksp_S_Pi_r)); + PetscCall(SNESDestroy(&snes_Pi)); + PetscCall(SNESDestroy(&snes_Pi_r)); + CeedVectorDestroy(&target); + CeedQFunctionDestroy(&qf_error); + CeedQFunctionDestroy(&qf_restrict); + CeedQFunctionDestroy(&qf_prolong); + CeedOperatorDestroy(&op_error); + CeedDestroy(&ceed); + return PetscFinalize(); +} diff --git a/examples/petsc/bddc.h b/examples/petsc/bddc.h new file mode 100644 index 0000000000..959db80c7d --- /dev/null +++ b/examples/petsc/bddc.h @@ -0,0 +1,56 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#ifndef bddc_h +#define bddc_h + +#include +#include +#include +#include +#include +#include +#include + +#include "bps.h" +#include "include/bpsproblemdata.h" +#include "include/libceedsetup.h" +#include "include/matops.h" +#include "include/petscutils.h" +#include "include/structs.h" + +#if PETSC_VERSION_LT(3, 12, 0) +#ifdef PETSC_HAVE_CUDA +#include +// Note: With PETSc prior to version 3.12.0, providing the source path to +// include 'cublas_v2.h' will be needed to use 'petsccuda.h'. +#endif +#endif + +// ----------------------------------------------------------------------------- +// Command Line Options +// ----------------------------------------------------------------------------- + +// Coarsening options +typedef enum { + INJECTION_SCALED = 0, + INJECTION_HARMONIC = 1, +} InjectionType; +static const char *const injection_types[] = { + "scaled", "harmonic", "InjectionType", "INJECTION", 0, +}; + +#endif // bddc_h diff --git a/examples/petsc/include/libceedsetup.h b/examples/petsc/include/libceedsetup.h index 4510b83f11..700f1d8054 100644 --- a/examples/petsc/include/libceedsetup.h +++ b/examples/petsc/include/libceedsetup.h @@ -17,10 +17,14 @@ #include "structs.h" PetscErrorCode CeedDataDestroy(CeedInt i, CeedData data); +PetscErrorCode CeedDataBDDCDestroy(CeedDataBDDC data); PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedInt topo_dim, CeedInt q_extra, PetscInt num_comp_x, PetscInt num_comp_u, PetscInt g_size, PetscInt xl_size, BPData bp_data, CeedData data, PetscBool setup_rhs, CeedVector rhs_ceed, CeedVector *target); PetscErrorCode CeedLevelTransferSetup(DM dm, Ceed ceed, CeedInt level, CeedInt num_comp_u, CeedData *data, BPData bp_data, Vec fine_mult); +PetscErrorCode SetupLibceedBDDC(DM dm_Pi, CeedData data_fine, CeedDataBDDC data_bddc, PetscInt g_vertex_size, PetscInt xl_vertex_size, + BPData bp_data); PetscErrorCode SetupErrorOperator(DM dm, Ceed ceed, BPData bp_data, CeedInt topo_dim, PetscInt num_comp_x, PetscInt num_comp_u, CeedOperator *op_error); + #endif // libceed_petsc_examples_setup_h diff --git a/examples/petsc/include/matops.h b/examples/petsc/include/matops.h index 2ef4760c02..a01016c626 100644 --- a/examples/petsc/include/matops.h +++ b/examples/petsc/include/matops.h @@ -25,6 +25,12 @@ PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y); PetscErrorCode FormResidual_Ceed(SNES snes, Vec X, Vec Y, void *ctx); PetscErrorCode MatMult_Prolong(Mat A, Vec X, Vec Y); PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y); +PetscErrorCode PCShellApply_BDDC(PC pc, Vec X, Vec Y); +PetscErrorCode PCShellSetup_BDDC(PC pc); +PetscErrorCode MatMult_BDDCElementSchur(BDDCApplyContext bddc_ctx, Vec X, Vec Y); +PetscErrorCode FormResidual_BDDCElementSchur(SNES snes, Vec X, Vec Y, void *ctx); +PetscErrorCode MatMult_BDDCSchur(BDDCApplyContext bddc_ctx, Vec X, Vec Y); +PetscErrorCode FormResidual_BDDCSchur(SNES snes, Vec X, Vec Y, void *ctx); PetscErrorCode ComputeL2Error(Vec X, PetscScalar *l2_error, OperatorApplyContext op_error_ctx); #endif // libceed_petsc_examples_matops_h diff --git a/examples/petsc/include/petscutils.h b/examples/petsc/include/petscutils.h index 708135b590..200a9bcc63 100644 --- a/examples/petsc/include/petscutils.h +++ b/examples/petsc/include/petscutils.h @@ -26,6 +26,7 @@ PetscErrorCode VecReadP2C(Vec X_petsc, PetscMemType *mem_type, CeedVector x_ce PetscErrorCode VecReadC2P(CeedVector x_ceed, PetscMemType mem_type, Vec X_petsc); PetscErrorCode Kershaw(DM dm_orig, PetscScalar eps); PetscErrorCode SetupDMByDegree(DM dm, PetscInt p_degree, PetscInt q_extra, PetscInt num_comp_u, PetscInt topo_dim, bool enforce_bc); +PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt q_extra, PetscInt num_comp_u, PetscBool enforce_bc); PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt height, DMLabel domain_label, CeedInt value, CeedElemRestriction *elem_restr); CeedElemTopology ElemTopologyP2C(DMPolytopeType cell_type); PetscErrorCode DMFieldToDSField(DM dm, DMLabel domain_label, PetscInt dm_field, PetscInt *ds_field); diff --git a/examples/petsc/include/structs.h b/examples/petsc/include/structs.h index 3ba61dfa56..0edacd3d5e 100644 --- a/examples/petsc/include/structs.h +++ b/examples/petsc/include/structs.h @@ -14,32 +14,6 @@ #include #include -// ----------------------------------------------------------------------------- -// PETSc Operator Structs -// ----------------------------------------------------------------------------- - -// Data for PETSc Matshell -typedef struct OperatorApplyContext_ *OperatorApplyContext; -struct OperatorApplyContext_ { - MPI_Comm comm; - DM dm; - Vec X_loc, Y_loc, diag; - CeedVector x_ceed, y_ceed; - CeedOperator op; - Ceed ceed; -}; - -// Data for PETSc Prolong/Restrict Matshells -typedef struct ProlongRestrContext_ *ProlongRestrContext; -struct ProlongRestrContext_ { - MPI_Comm comm; - DM dmc, dmf; - Vec loc_vec_c, loc_vec_f, mult_vec; - CeedVector ceed_vec_c, ceed_vec_f; - CeedOperator op_prolong, op_restrict; - Ceed ceed; -}; - // ----------------------------------------------------------------------------- // libCEED Data Structs // ----------------------------------------------------------------------------- @@ -56,6 +30,18 @@ struct CeedData_ { CeedInt q_data_size; }; +// libCEED data struct for BDDC +typedef struct CeedDataBDDC_ *CeedDataBDDC; +struct CeedDataBDDC_ { + CeedBasis basis_Pi, basis_Pi_r; + CeedInt strides[3]; + CeedElemRestriction elem_restr_Pi, elem_restr_Pi_r, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv, op_inject_Pi, op_inject_Pi_r, op_inject_r, op_restrict_Pi, op_restrict_Pi_r, + op_restrict_r; + CeedVector x_ceed, y_ceed, x_Pi_ceed, y_Pi_ceed, x_Pi_r_ceed, y_Pi_r_ceed, x_r_ceed, y_r_ceed, z_r_ceed, mult_ceed, mask_r_ceed, mask_Gamma_ceed, + mask_I_ceed; +}; + // BP specific data typedef struct { CeedInt num_comp_x, num_comp_u, topo_dim, q_data_size, q_extra; @@ -84,4 +70,43 @@ struct RunParams_ { PetscLogStage solve_stage; }; +// ----------------------------------------------------------------------------- +// PETSc Operator Structs +// ----------------------------------------------------------------------------- + +// Data for PETSc Matshell +typedef struct OperatorApplyContext_ *OperatorApplyContext; +struct OperatorApplyContext_ { + MPI_Comm comm; + DM dm; + Vec X_loc, Y_loc, diag; + CeedVector x_ceed, y_ceed; + CeedOperator op; + Ceed ceed; +}; + +// Data for PETSc Prolong/Restrict Matshells +typedef struct ProlongRestrContext_ *ProlongRestrContext; +struct ProlongRestrContext_ { + MPI_Comm comm; + DM dmc, dmf; + Vec loc_vec_c, loc_vec_f, mult_vec; + CeedVector ceed_vec_c, ceed_vec_f; + CeedOperator op_prolong, op_restrict; + Ceed ceed; +}; + +// Data for PETSc PCshell +typedef struct BDDCApplyContext_ *BDDCApplyContext; +struct BDDCApplyContext_ { + MPI_Comm comm; + DM dm, dm_Pi; + SNES snes_Pi, snes_Pi_r; + KSP ksp_S_Pi, ksp_S_Pi_r; + Mat mat_S_Pi, mat_S_Pi_r; + Vec X_loc, Y_loc, X_Pi, Y_Pi, X_Pi_loc, Y_Pi_loc, X_Pi_r_loc, Y_Pi_r_loc; + PetscBool is_harmonic; + CeedDataBDDC ceed_data_bddc; +}; + #endif // libceed_petsc_examples_structs_h diff --git a/examples/petsc/src/libceedsetup.c b/examples/petsc/src/libceedsetup.c index 58cd60019a..06f8f1750a 100644 --- a/examples/petsc/src/libceedsetup.c +++ b/examples/petsc/src/libceedsetup.c @@ -36,6 +36,44 @@ PetscErrorCode CeedDataDestroy(CeedInt i, CeedData data) { PetscFunctionReturn(PETSC_SUCCESS); }; +// ----------------------------------------------------------------------------- +// Destroy libCEED BDDC objects +// ----------------------------------------------------------------------------- +PetscErrorCode CeedDataBDDCDestroy(CeedDataBDDC data) { + PetscFunctionBeginUser; + + CeedBasisDestroy(&data->basis_Pi); + CeedBasisDestroy(&data->basis_Pi_r); + CeedElemRestrictionDestroy(&data->elem_restr_Pi); + CeedElemRestrictionDestroy(&data->elem_restr_Pi_r); + CeedElemRestrictionDestroy(&data->elem_restr_r); + CeedOperatorDestroy(&data->op_Pi_r); + CeedOperatorDestroy(&data->op_r_Pi); + CeedOperatorDestroy(&data->op_Pi_Pi); + CeedOperatorDestroy(&data->op_r_r); + CeedOperatorDestroy(&data->op_r_r_inv); + CeedOperatorDestroy(&data->op_inject_Pi); + CeedOperatorDestroy(&data->op_inject_Pi_r); + CeedOperatorDestroy(&data->op_inject_r); + CeedOperatorDestroy(&data->op_restrict_Pi); + CeedOperatorDestroy(&data->op_restrict_Pi_r); + CeedOperatorDestroy(&data->op_restrict_r); + CeedVectorDestroy(&data->x_Pi_ceed); + CeedVectorDestroy(&data->y_Pi_ceed); + CeedVectorDestroy(&data->x_Pi_r_ceed); + CeedVectorDestroy(&data->y_Pi_r_ceed); + CeedVectorDestroy(&data->x_r_ceed); + CeedVectorDestroy(&data->y_r_ceed); + CeedVectorDestroy(&data->z_r_ceed); + CeedVectorDestroy(&data->mult_ceed); + CeedVectorDestroy(&data->mask_r_ceed); + CeedVectorDestroy(&data->mask_Gamma_ceed); + CeedVectorDestroy(&data->mask_I_ceed); + PetscCall(PetscFree(data)); + + PetscFunctionReturn(PETSC_SUCCESS); +}; + // ----------------------------------------------------------------------------- // Set up libCEED for a given degree // ----------------------------------------------------------------------------- @@ -157,6 +195,7 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedInt to CeedVectorDestroy(&x_coord); // Save libCEED data required for level + data->ceed = ceed; data->basis_x = basis_x; data->basis_u = basis_u; data->elem_restr_x = elem_restr_x; @@ -177,6 +216,7 @@ PetscErrorCode SetupLibceedByDegree(DM dm, Ceed ceed, CeedInt degree, CeedInt to // ----------------------------------------------------------------------------- PetscErrorCode CeedLevelTransferSetup(DM dm, Ceed ceed, CeedInt level, CeedInt num_comp_u, CeedData *data, BPData bp_data, Vec fine_mult) { PetscFunctionBeginUser; + // Restriction - Fine to corse CeedOperator op_restrict; // Interpolation - Corse to fine @@ -212,6 +252,217 @@ PetscErrorCode CeedLevelTransferSetup(DM dm, Ceed ceed, CeedInt level, CeedInt n PetscFunctionReturn(PETSC_SUCCESS); }; +// ----------------------------------------------------------------------------- +// Set up libCEED for BDDC interface vertices +// ----------------------------------------------------------------------------- +PetscErrorCode SetupLibceedBDDC(DM dm_Pi, CeedData data_fine, CeedDataBDDC data_bddc, PetscInt g_vertex_size, PetscInt xl_vertex_size, + BPData bp_data) { + Ceed ceed = data_fine->ceed; + CeedBasis basis_Pi, basis_Pi_r, basis_u = data_fine->basis_u; + CeedElemRestriction elem_restr_Pi, elem_restr_Pi_r, elem_restr_r; + CeedOperator op_Pi_r, op_r_Pi, op_Pi_Pi, op_r_r, op_r_r_inv, op_inject_Pi, op_inject_Pi_r, op_inject_r, op_restrict_Pi, op_restrict_Pi_r, + op_restrict_r; + CeedVector x_Pi_ceed, y_Pi_ceed, x_Pi_r_ceed, y_Pi_r_ceed, mult_ceed, x_r_ceed, y_r_ceed, z_r_ceed, mask_r_ceed, mask_Gamma_ceed, mask_I_ceed; + CeedInt topo_dim, num_comp_u, P, P_Pi = 2, Q, num_qpts, num_elem, elem_size; + + PetscFunctionBeginUser; + + // CEED basis + // -- Basis for interface vertices + CeedBasisGetDimension(basis_u, &topo_dim); + CeedBasisGetNumComponents(basis_u, &num_comp_u); + CeedBasisGetNumNodes1D(basis_u, &P); + elem_size = CeedIntPow(P, topo_dim); + CeedBasisGetNumQuadraturePoints1D(basis_u, &Q); + CeedBasisGetNumQuadraturePoints(basis_u, &num_qpts); + CeedScalar *interp_1d, *grad_1d, *q_ref_1d, *q_weight_1d; + interp_1d = calloc(2 * Q, sizeof(CeedScalar)); + CeedScalar const *temp; + CeedBasisGetInterp1D(basis_u, &temp); + memcpy(interp_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&interp_1d[1 * Q], &temp[(P - 1) * Q], Q * sizeof(CeedScalar)); + grad_1d = calloc(2 * Q, sizeof(CeedScalar)); + CeedBasisGetGrad1D(basis_u, &temp); + memcpy(grad_1d, temp, Q * sizeof(CeedScalar)); + memcpy(&grad_1d[1 * Q], &temp[(P - 1) * Q], Q * sizeof(CeedScalar)); + q_ref_1d = calloc(Q, sizeof(CeedScalar)); + CeedBasisGetQRef(basis_u, &temp); + memcpy(q_ref_1d, temp, Q * sizeof(CeedScalar)); + q_weight_1d = calloc(Q, sizeof(CeedScalar)); + CeedBasisGetQWeights(basis_u, &temp); + memcpy(q_weight_1d, temp, Q * sizeof(CeedScalar)); + CeedBasisCreateTensorH1(ceed, topo_dim, num_comp_u, P_Pi, Q, interp_1d, grad_1d, q_ref_1d, q_weight_1d, &basis_Pi); + free(interp_1d); + free(grad_1d); + free(q_ref_1d); + free(q_weight_1d); + // -- Basis for injection/restriction + interp_1d = calloc(2 * P, sizeof(CeedScalar)); + interp_1d[0] = 1; + interp_1d[2 * P - 1] = 1; // Pick off corner vertices + grad_1d = calloc(2 * P, sizeof(CeedScalar)); + q_ref_1d = calloc(2, sizeof(CeedScalar)); + q_weight_1d = calloc(2, sizeof(CeedScalar)); + CeedBasisCreateTensorH1(ceed, topo_dim, num_comp_u, P, P_Pi, interp_1d, grad_1d, q_ref_1d, q_weight_1d, &basis_Pi_r); + free(interp_1d); + free(grad_1d); + free(q_ref_1d); + free(q_weight_1d); + + // CEED restrictions + // -- Interface vertex restriction + PetscCall(CreateRestrictionFromPlex(ceed, dm_Pi, 0, 0, 0, &elem_restr_Pi)); + + // -- Subdomain restriction + CeedElemRestrictionGetNumElements(elem_restr_Pi, &num_elem); + CeedInt strides_r[3] = {num_comp_u, 1, num_comp_u * elem_size}; + CeedElemRestrictionCreateStrided(ceed, num_elem, elem_size, num_comp_u, num_comp_u * num_elem * elem_size, strides_r, &elem_restr_r); + + // -- Broken subdomain restriction + CeedInt strides_Pi_r[3] = {num_comp_u, 1, num_comp_u * 8}; + CeedElemRestrictionCreateStrided(ceed, num_elem, 8, num_comp_u, num_comp_u * num_elem * 8, strides_Pi_r, &elem_restr_Pi_r); + + // Create the persistent vectors that will be needed + CeedVectorCreate(ceed, xl_vertex_size, &x_Pi_ceed); + CeedVectorCreate(ceed, xl_vertex_size, &y_Pi_ceed); + CeedVectorCreate(ceed, 8 * num_elem, &x_Pi_r_ceed); + CeedVectorCreate(ceed, 8 * num_elem, &y_Pi_r_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &mult_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &x_r_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &y_r_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &z_r_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &mask_r_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &mask_Gamma_ceed); + CeedVectorCreate(ceed, num_comp_u * elem_size * num_elem, &mask_I_ceed); + + // -- Masks for subdomains + CeedScalar *mask_r_array, *mask_Gamma_array, *mask_I_array; + CeedVectorGetArrayWrite(mask_r_ceed, CEED_MEM_HOST, &mask_r_array); + CeedVectorGetArrayWrite(mask_Gamma_ceed, CEED_MEM_HOST, &mask_Gamma_array); + CeedVectorGetArrayWrite(mask_I_ceed, CEED_MEM_HOST, &mask_I_array); + for (CeedInt e = 0; e < num_elem; e++) { + for (CeedInt n = 0; n < elem_size; n++) { + PetscBool r = n != 0 * (P - 1) && n != 1 * P - 1 && n != P * (P - 1) && n != P * P - 1 && n != P * P * (P - 1) + 0 && + n != P * P * (P - 1) + 1 * P - 1 && n != P * P * (P - 1) + P * (P - 1) && n != P * P * (P - 1) + P * P - 1; + PetscBool Gamma = + n % P == 0 || n % P == P - 1 || (n / P) % P == 0 || (n / P) % P == P - 1 || (n / (P * P)) % P == 0 || (n / (P * P)) % P == P - 1; + for (CeedInt c = 0; c < num_comp_u; c++) { + CeedInt index = strides_r[0] * n + strides_r[1] * c + strides_r[2] * e; + mask_r_array[index] = r ? 1.0 : 0.0; + mask_Gamma_array[index] = Gamma ? 1.0 : 0.0; + mask_I_array[index] = !Gamma ? 1.0 : 0.0; + } + } + } + CeedVectorRestoreArray(mask_r_ceed, &mask_r_array); + CeedVectorRestoreArray(mask_Gamma_ceed, &mask_Gamma_array); + CeedVectorRestoreArray(mask_I_ceed, &mask_I_array); + + // Create the mass or diff operator + // -- Interface vertices + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_Pi_Pi); + CeedOperatorSetName(op_Pi_Pi, "BDDC Pi, Pi operator"); + CeedOperatorSetField(op_Pi_Pi, "u", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_Pi, "qdata", data_fine->elem_restr_qd_i, CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_Pi, "v", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + // -- Subdomains to interface vertices + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_Pi_r); + CeedOperatorSetName(op_Pi_r, "BDDC Pi, r operator"); + CeedOperatorSetField(op_Pi_r, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_Pi_r, "qdata", data_fine->elem_restr_qd_i, CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_Pi_r, "v", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + // -- Interface vertices to subdomains + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_r_Pi); + CeedOperatorSetName(op_r_Pi, "BDDC r, Pi operator"); + CeedOperatorSetField(op_r_Pi, "u", elem_restr_Pi, basis_Pi, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_Pi, "qdata", data_fine->elem_restr_qd_i, CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_Pi, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomains + CeedOperatorCreate(ceed, data_fine->qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_r_r); + CeedOperatorSetName(op_r_r, "BDDC r, r operator"); + CeedOperatorSetField(op_r_r, "u", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_r_r, "qdata", data_fine->elem_restr_qd_i, CEED_BASIS_COLLOCATED, data_fine->q_data); + CeedOperatorSetField(op_r_r, "v", elem_restr_r, basis_u, CEED_VECTOR_ACTIVE); + // -- Subdomain FDM inverse + CeedOperatorCreateFDMElementInverse(op_r_r, &op_r_r_inv, CEED_REQUEST_IMMEDIATE); + + // Injection and restriction operators + CeedQFunction qf_identity, qf_inject_Pi, qf_restrict_Pi; + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_NONE, &qf_identity); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_INTERP, CEED_EVAL_NONE, &qf_inject_Pi); + CeedQFunctionCreateIdentity(ceed, num_comp_u, CEED_EVAL_NONE, CEED_EVAL_INTERP, &qf_restrict_Pi); + // -- Injection to interface vertices + CeedOperatorCreate(ceed, qf_inject_Pi, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_inject_Pi); + CeedOperatorSetName(op_inject_Pi, "BDDC Pi injection operator"); + CeedOperatorSetField(op_inject_Pi, "input", elem_restr_r, basis_Pi_r, CEED_VECTOR_ACTIVE); // Note: from r to Pi + CeedOperatorSetField(op_inject_Pi, "output", elem_restr_Pi, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + // -- Injection to broken interface vertices + CeedOperatorCreate(ceed, qf_restrict_Pi, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_inject_Pi_r); + CeedOperatorSetName(op_inject_Pi_r, "BDDC Pi, r injection operator"); + CeedOperatorSetField(op_inject_Pi_r, "input", elem_restr_Pi_r, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); // Note: from r to Pi_r + CeedOperatorSetField(op_inject_Pi_r, "output", elem_restr_r, basis_Pi_r, CEED_VECTOR_ACTIVE); + // -- Injection to subdomains + CeedOperatorCreate(ceed, qf_identity, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_inject_r); + CeedOperatorSetName(op_inject_r, "BDDC r injection operator"); + CeedOperatorSetField(op_inject_r, "input", data_fine->elem_restr_u, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_inject_r, "output", elem_restr_r, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + // -- Restriction from interface vertices + CeedOperatorCreate(ceed, qf_restrict_Pi, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_restrict_Pi); + CeedOperatorSetName(op_restrict_Pi, "BDDC Pi restriction operator"); + CeedOperatorSetField(op_restrict_Pi, "input", elem_restr_Pi, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_restrict_Pi, "output", elem_restr_r, basis_Pi_r, CEED_VECTOR_ACTIVE); // Note: from Pi to r + // -- Restriction from interface vertices + CeedOperatorCreate(ceed, qf_inject_Pi, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_restrict_Pi_r); + CeedOperatorSetName(op_restrict_Pi_r, "BDDC Pi, r restriction operator"); + CeedOperatorSetField(op_restrict_Pi_r, "input", elem_restr_r, basis_Pi_r, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_restrict_Pi_r, "output", elem_restr_Pi_r, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); // Note: from Pi_r to r + // -- Restriction from subdomains + CeedOperatorCreate(ceed, qf_identity, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_restrict_r); + CeedOperatorSetName(op_restrict_r, "BDDC r restriction operator"); + CeedOperatorSetField(op_restrict_r, "input", elem_restr_r, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + CeedOperatorSetField(op_restrict_r, "output", data_fine->elem_restr_u, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); + // -- Cleanup + CeedQFunctionDestroy(&qf_identity); + CeedQFunctionDestroy(&qf_inject_Pi); + CeedQFunctionDestroy(&qf_restrict_Pi); + + // Save libCEED data required for level + data_bddc->basis_Pi = basis_Pi; + data_bddc->basis_Pi_r = basis_Pi_r; + data_bddc->elem_restr_Pi = elem_restr_Pi; + data_bddc->elem_restr_Pi_r = elem_restr_Pi_r; + data_bddc->elem_restr_r = elem_restr_r; + data_bddc->op_Pi_r = op_Pi_r; + data_bddc->op_r_Pi = op_r_Pi; + data_bddc->op_Pi_Pi = op_Pi_Pi; + data_bddc->op_r_r = op_r_r; + data_bddc->op_r_r_inv = op_r_r_inv; + data_bddc->op_inject_r = op_inject_r; + data_bddc->op_inject_Pi = op_inject_Pi; + data_bddc->op_inject_Pi_r = op_inject_Pi_r; + data_bddc->op_restrict_r = op_restrict_r; + data_bddc->op_restrict_Pi = op_restrict_Pi; + data_bddc->op_restrict_Pi_r = op_restrict_Pi_r; + data_bddc->x_Pi_ceed = x_Pi_ceed; + data_bddc->y_Pi_ceed = y_Pi_ceed; + data_bddc->x_Pi_r_ceed = x_Pi_r_ceed; + data_bddc->y_Pi_r_ceed = y_Pi_r_ceed; + data_bddc->mult_ceed = mult_ceed; + data_bddc->x_r_ceed = x_r_ceed; + data_bddc->y_r_ceed = y_r_ceed; + data_bddc->z_r_ceed = z_r_ceed; + data_bddc->mask_r_ceed = mask_r_ceed; + data_bddc->mask_Gamma_ceed = mask_Gamma_ceed; + data_bddc->mask_I_ceed = mask_I_ceed; + data_bddc->x_ceed = data_fine->x_ceed; + data_bddc->y_ceed = data_fine->y_ceed; + + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// Set up libCEED error operator +// ----------------------------------------------------------------------------- PetscErrorCode SetupErrorOperator(DM dm, Ceed ceed, BPData bp_data, CeedInt topo_dim, PetscInt num_comp_x, PetscInt num_comp_u, CeedOperator *op_error) { DM dm_coord; @@ -333,4 +584,6 @@ PetscErrorCode SetupErrorOperator(DM dm, Ceed ceed, BPData bp_data, CeedInt topo CeedBasisDestroy(&basis_u); PetscFunctionReturn(PETSC_SUCCESS); -} +}; + +// ----------------------------------------------------------------------------- diff --git a/examples/petsc/src/matops.c b/examples/petsc/src/matops.c index 92c645b3c5..608bb62324 100644 --- a/examples/petsc/src/matops.c +++ b/examples/petsc/src/matops.c @@ -169,17 +169,295 @@ PetscErrorCode MatMult_Restrict(Mat A, Vec X, Vec Y) { PetscFunctionReturn(PETSC_SUCCESS); } +// ----------------------------------------------------------------------------- +// This function sets up the BDDC preconditioner +// ----------------------------------------------------------------------------- +PetscErrorCode PCShellSetup_BDDC(PC pc) { + BDDCApplyContext bddc_ctx; + + PetscFunctionBeginUser; + PetscCall(PCShellGetContext(pc, (void *)&bddc_ctx)); + + // Assemble mat for element Schur AMG + PetscCall(VecZeroEntries(bddc_ctx->X_Pi_r_loc)); + PetscCall(SNESComputeJacobianDefaultColor(bddc_ctx->snes_Pi_r, bddc_ctx->X_Pi_r_loc, bddc_ctx->mat_S_Pi_r, bddc_ctx->mat_S_Pi_r, NULL)); + PetscCall(MatAssemblyBegin(bddc_ctx->mat_S_Pi_r, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(bddc_ctx->mat_S_Pi_r, MAT_FINAL_ASSEMBLY)); + + // Assemble mat for Schur AMG + PetscCall(VecZeroEntries(bddc_ctx->X_Pi)); + PetscCall(SNESComputeJacobianDefaultColor(bddc_ctx->snes_Pi, bddc_ctx->X_Pi, bddc_ctx->mat_S_Pi, bddc_ctx->mat_S_Pi, NULL)); + PetscCall(MatAssemblyBegin(bddc_ctx->mat_S_Pi, MAT_FINAL_ASSEMBLY)); + PetscCall(MatAssemblyEnd(bddc_ctx->mat_S_Pi, MAT_FINAL_ASSEMBLY)); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function provides the action of the element Schur compliment +// ----------------------------------------------------------------------------- +PetscErrorCode MatMult_BDDCElementSchur(BDDCApplyContext bddc_ctx, Vec X_Pi_r_loc, Vec Y_Pi_r_loc) { + CeedDataBDDC data = bddc_ctx->ceed_data_bddc; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + // Set arrays in libCEED + PetscCall(VecReadP2C(X_Pi_r_loc, &x_mem_type, data->x_Pi_r_ceed)); + PetscCall(VecP2C(Y_Pi_r_loc, &y_mem_type, data->y_Pi_r_ceed)); + + // Apply action on Schur compliment + // Y_Pi_r = -B A_r,r^-1 B^T X_Pi_r + // -- X_r = B^T X_Pi_r + CeedOperatorApply(data->op_inject_Pi_r, data->x_Pi_r_ceed, data->x_r_ceed, CEED_REQUEST_IMMEDIATE); + // -- Y_r = A_r,r^-1 X_r + CeedOperatorApply(data->op_r_r_inv, data->x_r_ceed, data->y_r_ceed, CEED_REQUEST_IMMEDIATE); + // -- Y_Pi_r = -B Y_r + CeedOperatorApply(data->op_restrict_Pi_r, data->y_r_ceed, data->y_Pi_r_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorScale(data->y_Pi_r_ceed, -1.0); + + // Restore arrays + PetscCall(VecReadC2P(data->x_Pi_r_ceed, x_mem_type, X_Pi_r_loc)); + PetscCall(VecC2P(data->y_Pi_r_ceed, y_mem_type, Y_Pi_r_loc)); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function assembles the element Schur compliment for the dummy SNES +// ----------------------------------------------------------------------------- +PetscErrorCode FormResidual_BDDCElementSchur(SNES snes, Vec X_Pi_r_loc, Vec Y_Pi_r_loc, void *ctx) { + BDDCApplyContext bddc_ctx = (BDDCApplyContext)ctx; + + PetscFunctionBeginUser; + PetscCall(MatMult_BDDCElementSchur(bddc_ctx, X_Pi_r_loc, Y_Pi_r_loc)); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function provides the action of the element inverse +// ----------------------------------------------------------------------------- +PetscErrorCode BDDCArrInv(BDDCApplyContext bddc_ctx, CeedVector x_r_ceed, CeedVector y_r_ceed) { + CeedDataBDDC data = bddc_ctx->ceed_data_bddc; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + // Y_r = A_r,r^-1 (I + B S^-1 B^T A_r,r^-1) X_r + // -- X_r = (I + B S^-1 B^T A_r,r^-1) X_r + // ---- Y_r = A_r,r^-1 X_r + CeedVectorPointwiseMult(x_r_ceed, x_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_r_r_inv, x_r_ceed, y_r_ceed, CEED_REQUEST_IMMEDIATE); + // ---- Y_Pi_r = B^T Y_r + PetscCall(VecP2C(bddc_ctx->Y_Pi_r_loc, &y_mem_type, data->y_Pi_r_ceed)); + CeedOperatorApply(data->op_restrict_Pi_r, y_r_ceed, data->y_Pi_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->y_Pi_r_ceed, y_mem_type, bddc_ctx->Y_Pi_r_loc)); + // ---- X_Pi_r = S^-1 Y_Pi_r + PetscCall(KSPSolve(bddc_ctx->ksp_S_Pi_r, bddc_ctx->Y_Pi_r_loc, bddc_ctx->X_Pi_r_loc)); + // ---- X_r += B X_Pi_r + PetscCall(VecReadP2C(bddc_ctx->X_Pi_r_loc, &x_mem_type, data->x_Pi_r_ceed)); + CeedOperatorApplyAdd(data->op_inject_Pi_r, data->x_Pi_r_ceed, x_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecReadC2P(data->x_Pi_r_ceed, x_mem_type, bddc_ctx->X_Pi_r_loc)); + // -- Y_r = A_r,r^-1 X_r + CeedOperatorApply(data->op_r_r_inv, x_r_ceed, y_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function provides the action of the Schur compliment +// ----------------------------------------------------------------------------- +PetscErrorCode MatMult_BDDCSchur(BDDCApplyContext bddc_ctx, Vec X_Pi, Vec Y_Pi) { + CeedDataBDDC data = bddc_ctx->ceed_data_bddc; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + // Global-to-Local + PetscCall(VecZeroEntries(bddc_ctx->X_Pi_loc)); + PetscCall(DMGlobalToLocal(bddc_ctx->dm_Pi, X_Pi, INSERT_VALUES, bddc_ctx->X_Pi_loc)); + // Set arrays in libCEED + PetscCall(VecReadP2C(bddc_ctx->X_Pi_loc, &x_mem_type, data->x_Pi_ceed)); + PetscCall(VecP2C(bddc_ctx->Y_Pi_loc, &y_mem_type, data->y_Pi_ceed)); + + // Apply action on Schur compliment + // Y_Pi = (A_Pi,Pi - A_Pi,r A_r,r^-1 A_r,Pi) X_Pi + // -- X_r = A_r,Pi X_Pi + CeedOperatorApply(data->op_r_Pi, data->x_Pi_ceed, data->x_r_ceed, CEED_REQUEST_IMMEDIATE); + // -- Y_r = A_r,r^-1 X_r + PetscCall(BDDCArrInv(bddc_ctx, data->x_r_ceed, data->y_r_ceed)); + // -- Y_Pi = -A_Pi,r Y_r + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_Pi_r, data->y_r_ceed, data->y_Pi_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorScale(data->y_Pi_ceed, -1.0); + // -- Y_Pi += A_Pi,Pi X_Pi + CeedOperatorApplyAdd(data->op_Pi_Pi, data->x_Pi_ceed, data->y_Pi_ceed, CEED_REQUEST_IMMEDIATE); + + // Restore arrays + PetscCall(VecReadC2P(data->x_Pi_ceed, x_mem_type, bddc_ctx->X_Pi_loc)); + PetscCall(VecC2P(data->y_Pi_ceed, y_mem_type, bddc_ctx->Y_Pi_loc)); + // Local-to-Global + PetscCall(VecZeroEntries(Y_Pi)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm_Pi, bddc_ctx->Y_Pi_loc, ADD_VALUES, Y_Pi)); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function assembles the Schur compliment for the dummy SNES +// ----------------------------------------------------------------------------- +PetscErrorCode FormResidual_BDDCSchur(SNES snes, Vec X_Pi, Vec Y_Pi, void *ctx) { + BDDCApplyContext bddc_ctx = (BDDCApplyContext)ctx; + + PetscFunctionBeginUser; + PetscCall(MatMult_BDDCSchur(bddc_ctx, X_Pi, Y_Pi)); + PetscFunctionReturn(PETSC_SUCCESS); +}; + +// ----------------------------------------------------------------------------- +// This function uses libCEED to compute the action of the BDDC preconditioner +// ----------------------------------------------------------------------------- +PetscErrorCode PCShellApply_BDDC(PC pc, Vec X, Vec Y) { + BDDCApplyContext bddc_ctx; + CeedDataBDDC data; + PetscMemType x_mem_type, y_mem_type; + + PetscFunctionBeginUser; + PetscCall(PCShellGetContext(pc, (void *)&bddc_ctx)); + data = bddc_ctx->ceed_data_bddc; + + // Inject to broken space + // -- Scaled injection, point multiply by 1/multiplicity + // ---- Global-to-Local + PetscCall(VecZeroEntries(bddc_ctx->X_loc)); + PetscCall(DMGlobalToLocal(bddc_ctx->dm, X, INSERT_VALUES, bddc_ctx->X_loc)); + // ---- Inject to Y_r + PetscCall(VecReadP2C(bddc_ctx->X_loc, &x_mem_type, data->x_ceed)); + CeedOperatorApply(data->op_inject_r, data->x_ceed, data->y_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecReadC2P(data->x_ceed, x_mem_type, bddc_ctx->X_loc)); + // -- Harmonic injection, scaled with jump map + if (bddc_ctx->is_harmonic) { + CeedVectorPointwiseMult(data->x_r_ceed, data->y_r_ceed, data->mask_I_ceed); + // ---- Z_r = A_I,I^-1 X_r + PetscCall(BDDCArrInv(bddc_ctx, data->x_r_ceed, data->z_r_ceed)); + // ---- X_r = - A_Gamma,I Z_r + CeedVectorPointwiseMult(data->z_r_ceed, data->z_r_ceed, data->mask_I_ceed); + CeedOperatorApply(data->op_r_r, data->z_r_ceed, data->x_r_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mask_Gamma_ceed); + // ---- J^T (jump map) + CeedVectorPointwiseMult(data->z_r_ceed, data->x_r_ceed, data->mult_ceed); + // ------ Local-to-Global + PetscCall(VecP2C(bddc_ctx->Y_loc, &y_mem_type, data->y_ceed)); + CeedOperatorApply(data->op_restrict_r, data->z_r_ceed, data->y_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->y_ceed, y_mem_type, bddc_ctx->Y_loc)); + PetscCall(VecZeroEntries(Y)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm, bddc_ctx->Y_loc, ADD_VALUES, Y)); + // ------ Global-to-Local + PetscCall(VecZeroEntries(bddc_ctx->Y_loc)); + PetscCall(DMGlobalToLocal(bddc_ctx->dm, Y, INSERT_VALUES, bddc_ctx->Y_loc)); + PetscCall(VecReadP2C(bddc_ctx->Y_loc, &y_mem_type, data->y_ceed)); + CeedOperatorApply(data->op_inject_r, data->y_ceed, data->z_r_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorAXPY(data->z_r_ceed, -1.0, data->x_r_ceed); + // ---- Y_r -= J^T (- A_Gamma,I A_I,I^-1) Y_r + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mult_ceed); + CeedVectorAXPY(data->y_r_ceed, -1.0, data->z_r_ceed); + } else { + CeedVectorPointwiseMult(data->y_r_ceed, data->y_r_ceed, data->mult_ceed); + } + // ---- Inject to Y_Pi + PetscCall(VecP2C(bddc_ctx->Y_Pi_loc, &y_mem_type, data->y_Pi_ceed)); + CeedOperatorApply(data->op_inject_Pi, data->y_r_ceed, data->y_Pi_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->y_Pi_ceed, y_mem_type, bddc_ctx->Y_Pi_loc)); + // ---- Global-To-Local + PetscCall(VecZeroEntries(bddc_ctx->Y_Pi)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm_Pi, bddc_ctx->Y_Pi_loc, ADD_VALUES, bddc_ctx->Y_Pi)); + // Note: current values in Y_Pi, Y_r + + // K_u^-T - update nodal values from subdomain + // -- X_r = A_r,r^-1 Y_r + PetscCall(BDDCArrInv(bddc_ctx, data->y_r_ceed, data->x_r_ceed)); + // -- X_Pi = A_Pi,r X_r + PetscCall(VecP2C(bddc_ctx->X_Pi_loc, &x_mem_type, data->x_Pi_ceed)); + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mask_r_ceed); + CeedOperatorApply(data->op_Pi_r, data->x_r_ceed, data->x_Pi_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->x_Pi_ceed, x_mem_type, bddc_ctx->X_Pi_loc)); + PetscCall(VecZeroEntries(bddc_ctx->X_Pi)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm_Pi, bddc_ctx->X_Pi_loc, ADD_VALUES, bddc_ctx->X_Pi)); + // -- Y_Pi -= A_Pi_r A_r,r^-1 Y_r == X_Pi + PetscCall(VecAXPY(bddc_ctx->Y_Pi, -1.0, bddc_ctx->X_Pi)); + // Note: current values in Y_Pi, Y_r + + // P^-1 - subdomain and Schur compliment solve + // -- X_r = A_r,r^-1 Y_r + PetscCall(BDDCArrInv(bddc_ctx, data->y_r_ceed, data->x_r_ceed)); + // -- X_Pi = S_Pi^-1 Y_Pi + PetscCall(KSPSolve(bddc_ctx->ksp_S_Pi, bddc_ctx->Y_Pi, bddc_ctx->X_Pi)); + // Note: current values in X_Pi, X_r + + // K_u^-1 - update subdomain values from nodes + // -- Y_r = A_r,Pi X_Pi + PetscCall(VecZeroEntries(bddc_ctx->X_Pi_loc)); + PetscCall(DMGlobalToLocal(bddc_ctx->dm_Pi, bddc_ctx->X_Pi, INSERT_VALUES, bddc_ctx->X_Pi_loc)); + PetscCall(VecReadP2C(bddc_ctx->X_Pi_loc, &x_mem_type, data->x_Pi_ceed)); + CeedOperatorApply(data->op_r_Pi, data->x_Pi_ceed, data->y_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecReadC2P(data->x_Pi_ceed, x_mem_type, bddc_ctx->X_Pi_loc)); + // -- Z_r = A_r,r^-1 Y_r + PetscCall(BDDCArrInv(bddc_ctx, data->y_r_ceed, data->z_r_ceed)); + // -- X_r -= A_r,r^-1 A_r,Pi X_Pi == Z_r + CeedVectorAXPY(data->x_r_ceed, -1.0, data->z_r_ceed); + // Note: current values in X_Pi, X_r + + // Restrict to fine space + // -- Scaled restriction, point multiply by 1/multiplicity + // ---- Copy X_Pi to X_r + PetscCall(VecReadP2C(bddc_ctx->X_Pi_loc, &x_mem_type, data->x_Pi_ceed)); + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mask_r_ceed); + CeedOperatorApplyAdd(data->op_restrict_Pi, data->x_Pi_ceed, data->x_r_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecReadC2P(data->x_Pi_ceed, x_mem_type, bddc_ctx->X_Pi_loc)); + // -- Harmonic injection, scaled with jump map + if (bddc_ctx->is_harmonic) { + // ---- J^T (jump map) + CeedVectorPointwiseMult(data->z_r_ceed, data->x_r_ceed, data->mult_ceed); + // ------ Local-to-Global + PetscCall(VecP2C(bddc_ctx->Y_loc, &y_mem_type, data->y_ceed)); + CeedOperatorApply(data->op_restrict_r, data->z_r_ceed, data->y_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->y_ceed, y_mem_type, bddc_ctx->Y_loc)); + PetscCall(VecZeroEntries(Y)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm, bddc_ctx->Y_loc, ADD_VALUES, Y)); + // ------ Global-to-Local + PetscCall(VecZeroEntries(bddc_ctx->Y_loc)); + PetscCall(DMGlobalToLocal(bddc_ctx->dm, Y, INSERT_VALUES, bddc_ctx->Y_loc)); + PetscCall(VecReadP2C(bddc_ctx->Y_loc, &y_mem_type, data->y_ceed)); + CeedOperatorApply(data->op_inject_r, data->y_ceed, data->z_r_ceed, CEED_REQUEST_IMMEDIATE); + CeedVectorAXPY(data->z_r_ceed, -1.0, data->x_r_ceed); + PetscCall(VecReadC2P(data->y_ceed, y_mem_type, bddc_ctx->Y_loc)); + // ---- Y_r = A_I,Gamma Z_r + CeedVectorPointwiseMult(data->z_r_ceed, data->z_r_ceed, data->mask_Gamma_ceed); + CeedOperatorApply(data->op_r_r, data->z_r_ceed, data->y_r_ceed, CEED_REQUEST_IMMEDIATE); + // ---- Z_r = A_I,I^-1 Y_r + PetscCall(BDDCArrInv(bddc_ctx, data->y_r_ceed, data->z_r_ceed)); + // ---- X_r += A_I,I^-1 A_I,Gamma J X_r + CeedVectorPointwiseMult(data->z_r_ceed, data->z_r_ceed, data->mask_I_ceed); + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mult_ceed); + CeedVectorAXPY(data->x_r_ceed, -1.0, data->z_r_ceed); + } else { + CeedVectorPointwiseMult(data->x_r_ceed, data->x_r_ceed, data->mult_ceed); + } + // ---- Restrict to Y + PetscCall(VecP2C(bddc_ctx->Y_loc, &y_mem_type, data->y_ceed)); + CeedOperatorApply(data->op_restrict_r, data->x_r_ceed, data->y_ceed, CEED_REQUEST_IMMEDIATE); + PetscCall(VecC2P(data->y_ceed, y_mem_type, bddc_ctx->Y_loc)); + // ---- Local-to-Global + PetscCall(VecZeroEntries(Y)); + PetscCall(DMLocalToGlobal(bddc_ctx->dm, bddc_ctx->Y_loc, ADD_VALUES, Y)); + // Note: current values in Y + PetscFunctionReturn(PETSC_SUCCESS); +}; + // ----------------------------------------------------------------------------- // This function calculates the error in the final solution // ----------------------------------------------------------------------------- PetscErrorCode ComputeL2Error(Vec X, PetscScalar *l2_error, OperatorApplyContext op_error_ctx) { - Vec E; + PetscScalar error_sq = 1.0; + Vec E; PetscFunctionBeginUser; PetscCall(VecDuplicate(X, &E)); PetscCall(ApplyLocal_Ceed(X, E, op_error_ctx)); PetscCall(VecViewFromOptions(E, NULL, "-error_view")); - PetscScalar error_sq = 1.0; PetscCall(VecSum(E, &error_sq)); *l2_error = sqrt(error_sq); PetscCall(VecDestroy(&E)); diff --git a/examples/petsc/src/petscutils.c b/examples/petsc/src/petscutils.c index 2a95b36372..e68bf04bf9 100644 --- a/examples/petsc/src/petscutils.c +++ b/examples/petsc/src/petscutils.c @@ -184,6 +184,19 @@ PetscErrorCode SetupDMByDegree(DM dm, PetscInt p_degree, PetscInt q_extra, Petsc PetscFunctionReturn(PETSC_SUCCESS); } +// ----------------------------------------------------------------------------- +// This function sets up a BDDC vertex only DM from an existing fine DM +// ----------------------------------------------------------------------------- +PetscErrorCode SetupVertexDMFromDM(DM dm, DM dm_vertex, PetscInt q_extra, PetscInt num_comp_u, PetscBool enforce_bc) { + PetscInt dim; + + PetscFunctionBeginUser; + PetscCall(DMGetDimension(dm, &dim)); + PetscCall(SetupDMByDegree(dm_vertex, 1, q_extra, num_comp_u, dim, enforce_bc)); + + PetscFunctionReturn(PETSC_SUCCESS); +} + // ----------------------------------------------------------------------------- // Get CEED restriction data from DMPlex // -----------------------------------------------------------------------------