Skip to content

Commit

Permalink
Merge pull request #37 from aperijake/nodal_integration
Browse files Browse the repository at this point in the history
some optimizing of internal force calc
  • Loading branch information
aperijake authored Oct 2, 2024
2 parents d54d762 + f928c52 commit c063bfd
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 46 deletions.
6 changes: 3 additions & 3 deletions include/ComputeInternalForceFunctors.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct ComputeInternalForceFromIntegrationPointFunctor {
ComputeInternalForceFromIntegrationPointFunctor(FunctionsFunctor &functions_functor, IntegrationFunctor &integration_functor, StressFunctor &stress_functor)
: m_functions_functor(functions_functor), m_integration_functor(integration_functor), m_stress_functor(stress_functor) {}

KOKKOS_INLINE_FUNCTION void operator()(const Kokkos::Array<Eigen::Matrix<double, NumNodes, 3>, 3> &gathered_node_data, Eigen::Matrix<double, NumNodes, 3> &force, size_t actual_num_neighbors) const {
KOKKOS_INLINE_FUNCTION void operator()(const Kokkos::Array<Eigen::Matrix<double, NumNodes, 3>, 2> &gathered_node_data, Eigen::Matrix<double, NumNodes, 3> &force, size_t actual_num_neighbors) const {
const Eigen::Matrix<double, NumNodes, 3> &node_coordinates = gathered_node_data[0];
const Eigen::Matrix<double, NumNodes, 3> &node_displacements = gathered_node_data[1];
// const Eigen::Matrix<double, NumNodes, 3> &node_velocities = gathered_node_data[2];
Expand Down Expand Up @@ -51,7 +51,7 @@ struct ComputeInternalForceFromSmoothingCellFunctor {
ComputeInternalForceFromSmoothingCellFunctor(StressFunctor &stress_functor)
: m_stress_functor(stress_functor) {}

KOKKOS_INLINE_FUNCTION void operator()(const Kokkos::Array<Eigen::Matrix<double, 3, 3>, 2> &gathered_node_data_gradient, Eigen::Matrix<double, MaxNumNodes, 3> &force, const Eigen::Matrix<double, MaxNumNodes, 3> &b_matrix, double volume, size_t actual_num_neighbors) const {
KOKKOS_INLINE_FUNCTION void operator()(const Kokkos::Array<Eigen::Matrix<double, 3, 3>, 1> &gathered_node_data_gradient, Eigen::Matrix<double, MaxNumNodes, 3> &force, const Eigen::Matrix<double, MaxNumNodes, 3> &b_matrix, double volume, size_t actual_num_neighbors) const {
const Eigen::Matrix3d &displacement_gradient = gathered_node_data_gradient[0];
// const Eigen::Matrix3d& velocity_gradient = gathered_node_data_gradient[1];

Expand All @@ -71,7 +71,7 @@ struct ComputeStressOnSmoothingCellFunctor {
ComputeStressOnSmoothingCellFunctor(StressFunctor &stress_functor)
: m_stress_functor(stress_functor) {}

KOKKOS_INLINE_FUNCTION const Eigen::Matrix<double, 3, 3> operator()(const Kokkos::Array<Eigen::Matrix<double, 3, 3>, 2> &gathered_node_data_gradient) const {
KOKKOS_INLINE_FUNCTION const Eigen::Matrix<double, 3, 3> operator()(const Kokkos::Array<Eigen::Matrix<double, 3, 3>, 1> &gathered_node_data_gradient) const {
const Eigen::Matrix3d &displacement_gradient = gathered_node_data_gradient[0];
// const Eigen::Matrix3d& velocity_gradient = gathered_node_data_gradient[1];

Expand Down
64 changes: 32 additions & 32 deletions include/ElementProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,6 @@ class ElementGatherScatterProcessor {

template <typename StressFunctor>
void for_each_cell_gather_scatter_nodal_data(const SmoothedCellData &scd, StressFunctor &stress_functor) {
// Get the number of cells
size_t num_cells = scd.NumCells();

// Get th ngp mesh
auto ngp_mesh = m_ngp_mesh;

Expand All @@ -235,38 +232,41 @@ class ElementGatherScatterProcessor {
}
auto ngp_field_to_scatter = *m_ngp_field_to_scatter;

// kokkos loop over all the cells
// Get the number of cells
const size_t num_cells = scd.NumCells();

// Loop over all the cells
Kokkos::parallel_for(
"for_each_cell_gather_scatter_nodal_data", num_cells, KOKKOS_LAMBDA(const size_t cell_id) {
// Set up the field data to gather
Kokkos::Array<Eigen::Matrix<double, 3, 3>, NumFields> field_data_to_gather_gradient;
for (size_t f = 0; f < NumFields; ++f) {
field_data_to_gather_gradient[f].setZero();
}

auto node_local_offsets = scd.GetCellNodeLocalOffsets(cell_id);
auto node_function_derivatives = scd.GetCellFunctionDerivatives(cell_id);
const auto node_local_offsets = scd.GetCellNodeLocalOffsets(cell_id);
const auto node_function_derivatives = scd.GetCellFunctionDerivatives(cell_id);

size_t num_nodes = node_local_offsets.extent(0);
const size_t num_nodes = node_local_offsets.extent(0);

// Compute the field gradients. TODO(jake): probably want to flip the order of the loops. Construct derivative operator matrix and multiply by field data.
for (size_t f = 0; f < NumFields; ++f) {
field_data_to_gather_gradient[f].fill(0.0);
for (size_t k = 0; k < num_nodes; ++k) {
stk::mesh::Entity node(node_local_offsets[k]);
stk::mesh::FastMeshIndex node_index = ngp_mesh.fast_mesh_index(node);
size_t derivative_offset = k * 3;
// Pre-allocation for the function derivatives
Eigen::Matrix<double, 1, 3> function_derivatives;

// Create function_derivatives as a row vector
Eigen::Matrix<double, 1, 3> function_derivatives;
for (size_t j = 0; j < 3; ++j) {
function_derivatives(j) = node_function_derivatives(derivative_offset + j);
}
// Compute the field gradients
for (size_t k = 0; k < num_nodes; ++k) {
// Populate the function derivatives
const size_t derivative_offset = k * 3;
for (size_t j = 0; j < 3; ++j) {
function_derivatives(j) = node_function_derivatives(derivative_offset + j);
}

// Add the field gradient
const stk::mesh::Entity node(node_local_offsets[k]);
const stk::mesh::FastMeshIndex node_index = ngp_mesh.fast_mesh_index(node);
for (size_t f = 0; f < NumFields; ++f) {
// Perform the matrix multiplication for field_data_to_gather_gradient
for (size_t i = 0; i < 3; ++i) {
double field_data_ki = ngp_fields_to_gather[f](node_index, i);
Eigen::Matrix<double, 1, 3> field_data_ki_matrix = field_data_ki * function_derivatives;
for (size_t j = 0; j < 3; ++j) {
field_data_to_gather_gradient[f](i, j) += field_data_ki_matrix(j);
}
field_data_to_gather_gradient[f].row(i) += ngp_fields_to_gather[f](node_index, i) * function_derivatives;
}
}
}
Expand All @@ -277,18 +277,18 @@ class ElementGatherScatterProcessor {

// Scatter the force to the nodes
for (size_t k = 0; k < num_nodes; ++k) {
stk::mesh::Entity node(node_local_offsets[k]);
stk::mesh::FastMeshIndex node_index = ngp_mesh.fast_mesh_index(node);
Eigen::Matrix<double, 3, 1> function_derivatives;
size_t derivative_offset = k * 3;

// Create function_derivatives as a row vector
// Populate the function derivatives
const size_t derivative_offset = k * 3;
for (size_t j = 0; j < 3; ++j) {
function_derivatives(j) = node_function_derivatives(derivative_offset + j);
}

// Create a eigen vector for the force
Eigen::Matrix<double, 3, 1> force = pk1_stress_neg_volume * function_derivatives;
// Calculate the force
const Eigen::Matrix<double, 3, 1> force = pk1_stress_neg_volume * function_derivatives.transpose();

// Add the force to the node
const stk::mesh::Entity node(node_local_offsets[k]);
const stk::mesh::FastMeshIndex node_index = ngp_mesh.fast_mesh_index(node);
for (size_t j = 0; j < 3; ++j) {
Kokkos::atomic_add(&ngp_field_to_scatter(node_index, j), force(j));
}
Expand Down
4 changes: 2 additions & 2 deletions include/ElementReproducingKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ElementReproducingKernel : public ElementBase {
// Create the element processor
std::string force_field_name = m_use_one_pass_method ? "force_coefficients" : "force_local";
const FieldQueryData<double> field_query_data_scatter = {force_field_name, FieldQueryState::None};
m_element_processor = std::make_shared<ElementGatherScatterProcessor<2, true>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
m_element_processor = std::make_shared<ElementGatherScatterProcessor<1, true>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
}

void FindNeighbors() {
Expand Down Expand Up @@ -117,7 +117,7 @@ class ElementReproducingKernel : public ElementBase {
const std::vector<std::string> m_part_names;
std::shared_ptr<aperi::MeshData> m_mesh_data;
double m_kernel_radius_scale_factor;
std::shared_ptr<aperi::ElementGatherScatterProcessor<2, true>> m_element_processor;
std::shared_ptr<aperi::ElementGatherScatterProcessor<1, true>> m_element_processor;
std::shared_ptr<aperi::SmoothedCellData> m_smoothed_cell_data;
bool m_use_one_pass_method;
};
Expand Down
4 changes: 2 additions & 2 deletions include/ElementSmoothedTetrahedron4.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ElementSmoothedTetrahedron4 : public ElementBase {
void CreateElementProcessor() {
// Create the element processor
const FieldQueryData<double> field_query_data_scatter = {"force_coefficients", FieldQueryState::None};
m_element_processor = std::make_shared<ElementGatherScatterProcessor<2, true>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
m_element_processor = std::make_shared<ElementGatherScatterProcessor<1, true>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
}

void ComputeNeighborValues() {
Expand Down Expand Up @@ -121,7 +121,7 @@ class ElementSmoothedTetrahedron4 : public ElementBase {
const std::vector<FieldQueryData<double>> m_field_query_data_gather;
const std::vector<std::string> m_part_names;
std::shared_ptr<aperi::MeshData> m_mesh_data;
std::shared_ptr<aperi::ElementGatherScatterProcessor<2, true>> m_element_processor;
std::shared_ptr<aperi::ElementGatherScatterProcessor<1, true>> m_element_processor;
std::shared_ptr<aperi::SmoothedCellData> m_smoothed_cell_data;
};

Expand Down
4 changes: 2 additions & 2 deletions include/ElementTetrahedron4.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class ElementTetrahedron4 : public ElementBase {
return;
}
const FieldQueryData<double> field_query_data_scatter = {"force_coefficients", FieldQueryState::None};
m_element_processor = std::make_shared<aperi::ElementGatherScatterProcessor<3, false>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
m_element_processor = std::make_shared<aperi::ElementGatherScatterProcessor<2, false>>(m_field_query_data_gather, field_query_data_scatter, m_mesh_data, m_part_names);
}

// Create and destroy functors. Must be public to run on device.
Expand Down Expand Up @@ -124,7 +124,7 @@ class ElementTetrahedron4 : public ElementBase {
const std::vector<FieldQueryData<double>> m_field_query_data_gather;
const std::vector<std::string> m_part_names;
std::shared_ptr<aperi::MeshData> m_mesh_data;
std::shared_ptr<aperi::ElementGatherScatterProcessor<3, false>> m_element_processor;
std::shared_ptr<aperi::ElementGatherScatterProcessor<2, false>> m_element_processor;
};

} // namespace aperi
7 changes: 2 additions & 5 deletions src/InternalForceContribution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,17 @@ void InternalForceContribution::Preprocess() {
const std::vector<std::string> part_names = {m_internal_force_contribution_parameters.part_name};
std::vector<FieldQueryData<double>> field_query_data_gather;
if (m_internal_force_contribution_parameters.integration_scheme_parameters->GetIntegrationSchemeType() == IntegrationSchemeType::StrainSmoothing) { // TODO(jake): this doesn't have to have coordinates
field_query_data_gather.resize(2);
field_query_data_gather.resize(1);
if (UsesGeneralizedFields() && (!UsesOnePassMethod())) {
// If using generalized fields, use the gathered fields
field_query_data_gather[0] = FieldQueryData<double>{"displacement", FieldQueryState::None};
field_query_data_gather[1] = FieldQueryData<double>{"velocity", FieldQueryState::None};
} else {
field_query_data_gather[0] = FieldQueryData<double>{"displacement_coefficients", FieldQueryState::NP1};
field_query_data_gather[1] = FieldQueryData<double>{"velocity_coefficients", FieldQueryState::NP1};
}
} else {
field_query_data_gather.resize(3);
field_query_data_gather.resize(2);
field_query_data_gather[0] = FieldQueryData<double>{m_internal_force_contribution_parameters.mesh_data->GetCoordinatesFieldName(), FieldQueryState::None};
field_query_data_gather[1] = FieldQueryData<double>{"displacement_coefficients", FieldQueryState::NP1};
field_query_data_gather[2] = FieldQueryData<double>{"velocity_coefficients", FieldQueryState::NP1};
}

// Get the number of nodes per element
Expand Down

0 comments on commit c063bfd

Please sign in to comment.