diff --git a/.github/workflows/flare.yml b/.github/workflows/flare.yml
index 291c76a43..e1511d124 100644
--- a/.github/workflows/flare.yml
+++ b/.github/workflows/flare.yml
@@ -89,7 +89,7 @@ jobs:
 
       - name: Install LAMMPS
         run: |
-          git clone -b stable_29Sep2021_update2 --depth 1 https://github.com/lammps/lammps.git lammps
+          git clone --depth 1 https://github.com/lammps/lammps.git lammps
           cd lammps_plugins
           ./install.sh $(pwd)/../lammps
           cd ..
@@ -105,6 +105,11 @@ jobs:
           pip install -U codecov pytest pytest-cov pytest_mock
           pip install -r requirements.txt
 
+      - name: Patch ASE
+        run: |
+          ase_file="$(dirname $(python3 -c 'import ase; print(ase.__file__)'))/calculators/lammpsrun.py"
+          sed -i 's/line.startswith(_custom_thermo_mark)/line.strip\(\).startswith\("Step"\)/g' $ase_file
+
       - name: Run tests
         run: |
           export lmp=$(pwd)/lammps/build/lmp
diff --git a/docs/source/related.rst b/docs/source/related.rst
index 8aec1c4fb..2bc7bcb13 100644
--- a/docs/source/related.rst
+++ b/docs/source/related.rst
@@ -1,7 +1,7 @@
 Applications/Gallery
 ====================
 
-If you use FLARE in your research, please let us know. 
+If you use FLARE in your research, please let us know.
 We will list the applications of FLARE here.
 
 - Jonathan Vandermause, Steven B. Torrisi, Simon Batzner, Yu Xie, Lixin Sun, Alexie M. Kolpak, and Boris Kozinsky. **On-the-fly active learning of interpretable Bayesian force fields for atomistic rare events.** npj Computational Materials 6.1 (2020): 1-11. (`arXiv `_) (`published version `_)
@@ -14,11 +14,11 @@ We will list the applications of FLARE here.
 - Jin Soo Lim, Jonathan Vandermause, Matthijs A. Van Spronsen, Albert Musaelian, Yu Xie, Lixin Sun, Christopher R. O'Connor, Tobias Egle, Nicola Molinari, Jacob Florian, Kaining Duanmu, Robert J. Madix, Philippe Sautet, Cynthia M. Friend, and Boris Kozinsky. **Evolution of Metastable Structures at Bimetallic Surfaces from Microscopy and Machine-Learning Molecular Dynamics.** Journal of the American Chemical Society (2020). (`ChemRxiv `_) (`published version `_)
 
-.. figure:: https://www.researchgate.net/profile/Jin-Soo-Lim/publication/339087657/figure/fig3/AS:856047187132427@1581108962681/Frames-of-MD-simulation-showing-the-evolution-of-a-Pd-91-island-on-Ag111-at-500-K-over_W640.jpg 
+.. figure:: https://www.researchgate.net/profile/Jin-Soo-Lim/publication/339087657/figure/fig3/AS:856047187132427@1581108962681/Frames-of-MD-simulation-showing-the-evolution-of-a-Pd-91-island-on-Ag111-at-500-K-over_W640.jpg
    :figwidth: 80 %
    :align: center
 
-   Evolution of Pd/Ag bimetallic surface 
+   Evolution of Pd/Ag bimetallic surface
 
 - Yu Xie, Jonathan Vandermause, Lixin Sun, Andrea Cepellotti, and Boris Kozinsky. **Bayesian force fields from active learning for simulation of inter-dimensional transformation of stanene.** npj Computational Materials 7, no. 1 (2021): 1-10. (`arXiv `_) (`published version `_)
@@ -45,3 +45,18 @@ We will list the applications of FLARE here.
    Melting of Au nanoparticle
 
 - Kai Xu, Lei Yan, and Bingran You. **Bayesian active learning of interatomic force field for molecular dynamics simulation of Pt/Ag(111).** ChemRxiv preprint. (`ChemRxiv `_)
+
+- Anders Johansson, Yu Xie, Cameron J. Owen, Jin Soo Lim, Lixin Sun, Jonathan Vandermause, Boris Kozinsky. **Micron-scale heterogeneous catalysis with Bayesian force fields from first principles and active learning** (`arXiv `_)
+
+.. figure:: https://i.imgur.com/CPCrgWl.png
+   :figwidth: 80 %
+   :align: center
+
+   Strong scaling benchmarks
+
+.. figure:: https://i.imgur.com/p2UeYBo.png
+   :figwidth: 80 %
+   :align: center
+
+   Chemical reaction
+
diff --git a/flare/_version.py b/flare/_version.py
index 72f26f596..c68196d1c 100644
--- a/flare/_version.py
+++ b/flare/_version.py
@@ -1 +1 @@
-__version__ = "1.1.2"
+__version__ = "1.2.0"
diff --git a/lammps_plugins/README.md b/lammps_plugins/README.md
index 501b11652..56e1a8422 100644
--- a/lammps_plugins/README.md
+++ b/lammps_plugins/README.md
@@ -6,7 +6,7 @@
 ```
 git clone --depth=1 https://github.com/lammps/lammps.git
 ```
-2. From this directory, run the install script.
+2. From this directory (`flare/lammps_plugins`), run the install script.
 ```
 ./install.sh /path/to/lammps
 ```
@@ -25,7 +25,7 @@
 cd build
 cmake ../cmake -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON [-DKokkos_ARCH_VOLTA70=on if not auto-detected]
 ```
-If you really, really, really want to use the old Makefile system, you should be able to copy the files from `kokkos/` into `/path/to/lammps/src`, do `make yes-kokkos` and otherwise follow the LAMMPS Kokkos instructions.
+*Note: FLARE relies on [KokkosKernels](https://github.com/kokkos/kokkos-kernels) for a performance-portable matrix-matrix product. This will take advantage of MKL/cuBLAS etc. if found. By default, CMake will download KokkosKernels from GitHub and compile it together with LAMMPS. If you need multiple LAMMPS installations with the same Kokkos configuration, you can install Kokkos and KokkosKernels manually and then use the `-DEXTERNAL_KOKKOS=ON` option (with `-DCMAKE_PREFIX_PATH=/path/to/install` as needed).*
 
 ## Basic usage
 Input script:
diff --git a/lammps_plugins/compute_flare_std_atom.cpp b/lammps_plugins/compute_flare_std_atom.cpp
index 0fd23f87d..7ad52b201 100644
--- a/lammps_plugins/compute_flare_std_atom.cpp
+++ b/lammps_plugins/compute_flare_std_atom.cpp
@@ -28,7 +28,7 @@
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-ComputeFlareStdAtom::ComputeFlareStdAtom(LAMMPS *lmp, int narg, char **arg) : 
+ComputeFlareStdAtom::ComputeFlareStdAtom(LAMMPS *lmp, int narg, char **arg) :
     Compute(lmp, narg, arg), stds(nullptr)
 {
@@ -72,7 +72,7 @@ ComputeFlareStdAtom::~ComputeFlareStdAtom() {
 }
 
 /* ----------------------------------------------------------------------
-   init specific to this compute command 
+   init specific to this compute command
 ------------------------------------------------------------------------- */
 
 void ComputeFlareStdAtom::init() {
@@ -81,12 +81,7 @@ void ComputeFlareStdAtom::init() {
   //   error->all(FLERR, "Compute command requires newton pair on");
 
   // Request a full neighbor list.
-  int irequest = neighbor->request(this, instance_me);
-  neighbor->requests[irequest]->pair = 0;
-  neighbor->requests[irequest]->compute = 1;
-  neighbor->requests[irequest]->half = 0;
-  neighbor->requests[irequest]->full = 1;
-  neighbor->requests[irequest]->occasional = 1;
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
 }
 
 void ComputeFlareStdAtom::init_list(int /*id*/, NeighList *ptr)
@@ -166,9 +161,9 @@ void ComputeFlareStdAtom::compute_peratom() {
     }
 
     // Compute covariant descriptors.
- single_bond_multiple_cutoffs(x, type, jnum, n_inner, i, xtmp, ytmp, ztmp, - jlist, basis_function, cutoff_function, - n_species, n_max, l_max, radial_hyps, + single_bond_multiple_cutoffs(x, type, jnum, n_inner, i, xtmp, ytmp, ztmp, + jlist, basis_function, cutoff_function, + n_species, n_max, l_max, radial_hyps, cutoff_hyps, single_bond_vals, single_bond_env_dervs, cutoff_matrix); @@ -200,19 +195,19 @@ void ComputeFlareStdAtom::compute_peratom() { } cum_types += n_clusters_by_type[s]; } - Eigen::VectorXd L_inv_kv = L_inv_blocks[0] * kernel_vec; + Eigen::VectorXd L_inv_kv = L_inv_blocks[0] * kernel_vec; double K_self = 1.0; double Q_self = sig2 * L_inv_kv.transpose() * L_inv_kv; - + variance = K_self - Q_self; } // Compute the normalized variance, it could be negative if (variance >= 0.0) { - stds[i] = pow(variance, 0.5); + stds[i] = pow(variance, 0.5); } else { - stds[i] = - pow(abs(variance), 0.5); + stds[i] = - pow(abs(variance), 0.5); } } @@ -390,7 +385,7 @@ void ComputeFlareStdAtom::read_file(char *filename) { cutoff_string_length = strlen(cutoff_string); } - MPI_Bcast(hyperparameters.data(), n_hyps, MPI_DOUBLE, 0, world); + MPI_Bcast(hyperparameters.data(), n_hyps, MPI_DOUBLE, 0, world); MPI_Bcast(&n_species, 1, MPI_INT, 0, world); MPI_Bcast(&n_max, 1, MPI_INT, 0, world); MPI_Bcast(&l_max, 1, MPI_INT, 0, world); @@ -477,7 +472,7 @@ void ComputeFlareStdAtom::read_L_inverse(char *filename) { if (me == 0) { fgets(line, MAXLINE, fptr); // skip the first line - fgets(line, MAXLINE, fptr); // power + fgets(line, MAXLINE, fptr); // power sscanf(line, "%i", &power); fgets(line, MAXLINE, fptr); // hyperparameters @@ -508,7 +503,7 @@ void ComputeFlareStdAtom::read_L_inverse(char *filename) { cutoff_string_length = strlen(cutoff_string); } - MPI_Bcast(hyperparameters.data(), n_hyps, MPI_DOUBLE, 0, world); + MPI_Bcast(hyperparameters.data(), n_hyps, MPI_DOUBLE, 0, world); MPI_Bcast(&n_species, 1, MPI_INT, 0, world); MPI_Bcast(&n_max, 1, MPI_INT, 0, world); MPI_Bcast(&l_max, 1, MPI_INT, 0, world); @@ -525,7 +520,7 @@ void ComputeFlareStdAtom::read_L_inverse(char *filename) { // Parse number of sparse envs if (me == 0) { fgets(line, MAXLINE, fptr); - sscanf(line, "%i", &n_clusters); + sscanf(line, "%i", &n_clusters); } MPI_Bcast(&n_clusters, 1, MPI_INT, 0, world); @@ -622,14 +617,14 @@ void ComputeFlareStdAtom::read_sparse_descriptors(char *filename) { n_clusters_by_type[s] = n_clst_by_type; // Check the relationship between the power spectrum and beta. - int sparse_desc_size = n_clst_by_type * n_descriptors; - + int sparse_desc_size = n_clst_by_type * n_descriptors; + // Parse the beta vectors. memory->create(beta, sparse_desc_size, "compute:sparse_desc"); if (me == 0) grab(fptr, sparse_desc_size, beta); MPI_Bcast(beta, sparse_desc_size, MPI_DOUBLE, 0, world); - + // Fill in the beta matrix. 
int count = 0; Eigen::MatrixXd sparse_desc = Eigen::MatrixXd::Zero(n_clst_by_type, n_descriptors); diff --git a/lammps_plugins/install.sh b/lammps_plugins/install.sh index 3424d9ae5..e1294493a 100755 --- a/lammps_plugins/install.sh +++ b/lammps_plugins/install.sh @@ -39,4 +39,21 @@ target_sources(lammps PRIVATE ${LAMMPS_SOURCE_DIR}/radial.cpp ${LAMMPS_SOURCE_DIR}/y_grad.cpp ) + +if(PKG_KOKKOS) + set(KokkosKernels_ADD_DEFAULT_ETI OFF CACHE BOOL "faster build") + set(KokkosKernels_ENABLED_COMPONENTS BLAS CACHE STRING "faster build") + + if(EXTERNAL_KOKKOS) + find_package(KokkosKernels REQUIRED) + else() + include(FetchContent) + FetchContent_Declare( + kokkoskernels + GIT_REPOSITORY https://github.com/kokkos/kokkos-kernels.git + ) + FetchContent_MakeAvailable(kokkoskernels) + endif() + target_link_libraries(lammps PUBLIC Kokkos::kokkoskernels) +endif() ' >> $lammps/cmake/CMakeLists.txt diff --git a/lammps_plugins/kokkos/pair_flare_kokkos.cpp b/lammps_plugins/kokkos/pair_flare_kokkos.cpp index e607b58cf..995c141bc 100644 --- a/lammps_plugins/kokkos/pair_flare_kokkos.cpp +++ b/lammps_plugins/kokkos/pair_flare_kokkos.cpp @@ -29,6 +29,7 @@ #include "error.h" #include "atom_masks.h" #include "math_const.h" +#include #include #include @@ -142,169 +143,215 @@ void PairFLAREKokkos::compute(int eflag_in, int vflag_in) #define SINGLE_BOND_TEAM_SIZE Kokkos::AUTO() #endif - // Divide the atoms into batches. - // Goal: First batch needs to be biggest to avoid extra allocs. - { - double beta_mem = n_species * n_descriptors * n_descriptors * 8; - double neigh_mem = 1.0*n_atoms * max_neighs * 4; - double lmp_atom_mem = ignum * (18 * 8 + 4 * 4); // 2xf, v, x, virial, tag, type, mask, image - double mem_per_atom = 8 * ( - 2*n_bond // single_bond, u - + 3*n_descriptors // B2, betaB2, w - + 2 // evdwls, B2_norm2s - + 0.5 // numneigh_short - + max_neighs * ( - n_max*4 // g - + n_harmonics*4 // Y - + n_max*n_harmonics*3 // single_bond_grad - + 3 // partial_forces - + 0.5 // neighs_short - ) - ); - size_t availmem, totalmem; - double avail_double = maxmem - beta_mem; - availmem = avail_double; - approx_batch_size = std::min(availmem/ mem_per_atom, n_atoms); + for(curr_type = 0; curr_typetype; + auto d_ilist = this->d_ilist; + auto curr_type = this->curr_type; - if(approx_batch_size < 1) error->all(FLERR,"Not enough memory for even a single atom!"); + Kokkos::parallel_reduce("FLARE: count current type", n_atoms, + KOKKOS_LAMBDA (const int ii, int &count){ + const int i = d_ilist[ii]; - n_batches = std::ceil(1.0*n_atoms / approx_batch_size); - approx_batch_size = n_atoms / n_batches; + const int itype = type[i] - 1; + if(itype==curr_type) count++; + }, n_atoms_curr_type); + //printf("curr type = %d, N = %d\n", curr_type, n_atoms_curr_type); - //printf("maxmem = %g | betamem = %g | neighmem = %g | lmp_atom_mem = %g | mem_per_atom = %g | approx_batch_size = %d | n_batches = %d | remainder = %d\n", maxmem, beta_mem, neigh_mem, lmp_atom_mem, mem_per_atom, approx_batch_size, n_batches, n_atoms -n_batches* approx_batch_size); + if(n_atoms_curr_type==0) continue; - } - int remainder = n_atoms - n_batches*approx_batch_size; - - vscatter = ScatterVType(d_vatom); - fscatter = ScatterFType(f); - - - startatom = 0; - for(int batch_idx = 0; batch_idx < n_batches; batch_idx++){ - batch_size = approx_batch_size + (remainder-- > 0 ? 
1 : 0); - int stopatom = startatom + batch_size; - //printf("BATCH: %d from %d to %d\n", batch_idx, startatom, stopatom); - - // reallocate per-atom views - if (single_bond.extent(0) < batch_size){ - single_bond = View3D(); - single_bond = View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: single_bond"), batch_size, n_radial, n_harmonics); - B2 = View2D(); - B2 = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: B2"), batch_size, n_descriptors); - beta_B2 = View2D(); - beta_B2 = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: beta*B2"), batch_size, n_descriptors); - B2_norm2s = View1D(); evdwls = View1D(); w = View2D(); - B2_norm2s = View1D(Kokkos::ViewAllocateWithoutInitializing("FLARE: B2_norm2s"), batch_size); - evdwls = View1D(Kokkos::ViewAllocateWithoutInitializing("FLARE: evdwls"), batch_size); - w = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: w"), batch_size, n_descriptors); - u = View3D(); - u = View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: u"), batch_size, n_radial, n_harmonics); - - d_numneigh_short = decltype(d_numneigh_short)(); - d_numneigh_short = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("FLARE::numneighs_short") ,batch_size); + if(ilist_curr_type.extent(0) < n_atoms_curr_type){ + ilist_curr_type = IntView1D(); + ilist_curr_type = IntView1D(Kokkos::ViewAllocateWithoutInitializing("FLARE: ilist_curr_type"), n_atoms_curr_type); + } + Kokkos::realloc(ilist_curr_type_idx, 1); + + Kokkos::parallel_for("FLARE: find curr type atoms", + Kokkos::RangePolicy(0, n_atoms), *this + ); + + auto ilist_curr_type_idx = this->ilist_curr_type_idx; + //Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int ii){ + // printf("curr_type_idx = %d\n", ilist_curr_type_idx(0)); + // }); + + //printf("\n%d %d\n\n", ilist_curr_type_idx, n_atoms_curr_type); } - // reallocate per-neighbor views - if(g.extent(0) < batch_size || g.extent(1) < max_neighs){ - Kokkos::LayoutStride glayout(batch_size, max_neighs*n_max*4, - max_neighs, 1, - n_max, 4*max_neighs, - 4, max_neighs); - Kokkos::LayoutStride Ylayout(batch_size, max_neighs*n_harmonics*4, - max_neighs, 1, - n_harmonics, 4*max_neighs, - 4, max_neighs); - g = gYView4D(); Y = gYView4D(); - g = gYView4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: g"), glayout); - Y = gYView4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: Y"), Ylayout); - g_ra = g; - Y_ra = Y; - - single_bond_grad = View5D(); - single_bond_grad = View5D(Kokkos::ViewAllocateWithoutInitializing("FLARE: single_bond_grad"), batch_size, max_neighs, 3, n_max, n_harmonics); - partial_forces = View3D(); - partial_forces = View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: partial forces"), batch_size, max_neighs, 3); - - d_neighbors_short = decltype(d_neighbors_short)(); - d_neighbors_short = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("FLARE::neighbors_short") ,batch_size,max_neighs); + + // Divide the atoms into batches. + // Goal: First batch needs to be biggest to avoid extra allocs. 
+ { + double beta_mem = n_species * n_descriptors * n_descriptors * 8; + double neigh_mem = 1.0*n_atoms * max_neighs * 4; + double lmp_atom_mem = ignum * (18 * 8 + 4 * 4); // 2xf, v, x, virial, tag, type, mask, image + double mem_per_atom = 8 * ( + 2*n_bond // single_bond, u + + 3*n_descriptors // B2, betaB2, w + + 2 // evdwls, B2_norm2s + + 0.5 // numneigh_short + + max_neighs * ( + n_max*4 // g + + n_harmonics*4 // Y + + n_max*n_harmonics*3 // single_bond_grad + + 3 // partial_forces + + 0.5 // neighs_short + ) + ); + size_t availmem, totalmem; + double avail_double = maxmem - beta_mem; + availmem = avail_double; + approx_batch_size = std::min(availmem/ mem_per_atom, n_atoms_curr_type); + + if(approx_batch_size < 1) error->all(FLERR,"Not enough memory for even a single atom!"); + + n_batches = std::ceil(1.0*n_atoms_curr_type / approx_batch_size); + approx_batch_size = n_atoms_curr_type / n_batches; + + //printf("maxmem = %g | betamem = %g | neighmem = %g | lmp_atom_mem = %g | mem_per_atom = %g | approx_batch_size = %d | n_batches = %d | remainder = %d\n", maxmem, beta_mem, neigh_mem, lmp_atom_mem, mem_per_atom, approx_batch_size, n_batches, n_atoms -n_batches* approx_batch_size); + } + int remainder = n_atoms_curr_type - n_batches*approx_batch_size; + + + + startatom = 0; + for(int batch_idx = 0; batch_idx < n_batches; batch_idx++){ + batch_size = approx_batch_size + (remainder-- > 0 ? 1 : 0); + int stopatom = startatom + batch_size; + //printf("BATCH: %d from %d to %d\n", batch_idx, startatom, stopatom); + + // reallocate per-atom views + if (single_bond.extent(0) < batch_size){ + single_bond = View3D(); + single_bond = View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: single_bond"), batch_size, n_radial, n_harmonics); + B2 = View2D(); + B2 = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: B2"), batch_size, n_descriptors); + beta_B2 = View2D(); + beta_B2 = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: beta*B2"), batch_size, n_descriptors); + B2_norm2s = View1D(); evdwls = View1D(); w = View2D(); + B2_norm2s = View1D(Kokkos::ViewAllocateWithoutInitializing("FLARE: B2_norm2s"), batch_size); + evdwls = View1D(Kokkos::ViewAllocateWithoutInitializing("FLARE: evdwls"), batch_size); + w = View2D(Kokkos::ViewAllocateWithoutInitializing("FLARE: w"), batch_size, n_descriptors); + u = View3D(); + u = View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: u"), batch_size, n_radial, n_harmonics); + + d_numneigh_short = decltype(d_numneigh_short)(); + d_numneigh_short = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("FLARE::numneighs_short") ,batch_size); + } - // compute short neighbor list - Kokkos::parallel_for("FLARE: Short neighlist", Kokkos::RangePolicy(0,batch_size), *this); + // reallocate per-neighbor views + if(g.extent(0) < batch_size || g.extent(1) < max_neighs){ + Kokkos::LayoutStride glayout(batch_size, max_neighs*n_max*4, + max_neighs, 1, + n_max, 4*max_neighs, + 4, max_neighs); + Kokkos::LayoutStride Ylayout(batch_size, max_neighs*n_harmonics*4, + max_neighs, 1, + n_harmonics, 4*max_neighs, + 4, max_neighs); + g = gYView4D(); Y = gYView4D(); + g = gYView4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: g"), glayout); + Y = gYView4D(Kokkos::ViewAllocateWithoutInitializing("FLARE: Y"), Ylayout); + g_ra = g; + Y_ra = Y; + + single_bond_grad = View5D(); + single_bond_grad = View5D(Kokkos::ViewAllocateWithoutInitializing("FLARE: single_bond_grad"), batch_size, max_neighs, 3, n_max, n_harmonics); + partial_forces = View3D(); + partial_forces = 
View3D(Kokkos::ViewAllocateWithoutInitializing("FLARE: partial forces"), batch_size, max_neighs, 3); + + d_neighbors_short = decltype(d_neighbors_short)(); + d_neighbors_short = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("FLARE::neighbors_short") ,batch_size,max_neighs); + } - // compute basis functions Rn and Ylm - Kokkos::parallel_for("FLARE: R and Y", - Kokkos::MDRangePolicy>( - {0,0}, {batch_size, max_neighs}, {1,max_neighs}), - *this - ); + // compute short neighbor list + Kokkos::parallel_for("FLARE: Short neighlist", Kokkos::RangePolicy(0,batch_size), *this); - // compute single bond and its gradient - // dnlm, dnlmj - int g_size = ScratchView2D::shmem_size(n_max, 4); - int Y_size = ScratchView2D::shmem_size(n_harmonics, 4); - auto policy = Kokkos::TeamPolicy(batch_size, SINGLE_BOND_TEAM_SIZE, vector_length).set_scratch_size( - 0, Kokkos::PerThread(g_size + Y_size)); - Kokkos::deep_copy(single_bond, 0.0); - //Kokkos::deep_copy(single_bond_grad, 0.0); - Kokkos::parallel_for("FLARE: single bond", - policy, - *this - ); + // compute basis functions Rn and Ylm + Kokkos::parallel_for("FLARE: R and Y", + Kokkos::MDRangePolicy>( + {0,0}, {batch_size, max_neighs}, {1,max_neighs}), + *this + ); - // compute B2 - // pn1n2l = dn1lm dn2lm - Kokkos::parallel_for("FLARE: B2", - Kokkos::MDRangePolicy, TagB2>( - {0,0}, {batch_size, n_descriptors}), - *this - ); + // compute single bond and its gradient + // dnlm, dnlmj + int g_size = ScratchView2D::shmem_size(n_max, 4); + int Y_size = ScratchView2D::shmem_size(n_harmonics, 4); + auto policy = Kokkos::TeamPolicy(batch_size, SINGLE_BOND_TEAM_SIZE, vector_length).set_scratch_size( + 0, Kokkos::PerThread(g_size + Y_size)); + Kokkos::deep_copy(single_bond, 0.0); + //Kokkos::deep_copy(single_bond_grad, 0.0); + Kokkos::parallel_for("FLARE: single bond", + policy, + *this + ); - // compute beta*B2 - B2_chunk_size = std::min(1000, n_descriptors); - int B2_size = ScratchView1D::shmem_size(B2_chunk_size); - Kokkos::parallel_for("FLARE: beta*B2", - Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length).set_scratch_size( - 0, Kokkos::PerTeam(B2_size) - ), - *this - ); + // compute B2 + // pn1n2l = dn1lm dn2lm + Kokkos::parallel_for("FLARE: B2", + Kokkos::MDRangePolicy, TagB2>( + {0,0}, {batch_size, n_descriptors}), + *this + ); - // compute B2 squared norms and evdwls and w - Kokkos::parallel_for("FLARE: B2 norm2 evdwl w", - Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length), - *this - ); + // compute beta*B2 + if(n_species>0){ + KokkosBlas::gemm("N", "T", 1.0, B2, Kokkos::subview(beta, curr_type, Kokkos::ALL(), Kokkos::ALL()), 0.0, beta_B2); + } + else{ + B2_chunk_size = std::min(1000, n_descriptors); + int B2_size = ScratchView1D::shmem_size(B2_chunk_size); + Kokkos::parallel_for("FLARE: beta*B2", + Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length).set_scratch_size( + 0, Kokkos::PerTeam(B2_size) + ), + *this + ); + } + + // compute B2 squared norms and evdwls and w + Kokkos::parallel_for("FLARE: B2 norm2 evdwl w", + Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length), + *this + ); - // compute u - // un1lm = dn2lm(wn1n2l + wn2n1l) ~ 2*dn2lm*wn1n2l - Kokkos::parallel_for("FLARE: u", - Kokkos::MDRangePolicy, Tagu>( - {0,0,0}, {batch_size, n_radial, n_harmonics}), - *this - ); + // compute u + // un1lm = dn2lm(wn1n2l + wn2n1l) ~ 2*dn2lm*wn1n2l + Kokkos::parallel_for("FLARE: u", + Kokkos::MDRangePolicy, Tagu>( + {0,0,0}, {batch_size, n_radial, n_harmonics}), + *this + ); - // compute partial forces - int u_size = 
ScratchView2D::shmem_size(n_radial, n_harmonics); - Kokkos::parallel_for("FLARE: partial forces", - Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length).set_scratch_size( - 0, Kokkos::PerTeam(u_size) - ), - *this - ); + // compute partial forces + int u_size = ScratchView2D::shmem_size(n_radial, n_harmonics); + Kokkos::parallel_for("FLARE: partial forces", + Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length).set_scratch_size( + 0, Kokkos::PerTeam(u_size) + ), + *this + ); - // sum and store total forces, ev_tally - EV_FLOAT ev; - Kokkos::parallel_reduce("FLARE: total forces, ev_tally", - Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length), - *this, - ev - ); - if (evflag) - ev_all += ev; + // sum and store total forces, ev_tally + vscatter = ScatterVType(d_vatom); + fscatter = ScatterFType(f); + EV_FLOAT ev; + Kokkos::parallel_reduce("FLARE: total forces, ev_tally", + Kokkos::TeamPolicy(batch_size, TEAM_SIZE, vector_length), + *this, + ev + ); + Kokkos::Experimental::contribute(d_vatom, vscatter); + Kokkos::Experimental::contribute(f, fscatter); + if (evflag) + ev_all += ev; - startatom = stopatom; + startatom = stopatom; + } } if (eflag_global) eng_vdwl += ev_all.evdwl; if (vflag_global) { @@ -315,8 +362,6 @@ void PairFLAREKokkos::compute(int eflag_in, int vflag_in) virial[4] += ev_all.v[4]; virial[5] += ev_all.v[5]; } - Kokkos::Experimental::contribute(d_vatom, vscatter); - Kokkos::Experimental::contribute(f, fscatter); if (eflag_atom) { @@ -343,7 +388,7 @@ template KOKKOS_INLINE_FUNCTION void PairFLAREKokkos::operator()(const int ii, const int jj) const { - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; const int j = d_neighbors_short(ii,jj); const int jnum = d_numneigh_short(ii); if(jj >= jnum) return; @@ -448,7 +493,7 @@ template KOKKOS_INLINE_FUNCTION void PairFLAREKokkos::operator()(TagBetaB2, const MemberType team_member) const{ int ii = team_member.league_rank(); - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; const int itype = type[i] - 1; @@ -514,7 +559,7 @@ void PairFLAREKokkos::operator()(TagNorm2, const MemberType team_mem }); } if (eflag_atom){ - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; d_eatom[i] = evdwls(ii); } @@ -542,7 +587,7 @@ template KOKKOS_INLINE_FUNCTION void PairFLAREKokkos::operator()(TagF, const MemberType team_member) const{ int ii = team_member.league_rank(); - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; const int jnum = d_numneigh_short(ii); ScratchView2D uscratch(team_member.team_scratch(0), n_radial, n_harmonics); @@ -576,7 +621,7 @@ template KOKKOS_INLINE_FUNCTION void PairFLAREKokkos::operator()(TagStoreF, const MemberType team_member, EV_FLOAT &ev) const{ int ii = team_member.league_rank(); - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; const int jnum = d_numneigh_short(ii); const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); @@ -633,7 +678,7 @@ void PairFLAREKokkos::operator()(TagStoreF, const MemberType team_me template KOKKOS_INLINE_FUNCTION void PairFLAREKokkos::operator()(const int& ii) const { - const int i = d_ilist[ii+startatom]; + const int i = ilist_curr_type[ii+startatom]; const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); @@ -713,24 +758,16 @@ void PairFLAREKokkos::init_style() { PairFLARE::init_style(); - // irequest = neigh request made by parent class + auto request = 
neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); neighflag = lmp->kokkos->neighflag; - int irequest = neighbor->nrequest - 1; - - neighbor->requests[irequest]-> - kokkos_host = std::is_same::value && - !std::is_same::value; - neighbor->requests[irequest]-> - kokkos_device = std::is_same::value; // always request a full neighbor list - if (neighflag == FULL) { // TODO: figure this out - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->ghost = 0; - } else { + if (neighflag != FULL) { // TODO: figure this out error->all(FLERR,"Cannot use chosen neighbor list style with pair flare/kk"); } @@ -793,12 +830,25 @@ void PairFLAREKokkos::v_tally(E_FLOAT (&v)[6], const int &i, const i } } +template +KOKKOS_INLINE_FUNCTION +void PairFLAREKokkos::operator()(TagFindCurrType, const int ii) const{ + const int i = d_ilist[ii]; + + const int itype = type[i] - 1; + + if(itype==curr_type){ + int index = Kokkos::atomic_fetch_add(&ilist_curr_type_idx(0), 1); + ilist_curr_type(index) = i; + } +} + namespace LAMMPS_NS { template class PairFLAREKokkos; #ifdef LMP_KOKKOS_GPU -template class PairFLAREKokkos; +//template class PairFLAREKokkos; #endif } diff --git a/lammps_plugins/kokkos/pair_flare_kokkos.h b/lammps_plugins/kokkos/pair_flare_kokkos.h index 44d41cb3f..ee8853057 100644 --- a/lammps_plugins/kokkos/pair_flare_kokkos.h +++ b/lammps_plugins/kokkos/pair_flare_kokkos.h @@ -15,7 +15,7 @@ PairStyle(flare/kk,PairFLAREKokkos) PairStyle(flare/kk/device,PairFLAREKokkos) -PairStyle(flare/kk/host,PairFLAREKokkos) +//PairStyle(flare/kk/host,PairFLAREKokkos) #else @@ -25,6 +25,8 @@ PairStyle(flare/kk/host,PairFLAREKokkos) #include "pair_flare.h" #include +struct TagCountCurrType{}; +struct TagFindCurrType{}; struct TagSingleBond{}; struct TagB2{}; struct TagBetaB2{}; @@ -52,6 +54,9 @@ class PairFLAREKokkos : public PairFLARE { virtual void coeff(int, char **); virtual void init_style(); + KOKKOS_INLINE_FUNCTION + void operator()(TagFindCurrType, const int) const; + KOKKOS_INLINE_FUNCTION void operator()(TagSingleBond, const MemberType) const; @@ -105,6 +110,7 @@ class PairFLAREKokkos : public PairFLARE { int batch_size = 0, startatom, n_batches, approx_batch_size; + using IntView1D = Kokkos::View; using View1D = Kokkos::View; using View2D = Kokkos::View; using View3D = Kokkos::View; @@ -134,6 +140,9 @@ class PairFLAREKokkos : public PairFLARE { int B2_chunk_size; + int n_atoms_curr_type, curr_type; + IntView1D ilist_curr_type, ilist_curr_type_idx; + typename AT::t_int_1d_randomread d_type2frho; typename AT::t_int_2d_randomread d_type2rhor; typename AT::t_int_2d_randomread d_type2z2r; diff --git a/lammps_plugins/pair_flare.cpp b/lammps_plugins/pair_flare.cpp index 40f71905a..36312e0e3 100644 --- a/lammps_plugins/pair_flare.cpp +++ b/lammps_plugins/pair_flare.cpp @@ -128,7 +128,7 @@ void PairFLARE::compute(int eflag, int vflag) { B2_descriptor(B2_vals, B2_norm_squared, single_bond_vals, n_species, n_max, l_max); - compute_energy_and_u(B2_vals, B2_norm_squared, single_bond_vals, power, + compute_energy_and_u(B2_vals, B2_norm_squared, single_bond_vals, power, n_species, n_max, l_max, beta_matrices[itype - 1], u, &evdwl); // Continue if the environment is empty. @@ -241,9 +241,7 @@ void PairFLARE::init_style() { error->all(FLERR, "Pair style requires newton pair on"); // Request a full neighbor list. 
-  int irequest = neighbor->request(this, instance_me);
-  neighbor->requests[irequest]->half = 0;
-  neighbor->requests[irequest]->full = 1;
+  neighbor->add_request(this, NeighConst::REQ_FULL);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/lammps_plugins/pair_mgp.cpp b/lammps_plugins/pair_mgp.cpp
index 08816b17b..8ed62e46a 100644
--- a/lammps_plugins/pair_mgp.cpp
+++ b/lammps_plugins/pair_mgp.cpp
@@ -930,9 +930,7 @@ void PairMGP::init_style() {
   }
 
   // need a full neighbor list
-  int irequest = neighbor->request(this, instance_me);
-  neighbor->requests[irequest]->half = 0;
-  neighbor->requests[irequest]->full = 1;
+  neighbor->add_request(this, NeighConst::REQ_FULL);
 }
 
 /* ------------------------------------------------------------------
diff --git a/tests/test_fake_otf.py b/tests/test_fake_otf.py
index a6c874bc3..945c1a232 100644
--- a/tests/test_fake_otf.py
+++ b/tests/test_fake_otf.py
@@ -46,7 +46,7 @@ def test_traj_with_varying_sizes(md_engine):
         config = yaml.safe_load(f)
 
     config["supercell"]["file"] = "test_files/sic_dft.xyz"
-    #config["dft_calc"]["kwargs"]["filename"] = "test_files/sic_dft.xyz"
+    # config["dft_calc"]["kwargs"]["filename"] = "test_files/sic_dft.xyz"
     config["otf"]["md_kwargs"]["filenames"] = ["test_files/sic_dft.xyz"]
    config["otf"]["output_name"] = md_engine
 
@@ -85,7 +85,9 @@ def test_otf_md(md_engine):
     with open("../examples/test_SGP_LMP_fresh.yaml", "r") as f:
         config = yaml.safe_load(f)
 
-    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp")
+    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp").replace(
+        "full", "half"
+    )
     config["otf"]["md_kwargs"]["command"] = os.environ.get("lmp")
     config["otf"]["output_name"] = "myotf"
     fresh_start_otf(config)
@@ -141,7 +143,7 @@ def test_otf_md(md_engine):
         config = yaml.safe_load(f)
 
     config["supercell"]["file"] = "myotf_dft.xyz"
-    #config["dft_calc"]["kwargs"]["filename"] = "myotf_dft.xyz"
+    # config["dft_calc"]["kwargs"]["filename"] = "myotf_dft.xyz"
     config["otf"]["md_kwargs"]["filenames"] = ["myotf_dft.xyz"]
     config["otf"]["output_name"] = "direct"
     config["otf"]["build_mode"] = "direct"
diff --git a/tests/test_sgp_otf.py b/tests/test_sgp_otf.py
index c9fbc6e50..ad29da593 100644
--- a/tests/test_sgp_otf.py
+++ b/tests/test_sgp_otf.py
@@ -41,7 +41,9 @@ def test_otf_md(md_engine):
     with open("../examples/test_SGP_LMP_fresh.yaml", "r") as f:
         config = yaml.safe_load(f)
 
-    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp")
+    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp").replace(
+        "full", "half"
+    )
 
     if md_engine == "PyLAMMPS":
         config["flare_calc"]["use_mapping"] = True
@@ -74,7 +76,9 @@ def test_otf_warm(md_engine):
     with open("../examples/test_SGP_LMP_fresh.yaml", "r") as f:
         config = yaml.safe_load(f)
 
-    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp")
+    config["dft_calc"]["kwargs"]["command"] = os.environ.get("lmp").replace(
+        "full", "half"
+    )
 
     if md_engine == "PyLAMMPS":
         config["flare_calc"]["use_mapping"] = True
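For readers applying this patch by hand, the sketch below strings together the steps described by the CI workflow and the README note above: clone LAMMPS, run `lammps_plugins/install.sh`, and configure with Kokkos so the new KokkosKernels logic in `install.sh` takes effect. It is a minimal sketch under stated assumptions, not part of the patch: `/path/to/kokkos-install` and the final `make -j` are placeholders, and the CUDA/architecture flags should match your hardware.

```
# Sketch: build LAMMPS with the FLARE plugin, mirroring the CI workflow and README above.
# Assumed to run from the FLARE repository root; /path/to/kokkos-install is a placeholder.
git clone --depth 1 https://github.com/lammps/lammps.git lammps
cd lammps_plugins
./install.sh $(pwd)/../lammps      # registers the FLARE sources with the LAMMPS CMake build
cd ../lammps
mkdir -p build && cd build

# Default path: CMake fetches KokkosKernels from GitHub and builds it together with LAMMPS.
cmake ../cmake -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON

# Alternative (assumes Kokkos and KokkosKernels are already installed at the given prefix):
# cmake ../cmake -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON \
#   -DEXTERNAL_KOKKOS=ON -DCMAKE_PREFIX_PATH=/path/to/kokkos-install

make -j
```

The `Patch ASE` step in the workflow is a one-line substitution in `ase/calculators/lammpsrun.py`: thermo-block detection switches from `line.startswith(_custom_thermo_mark)` to `line.strip().startswith("Step")`, so the parser keys on the standard LAMMPS thermo header instead of a custom marker.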