From b5727ef8e8d00c3bc49b8acb0a947f410005fbd5 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Thu, 19 Sep 2024 17:36:23 +0900 Subject: [PATCH 1/6] Update pypolymlp doc --- doc/pypolymlp.md | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/doc/pypolymlp.md b/doc/pypolymlp.md index aca7c093..4a1bd73d 100644 --- a/doc/pypolymlp.md +++ b/doc/pypolymlp.md @@ -36,7 +36,8 @@ in the distribution from GitHub or PyPI. supercells. The dataset must be stored in a phono3py-yaml-like file, e.g., `phono3py_params.yaml`. Use {ref}`--cf3 ` and {ref}`--sp ` option simultaneously. -4. Develop MLPs. At this step `phono3py.pmlp` is saved. +4. Develop MLPs. By default, 90% and 10% of the dataset are used for + training and testing, respectively. At this step `phono3py.pmlp` is saved. 5. Generate displacements in supercells either systematic or random displacements. 6. Evaluate MLPs for forces of the supercells generated in step 5. 7. Calculate force constants from displacement-force dataset from steps 5 and 6. @@ -244,7 +245,7 @@ Having `phono3py_params.yaml`, phono3py is executed with `--pypolymlp` option, | |_) | | | | (_) | | | | (_) |__) | |_) | |_| | | .__/|_| |_|\___/|_| |_|\___/____/| .__/ \__, | |_| |_| |___/ - 3.5.0-dev22+g575c4107 + 3.5.0 -------------------------[time 2024-09-19 15:33:23]------------------------- Compiled with OpenMP support (max 10 threads). @@ -327,3 +328,40 @@ displacements are generated. These displacements are then inverted, resulting in an additional 200 supercells. In total, 400 supercells are created. The forces for these supercells are then evaluated. Finally, the force constants are calculated using symfc. + +## Parameters for developing MLPs + +A few parameters can be specified using the `--mlp-params` option for the +development of MLPs. 
The parameters are provided as a string, e.g., + +```bash +% phono3py-load phono3py_params.yaml --pypolymlp --mlp-params="ntrain=80, ntest=20" +``` + +Multiple parameters are separated by commas. A brief explanation of the +available parameters can be found in the docstring of `PypolymlpParams`, which +can be displayed by + +```python +In [1]: from phonopy.interface.pypolymlp import PypolymlpParams + +In [2]: help(PypolymlpParams) +``` + +`ntrain` and `ntest` are implemented in phono3py, while the remaining parameters +are directly passed to pypolymlp. Optimizing pypolymlp parameters can be +difficult, both in terms of achieving accuracy and managing the computational +resources required. The current default parameters are likely suitable for +systems up to ternary compounds. For binary systems, the calculations can +generally be run on standard laptop computers, but for ternary systems, around +40 GB of memory or more may be necessary. + +For parameter adjustments, it is recommended to consult the +[pypolymlp](https://github.com/sekocha/pypolymlp) documentation and review the +relevant research papers. + +### `ntrain` and `ntest` + +These parameters provide a straightforward dataset split: the first `ntrain` +supercells from the list are used for training, while the last `ntest` +supercells are reserved for testing. From 7159633ac170099882f8714f1dc84456e106fe88 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Fri, 20 Sep 2024 14:17:47 +0900 Subject: [PATCH 2/6] Update pypolymlp doc --- doc/pypolymlp.md | 14 ++++++++++---- phono3py/file_IO.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/doc/pypolymlp.md b/doc/pypolymlp.md index 4a1bd73d..24aaef58 100644 --- a/doc/pypolymlp.md +++ b/doc/pypolymlp.md @@ -15,13 +15,15 @@ The training process involves using a dataset consisting of supercell displacements, forces, and energies. The trained MLPs are then employed to compute forces for supercells with specific displacements. 
-For more details on the methodology, refer to A. Togo and A. Seko, J. Chem. Phys. -**160**, 211001 (2024) [[doi](https://doi.org/10.1063/5.0211296)]. +For further details on combining phono3py calculations with pypolymlp, refer to +A. Togo and A. Seko, J. Chem. Phys. **160**, 211001 (2024) +[[doi](https://doi.org/10.1063/5.0211296)] +[[arxiv](https://arxiv.org/abs/2401.17531)]. An example of its usage can be found in the `example/NaCl-pypolymlp` directory in the distribution from GitHub or PyPI. -## Requirement +## Requirements - [pypolymlp](https://github.com/sekocha/pypolymlp) - [symfc](https://github.com/symfc/symfc) @@ -228,7 +230,7 @@ displacement distance of 0.001 Angstrom. The forces for these supercells are then evaluated using pypolymlp. Both the generated displacements and the corresponding forces are stored in the `phono3py_mlp_eval_dataset` file. -### Steps 4-6: Force constants calculation (random displacements in step 5) +### Steps 4-7: Force constants calculation (random displacements in step 5) After developing MLPs, random displacements are generated by specifying {ref}`--rd ` option. To compute force constants @@ -329,6 +331,10 @@ an additional 200 supercells. In total, 400 supercells are created. The forces for these supercells are then evaluated. Finally, the force constants are calculated using symfc. +## Convergence with respect to dataset size + +In general, increasing the amount of data improves the accuracy of representing force constants. Therefore, it is recommended to check the convergence of the target property as the dataset size grows. Lattice thermal conductivity is a convenient property to monitor when assessing convergence with respect to the number of supercells in the training dataset. 
+ ## Parameters for developing MLPs A few parameters can be specified using the `--mlp-params` option for the diff --git a/phono3py/file_IO.py b/phono3py/file_IO.py index 0b9066b7..672bc3d6 100644 --- a/phono3py/file_IO.py +++ b/phono3py/file_IO.py @@ -413,6 +413,26 @@ def read_fc2_from_hdf5(filename="fc2.hdf5", p2s_map=None): ) +def write_datasets_to_hdf5( + dataset: dict, + phonon_dataset: dict = None, + filename: str = "datasets.hdf5", + compression: str = "gzip", +): + """Write dataset and phonon_dataset in datasets.hdf5.""" + + def _write_dataset(w, dataset: dict, group_name: str): + dataset_w = w.create_group(group_name) + for key in dataset: + dataset_w.create_dataset(key, data=dataset[key], compression=compression) + + with h5py.File(filename, "w") as w: + w.create_dataset("version", data=np.bytes_(__version__)) + _write_dataset(w, dataset, "dataset") + if phonon_dataset: + _write_dataset(w, phonon_dataset, "phonon_dataset") + + def write_grid_address_to_hdf5( grid_address, mesh, From 650adff94df4dd3eeceb6f6caf73a4b2afdb92d7 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Fri, 20 Sep 2024 14:25:17 +0900 Subject: [PATCH 3/6] Update pypolymlp doc --- doc/pypolymlp.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/pypolymlp.md b/doc/pypolymlp.md index 24aaef58..00403198 100644 --- a/doc/pypolymlp.md +++ b/doc/pypolymlp.md @@ -333,7 +333,11 @@ calculated using symfc. ## Convergence with respect to dataset size -In general, increasing the amount of data improves the accuracy of representing force constants. Therefore, it is recommended to check the convergence of the target property as the dataset size grows. Lattice thermal conductivity is a convenient property to monitor when assessing convergence with respect to the number of supercells in the training dataset. +In general, increasing the amount of data improves the accuracy of representing +force constants. 
Therefore, it is recommended to check the convergence of the +target property with respect to the number of supercells in the training +dataset. Lattice thermal conductivity may be a convenient property to monitor +when assessing convergence. ## Parameters for developing MLPs From ff925180eab0a3aa5efb5ad01f4324979f8c3843 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Mon, 23 Sep 2024 21:24:13 +0900 Subject: [PATCH 4/6] Fix memory leak --- c/real_to_reciprocal.c | 22 ++++++++++------------ phono3py/phonon3/interaction.py | 6 +++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/c/real_to_reciprocal.c b/c/real_to_reciprocal.c index 29f9212b..b4eb7b86 100644 --- a/c/real_to_reciprocal.c +++ b/c/real_to_reciprocal.c @@ -80,8 +80,8 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal, const AtomTriplets *atom_triplets, const long openmp_per_triplets) { long i, j, num_band, num_patom, num_satom, adrs_vec; - lapack_complex_double *pre_phase_factors, *phase_factor0, *phase_factor1, - *phase_factor2; + lapack_complex_double *pre_phase_factors, *phase_factors, *phase_factor0, + *phase_factor1, *phase_factor2; num_patom = atom_triplets->multi_dims[1]; num_satom = atom_triplets->multi_dims[0]; @@ -92,12 +92,11 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal, pre_phase_factors[i] = get_pre_phase_factor(i, q_vecs, atom_triplets); } - phase_factor0 = (lapack_complex_double *)malloc( - sizeof(lapack_complex_double) * num_patom * num_satom); - phase_factor1 = (lapack_complex_double *)malloc( - sizeof(lapack_complex_double) * num_patom * num_satom); - phase_factor2 = (lapack_complex_double *)malloc( - sizeof(lapack_complex_double) * num_patom * num_satom); + phase_factors = (lapack_complex_double *)malloc( + sizeof(lapack_complex_double) * 3 * num_patom * num_satom); + phase_factor0 = phase_factors; + phase_factor1 = phase_factors + num_patom * num_satom; + phase_factor2 = phase_factors + 2 * num_patom * num_satom; for (i = 0; 
i < num_patom; i++) { for (j = 0; j < num_satom; j++) { adrs_vec = j * atom_triplets->multi_dims[1] + i; @@ -132,11 +131,10 @@ void r2r_real_to_reciprocal(lapack_complex_double *fc3_reciprocal, free(pre_phase_factors); pre_phase_factors = NULL; - free(phase_factor0); + free(phase_factors); + phase_factors = NULL; + phase_factor0 = NULL; phase_factor1 = NULL; - free(phase_factor1); - phase_factor1 = NULL; - free(phase_factor2); phase_factor2 = NULL; } diff --git a/phono3py/phonon3/interaction.py b/phono3py/phonon3/interaction.py index 6bf486f7..443ad335 100644 --- a/phono3py/phonon3/interaction.py +++ b/phono3py/phonon3/interaction.py @@ -304,7 +304,7 @@ def primitive_symmetry(self) -> Symmetry: def get_triplets_at_q( self, - ) -> tuple(np.ndarray, np.ndarray, np.ndarray, np.ndarray): + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Return grid point triplets information. triplets_at_q is in BZ-grid. @@ -414,7 +414,7 @@ def get_zero_value_positions(self): ) return self.zero_value_positions - def get_phonons(self) -> tuple(np.ndarray, np.ndarray, np.ndarray): + def get_phonons(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """Return phonons on grid. 
Returns @@ -538,7 +538,7 @@ def get_averaged_interaction(self): def get_primitive_and_supercell_correspondence( self, - ) -> tuple(np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray): + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Return atomic pair information.""" return (self._svecs, self._multi, self._p2s, self._s2p, self._masses) From 4d30244a7367c94e8bb206094eb4c6ab0bd84886 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Sep 2024 23:30:16 +0000 Subject: [PATCH 5/6] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.5 → v0.6.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.5...v0.6.7) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf7da9d3..94ea09a8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: exclude: ^example/AlN-LDA/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.5 + rev: v0.6.7 hooks: - id: ruff args: [ "--fix", "--show-fixes" ] From ba38ca13e37213da618fc92eeb3a36d4094642b9 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Tue, 24 Sep 2024 10:34:28 +0900 Subject: [PATCH 6/6] Set version 3.5.2 --- doc/changelog.md | 4 ++++ doc/conf.py | 2 +- phono3py/version.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md index 70523225..71a671f2 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -2,6 +2,10 @@ # Change Log +## Sep-24-2024: Version 3.5.2 + +- Fix a memory leak. + ## Sep-19-2024: Version 3.5.1 - A small fix. diff --git a/doc/conf.py b/doc/conf.py index a4f00a21..f186e32a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -60,7 +60,7 @@ # The short X.Y version. 
version = "3.5" # The full version, including alpha/beta/rc tags. -release = "3.5.1" +release = "3.5.2" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/phono3py/version.py b/phono3py/version.py index b8845155..d4df8bf1 100644 --- a/phono3py/version.py +++ b/phono3py/version.py @@ -34,4 +34,4 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -__version__ = "3.5.1" +__version__ = "3.5.2"