From 75e89c82235c8aa8266d1ee0f031f7dc8053c003 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 23 Oct 2024 18:11:53 +0800
Subject: [PATCH 1/6] Chore(pt): fix warning in `test_training`

---
 source/tests/pt/test_training.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index fa9e5c138a..6a5e99231c 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -206,7 +206,8 @@ def setUp(self):
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
 
     def tearDown(self) -> None:
-        (self.set_path / "fparam.npy").unlink(missing_ok=True)
+        # may remove file for other threads when testing separately.
+        # (self.set_path / "fparam.npy").unlink(missing_ok=True)
         DPTrainTest.tearDown(self)
 
 

From 17dafea1e5850d9326796d58cc4a96813e9819eb Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 23 Oct 2024 22:48:35 +0800
Subject: [PATCH 2/6] Update test_training.py

---
 source/tests/pt/test_training.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index 6a5e99231c..54ed508b85 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -2,6 +2,7 @@
 import json
 import os
 import shutil
+import tempfile
 import unittest
 from copy import (
     deepcopy,
 )
@@ -195,19 +196,25 @@ def setUp(self):
         input_json = str(Path(__file__).parent / "water/se_atten.json")
         with open(input_json) as f:
             self.config = json.load(f)
-        data_file = [str(Path(__file__).parent / "water/data/data_0")]
+        self.original_data_path = Path(__file__).parent / "water/data/data_0"
+        # Create a temporary directory for this test
+        self.temp_dir = Path(tempfile.mkdtemp())
+        self.temp_data_path = self.temp_dir / "data_0"
+        shutil.copytree(self.original_data_path, self.temp_data_path)
+
+        data_file = [str(self.temp_data_path)]
         self.config["training"]["training_data"]["systems"] = data_file
         self.config["training"]["validation_data"]["systems"] = data_file
         self.config["model"] = deepcopy(model_se_e2_a)
         self.config["model"]["fitting_net"]["numb_fparam"] = 1
         self.config["training"]["numb_steps"] = 1
         self.config["training"]["save_freq"] = 1
-        self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
+        self.set_path = self.temp_data_path / "set.000"
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
 
     def tearDown(self) -> None:
-        # may remove file for other threads when testing separately.
-        # (self.set_path / "fparam.npy").unlink(missing_ok=True)
+        # Remove the temporary directory and all its contents
+        shutil.rmtree(self.temp_dir)
         DPTrainTest.tearDown(self)
 
 

From ce0d33d37754d8b4facdfc79fee5a0d96966b490 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Sat, 26 Oct 2024 14:42:34 +0800
Subject: [PATCH 3/6] delete dataloader after training

---
 deepmd/pt/train/training.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 0f7c030a84..3e244ed02b 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1033,6 +1033,11 @@ def log_loss_valid(_task_key="Default"):
             log.info(
                 f"The profiling trace have been saved to: {self.profiling_file}"
             )
+        if self.multi_task:
+            for model_key in self.model_keys:
+                del self.training_data[model_key], self.training_dataloader[model_key]
+        else:
+            del self.training_data, self.training_dataloader
 
     def save_model(self, save_path, lr=0.0, step=0):
         module = (

From d85fe6dce7115384355b18fb3b2fdb2034b2792a Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Sat, 26 Oct 2024 15:13:17 -0400
Subject: [PATCH 4/6] also delete validation_dataloader

---
 deepmd/pt/train/training.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 3e244ed02b..ac8d2c22fa 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1035,9 +1035,9 @@ def log_loss_valid(_task_key="Default"):
             )
         if self.multi_task:
             for model_key in self.model_keys:
-                del self.training_data[model_key], self.training_dataloader[model_key]
+                del self.training_data[model_key], self.training_dataloader[model_key], self.validation_data[model_key], self.validation_dataloader[model_key]
         else:
-            del self.training_data, self.training_dataloader
+            del self.training_data, self.training_dataloader, self.validation_data, self.validation_dataloader
 
     def save_model(self, save_path, lr=0.0, step=0):
         module = (

From 9f69bb19440623755bfcde3b2c02ada6b072b5ae Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 26 Oct 2024 19:14:26 +0000
Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 deepmd/pt/train/training.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index ac8d2c22fa..90393575b5 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1035,9 +1035,19 @@ def log_loss_valid(_task_key="Default"):
             )
         if self.multi_task:
             for model_key in self.model_keys:
-                del self.training_data[model_key], self.training_dataloader[model_key], self.validation_data[model_key], self.validation_dataloader[model_key]
+                del (
+                    self.training_data[model_key],
+                    self.training_dataloader[model_key],
+                    self.validation_data[model_key],
+                    self.validation_dataloader[model_key],
+                )
         else:
-            del self.training_data, self.training_dataloader, self.validation_data, self.validation_dataloader
+            del (
+                self.training_data,
+                self.training_dataloader,
+                self.validation_data,
+                self.validation_dataloader,
+            )
 
     def save_model(self, save_path, lr=0.0, step=0):
         module = (

From c92befac436bdc0c7d191fde20a633a141206edd Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Tue, 5 Nov 2024 20:57:21 +0800
Subject: [PATCH 6/6] make delete a func

---
 deepmd/pt/train/training.py      | 2 ++
 source/tests/pt/test_training.py | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 90393575b5..c5f1d60d87 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1033,6 +1033,8 @@ def log_loss_valid(_task_key="Default"):
             log.info(
                 f"The profiling trace have been saved to: {self.profiling_file}"
             )
+
+    def delete_dataloader(self):
         if self.multi_task:
             for model_key in self.model_keys:
                 del (
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index 54ed508b85..f912b944eb 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -35,6 +35,7 @@ def test_dp_train(self):
         # test training from scratch
         trainer = get_trainer(deepcopy(self.config))
         trainer.run()
+        trainer.delete_dataloader()
         state_dict_trained = trainer.wrapper.model.state_dict()
 
         # test fine-tuning using same input
@@ -101,6 +102,11 @@ def test_dp_train(self):
         trainer_finetune_empty.run()
         trainer_finetune_random.run()
 
+        # delete dataloader to stop buffer fetching
+        trainer_finetune.delete_dataloader()
+        trainer_finetune_empty.delete_dataloader()
+        trainer_finetune_random.delete_dataloader()
+
     def test_trainable(self):
         fix_params = deepcopy(self.config)
         fix_params["model"]["descriptor"]["trainable"] = False
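
For reference, a minimal usage sketch of the API introduced above, mirroring the calls added to source/tests/pt/test_training.py: once run() returns, delete_dataloader() drops the training/validation datasets and dataloaders so background prefetch threads stop holding the data files open. Assumptions (not shown in the patches): get_trainer is imported from deepmd.pt.entrypoints.main as in the test module, and "input.json" is a placeholder for a real PyTorch-backend training input.

# Sketch only, under the assumptions stated above.
import json
from copy import deepcopy

from deepmd.pt.entrypoints.main import get_trainer

# "input.json" is a placeholder path to a DeePMD-kit PT training input file.
with open("input.json") as f:
    config = json.load(f)

trainer = get_trainer(deepcopy(config))
trainer.run()                # train for config["training"]["numb_steps"] steps
trainer.delete_dataloader()  # release dataloaders so no buffer-fetching thread
                             # keeps the (possibly temporary) data directories busy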