From 8177ea3d3a0b866f9bec8a20f7e704a09bd443b6 Mon Sep 17 00:00:00 2001 From: mmglove Date: Tue, 21 Nov 2023 18:53:58 +0800 Subject: [PATCH 1/3] add max_mem_reserved for benchmark --- ppsci/solver/printer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ppsci/solver/printer.py b/ppsci/solver/printer.py index e36559ec69..99d1e3f56e 100644 --- a/ppsci/solver/printer.py +++ b/ppsci/solver/printer.py @@ -25,6 +25,7 @@ if TYPE_CHECKING: from ppsci import solver +import paddle def update_train_loss( @@ -72,10 +73,12 @@ def log_train_info( (trainer.epochs - epoch_id + 1) * trainer.iters_per_epoch - iter_id ) * trainer.train_time_info["batch_cost"].avg eta_msg = f"eta: {str(datetime.timedelta(seconds=int(eta_sec))):s}" + max_mem_reserved_msg = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved()} B" + max_mem_allocated_msg = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated()} B" logger.info( f"[Train][Epoch {epoch_id}/{trainer.epochs}]" f"[Iter: {iter_id}/{trainer.iters_per_epoch}] {lr_msg}, " - f"{metric_msg}, {time_msg}, {ips_msg}, {eta_msg}" + f"{metric_msg}, {time_msg}, {ips_msg}, {eta_msg}, {max_mem_reserved_msg}, {max_mem_allocated_msg}" ) logger.scaler( From 77959a0161197ef0ae5336c70afee58484fcba99 Mon Sep 17 00:00:00 2001 From: mmglove Date: Wed, 22 Nov 2023 10:43:28 +0800 Subject: [PATCH 2/3] add max_mem_reserved for benchmark --- ppsci/solver/printer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ppsci/solver/printer.py b/ppsci/solver/printer.py index 99d1e3f56e..5efd464b0f 100644 --- a/ppsci/solver/printer.py +++ b/ppsci/solver/printer.py @@ -73,8 +73,12 @@ def log_train_info( (trainer.epochs - epoch_id + 1) * trainer.iters_per_epoch - iter_id ) * trainer.train_time_info["batch_cost"].avg eta_msg = f"eta: {str(datetime.timedelta(seconds=int(eta_sec))):s}" - max_mem_reserved_msg = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved()} B" - max_mem_allocated_msg = 
f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated()} B" + max_mem_reserved_msg = ( + f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved()} B" + ) + max_mem_allocated_msg = ( + f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated()} B" + ) logger.info( f"[Train][Epoch {epoch_id}/{trainer.epochs}]" f"[Iter: {iter_id}/{trainer.iters_per_epoch}] {lr_msg}, " From 83a4a5c22273a4d4fbbfb9f3e301e2a6ee485aff Mon Sep 17 00:00:00 2001 From: mmglove Date: Mon, 15 Apr 2024 17:24:11 +0800 Subject: [PATCH 3/3] fix benchmark model name --- .../2d_unsteady_continuous_train_infer_python.txt | 2 +- .../configs/train_eular_beam/eular_beam_train_infer_python.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test_tipc/configs/train_2d_unsteady_continuous/2d_unsteady_continuous_train_infer_python.txt b/test_tipc/configs/train_2d_unsteady_continuous/2d_unsteady_continuous_train_infer_python.txt index 0a1eb8fa7d..955aeb44a5 100644 --- a/test_tipc/configs/train_2d_unsteady_continuous/2d_unsteady_continuous_train_infer_python.txt +++ b/test_tipc/configs/train_2d_unsteady_continuous/2d_unsteady_continuous_train_infer_python.txt @@ -1,5 +1,5 @@ ===========================train_params=========================== -model_item:cylinder2d_unsteady_Re100 +model_name:cylinder2d_unsteady_Re100 bs_item:1 run_mode:DP device_num:N1C1 diff --git a/test_tipc/configs/train_eular_beam/eular_beam_train_infer_python.txt b/test_tipc/configs/train_eular_beam/eular_beam_train_infer_python.txt index 06b7802173..3affc54dae 100644 --- a/test_tipc/configs/train_eular_beam/eular_beam_train_infer_python.txt +++ b/test_tipc/configs/train_eular_beam/eular_beam_train_infer_python.txt @@ -1,5 +1,5 @@ ===========================train_params=========================== -model_item:euler_beam +model_name:euler_beam bs_item:1 run_mode:DP device_num:N1C1