From bebac626aef995aaa99cbc60542a4390ddad4749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yifan=20Li=E6=9D=8E=E4=B8=80=E5=B8=86?= Date: Fri, 24 May 2024 22:52:47 -0400 Subject: [PATCH 1/4] Add a reminder for the illegal memory error encountered in the GPU version of neighbor stat. --- deepmd/utils/batch_size.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index b35d9833d5..3a61d72f86 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -61,6 +61,12 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: self.maximum_working_batch_size = initial_batch_size if self.is_gpu_available(): self.minimal_not_working_batch_size = 2**31 + log.info( + "You may encounter the error \"an illegal memory access was encountered\". " + "It is due to some issue of TensorFlow. " + "If you see this error, you can set the environment variable DP_INFER_BATCH_SIZE to a value smaller than the batch size you see adjusted. " + "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). 
" + ) else: self.minimal_not_working_batch_size = ( self.maximum_working_batch_size + 1 From e84994deedfeb123f6d5ed484c3f46e08675239f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 25 May 2024 03:03:19 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/utils/batch_size.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 3a61d72f86..6b4d99fa2c 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -62,7 +62,7 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: if self.is_gpu_available(): self.minimal_not_working_batch_size = 2**31 log.info( - "You may encounter the error \"an illegal memory access was encountered\". " + 'You may encounter the error "an illegal memory access was encountered". ' "It is due to some issue of TensorFlow. " "If you see this error, you can set the environment variable DP_INFER_BATCH_SIZE to a value smaller than the batch size you see adjusted. " "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). 
" From b89b97bee00c685274f9e8b3657d6b8a51f5839b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yifan=20Li=E6=9D=8E=E4=B8=80=E5=B8=86?= Date: Fri, 24 May 2024 23:10:36 -0400 Subject: [PATCH 3/4] Update deepmd/utils/batch_size.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Yifan Li李一帆 --- deepmd/utils/batch_size.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 6b4d99fa2c..3d17a58ecc 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -62,9 +62,8 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: if self.is_gpu_available(): self.minimal_not_working_batch_size = 2**31 log.info( - 'You may encounter the error "an illegal memory access was encountered". ' - "It is due to some issue of TensorFlow. " - "If you see this error, you can set the environment variable DP_INFER_BATCH_SIZE to a value smaller than the batch size you see adjusted. " + "If you encounter the error 'an illegal memory access was encountered', this may be due to a TensorFlow issue. " + "To mitigate this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. " "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). 
" ) else: From e46b23208c5178c8372e9aa52ad1a0178f994f3e Mon Sep 17 00:00:00 2001 From: Yifan Li Date: Fri, 24 May 2024 23:12:27 -0400 Subject: [PATCH 4/4] mitigate -> avoid; do not use AI-style language --- deepmd/utils/batch_size.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index 3d17a58ecc..30971c7256 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -63,7 +63,7 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: self.minimal_not_working_batch_size = 2**31 log.info( "If you encounter the error 'an illegal memory access was encountered', this may be due to a TensorFlow issue. " - "To mitigate this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. " + "To avoid this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. " "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). " ) else: