From f4e855ec6a3889c402d335487c16fc4795a42759 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Fri, 8 Dec 2023 19:02:11 -0500
Subject: [PATCH 1/2] print NaN loss when labeled data is not found

Currently, when a loss term is defined but the corresponding labeled
data is not found, `lcurve.out` shows a wrong RMSE, computed as if the
data were all zero. In this case, printing NaN is better.

Signed-off-by: Jinzhe Zeng
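
A minimal standalone sketch of the intended display behavior
(illustrative only, not part of this patch; assumes TensorFlow
1.x-style graph mode as used by DeePMD-kit, with hypothetical tensor
values and names):

    import numpy as np
    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()

    loss = tf.constant(0.25, dtype=tf.float64)          # a computed RMSE term
    find_energy = tf.placeholder(tf.float64, shape=())  # 1.0 if labels exist, else 0.0

    # Same tf.cond pattern as the Loss.display_if_exist helper added below:
    displayed = tf.cond(
        tf.cast(find_energy, tf.bool),
        lambda: loss,
        lambda: tf.cast(np.nan, dtype=loss.dtype),
    )

    with tf.Session() as sess:
        print(sess.run(displayed, feed_dict={find_energy: 1.0}))  # 0.25
        print(sess.run(displayed, feed_dict={find_energy: 0.0}))  # nan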
---
 deepmd/loss/dos.py    | 12 ++++++++----
 deepmd/loss/ener.py   | 44 ++++++++++++++++++++++++++++++-------------
 deepmd/loss/loss.py   | 19 +++++++++++++++++++
 deepmd/loss/tensor.py |  4 ++--
 4 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/deepmd/loss/dos.py b/deepmd/loss/dos.py
index fa30552486..7d38f2b17a 100644
--- a/deepmd/loss/dos.py
+++ b/deepmd/loss/dos.py
@@ -143,16 +143,20 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_dos:
             l2_loss += atom_norm_ener * (pref_dos * l2_dos_loss)
-            more_loss["l2_dos_loss"] = l2_dos_loss
+            more_loss["l2_dos_loss"] = self.display_if_exist(l2_dos_loss, find_dos)
         if self.has_cdf:
             l2_loss += atom_norm_ener * (pref_cdf * l2_cdf_loss)
-            more_loss["l2_cdf_loss"] = l2_cdf_loss
+            more_loss["l2_cdf_loss"] = self.display_if_exist(l2_cdf_loss, find_dos)
         if self.has_ados:
             l2_loss += global_cvt_2_ener_float(pref_ados * l2_atom_dos_loss)
-            more_loss["l2_atom_dos_loss"] = l2_atom_dos_loss
+            more_loss["l2_atom_dos_loss"] = self.display_if_exist(
+                l2_atom_dos_loss, find_atom_dos
+            )
         if self.has_acdf:
             l2_loss += global_cvt_2_ener_float(pref_acdf * l2_atom_cdf_loss)
-            more_loss["l2_atom_cdf_loss"] = l2_atom_cdf_loss
+            more_loss["l2_atom_cdf_loss"] = self.display_if_exist(
+                l2_atom_cdf_loss, find_atom_dos
+            )

         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py
index 95997bad10..d7f83f09e5 100644
--- a/deepmd/loss/ener.py
+++ b/deepmd/loss/ener.py
@@ -291,22 +291,32 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-            more_loss["l2_ener_loss"] = l2_ener_loss
+            more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_f:
             l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss)
-            more_loss["l2_force_loss"] = l2_force_loss
+            more_loss["l2_force_loss"] = self.display_if_exist(
+                l2_force_loss, find_force
+            )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-            more_loss["l2_virial_loss"] = l2_virial_loss
+            more_loss["l2_virial_loss"] = self.display_if_exist(
+                l2_virial_loss, find_virial
+            )
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-            more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+            more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+                l2_atom_ener_loss, find_atom_ener
+            )
         if self.has_pf:
             l2_loss += global_cvt_2_ener_float(pref_pf * l2_pref_force_loss)
-            more_loss["l2_pref_force_loss"] = l2_pref_force_loss
+            more_loss["l2_pref_force_loss"] = self.display_if_exist(
+                l2_pref_force_loss, find_atom_pref
+            )
         if self.has_gf:
             l2_loss += global_cvt_2_ener_float(pref_gf * l2_gen_force_loss)
-            more_loss["l2_gen_force_loss"] = l2_gen_force_loss
+            more_loss["l2_gen_force_loss"] = self.display_if_exist(
+                l2_gen_force_loss, find_drdq
+            )

         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
@@ -553,19 +563,25 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         if self.has_e:
             l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
-            more_loss["l2_ener_loss"] = l2_ener_loss
+            more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
         if self.has_fr:
             l2_loss += global_cvt_2_ener_float(pref_fr * l2_force_r_loss)
-            more_loss["l2_force_r_loss"] = l2_force_r_loss
+            more_loss["l2_force_r_loss"] = self.display_if_exist(
+                l2_force_r_loss, find_force
+            )
         if self.has_fm:
             l2_loss += global_cvt_2_ener_float(pref_fm * l2_force_m_loss)
-            more_loss["l2_force_m_loss"] = l2_force_m_loss
+            more_loss["l2_force_m_loss"] = self.display_if_exist(
+                l2_force_m_loss, find_force
+            )
         if self.has_v:
             l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss))
-            more_loss["l2_virial_loss"] = l2_virial_loss
+            more_loss["l2_virial_loss"] = self.display_if_exist(l2_virial_loss, find_virial)
         if self.has_ae:
             l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss)
-            more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss
+            more_loss["l2_atom_ener_loss"] = self.display_if_exist(
+                l2_atom_ener_loss, find_atom_ener
+            )

         # only used when tensorboard was set as true
         self.l2_loss_summary = tf.summary.scalar("l2_loss", tf.sqrt(l2_loss))
@@ -785,8 +801,10 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
         more_loss = {}
         l2_loss += atom_norm_ener * (pref_e * l2_ener_loss)
         l2_loss += global_cvt_2_ener_float(pref_ed * l2_ener_dipole_loss)
-        more_loss["l2_ener_loss"] = l2_ener_loss
-        more_loss["l2_ener_dipole_loss"] = l2_ener_dipole_loss
+        more_loss["l2_ener_loss"] = self.display_if_exist(l2_ener_loss, find_energy)
+        more_loss["l2_ener_dipole_loss"] = self.display_if_exist(
+            l2_ener_dipole_loss, find_ener_dipole
+        )

         self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss))
         self.l2_loss_ener_summary = tf.summary.scalar(
diff --git a/deepmd/loss/loss.py b/deepmd/loss/loss.py
index 9324077691..a719a08d81 100644
--- a/deepmd/loss/loss.py
+++ b/deepmd/loss/loss.py
@@ -8,6 +8,8 @@
     Tuple,
 )

+import numpy as np
+
 from deepmd.env import (
     tf,
 )
@@ -72,3 +74,20 @@ def eval(
         A dictionary that maps keys to values. It should contain key `natoms`
         """
+
+    @staticmethod
+    def display_if_exist(loss: tf.Tensor, find_property: float) -> tf.Tensor:
+        """Display NaN if labeled property is not found.
+
+        Parameters
+        ----------
+        loss : tf.Tensor
+            the loss tensor
+        find_property : float
+            whether the property is found
+        """
+        return tf.cond(
+            tf.cast(find_property, tf.bool),
+            lambda: loss,
+            lambda: tf.cast(np.nan, dtype=loss.dtype),
+        )
diff --git a/deepmd/loss/tensor.py b/deepmd/loss/tensor.py
index 74eb2b74dc..a40f95a18e 100644
--- a/deepmd/loss/tensor.py
+++ b/deepmd/loss/tensor.py
@@ -87,7 +87,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
             local_loss = global_cvt_2_tf_float(find_atomic) * tf.reduce_mean(
                 tf.square(self.scale * (polar - atomic_polar_hat)), name="l2_" + suffix
             )
-            more_loss["local_loss"] = local_loss
+            more_loss["local_loss"] = self.display_if_exist(local_loss, find_atomic)
             l2_loss += self.local_weight * local_loss
             self.l2_loss_local_summary = tf.summary.scalar(
                 "l2_local_loss_" + suffix, tf.sqrt(more_loss["local_loss"])
             )
@@ -118,7 +118,7 @@ def build(self, learning_rate, natoms, model_dict, label_dict, suffix):
                 tf.square(self.scale * (global_polar - polar_hat)), name="l2_" + suffix
             )
-            more_loss["global_loss"] = global_loss
+            more_loss["global_loss"] = self.display_if_exist(global_loss, find_global)
             self.l2_loss_global_summary = tf.summary.scalar(
                 "l2_global_loss_" + suffix,
                 tf.sqrt(more_loss["global_loss"]) / global_cvt_2_tf_float(atoms),

From 18c2ed15401d165eecddd96d84371daa1d30ed34 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Mon, 11 Dec 2023 17:44:53 -0500
Subject: [PATCH 2/2] add comment for nan

Signed-off-by: Jinzhe Zeng
---
 deepmd/train/trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index bbcb305404..3b81740a93 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -943,6 +943,7 @@ def print_header(fp, train_results, valid_results, multi_task_mode=False):
             for k in train_results[fitting_key].keys():
                 print_str += prop_fmt % (k + "_trn")
             print_str += "   %8s\n" % (fitting_key + "_lr")
+    print_str += "# If there is no available reference data, rmse_*_{val,trn} will print nan\n"
     fp.write(print_str)
     fp.flush()
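
Note on downstream tooling: the literal `nan` values written to
`lcurve.out` parse cleanly as float NaN. A minimal sketch of reading
them back (the column name `rmse_e_trn` is illustrative; the actual
columns depend on the configured loss terms):

    import numpy as np

    # names=True takes the field names from the "#"-prefixed header line;
    # the extra comment line added by this patch is skipped as a comment.
    data = np.genfromtxt("lcurve.out", names=True)
    print(np.isnan(data["rmse_e_trn"]).any())  # True if a step had no energy labels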