add GaussianNLLLoss (ORNL#270)
pzhanggit authored and RylieWeaver committed Sep 25, 2024
1 parent 881ad51 commit 8efc696
Showing 4 changed files with 62 additions and 17 deletions.
65 changes: 49 additions & 16 deletions hydragnn/models/Base.py
@@ -69,6 +69,12 @@ def __init__(
             activation_function_type
         )
 
+        # output variance for Gaussian negative log likelihood loss
+        self.var_output = 0
+        if loss_function_type == "GaussianNLLLoss":
+            self.var_output = 1
+        self.loss_function_type = loss_function_type
+
         self.loss_function = loss_function_selection(loss_function_type)
         self.ilossweights_nll = ilossweights_nll
         self.ilossweights_hyperp = ilossweights_hyperp
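For context, torch.nn.GaussianNLLLoss takes three tensors (prediction means, targets, and a non-negative variance per element), unlike the two-argument losses already supported; that is why the var_output flag set here is used throughout the rest of this commit to double each head's output width. A minimal sketch of the PyTorch API, with made-up shapes:

import torch

loss_fn = torch.nn.GaussianNLLLoss()  # reduction="mean" by default
mean = torch.randn(4, 1)              # predicted means for 4 samples
target = torch.randn(4, 1)            # ground-truth values
var = torch.rand(4, 1) + 1e-6         # variances must be non-negative
print(loss_fn(mean, target, var))     # scalar negative log likelihood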
@@ -193,14 +199,21 @@ def _init_node_conv(self):
            if "last_layer" in inspect.signature(self.get_conv).parameters:
                self.convs_node_output.append(
                    self.get_conv(
-                       self.hidden_dim_node[-1], self.head_dims[ihead], last_layer=True
+                       self.hidden_dim_node[-1],
+                       self.head_dims[ihead] * (1 + self.var_output),
+                       last_layer=True,
                    )
                )
            else:
                self.convs_node_output.append(
-                   self.get_conv(self.hidden_dim_node[-1], self.head_dims[ihead])
+                   self.get_conv(
+                       self.hidden_dim_node[-1],
+                       self.head_dims[ihead] * (1 + self.var_output),
+                   )
                )
-           self.batch_norms_node_output.append(BatchNorm(self.head_dims[ihead]))
+           self.batch_norms_node_output.append(
+               BatchNorm(self.head_dims[ihead] * (1 + self.var_output))
+           )
 
    def _multihead(self):
        ############multiple heads/tasks################
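The (1 + self.var_output) multiplier used in this and the following hunks keeps every head at its original width when var_output == 0 and doubles it when GaussianNLLLoss is active: a head with head_dims[ihead] == 3, for example, now emits 6 channels, 3 means followed by 3 values that forward() later squares into variances.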
Expand Down Expand Up @@ -238,7 +251,7 @@ def _multihead(self):
denselayers.append(
Linear(
dim_head_hidden[-1],
self.head_dims[ihead] + self.ilossweights_nll * 1,
self.head_dims[ihead] * (1 + self.var_output),
)
)
head_NN = Sequential(*denselayers)
Expand All @@ -253,7 +266,7 @@ def _multihead(self):
# """if different graphs in the dataset have different size, one MLP is shared across all nodes """
head_NN = MLPNode(
self.hidden_dim,
self.head_dims[ihead],
self.head_dims[ihead] * (1 + self.var_output),
self.num_mlp,
self.hidden_dim_node,
self.config_heads["node"]["type"],
@@ -308,12 +321,15 @@ def forward(self, data):
        else:
            x_graph = global_mean_pool(x, data.batch.to(x.device))
        outputs = []
+       outputs_var = []
        for head_dim, headloc, type_head in zip(
            self.head_dims, self.heads_NN, self.head_type
        ):
            if type_head == "graph":
                x_graph_head = self.graph_shared(x_graph)
-               outputs.append(headloc(x_graph_head))
+               output_head = headloc(x_graph_head)
+               outputs.append(output_head[:, :head_dim])
+               outputs_var.append(output_head[:, head_dim:] ** 2)
            else:
                if self.node_NN_type == "conv":
                    for conv, batch_norm in zip(headloc[0::2], headloc[1::2]):
@@ -323,16 +339,23 @@ def forward(self, data):
                    x_node = x
                else:
                    x_node = headloc(x=x, batch=data.batch)
-               outputs.append(x_node)
+               outputs.append(x_node[:, :head_dim])
+               outputs_var.append(x_node[:, head_dim:] ** 2)
+       if self.var_output:
+           return outputs, outputs_var
        return outputs
 
    def loss(self, pred, value, head_index):
+       var = None
+       if self.var_output:
+           var = pred[1]
+           pred = pred[0]
        if self.ilossweights_nll == 1:
-           return self.loss_nll(pred, value, head_index)
+           return self.loss_nll(pred, value, head_index, var=var)
        elif self.ilossweights_hyperp == 1:
-           return self.loss_hpweighted(pred, value, head_index)
+           return self.loss_hpweighted(pred, value, head_index, var=var)
 
-   def loss_nll(self, pred, value, head_index):
+   def loss_nll(self, pred, value, head_index, var=None):
        # negative log likelihood loss
        # uncertainty to weigh losses in https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
        # fixme: Pei said that right now this is never used
@@ -353,7 +376,7 @@ def loss_nll(self, pred, value, head_index):
 
        return nll_loss, tasks_mseloss, []
 
-   def loss_hpweighted(self, pred, value, head_index):
+   def loss_hpweighted(self, pred, value, head_index, var=None):
        # weights for different tasks as hyper-parameters
        tot_loss = 0
        tasks_loss = []
@@ -364,11 +387,21 @@ def loss_hpweighted(self, pred, value, head_index):
            value_shape = head_val.shape
            if pred_shape != value_shape:
                head_val = torch.reshape(head_val, pred_shape)
-
-           tot_loss += (
-               self.loss_function(head_pre, head_val) * self.loss_weights[ihead]
-           )
-           tasks_loss.append(self.loss_function(head_pre, head_val))
+           if var is None:
+               assert (
+                   self.loss_function_type != "GaussianNLLLoss"
+               ), "Expecting var for GaussianNLLLoss, but got None"
+               tot_loss += (
+                   self.loss_function(head_pre, head_val) * self.loss_weights[ihead]
+               )
+               tasks_loss.append(self.loss_function(head_pre, head_val))
+           else:
+               head_var = var[ihead]
+               tot_loss += (
+                   self.loss_function(head_pre, head_val, head_var)
+                   * self.loss_weights[ihead]
+               )
+               tasks_loss.append(self.loss_function(head_pre, head_val, head_var))
 
        return tot_loss, tasks_loss

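Taken together, the forward and loss_hpweighted changes implement one convention: with var_output set, each head emits 2 * head_dim values, the first half read as means and the squared second half as variances (squaring guarantees the non-negativity GaussianNLLLoss requires). A standalone sketch of that convention, using made-up shapes outside any HydraGNN class:

import torch

head_dim = 3
raw = torch.randn(5, 2 * head_dim)  # hypothetical head output for 5 graphs
mean = raw[:, :head_dim]            # first half: predicted means
var = raw[:, head_dim:] ** 2        # squared second half: variances >= 0
target = torch.randn(5, head_dim)

loss_fn = torch.nn.GaussianNLLLoss()
print(loss_fn(mean, target, var))   # one head's loss term, as in loss_hpweighted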
4 changes: 4 additions & 0 deletions hydragnn/train/train_validate_test.py
@@ -616,6 +616,8 @@ def test(loader, model, verbosity, reduce_ranks=True, return_samples=True):
            error, tasks_loss = model.module.loss(pred, data.y, head_index)
            ## FIXME: temporary
            if int(os.getenv("HYDRAGNN_DUMP_TESTDATA", "0")) == 1:
+               if model.module.var_output:
+                   pred = pred[0]
                offset = 0
                for i in range(len(data)):
                    n = len(data[i].pos)
@@ -674,6 +676,8 @@
        data = data.to(get_device())
        ytrue = data.y
        pred = model(data)
+       if model.module.var_output:
+           pred = pred[0]
        for ihead in range(model.module.num_heads):
            head_pre = pred[ihead].reshape(-1, 1)
            head_val = ytrue[head_index[ihead]]
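Both hunks follow the same rule: model.module.loss() does its own unpacking of the (outputs, outputs_var) tuple, so it receives pred unchanged, while the test-data dump and the per-head metrics only need point predictions and therefore keep pred[0] when var_output is set.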
4 changes: 4 additions & 0 deletions hydragnn/utils/model.py
@@ -55,6 +55,10 @@ def loss_function_selection(loss_function_string: str):
        return torch.nn.SmoothL1Loss
    elif loss_function_string == "rmse":
        return lambda x, y: torch.sqrt(torch.nn.functional.mse_loss(x, y))
+   elif loss_function_string == "GaussianNLLLoss":
+       return torch.nn.GaussianNLLLoss()
    else:
        raise ValueError("Unknown loss function: " + loss_function_string)
 
 
def save_model(model, optimizer, name, path="./logs/", use_deepspeed=False):
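Note the asymmetry this hunk introduces: the GaussianNLLLoss branch returns an instantiated module whose call takes (input, target, var), while the remaining branches return two-argument callables, which is exactly what the var is None split in loss_hpweighted above accounts for. A usage sketch; the import path mirrors this file's location and is an assumption:

import torch
from hydragnn.utils.model import loss_function_selection  # assumed import path

loss_fn = loss_function_selection("GaussianNLLLoss")
pred = torch.randn(8, 1)
target = torch.randn(8, 1)
var = torch.rand(8, 1)  # e.g. the squared variance half of a head's output
print(loss_fn(pred, target, var))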
6 changes: 5 additions & 1 deletion tests/test_loss_and_activation_functions.py
@@ -101,8 +101,12 @@ def unittest_loss_and_activation_functions(
 
 
# Test all supported loss function types. Separate input file because only 2 steps are run.
-@pytest.mark.parametrize("loss_function_type", ["mse", "mae", "rmse"])
+@pytest.mark.parametrize(
+    "loss_function_type", ["mse", "mae", "rmse", "GaussianNLLLoss"]
+)
def pytest_loss_functions(loss_function_type, ci_input="ci.json", overwrite_data=False):
+   if loss_function_type == "GaussianNLLLoss":
+       ci_input = "ci_multihead.json"
    unittest_loss_and_activation_functions(
        "relu", loss_function_type, ci_input, overwrite_data
    )
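With this parametrization the new loss runs through the same two-step CI path as the others; a command along the lines of pytest tests/test_loss_and_activation_functions.py -k GaussianNLLLoss (exact invocation depends on this repo's pytest configuration) should select only the new case, which swaps in ci_multihead.json as shown above.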
