Libr-AI · HanXudong · May 25, 2022 · May 25, 2022 · May 25, 2022 · May 25, 2022
diff --git a/fairlib/src/base_options.py b/fairlib/src/base_options.py
@@ -175,6 +175,8 @@ def __getattr__(self, name):
                             help='number of total epochs to train (default: 100)')
         parser.add_argument('--lr', type=pos_float, default=0.003, metavar='LR',
                             help='learning rate used to actually learn stuff (default: 0.003)')
+        parser.add_argument('--weight_decay', type=float, default=0.0,
+                            help='weight decay (L2 penalty) (default: 0)')
         parser.add_argument('--epochs_since_improvement', type=pos_int, default=5,
                             help='terminate training for early stopping')
         parser.add_argument('--base_seed', type=int, default=1, metavar='S',
@@ -203,6 +205,12 @@ def __getattr__(self, name):
         parser.add_argument('--conf_file', type=str, default=None,
                             help='path to the YAML file for reproduce an an experiment')
 
+        # Regression related arguments
+        parser.add_argument('--regression',  action='store_true', default=False, 
+                            help='indicate the downstream task is regression')
+        parser.add_argument('--n_bins',  type=int, default=4, 
+                            help='number of bins for discretizing proxy labels')
+
         # Handle iPython arguments
         parser.add_argument('--f', type=str, default=None, help='path to the YAML file for reproduce an an experiment')
 
@@ -466,6 +474,11 @@ def set_state(self, state, dummy=False, silence=False):
             np.random.seed(seed)
             random.seed(seed)
 
+            # Init for regression
+            if state.regression:
+                # Set the output dim to 1
+                state.num_classes = 1
+
             # Init the dataloaders
             if state.data_dir is None:
                 state.data_dir = dataloaders.default_dataset_roots[state.dataset]

diff --git a/fairlib/src/dataloaders/__init__.py b/fairlib/src/dataloaders/__init__.py
@@ -33,7 +33,12 @@ def get_dataloaders(args):
     Returns:
         tuple: dataloaders for training set, development set, and test set.
     """
-    assert args.dataset in ["Sample", "test", "Moji", "Bios_gender", "Bios_economy", "Bios_both"], "Not implemented"
+    assert args.dataset in [
+        "Sample", "test", "Moji", 
+        "Bios_gender", "Bios_economy", "Bios_both",
+        "Valence",
+        ], "Not implemented"
+
     if args.dataset == "Moji":
         task_dataloader = DeepMojiDataset
     elif args.dataset in ["Bios_gender", "Bios_economy", "Bios_both"]:
@@ -43,6 +48,8 @@ def get_dataloaders(args):
         task_dataloader = TestDataset
     elif args.dataset == "Sample":
         task_dataloader = SampleDataset
+    elif args.dataset == "Valence":
+        task_dataloader = ValenceDataset
     else:
         pass
 

diff --git a/fairlib/src/dataloaders/loaders.py b/fairlib/src/dataloaders/loaders.py
@@ -139,4 +139,23 @@ def load_data(self):
         elif self.args.protected_task == "economy":
             self.protected_label = data["economy_class"].astype(np.int32) # Economy
         else:
-            self.protected_label = data["intersection_class"].astype(np.int32) # Intersection
+            self.protected_label = data["intersection_class"].astype(np.int32) # Intersection
+
+class ValenceDataset(BaseDataset):
+    """Dataset whose target is the ``valence`` column and whose protected attribute is ``gender``."""
+    text_type = "text"  # column handed to the text encoder on the "BERT" path
+    embedding_type = "cls"  # column used directly as features on the "Fixed" path
+    CV_fold = 0  # fold index embedded in the pickle file name
+    def load_data(self):
+        """Read the pickle for this split and fold, then populate ``X``, ``y`` and ``protected_label``."""
+        self.filename = "valence_arousal_{}_{}.pickle".format(self.split, self.CV_fold)
+        frame = pd.read_pickle(Path(self.args.data_dir) / self.filename)
+        architecture = self.args.encoder_architecture
+        if architecture not in ("Fixed", "BERT"):
+            raise NotImplementedError
+        if architecture == "Fixed":
+            self.X = list(frame[self.embedding_type])
+        else:
+            self.X = self.args.text_encoder.encoder(list(frame[self.text_type]))
+        self.y = frame["valence"].astype(np.float64)
+        self.protected_label = frame["gender"].astype(np.int32)
diff --git a/fairlib/src/dataloaders/utils.py b/fairlib/src/dataloaders/utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 import torch
+import pandas as pd
 from .BT import get_weights, get_sampled_indices
 from .generalized_BT import get_data_distribution, manipulate_data_distribution
 
@@ -28,8 +29,11 @@ def __init__(self, args, split):
         self.protected_label = []
         self.instance_weights = []
         self.adv_instance_weights = []
+        self.regression_label = []
 
         self.load_data()
+
+        self.regression_init()
 
         self.X = np.array(self.X)
         if len(self.X.shape) == 3:
@@ -51,7 +55,7 @@ def __len__(self):
 
     def __getitem__(self, index):
         'Generates one sample of data'
-        return self.X[index], self.y[index], self.protected_label[index], self.instance_weights[index], self.adv_instance_weights[index]
+        return self.X[index], self.y[index], self.protected_label[index], self.instance_weights[index], self.adv_instance_weights[index], self.regression_label[index]
 
     def load_data(self):
         pass
@@ -129,4 +133,18 @@ def adv_balanced_training(self):
                 self.adv_instance_weights = get_weights(self.args.adv_BTObj, self.y, self.protected_label)
             else:
                 raise NotImplementedError
-        return None
+        return None
+
+    def regression_init(self):
+        """Store raw targets in ``regression_label`` and, for regression, replace ``y`` with bin indices."""
+        if not self.args.regression:
+            # Placeholder zeros keep ``regression_label`` indexable per sample in ``__getitem__``.
+            self.regression_label = np.zeros(len(self.protected_label), dtype=int)
+            return
+        if self.split == "train":
+            # Equal-frequency edges come from the training targets and are shared through ``args``.
+            bin_labels, self.args.regression_bins = pd.qcut(self.y, q=self.args.n_bins, labels=False, duplicates="drop", retbins=True)
+        else:
+            # Clip to the stored edges and include the lowest one so no value falls outside every bin (NaN label).
+            bin_labels = pd.cut(np.clip(self.y, self.args.regression_bins[0], self.args.regression_bins[-1]), bins=self.args.regression_bins, labels=False, include_lowest=True, duplicates="drop")
+        self.regression_label, self.y = np.array(self.y), bin_labels
diff --git a/fairlib/src/evaluators/__init__.py b/fairlib/src/evaluators/__init__.py
@@ -26,12 +26,16 @@ def present_evaluation_scores(
     valid_scores, valid_confusion_matrices = gap_eval_scores(
         y_pred=valid_preds,
         y_true=valid_labels, 
-        protected_attribute=valid_private_labels)
+        protected_attribute=valid_private_labels,
+        args = model.args,
+        )
 
     test_scores, test_confusion_matrices = gap_eval_scores(
         y_pred=test_preds,
         y_true=test_labels, 
-        protected_attribute=test_private_labels)
+        protected_attribute=test_private_labels,
+        args = model.args,
+        )
 
     # Save checkpoint
     save_checkpoint(

diff --git a/fairlib/src/evaluators/evaluator.py b/fairlib/src/evaluators/evaluator.py
@@ -12,6 +12,9 @@
 from sklearn.metrics import recall_score
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import confusion_matrix
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import r2_score
 import numpy as np
 
 from itertools import combinations
@@ -120,7 +123,7 @@ def Aggregation_GAP(distinct_groups, all_scores, metric="TPR", group_agg_power =
 
     return score_gaps
 
-def gap_eval_scores(y_pred, y_true, protected_attribute, metrics=["TPR","FPR","PPR"]):
+def gap_eval_scores(y_pred, y_true, protected_attribute, metrics=["TPR","FPR","PPR"], args = None):
     """fairness evaluation
 
     Args:
@@ -136,6 +139,26 @@ def gap_eval_scores(y_pred, y_true, protected_attribute, metrics=["TPR","FPR","P
     y_true = np.array(y_true)
     protected_attribute = np.array(protected_attribute)
 
+    if (args is not None) and args.regression:
+        eval_scores = {
+            "mean_absolute_error" : mean_absolute_error(y_true, y_pred),
+            "mean_squared_error" : mean_squared_error(y_true, y_pred),
+            "r2_score" : r2_score(y_true, y_pred),
+        }
+        # Processing regression labels for fairness evaluation under the classification framework
+        y_true = pd.cut(np.squeeze(y_true), bins=args.regression_bins, labels=False, duplicates = "drop")
+        y_pred = pd.cut(np.squeeze(y_pred), bins=args.regression_bins, labels=False, duplicates = "drop")
+        y_true = np.nan_to_num(y_true, nan=0)
+        y_pred = np.nan_to_num(y_pred, nan=0)
+
+    else:
+        # performance evaluation
+        eval_scores = {
+            "accuracy" : accuracy_score(y_true, y_pred),
+            "macro_fscore" : f1_score(y_true, y_pred, average="macro"),
+            "micro_fscore" : f1_score(y_true, y_pred, average="micro"),
+        }
+
     all_scores = {}
     confusion_matrices = {}
     # Overall evaluation
@@ -152,12 +175,6 @@ def gap_eval_scores(y_pred, y_true, protected_attribute, metrics=["TPR","FPR","P
         confusion_matrices[gid] = group_confusion_matrix
         all_scores[gid] = confusion_matrix_based_scores(group_confusion_matrix)
 
-    eval_scores = {
-        "accuracy" : accuracy_score(y_true, y_pred),
-        "macro_fscore" : f1_score(y_true, y_pred, average="macro"),
-        "micro_fscore" : f1_score(y_true, y_pred, average="micro"),
-    }
-
     for _metric in metrics:
         eval_scores["{}_GAP".format(_metric)] = Aggregation_GAP(distinct_groups=distinct_groups, all_scores=all_scores, metric=_metric)
 

diff --git a/fairlib/src/networks/DyBT/fairbatch_sampler.py b/fairlib/src/networks/DyBT/fairbatch_sampler.py
@@ -133,7 +133,10 @@ def epoch_loss(self):
 
         self.model.eval()
 
-        criterion = torch.nn.CrossEntropyLoss(reduction='none')
+        if self.args.regression:
+            criterion = torch.nn.MSELoss(reduction='none')
+        else:
+            criterion = torch.nn.CrossEntropyLoss(reduction='none')
 
         batch_losses = []
 
@@ -151,12 +154,18 @@ def epoch_loss(self):
                 instance_weights = batch[3].float()
                 instance_weights = instance_weights.to(device)
 
+            if self.args.regression:
+                regression_tags = batch[5].squeeze()
+                regression_tags = regression_tags.to(device)
+
             # main model predictions
             if self.args.gated:
                 predictions = self.model(text, p_tags)
             else:
                 predictions = self.model(text)
 
+            predictions = predictions if not self.args.regression else predictions.squeeze()
+
             # add the weighted loss
             if self.args.BT is not None and self.args.BT == "Reweighting":
                 loss = criterion(predictions, tags)

diff --git a/fairlib/src/networks/utils.py b/fairlib/src/networks/utils.py
@@ -5,6 +5,7 @@
 import time
 from pathlib import Path
 from ..evaluators import print_network, present_evaluation_scores
+import pandas as pd
 
 # train the main model with adv loss
 def train_epoch(model, iterator, args, epoch):
@@ -27,6 +28,10 @@ def train_epoch(model, iterator, args, epoch):
         if args.BT is not None and args.BT == "Reweighting":
             instance_weights = batch[3].float()
             instance_weights = instance_weights.to(args.device)
+
+        if args.regression:
+            regression_tags = batch[5].float().squeeze()
+            regression_tags = regression_tags.to(args.device)
 
         text = text.to(args.device)
         tags = tags.to(args.device)
@@ -41,13 +46,16 @@ def train_epoch(model, iterator, args, epoch):
             predictions = model(text, p_tags)
         else:
             predictions = model(text)
+
+        predictions = predictions if not args.regression else predictions.squeeze()
+
         # main tasks loss
         # add the weighted loss
         if args.BT is not None and args.BT == "Reweighting":
-            loss = criterion(predictions, tags)
+            loss = criterion(predictions, tags if not args.regression else regression_tags)
             loss = torch.mean(loss * instance_weights)
         else:
-            loss = criterion(predictions, tags)
+            loss = criterion(predictions, tags if not args.regression else regression_tags)
 
         if args.adv_debiasing:
             # Update discriminator if needed
@@ -151,25 +159,35 @@ def eval_epoch(model, iterator, args):
             instance_weights = batch[3].float()
             instance_weights = instance_weights.to(device)
 
+        if args.regression:
+            regression_tags = batch[5].squeeze()
+            regression_tags = regression_tags.to(args.device)
+
         # main model predictions
         if args.gated:
             predictions = model(text, p_tags)
         else:
             predictions = model(text)
-
+
+        predictions = predictions if not args.regression else predictions.squeeze()
+
         # add the weighted loss
         if args.BT is not None and args.BT == "Reweighting":
-            loss = criterion(predictions, tags)
+            loss = criterion(predictions, tags if not args.regression else regression_tags)
             loss = torch.mean(loss * instance_weights)
         else:
-            loss = criterion(predictions, tags)
+            loss = criterion(predictions, tags if not args.regression else regression_tags)
 
         epoch_loss += loss.item()
 
         predictions = predictions.detach().cpu()
-        tags = tags.cpu().numpy()
 
-        preds += list(torch.argmax(predictions, axis=1).numpy())
+        if args.regression:
+            preds += list(predictions.numpy())
+            tags = regression_tags.cpu().numpy()
+        else:
+            tags = tags.cpu().numpy()
+            preds += list(torch.argmax(predictions, axis=1).numpy())
         labels += list(tags)
 
         private_labels += list(batch[2].cpu().numpy())
@@ -184,12 +202,21 @@ def init_for_training(self):
         self.to(self.device)
 
         self.learning_rate = self.args.lr
-        self.optimizer = Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=self.learning_rate)
+        self.optimizer = Adam(
+            filter(lambda p: p.requires_grad, self.parameters()), 
+            lr=self.learning_rate,
+            weight_decay = self.args.weight_decay,
+            )
 
         if self.args.BT and self.args.BT == "Reweighting":
-            self.criterion = torch.nn.CrossEntropyLoss(reduction = "none")
+            reduction = "none"
+        else:
+            reduction = "mean"
+
+        if self.args.regression:
+            self.criterion = torch.nn.MSELoss(reduction = reduction)
         else:
-            self.criterion = torch.nn.CrossEntropyLoss()
+            self.criterion = torch.nn.CrossEntropyLoss(reduction = reduction)
 
         print_network(self, verbose=True)