Basic Formatting and linting

ZanMervic · Jun 28, 2024 · 907f828 · 907f828
1 parent 39d097d
commit 907f828
Show file tree

Hide file tree

Showing 17 changed files with 761 additions and 294 deletions.
diff --git a/orangecontrib/fairness/evaluation/scoring.py b/orangecontrib/fairness/evaluation/scoring.py
@@ -1,3 +1,13 @@
+"""
+This module contains classes for computing fairness scores.
+
+Classes:
+- StatisticalParityDifference
+- EqualOpportunityDifference
+- AverageOddsDifference
+- DisparateImpact
+"""
+
 from abc import abstractmethod
 from Orange.data import DiscreteVariable, ContinuousVariable, Domain
 from Orange.evaluation.scoring import Score
@@ -19,7 +29,12 @@
 
 
 class FairnessScorer(Score, abstract=True):
- """Abstract class which will allow fairness scores to be calculated and displayed in certain widgets"""
+ """
+ Abstract class for computing fairness scores.
+
+ Abstract class which will allow fairness scores to be calculated and displayed.
+ Subclasses need to implement the metric method which will return the fairness score.
+ """
 
  class_types = (
  DiscreteVariable,
@@ -28,17 +43,28 @@ class FairnessScorer(Score, abstract=True):
 
  @staticmethod
  def is_compatible(domain: Domain) -> bool:
- """Checks if the scorer is compatible with the domain of the data. If not the scores will not be computed."""
+ """
+ Checks if the scorer is compatible with the domain of the data.
+ If not the scores will not be computed.
+
+ Args:
+ domain (Domain): The domain of the data.
+ """
  return contains_fairness_attributes(domain)
 
  def compute_score(self, results):
- """Method that creates a ClassificationMetric object used to compute fairness scores"""
+ """
+ Creates a ClassificationMetric object used to compute fairness scores
+
+ Args:
+ results (Results): The results of the model.
+ """
 
  dataset, privileged_groups, unprivileged_groups = table_to_standard_dataset(
  results.data
  )
 
- # We need to subset the created dataset so that it will match the shape/order 
+ # We need to subset the created dataset so that it will match the shape/order
  # This is needed when/if some of the rows in the data were used multiple times
  dataset = dataset.subset(results.row_indices)
  dataset_pred = dataset.copy()
@@ -54,12 +80,22 @@ def compute_score(self, results):
 
  @abstractmethod
  def metric(self, classification_metric):
- """Method that needs to be implemented by the subclasses of the FairnessScorer."""
+ """
+ Abstract method that needs to be implemented by subclasses.
+
+ It should return the fairness score.
+
+ Args:
+ classification_metric (ClassificationMetric):
+ The ClassificationMetric object used to compute fairness scores.
+ """
  pass
 
 
 class StatisticalParityDifference(FairnessScorer):
- """Class for Statistical Parity Difference fairness scoring."""
+ """
+ A class for computing the Statistical Parity Difference fairness score.
+ """
 
  name = "SPD"
  long_name = str(
@@ -76,7 +112,9 @@ def metric(self, classification_metric):
 
 
 class EqualOpportunityDifference(FairnessScorer):
- """Class for Equal Opportunity Difference fairness scoring."""
+ """
+ A class for computing the Equal Opportunity Difference fairness score.
+ """
 
  name = "EOD"
  long_name = str(
@@ -94,7 +132,9 @@ def metric(self, classification_metric):
 
 
 class AverageOddsDifference(FairnessScorer):
- """Class for Average Odds Difference fairness scoring."""
+ """
+ A class for computing the Average Odds Difference fairness score.
+ """
 
  name = "AOD"
  long_name = str(
@@ -114,7 +154,9 @@ def metric(self, classification_metric):
 
 
 class DisparateImpact(FairnessScorer):
- """Class for Disparate Impact fairness scoring."""
+ """
+ A class for computing the Disparate Impact fairness score.
+ """
 
  name = "DI"
  long_name = str(
@@ -130,8 +172,8 @@ class DisparateImpact(FairnessScorer):
  )
 
  # TODO: When using randomize, models sometimes predict the same class for all instances
- # This can lead to division by zero in the Disparate Impact score (and untrue results for the other scores)
+ # This can lead to division by zero in the Disparate Impact score
+ # (and untrue results for the other scores)
  # What is the best way to handle this?
  def metric(self, classification_metric):
  return classification_metric.disparate_impact()
-
diff --git a/orangecontrib/fairness/modeling/adversarial.py b/orangecontrib/fairness/modeling/adversarial.py
@@ -1,3 +1,8 @@
+"""
+This module contains the AdversarialDebiasingLearner and AdversarialDebiasingModel classes 
+which are used to create and fit the AdversarialDebiasing model from the aif360 library.
+"""
+
 import numpy as np
 
 from Orange.base import Learner, Model
@@ -19,25 +24,32 @@
  tf = None
 
 
-
 # This gets called after the model is created and fitted
 # It is stored so we can use it to predict on new data
 class AdversarialDebiasingModel(Model):
- """Model created and fitted by the AdversarialDebiasingLearner, which is used to predict on new data"""
+ """
+ Model created and fitted by the AdversarialDebiasingLearner, used to predict on new data.
+ """
 
  def __init__(self, model):
  super().__init__()
  self._model = model
 
  def predict(self, data):
- """Function used to predict on new data"""
+ """
+ Method used to 'preprocess', predict on new data and 'postprocess' the predictions.
+
+ Args:
+ data (Table): The data to predict on.
+ """
  if isinstance(data, Table):
  standard_dataset, _, _ = table_to_standard_dataset(data)
  predictions = self._model.predict(standard_dataset)
 
  # Array of scores with a column of scores for each class
  # The scores given by the model are always for the favorable class
- # If the favorable class is 1 then the scores need to be flipped or else the AUC will be "reversed"
+ # If the favorable class is 1 then the scores need to be flipped or
+ # else the AUC will be "reversed"
  # (the first column is 1 - scores and the second column is scores)
  if standard_dataset.favorable_label == 0:
  scores = np.hstack(
@@ -62,18 +74,41 @@ def predict_storage(self, data):
  def __call__(self, data, ret=Model.Value):
  return super().__call__(data, ret)
 
+
 if is_tensorflow_installed():
+
  class AdversarialDebiasingLearner(Learner):
- """Learner subclass used to create and fit the AdversarialDebiasingModel"""
+ """
+ Learner subclass used to create and fit the AdversarialDebiasingModel
+
+ Attributes:
+ preprocessors (list): List of preprocessors, applied when __call__ function is called
+ callback (function): Callback function used to track the progress of the model fitting
+
+ Args:
+ preprocessors (list): List of preprocessors to apply to the data before fitting a model
+ classifier_num_hidden_units (int): Number of hidden units in the classifier
+ num_epochs (int): Number of epochs to train the model
+ batch_size (int): Batch size used to train the model
+ debias (bool): Whether to debias the model
+ adversary_loss_weight (float): Weight of the adversary loss
+ seed (int): Seed used to initialize the model
+ """
 
  __returns__ = AdversarialDebiasingModel
- # List of preprocessors, these get applied when the __call__ function is called
  preprocessors = [Normalize()]
  callback = None
 
- def __init__(self, preprocessors=None, classifier_num_hidden_units=100, 
- num_epochs=50, batch_size=128, debias=True, 
- adversary_loss_weight=0.1, seed=-1):
+ def __init__(
+ self,
+ preprocessors=None,
+ classifier_num_hidden_units=100,
+ num_epochs=50,
+ batch_size=128,
+ debias=True,
+ adversary_loss_weight=0.1,
+ seed=-1,
+ ):
  super().__init__(preprocessors=preprocessors)
  self.params = vars()
 
@@ -83,12 +118,15 @@ def __init__(self, preprocessors=None, classifier_num_hidden_units=100,
  "batch_size": batch_size,
  "debias": debias,
  "adversary_loss_weight": adversary_loss_weight,
- **({"seed": seed} if seed != -1 else {})
+ **({"seed": seed} if seed != -1 else {}),
  }
 
  def _calculate_total_runs(self, data):
- """Function used to calculate the total number of runs the learner will perform on the data"""
- # This is need to calculate and display the progress of the training
+ """
+ Method for calculating the total number of runs the learner will perform on the data
+
+ Used to calculate and display the progress of the training.
+ """
  num_epochs = self.params["num_epochs"]
  batch_size = self.params["batch_size"]
  num_instances = len(data)
@@ -97,7 +135,11 @@ def _calculate_total_runs(self, data):
  return total_runs
 
  def incompatibility_reason(self, domain):
- """Function used to check if the domain is compatible with the learner (contains fairness attributes)"""
+ """
+ Method used to check if the domain is compatible with the learner.
+
+ The domain is compatible if it contains the fairness attributes.
+ """
  if not contains_fairness_attributes(domain):
  return MISSING_FAIRNESS_ATTRIBUTES
 
@@ -142,26 +184,46 @@ def fit(self, data: Table) -> AdversarialDebiasingModel:
  return AdversarialDebiasingModel(model=model)
 
  def __call__(self, data, progress_callback=None):
- """Call method for AdversarialDebiasingLearner, in the superclass it calls the _fit_model function (and other things)"""
+ """
+ Call method for AdversarialDebiasingLearner
+
+ In the superclass it calls the _fit_model function (and other things)
+ """
  self.callback = progress_callback
  model = super().__call__(data, progress_callback)
  model.params = self.params
  return model
- 
+
  class CallbackSession(tf.Session):
- """Subclass of tensorflow session with callback functionality for progress tracking and displaying"""
+ """
+ Subclass of tensorflow session.
+
+ It adds callback functionality for progress tracking and displaying.
 
- def __init__(self, target="", graph=None, config=None, callback=None, total_runs=0):
+ Attributes:
+ callback (function): Callback function used to track the progress of the model fitting
+ run_count (int): Number of times the run function has been called
+ callback_enabled (bool): Flag to enable or disable the callback function
+ total_runs (int): Total number of runs the session will perform
+ """
+
+ def __init__(
+ self, target="", graph=None, config=None, callback=None, total_runs=0
+ ):
  super().__init__(target=target, graph=graph, config=config)
  self.callback = callback
  self.run_count = 0
  self.callback_enabled = False
  self.total_runs = total_runs
 
  def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
- """A overridden run function which calls the callback function and calculates the progress"""
- # To calculate the progress using these ways we need to know the number of expected
- # calls to the callback function and count how many times it has been called
+ """
+ A overridden run function which calls the callback function and calculates the progress
+
+ To calculate the progress using these ways we need to know the number of expected
+ calls to the callback function and count how many times it has been called.
+ """
+
  self.run_count += 1
  progress = (self.run_count / self.total_runs) * 100
  if self.callback_enabled and self.callback:
@@ -179,9 +241,9 @@ def disable_callback(self):
  """Disable callback method for the model prediction fase"""
  self.callback_enabled = False
 
-
 else:
+
  class AdversarialDebiasingLearner(Learner):
  """Dummy class used if tensorflow is not installed"""
+
  __returns__ = Model
-