Skip to content

Commit

Permalink
Basic Formatting and linting
Browse files Browse the repository at this point in the history
  • Loading branch information
ZanMervic committed Jun 28, 2024
1 parent 39d097d commit 907f828
Show file tree
Hide file tree
Showing 17 changed files with 761 additions and 294 deletions.
64 changes: 53 additions & 11 deletions orangecontrib/fairness/evaluation/scoring.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
"""
This module contains classes for computing fairness scores.
Classes:
- StatisticalParityDifference
- EqualOpportunityDifference
- AverageOddsDifference
- DisparateImpact
"""

from abc import abstractmethod
from Orange.data import DiscreteVariable, ContinuousVariable, Domain
from Orange.evaluation.scoring import Score
Expand All @@ -19,7 +29,12 @@


class FairnessScorer(Score, abstract=True):
"""Abstract class which will allow fairness scores to be calculated and displayed in certain widgets"""
"""
Abstract class for computing fairness scores.
Abstract class which will allow fairness scores to be calculated and displayed.
Subclasses need to implement the metric method which will return the fairness score.
"""

class_types = (
DiscreteVariable,
Expand All @@ -28,17 +43,28 @@ class FairnessScorer(Score, abstract=True):

@staticmethod
def is_compatible(domain: Domain) -> bool:
"""Checks if the scorer is compatible with the domain of the data. If not the scores will not be computed."""
"""
Checks if the scorer is compatible with the domain of the data.
If not, the scores will not be computed.
Args:
domain (Domain): The domain of the data.
"""
return contains_fairness_attributes(domain)

def compute_score(self, results):
"""Method that creates a ClassificationMetric object used to compute fairness scores"""
"""
Creates a ClassificationMetric object used to compute fairness scores
Args:
results (Results): The results of the model.
"""

dataset, privileged_groups, unprivileged_groups = table_to_standard_dataset(
results.data
)

# We need to subset the created dataset so that it will match the shape/order
# We need to subset the created dataset so that it will match the shape/order
# This is needed when/if some of the rows in the data were used multiple times
dataset = dataset.subset(results.row_indices)
dataset_pred = dataset.copy()
Expand All @@ -54,12 +80,22 @@ def compute_score(self, results):

@abstractmethod
def metric(self, classification_metric):
"""Method that needs to be implemented by the subclasses of the FairnessScorer."""
"""
Abstract method that needs to be implemented by subclasses.
It should return the fairness score.
Args:
classification_metric (ClassificationMetric):
The ClassificationMetric object used to compute fairness scores.
"""
pass


class StatisticalParityDifference(FairnessScorer):
"""Class for Statistical Parity Difference fairness scoring."""
"""
A class for computing the Statistical Parity Difference fairness score.
"""

name = "SPD"
long_name = str(
Expand All @@ -76,7 +112,9 @@ def metric(self, classification_metric):


class EqualOpportunityDifference(FairnessScorer):
"""Class for Equal Opportunity Difference fairness scoring."""
"""
A class for computing the Equal Opportunity Difference fairness score.
"""

name = "EOD"
long_name = str(
Expand All @@ -94,7 +132,9 @@ def metric(self, classification_metric):


class AverageOddsDifference(FairnessScorer):
"""Class for Average Odds Difference fairness scoring."""
"""
A class for computing the Average Odds Difference fairness score.
"""

name = "AOD"
long_name = str(
Expand All @@ -114,7 +154,9 @@ def metric(self, classification_metric):


class DisparateImpact(FairnessScorer):
"""Class for Disparate Impact fairness scoring."""
"""
A class for computing the Disparate Impact fairness score.
"""

name = "DI"
long_name = str(
Expand All @@ -130,8 +172,8 @@ class DisparateImpact(FairnessScorer):
)

# TODO: When using randomize, models sometimes predict the same class for all instances
# This can lead to division by zero in the Disparate Impact score (and untrue results for the other scores)
# This can lead to division by zero in the Disparate Impact score
# (and untrue results for the other scores)
# What is the best way to handle this?
def metric(self, classification_metric):
return classification_metric.disparate_impact()

106 changes: 84 additions & 22 deletions orangecontrib/fairness/modeling/adversarial.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
This module contains the AdversarialDebiasingLearner and AdversarialDebiasingModel classes
which are used to create and fit the AdversarialDebiasing model from the aif360 library.
"""

import numpy as np

from Orange.base import Learner, Model
Expand All @@ -19,25 +24,32 @@
tf = None



# This gets called after the model is created and fitted
# It is stored so we can use it to predict on new data
class AdversarialDebiasingModel(Model):
"""Model created and fitted by the AdversarialDebiasingLearner, which is used to predict on new data"""
"""
Model created and fitted by the AdversarialDebiasingLearner, used to predict on new data.
"""

def __init__(self, model):
super().__init__()
self._model = model

def predict(self, data):
"""Function used to predict on new data"""
"""
Method used to 'preprocess', predict on new data and 'postprocess' the predictions.
Args:
data (Table): The data to predict on.
"""
if isinstance(data, Table):
standard_dataset, _, _ = table_to_standard_dataset(data)
predictions = self._model.predict(standard_dataset)

# Array of scores with a column of scores for each class
# The scores given by the model are always for the favorable class
# If the favorable class is 1 then the scores need to be flipped or else the AUC will be "reversed"
# If the favorable class is 1 then the scores need to be flipped or
# else the AUC will be "reversed"
# (the first column is 1 - scores and the second column is scores)
if standard_dataset.favorable_label == 0:
scores = np.hstack(
Expand All @@ -62,18 +74,41 @@ def predict_storage(self, data):
def __call__(self, data, ret=Model.Value):
return super().__call__(data, ret)


if is_tensorflow_installed():

class AdversarialDebiasingLearner(Learner):
"""Learner subclass used to create and fit the AdversarialDebiasingModel"""
"""
Learner subclass used to create and fit the AdversarialDebiasingModel
Attributes:
preprocessors (list): List of preprocessors, applied when __call__ function is called
callback (function): Callback function used to track the progress of the model fitting
Args:
preprocessors (list): List of preprocessors to apply to the data before fitting a model
classifier_num_hidden_units (int): Number of hidden units in the classifier
num_epochs (int): Number of epochs to train the model
batch_size (int): Batch size used to train the model
debias (bool): Whether to debias the model
adversary_loss_weight (float): Weight of the adversary loss
seed (int): Seed used to initialize the model
"""

__returns__ = AdversarialDebiasingModel
# List of preprocessors, these get applied when the __call__ function is called
preprocessors = [Normalize()]
callback = None

def __init__(self, preprocessors=None, classifier_num_hidden_units=100,
num_epochs=50, batch_size=128, debias=True,
adversary_loss_weight=0.1, seed=-1):
def __init__(
self,
preprocessors=None,
classifier_num_hidden_units=100,
num_epochs=50,
batch_size=128,
debias=True,
adversary_loss_weight=0.1,
seed=-1,
):
super().__init__(preprocessors=preprocessors)
self.params = vars()

Expand All @@ -83,12 +118,15 @@ def __init__(self, preprocessors=None, classifier_num_hidden_units=100,
"batch_size": batch_size,
"debias": debias,
"adversary_loss_weight": adversary_loss_weight,
**({"seed": seed} if seed != -1 else {})
**({"seed": seed} if seed != -1 else {}),
}

def _calculate_total_runs(self, data):
"""Function used to calculate the total number of runs the learner will perform on the data"""
# This is needed to calculate and display the progress of the training
"""
Method for calculating the total number of runs the learner will perform on the data.
It is used to calculate and display the progress of the training.
"""
num_epochs = self.params["num_epochs"]
batch_size = self.params["batch_size"]
num_instances = len(data)
Expand All @@ -97,7 +135,11 @@ def _calculate_total_runs(self, data):
return total_runs

def incompatibility_reason(self, domain):
"""Function used to check if the domain is compatible with the learner (contains fairness attributes)"""
"""
Method used to check if the domain is compatible with the learner.
The domain is compatible if it contains the fairness attributes.
"""
if not contains_fairness_attributes(domain):
return MISSING_FAIRNESS_ATTRIBUTES

Expand Down Expand Up @@ -142,26 +184,46 @@ def fit(self, data: Table) -> AdversarialDebiasingModel:
return AdversarialDebiasingModel(model=model)

def __call__(self, data, progress_callback=None):
"""Call method for AdversarialDebiasingLearner, in the superclass it calls the _fit_model function (and other things)"""
"""
Call method for AdversarialDebiasingLearner
In the superclass it calls the _fit_model function (and other things)
"""
self.callback = progress_callback
model = super().__call__(data, progress_callback)
model.params = self.params
return model

class CallbackSession(tf.Session):
"""Subclass of tensorflow session with callback functionality for progress tracking and displaying"""
"""
Subclass of tensorflow session.
It adds callback functionality for progress tracking and displaying.
def __init__(self, target="", graph=None, config=None, callback=None, total_runs=0):
Attributes:
callback (function): Callback function used to track the progress of the model fitting
run_count (int): Number of times the run function has been called
callback_enabled (bool): Flag to enable or disable the callback function
total_runs (int): Total number of runs the session will perform
"""

def __init__(
self, target="", graph=None, config=None, callback=None, total_runs=0
):
super().__init__(target=target, graph=graph, config=config)
self.callback = callback
self.run_count = 0
self.callback_enabled = False
self.total_runs = total_runs

def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
"""A overridden run function which calls the callback function and calculates the progress"""
# To calculate the progress using these ways we need to know the number of expected
# calls to the callback function and count how many times it has been called
"""
An overridden run function which calls the callback function and calculates the progress.
To calculate the progress this way we need to know the number of expected
calls to the callback function and count how many times it has been called.
"""

self.run_count += 1
progress = (self.run_count / self.total_runs) * 100
if self.callback_enabled and self.callback:
Expand All @@ -179,9 +241,9 @@ def disable_callback(self):
"""Disable callback method for the model prediction phase"""
self.callback_enabled = False


else:

class AdversarialDebiasingLearner(Learner):
"""Dummy class used if tensorflow is not installed"""

__returns__ = Model

Loading

0 comments on commit 907f828

Please sign in to comment.