Merged
29 changes: 17 additions & 12 deletions examples/demo/ensemble_uncertainty_classification.py
@@ -5,11 +5,10 @@
 It illustrates the ensemble method.
 """
 
-# User imports
 import logging
 
 import torch
-
+# Pytorch
 from torchvision import datasets, transforms
 
 # Dataset and machine learning model
@@ -18,12 +17,13 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningEnsembleModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.classification import LeastConfidenceStrategy
 
 # dataset
 dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
-
-dataset = [dataset[i] for i in range(10000)]
+dataset = torch.utils.data.Subset(dataset, indices=range(10000))
 
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])
 train_indices = train_ds.indices
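
A note on the dataset change just above: `torch.utils.data.Subset` wraps the underlying dataset and indexes it lazily, whereas the old list comprehension eagerly materialised all 10000 `(image, label)` tuples. A quick self-contained check of the replacement, using only stock torch/torchvision:

```python
import torch
from torchvision import datasets, transforms

full = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())

# Subset keeps Dataset semantics and only touches the wrapped dataset on access;
# len() and integer indexing behave exactly like the eager list did.
subset = torch.utils.data.Subset(full, indices=range(10000))
print(len(subset))         # 10000
print(subset[0][0].shape)  # torch.Size([1, 28, 28])
```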
@@ -32,7 +32,7 @@
 
 # model
 model = LightningEnsembleModel(
-    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 4}, n_estimators=5
+    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 5}, n_estimators=5
 )
 
 # data_manager and defining strategy
@@ -44,18 +44,23 @@
     loader_batch_size=1000,
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+# Set up active learning pipeline
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
-# performance with the full trainset labelled
-strategy.theoretical_performance()
+# See performance with the full trainset labelled
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=1000)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=1000)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=1000)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=1000)
+
+# Pretty printed summary of the components in the pipeline along with annotation/performance history
+print(pipeline)
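
Taken together, the changes to this example show the new shape of the API: strategies are now stateless, and a `Pipeline` owns the `DataManager`, model, strategy, and oracle. A minimal end-to-end sketch of the refactored flow, assembled only from calls visible in this diff (the `MnistClassification` import path is an assumption; the real import sits in a collapsed region of the file):

```python
import torch
from torchvision import datasets, transforms

from examples.utils.ml_models import MnistClassification  # assumed path; import is collapsed in this diff
from pyrelational.data import DataManager
from pyrelational.models import LightningEnsembleModel
from pyrelational.oracles import BenchmarkOracle
from pyrelational.pipeline import Pipeline
from pyrelational.strategies.classification import LeastConfidenceStrategy

dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
dataset = torch.utils.data.Subset(dataset, indices=range(10000))
train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])

data_manager = DataManager(
    dataset=dataset,
    train_indices=train_ds.indices,
    validation_indices=val_ds.indices,
    test_indices=test_ds.indices,
    loader_batch_size=1000,
)
model = LightningEnsembleModel(
    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 5}, n_estimators=5
)

# Strategy and oracle are constructed without data or model; the Pipeline wires
# everything together and owns the annotation loop.
strategy = LeastConfidenceStrategy()
oracle = BenchmarkOracle()
pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)

pipeline.theoretical_performance()                              # reference run with all labels revealed
to_annotate = pipeline.active_learning_step(num_annotate=1000)  # indices the strategy ranks highest
pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
pipeline.full_active_learning_run(num_annotate=1000)            # iterate until the pool is exhausted
print(pipeline)                                                 # annotation/performance history
```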
23 changes: 15 additions & 8 deletions examples/demo/lightning_diversity_classification.py
@@ -16,18 +16,20 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.relative_distance_strategy import (
     RelativeDistanceStrategy,
 )
 
-# dataset
+# Obtain dataset and set up labelled and unlabelled subsets
 dataset = BreastCancerDataset()
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [500, 30, 39])
 train_indices = train_ds.indices
 val_indices = val_ds.indices
 test_indices = test_ds.indices
 
-# model
+# Instantiate model
 model = LightningModel(model_class=BreastCancerClassification, model_config={}, trainer_config={"epochs": 4})
 
 # data_manager and defining strategy
@@ -39,18 +41,23 @@
     hit_ratio_at=5,
 )
 
-strategy = RelativeDistanceStrategy(data_manager=data_manager, model=model)
+# Setup
+strategy = RelativeDistanceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+
+# Pretty printed summary of the components in the pipeline along with annotation/performance history
+print(pipeline)
18 changes: 12 additions & 6 deletions examples/demo/lightning_diversity_regression.py
@@ -16,6 +16,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.relative_distance_strategy import (
     RelativeDistanceStrategy,
 )
@@ -39,18 +41,22 @@
     hit_ratio_at=5,
 )
 
-strategy = RelativeDistanceStrategy(data_manager=data_manager, model=model)
+# Setup pipeline
+strategy = RelativeDistanceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
+
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
19 changes: 11 additions & 8 deletions examples/demo/lightning_representative_classification.py
@@ -16,6 +16,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.representative_sampling_strategy import (
     RepresentativeSamplingStrategy,
 )
@@ -39,20 +41,21 @@
     loader_batch_size=100,
 )
 
-strategy = RepresentativeSamplingStrategy(
-    data_manager=data_manager, model=model, clustering_method="AffinityPropagation"
-)
+# Setup
+strategy = RepresentativeSamplingStrategy(clustering_method="AffinityPropagation")
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
18 changes: 10 additions & 8 deletions examples/demo/mcdropout_uncertainty_classification.py
@@ -19,13 +19,13 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningMCDropoutModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.classification import LeastConfidenceStrategy
 
 # dataset
 dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
 
-dataset = [dataset[i] for i in range(10000)]
-
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])
 train_indices = train_ds.indices
 val_indices = val_ds.indices
@@ -45,18 +45,20 @@
     loader_batch_size=1000,
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=1000)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=1000)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=1000)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=1000)
+print(pipeline)
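
Both the ensemble example above and this MC-dropout variant feed `LeastConfidenceStrategy`, which ranks unlabelled points by how unsure the averaged predictive distribution is about its top class. A toy illustration of that criterion (pyrelational's internals may aggregate the ensemble members or MC-dropout passes differently; this is just the arithmetic):

```python
import torch

# Least-confidence score: 1 - max_c p(c | x), computed on class probabilities
# averaged over ensemble members or stochastic forward passes.
probs = torch.tensor(
    [
        [0.98, 0.01, 0.01],  # confident prediction -> low informativeness
        [0.40, 0.35, 0.25],  # uncertain prediction -> high informativeness
    ]
)
scores = 1.0 - probs.max(dim=1).values
print(scores)  # tensor([0.0200, 0.6000])
```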
17 changes: 11 additions & 6 deletions examples/demo/mcdropout_uncertainty_regression.py
@@ -17,6 +17,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningMCDropoutModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.regression import LeastConfidenceStrategy
 
 # dataset
@@ -34,18 +36,21 @@
     dataset=dataset, train_indices=train_indices, validation_indices=val_indices, test_indices=test_indices
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
38 changes: 22 additions & 16 deletions examples/demo/model_badge.py
@@ -16,7 +16,9 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.informativeness import relative_distance
-from pyrelational.models import LightningModel
+from pyrelational.models import LightningModel, ModelManager
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.abstract_strategy import Strategy
 
 # dataset
@@ -28,8 +30,6 @@
 
 
 # model
-
-
 class BadgeLightningModel(LightningModel):
     """Model compatible with BADGE strategy"""
 
@@ -85,34 +85,40 @@ def get_gradients(self, loader):
 class BadgeStrategy(Strategy):
     """Implementation of BADGE strategy."""
 
-    def __init__(self, data_manager: DataManager, model: BadgeLightningModel):
-        super(BadgeStrategy, self).__init__(data_manager, model)
+    def __init__(self):
+        super(BadgeStrategy, self).__init__()
 
-    def active_learning_step(self, num_annotate: int) -> List[int]:
+    def __call__(self, num_annotate: int, data_manager: DataManager, model: ModelManager) -> List[int]:
         """
         :param num_annotate: Number of samples to label
        :return: indices of samples to label
         """
-        self.model.train(self.l_loader, self.valid_loader)
-        u_grads = self.model.get_gradients(self.u_loader)
-        l_grads = self.model.get_gradients(self.l_loader)
+        l_loader = data_manager.get_labelled_loader()
+        u_loader = data_manager.get_unlabelled_loader()
+        valid_loader = data_manager.get_validation_loader()
+        model.train(l_loader, valid_loader)
+        u_grads = model.get_gradients(u_loader)
+        l_grads = model.get_gradients(l_loader)
         scores = relative_distance(u_grads, l_grads)
         ixs = torch.argsort(scores, descending=True).tolist()
-        return [self.u_indices[i] for i in ixs[:num_annotate]]
+        return [data_manager.u_indices[i] for i in ixs[:num_annotate]]
 
 
-strategy = BadgeStrategy(data_manager=data_manager, model=model)
+# Set the instantiated custom model and strategy into the Pipeline object
+strategy = BadgeStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
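
The BadgeStrategy diff doubles as documentation of the new contract for custom strategies: no data_manager or model in `__init__`; instead `__call__(num_annotate, data_manager, model)` receives both from the Pipeline and returns indices into the unlabelled pool. A minimal second example under that contract (`RandomStrategy` is a hypothetical name for illustration, not part of this PR):

```python
import random
from typing import List

from pyrelational.data import DataManager
from pyrelational.models import ModelManager
from pyrelational.strategies.abstract_strategy import Strategy


class RandomStrategy(Strategy):
    """Toy baseline: sample the unlabelled pool uniformly at random."""

    def __call__(self, num_annotate: int, data_manager: DataManager, model: ModelManager) -> List[int]:
        # data_manager.u_indices is the unlabelled pool, as used by BadgeStrategy above.
        pool = list(data_manager.u_indices)
        return random.sample(pool, min(num_annotate, len(pool)))
```

Like BadgeStrategy, it would be passed to `Pipeline(data_manager=..., model=..., strategy=RandomStrategy(), oracle=BenchmarkOracle())` and driven through the same `active_learning_step`/`full_active_learning_run` calls shown above.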