Merged
29 changes: 17 additions & 12 deletions examples/demo/ensemble_uncertainty_classification.py
@@ -5,11 +5,10 @@
 It illustrates the ensemble method.
 """
 
-# User imports
 import logging
 
 import torch
-
+# Pytorch
 from torchvision import datasets, transforms
 
 # Dataset and machine learning model
@@ -18,12 +17,13 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningEnsembleModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.classification import LeastConfidenceStrategy
 
 # dataset
 dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
-
-dataset = [dataset[i] for i in range(10000)]
+dataset = torch.utils.data.Subset(dataset, indices=range(10000))
 
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])
 train_indices = train_ds.indices
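
A note on the dataset change just above: `torch.utils.data.Subset` wraps the underlying dataset and indexes it lazily, whereas the old list comprehension eagerly materialised all 10000 `(image, label)` tuples. A quick self-contained check of the replacement, using only stock torch/torchvision:

```python
import torch
from torchvision import datasets, transforms

full = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())

# Subset keeps Dataset semantics and only touches the wrapped dataset on access;
# len() and integer indexing behave exactly like the eager list did.
subset = torch.utils.data.Subset(full, indices=range(10000))
print(len(subset))         # 10000
print(subset[0][0].shape)  # torch.Size([1, 28, 28])
```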
@@ -32,7 +32,7 @@
 
 # model
 model = LightningEnsembleModel(
-    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 4}, n_estimators=5
+    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 5}, n_estimators=5
 )
 
 # data_manager and defining strategy
@@ -44,18 +44,23 @@
     loader_batch_size=1000,
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+# Set up active learning pipeline
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
-# performance with the full trainset labelled
-strategy.theoretical_performance()
+# See performance with the full trainset labelled
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=1000)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=1000)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=1000)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=1000)
+
+# Pretty printed summary of the components in the pipeline along with annotation/performance history
+print(pipeline)
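
Taken together, the changes to this example show the new shape of the API: strategies are now stateless, and a `Pipeline` owns the `DataManager`, model, strategy, and oracle. A minimal end-to-end sketch of the refactored flow, assembled only from calls visible in this diff (the `MnistClassification` import path is an assumption; the real import sits in a collapsed region of the file):

```python
import torch
from torchvision import datasets, transforms

from examples.utils.ml_models import MnistClassification  # assumed path; import is collapsed in this diff
from pyrelational.data import DataManager
from pyrelational.models import LightningEnsembleModel
from pyrelational.oracles import BenchmarkOracle
from pyrelational.pipeline import Pipeline
from pyrelational.strategies.classification import LeastConfidenceStrategy

dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
dataset = torch.utils.data.Subset(dataset, indices=range(10000))
train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])

data_manager = DataManager(
    dataset=dataset,
    train_indices=train_ds.indices,
    validation_indices=val_ds.indices,
    test_indices=test_ds.indices,
    loader_batch_size=1000,
)
model = LightningEnsembleModel(
    model_class=MnistClassification, model_config={}, trainer_config={"epochs": 5}, n_estimators=5
)

# Strategy and oracle are constructed without data or model; the Pipeline wires
# everything together and owns the annotation loop.
strategy = LeastConfidenceStrategy()
oracle = BenchmarkOracle()
pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)

pipeline.theoretical_performance()                              # reference run with all labels revealed
to_annotate = pipeline.active_learning_step(num_annotate=1000)  # indices the strategy ranks highest
pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
pipeline.full_active_learning_run(num_annotate=1000)            # iterate until the pool is exhausted
print(pipeline)                                                 # annotation/performance history
```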
23 changes: 15 additions & 8 deletions examples/demo/lightning_diversity_classification.py
@@ -16,18 +16,20 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.relative_distance_strategy import (
     RelativeDistanceStrategy,
 )
 
-# dataset
+# Obtain dataset and set up labelled and unlabelled subsets
 dataset = BreastCancerDataset()
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [500, 30, 39])
 train_indices = train_ds.indices
 val_indices = val_ds.indices
 test_indices = test_ds.indices
 
-# model
+# Instantiate model
 model = LightningModel(model_class=BreastCancerClassification, model_config={}, trainer_config={"epochs": 4})
 
 # data_manager and defining strategy
@@ -39,18 +41,23 @@
     hit_ratio_at=5,
 )
 
-strategy = RelativeDistanceStrategy(data_manager=data_manager, model=model)
+# Setup
+strategy = RelativeDistanceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+
+# Pretty printed summary of the components in the pipeline along with annotation/performance history
+print(pipeline)
18 changes: 12 additions & 6 deletions examples/demo/lightning_diversity_regression.py
@@ -16,6 +16,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.relative_distance_strategy import (
     RelativeDistanceStrategy,
 )
@@ -39,18 +41,22 @@
     hit_ratio_at=5,
 )
 
-strategy = RelativeDistanceStrategy(data_manager=data_manager, model=model)
+# Setup pipeline
+strategy = RelativeDistanceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
+
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
19 changes: 11 additions & 8 deletions examples/demo/lightning_representative_classification.py
@@ -16,6 +16,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.task_agnostic.representative_sampling_strategy import (
     RepresentativeSamplingStrategy,
 )
@@ -39,20 +41,21 @@
     loader_batch_size=100,
 )
 
-strategy = RepresentativeSamplingStrategy(
-    data_manager=data_manager, model=model, clustering_method="AffinityPropagation"
-)
+# Setup
+strategy = RepresentativeSamplingStrategy(clustering_method="AffinityPropagation")
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
18 changes: 10 additions & 8 deletions examples/demo/mcdropout_uncertainty_classification.py
@@ -19,13 +19,13 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningMCDropoutModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.classification import LeastConfidenceStrategy
 
 # dataset
 dataset = datasets.FashionMNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
 
-dataset = [dataset[i] for i in range(10000)]
-
 train_ds, val_ds, test_ds = torch.utils.data.random_split(dataset, [9000, 500, 500])
 train_indices = train_ds.indices
 val_indices = val_ds.indices
@@ -45,18 +45,20 @@
     loader_batch_size=1000,
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=1000)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=1000)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=1000)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=1000)
+print(pipeline)
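
Both the ensemble example above and this MC-dropout variant feed `LeastConfidenceStrategy`, which ranks unlabelled points by how unsure the averaged predictive distribution is about its top class. A toy illustration of that criterion (pyrelational's internals may aggregate the ensemble members or MC-dropout passes differently; this is just the arithmetic):

```python
import torch

# Least-confidence score: 1 - max_c p(c | x), computed on class probabilities
# averaged over ensemble members or stochastic forward passes.
probs = torch.tensor(
    [
        [0.98, 0.01, 0.01],  # confident prediction -> low informativeness
        [0.40, 0.35, 0.25],  # uncertain prediction -> high informativeness
    ]
)
scores = 1.0 - probs.max(dim=1).values
print(scores)  # tensor([0.0200, 0.6000])
```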
17 changes: 11 additions & 6 deletions examples/demo/mcdropout_uncertainty_regression.py
@@ -17,6 +17,8 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.models import LightningMCDropoutModel
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.regression import LeastConfidenceStrategy
 
 # dataset
@@ -34,18 +36,21 @@
     dataset=dataset, train_indices=train_indices, validation_indices=val_indices, test_indices=test_indices
 )
 
-strategy = LeastConfidenceStrategy(data_manager=data_manager, model=model)
+
+strategy = LeastConfidenceStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
38 changes: 22 additions & 16 deletions examples/demo/model_badge.py
@@ -16,7 +16,9 @@
 # Active Learning package
 from pyrelational.data import DataManager
 from pyrelational.informativeness import relative_distance
-from pyrelational.models import LightningModel
+from pyrelational.models import LightningModel, ModelManager
+from pyrelational.oracles import BenchmarkOracle
+from pyrelational.pipeline import Pipeline
 from pyrelational.strategies.abstract_strategy import Strategy
 
 # dataset
@@ -28,8 +30,6 @@
 
 
 # model
-
-
 class BadgeLightningModel(LightningModel):
     """Model compatible with BADGE strategy"""
 
@@ -85,34 +85,40 @@ def get_gradients(self, loader):
 class BadgeStrategy(Strategy):
     """Implementation of BADGE strategy."""
 
-    def __init__(self, data_manager: DataManager, model: BadgeLightningModel):
-        super(BadgeStrategy, self).__init__(data_manager, model)
+    def __init__(self):
+        super(BadgeStrategy, self).__init__()
 
-    def active_learning_step(self, num_annotate: int) -> List[int]:
+    def __call__(self, num_annotate: int, data_manager: DataManager, model: ModelManager) -> List[int]:
         """
         :param num_annotate: Number of samples to label
        :return: indices of samples to label
         """
-        self.model.train(self.l_loader, self.valid_loader)
-        u_grads = self.model.get_gradients(self.u_loader)
-        l_grads = self.model.get_gradients(self.l_loader)
+        l_loader = data_manager.get_labelled_loader()
+        u_loader = data_manager.get_unlabelled_loader()
+        valid_loader = data_manager.get_validation_loader()
+        model.train(l_loader, valid_loader)
+        u_grads = model.get_gradients(u_loader)
+        l_grads = model.get_gradients(l_loader)
         scores = relative_distance(u_grads, l_grads)
         ixs = torch.argsort(scores, descending=True).tolist()
-        return [self.u_indices[i] for i in ixs[:num_annotate]]
+        return [data_manager.u_indices[i] for i in ixs[:num_annotate]]
 
 
-strategy = BadgeStrategy(data_manager=data_manager, model=model)
+# Set the instantiated custom model and strategy into the Pipeline object
+strategy = BadgeStrategy()
+oracle = BenchmarkOracle()
+pipeline = Pipeline(data_manager=data_manager, model=model, strategy=strategy, oracle=oracle)
 
 # Remove lightning prints
 logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
 
 # performance with the full trainset labelled
-strategy.theoretical_performance()
+pipeline.theoretical_performance()
 
 # New data to be annotated, followed by an update of the data_manager and model
-to_annotate = strategy.active_learning_step(num_annotate=100)
-strategy.active_learning_update(indices=to_annotate, update_tag="Manual Update")
+to_annotate = pipeline.active_learning_step(num_annotate=100)
+pipeline.active_learning_update(indices=to_annotate, update_tag="Manual Update")
 
 # Annotating data step by step until the trainset is fully annotated
-strategy.full_active_learning_run(num_annotate=100)
-print(strategy)
+pipeline.full_active_learning_run(num_annotate=100)
+print(pipeline)
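
The BadgeStrategy diff doubles as documentation of the new contract for custom strategies: no data_manager or model in `__init__`; instead `__call__(num_annotate, data_manager, model)` receives both from the Pipeline and returns indices into the unlabelled pool. A minimal second example under that contract (`RandomStrategy` is a hypothetical name for illustration, not part of this PR):

```python
import random
from typing import List

from pyrelational.data import DataManager
from pyrelational.models import ModelManager
from pyrelational.strategies.abstract_strategy import Strategy


class RandomStrategy(Strategy):
    """Toy baseline: sample the unlabelled pool uniformly at random."""

    def __call__(self, num_annotate: int, data_manager: DataManager, model: ModelManager) -> List[int]:
        # data_manager.u_indices is the unlabelled pool, as used by BadgeStrategy above.
        pool = list(data_manager.u_indices)
        return random.sample(pool, min(num_annotate, len(pool)))
```

Like BadgeStrategy, it would be passed to `Pipeline(data_manager=..., model=..., strategy=RandomStrategy(), oracle=BenchmarkOracle())` and driven through the same `active_learning_step`/`full_active_learning_run` calls shown above.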