Showing 4 changed files with 364 additions and 0 deletions.
@@ -0,0 +1,44 @@
audit:  # Configurations for auditing
  random_seed: 1234  # Integer specifying the random seed
  attack_list:
    rmia:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      attack_data_fraction: 0.5  # Fraction of the auxiliary dataset to sample from during the attack
      num_shadow_models: 3  # Number of shadow models to train
      online: True  # Perform the online or offline attack
      temperature: 2
      gamma: 2.0
      offline_a: 0.33  # Parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a * p_OUT(x) + b
      offline_b: 0.66
    qmia:
      training_data_fraction: 1.0  # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor
      epochs: 5  # Number of training epochs for quantile regression
    population:
      attack_data_fraction: 1.0  # Fraction of the auxiliary dataset to use for this attack
    lira:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      num_shadow_models: 8  # Number of shadow models to train
      online: False  # Perform the online or offline attack
      fixed_variance: True  # Use a fixed variance for the whole audit
      boosting: True
    loss_traj:
      training_distill_data_fraction: 0.7  # Fraction of the auxiliary dataset to use for training the distillation models, D_s = (1 - D_KD) / 2
      number_of_traj: 10  # Number of epochs (number of points in the loss trajectory)
      label_only: False  # True or False
      mia_classifier_epochs: 100

  output_dir: "./leakpro_output"
  attack_type: "mia"  # mia, gia
  modality: "tabular"

target:
  # Target model path
  module_path: "utils/adult_model_preparation.py"
  model_class: "AdultNet"
  # Data paths
  target_folder: "./target"
  data_path: "./data/adult_data.pkl"

shadow_model:

distillation_model:
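
For reference, a minimal sketch of reading the configuration above with PyYAML and pulling out a few attack parameters; the file name audit.yaml and the example p_OUT value are assumptions made for illustration, not part of the commit.

import yaml

# Load the audit configuration shown above (the path is an assumption).
with open("audit.yaml", "r") as f:
    config = yaml.safe_load(f)

audit = config["audit"]
rmia = audit["attack_list"]["rmia"]

print(audit["attack_type"], audit["modality"])    # "mia", "tabular"
print(rmia["num_shadow_models"], rmia["online"])  # 3, True

# Illustrating the comment on offline_a/offline_b: p_IN(x) = a * p_OUT(x) + b.
p_out = 0.5  # example OUT probability, illustrative only
p_in = rmia["offline_a"] * p_out + rmia["offline_b"]
print(p_in)  # 0.825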
@@ -0,0 +1,70 @@
"""Module containing the class to handle the user input for the CIFAR100 dataset.""" | ||
|
||
import torch | ||
from torch import cuda, device, optim, sigmoid | ||
from torch.nn import CrossEntropyLoss | ||
from torch.utils.data import DataLoader | ||
from tqdm import tqdm | ||
|
||
from leakpro import AbstractInputHandler | ||
|
||
class Cifar100InputHandler(AbstractInputHandler): | ||
"""Class to handle the user input for the CIFAR100 dataset.""" | ||
|
||
def __init__(self, configs: dict) -> None: | ||
super().__init__(configs = configs) | ||
|
||
|
||
def get_criterion(self)->None: | ||
"""Set the CrossEntropyLoss for the model.""" | ||
return CrossEntropyLoss() | ||
|
||
def get_optimizer(self, model:torch.nn.Module) -> None: | ||
"""Set the optimizer for the model.""" | ||
learning_rate = 0.1 | ||
momentum = 0.8 | ||
return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum) | ||
|
||
def train( | ||
self, | ||
dataloader: DataLoader, | ||
model: torch.nn.Module = None, | ||
criterion: torch.nn.Module = None, | ||
optimizer: optim.Optimizer = None, | ||
epochs: int = None, | ||
) -> dict: | ||
"""Model training procedure.""" | ||
|
||
# read hyperparams for training (the parameters for the dataloader are defined in get_dataloader): | ||
if epochs is None: | ||
raise ValueError("epochs not found in configs") | ||
|
||
# prepare training | ||
gpu_or_cpu = device("cuda" if cuda.is_available() else "cpu") | ||
model.to(gpu_or_cpu) | ||
|
||
# training loop | ||
for epoch in range(epochs): | ||
train_loss, train_acc = 0, 0 | ||
model.train() | ||
for inputs, labels in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"): | ||
labels = labels.long() | ||
inputs, labels = inputs.to(gpu_or_cpu, non_blocking=True), labels.to(gpu_or_cpu, non_blocking=True) | ||
optimizer.zero_grad() | ||
outputs = model(inputs) | ||
loss = criterion(outputs, labels) | ||
pred = outputs.data.max(1, keepdim=True)[1] | ||
loss.backward() | ||
optimizer.step() | ||
|
||
# Accumulate performance of shadow model | ||
train_acc += pred.eq(labels.data.view_as(pred)).sum() | ||
train_loss += loss.item() | ||
|
||
log_train_str = ( | ||
f"Epoch: {epoch+1}/{epochs} | Train Loss: {train_loss/len(dataloader):.8f} | " | ||
f"Train Acc: {float(train_acc)/len(dataloader.dataset):.8f}") | ||
self.logger.info(log_train_str) | ||
model.to("cpu") | ||
|
||
return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}} |
@@ -0,0 +1,142 @@
import os
import pickle

from torch import tensor, float32, cat
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets import CIFAR10, CIFAR100


class CifarDataset(Dataset):
    def __init__(self, x, y, transform=None, indices=None):
        """
        Custom dataset for CIFAR data.

        Args:
            x (torch.Tensor): Tensor of input images.
            y (torch.Tensor): Tensor of labels.
            transform (callable, optional): Optional transform to be applied on the image tensors.
            indices (list, optional): Optional list of population indices covered by this dataset.
        """
        self.x = x
        self.y = y
        self.transform = transform
        self.indices = indices

    def __len__(self):
        """Return the total number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Retrieve the image and its corresponding label at index 'idx'."""
        image = self.x[idx]
        label = self.y[idx]

        # Apply transformations to the image if any
        if self.transform:
            image = self.transform(image)

        return image, label

    @classmethod
    def from_cifar10(cls, root="./data", download=True, transform=None):
        # Load the CIFAR-10 train and test datasets
        trainset = CIFAR10(root=root, train=True, download=download, transform=transforms.ToTensor())
        testset = CIFAR10(root=root, train=False, download=download, transform=transforms.ToTensor())

        # Concatenate both datasets' data and labels
        data = cat([tensor(trainset.data, dtype=float32),
                    tensor(testset.data, dtype=float32)],
                   dim=0)
        # Rescale data from [0, 255] to [0, 1], move channels first and normalize
        data /= 255.0
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        data = data.permute(0, 3, 1, 2)
        data = normalize(data)

        targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0)

        return cls(data, targets)

    @classmethod
    def from_cifar100(cls, root="./data", download=True, transform=None):
        # Load the CIFAR-100 train and test datasets
        trainset = CIFAR100(root=root, train=True, download=download, transform=transforms.ToTensor())
        testset = CIFAR100(root=root, train=False, download=download, transform=transforms.ToTensor())

        # Concatenate both datasets' data and labels
        data = cat([tensor(trainset.data, dtype=float32),
                    tensor(testset.data, dtype=float32)],
                   dim=0)
        # Rescale data from [0, 255] to [0, 1], move channels first and normalize
        data /= 255.0
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        data = data.permute(0, 3, 1, 2)
        data = normalize(data)

        targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0)

        return cls(data, targets)

    def subset(self, indices):
        """Return a subset of the dataset based on the given indices."""
        # Keep track of the population indices so downstream code can read dataset.indices
        return CifarDataset(self.x[indices], self.y[indices], transform=self.transform, indices=indices)


def get_cifar10_dataset(data_path):
    # Create the combined CIFAR-10 dataset (normalization is applied inside from_cifar10)
    population = CifarDataset.from_cifar10(root=data_path, download=True)

    file_path = os.path.join(data_path, "cifar10.pkl")
    if not os.path.exists(file_path):
        with open(file_path, "wb") as file:
            pickle.dump(population, file)
        print(f"Saved data to {file_path}")

    # Split the 60000-sample population into pretrain, test and client subsets
    pretrain_indices = list(range(50000))        # first 50000 samples form the pretraining set
    test_indices = list(range(50001, 51000))     # indices 50001-50999 form the test set
    client_indices = list(range(51001, 51002))   # a single sample (index 51001) forms the client training set
    trainset = population.subset(client_indices)
    testset = population.subset(test_indices)
    pretrainset = population.subset(pretrain_indices)

    return trainset, testset, pretrainset


def get_cifar100_dataset(data_path):
    # Create the combined CIFAR-100 dataset (normalization is applied inside from_cifar100)
    population = CifarDataset.from_cifar100(root=data_path, download=True)

    file_path = os.path.join(data_path, "cifar100.pkl")
    if not os.path.exists(file_path):
        with open(file_path, "wb") as file:
            pickle.dump(population, file)
        print(f"Saved data to {file_path}")

    # Split the 60000-sample population into pretrain, test and client subsets
    pretrain_indices = list(range(50000))        # first 50000 samples form the pretraining set
    test_indices = list(range(50001, 51000))     # indices 50001-50999 form the test set
    client_indices = list(range(51001, 51002))   # a single sample (index 51001) forms the client training set
    trainset = population.subset(client_indices)
    testset = population.subset(test_indices)
    pretrainset = population.subset(pretrain_indices)

    return trainset, testset, pretrainset
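
As a usage illustration, a minimal sketch of turning these helpers into DataLoaders; the module name cifar_data and the batch size are assumptions for illustration, not part of the commit.

from torch.utils.data import DataLoader

from cifar_data import get_cifar10_dataset  # hypothetical module name for the file above

# Download/load CIFAR-10 and split it into the three fixed subsets defined above.
trainset, testset, pretrainset = get_cifar10_dataset("./data/")

# Wrap the subsets in DataLoaders; the batch size is an arbitrary choice here.
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
test_loader = DataLoader(testset, batch_size=64, shuffle=False)
pretrain_loader = DataLoader(pretrainset, batch_size=64, shuffle=True)

print(len(trainset), len(testset), len(pretrainset))  # 1, 999, 50000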
@@ -0,0 +1,108 @@
import pickle

import torch.nn as nn
import torchvision.models as models
from torch import device, optim, cuda, no_grad, save
from tqdm import tqdm


class ResNet18(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.model = models.resnet18(pretrained=False)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
        self.init_params = {"num_classes": num_classes}

    def forward(self, x):
        return self.model(x)


def evaluate(model, loader, criterion, device):
    model.eval()
    loss, acc = 0, 0
    with no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            acc += pred.eq(target).sum().item()
        loss /= len(loader)
        acc = float(acc) / len(loader.dataset)
    return loss, acc


def create_trained_model_and_metadata(model, train_loader, test_loader, epochs=10, metadata=None):
    device_name = device("cuda" if cuda.is_available() else "cpu")
    model.to(device_name)
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.8)
    train_losses, train_accuracies = [], []
    test_losses, test_accuracies = [], []

    for e in tqdm(range(epochs), desc="Training Progress"):
        model.train()
        train_acc, train_loss = 0.0, 0.0

        for data, target in train_loader:
            data, target = data.to(device_name, non_blocking=True), target.to(device_name, non_blocking=True)
            optimizer.zero_grad()
            output = model(data)

            loss = criterion(output, target)
            pred = output.argmax(dim=1)  # for multi-class classification
            train_acc += pred.eq(target).sum().item()

            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_acc /= len(train_loader.dataset)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        test_loss, test_acc = evaluate(model, test_loader, criterion, device_name)
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

    # Move the model back to the CPU and store its weights
    model.to("cpu")
    with open("target/target_model.pkl", "wb") as f:
        save(model.state_dict(), f)

    # Create metadata and store it
    meta_data = {}
    meta_data["train_indices"] = train_loader.dataset.indices
    meta_data["test_indices"] = test_loader.dataset.indices
    meta_data["num_train"] = len(meta_data["train_indices"])

    # Write init params
    meta_data["init_params"] = {}
    for key, value in model.init_params.items():
        meta_data["init_params"][key] = value

    # Read out optimizer parameters
    meta_data["optimizer"] = {}
    meta_data["optimizer"]["name"] = optimizer.__class__.__name__.lower()
    meta_data["optimizer"]["lr"] = optimizer.param_groups[0].get("lr", 0)
    meta_data["optimizer"]["weight_decay"] = optimizer.param_groups[0].get("weight_decay", 0)
    meta_data["optimizer"]["momentum"] = optimizer.param_groups[0].get("momentum", 0)
    meta_data["optimizer"]["dampening"] = optimizer.param_groups[0].get("dampening", 0)
    meta_data["optimizer"]["nesterov"] = optimizer.param_groups[0].get("nesterov", False)

    # Read out criterion parameters
    meta_data["loss"] = {}
    meta_data["loss"]["name"] = criterion.__class__.__name__.lower()

    meta_data["batch_size"] = train_loader.batch_size
    meta_data["epochs"] = epochs
    meta_data["train_acc"] = train_acc
    meta_data["test_acc"] = test_acc
    meta_data["train_loss"] = train_loss
    meta_data["test_loss"] = test_loss
    meta_data["dataset"] = "cifar10"

    with open("target/model_metadata.pkl", "wb") as f:
        pickle.dump(meta_data, f)

    return train_accuracies, train_losses, test_accuracies, test_losses
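
To tie the pieces together, a minimal end-to-end sketch of training and saving a target model with the code above; the module names cifar_data and cifar_model, the batch size, and the epoch count are assumptions for illustration, not part of the commit.

import os

from torch.utils.data import DataLoader

from cifar_data import get_cifar10_dataset  # hypothetical module name for the dataset helpers
from cifar_model import ResNet18, create_trained_model_and_metadata  # hypothetical module name for the file above

# Prepare the data splits and wrap them in loaders (batch size is an arbitrary choice).
trainset, testset, _ = get_cifar10_dataset("./data/")
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
test_loader = DataLoader(testset, batch_size=64, shuffle=False)

# The helper writes target/target_model.pkl and target/model_metadata.pkl, so the folder must exist.
os.makedirs("target", exist_ok=True)

model = ResNet18(num_classes=10)
train_accs, train_losses, test_accs, test_losses = create_trained_model_and_metadata(
    model, train_loader, test_loader, epochs=10
)
print(f"final train acc: {train_accs[-1]:.4f}, test acc: {test_accs[-1]:.4f}")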