
Commit

cifar example structure
fazelehh committed Oct 14, 2024
1 parent 2db8c08 commit 28bfc48
Showing 4 changed files with 364 additions and 0 deletions.
44 changes: 44 additions & 0 deletions examples/mia/cifar/audit.yml
@@ -0,0 +1,44 @@
audit:  # Configurations for auditing
  random_seed: 1234  # Integer specifying the random seed
  attack_list:
    rmia:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      attack_data_fraction: 0.5  # Fraction of the auxiliary dataset to sample from during the attack
      num_shadow_models: 3  # Number of shadow models to train
      online: True  # Perform online or offline attack
      temperature: 2
      gamma: 2.0
      offline_a: 0.33  # Parameter from which p_IN(x) is computed from p_OUT(x): p_IN(x) = a * p_OUT(x) + b
      offline_b: 0.66
    qmia:
      training_data_fraction: 1.0  # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor
      epochs: 5  # Number of training epochs for quantile regression
    population:
      attack_data_fraction: 1.0  # Fraction of the auxiliary dataset to use for this attack
    lira:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      num_shadow_models: 8  # Number of shadow models to train
      online: False  # Perform online or offline attack
      fixed_variance: True  # Use a fixed variance for the whole audit
      boosting: True
    loss_traj:
      training_distill_data_fraction: 0.7  # Fraction of the auxiliary dataset to use for training the distillation models D_s = (1 - D_KD) / 2
      number_of_traj: 10  # Number of epochs (number of points in the loss trajectory)
      label_only: False  # True or False
      mia_classifier_epochs: 100

  output_dir: "./leakpro_output"
  attack_type: "mia"  # mia, gia
  modality: "image"  # CIFAR inputs are images

target:
  # Target model path
  module_path: "utils/cifar_model_prepration.py"
  model_class: "ResNet18"
  # Data paths
  target_folder: "./target"
  data_path: "./data/cifar10.pkl"

shadow_model:

distillation_model:
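
For quick orientation, here is a minimal sketch of reading this config outside LeakPro (assuming PyYAML and the nesting shown above; LeakPro's own loader may parse and validate it differently):

import yaml

with open("examples/mia/cifar/audit.yml") as f:   # path relative to the repository root
    cfg = yaml.safe_load(f)

rmia = cfg["audit"]["attack_list"]["rmia"]
# The offline RMIA comment above defines p_IN(x) = a * p_OUT(x) + b.
a, b = rmia["offline_a"], rmia["offline_b"]
p_out = 0.1                                        # hypothetical p_OUT(x) value, for illustration only
print(rmia["num_shadow_models"], a * p_out + b)    # 3 and 0.693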
70 changes: 70 additions & 0 deletions examples/mia/cifar/cifar_handler.py
@@ -0,0 +1,70 @@
"""Module containing the class to handle the user input for the CIFAR100 dataset."""

import torch
from torch import cuda, device, optim, sigmoid
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from tqdm import tqdm

from leakpro import AbstractInputHandler

class Cifar100InputHandler(AbstractInputHandler):
    """Class to handle the user input for the CIFAR100 dataset."""

    def __init__(self, configs: dict) -> None:
        super().__init__(configs=configs)

    def get_criterion(self) -> CrossEntropyLoss:
        """Return the CrossEntropyLoss criterion for the model."""
        return CrossEntropyLoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Return the SGD optimizer for the model."""
        learning_rate = 0.1
        momentum = 0.8
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module = None,
        criterion: torch.nn.Module = None,
        optimizer: optim.Optimizer = None,
        epochs: int = None,
    ) -> dict:
        """Model training procedure."""

        # read hyperparams for training (the parameters for the dataloader are defined in get_dataloader):
        if epochs is None:
            raise ValueError("epochs not found in configs")

        # prepare training
        gpu_or_cpu = device("cuda" if cuda.is_available() else "cpu")
        model.to(gpu_or_cpu)

        # training loop
        for epoch in range(epochs):
            train_loss, train_acc = 0, 0
            model.train()
            for inputs, labels in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
                labels = labels.long()
                inputs, labels = inputs.to(gpu_or_cpu, non_blocking=True), labels.to(gpu_or_cpu, non_blocking=True)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                pred = outputs.data.max(1, keepdim=True)[1]
                loss.backward()
                optimizer.step()

                # Accumulate performance of shadow model
                train_acc += pred.eq(labels.data.view_as(pred)).sum()
                train_loss += loss.item()

            log_train_str = (
                f"Epoch: {epoch+1}/{epochs} | Train Loss: {train_loss/len(dataloader):.8f} | "
                f"Train Acc: {float(train_acc)/len(dataloader.dataset):.8f}")
            self.logger.info(log_train_str)
        model.to("cpu")

        return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}}
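
As a standalone illustration (not part of the commit), the accuracy bookkeeping used in train() can be checked on a toy batch:

import torch

outputs = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # logits for 3 samples, 2 classes
labels = torch.tensor([1, 0, 0])
pred = outputs.data.max(1, keepdim=True)[1]                    # predicted class per sample, shape (3, 1)
correct = pred.eq(labels.data.view_as(pred)).sum()             # tensor(2): samples 0 and 1 are correct
print(float(correct) / len(labels))                            # 0.666..., mirrors the Train Acc computation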
142 changes: 142 additions & 0 deletions examples/mia/cifar/utils/cifar_data_prepration.py
@@ -0,0 +1,142 @@
import os
import numpy as np
import pandas as pd
import joblib
import pickle
from sklearn.model_selection import train_test_split
from torchvision import transforms, datasets
from torchvision.datasets import CIFAR10, CIFAR100
import urllib.request
from torch.utils.data import Dataset, Subset, DataLoader
from torch import tensor, float32, cat



class CifarDataset(Dataset):
    def __init__(self, x, y, transform=None, indices=None):
        """
        Custom dataset for CIFAR data.

        Args:
            x (torch.Tensor): Tensor of input images.
            y (torch.Tensor): Tensor of labels.
            transform (callable, optional): Optional transform to be applied on the image tensors.
            indices (list, optional): Population indices the samples were drawn from.
        """
        self.x = x
        self.y = y
        self.transform = transform
        self.indices = indices

    def __len__(self):
        """Return the total number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Retrieve the image and its corresponding label at index 'idx'."""
        image = self.x[idx]
        label = self.y[idx]

        # Apply transformations to the image if any
        if self.transform:
            image = self.transform(image)

        return image, label

    @classmethod
    def from_cifar10(cls, root="./data", download=True, transform=None):
        # Load the CIFAR10 train and test datasets
        trainset = CIFAR10(root=root, train=True, download=download, transform=transforms.ToTensor())
        testset = CIFAR10(root=root, train=False, download=download, transform=transforms.ToTensor())

        # Concatenate both datasets' data and labels
        data = cat([tensor(trainset.data, dtype=float32),
                    tensor(testset.data, dtype=float32)],
                   dim=0)
        # Rescale data from [0, 255] to [0, 1]
        data /= 255.0
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        data = data.permute(0, 3, 1, 2)
        data = normalize(data)

        targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0)

        return cls(data, targets)

    @classmethod
    def from_cifar100(cls, root="./data", download=True, transform=None):
        # Load the CIFAR100 train and test datasets
        trainset = CIFAR100(root=root, train=True, download=download, transform=transforms.ToTensor())
        testset = CIFAR100(root=root, train=False, download=download, transform=transforms.ToTensor())

        # Concatenate both datasets' data and labels
        data = cat([tensor(trainset.data, dtype=float32),
                    tensor(testset.data, dtype=float32)],
                   dim=0)
        # Rescale data from [0, 255] to [0, 1]
        data /= 255.0
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        data = data.permute(0, 3, 1, 2)
        data = normalize(data)

        targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0)

        return cls(data, targets)

    def subset(self, indices):
        """Return a subset of the dataset based on the given indices."""
        return CifarDataset(self.x[indices], self.y[indices], transform=self.transform, indices=indices)


def get_cifar10_dataset(data_path):
    # Create the combined CIFAR-10 dataset
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    population = CifarDataset.from_cifar10(root=data_path, download=True, transform=transform)

    file_path = os.path.join(data_path, "cifar10.pkl")
    if not os.path.exists(file_path):
        with open(file_path, "wb") as file:
            pickle.dump(population, file)
        print(f"Saved data to {file_path}")

    # Split the population into pretrain, test, and client (train) subsets
    pretrain_indices = list(range(50000))  # indices 0-49999 form the pretrain set
    test_indices = list(range(50001, 51000))  # indices 50001-50999 form the test set
    client_indices = list(range(51001, 51002))  # index 51001 forms the client (train) set
    trainset = population.subset(client_indices)
    testset = population.subset(test_indices)
    pretrainset = population.subset(pretrain_indices)

    return trainset, testset, pretrainset


def get_cifar100_dataset(data_path):
    # Create the combined CIFAR-100 dataset
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    population = CifarDataset.from_cifar100(root=data_path, download=True, transform=transform)

    file_path = os.path.join(data_path, "cifar100.pkl")
    if not os.path.exists(file_path):
        with open(file_path, "wb") as file:
            pickle.dump(population, file)
        print(f"Saved data to {file_path}")

    # Split the population into pretrain, test, and client (train) subsets
    pretrain_indices = list(range(50000))  # indices 0-49999 form the pretrain set
    test_indices = list(range(50001, 51000))  # indices 50001-50999 form the test set
    client_indices = list(range(51001, 51002))  # index 51001 forms the client (train) set
    trainset = population.subset(client_indices)
    testset = population.subset(test_indices)
    pretrainset = population.subset(pretrain_indices)

    return trainset, testset, pretrainset
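
To feed these splits into training, the returned subsets can be wrapped in DataLoaders roughly as follows (a sketch; the batch size and shuffling are arbitrary choices, not part of the commit):

from torch.utils.data import DataLoader

trainset, testset, pretrainset = get_cifar10_dataset("./data")
train_loader = DataLoader(trainset, batch_size=128, shuffle=True)
test_loader = DataLoader(testset, batch_size=128, shuffle=False)
print(len(pretrainset), len(testset), len(trainset))  # 50000, 999, 1 with the index ranges above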



108 changes: 108 additions & 0 deletions examples/mia/cifar/utils/cifar_model_prepration.py
@@ -0,0 +1,108 @@
import torch.nn as nn
from torch import device, optim, cuda, no_grad, save, sigmoid
import torchvision.models as models
import pickle
from tqdm import tqdm

class ResNet18(nn.Module):
    def __init__(self, num_classes):
        super(ResNet18, self).__init__()
        self.model = models.resnet18(pretrained=False)
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
        self.init_params = {"num_classes": num_classes}

    def forward(self, x):
        return self.model(x)

def evaluate(model, loader, criterion, device):
    model.eval()
    loss, acc = 0, 0
    with no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            acc += pred.eq(target).sum().item()
    loss /= len(loader)
    acc = float(acc) / len(loader.dataset)
    return loss, acc

def create_trained_model_and_metadata(model, train_loader, test_loader, epochs=10, metadata=None):
    device_name = device("cuda" if cuda.is_available() else "cpu")
    model.to(device_name)
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.8)
    train_losses, train_accuracies = [], []
    test_losses, test_accuracies = [], []

    for e in tqdm(range(epochs), desc="Training Progress"):
        model.train()
        train_acc, train_loss = 0.0, 0.0

        for data, target in train_loader:
            data, target = data.to(device_name, non_blocking=True), target.to(device_name, non_blocking=True)
            optimizer.zero_grad()
            output = model(data)

            loss = criterion(output, target)
            pred = output.argmax(dim=1)  # for multi-class classification
            train_acc += pred.eq(target).sum().item()

            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_acc /= len(train_loader.dataset)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        test_loss, test_acc = evaluate(model, test_loader, criterion, device_name)
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

    # Move the model back to the CPU
    model.to("cpu")
    with open("target/target_model.pkl", "wb") as f:
        save(model.state_dict(), f)

    # Create metadata and store it
    meta_data = {}
    meta_data["train_indices"] = train_loader.dataset.indices
    meta_data["test_indices"] = test_loader.dataset.indices
    meta_data["num_train"] = len(meta_data["train_indices"])

    # Write init params
    meta_data["init_params"] = {}
    for key, value in model.init_params.items():
        meta_data["init_params"][key] = value

    # Read out optimizer parameters
    meta_data["optimizer"] = {}
    meta_data["optimizer"]["name"] = optimizer.__class__.__name__.lower()
    meta_data["optimizer"]["lr"] = optimizer.param_groups[0].get("lr", 0)
    meta_data["optimizer"]["weight_decay"] = optimizer.param_groups[0].get("weight_decay", 0)
    meta_data["optimizer"]["momentum"] = optimizer.param_groups[0].get("momentum", 0)
    meta_data["optimizer"]["dampening"] = optimizer.param_groups[0].get("dampening", 0)
    meta_data["optimizer"]["nesterov"] = optimizer.param_groups[0].get("nesterov", False)

    # Read out criterion parameters
    meta_data["loss"] = {}
    meta_data["loss"]["name"] = criterion.__class__.__name__.lower()

    meta_data["batch_size"] = train_loader.batch_size
    meta_data["epochs"] = epochs
    meta_data["train_acc"] = train_acc
    meta_data["test_acc"] = test_acc
    meta_data["train_loss"] = train_loss
    meta_data["test_loss"] = test_loss
    meta_data["dataset"] = "cifar10"

    with open("target/model_metadata.pkl", "wb") as f:
        pickle.dump(meta_data, f)

    return train_accuracies, train_losses, test_accuracies, test_losses
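
Putting the two utility modules together, a hypothetical end-to-end run might look like the following (a sketch executed from examples/mia/cifar/utils/, assuming the target/ output directory can be created and that subset() propagates indices so that dataset.indices is populated; batch size and epoch count are arbitrary):

import os
from torch.utils.data import DataLoader
from cifar_data_prepration import get_cifar10_dataset
from cifar_model_prepration import ResNet18, create_trained_model_and_metadata

os.makedirs("target", exist_ok=True)            # model weights and metadata are written here
trainset, testset, _ = get_cifar10_dataset("./data")
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
test_loader = DataLoader(testset, batch_size=64, shuffle=False)

model = ResNet18(num_classes=10)                # CIFAR-10 has 10 classes
train_accs, train_losses, test_accs, test_losses = create_trained_model_and_metadata(
    model, train_loader, test_loader, epochs=2)
print(train_accs[-1], test_accs[-1])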
