Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

celebA MIA #175

Merged
merged 17 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions examples/mia/celebA_HQ/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
data/*
!data/.gitkeep

leakpro_output/*
target/*
29 changes: 29 additions & 0 deletions examples/mia/celebA_HQ/audit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

audit:  # Configurations for auditing
  random_seed: 1234  # Integer specifying the random seed
  attack_list:
    rmia:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      attack_data_fraction: 0.5  # Fraction of auxiliary dataset to sample from during attack
      num_shadow_models: 3  # Number of shadow models to train
      online: True  # perform online or offline attack
      temperature: 2
      gamma: 2.0
      offline_a: 0.33  # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
      offline_b: 0.66

  output_dir: "./leakpro_output"
  attack_type: "mia"  # mia, gia
  modality: "image"  # image, tabular

target:
  # Target model path
  module_path: "./utils/celeb_hq_model.py"
  model_class: "ResNet18"
  # Data paths
  target_folder: "./target"
  data_path: "./data/celeba_hq_data.pkl"

shadow_model:

distillation_model:
83 changes: 83 additions & 0 deletions examples/mia/celebA_HQ/celebA_HQ_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import torch
from torch import cuda, device, optim
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from tqdm import tqdm
from leakpro import AbstractInputHandler

class CelebAHQInputHandler(AbstractInputHandler):
    """Class to handle the user input for the CelebA_HQ dataset."""

    def __init__(self, configs: dict) -> None:
        """Forward the user configuration to the base handler."""
        super().__init__(configs=configs)
        print("Configurations:", configs)

    def get_criterion(self) -> torch.nn.Module:
        """Set the CrossEntropyLoss for the model."""
        return CrossEntropyLoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Set the optimizer for the model."""
        # Default SGD hyperparameters for the CelebA-HQ identity classifier.
        return optim.SGD(
            model.parameters(),
            lr=0.01,
            momentum=0.9,
            weight_decay=0.0001,
        )

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module,
        criterion: torch.nn.Module,
        optimizer: optim.Optimizer,
        epochs: int,
    ) -> dict:
        """Model training procedure.

        Returns a dict with the trained model (moved back to CPU) and the
        accuracy/loss metrics of the final epoch.
        """
        if not epochs:
            raise ValueError("Epochs not found in configurations")

        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)

        for _ in range(epochs):
            # Metrics restart each epoch; the values returned below therefore
            # describe the last epoch only.
            epoch_loss, correct_predictions = 0.0, 0
            model.train()
            for batch_inputs, batch_labels in tqdm(dataloader, desc="Training Progress"):
                batch_inputs = batch_inputs.to(compute_device)
                batch_labels = batch_labels.to(compute_device)
                optimizer.zero_grad()

                logits = model(batch_inputs)
                batch_loss = criterion(logits, batch_labels)
                batch_loss.backward()
                optimizer.step()

                # Accumulate performance metrics.
                correct_predictions += (logits.argmax(dim=1) == batch_labels).sum().item()
                epoch_loss += batch_loss.item()

        model.to("cpu")

        return {
            "model": model,
            "metrics": {
                "accuracy": correct_predictions / len(dataloader.dataset),
                "loss": epoch_loss,
            },
        }

    def evaluate(self, dataloader: DataLoader, model: torch.nn.Module, criterion: torch.nn.Module) -> dict:
        """Evaluate the model.

        Returns a dict with the accuracy and the summed loss over the loader.
        """
        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)
        model.eval()

        total_loss, correct_predictions = 0.0, 0
        with torch.no_grad():
            for batch_inputs, batch_labels in tqdm(dataloader, desc="Evaluating"):
                batch_inputs = batch_inputs.to(compute_device)
                batch_labels = batch_labels.to(compute_device)
                logits = model(batch_inputs)

                correct_predictions += (logits.argmax(dim=1) == batch_labels).sum().item()
                total_loss += criterion(logits, batch_labels).item()

        model.to("cpu")

        return {"accuracy": correct_predictions / len(dataloader.dataset), "loss": total_loss}
264 changes: 264 additions & 0 deletions examples/mia/celebA_HQ/main_celebA_hq.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Face Identity Classification\n",
"This example illustrates MIA attacks on a face identity classifier model. The classifier is trained on the [CelebA Mask HQ](https://github.com/switchablenorms/CelebAMask-HQ) dataset, which should be downloaded, decompressed, and placed in the ```data``` directory as follows:\n",
"\n",
"directory_structure:\n",
"\n",
"```\n",
"data/\n",
" ├── train/\n",
" │ ├── identity 1/\n",
" │ ├── identity 2/\n",
" │ └── ...\n",
" └── test/\n",
" ├── identity 1/\n",
" ├── identity 2/\n",
" └── ...\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
johanos1 marked this conversation as resolved.
Show resolved Hide resolved
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset already exists.\n"
]
}
],
"source": [
"import os\n",
"\n",
"# Path to the dataset zip file\n",
"zip_path = \"./data/CelebA_HQ_facial_identity_dataset.zip\"\n",
"\n",
"# Check if the file already exists\n",
"if not os.path.exists(zip_path):\n",
" print(\"Dataset not found. Downloading...\")\n",
" !wget -O {zip_path} https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/ES-jbCNC6mNHhCyR4Nl1QpYBlxVOJ5YiVerhDpzmoS9ezA?download=1 \n",
" !unzip {zip_path} -d ./data\n",
" print(\"Download and extraction completed.\")\n",
"else:\n",
" print(\"Dataset already exists.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train the Identity Classifier Model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import yaml\n",
"\n",
"project_root = os.path.abspath(os.path.join(os.getcwd(), \"../../..\"))\n",
"sys.path.append(project_root)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"vscode": {
"languageId": "bat"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'/home/fazeleh/LeakPro/examples/mia/celebA_HQ'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pwd"
johanos1 marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'examples.mia'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexamples\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmia\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcelebA_HQ\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mceleb_hq_data\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_celebA_hq_dataloader\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexamples\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmia\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcelebA_HQ\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mceleb_hq_model\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ResNet18, create_trained_model_and_metadata\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Load the config.yaml file\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'examples.mia'"
]
}
],
"source": [
"from examples.mia.celebA_HQ.utils.celeb_hq_data import get_celebA_hq_dataloader\n",
"from examples.mia.celebA_HQ.utils.celeb_hq_model import ResNet18, create_trained_model_and_metadata\n",
"\n",
"# Load the config.yaml file\n",
"with open('train_config.yaml', 'r') as file:\n",
" train_config = yaml.safe_load(file)\n",
"\n",
"# Generate the dataset and dataloaders\n",
"path = os.path.join(os.getcwd(), train_config[\"data\"][\"data_dir\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_loader, test_loader = get_celebA_hq_dataloader(path, train_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_classes =307\n",
"\n",
"model = ResNet18(num_classes = num_classes)\n",
"train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model,train_loader,test_loader, train_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Plot training and test accuracy\n",
"plt.figure(figsize=(5, 4))\n",
"\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(train_acc, label='Train Accuracy')\n",
"plt.plot(test_acc, label='Test Accuracy')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy')\n",
"plt.title('Accuracy over Epochs')\n",
"plt.legend()\n",
"\n",
"# Plot training and test loss\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(train_loss, label='Train Loss')\n",
"plt.plot(test_loss, label='Test Loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"plt.title('Loss over Epochs')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MIA Attacks on Identifier Model "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from celebA_HQ_handler import CelebAHQInputHandler\n",
"\n",
"from leakpro import LeakPro\n",
"\n",
"# Read the config file\n",
"config_path = \"audit.yaml\"\n",
"\n",
"# Prepare leakpro object\n",
"leakpro = LeakPro(CelebAHQInputHandler, config_path)\n",
"\n",
"# Run the audit \n",
"mia_results = leakpro.run_audit(return_results=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generating the Report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import and initialize ReportHandler\n",
"from leakpro.reporting.report_handler import ReportHandler\n",
"\n",
"# report_handler = ReportHandler()\n",
"report_handler = ReportHandler(report_dir=\"./leakpro_output/results\")\n",
"\n",
"# Save MIA results using the report handler\n",
"for res in mia_results:\n",
" report_handler.save_results(attack_name=res.attack_name, result_data=res, config=res.configs)\n",
"\n",
"# # Create the report by compiling the latex text\n",
"report_handler.create_report()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "leakpro_test",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading
Loading