Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

celebA MIA #175

Merged
merged 17 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions examples/mia/celebA_HQ/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
data/*
!data/.gitkeep

leakpro_output/*
target/*
29 changes: 29 additions & 0 deletions examples/mia/celebA_HQ/audit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

audit:  # Configurations for auditing
  random_seed: 1234  # Integer specifying the random seed
  attack_list:
    rmia:
      training_data_fraction: 0.5  # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      attack_data_fraction: 0.5  # Fraction of auxiliary dataset to sample from during attack
      num_shadow_models: 3  # Number of shadow models to train
      online: True  # perform online or offline attack
      temperature: 2
      gamma: 2.0
      offline_a: 0.33  # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
      offline_b: 0.66

  output_dir: "./leakpro_output"
  attack_type: "mia"  # mia, gia
  modality: "image"  # image, tabular

target:
  # Target model path
  module_path: "./utils/celeb_hq_model.py"
  model_class: "ResNet18"
  # Data paths
  target_folder: "./target"
  data_path: "./data/celeba_hq_data.pkl"

shadow_model:

distillation_model:
83 changes: 83 additions & 0 deletions examples/mia/celebA_HQ/celebA_HQ_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import torch
from torch import cuda, device, optim
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from tqdm import tqdm
from leakpro import AbstractInputHandler

class CelebAHQInputHandler(AbstractInputHandler):
    """Class to handle the user input for the CelebA_HQ dataset."""

    def __init__(self, configs: dict) -> None:
        """Forward the user configuration to the base handler."""
        super().__init__(configs=configs)
        print("Configurations:", configs)

    def get_criterion(self) -> torch.nn.Module:
        """Set the CrossEntropyLoss for the model."""
        return CrossEntropyLoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Set the optimizer for the model."""
        # Default SGD hyperparameters for the CelebA-HQ identity classifier.
        return optim.SGD(
            model.parameters(),
            lr=0.01,
            momentum=0.9,
            weight_decay=0.0001,
        )

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module,
        criterion: torch.nn.Module,
        optimizer: optim.Optimizer,
        epochs: int,
    ) -> dict:
        """Model training procedure.

        Returns a dict with the trained model (moved back to CPU) and the
        accuracy/loss metrics of the final epoch.
        """
        if not epochs:
            raise ValueError("Epochs not found in configurations")

        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)

        for _ in range(epochs):
            # Metrics restart each epoch; the values returned below therefore
            # describe the last epoch only.
            epoch_loss, correct_predictions = 0.0, 0
            model.train()
            for batch_inputs, batch_labels in tqdm(dataloader, desc="Training Progress"):
                batch_inputs = batch_inputs.to(compute_device)
                batch_labels = batch_labels.to(compute_device)
                optimizer.zero_grad()

                logits = model(batch_inputs)
                batch_loss = criterion(logits, batch_labels)
                batch_loss.backward()
                optimizer.step()

                # Accumulate performance metrics.
                correct_predictions += (logits.argmax(dim=1) == batch_labels).sum().item()
                epoch_loss += batch_loss.item()

        model.to("cpu")

        return {
            "model": model,
            "metrics": {
                "accuracy": correct_predictions / len(dataloader.dataset),
                "loss": epoch_loss,
            },
        }

    def evaluate(self, dataloader: DataLoader, model: torch.nn.Module, criterion: torch.nn.Module) -> dict:
        """Evaluate the model.

        Returns a dict with the accuracy and the summed loss over the loader.
        """
        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)
        model.eval()

        total_loss, correct_predictions = 0.0, 0
        with torch.no_grad():
            for batch_inputs, batch_labels in tqdm(dataloader, desc="Evaluating"):
                batch_inputs = batch_inputs.to(compute_device)
                batch_labels = batch_labels.to(compute_device)
                logits = model(batch_inputs)

                correct_predictions += (logits.argmax(dim=1) == batch_labels).sum().item()
                total_loss += criterion(logits, batch_labels).item()

        model.to("cpu")

        return {"accuracy": correct_predictions / len(dataloader.dataset), "loss": total_loss}
264 changes: 264 additions & 0 deletions examples/mia/celebA_HQ/main_celebA_hq.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Face Identity Classification\n",
"This example illustrates MIA attacks on a face identity classifier model. The classifier is trained on the [CelebA Mask HQ](https://github.com/switchablenorms/CelebAMask-HQ) dataset, which should be downloaded, decompressed, and placed in the ```data``` directory as follows:\n",
"\n",
"directory_structure:\n",
"\n",
"```\n",
"data/\n",
" ├── train/\n",
" │ ├── identity 1/\n",
" │ ├── identity 2/\n",
" │ └── ...\n",
" └── test/\n",
" ├── identity 1/\n",
" ├── identity 2/\n",
" └── ...\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
johanos1 marked this conversation as resolved.
Show resolved Hide resolved
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset already exists.\n"
]
}
],
"source": [
"import os\n",
"\n",
"# Path to the dataset zip file\n",
"zip_path = \"./data/CelebA_HQ_facial_identity_dataset.zip\"\n",
"\n",
"# Check if the file already exists\n",
"if not os.path.exists(zip_path):\n",
" print(\"Dataset not found. Downloading...\")\n",
" !wget -O {zip_path} https://postechackr-my.sharepoint.com/:u:/g/personal/dongbinna_postech_ac_kr/ES-jbCNC6mNHhCyR4Nl1QpYBlxVOJ5YiVerhDpzmoS9ezA?download=1 \n",
" !unzip {zip_path} -d ./data\n",
" print(\"Download and extraction completed.\")\n",
"else:\n",
" print(\"Dataset already exists.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train the Identity Classifier Model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import yaml\n",
"\n",
"project_root = os.path.abspath(os.path.join(os.getcwd(), \"../../..\"))\n",
"sys.path.append(project_root)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"vscode": {
"languageId": "bat"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'/home/fazeleh/LeakPro/examples/mia/celebA_HQ'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pwd"
johanos1 marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'examples.mia'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexamples\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmia\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcelebA_HQ\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mceleb_hq_data\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_celebA_hq_dataloader\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexamples\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmia\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcelebA_HQ\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mceleb_hq_model\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ResNet18, create_trained_model_and_metadata\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Load the config.yaml file\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'examples.mia'"
]
}
],
"source": [
"from examples.mia.celebA_HQ.utils.celeb_hq_data import get_celebA_hq_dataloader\n",
"from examples.mia.celebA_HQ.utils.celeb_hq_model import ResNet18, create_trained_model_and_metadata\n",
"\n",
"# Load the config.yaml file\n",
"with open('train_config.yaml', 'r') as file:\n",
" train_config = yaml.safe_load(file)\n",
"\n",
"# Generate the dataset and dataloaders\n",
"path = os.path.join(os.getcwd(), train_config[\"data\"][\"data_dir\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_loader, test_loader = get_celebA_hq_dataloader(path, train_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_classes =307\n",
"\n",
"model = ResNet18(num_classes = num_classes)\n",
"train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model,train_loader,test_loader, train_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Plot training and test accuracy\n",
"plt.figure(figsize=(5, 4))\n",
"\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(train_acc, label='Train Accuracy')\n",
"plt.plot(test_acc, label='Test Accuracy')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy')\n",
"plt.title('Accuracy over Epochs')\n",
"plt.legend()\n",
"\n",
"# Plot training and test loss\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(train_loss, label='Train Loss')\n",
"plt.plot(test_loss, label='Test Loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"plt.title('Loss over Epochs')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MIA Attacks on Identifier Model "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from celebA_HQ_handler import CelebAHQInputHandler\n",
"\n",
"from leakpro import LeakPro\n",
"\n",
"# Read the config file\n",
"config_path = \"audit.yaml\"\n",
"\n",
"# Prepare leakpro object\n",
"leakpro = LeakPro(CelebAHQInputHandler, config_path)\n",
"\n",
"# Run the audit \n",
"mia_results = leakpro.run_audit(return_results=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generating the Report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import and initialize ReportHandler\n",
"from leakpro.reporting.report_handler import ReportHandler\n",
"\n",
"# report_handler = ReportHandler()\n",
"report_handler = ReportHandler(report_dir=\"./leakpro_output/results\")\n",
"\n",
"# Save MIA results using the report handler\n",
"for res in mia_results:\n",
" report_handler.save_results(attack_name=res.attack_name, result_data=res, config=res.configs)\n",
"\n",
"# # Create the report by compiling the latex text\n",
"report_handler.create_report()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "leakpro_test",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading
Loading