Mimic_Handler, data_processing, model and the main file
fazelehh committed Oct 8, 2024
1 parent b1c9331 commit 5abcbda
Showing 5 changed files with 647 additions and 0 deletions.
43 changes: 43 additions & 0 deletions examples/mia/LOS/audit.yaml
@@ -0,0 +1,43 @@
audit: # Configurations for auditing
  random_seed: 1234 # Integer specifying the random seed
  attack_list:
    rmia:
      training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      attack_data_fraction: 0.5 # Fraction of the auxiliary dataset to sample from during the attack
      num_shadow_models: 3 # Number of shadow models to train
      online: True # Perform online (True) or offline (False) attack
      temperature: 2
      gamma: 2.0
      offline_a: 0.33 # Parameter a used to compute p(x) from p_OUT(x), where p_IN(x) = a * p_OUT(x) + b
      offline_b: 0.66 # Parameter b in the same interpolation
    qmia:
      training_data_fraction: 1.0 # Fraction of the auxiliary dataset (data without train and test indices) to use for training the quantile regressor
      epochs: 5 # Number of training epochs for quantile regression
    population:
      attack_data_fraction: 1.0 # Fraction of the auxiliary dataset to use for this attack
    lira:
      training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training)
      num_shadow_models: 8 # Number of shadow models to train
      online: False # Perform online (True) or offline (False) attack
      fixed_variance: True # Use a fixed variance for the whole audit
      boosting: True
    loss_traj:
      training_distill_data_fraction: 0.7 # Fraction of the auxiliary dataset to use for training the distillation models, D_s = (1 - D_KD) / 2
      number_of_traj: 10 # Number of epochs (number of points in the loss trajectory)
      label_only: False # True or False
      mia_classifier_epochs: 100

  output_dir: "./leakpro_output"
  attack_type: "mia" # mia, gia

target:
  # Target model path
  module_path: "utils/model.py"
  model_class: "MimicLR"
  # Data paths
  target_folder: "./target"
  data_path: "./data/dataset.pkl"

shadow_model:

distillation_model:
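
The offline_a / offline_b pair above parameterizes how the RMIA attack approximates the IN-world signal when only OUT shadow models are available: p_IN(x) = a * p_OUT(x) + b. A minimal sketch of reading these values from audit.yaml and applying the interpolation (assumes PyYAML is installed; the approx_p_in helper is illustrative, not part of LeakPro):

import yaml

# Load the audit configuration shown above.
with open("audit.yaml") as f:
    config = yaml.safe_load(f)

rmia = config["audit"]["attack_list"]["rmia"]
a, b = rmia["offline_a"], rmia["offline_b"]

def approx_p_in(p_out: float, a: float, b: float) -> float:
    """Illustrative helper: approximate p_IN(x) as a * p_OUT(x) + b."""
    return a * p_out + b

print(approx_p_in(0.5, a, b))  # 0.33 * 0.5 + 0.66 = 0.825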
65 changes: 65 additions & 0 deletions examples/mia/LOS/mimic_handler.py
@@ -0,0 +1,65 @@

import torch
from torch import cuda, device, optim
from torch.nn import BCELoss
from torch.utils.data import DataLoader
from tqdm import tqdm

from leakpro import AbstractInputHandler

class MimicInputHandler(AbstractInputHandler):
    """Class to handle the user input for the MIMIC dataset."""

    def __init__(self, configs: dict) -> None:
        super().__init__(configs=configs)

    def get_criterion(self) -> torch.nn.Module:
        """Set the binary cross-entropy loss for the model."""
        # BCELoss expects the model output to already be a probability in [0, 1].
        return BCELoss()

    def get_optimizer(self, model: torch.nn.Module) -> optim.Optimizer:
        """Set the optimizer for the model."""
        learning_rate = 0.1
        momentum = 0.8
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    def train(
        self,
        dataloader: DataLoader,
        model: torch.nn.Module = None,
        criterion: torch.nn.Module = None,
        optimizer: optim.Optimizer = None,
        epochs: int = None,
    ) -> dict:
        """Model training procedure."""

        compute_device = device("cuda" if cuda.is_available() else "cpu")
        model.to(compute_device)

        # Fall back to the handler defaults when no criterion or optimizer is passed in.
        if criterion is None:
            criterion = self.get_criterion()
        if optimizer is None:
            optimizer = self.get_optimizer(model)

        for _ in tqdm(range(epochs), desc="Training Progress"):
            model.train()
            train_acc, train_loss = 0.0, 0.0

            for data, target in dataloader:
                target = target.float().unsqueeze(1)
                data, target = data.to(compute_device, non_blocking=True), target.to(compute_device, non_blocking=True)
                optimizer.zero_grad()
                output = model(data)

                loss = criterion(output, target)
                # The model output is treated as a probability (consistent with BCELoss),
                # so threshold it directly at 0.5; applying an extra sigmoid here would
                # map every non-negative probability to at least 0.5.
                pred = output >= 0.5
                train_acc += pred.eq(target).sum().item()

                loss.backward()
                optimizer.step()
                train_loss += loss.item()

        # Normalize the last epoch's running totals.
        train_acc = train_acc / len(dataloader.dataset)
        train_loss = train_loss / len(dataloader)

        return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}}
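
get_criterion returns BCELoss, which assumes the model's forward pass already emits probabilities; this is why train thresholds the raw output at 0.5. The MimicLR class itself lives in utils/model.py, which is not part of this diff, so the sketch below is a hypothetical reconstruction consistent with that assumption (a single-layer logistic regression), not the committed model:

import torch
from torch import nn

class MimicLR(nn.Module):
    """Hypothetical sketch: logistic regression whose output is a probability,
    matching the BCELoss used by MimicInputHandler."""

    def __init__(self, n_features: int) -> None:
        super().__init__()
        self.linear = nn.Linear(n_features, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Sigmoid maps the linear score into [0, 1], as BCELoss requires.
        return torch.sigmoid(self.linear(x))

If the real model returns raw logits instead, the handler would need BCEWithLogitsLoss, and an explicit sigmoid before thresholding would then be correct.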
133 changes: 133 additions & 0 deletions examples/mia/LOS/mimic_main.ipynb
@@ -0,0 +1,133 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"project_root = os.path.abspath(os.path.join(os.getcwd(), \"../../..\"))\n",
"sys.path.append(project_root)\n",
"\n",
"from examples.mia.LOS.utils.data_processing import get_mimic_dataset, get_mimic_dataloaders\n",
"from examples.mia.LOS.utils.model import MimicLR, create_trained_model_and_metadata\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "get_mimic_dataset() got an unexpected keyword argument 'train_fraction'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Generate the dataset and dataloaders\u001b[39;00m\n\u001b[1;32m 2\u001b[0m path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata/\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m dataset, train_indices, test_indices\u001b[38;5;241m=\u001b[39m get_mimic_dataset(path, train_fraction \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.5\u001b[39m, test_fraction \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.2\u001b[39m)\n",
"\u001b[0;31mTypeError\u001b[0m: get_mimic_dataset() got an unexpected keyword argument 'train_fraction'"
]
}
],
"source": [
"\n",
"# Generate the dataset and dataloaders\n",
"path = os.path.join(os.getcwd(), \"data/\")\n",
"\n",
"dataset, train_indices, test_indices= get_mimic_dataset(path, train_fraction = 0.5, test_fraction = 0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"train_loader, test_loader= get_mimic_dataloaders(dataset, train_indices, test_indices, batch_size=128)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_features = dataset.x.shape[1]\n",
"print(f\"Number of features: {n_features}\")\n",
"\n",
"# Train the model\n",
"if not os.path.exists(\"target\"):\n",
" os.makedirs(\"target\")\n",
"model = MimicLR(n_features)\n",
"train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model, \n",
" train_loader, \n",
" test_loader, \n",
" lr = 0.0001,\n",
" weight_decay = 5.392,\n",
" epochs=2)\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mimic_handler import MimicInputHandler\n",
"\n",
"from leakpro import LeakPro\n",
"\n",
"# Read the config file\n",
"config_path = \"audit.yaml\"\n",
"\n",
"# Prepare leakpro object\n",
"leakpro = LeakPro(MimicInputHandler, config_path)\n",
"\n",
"# Run the audit \n",
"leakpro.run_audit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "leakpro_test",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
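
The second cell above was committed with a live TypeError: get_mimic_dataset does not accept the train_fraction / test_fraction keywords used in the call. Since utils/data_processing.py is not shown in this diff, the accepted parameters cannot be confirmed here; a standard-library way to inspect them before calling:

import inspect

from examples.mia.LOS.utils.data_processing import get_mimic_dataset

# Print the actual signature to see which keyword arguments exist;
# the committed traceback shows 'train_fraction' is not among them.
print(inspect.signature(get_mimic_dataset))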