User code handler #63

Merged · 16 commits · Jun 10, 2024

Changes from all commits
49 changes: 24 additions & 25 deletions config/audit.yaml
@@ -2,31 +2,26 @@ audit: # Configurations for auditing
random_seed: 1234 # Integer specifying the random seed
attack_list:
rmia:
training_data_fraction: 0.4 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
attack_data_fraction: 0.1 # Fraction of auxiliary dataset to sample from during attack
num_shadow_models: 8 # Number of shadow models to train
training_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
attack_data_fraction: 0.025 # Fraction of auxiliary dataset to sample from during attack
num_shadow_models: 2 # Number of shadow models to train
online: False # perform online or offline attack
temperature: 2
gamma: 2.0
offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
offline_b: 0.66
qmia:
training_data_fraction: 0.5 # Fraction of the auxilary dataset (data without train and test indices) to use for training the quantile regressor
epochs: 5 # Number of training epochs for quantile regression
population:
attack_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack
loss_traj:
training_distill_data_fraction : 0.2 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2
number_of_traj: 20 # Number of epochs (number of points in the loss trajectory)
attack_mode: "soft_label" # label_only, soft_label
attack_data_dir: "./leakpro_output/attack_objects/loss_traj"
mia_classifier_epochs: 100
lira:
training_data_fraction: 0.4 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
training_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
num_shadow_models: 8 # Number of shadow models to train
online: false # perform online or offline attack
online: False # perform online or offline attack
fixed_variance: True # Use a fixed variance for the whole audit

loss_traj:
training_distill_data_fraction : 0.2 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2
number_of_traj: 1 # Number of epochs (number of points in the loss trajectory)
label_only: "False" # True or False
attack_data_dir: "./leakpro_output/attack_objects/loss_traj"
mia_classifier_epochs: 10

report_log: "./leakpro_output/results" # Folder to save the auditing report
config_log: "./leakpro_output/config" # Folder to save the configuration files
@@ -36,21 +31,21 @@ audit: # Configurations for auditing
split_method: "no_overlapping" # Method of creating the attack dataset

target:
# Target model path
module_path: "./leakpro/shadow_model_blueprints.py"
model_class: "ResNet18"
# Data paths
trained_model_path: "./target/target_model.pkl"
trained_model_metadata_path: "./target/model_metadata.pkl"
data_path: "./target/data/cinic10.pkl"


shadow_model:
storage_path: "./leakpro_output/attack_objects/shadow_models"
# Path to a Python file with the shadow model architecture
module_path: "./leakpro/shadow_model_blueprints.py"
#
# [Optional] Define a shadow model (if none, shadow model will follow the target model)
# Path to a Python file with the shadow model architecture
#module_path: "./leakpro/shadow_model_blueprints.py"
# Name of the class to instantiate from the specified file
model_class_path: "ResNet18" #"ConvNet"
model_class: "ResNet18" #"ConvNet"
optimizer:
name: sgd #adam, sgd, rmsprop
lr: 0.01
@@ -61,10 +56,10 @@ shadow_model:
# Initialization parameters
init_params: {}

distillation_target_model:
storage_path: "./leakpro_output/attack_objects/distillation_target_models"
module_path: "./leakpro/shadow_model_blueprints.py"
# model_class: "ConvNet"
distillation_model:
storage_path: "./leakpro_output/attack_objects/distillation_models"
#module_path: "./leakpro/shadow_model_blueprints.py"
#model_class: "ConvNet"
optimizer:
name: sgd #adam, sgd, rmsprop
lr: 0.01
@@ -74,6 +69,9 @@ distillation_target_model:
name: crossentropyloss # crossentropyloss, nllloss, mseloss
# Initialization parameters
init_params: {}
<<<<<<< HEAD

=======
trained_model_path: "./leakpro_output/attack_objects/distillation_target_models/distillation_model.pkl"
trained_model_metadata_path: "./leakpro_output/attack_objects/distillation_target_models/model_metadata.pkl"
data_path: "./leakpro_output/attack_objects/distillation_target_models/cinic10.pkl"
@@ -94,5 +92,6 @@ distillation_shadow_model:
trained_model_path: "./leakpro_output/attack_objects/distillation_shadow_models/distillation_model.pkl"
trained_model_metadata_path: "./leakpro_output/attack_objects/distillation_shadow_models/model_metadata.pkl"
data_path: "./leakpro_output/attack_objects/distillation_shadow_models/cinic10.pkl"
>>>>>>> main
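
Note: as a quick orientation for the config file above, here is a minimal sketch of loading it with PyYAML and reading one attack's parameters. The path and key names follow the file shown in this diff; the snippet itself is illustrative and not part of the PR.

```python
import yaml

# Load the audit configuration shown above (path as in the repository).
with open("./config/audit.yaml", "rb") as f:
    configs = yaml.safe_load(f)

audit = configs["audit"]
# The attacks to run are the keys of attack_list, e.g. rmia, qmia, population, loss_traj, lira.
print("attacks:", list(audit["attack_list"].keys()))

# Per-attack parameters, e.g. the LiRA section as changed by this PR.
lira = audit["attack_list"]["lira"]
print(lira["num_shadow_models"], lira["training_data_fraction"], lira["online"])
```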


49 changes: 7 additions & 42 deletions leakpro.py
@@ -6,10 +6,9 @@
import time
from pathlib import Path

import joblib
import numpy as np
import yaml
from torch import load, manual_seed
from torch import manual_seed
from torch.utils.data import Subset

import leakpro.dev_utils.train as utils
Expand All @@ -23,7 +22,7 @@
prepare_train_test_datasets,
)
from leakpro.reporting.utils import prepare_priavcy_risk_report
from leakpro.utils.input_handler import get_class_from_module, import_module_from_file
from leakpro.user_inputs.cifar10_input_handler import Cifar10InputHandler


def setup_log(name: str, save_file: bool=True) -> logging.Logger:
@@ -96,16 +95,13 @@ def generate_user_input(configs: dict, logger: logging.Logger)->None:

if __name__ == "__main__":


#args = "./config/adult.yaml" # noqa: ERA001
# user_args = "./config/dev_config/cifar10.yaml" # noqa: ERA001
user_args = "./config/dev_config/cinic10.yaml" # noqa: ERA001

with open(user_args, "rb") as f:
user_configs = yaml.safe_load(f)

# Setup logger
logger = setup_log("analysis")
logger = setup_log("LeakPro", save_file=True)

# Generate user input
generate_user_input(user_configs, logger) # This is for developing purposes only
@@ -126,41 +122,10 @@ def generate_user_input(configs: dict, logger: logging.Logger)->None:
report_dir = f"{configs['audit']['report_log']}"
Path(report_dir).mkdir(parents=True, exist_ok=True)

# Get the target metadata
target_model_metadata_path = f'{configs["target"]["trained_model_metadata_path"]}'
try:
with open(target_model_metadata_path, "rb") as f:
target_model_metadata = joblib.load(f)
except FileNotFoundError:
logger.error(f"Could not find the target model metadata at {target_model_metadata_path}")

# Create a class instance of target model
target_module = import_module_from_file(configs["target"]["module_path"])
target_model_blueprint = get_class_from_module(target_module, configs["target"]["model_class"])
logger.info(f"Target model blueprint created from {configs['target']['model_class']} in {configs['target']['module_path']}")

# Load the target model parameters into the blueprint
with open(configs["target"]["trained_model_path"], "rb") as f:
target_model = target_model_blueprint(**target_model_metadata["model_metadata"]["init_params"])
target_model.load_state_dict(load(f))
logger.info(f"Loaded target model from {configs['target']['trained_model_path']}")

# Get the population dataset
try:
with open(configs["target"]["data_path"], "rb") as file:
population = joblib.load(file)
logger.info(f"Loaded population dataset from {configs['target']['data_path']}")
except FileNotFoundError:
logger.error(f"Could not find the population dataset at {configs['target']['data_path']}")
# ------------------------------------------------
# Now we have the target model, its metadata, and the train/test dataset indices.
attack_scheduler = AttackScheduler(
population,
target_model,
target_model_metadata["model_metadata"],
configs,
logger,
)
# Create user input handler
handler = Cifar10InputHandler(configs=configs, logger=logger)

attack_scheduler = AttackScheduler(handler)
audit_results = attack_scheduler.run_attacks()

for attack_name in audit_results:
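
For context, the entry-point flow after this PR reduces to building a handler and handing it to the scheduler. A minimal sketch following the diff above (import paths inferred from the repository layout; the handler presumably also carries the target model and data access that the attacks need, which is why the manual metadata/model/population loading could be removed from leakpro.py):

```python
import logging

import yaml

from leakpro.attacks.attack_scheduler import AttackScheduler  # path per leakpro/attacks/attack_scheduler.py
from leakpro.user_inputs.cifar10_input_handler import Cifar10InputHandler

# Load a configuration with the structure of config/audit.yaml
# (leakpro.py itself loads a dev config and prepares it first).
with open("./config/audit.yaml", "rb") as f:
    configs = yaml.safe_load(f)

logger = logging.getLogger("LeakPro")

# The user input handler bundles configs and logger for the attacks.
handler = Cifar10InputHandler(configs=configs, logger=logger)

attack_scheduler = AttackScheduler(handler)     # the scheduler now takes only the handler
audit_results = attack_scheduler.run_attacks()  # dict keyed by attack name
for attack_name in audit_results:
    ...  # report per attack, as in leakpro.py
```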
32 changes: 10 additions & 22 deletions leakpro/attacks/attack_scheduler.py
@@ -1,12 +1,9 @@
"""Module that contains the AttackScheduler class, which is responsible for creating and executing attacks."""
import logging

from torch import nn

from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA
from leakpro.attacks.mia_attacks.attack_factory_mia import AttackFactoryMIA
from leakpro.dataset import GeneralDataset
from leakpro.import_helper import Any, Dict, Self
from leakpro.user_inputs.abstract_input_handler import AbstractInputHandler


class AttackScheduler:
@@ -16,47 +13,38 @@ class AttackScheduler:

def __init__(
self:Self,
population:GeneralDataset,
target_model:nn.Module,
target_model_metadata:Dict[str, Any],
configs:Dict[str, Any],
logger:logging.Logger
handler: AbstractInputHandler,
) -> None:
"""Initialize the AttackScheduler class.

Args:
----
population (GeneralDataset): The population dataset.
target_model (torch.nn.Module): The target model.
target_model_metadata (Dict[str, Any]): The metadata of the target model.
configs (Dict[str, Any]): The configurations.
logger (logging.Logger): The logger object.
handler (AbstractInputHandler): The handler object that contains the user inputs.

"""
configs = handler.configs
if configs["audit"]["attack_type"] not in list(self.attack_type_to_factory.keys()):
raise ValueError(
f"Unknown attack type: {configs['audit']['attack_type']}. "
f"Supported attack types: {self.attack_type_to_factory.keys()}"
)

# Prepare factory with shared items
# Prepare factory
factory = self.attack_type_to_factory[configs["audit"]["attack_type"]]
factory.set_population_and_audit_data(population,target_model_metadata)
factory.set_target_model_and_loss(target_model, nn.CrossEntropyLoss()) #TODO: Enable arbitrary loss functions
factory.set_logger(logger)

self.logger = logger
self.logger = handler.logger

# Create the attacks
self.attack_list = list(configs["audit"]["attack_list"].keys())
self.attacks = []
for attack_name in self.attack_list:
try:
attack = factory.create_attack(attack_name, configs)
attack = factory.create_attack(attack_name, handler)
self.add_attack(attack)
self.logger.info(f"Added attack: {attack_name}")
except ValueError as e:
logger.info(e)
self.logger.info(e)
self.logger.info(f"Failed to create attack: {attack_name}, supported attacks: {factory.attack_classes.keys()}")

def add_attack(self:Self, attack: AbstractMIA) -> None:
"""Add an attack to the list of attacks."""
@@ -77,7 +65,7 @@ def run_attacks(self:Self) -> Dict[str, Any]:
self.logger.info(f"Finished attack: {attack_type}")
return results

def identify_attacks(self:Self) -> None:
def map_setting_to_attacks(self:Self) -> None:
"""Identify relevant attacks based on adversary setting."""
# TODO: Implement this mapping and remove attack list from configs
pass
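
To make the new constructor contract concrete: in this diff the scheduler only reads handler.configs and handler.logger directly and forwards the whole handler to the attack factory. The following stub is hypothetical (the real AbstractInputHandler also provides the model, data, and training hooks the attacks use); it only mirrors the two attributes visible above.

```python
import logging
from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class HandlerStub:
    """Hypothetical stand-in exposing only what this diff reads from the handler."""

    configs: Dict[str, Any]
    logger: logging.Logger = field(default_factory=lambda: logging.getLogger("LeakPro"))


# The constructor resolves a factory from configs["audit"]["attack_type"]
# (the value must be a key of AttackScheduler.attack_type_to_factory; "mia"
# is assumed here) and then calls factory.create_attack(attack_name, handler)
# for every entry in attack_list.
stub = HandlerStub(configs={"audit": {"attack_type": "mia", "attack_list": {"lira": {}}}})
```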