Skip to content
This repository has been archived by the owner on Aug 31, 2024. It is now read-only.

Commit

Permalink
Merge pull request #33 from iotcad/config
Browse files Browse the repository at this point in the history
  • Loading branch information
nkrusch authored Mar 25, 2023
2 parents 943de18 + 214180a commit c808566
Show file tree
Hide file tree
Showing 12 changed files with 188 additions and 82 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ T_ROBUST := --robust
F_ROBUST :=

ALWAYS := --resume
IOT_OPTIONS := --validator IOT23
NB_OPTIONS := --validator NB15
IOT_OPTIONS := --validator IOT23 --config config/iot.yaml
NB_OPTIONS := --validator NB15 --config config/unsw.yaml

DS_1 := -d ./data/CTU.csv $(IOT_OPTIONS)
DS_2 := -d ./data/nb15.csv $(NB_OPTIONS)
Expand All @@ -47,7 +47,7 @@ sample:
@$(foreach i, $(ITERS), $(foreach c, $(CLS), $(foreach r, $(ROBUST), \
$(foreach attack, $(ATTACKS), \
python3 -m src experiment $(ALWAYS) -a $(attack) $(DS_2) $($(r)) \
--iter 0 -s 50 -t 3 -c $(c) ; ))))
--iter 0 -s 50 -t 3 -c $(c) ; ))))

fast:
@$(foreach r, $(ROBUST), $(foreach c, $(CLS), $(foreach attack, $(ATTACKS), \
Expand Down
54 changes: 54 additions & 0 deletions config/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
hsj:
# Attack instance parameters
# https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/attacks/evasion.html#hopskipjump-attack
# Size of the batch used by the estimator during inference.
batch_size: 64
# Maximum number of evaluations for estimating gradient.
max_eval: 1000
# Initial number of evaluations for estimating gradient.
init_eval: 100
# Maximum number of trials for initial generation of adversarial examples.
init_size: 100
zoo: # Attack instance parameters
# https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/attacks/evasion.html#zeroth-order-optimization-zoo-attack
# Confidence of adversarial examples: a higher value produces examples that are farther away from the original input,
# but classified with higher confidence as the target class.
confidence: 0.25
# The initial learning rate for the attack algorithm. Smaller values produce better results but are slower to
# converge.
learning_rate: 0.1
# Number of times to adjust constant with binary search (positive value).
binary_search_steps: 10
# The initial trade-off constant c to use to tune the relative importance of distance and confidence. If
# binary_search_steps is large, the initial constant is not important, as discussed in Carlini and Wagner (2016).
initial_const: 0.001
# True if gradient descent should be abandoned when it gets stuck.
abort_early: True
# True to use the resizing strategy from the paper: first, compute attack on inputs resized to 32x32, then increase
# size if needed to 64x64, followed by 128x128.
use_resize: False
# True to use importance sampling when choosing coordinates to update.
use_importance: False
# Step size for numerical estimation of derivatives.
variable_h: 0.3
xgb: # Tree booster params
# <https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster>
# Step size shrinkage used in update to prevent overfitting.
eta: 0.3
# Minimum loss reduction required to make a further partition on a leaf node of the tree.
gamma: 0
# Maximum depth of a tree
max_depth: 6
# Minimum sum of instance weight (hessian) needed in a child
min_child_weight: 1
dnn:
# Keras model: https://keras.io/guides/sequential_model/
model:
# hidden layers
layers: [ 60 ]
# Keras model training args
model_fit:
# model training epochs
epochs: 80
# batch size
batch_size: 64
3 changes: 3 additions & 0 deletions config/iot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dnn:
model:
layers: [ 60 ]
3 changes: 3 additions & 0 deletions config/unsw.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dnn:
model:
layers: [ 60, 60, 60, 60, 60 ]
15 changes: 14 additions & 1 deletion src/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
"""
import logging
import yaml

from argparse import ArgumentParser
from pathlib import Path
from sys import exit
from typing import Optional, List

Expand Down Expand Up @@ -54,7 +56,12 @@ def main():
args.validator, args.dataset, args.capture, args.out)

if is_exp:
Experiment(utility.ts_str(), **args.__dict__).run()
df_args = yaml.safe_load(
Path(Experiment.DEFAULT_CF).read_text())
ex_args = yaml.safe_load(
Path(args.config).read_text()) if args.config else {}
c_args = {**df_args, **ex_args}
Experiment(utility.ts_str(), c_args, **args.__dict__).run()


def init_logger(level: int, fn: str = None):
Expand Down Expand Up @@ -182,6 +189,12 @@ def exp_args(parser: ArgumentParser):
action='store_true',
help="disable console log"
)
parser.add_argument(
'--config',
action="store",
default=None,
help=f'path to config file [default: None]',
)


def validator_args(parser: ArgumentParser):
Expand Down
10 changes: 7 additions & 3 deletions src/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
class Attack:
"""Attack base class defines common functionality"""

def __init__(self, name, def_iter, validator, uuid, save, iters, silent):
def __init__(self, name, def_iter, validator, uuid, save, iters,
silent, attack_conf):
self.uuid = uuid
self.name = name
self.validator_kind = validator
Expand All @@ -26,6 +27,7 @@ def __init__(self, name, def_iter, validator, uuid, save, iters, silent):
self.valid_result = None
self.validation_reasons = None
self.reset()
self.attack_conf = attack_conf or {}

def reset(self):
self.cls = None
Expand Down Expand Up @@ -96,7 +98,8 @@ def adv_proto_valid(self):
def label_stats(self) -> dict:
labels = []
if self.use_validator and self.n_valid > 0:
labels = self.adv_y[self.idx_valid_evades].flatten().tolist()
labels = self.adv_y[
self.idx_valid_evades].flatten().tolist()
elif self.n_evasions > 0:
labels = self.adv_y[self.evasions].flatten().tolist()
return dict([(self.cls.text_label(c), labels.count(c))
Expand All @@ -106,7 +109,8 @@ def get_proto_stats(self, records) -> dict:
if not self.use_validator:
return {}
labels = [Validator.determine_proto(
self.validator_kind, self.cls.attrs, r).name for r in records]
self.validator_kind, self.cls.attrs, r).name for r in
records]
return dict(Counter(labels))

def set_cls(self, cls: Classifier, indices=None):
Expand Down
4 changes: 3 additions & 1 deletion src/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
class Classifier:

def __init__(
self, name, out, attrs, y, robust, mask_cols, attr_ranges
self, name, out, attrs, y, robust, mask_cols, attr_ranges,
cls_conf
):
self.name = name
self.out_dir = out
Expand All @@ -26,6 +27,7 @@ def __init__(
self.n_pred_pos = 0
self.n_true_p = 0
self.reset()
self.cls_conf = cls_conf or {}

def reset(self):
self.classifier = None
Expand Down
36 changes: 23 additions & 13 deletions src/dnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,28 @@ class NeuralNetwork(Classifier):

def __init__(self, *args):
super().__init__('neural_network', *args)
self.epochs = 80
self.bsz = 64 # batch size

@staticmethod
def formatter(x, y):
return x

def __c_key(self, key):
    """Return the ``key`` section of the classifier config as a mapping.

    A section that is missing -- or present but empty/null, such as a
    bare ``model:`` line in the YAML file -- yields an empty dict, so
    callers can safely apply ``in`` and ``**`` to the result instead of
    failing with ``TypeError`` on ``None``.
    """
    return self.cls_conf.get(key) or {}

def __model(self, key):
    """Return the ``key`` entry of the ``model`` config section, or None
    when that entry is not present."""
    section = self.__c_key('model')
    if key in section:
        return section[key]
    return None

def __m_train(self, key=None):
    """Read the ``model_fit`` config section.

    Without a ``key`` the whole section is returned; with a ``key`` the
    matching entry is returned, or None when it is not present.
    """
    fit_section = self.__c_key('model_fit')
    if not key:
        return fit_section
    if key in fit_section:
        return fit_section[key]
    return None

def dnn_config(self):
    """Resolve hidden-layer sizes and the training keyword arguments.

    Returns a ``(layer_sizes, fit_kwargs)`` pair. Layer sizes come from
    the ``model.layers`` config entry; when that is unset, one 60-unit
    layer is used per four entries of ``self.mutable`` (at least one
    layer). ``fit_kwargs`` starts from ``epochs=80``, is overlaid with
    the ``model_fit`` section, and always ends with a ``batch_size``
    equal to the gcd of ``self.n_train`` and the configured batch size
    (64 when unset).
    """
    layer_sizes = self.__model('layers')
    if not layer_sizes:
        layer_sizes = [60] * max(1, len(self.mutable) // 4)

    requested_bs = self.__m_train('batch_size') or 64

    fit_kwargs = {'epochs': 80}
    fit_kwargs.update(self.__m_train())
    # Set last so the gcd-adjusted value wins over the configured one.
    fit_kwargs['batch_size'] = gcd(self.n_train, requested_bs)
    return layer_sizes, fit_kwargs

def predict(self, data):
tmp = self.model.predict(data)
ax = 1 if len(tmp.shape) == 2 else 0
Expand All @@ -48,25 +63,20 @@ def _set_cls(self, cls):
self.classifier = cls
self.model = cls.model

@property
def model_fit_kwargs(self):
return {'callbacks': [EarlyStopping(monitor='loss', patience=5)],
'shuffle': True, 'verbose': False}

def init_classifier(self):
"""Trains a deep neural network classifier."""
n_layers = max(1, len(self.mutable) // 4)
layers = [Dense(60, activation='relu') for _ in range(n_layers)] + \
layers, args = self.dnn_config()
layers = [Dense(v, activation='relu') for v in layers] + \
[Dense(self.n_classes, activation='softmax')]
model = tf.keras.models.Sequential(layers)
model.compile(
optimizer=SGD(),
loss=SparseCategoricalCrossentropy(),
metrics=[SparseCategoricalAccuracy()])
model.fit(
self.train_x, self.train_y, epochs=self.epochs,
batch_size=gcd(self.bsz, self.n_train),
**self.model_fit_kwargs)
self.train_x, self.train_y,
shuffle=True, verbose=False, **args,
callbacks=[EarlyStopping(monitor='loss', patience=5)])
return KerasClassifier(model=model, clip_values=(0, 1))

def init_robust(self):
Expand All @@ -77,7 +87,7 @@ def init_robust(self):
trainer = AdversarialTrainer(
# Model to train adversarially
classifier=robust_classifier,
# Attacks to use for data augmentation in adversarial training
# Attacks to use for data augmentation
attacks=attack,
# Proportion of samples to be replaced with adversarial
# counterparts. Value 1 trains only on adversarial samples.
Expand Down
78 changes: 46 additions & 32 deletions src/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,12 @@ def valid_ratio(self) -> float:

DEFAULT_DS = 'data/CTU.csv'
DEFAULT_CLS = ClsLoader.XGB
DEFAULT_CF = 'config/default.yaml'
ATTACKS = [AttackLoader.HSJ, AttackLoader.ZOO]
CLASSIFIERS = [ClsLoader.XGB, ClsLoader.DNN]
VALIDATORS = [Validator.NB15, Validator.IOT23]

def __init__(self, uuid, **kwargs):
def __init__(self, uuid, config_obj, **kwargs):
self.uuid = uuid
self.start_time = 0
self.end_time = 0
Expand All @@ -138,8 +139,10 @@ def __init__(self, uuid, **kwargs):
self.mask_cols = []
self.attr_ranges = {}
self.stats = Experiment.Result()
config_keys = ",".join(kwargs.keys())
self.config = namedtuple('exp', config_keys)(**kwargs)
config_keys = ",".join(
list(kwargs.keys()) + list(config_obj.keys()))
self.config = (namedtuple('exp', config_keys)
(**kwargs, **config_obj))

@property
def n_records(self) -> int:
Expand Down Expand Up @@ -168,6 +171,10 @@ def is_repeat(self):
if f.startswith(match)]
return prev[0] if len(prev) > 0 else None

def custom_config(self, key):
    """Look up an optional section of the experiment configuration.

    Returns the value stored under attribute ``key`` of ``self.config``,
    or None when ``key`` is empty/None or no such attribute exists.
    """
    if not key:
        return None
    return getattr(self.config, key, None)

def load_csv(self, ds_path: str, n_splits: int):
self.attrs, rows = utility.read_dataset(ds_path)
self.X = rows[:, :-1]
Expand All @@ -181,37 +188,17 @@ def load_csv(self, ds_path: str, n_splits: int):
if set(col_values).issubset({0, 1}):
self.mask_cols.append(col_i)

def exec_fold(self, fold_num: int, fold_indices: List[int]):
self.cls.reset() \
.load(self.X.copy(), self.y.copy(), *fold_indices, fold_num) \
.train()
self.stats.append_cls(self.cls)
self.log_training_result(fold_num)
sample_size = 0 if self.config.sample_size < 1 else \
min(self.config.sample_size, self.cls.n_test)

if self.attack:
for n in range(self.config.sample_times):
sample_idx = None if sample_size < 1 else \
sample(range(0, self.cls.n_test), sample_size)
self.attack.reset().set_cls(self.cls, sample_idx).run()
self.attack.eval_examples()
self.attack.validate()
if self.attack.save_records and self.attack.n_evasions > 0:
self.attack.dump_result(self.config.out)
self.stats.append_attack(self.attack)
gc.collect()
self.log_fold_attack(n + 1, self.config.sample_times)

def run(self):
config, prev = self.config, self.is_repeat
if config.resume and prev:
return print('Saved result to', prev)
self.load_csv(config.dataset, config.folds)
cls_args = (config.cls, config.out, self.attrs, self.y,
config.robust, self.mask_cols, self.attr_ranges)
config.robust, self.mask_cols, self.attr_ranges,
self.custom_config(config.cls))
atk_args = (config.attack, config.validator, self.uuid,
config.capture, config.iter, config.silent)
config.capture, config.iter, config.silent,
self.custom_config(config.attack))
self.cls = ClsLoader.init(*cls_args)
self.attack = AttackLoader.load(*atk_args) \
if config.attack else None
Expand All @@ -231,6 +218,28 @@ def run(self):
self.save_result()
self.cleanup()

def exec_fold(self, fold_num: int, fold_indices: List[int]):
    """Train the classifier on one fold and, if an attack is configured,
    run it for the configured number of sampling rounds.

    Arguments:
        fold_num - number of the current fold, passed on to the
            classifier loader and the training log
        fold_indices - index collections for this fold, unpacked into
            the classifier's ``load`` call
    """
    # Reset, load a private copy of the data for this fold, then train.
    classifier = self.cls.reset()
    classifier = classifier.load(
        self.X.copy(), self.y.copy(), *fold_indices, fold_num)
    classifier.train()
    self.stats.append_cls(self.cls)
    self.log_training_result(fold_num)

    # A requested size below 1 means "no sampling"; otherwise the
    # sample size is capped by the number of test records.
    if self.config.sample_size < 1:
        sample_size = 0
    else:
        sample_size = min(self.config.sample_size, self.cls.n_test)

    if not self.attack:
        return

    for round_idx in range(self.config.sample_times):
        if sample_size < 1:
            chosen = None
        else:
            chosen = sample(range(0, self.cls.n_test), sample_size)
        self.attack.reset().set_cls(self.cls, chosen).run()
        self.attack.eval_examples()
        self.attack.validate()
        if self.attack.save_records and self.attack.n_evasions > 0:
            self.attack.dump_result(self.config.out)
        self.stats.append_attack(self.attack)
        gc.collect()
        self.log_fold_attack(round_idx + 1, self.config.sample_times)

def log_experiment_setup(self):
Show('Dataset', self.config.dataset)
Show('Record count', self.n_records)
Expand Down Expand Up @@ -261,11 +270,14 @@ def log_fold_attack(self, sample_n: int, n_total: int):
if n_total > 1:
print('-' * 5)
if n_total > 1:
Show('Sampling round', f'{sample_n}/{self.config.sample_times}')
Show('Sampling round',
f'{sample_n}/{self.config.sample_times}')
if self.attack:
Ratio('Evasions', self.attack.n_evasions, self.attack.n_records)
Ratio('Evasions', self.attack.n_evasions,
self.attack.n_records)
if self.attack.use_validator:
Ratio('Valid', self.attack.n_valid, self.attack.n_evasions)
Ratio('Valid', self.attack.n_valid,
self.attack.n_evasions)
if self.attack.has_evasions:
Show('Class labels',
utility.dump_num_dict(self.attack.label_stats))
Expand All @@ -280,9 +292,11 @@ def log_experiment_result(self):
Show('Avg. Recall', f'{(self.stats.recall * 100):.2f} %')
Show('Avg. F-score', f'{(self.stats.f_score * 100):.2f} %')
if self.attack:
Ratio('Evasions', self.stats.n_evasions, self.stats.n_records)
Ratio('Evasions', self.stats.n_evasions,
self.stats.n_records)
if self.attack.use_validator:
Ratio('Valid', self.stats.n_valid, self.stats.n_evasions)
Ratio('Valid', self.stats.n_valid,
self.stats.n_evasions)
Show('Time', "{0} min {1:.2f} s".format(*self.duration))

def to_dict(self) -> dict:
Expand Down
Loading

0 comments on commit c808566

Please sign in to comment.