forked from duchi-lab/certifiable-distributional-robustness
attacks.py
# Based on code from https://github.com/tensorflow/cleverhans
#
# This is the code for the paper
#
# Certifying Some Distributional Robustness with Principled Adversarial Training
# Link: https://openreview.net/forum?id=Hk6kPgZA-
#
# Authors: Aman Sinha, Hongseok Namkoong, John Duchi
from abc import ABCMeta

import numpy as np
import warnings

from attacks_tf import wrm


class Attack:
    """
    Abstract base class for all attack classes.
    """
    __metaclass__ = ABCMeta

    def __init__(self, model, back='tf', sess=None):
        """
        :param model: A function that takes a symbolic input and returns the
                      symbolic output for the model's predictions.
        :param back: The backend to use. Either 'tf' (default) or 'th'.
        :param sess: The tf session to run graphs in (use None for Theano).
        """
        if back not in ('tf', 'th'):
            raise ValueError("Backend argument must be either 'tf' or 'th'.")
        if back == 'tf' and sess is None:
            raise Exception("A tf session was not provided in the sess argument.")
        if back == 'th' and sess is not None:
            raise Exception("A session should not be provided when using th.")
        if not hasattr(model, '__call__'):
            raise ValueError("model argument must be a function that returns "
                             "the symbolic output when given an input tensor.")

        # Prepare attributes
        self.model = model
        self.back = back
        self.sess = sess
        self.inf_loop = False

    def generate(self, x, **kwargs):
        """
        Generate the attack's symbolic graph for adversarial examples. This
        method should be overridden in any child class that implements an
        attack that is expressible symbolically. Otherwise, it will wrap the
        numerical implementation as a symbolic operator (a minimal sketch of
        this fallback path follows the class definition).

        :param x: The model's symbolic inputs.
        :param **kwargs: optional parameters used by child classes.
        :return: A symbolic representation of the adversarial examples.
        """
        if self.back == 'th':
            raise NotImplementedError('Theano version not implemented.')

        if not self.inf_loop:
            self.inf_loop = True
            assert self.parse_params(**kwargs)
            import tensorflow as tf
            graph = tf.py_func(self.generate_np, [x], tf.float32)
            self.inf_loop = False
            return graph
        else:
            error = "No symbolic or numeric implementation of attack."
            raise NotImplementedError(error)

    def parse_params(self, params=None):
        """
        Take in a dictionary of parameters and apply attack-specific checks
        before saving them as attributes.

        :param params: a dictionary of attack-specific parameters
        :return: True when parsing was successful
        """
        return True
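
# A hedged illustration of the numeric fallback in Attack.generate: a subclass
# that only provides a numpy-level generate_np is wrapped into the graph via
# tf.py_func by the base class. This example class is hypothetical (it is not
# part of the WRM attack); it only sketches how the base class is meant to be
# extended.
class UniformNoiseExample(Attack):
    """Hypothetical attack that adds bounded uniform noise in numpy."""

    def parse_params(self, eps=0.1, **kwargs):
        # Save the perturbation magnitude; no other checks are needed here.
        self.eps = eps
        return True

    def generate_np(self, x_val):
        # Numpy implementation; Attack.generate wraps this with tf.py_func.
        noise = np.random.uniform(-self.eps, self.eps, size=x_val.shape)
        return (x_val + noise).astype(np.float32)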

class WassersteinRobustMethod(Attack):
    def __init__(self, model, back='tf', sess=None):
        super(WassersteinRobustMethod, self).__init__(model, back, sess)

    def generate(self, x, **kwargs):
        # Parse and save attack-specific parameters, then build the WRM graph.
        assert self.parse_params(**kwargs)
        return wrm(x, self.model(x), y=self.y, eps=self.eps, ord=self.ord,
                   model=self.model, steps=self.steps)

    def parse_params(self, eps=0.3, ord=2, y=None, steps=15, **kwargs):
        """
        Take in a dictionary of parameters and apply attack-specific checks
        before saving them as attributes.

        Attack-specific parameters:
        :param eps: (optional float) 0.5/gamma (the Lagrange dual parameter)
                    in the ICLR paper (see link above).
        :param ord: (optional) Order of the norm (mimics numpy).
                    Possible values: 2.
        :param y: (optional) A placeholder for the model labels. Only provide
                  this parameter if you'd like to use true labels when crafting
                  adversarial samples. Otherwise, model predictions are used as
                  labels to avoid the "label leaking" effect (explained in this
                  paper: https://arxiv.org/abs/1611.01236). Default is None.
                  Labels should be one-hot encoded.
        :param steps: (optional int) number of gradient ascent steps to take
                      when finding the adversarial example.
        """
        # Save attack-specific parameters
        self.eps = eps
        self.ord = ord
        self.y = y
        self.steps = steps

        # Check that the order of the norm is acceptable given the current
        # implementation (only the 2-norm is supported).
        if self.ord not in [int(2)]:
            raise ValueError("Norm order must be 2.")
        return True
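
if __name__ == '__main__':
    # Minimal usage sketch, assuming a TF1-style graph/session workflow, the
    # accompanying attacks_tf.wrm implementation, and a small hypothetical
    # Keras classifier; the model, input shape, and eps value below are
    # illustrative choices, not the settings used in the paper.
    import tensorflow as tf

    # Any callable mapping an input tensor to logits works, since Attack only
    # checks that `model` is callable.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='elu', input_shape=(784,)),
        tf.keras.layers.Dense(10),
    ])

    x = tf.placeholder(tf.float32, shape=(None, 784))
    with tf.Session() as sess:
        attack = WassersteinRobustMethod(model, back='tf', sess=sess)
        # eps plays the role of 0.5/gamma from the paper; y is omitted, so the
        # attack uses the model's own predictions as labels.
        x_adv = attack.generate(x, eps=1.3, ord=2, steps=15)
        sess.run(tf.global_variables_initializer())
        adv_batch = sess.run(x_adv, feed_dict={x: np.zeros((8, 784), np.float32)})
        print('adversarial batch shape:', adv_batch.shape)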