import os
from contextlib import contextmanager
from functools import wraps
import six
import numpy as np
import torch
from torch import nn
from nics_fix_pt import quant
import pytest
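
# Wrap ``func`` so the number of times it is called can be read from ``_func.num_calls``.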
def _add_call_counter(func):
    @wraps(func)
    def _func(*args, **kwargs):
        _func.num_calls += 1
        return func(*args, **kwargs)
    _func.num_calls = 0
    return _func
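
# Patch ``quant.quantitize`` in place *before* importing the name below, so the imported
# ``quantitize`` symbol is the counted wrapper and the test can inspect how many
# quantization calls each forward pass triggers.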
quant.quantitize = _add_call_counter(quant.quantitize)
from nics_fix_pt.quant import quantitize
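
# Temporarily replace attributes on modules (e.g. ``nn.Conv2d``) for the duration of the
# ``with`` block, restoring the original attributes afterwards.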
@contextmanager
def patch_variable(patches):
    backup = []
    for module, name, value in patches:
        backup.append((module, name, getattr(module, name)))
        setattr(module, name, value)
    yield
    for module, name, value in backup:
        setattr(module, name, value)
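
# A random batch of 3x28x28 inputs together with random integer labels in [0, 10).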
def _cnn_data(device="cuda", batch_size=2):
    return (torch.rand(batch_size, 3, 28, 28, dtype=torch.float, device=device),
            torch.tensor(np.random.randint(0, high=10, size=batch_size)).long().to(device))
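
# Sample a random architecture rollout from the search space and assemble the
# corresponding candidate network from the supernet.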
def _supernet_sample_cand(net):
    ss = net.search_space
    rollout = ss.random_sample()
    # arch = [([0, 0, 2, 2, 0, 2, 4, 4], [0, 6, 7, 6, 1, 1, 5, 7]),
    #         ([1, 1, 0, 0, 1, 2, 2, 2], [7, 2, 2, 1, 7, 4, 3, 7])]
    cand_net = net.assemble_candidate(rollout)
    return cand_net

BITWIDTH = 8
FIX_METHOD = 1 # auto_fix
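
# Build a per-parameter fixed-point config (method/scale/bitwidth stored as non-trainable
# IntTensor Variables), in the format passed to the nics_fix_pt fixed modules and to
# ``quantitize`` below.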
def _generate_default_fix_cfg(names, scale=0, bitwidth=8, method=0):
    return {n: {
        "method": torch.autograd.Variable(torch.IntTensor(np.array([method])), requires_grad=False),
        "scale": torch.autograd.Variable(torch.IntTensor(np.array([scale])), requires_grad=False),
        "bitwidth": torch.autograd.Variable(torch.IntTensor(np.array([bitwidth])), requires_grad=False)
    } for n in names}
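
# Run a forward pass with parameters temporarily quantized by the ``fix_params`` context manager.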
def fix_forward(self, *args, **kwargs):
    with fix_params(self):
        return self.old_forward(*args, **kwargs)
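
# Shadow a module's parameters/buffers with quantized copies via ``object.__setattr__``,
# leaving the float master copies stored in ``_parameters``/``_buffers`` untouched.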
def quantize(self, fix_cfg, original=False):
    for n, param in six.iteritems(self._parameters):
        if not isinstance(param, (torch.Tensor, torch.autograd.Variable)):
            continue
        if not original:
            # run quantize
            param, _ = quantitize(param, fix_cfg.get(n, {}), {}, name=n)
        object.__setattr__(self, n, param)
    for n, param in six.iteritems(self._buffers):
        if not isinstance(param, (torch.Tensor, torch.autograd.Variable)):
            continue
        if not original:
            # run quantize
            param, _ = quantitize(param, fix_cfg.get(n, {}), {}, name=n)
        object.__setattr__(self, n, param)
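
# After the forward pass, write the shadowed buffer tensors back into ``_buffers``:
# buffers (e.g. BN running stats) carry no gradient and are updated in place.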
def setback_buffer(self):
    for n, param in six.iteritems(self._buffers):
        # set buffer back, as there will be no gradient, just in-place modification
        # FIXME: For fixed-point batch norm,
        # the running mean/var accumulation is on quantized mean/var,
        # which means it might fail to update the running mean/var
        # if the updating momentum is too small
        self._buffers[n] = getattr(self, n)
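
# Context manager implementing the external quantization path: lazily generate fix configs
# for all Conv2d/Linear/BatchNorm2d submodules, quantize the currently active ones on entry,
# and write buffers back on exit.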
@contextmanager
def fix_params(module, original=False):
    if not hasattr(module, "_generated_fixed_cfgs"):
        module._generated_fixed_cfgs = {}
        for mod_prefix, mod in module.named_modules():
            if isinstance(mod, (nn.Conv2d, nn.Linear)):
                to_fix = ["weight", "bias"]
            elif isinstance(mod, nn.BatchNorm2d):
                to_fix = ["weight", "bias", "running_mean", "running_var"]
            else:
                to_fix = []
            if to_fix:
                module._generated_fixed_cfgs[mod_prefix] = _generate_default_fix_cfg(
                    to_fix, method=FIX_METHOD, bitwidth=BITWIDTH)
    current_active_modules = dict(module.named_modules()) \
        if not hasattr(module, "active_named_members") \
        else dict(module.active_named_members(member="modules",
                                               prefix="super_net",
                                               check_visited=True))
    # FIXME: possible multi-access in multi-gpu
    to_fix_mods = set(module._generated_fixed_cfgs.keys()).intersection(
        current_active_modules.keys())
    quantized = {"conv": 0, "bn": 0, "linear": 0}
    for mod_prefix in to_fix_mods:
        fix_cfg = module._generated_fixed_cfgs[mod_prefix]
        mod = current_active_modules[mod_prefix]
        quantize(mod, fix_cfg, original=original)
        if isinstance(mod, nn.Conv2d):
            key = "conv"
        elif isinstance(mod, nn.Linear):
            key = "linear"
        elif isinstance(mod, nn.BatchNorm2d):
            key = "bn"
        quantized[key] += 1
    print("num quantized modules: ", quantized)
    yield
    if not original:
        for mod_prefix in to_fix_mods:
            mod = current_active_modules[mod_prefix]
            setback_buffer(mod)
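
# Check that quantizing a plain SuperNet externally via ``fix_params`` produces the same
# logits and losses as a SuperNet built from the nics_fix_pt fixed modules
# (FixedConv/FixedLinear/FixedBatchNorm2d) initialized with identical float weights.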
@pytest.mark.parametrize("case", range(5))
def test_new_quantize(case):
    from aw_nas.common import get_search_space
    from aw_nas.weights_manager import SuperNet
    import numpy as np
    import torch
    from torch import nn
    import nics_fix_pt.nn_fix as nfp

    class FixedConv(nfp.Conv2d_fix):
        def __init__(self, *args, **kwargs):
            kwargs["nf_fix_params"] = _generate_default_fix_cfg(
                ["weight", "bias"], method=FIX_METHOD, bitwidth=BITWIDTH)
            super(FixedConv, self).__init__(*args, **kwargs)

    class FixedLinear(nfp.Linear_fix):
        def __init__(self, *args, **kwargs):
            kwargs["nf_fix_params"] = _generate_default_fix_cfg(
                ["weight", "bias"], method=FIX_METHOD, bitwidth=BITWIDTH)
            super(FixedLinear, self).__init__(*args, **kwargs)

    class FixedBatchNorm2d(nfp.BatchNorm2d_fix):
        """
        FIXME:
        When training on multiple GPUs, the update of the buffers (running_mean/running_var)
        is done using data on the main GPU only; this will lead to a discrepancy between
        one-GPU and multi-GPU training. Maybe consider using `encoding.nn.BatchNorm2d`.
        Moreover, as the fixed-point configuration is shared between threads, and each GPU
        separately computes using its own data split, the unprotected concurrent read/write
        to this shared config leads to undefined behaviour. Maybe this config should be
        thread-local too. (Maybe this should be a fix in the nics_fix_pytorch lib.)
        A temporary fix is not to quantize `running_mean` and `running_var` here.
        """
        def __init__(self, *args, **kwargs):
            kwargs["nf_fix_params"] = _generate_default_fix_cfg(
                ["weight", "bias", "running_mean", "running_var"],
                method=FIX_METHOD, bitwidth=BITWIDTH)
            super(FixedBatchNorm2d, self).__init__(*args, **kwargs)

    ss = get_search_space(cls="cnn", **{
        "num_steps": 4,
        "num_layers": 1,
        "num_cell_groups": 1,
        "cell_layout": [0],
        "reduce_cell_groups": [0],
    })
    device = "cuda:0"
    # build one supernet from the natively fixed modules, and one plain supernet
    with patch_variable([(nn, "Conv2d", FixedConv),
                         (nn, "BatchNorm2d", FixedBatchNorm2d),
                         (nn, "Linear", FixedLinear)]):
        super_net_fixed = SuperNet(ss, device, dropout_rate=0)
    super_net = SuperNet(ss, device, dropout_rate=0)
    # use same float params
    snf_params = dict(super_net_fixed.named_parameters())
    for name, param in super_net.named_parameters():
        snf_params[name].data.copy_(param)
    # assemble the same subarch
    rollout = ss.random_sample()
    cand_net = super_net.assemble_candidate(rollout)
    cand_net.old_forward = cand_net.forward
    cand_net_fixed = super_net_fixed.assemble_candidate(rollout)
    cand_net.train()
    cand_net_fixed.train()
    lr = 1e-3
    FixedLinear.forward = _add_call_counter(FixedLinear.forward)
    FixedConv.forward = _add_call_counter(FixedConv.forward)
    FixedBatchNorm2d.forward = _add_call_counter(FixedBatchNorm2d.forward)
    optimizer = torch.optim.SGD(cand_net.parameters(), lr=lr)
    optimizer_fixed = torch.optim.SGD(cand_net_fixed.parameters(), lr=lr)
    # only fixed params
    for _ in range(5):
        batch_size = 2
        data = _cnn_data(batch_size=batch_size)
        quantitize.num_calls = 0
        logits = fix_forward(cand_net, data[0])
        print("fix_forward quantize calls: ", quantitize.num_calls)
        quantitize.num_calls = 0
        FixedConv.forward.num_calls = 0
        FixedLinear.forward.num_calls = 0
        FixedBatchNorm2d.forward.num_calls = 0
        logits_fixed = cand_net_fixed.forward(data[0])
        print("old fix_forward quantize calls: ", quantitize.num_calls)
        print("old fix_forward, num modules: conv, linear, bn",
              FixedConv.forward.num_calls, FixedLinear.forward.num_calls,
              FixedBatchNorm2d.forward.num_calls)
        assert (logits == logits_fixed).all()
        loss = nn.CrossEntropyLoss()(logits, data[1].cuda())
        loss_fixed = nn.CrossEntropyLoss()(logits_fixed, data[1].cuda())
        assert (loss == loss_fixed).all()
        loss.backward()
        loss_fixed.backward()
        optimizer.step()
        optimizer_fixed.step()