# forked from intel/neural-compressor — example config: qat.yaml (139 lines, 6.84 KB)
---
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): `1.0` parses as a YAML float (1.10 would collapse to 1.1); the
# neural-compressor schema appears to expect it unquoted — confirm before quoting.
version: 1.0 # optional. reserved for future use. if not specified, a supported version would be written back to user yaml.
model: # mandatory. used to specify model specific information.
  name: resnet50v1.5
  framework: pytorch # mandatory. supported values are tensorflow, pytorch, pytorch_fx, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
device: cpu # optional. default value is cpu. other value is gpu.
quantization: # optional. required for QAT and PTQ.
  approach: quant_aware_training # mandatory. supported values are quant_aware_training and post_training_static_quant.
  train: # training loop settings used while fine-tuning the fake-quantized model.
    start_epoch: 0
    end_epoch: 10
    iteration: 100
    frequency: 2
    hostfile: /path/to/host/file # reserved for multi-node training.
    dataloader:
      batch_size: 256
      dataset:
        ImageFolder:
          root: /path/to/imagenet/train
      transform:
        RandomResizedCrop:
          size: 224
        RandomHorizontalFlip:
        ToTensor:
        Normalize:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
    criterion:
      CrossEntropyLoss:
        # was `None`: YAML parses the plain scalar `None` as the *string* 'None'
        # (only null/Null/NULL/~ resolve to null), and torch.nn.CrossEntropyLoss
        # accepts only 'none' | 'mean' | 'sum' for `reduction`.
        reduction: none
    optimizer:
      SGD:
        learning_rate: 0.1
        momentum: 0.9
        weight_decay: 0.0004
        # canonical lowercase YAML boolean (yamllint `truthy`); parses the same as `False`.
        nesterov: false
  model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space.
    weight:
      granularity: per_channel
      scheme: asym
      dtype: int8
      algorithm: minmax
    activation:
      granularity: per_tensor
      scheme: asym
      dtype: int8
      algorithm: minmax
  op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space.
    'conv1': {
      'activation': {'dtype': ['uint8', 'fp32'], 'algorithm': ['minmax', 'kl'], 'scheme':['sym']},
      'weight': {'dtype': ['int8', 'fp32'], 'algorithm': ['minmax']}
    },
    'pool1': {
      'activation': {'dtype': ['int8'], 'scheme': ['sym'], 'granularity': ['per_tensor'], 'algorithm': ['minmax', 'kl']},
    },
    'conv2': {
      'activation': {'dtype': ['fp32']},
      'weight': {'dtype': ['fp32']}
    }
  }
evaluation: # optional. used to config evaluation process.
  accuracy: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization.
    metric: # optional. used to evaluate accuracy of passing model.
      topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric.
    configs:
      cores_per_instance: 28
      num_of_instance: 1
    dataloader: # optional. if not specified, user need construct a q_dataloader in code for neural_compressor.Quantization.
      batch_size: 256
      dataset:
        ImageFolder:
          # NOTE(review): accuracy evaluation points at the *train* split; presumably
          # this should be the validation split (e.g. /path/to/imagenet/val) — confirm.
          root: /path/to/imagenet/train
      transform:
        RandomResizedCrop:
          size: 224
        RandomHorizontalFlip:
        ToTensor:
        Normalize:
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
    postprocess:
      transform:
        Reshape:
          shape: [-1, 1001]
  performance: # optional. used to benchmark performance of passing model.
    configs:
      cores_per_instance: 4
      num_of_instance: 7
    dataloader:
      warmup: 10
      iteration: 100
      dataset:
        dummy:
          shape: [[128, 3, 224, 224], [128, 1, 1, 1]]
tuning:
  strategy:
    name: basic # optional. default value is basic. other values are bayesian, mse.
  accuracy_criterion:
    relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
  objective: performance # optional. objective with accuracy constraint guaranteed. default value is performance. other values are modelsize and footprint.
  exit_policy:
    timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
    max_trials: 100 # optional. max tune times. default value is 100. combine with timeout field to decide when to exit.
  random_seed: 9527 # optional. random seed for deterministic tuning.
  # booleans written in canonical lowercase (yamllint `truthy`); `True`/`False`
  # parse to the same values under PyYAML but differ across YAML 1.2 parsers.
  tensorboard: true # optional. dump tensor distribution in evaluation phase for debug purpose. default value is false.
  workspace:
    path: /path/to/saving/directory # optional. default workspace is ./nc_workspace/current_time_stamp, saving tuning history and deploy yaml.
    resume: /path/to/a/specified/snapshot/file # optional. if specified, resume from tuning history.
  diagnosis:
    diagnosis_after_tuning: false # optional. bool, defaults to false, whether or not dump tensor and show in Bench after tuning finish.
    op_list: [] # optional. List[str], defaults to [], the op(s) to be dumped to reduce local disk consumption. the default setting means dump all quantized op instead of not dump anything.
    iteration_list: [1] # optional. List[int], defaults to [1], the iteration that needs to dump activation, the default value is [1] which means dump the activation of the first iteration.
    inspect_type: activation # optional. str, defaults to activation, dump weight, activation or all. can be one of 'weight', 'activation' or 'all'.
    save_to_disk: true # optional. bool, defaults to true, whether or not to save the dumped tensor.
    save_path: './nc_workspace/inspect_saved/' # optional. str, defaults to './nc_workspace/inspect_saved/', a path to save the dumped tensor.