Commit: update apgcc framework and inference code
Showing 47 changed files with 3,304 additions and 0 deletions.
3 binary files not shown.
New file: Python configuration module (180 lines added).
import os
from easydict import EasyDict as edict
import time
import numpy as np

# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = edict()
cfg = _C  # alias for easy usage

_C.TAG = 'APGCC'
_C.SEED = 1229                    # random seed
_C.GPU_ID = 0                     # gpu_id, the GPU used for training
_C.OUTPUT_DIR = './output/temp/'  # output_dir, path where to save; empty for no saving
_C.VIS = False                    # visualize the predicted samples

# -----------------------------------------------------------------------------
# MODEL
# -----------------------------------------------------------------------------
_C.MODEL = edict()
_C.MODEL.ENCODER = 'vgg16_bn'  # backbone, name of the convolutional backbone to use: ['vgg16', 'vgg16_bn']
_C.MODEL.ENCODER_kwargs = {"last_pool": False}  # last-layer downsample; False: feat4 (H/16, W/16), True: feat4 (H/32, W/32)
# Commented-out ENCODER_kwargs kept from other backbone variants:
# "layers": 4,  # select number of bodies.
# "fpn": True,
# "multi_grid": True,
# "zero_init_residual": True,
# "replace_stride_with_dilation": [False, False, False],
# "feat_dims": 64

_C.MODEL.DECODER = 'basic'  # decoder: ['basic', 'IFA']
_C.MODEL.DECODER_kwargs = {"num_classes": 2,       # output num_classes; the default 2 means confidence
                           "inner_planes": 256,    # basic: 256, IFA: 64
                           "feat_layers": [3, 4],  # controls which decoder features are used, subset of [1, 2, 3, 4]
                           "pos_dim": 2,           # dimension of the position encoding
                           "ultra_pe": False,      # additional position encoding: x -> (x, sin(x), cos(x))
                           "learn_pe": False,      # additional position encoding: x -> (trainable variable)
                           "unfold": False,        # unfold the feature channels, making the feature dim 3x3 times larger
                           "local": False,         # enable local patch: 3x3 mapping around the center point
                           "no_aspp": True,        # if True, skip the ASPP module in the final feature encoding
                           "require_grad": True,
                           "out_type": 'Normal',   # out_type: 'Normal' / 'Conv' / 'Deconv'
                           "head_layers": [1024, 512, 256, 256]}  # the head has n+1 layers; the last layer size is the number of proposals

_C.MODEL.STRIDE = 8  # anchor-map size is image.shape / stride, e.g. a 128x128 input with stride 8 gives a 16x16 anchor map
_C.MODEL.ROW = 2     # row, number of rows of anchor points per stride cell
_C.MODEL.LINE = 2    # line, number of columns of anchor points per stride cell
_C.MODEL.FROZEN_WEIGHTS = None  # frozen_weights, path to the pretrained model; if set, only the mask head will be trained

# mixed into the unified loss kwargs
_C.MODEL.POINT_LOSS_COEF = 0.0002  # point_loss_coef
_C.MODEL.EOS_COEF = 0.5            # eos_coef, relative classification weight of the no-object class

_C.MODEL.LOSS = ['L2']
_C.MODEL.WEIGHT_DICT = {'loss_ce': 1, 'loss_points': 0., 'loss_aux': 0.}
_C.MODEL.AUX_EN = False
_C.MODEL.AUX_NUMBER = [1, 1]  # the number of pos/neg auxiliary anchors
_C.MODEL.AUX_RANGE = [1, 4]   # the randomness range of the auxiliary anchors
_C.MODEL.AUX_kwargs = {'pos_coef': 1., 'neg_coef': 1., 'pos_loc': 0., 'neg_loc': 0.}

_C.RESUME = False    # resume training from a checkpoint
_C.RESUME_PATH = ''  # path to the checkpoint to resume from

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = edict()
_C.DATASETS.DATASET = 'SHHA'               # dataset_file
_C.DATASETS.DATA_ROOT = './dataset_path/'  # data_root, path where the dataset is

# -----------------------------------------------------------------------------
# DATALOADER
# -----------------------------------------------------------------------------
_C.DATALOADER = edict()
_C.DATALOADER.AUGUMENTATION = ['Normalize', 'Crop', 'Flip']
_C.DATALOADER.CROP_SIZE = 128     # random crop size for training
_C.DATALOADER.CROP_NUMBER = 4     # the number of crops per training sample
_C.DATALOADER.UPPER_BOUNDER = -1  # upper bound on the image size (-1: no bound)
_C.DATALOADER.NUM_WORKERS = 8     # num_workers

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = edict()
_C.SOLVER.BATCH_SIZE = 8       # batch_size
_C.SOLVER.START_EPOCH = 0      # start_epoch
_C.SOLVER.EPOCHS = 3500        # epochs
_C.SOLVER.LR = 1e-4            # lr
_C.SOLVER.LR_BACKBONE = 1e-5   # lr_backbone
_C.SOLVER.WEIGHT_DECAY = 1e-4  # weight_decay
_C.SOLVER.LR_DROP = 3500       # lr_drop
_C.SOLVER.CLIP_MAX_NORM = 0.1  # clip_max_norm, gradient clipping max norm

_C.SOLVER.EVAL_FREQ = 5  # eval_freq, run evaluation every EVAL_FREQ epochs (default: every 5 epochs)
_C.SOLVER.LOG_FREQ = 1   # log_freq, frequency of recording training logs

# ---------------------------------------------------------------------------- #
# Matcher
# ---------------------------------------------------------------------------- #
_C.MATCHER = edict()
_C.MATCHER.SET_COST_CLASS = 1.    # set_cost_class, class coefficient in the matching cost
_C.MATCHER.SET_COST_POINT = 0.05  # set_cost_point, L1 point coefficient in the matching cost

# -----------------------------------------------------------------------------
# TEST
# -----------------------------------------------------------------------------
_C.TEST = edict()
_C.TEST.THRESHOLD = 0.5  # confidence threshold for keeping predicted points

################ modules ################
def cfg_merge_a2b(a, b):
    """Recursively merge config dict `a` into config dict `b` and return `b`."""
    if type(a) is not edict and type(a) is not dict:
        raise KeyError('a is not an edict or dict.')

    for k, v in a.items():
        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))

        old_type = type(b[k])  # original type
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            elif not isinstance(v, dict) and not isinstance(b[k], type(None)):
                raise ValueError(('Type mismatch ({} vs. {}) '
                                  'for config key: {}').format(type(b[k]), type(v), k))
        if type(v) is edict or type(v) is dict:
            try:
                cfg_merge_a2b(a[k], b[k])
            except Exception:
                print('Error under config key: {}'.format(k))
                raise
        else:
            if v == 'None':
                b[k] = None  # allow the literal string 'None' in YAML to clear a value
            else:
                b[k] = v

    return b


def cfg_from_file(filename):
    """Load a YAML config file into a plain dict."""
    import yaml
    with open(filename, 'r') as f:
        data = yaml.safe_load(f)
    return data


def cfg_from_list(args_opts):
    """Apply a flat list of alternating dotted keys and values to the global config."""
    from ast import literal_eval
    print(args_opts)
    if len(args_opts) == 0:
        return None
    assert len(args_opts) % 2 == 0
    for k, v in zip(args_opts[0::2], args_opts[1::2]):
        key_list = k.split('.')
        d = _C
        for subkey in key_list[:-1]:
            assert subkey in d
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d
        try:
            value = literal_eval(v)
        except (ValueError, SyntaxError):
            value = v  # fall back to the raw string if it is not a Python literal
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(type(value), type(d[subkey]))
        d[subkey] = value
    return _C  # the options were applied to the global config in place


def merge_from_file(cfg, filename):
    file_cfg = cfg_from_file(filename)
    cfg = cfg_merge_a2b(file_cfg, cfg)
    return cfg


def merge_from_list(cfg, args):
    args_cfg = cfg_from_list(args)
    if args_cfg is None:
        return cfg
    return cfg_merge_a2b(args_cfg, cfg)
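
A minimal usage sketch of the helpers above (the module name `config` and the YAML path are illustrative assumptions, not paths confirmed by this commit):

# Hypothetical usage; module and file names below are illustrative only.
from config import cfg, merge_from_file, merge_from_list

cfg = merge_from_file(cfg, 'configs/SHHA_example.yaml')  # overlay a YAML config onto the defaults
cfg = merge_from_list(cfg, ['SOLVER.LR', '0.0005',       # then apply CLI-style key/value overrides
                            'DATALOADER.NUM_WORKERS', '4'])
print(cfg.TAG, cfg.SOLVER.LR)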
New file: YAML config (89 lines added).
TAG: 'SHHA_APGCC'
SEED: 1229                         # seed
GPU_ID: 0                          # gpu_id, the GPU used for training
OUTPUT_DIR: './output_SHHA/temp/'  # output_dir, path where to save; empty for no saving
VIS: False                         # visualize the predicted samples

# -----------------------------------------------------------------------------
# MODEL
# -----------------------------------------------------------------------------
MODEL:
  ENCODER: 'vgg16_bn'                   # backbone, select: ['vgg16', 'vgg16_bn', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
  ENCODER_kwargs: {"last_pool": False}  # last-layer downsample; False: feat4 (H/16, W/16), True: feat4 (H/32, W/32)
  DECODER: 'IFI'                        # decoder, select: ['basic', 'IFI']
  DECODER_kwargs: {"num_classes": 2,       # output num_classes; the default 2 means confidence
                   "inner_planes": 64,     # default: basic: 256, IFI: 64
                   "feat_layers": [3, 4],  # controls which decoder features are used, subset of [1, 2, 3, 4]
                   "pos_dim": 32,          # with ultra_pe this has to be larger than 2
                   "ultra_pe": True,       # additional position encoding: x -> (x, sin(x), cos(x))
                   "learn_pe": False,      # additional position encoding: x -> (trainable variable)
                   "unfold": False,        # unfold the feature channels, making the feature dim 3x3 times larger
                   "local": True,          # enable local patch: 3x3 mapping around the center point
                   "no_aspp": False,       # if False, add the ASPP module to the final feature encoding
                   "require_grad": True,
                   "out_type": 'Normal',   # out_type: 'Normal' / 'Conv' / 'Deconv'
                   "head_layers": [1024, 512, 256, 256]}  # the head has n+1 layers; the last layer size is the number of proposals

  FROZEN_WEIGHTS: None  # frozen_weights, path to the pretrained model; if set, only the mask head will be trained

  STRIDE: 8  # anchor-map size is image.shape / stride, e.g. a 128x128 input with stride 8 gives a 16x16 anchor map
  ROW: 2     # row, number of rows of anchor points per stride cell
  LINE: 2    # line, number of columns of anchor points per stride cell
  POINT_LOSS_COEF: 0.0002  # point_loss_coef
  EOS_COEF: 0.5            # eos_coef, relative classification weight of the no-object class

  LOSS: ['L2']
  WEIGHT_DICT: {'loss_ce': 1, 'loss_points': 0.0002, 'loss_aux': 0.2}
  AUX_EN: False       # auxiliary points are not used at inference time
  AUX_NUMBER: [2, 2]  # the number of pos/neg auxiliary anchors
  AUX_RANGE: [2, 8]   # the RoI range of the auxiliary anchors
  AUX_kwargs: {'pos_coef': 1., 'neg_coef': 1., 'pos_loc': 0.0002, 'neg_loc': 0.0002}

RESUME: False    # resume training from a checkpoint
RESUME_PATH: ''  # path to the checkpoint to resume from

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
DATASETS:
  DATASET: 'SHHA'  # dataset_file
  DATA_ROOT: '/mnt/191/c/CrowdCounting/Dataset/ShanghaiTech/PointData/part_A'  # data_root, path where the dataset is

# -----------------------------------------------------------------------------
# DATALOADER
# -----------------------------------------------------------------------------
DATALOADER:
  AUGUMENTATION: ['Normalize', 'Crop', 'Flip']
  CROP_SIZE: 128     # random crop size for training
  CROP_NUMBER: 4     # the number of crops per training sample
  UPPER_BOUNDER: -1  # upper bound on the image size (-1: no bound)
  NUM_WORKERS: 0     # num_workers

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
SOLVER:
  BATCH_SIZE: 8         # batch_size
  START_EPOCH: 0        # start_epoch
  EPOCHS: 3500          # epochs
  LR: 0.0001            # lr
  LR_BACKBONE: 0.00001  # lr_backbone
  WEIGHT_DECAY: 0.0001  # weight_decay
  LR_DROP: 3500         # lr_drop
  CLIP_MAX_NORM: 0.1    # clip_max_norm, gradient clipping max norm

  EVAL_FREQ: 1  # eval_freq, run evaluation every EVAL_FREQ epochs
  LOG_FREQ: 1   # log_freq, frequency of recording training logs

# ---------------------------------------------------------------------------- #
# Matcher
# ---------------------------------------------------------------------------- #
MATCHER:
  SET_COST_CLASS: 1.    # set_cost_class, class coefficient in the matching cost
  SET_COST_POINT: 0.05  # set_cost_point, L1 point coefficient in the matching cost

# -----------------------------------------------------------------------------
# TEST
# -----------------------------------------------------------------------------
TEST:
  THRESHOLD: 0.5
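
As a quick sanity check of the anchor geometry these settings imply (assuming, per the comments above, that ROW x LINE anchor points are placed in each stride cell):

# Back-of-the-envelope anchor count implied by the config comments above.
# Assumes ROW x LINE anchor points are generated per stride cell.
crop_size, stride, row, line = 128, 8, 2, 2
cells = (crop_size // stride) ** 2  # 16 x 16 = 256 cells
anchors = cells * row * line        # 4 anchor points per cell
print(cells, anchors)               # 256 1024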
New file: YAML config (83 lines added).
TAG: 'SHHA_basic'
SEED: 1229                                   # seed
GPU_ID: 0                                    # gpu_id, the GPU used for training
OUTPUT_DIR: './output_P2P/SHHA_P2P_mix_g2/'  # output_dir, path where to save; empty for no saving
VIS: False                                   # visualize the predicted samples

# -----------------------------------------------------------------------------
# MODEL
# -----------------------------------------------------------------------------
MODEL:
  ENCODER: 'vgg16_bn'                   # backbone, select: ['vgg16', 'vgg16_bn', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
  ENCODER_kwargs: {"last_pool": False}  # last-layer downsample; False: feat4 (H/16, W/16), True: feat4 (H/32, W/32)
  DECODER: 'basic'                      # decoder, select: ['basic', 'IFI']
  DECODER_kwargs: {"num_classes": 2,       # output num_classes; the default 2 means confidence
                   "inner_planes": 256,    # basic: 256, IFI: 64
                   "feat_layers": [3, 4]}  # controls which decoder features are used, subset of [1, 2, 3, 4]; NOTE: change LINE/ROW accordingly

  FROZEN_WEIGHTS: None  # frozen_weights, path to the pretrained model; if set, only the mask head will be trained

  STRIDE: 8  # anchor-map size is image.shape / stride, e.g. a 128x128 input with stride 8 gives a 16x16 anchor map
  ROW: 2     # row, number of rows of anchor points per stride cell
  LINE: 2    # line, number of columns of anchor points per stride cell
  POINT_LOSS_COEF: 0.0002  # point_loss_coef
  EOS_COEF: 0.5            # eos_coef, relative classification weight of the no-object class

  LOSS: ['L2']
  WEIGHT_DICT: {'loss_ce': 1, 'loss_points': 0.0002, 'loss_aux': 0.}
  AUX_EN: False       # auxiliary points are not used at inference time
  AUX_NUMBER: [1, 1]  # the number of pos/neg auxiliary anchors
  AUX_RANGE: [1, 4]   # the RoI range of the auxiliary anchors
  AUX_kwargs: {'pos_coef': 1., 'neg_coef': 1., 'pos_loc': 0.0002, 'neg_loc': 0.0002}

RESUME: False    # resume training from a checkpoint
RESUME_PATH: ''  # path to the checkpoint to resume from

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
DATASETS:
  DATASET: 'SHHA'  # dataset_file
  DATA_ROOT: '/work/sylab607/CIH/CrowdCounting/Dataset/ShanghaiTech/TorchData/shanghaitech_part_A'  # data_root, path where the dataset is
  # alternatives:
  # /work/sylab607/CIH/CrowdCounting/Dataset/ShanghaiTech/PointData/part_A/
  # /work/sylab607/CIH/CrowdCounting/Dataset/ShanghaiTech/TorchData/shanghaitech_part_A

# -----------------------------------------------------------------------------
# DATALOADER
# -----------------------------------------------------------------------------
DATALOADER:
  AUGUMENTATION: ['Normalize', 'Crop', 'Flip']
  CROP_SIZE: 128     # random crop size for training
  CROP_NUMBER: 4     # the number of crops per training sample
  UPPER_BOUNDER: -1  # upper bound on the image size (-1: no bound)
  NUM_WORKERS: 0     # num_workers

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
SOLVER:
  BATCH_SIZE: 8         # batch_size
  START_EPOCH: 0        # start_epoch
  EPOCHS: 3500          # epochs
  LR: 0.0001            # lr
  LR_BACKBONE: 0.00001  # lr_backbone
  WEIGHT_DECAY: 0.0001  # weight_decay
  LR_DROP: 3500         # lr_drop
  CLIP_MAX_NORM: 0.1    # clip_max_norm, gradient clipping max norm

  EVAL_FREQ: 1  # eval_freq, run evaluation every EVAL_FREQ epochs
  LOG_FREQ: 1   # log_freq, frequency of recording training logs

# ---------------------------------------------------------------------------- #
# Matcher
# ---------------------------------------------------------------------------- #
MATCHER:
  SET_COST_CLASS: 1.    # set_cost_class, class coefficient in the matching cost
  SET_COST_POINT: 0.05  # set_cost_point, L1 point coefficient in the matching cost

# -----------------------------------------------------------------------------
# TEST
# -----------------------------------------------------------------------------
TEST:
  THRESHOLD: 0.5
New file: Python module (4 lines added).
from .build import loading_data


def build_dataset(cfg):
    return loading_data(cfg)
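
A minimal sketch of how the pieces above might compose at training time; the YAML path, the `datasets` package name, and the return shape of `loading_data` are assumptions for illustration, not details confirmed by this diff:

# Hypothetical wiring; names and the return shape are illustrative only.
from config import cfg, merge_from_file
from datasets import build_dataset

cfg = merge_from_file(cfg, 'configs/SHHA_basic.yaml')  # illustrative path
train_set, val_set = build_dataset(cfg)                # assumed (train, val) return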
3 binary files not shown. (The remaining changed files in this commit are not displayed.)