update apgcc framework and inference code
AaronCIH committed Jul 21, 2024
1 parent 3acd83c commit 7768a1b
Showing 9 changed files with 98 additions and 16 deletions.
Binary file modified apgcc/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions apgcc/config.py
@@ -106,6 +106,7 @@
 # -----------------------------------------------------------------------------
 _C.TEST = edict()
 _C.TEST.THRESHOLD = 0.5
+_C.TEST.WEIGHT = ""

 ################ modules ################
 def cfg_merge_a2b(a, b):
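The one-line addition gives the edict-based defaults a TEST.WEIGHT field that YAML files and command-line overrides can fill in. For context, a minimal sketch of the kind of recursive merge a cfg_merge_a2b helper typically performs (the body below is an assumption for illustration, not the repo's verified implementation):

from easydict import EasyDict as edict

def cfg_merge_a2b(a, b):
    # Sketch only: copy each key of override dict `a` into defaults `b`;
    # nested dicts merge recursively so a partial YAML overrides just its keys.
    for k, v in a.items():
        if isinstance(v, dict) and isinstance(b.get(k), dict):
            cfg_merge_a2b(v, b[k])
        else:
            b[k] = v
    return b

# Overriding only TEST.WEIGHT leaves TEST.THRESHOLD at its default:
cfg = edict({"TEST": {"THRESHOLD": 0.5, "WEIGHT": ""}})
cfg_merge_a2b({"TEST": {"WEIGHT": "./output/SHHA_best.pth"}}, cfg)
assert cfg.TEST.THRESHOLD == 0.5 and cfg.TEST.WEIGHT == "./output/SHHA_best.pth"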
3 changes: 2 additions & 1 deletion apgcc/configs/SHHA_IFI.yml
@@ -86,4 +86,5 @@ MATCHER:
 # TEST
 # -----------------------------------------------------------------------------
 TEST:
-  THRESHOLD: 0.5
+  THRESHOLD: 0.5
+  WEIGHT: ''
3 changes: 2 additions & 1 deletion apgcc/configs/SHHA_basic.yml
@@ -80,4 +80,5 @@ MATCHER:
 # TEST
 # -----------------------------------------------------------------------------
 TEST:
-  THRESHOLD: 0.5
+  THRESHOLD: 0.5
+  WEIGHT: ''
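Both configs gain the same empty TEST.WEIGHT default, so inference can point at a checkpoint per config instead of hard-coding a path. A hedged sketch of how an evaluation entry point might consume the field (build_model and the checkpoint layout are assumptions, not the repo's confirmed API):

import torch

def load_eval_model(cfg, build_model):
    # build_model is a stand-in for the repo's model factory (hypothetical).
    model = build_model(cfg)
    if cfg.TEST.WEIGHT:  # empty string means no checkpoint was specified
        state = torch.load(cfg.TEST.WEIGHT, map_location="cpu")
        # Checkpoints are often saved as {"model": state_dict, ...};
        # fall back to treating the file as a bare state_dict.
        model.load_state_dict(state.get("model", state))
    return model.eval()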
90 changes: 90 additions & 0 deletions apgcc/configs/SHHA_test.yml
@@ -0,0 +1,90 @@
TAG: 'APGCC_SHHA'
SEED: 1229 # seed
GPU_ID: 0 # gpu_id, the GPU used for training
OUTPUT_DIR: './output/' # output_dir, path where to save; empty for no saving
VIS: False # visualize the predicted samples

# -----------------------------------------------------------------------------
# MODEL
# -----------------------------------------------------------------------------
MODEL:
  ENCODER: 'vgg16_bn' # backbone, select: ['vgg16', 'vgg16_bn', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
  ENCODER_kwargs: {"last_pool": False} # last-layer downsample, False: feat4 (H/16, W/16), True: feat4 (H/32, W/32)
  DECODER: 'IFI' # decoder, select: ['basic', 'IFI']
  DECODER_kwargs: {"num_classes": 2, # output num_classes, default 2 means confidence.
                   "inner_planes": 64, # basic: 256, IFI: 64
                   "feat_layers": [3,4], # controls the number of decoder features. [1,2,3,4]
                   "pos_dim": 32, # if ultra_pe, it has to be larger than 2
                   "ultra_pe": True, # additional position encoding: x -> (x, sin(x), cos(x))
                   "learn_pe": False, # additional position encoding: x -> (trainable variable)
                   "unfold": False, # unfold the feature channels, making the feature dim 3x3 times larger.
                   "local": True, # enable local patches: a 3x3 mapping near the center point.
                   "no_aspp": False, # if False, the final feature encoding adds the ASPP module.
                   "require_grad": True,
                   "out_type": 'Normal', # out_type = 'Normal' / 'Conv' / 'Deconv'
                   "head_layers": [1024,512,256,256]} # the head has n+1 layers; the last layer is num_of_proposals

  FROZEN_WEIGHTS: None # frozen_weights, path to the pretrained model. If set, only the mask head will be trained.

  STRIDE: 8 # the anchor map size is image.shape/stride, e.g. input 128x128, stride=8 -> anchor_map 16x16
  ROW: 2 # row, row number of anchor points
  LINE: 2 # line, line number of anchor points
  POINT_LOSS_COEF: 0.0002 # point_loss_coef
  EOS_COEF: 0.5 # eos_coef, relative classification weight of the no-object class

  LOSS: ['L2']
  WEIGHT_DICT: {'loss_ce': 1, 'loss_points': 0.0002, 'loss_aux': 0.2}
  AUX_EN: True # auxiliary points are not used during inference.
  AUX_NUMBER: [2, 2] # the number of pos/neg anchors
  AUX_RANGE: [2, 8] # the RoI range of the auxiliary anchors
  AUX_kwargs: {'pos_coef': 1., 'neg_coef': 1., 'pos_loc': 0.0002, 'neg_loc': 0.0002}

  RESUME: False # resume, resume from checkpoint
  RESUME_PATH: '' # checkpoint path to continue training from.

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
DATASETS:
  DATASET: 'SHHA' # dataset_file
  DATA_ROOT: '/mnt/191/c/CrowdCounting/Dataset/ShanghaiTech/PointData/part_A' # data_root, path where the dataset is

# -----------------------------------------------------------------------------
# DATALOADER
# -----------------------------------------------------------------------------
DATALOADER:
  AUGUMENTATION: ['Normalize', 'Crop', 'Flip']
  CROP_SIZE: 128 # random crop size for training
  CROP_NUMBER: 4 # the number of training samples
  UPPER_BOUNDER: -1 # the upper bound of the image size
  NUM_WORKERS: 0 # num_workers

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
SOLVER:
  BATCH_SIZE: 8 # batch_size
  START_EPOCH: 0 # start_epoch
  EPOCHS: 3500 # epochs
  LR: 0.0001 # lr
  LR_BACKBONE: 0.00001 # lr_backbone
  WEIGHT_DECAY: 0.0001 # weight_decay
  LR_DROP: 3500 # lr_drop
  CLIP_MAX_NORM: 0.1 # clip_max_norm, gradient clipping max norm

  EVAL_FREQ: 1 # eval_freq, evaluation frequency; the default setting evaluates every 5 epochs
  LOG_FREQ: 1 # log_freq, frequency of logging training progress.

# ---------------------------------------------------------------------------- #
# Matcher
# ---------------------------------------------------------------------------- #
MATCHER:
  SET_COST_CLASS: 1. # set_cost_class, class coefficient in the matching cost
  SET_COST_POINT: 0.05 # set_cost_point, L1 point coefficient in the matching cost

# -----------------------------------------------------------------------------
# TEST
# -----------------------------------------------------------------------------
TEST:
  THRESHOLD: 0.5
  WEIGHT: './output/SHHA_best.pth'
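The STRIDE/ROW/LINE comments above spell out the anchor-map arithmetic: a 128x128 crop at stride 8 gives a 16x16 map, and a 2x2 point pattern per cell yields 16*16*4 = 1024 anchor points. An illustrative sketch of that grid construction (function name and layout are assumptions, not the repo's code):

import torch

def anchor_points(h, w, stride=8, row=2, line=2):
    # One stride x stride cell per feature location, row x line points per cell.
    ys = torch.arange(0, h, stride).float()
    xs = torch.arange(0, w, stride).float()
    # Evenly spaced offsets inside a single cell.
    oy = (torch.arange(row).float() + 0.5) * stride / row
    ox = (torch.arange(line).float() + 0.5) * stride / line
    cy = (ys[:, None] + oy[None, :]).reshape(-1)  # every point's y-coordinate
    cx = (xs[:, None] + ox[None, :]).reshape(-1)  # every point's x-coordinate
    grid = torch.stack(torch.meshgrid(cy, cx, indexing="ij"), dim=-1)
    return grid.reshape(-1, 2)  # (num_points, 2) as (y, x)

pts = anchor_points(128, 128)  # 16x16 cells * 2x2 points per cell
assert pts.shape == (1024, 2)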
4 changes: 2 additions & 2 deletions apgcc/datasets/prepare_label.py
@@ -1,5 +1,5 @@
 # ref: https://github.com/TencentYoutuResearch/CrowdCounting-P2PNet/issues/8
-# cmd: python _pre_process_label.py src_path dataset output_path
+# cmd: python pre_process_label.py src_path dataset output_path
 from scipy.io import loadmat
 import os
 import argparse
@@ -111,7 +111,7 @@ def build_datalabel(root_path, dataset, output_path):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("root_path", help="Root path for shanghai dataset")
-    parser.add_argument("dataset", help="dataset name: ('SHHA', 'SHHB', 'NWPU')")
+    parser.add_argument("dataset", choices=['SHHA', 'SHHB', 'NWPU'], help="dataset name: ('SHHA', 'SHHB', 'NWPU')")
     parser.add_argument("output_path", help="Path to store results")
     args = parser.parse_args()
     build_datalabel(args.root_path, args.dataset, args.output_path)
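With choices in place, argparse rejects unsupported dataset names up front instead of failing deep inside label generation. A hypothetical invocation (paths are placeholders):

python prepare_label.py /path/to/ShanghaiTech SHHA ./PointData
# An invalid name now exits immediately with, e.g.:
#   error: argument dataset: invalid choice: 'SHHC' (choose from 'SHHA', 'SHHB', 'NWPU')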
10 changes: 0 additions & 10 deletions apgcc/requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion apgcc/test.sh
@@ -1 +1 @@
-python main.py -t -c /mnt/191/c/CrowdCounting/ECCV_APGCC/APGCC/output/SHHA_test.yml TEST.THRESHOLD 0.5 VIS False
+python main.py -t -c ./configs/SHHA_test.yml TEST.WEIGHT './output/SHHA_best.pth' OUTPUT_DIR ./output/ TEST.THRESHOLD 0.5
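The rewritten command drops the machine-specific /mnt path for a repo-relative config, and the trailing KEY VALUE pairs follow the usual dotted-override convention. A sketch of how main.py might fold such pairs into the edict config (an assumption about the mechanism, not the verified implementation):

import ast

def merge_opts(cfg, opts):
    # opts is the flat list left after the parsed flags, e.g.
    # ["TEST.WEIGHT", "./output/SHHA_best.pth", "TEST.THRESHOLD", "0.5"]
    for key, raw in zip(opts[0::2], opts[1::2]):
        node = cfg
        *parents, leaf = key.split(".")
        for p in parents:
            node = node[p]  # walk down, e.g. cfg -> cfg["TEST"]
        try:
            node[leaf] = ast.literal_eval(raw)  # numbers, bools, quoted strings
        except (ValueError, SyntaxError):
            node[leaf] = raw  # bare strings such as ./output/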
1 change: 0 additions & 1 deletion apgcc/train.sh

This file was deleted.
