Return heatmaps and backbone features during inference (#212)
* Added optional support for returning heatmaps and backbone features during inference of the top-down pose model

* Added hooks for extracting layer outputs defined by name (and heatmap)

* Fixed docstring

* Return-heatmap condition fix

* Docstring fixes

* Docstring fixes

* Docstring fixes

* 'image' call fix

* Fixed linting errors

* Fixed top-down demo

* Fixed evaluate func

* Fixed evaluate func

* Fixed json loader in tests

* Cleanup

* try catch fix

* try catch fix

* Fixed model forward_test return signature to return heatmap

* Deleted test file

* Added .npy to ignore list

* Optimized layer output hooks logic

* Decoupled heatmap from output layer names.

* Refactored mmpose.utils.hooks

* Fixed mmpose.utils.hooks

* Fixed output_layer_names default in top down demos

Co-authored-by: OWAL Admin <janderoot@owal.io>
2 people authored and wusize committed Nov 13, 2020
1 parent fc8ceb9 commit 3bf19c6
Showing 10 changed files with 162 additions and 42 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -113,6 +113,7 @@ mmpose/version.py
*.pkl
*.pkl.json
*.log.json
*.npy
work_dirs/

# Pytorch
15 changes: 12 additions & 3 deletions demo/top_down_img_demo.py
@@ -50,6 +50,12 @@ def main():

img_keys = list(coco.imgs.keys())

# optional
return_heatmap = False

# e.g. use ('backbone', ) to return backbone feature
output_layer_names = None

# process each image
for i in range(len(img_keys)):
# get bounding box annotations
@@ -66,13 +72,16 @@
bbox = ann['bbox']
person_bboxes.append(bbox)

# test a single image, with a list of bboxes.
pose_results, heatmaps = inference_top_down_pose_model(
# test a single image, with a list of bboxes
pose_results, returned_outputs = inference_top_down_pose_model(
pose_model,
image_name,
person_bboxes,
bbox_thr=args.bbox_thr,
format='xywh',
dataset=dataset)
dataset=dataset,
return_heatmap=return_heatmap,
outputs=output_layer_names)

if args.out_img_root == '':
out_file = None
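Taken together, the demo change boils down to two new knobs on `inference_top_down_pose_model`. Below is a minimal usage sketch with both options turned on; it assumes `pose_model`, `image_name`, `person_bboxes`, and `dataset` are prepared exactly as in the demo above, and uses 'backbone' only as an example layer name.

```python
# Sketch only: pose_model, image_name, person_bboxes and dataset are assumed
# to be set up as in demo/top_down_img_demo.py.
return_heatmap = True
output_layer_names = ('backbone', )  # capture the backbone feature

pose_results, returned_outputs = inference_top_down_pose_model(
    pose_model,
    image_name,
    person_bboxes,
    bbox_thr=None,  # no score filtering for ground-truth boxes
    format='xywh',
    dataset=dataset,
    return_heatmap=return_heatmap,
    outputs=output_layer_names)

# returned_outputs holds one dict per bbox, mapping each requested layer name
# (plus 'heatmap' when return_heatmap=True) to its captured output.
for out in returned_outputs:
    backbone_feat = out['backbone']  # output of the hooked 'backbone' module
    heatmap = out['heatmap']         # ndarray or tensor of shape [N, K, H, W]
```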
13 changes: 11 additions & 2 deletions demo/top_down_img_demo_with_mmdet.py
@@ -78,13 +78,22 @@ def main():
person_bboxes = process_mmdet_results(mmdet_results)

# test a single image, with a list of bboxes.
pose_results, heatmaps = inference_top_down_pose_model(

# optional
return_heatmap = False

# e.g. use ('backbone', ) to return backbone feature
output_layer_names = None

pose_results, returned_outputs = inference_top_down_pose_model(
pose_model,
image_name,
person_bboxes,
bbox_thr=args.bbox_thr,
format='xyxy',
dataset=dataset)
dataset=dataset,
return_heatmap=return_heatmap,
outputs=output_layer_names)

if args.out_img_root == '':
out_file = None
12 changes: 10 additions & 2 deletions demo/top_down_video_demo_with_mmdet.py
@@ -85,6 +85,12 @@ def main():
f'vis_{os.path.basename(args.video_path)}'), fourcc,
fps, size)

# optional
return_heatmap = False

# e.g. use ('backbone', ) to return backbone feature
output_layer_names = None

while (cap.isOpened()):
flag, img = cap.read()
if not flag:
@@ -96,13 +102,15 @@
person_bboxes = process_mmdet_results(mmdet_results)

# test a single image, with a list of bboxes.
pose_results, heatmaps = inference_top_down_pose_model(
pose_results, returned_outputs = inference_top_down_pose_model(
pose_model,
img,
person_bboxes,
bbox_thr=args.bbox_thr,
format='xyxy',
dataset=dataset)
dataset=dataset,
return_heatmap=return_heatmap,
outputs=output_layer_names)

# show the results
vis_img = vis_pose_result(
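A hypothetical follow-on step, not part of the demo itself: the commit also adds `*.npy` to `.gitignore`, which suggests dumping the returned heatmaps to `.npy` files is an expected use. The sketch below assumes a `frame_idx` counter incremented once per iteration of the demo's capture loop; everything else mirrors the call above.

```python
# Hypothetical: persist per-frame heatmaps returned by the call above.
# frame_idx is an assumed per-frame counter, not present in the demo.
import numpy as np
import torch

for person_idx, out in enumerate(returned_outputs):
    heatmap = out.get('heatmap')
    if heatmap is None:
        continue
    if isinstance(heatmap, torch.Tensor):
        heatmap = heatmap.detach().cpu().numpy()
    np.save(f'heatmap_frame{frame_idx:06d}_person{person_idx}.npy', heatmap)
```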
69 changes: 48 additions & 21 deletions mmpose/apis/inference.py
@@ -8,6 +8,7 @@

from mmpose.datasets.pipelines import Compose
from mmpose.models import build_posenet
from mmpose.utils.hooks import OutputHook

os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

@@ -132,7 +133,11 @@ def __call__(self, results):
return results


def _inference_single_pose_model(model, img_or_path, bbox, dataset):
def _inference_single_pose_model(model,
img_or_path,
bbox,
dataset,
return_heatmap=False):
"""Inference a single bbox.
num_keypoints: K
Expand All @@ -143,11 +148,14 @@ def _inference_single_pose_model(model, img_or_path, bbox, dataset):
bbox (list | np.ndarray): Bounding boxes (with scores),
shaped (4, ) or (5, ). (left, top, width, height, [score])
dataset (str): Dataset name.
outputs (list[str] | tuple[str]): Names of layers whose output is
to be returned, default: None
Returns:
ndarray[Kx3]: Predicted pose x, y, score.
ndarray[NxKxHxW]: Model output heatmap.
heatmap[N, K, H, W]: Model output heatmap.
"""

cfg = model.cfg
device = next(model.parameters()).device

@@ -226,8 +234,10 @@ def _inference_single_pose_model(model, img_or_path, bbox, dataset):
# forward the model
with torch.no_grad():
all_preds, _, _, heatmap = model(
return_loss=False, img=data['img'], img_metas=data['img_metas'])

return_loss=False,
return_heatmap=return_heatmap,
img=data['img'],
img_metas=data['img_metas'])
return all_preds[0], heatmap


@@ -236,7 +246,9 @@ def inference_top_down_pose_model(model,
person_bboxes,
bbox_thr=None,
format='xywh',
dataset='TopDownCocoDataset'):
dataset='TopDownCocoDataset',
return_heatmap=False,
outputs=None):
"""Inference a single image with a list of person bounding boxes.
num_people: P
@@ -255,38 +267,53 @@
'xyxy' means (left, top, right, bottom),
'xywh' means (left, top, width, height).
dataset (str): Dataset name, e.g. 'TopDownCocoDataset'.
return_heatmap (bool) : Flag to return heatmap, default: False
outputs (list(str) | tuple(str)) : Names of layers whose outputs
need to be returned, default: None
Returns:
list[dict]: The bbox & pose info.
list[dict]: The bbox & pose info,
Each item in the list is a dictionary,
containing the bbox: (left, top, right, bottom, [score])
and the pose (ndarray[Kx3]): x, y, score
list[dict[np.ndarray[N, K, H, W] | torch.tensor[N, K, H, W]]]:
Output feature maps from layers specified in `outputs`.
Includes 'heatmap' if `return_heatmap` is True.
"""
# only two kinds of bbox format is supported.
assert format in ['xyxy', 'xywh']
# transform the bboxes format to xywh
if format == 'xyxy':
person_bboxes = _xyxy2xywh(np.array(person_bboxes))

pose_results = []
heatmaps = []
returned_outputs = []

if len(person_bboxes) > 0:
if bbox_thr is not None:
person_bboxes = person_bboxes[person_bboxes[:, 4] > bbox_thr]
for bbox in person_bboxes:
pose, heatmap = _inference_single_pose_model(
model, img_or_path, bbox, dataset)
pose_results.append({
'bbox':
_xywh2xyxy(np.expand_dims(np.array(bbox), 0)),
'keypoints':
pose,
})

heatmaps.append(heatmap)

return pose_results, heatmaps

with OutputHook(model, outputs=outputs, as_tensor=True) as h:
for bbox in person_bboxes:
pose, heatmap = _inference_single_pose_model(
model,
img_or_path,
bbox,
dataset,
return_heatmap=return_heatmap)

if return_heatmap:
h.layer_outputs['heatmap'] = heatmap

returned_outputs.append(h.layer_outputs)
pose_results.append({
'bbox':
_xywh2xyxy(np.expand_dims(np.array(bbox), 0)),
'keypoints':
pose
})

return pose_results, returned_outputs


def inference_bottom_up_pose_model(model, img_or_path):
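The diff for `mmpose/utils/hooks.py` itself is not shown on this page, so the sketch below is only an illustration of what an `OutputHook` like the one used in `inference_top_down_pose_model` could look like: a context manager that registers forward hooks on submodules selected by name and collects their outputs in `layer_outputs`. Everything beyond the names visible in `inference.py` is an assumption.

```python
# Illustrative sketch, not the actual mmpose.utils.hooks implementation.
import functools


class OutputHook:
    """Capture outputs of the named submodules while the context is active."""

    def __init__(self, module, outputs=None, as_tensor=True):
        self.as_tensor = as_tensor
        self.layer_outputs = {}
        self._handles = []
        if outputs is not None:
            named = dict(module.named_modules())
            for name in outputs:
                handle = named[name].register_forward_hook(
                    functools.partial(self._hook, name=name))
                self._handles.append(handle)

    def _hook(self, module, inputs, output, name):
        # Assumes the hooked layer returns a single tensor; real backbones may
        # return tuples, which the actual implementation would need to handle.
        self.layer_outputs[name] = (
            output.detach() if self.as_tensor
            else output.detach().cpu().numpy())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        for handle in self._handles:
            handle.remove()
```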
5 changes: 2 additions & 3 deletions mmpose/datasets/datasets/top_down/topdown_coco_dataset.py
@@ -300,16 +300,15 @@ def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
heatmap width: W
Args:
outputs (list(preds, boxes, image_path, output_heatmap))
outputs (list(preds, boxes, image_path, heatmap))
:preds (np.ndarray[1,K,3]): The first two dimensions are
coordinates, score is the third dimension of the array.
:boxes (np.ndarray[1,6]): [center[0], center[1], scale[0]
, scale[1],area, score]
:image_path (list[str]): For example, [ '/', 'v','a', 'l',
'2', '0', '1', '7', '/', '0', '0', '0', '0', '0',
'0', '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g']
:output_heatmap (np.ndarray[N, K, H, W]): model outpus.
:heatmap (np.ndarray[N, K, H, W]): model output heatmap.
res_folder (str): Path of directory to save the results.
metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
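For reference, a hedged illustration of what one element of the `outputs` list described in the docstring above could look like. The shapes follow the docstring; the values are placeholders, K=17 is the COCO keypoint count, and the heatmap resolution is an assumption.

```python
import numpy as np

preds = np.zeros((1, 17, 3))         # x, y coordinates plus score per keypoint
boxes = np.zeros((1, 6))             # center[0], center[1], scale[0], scale[1], area, score
image_path = list('/val2017/000000397133.jpg')  # path stored character by character
heatmap = np.zeros((1, 17, 64, 48))  # N, K, H, W (resolution is an assumption)

outputs = [(preds, boxes, image_path, heatmap)]
# dataset.evaluate(outputs, res_folder, metric='mAP') would consume this list.
```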
4 changes: 2 additions & 2 deletions mmpose/datasets/datasets/top_down/topdown_mpii_dataset.py
@@ -145,15 +145,15 @@ def evaluate(self, outputs, res_folder, metric='PCKh', **kwargs):
heatmap width: W
Args:
outputs(list(preds, boxes, image_path, output_heatmap)):
outputs(list(preds, boxes, image_path, heatmap)):
* preds(np.ndarray[1,K,3]): The first two dimensions are
coordinates, score is the third dimension of the array.
* boxes(np.ndarray[1,6]): [center[0], center[1], scale[0]
, scale[1],area, score]
* image_path(list[str]): For example, ['0', '0',
'0', '0', '0', '1', '1', '6', '3', '.', 'j', 'p', 'g']
* output_heatmap (np.ndarray[N, K, H, W]): model outputs.
* heatmap (np.ndarray[N, K, H, W]): model output heatmap.
res_folder(str): Path of directory to save the results.
metric (str | list[str]): Metrics to be performed.
5 changes: 2 additions & 3 deletions mmpose/datasets/datasets/top_down/topdown_mpii_trb_dataset.py
@@ -149,16 +149,15 @@ def evaluate(self, outputs, res_folder, metric='PCKh', **kwargs):
heatmap width: W
Args:
outputs(list(preds, boxes, image_path, output_heatmap)):
outputs(list(preds, boxes, image_path, heatmap)):
* preds(np.ndarray[1,K,3]): The first two dimensions are
coordinates, score is the third dimension of the array.
* boxes(np.ndarray[1,6]): [center[0], center[1], scale[0]
, scale[1],area, score]
* image_path(list[str]): For example, ['0', '0',
'0', '0', '0', '1', '1', '6', '3', '.', 'j', 'p', 'g']
* output_heatmap (np.ndarray[N, K, H, W]): model outputs.
* heatmap (np.ndarray[N, K, H, W]): model output heatmap.
res_folder(str): Path of directory to save the results.
metric (str | list[str]): Metrics to be performed.
Defaults: 'PCKh'.
28 changes: 22 additions & 6 deletions mmpose/models/detectors/top_down.py
@@ -63,6 +63,7 @@ def forward(self,
target_weight=None,
img_metas=None,
return_loss=True,
return_heatmap=False,
**kwargs):
"""Calls either forward_train or forward_test depending on whether
return_loss=True. Note this setting will change the expected inputs.
@@ -94,16 +95,19 @@
- "bbox_score": score of bbox
return_loss (bool): Option to `return loss`. `return loss=True`
for training, `return loss=False` for validation & test.
return_heatmap (bool) : Option to return heatmap.
Returns:
dict|tuple: if `return loss` is true, then return losses.
Otherwise, return predicted poses, boxes and image paths.
Otherwise, return predicted poses, boxes, image paths
and heatmaps.
"""
if return_loss:
return self.forward_train(img, target, target_weight, img_metas,
**kwargs)
else:
return self.forward_test(img, img_metas, **kwargs)
return self.forward_test(
img, img_metas, return_heatmap=return_heatmap, **kwargs)

def forward_train(self, img, target, target_weight, img_metas, **kwargs):
"""Defines the computation performed at every call when training."""
@@ -174,16 +178,25 @@ def forward_train(self, img, target, target_weight, img_metas, **kwargs):

return losses

def forward_test(self, img, img_metas, **kwargs):
def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
"""Defines the computation performed at every call when testing."""
assert img.size(0) == 1
assert len(img_metas) == 1

img_metas = img_metas[0]

flip_pairs = img_metas['flip_pairs']
# compute output
# compute backbone features
output = self.backbone(img)

# process head
all_preds, all_boxes, image_path, heatmap = self.process_head(
output, img, img_metas, return_heatmap=return_heatmap)

return all_preds, all_boxes, image_path, heatmap

def process_head(self, output, img, img_metas, return_heatmap=False):
"""Process heatmap and keypoints from backbone features."""
flip_pairs = img_metas['flip_pairs']

if self.with_keypoint:
output = self.keypoint_head(output)

Expand Down Expand Up @@ -234,6 +247,9 @@ def forward_test(self, img, img_metas, **kwargs):
all_boxes[0, 5] = score
image_path.extend(img_metas['image_file'])

if not return_heatmap:
output_heatmap = None

return all_preds, all_boxes, image_path, output_heatmap

def show_result(self,
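A minimal sketch of exercising the new flag on the detector directly, mirroring the call in `mmpose/apis/inference.py` above; it assumes a built `model` and a preprocessed single-image batch `data` produced by the test pipeline, as in `_inference_single_pose_model`.

```python
import torch

model.eval()
with torch.no_grad():
    all_preds, all_boxes, image_path, heatmap = model(
        return_loss=False,
        return_heatmap=True,
        img=data['img'],
        img_metas=data['img_metas'])

# heatmap is the head output (shape [N, K, H, W]) when return_heatmap=True,
# and None otherwise; all_preds, all_boxes and image_path are unchanged.
```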
