
[Enhance] Support visualization of multi-modality 3D detector using multi-view images (#2488)

* init commit

* add demo image

* support wait-time in hook

* add demo of bevfusion

* polish docs

* more smooth multi-modal vis

* fix visualization.md

* support depth vis adaptively
JingweiZhang12 authored May 10, 2023
1 parent 1f0aeba commit 35fd839
Showing 17 changed files with 267 additions and 110 deletions.
Binary file modified demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl
Binary file not shown.
(Five more changed binary files could not be displayed by the viewer.)
11 changes: 9 additions & 2 deletions demo/multi_modality_demo.py
@@ -49,8 +49,15 @@ def main(args):
     result, data = inference_multi_modality_detector(model, args.pcd, args.img,
                                                      args.ann, args.cam_type)
     points = data['inputs']['points']
-    img = mmcv.imread(args.img)
-    img = mmcv.imconvert(img, 'bgr', 'rgb')
+    if isinstance(result.img_path, list):
+        img = []
+        for img_path in result.img_path:
+            single_img = mmcv.imread(img_path)
+            single_img = mmcv.imconvert(single_img, 'bgr', 'rgb')
+            img.append(single_img)
+    else:
+        img = mmcv.imread(result.img_path)
+        img = mmcv.imconvert(img, 'bgr', 'rgb')
     data_input = dict(points=points, img=img)
 
     # show the results
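For orientation, here is a minimal sketch of the step that follows in the demo: handing `data_input` (whose `img` entry may now be a list of per-view RGB arrays) to the visualizer. The `add_datasample` arguments mirror the pattern of other mmdet3d demo scripts and are an assumption, not part of this diff; `model`, `result`, and `data_input` come from the surrounding script.

```python
# Hedged sketch of the visualization hand-off; `model`, `result` and
# `data_input` are assumed to come from the demo code above.
from mmdet3d.registry import VISUALIZERS

visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = model.dataset_meta
# `img` inside `data_input` may be a single RGB array or, with
# `--cam-type all`, a list of arrays (one per camera view).
visualizer.add_datasample(
    'result',
    data_input,
    data_sample=result,
    draw_gt=False,
    show=True,
    vis_task='multi-modality_det')
```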
6 changes: 6 additions & 0 deletions docs/en/user_guides/inference.md
@@ -78,6 +78,12 @@ Example on SUN RGB-D data using [ImVoteNet model](https://download.openmmlab.com
 python demo/multi_modality_demo.py demo/data/sunrgbd/000017.bin demo/data/sunrgbd/000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py ${CHECKPOINT_FILE} --cam-type CAM0 --show --score-thr 0.6
 ```
 
+Example on NuScenes data using [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link):
+
+```shell
+python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
+```
+
 ### 3D Segmentation
 
 To test a 3D segmentor on point cloud data, simply run:
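For readers who prefer scripting over the demo CLI, a rough Python-API equivalent of the BEVFusion command above looks like the sketch below; `init_model` and `inference_multi_modality_detector` are the public `mmdet3d.apis` entry points used by the demo, and the checkpoint path is a placeholder for `${CHECKPOINT_FILE}`.

```python
# Rough Python-API equivalent of the BEVFusion demo command above.
from mmdet3d.apis import inference_multi_modality_detector, init_model

config_file = 'projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py'
checkpoint_file = 'path/to/bevfusion_checkpoint.pth'  # placeholder
model = init_model(config_file, checkpoint_file, device='cuda:0')

# With cam_type='all', the image argument is the directory that holds the
# six camera views rather than a single image file.
result, data = inference_multi_modality_detector(
    model,
    'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin',
    'demo/data/nuscenes/',
    'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl',
    cam_type='all')
```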
28 changes: 13 additions & 15 deletions docs/en/user_guides/visualization.md
@@ -42,18 +42,19 @@ We support drawing 3D boxes on point cloud by using `draw_bboxes_3d`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 from mmdet3d.structures import LiDARInstance3DBoxes
 
-points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
+points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
 points = points.reshape(-1, 4)
 visualizer = Det3DLocalVisualizer()
 # set point cloud in visualizer
 visualizer.set_points(points)
-bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
-    [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-      -1.5808]])),
+bboxes_3d = LiDARInstance3DBoxes(
+    torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
+                   -1.5808]]))
 # Draw 3D bboxes
 visualizer.draw_bboxes_3d(bboxes_3d)
 visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
 visualizer.show()
 ```
 
-![mono3d](../../../resources/mono3d.png)
-
 ### Drawing BEV Boxes
 
 We support drawing BEV boxes by using `draw_bev_bboxes`.
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
 visualizer.show()
 ```
 
-<img src="../../../resources/bev.png" width = "50%" />
-
 ### Drawing 3D Semantic Mask
 
 We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 
-points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
+points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
 points = points.reshape(-1, 3)
 visualizer = Det3DLocalVisualizer()
 mask = np.random.rand(points.shape[0], 3)
 points_with_mask = np.concatenate((points, mask), axis=-1)
 # Draw 3D points with mask
 visualizer.set_points(points, pcd_mode=2, vis_mode='add')
 visualizer.draw_seg_mask(points_with_mask)
 visualizer.show()
@@ -168,10 +166,10 @@ This allows the inference and results generation to be done in remote server and
 We also provide scripts to visualize the dataset without inference. You can use `tools/misc/browse_dataset.py` to show loaded data and ground truth online and save them on the disk. Currently we support single-modality 3D detection and 3D segmentation on all the datasets, multi-modality 3D detection on KITTI and SUN RGB-D, as well as monocular 3D detection on nuScenes. To browse the KITTI dataset, you can run the following command:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
 ```
 
-**Notice**: Once specifying `--output-dir`, the images of views specified by users will be saved when pressing `_ESC_` in open3d window.
+**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window. If you want to zoom in/out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.
 
 To verify the data consistency and the effect of data augmentation, you can also add the `--aug` flag to visualize the data after data augmentation, using the command below:
 
@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
 If you also want to show 2D images with 3D bounding boxes projected onto them, you need to find a config that supports multi-modality data loading, and then change the `--task` argument to `multi-modality_det`. An example is shown below:
 
 ```shell
-python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_multi_modality.png)
 
 You can simply browse different datasets using different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_seg.png)
 
 And browsing the nuScenes dataset in the monocular 3D detection task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_mono.png)
6 changes: 6 additions & 0 deletions docs/zh_cn/user_guides/inference.md
@@ -78,6 +78,12 @@ python demo/multi_modality_demo.py demo/data/kitti/000008.bin demo/data/kitti/00
 python demo/multi_modality_demo.py demo/data/sunrgbd/000017.bin demo/data/sunrgbd/000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py ${CHECKPOINT_FILE} --cam-type CAM0 --show --score-thr 0.6
 ```
 
+An example of testing the [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link) on NuScenes data:
+
+```shell
+python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
+```
+
 ### 3D Segmentation
 
 To test a 3D segmentor on point cloud data, run:
26 changes: 12 additions & 14 deletions docs/zh_cn/user_guides/visualization.md
@@ -42,18 +42,19 @@ visualizer.show()
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 from mmdet3d.structures import LiDARInstance3DBoxes
 
-points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
+points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
 points = points.reshape(-1, 4)
 visualizer = Det3DLocalVisualizer()
 # set point cloud in visualizer
 visualizer.set_points(points)
-bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
-    [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-      -1.5808]])),
+bboxes_3d = LiDARInstance3DBoxes(
+    torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
+                   -1.5808]]))
 # Draw 3D bboxes
 visualizer.draw_bboxes_3d(bboxes_3d)
 visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
 visualizer.show()
 ```
 
-![mono3d](../../../resources/mono3d.png)
-
 ### Drawing BEV Boxes
 
 We support drawing boxes in BEV by using `draw_bev_bboxes`.
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
 visualizer.show()
 ```
 
-<img src="../../../resources/bev.png" width = "50%" />
-
 ### Drawing 3D Semantic Mask
 
 We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 
-points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
+points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
 points = points.reshape(-1, 3)
 visualizer = Det3DLocalVisualizer()
 mask = np.random.rand(points.shape[0], 3)
 points_with_mask = np.concatenate((points, mask), axis=-1)
 # Draw 3D points with mask
 visualizer.set_points(points, pcd_mode=2, vis_mode='add')
 visualizer.draw_seg_mask(points_with_mask)
 visualizer.show()
@@ -171,7 +169,7 @@ python tools/misc/visualize_results.py ${CONFIG_FILE} --result ${RESULTS_PATH} -
 python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
 ```
 
-**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window.
+**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window. If you want to zoom in/out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.
 
 To verify the data consistency and the effect of data augmentation, you can add the `--aug` flag to visualize the data after augmentation, using the command below:
 
@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
 If you want to show 2D images with projected 3D bounding boxes, you need a config that supports multi-modality data loading, and change the `--task` argument to `multi-modality_det`. An example is shown below:
 
 ```shell
-python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_multi_modality.png)
 
 You can browse different datasets with different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_seg.png)
 
 And browsing the nuScenes dataset in the monocular 3D detection task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_mono.png)
78 changes: 46 additions & 32 deletions mmdet3d/apis/inference.py
@@ -188,10 +188,10 @@ def inference_multi_modality_detector(model: nn.Module,
         imgs (str, Sequence[str]):
             Either image files or loaded images.
         ann_file (str, Sequence[str]): Annotation files.
-        cam_type (str): Image of Camera chose to infer.
-            For kitti dataset, it should be 'CAM2',
-            and for nuscenes dataset, it should be
-            'CAM_FRONT'. Defaults to 'CAM_FRONT'.
+        cam_type (str): The camera view to infer from. When the detector only
+            uses a single-view image, a camera view must be specified: for the
+            KITTI dataset it should be 'CAM2', and for SUN RGB-D it should be
+            'CAM0'. When the detector uses multi-view images, set it to 'all'.
 
     Returns:
         :obj:`Det3DDataSample` or list[:obj:`Det3DDataSample`]:
@@ -220,37 +220,51 @@ def inference_multi_modality_detector(model: nn.Module,
     data = []
     for index, pcd in enumerate(pcds):
         # get data info containing calib
-        img = imgs[index]
         data_info = data_list[index]
-        img_path = data_info['images'][cam_type]['img_path']
-
-        if osp.basename(img_path) != osp.basename(img):
-            raise ValueError(f'the info file of {img_path} is not provided.')
-
-        # TODO: check the name consistency of
-        # image file and point cloud file
-        # TODO: support multi-view image loading
-        data_ = dict(
-            lidar_points=dict(lidar_path=pcd),
-            img_path=img,
-            box_type_3d=box_type_3d,
-            box_mode_3d=box_mode_3d)
+        img = imgs[index]
 
-        data_info['images'][cam_type]['img_path'] = img
-        if 'cam2img' in data_info['images'][cam_type]:
-            # The data annotation in SRUNRGBD dataset does not contain
-            # `cam2img`
-            data_['cam2img'] = np.array(
-                data_info['images'][cam_type]['cam2img'])
-
-        # LiDAR to image conversion for KITTI dataset
-        if box_mode_3d == Box3DMode.LIDAR:
-            data_['lidar2img'] = np.array(
-                data_info['images'][cam_type]['lidar2img'])
-        # Depth to image conversion for SUNRGBD dataset
-        elif box_mode_3d == Box3DMode.DEPTH:
-            data_['depth2img'] = np.array(
-                data_info['images'][cam_type]['depth2img'])
+        if cam_type != 'all':
+            assert osp.isfile(img), f'{img} must be a file.'
+            img_path = data_info['images'][cam_type]['img_path']
+            if osp.basename(img_path) != osp.basename(img):
+                raise ValueError(
+                    f'the info file of {img_path} is not provided.')
+            data_ = dict(
+                lidar_points=dict(lidar_path=pcd),
+                img_path=img,
+                box_type_3d=box_type_3d,
+                box_mode_3d=box_mode_3d)
+            data_info['images'][cam_type]['img_path'] = img
+            if 'cam2img' in data_info['images'][cam_type]:
+                # The data annotation of the SUN RGB-D dataset does not
+                # contain `cam2img`
+                data_['cam2img'] = np.array(
+                    data_info['images'][cam_type]['cam2img'])
+
+            # LiDAR to image conversion for KITTI dataset
+            if box_mode_3d == Box3DMode.LIDAR:
+                if 'lidar2img' in data_info['images'][cam_type]:
+                    data_['lidar2img'] = np.array(
+                        data_info['images'][cam_type]['lidar2img'])
+            # Depth to image conversion for SUNRGBD dataset
+            elif box_mode_3d == Box3DMode.DEPTH:
+                data_['depth2img'] = np.array(
+                    data_info['images'][cam_type]['depth2img'])
+        else:
+            assert osp.isdir(img), f'{img} must be a directory.'
+            for _, img_info in data_info['images'].items():
+                img_info['img_path'] = osp.join(img, img_info['img_path'])
+                assert osp.isfile(img_info['img_path']
+                                  ), f'{img_info["img_path"]} does not exist.'
+            data_ = dict(
+                lidar_points=dict(lidar_path=pcd),
+                images=data_info['images'],
+                box_type_3d=box_type_3d,
+                box_mode_3d=box_mode_3d)
+
+        if 'timestamp' in data_info:
+            # Using multi-sweeps needs `timestamp`
+            data_['timestamp'] = data_info['timestamp']
 
         data_ = test_pipeline(data_)
         data.append(data_)
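To make the new `cam_type='all'` branch concrete, the sketch below illustrates the annotation structure it assumes: each view in `data_info['images']` stores a path relative to the user-supplied image directory, which the code joins and validates. The camera names follow nuScenes conventions and the file names are hypothetical placeholders, not values from this patch.

```python
# Illustration only: the structure assumed by the cam_type='all' branch.
# Camera names follow nuScenes; file names are placeholders.
import os.path as osp

img_dir = 'demo/data/nuscenes/'  # the `img` argument when cam_type='all'
data_info = {
    'images': {
        'CAM_FRONT': {'img_path': 'front.jpg'},
        'CAM_FRONT_LEFT': {'img_path': 'front_left.jpg'},
        'CAM_BACK': {'img_path': 'back.jpg'},
    }
}

# Same join-and-check logic as the new branch above.
for _, img_info in data_info['images'].items():
    img_info['img_path'] = osp.join(img_dir, img_info['img_path'])
    assert osp.isfile(img_info['img_path']), \
        f"{img_info['img_path']} does not exist."
```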
28 changes: 24 additions & 4 deletions mmdet3d/engine/hooks/visualization_hook.py
@@ -102,8 +102,17 @@ def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
         ]:
             assert 'img_path' in outputs[0], 'img_path is not in outputs[0]'
             img_path = outputs[0].img_path
-            img_bytes = get(img_path, backend_args=self.backend_args)
-            img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+            if isinstance(img_path, list):
+                img = []
+                for single_img_path in img_path:
+                    img_bytes = get(
+                        single_img_path, backend_args=self.backend_args)
+                    single_img = mmcv.imfrombytes(
+                        img_bytes, channel_order='rgb')
+                    img.append(single_img)
+            else:
+                img_bytes = get(img_path, backend_args=self.backend_args)
+                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
             data_input['img'] = img
 
         if self.vis_task in ['lidar_det', 'multi-modality_det', 'lidar_seg']:
@@ -161,10 +170,21 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
                 assert 'img_path' in data_sample, \
                     'img_path is not in data_sample'
                 img_path = data_sample.img_path
-                img_bytes = get(img_path, backend_args=self.backend_args)
-                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+                if isinstance(img_path, list):
+                    img = []
+                    for single_img_path in img_path:
+                        img_bytes = get(
+                            single_img_path, backend_args=self.backend_args)
+                        single_img = mmcv.imfrombytes(
+                            img_bytes, channel_order='rgb')
+                        img.append(single_img)
+                else:
+                    img_bytes = get(img_path, backend_args=self.backend_args)
+                    img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
                 data_input['img'] = img
             if self.test_out_dir is not None:
+                if isinstance(img_path, list):
+                    img_path = img_path[0]
                 out_file = osp.basename(img_path)
                 out_file = osp.join(self.test_out_dir, out_file)
 
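Since the list-vs-string branch is now duplicated between `after_val_iter` and `after_test_iter`, a small helper along the lines of the following sketch (hypothetical, not part of this commit) could fold both copies into one place.

```python
# Hypothetical refactor sketch, not part of this commit: one helper that
# loads either a single image path or a list of multi-view paths as RGB.
from typing import List, Union

import mmcv
import numpy as np
from mmengine.fileio import get


def _load_rgb(img_path: Union[str, List[str]],
              backend_args: dict = None
              ) -> Union[np.ndarray, List[np.ndarray]]:
    """Read one image path, or each path in a list, as an RGB array."""
    if isinstance(img_path, list):
        return [_load_rgb(p, backend_args) for p in img_path]
    img_bytes = get(img_path, backend_args=backend_args)
    return mmcv.imfrombytes(img_bytes, channel_order='rgb')
```

Both hook methods could then reduce to `data_input['img'] = _load_rgb(img_path, self.backend_args)`.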