
[Enhance] Support visualization of multi-modality 3D detector using multi-view images (#2488)

* init commit

* add demo image

* support wait-time in hook

* add demo of bevfusion

* polish docs

* more smooth multi-modal vis

* fix visualization.md

* support depth vis adaptively
JingweiZhang12 authored May 10, 2023
1 parent 1f0aeba commit 35fd839
Showing 17 changed files with 267 additions and 110 deletions.
Binary file modified demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl
Binary file not shown.
(Five more changed binary files could not be displayed by the viewer.)
11 changes: 9 additions & 2 deletions demo/multi_modality_demo.py
@@ -49,8 +49,15 @@ def main(args):
     result, data = inference_multi_modality_detector(model, args.pcd, args.img,
                                                      args.ann, args.cam_type)
     points = data['inputs']['points']
-    img = mmcv.imread(args.img)
-    img = mmcv.imconvert(img, 'bgr', 'rgb')
+    if isinstance(result.img_path, list):
+        img = []
+        for img_path in result.img_path:
+            single_img = mmcv.imread(img_path)
+            single_img = mmcv.imconvert(single_img, 'bgr', 'rgb')
+            img.append(single_img)
+    else:
+        img = mmcv.imread(result.img_path)
+        img = mmcv.imconvert(img, 'bgr', 'rgb')
     data_input = dict(points=points, img=img)
 
     # show the results
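For orientation, here is a minimal sketch of the step that follows in the demo: handing `data_input` (whose `img` entry may now be a list of per-view RGB arrays) to the visualizer. The `add_datasample` arguments mirror the pattern of other mmdet3d demo scripts and are an assumption, not part of this diff; `model`, `result`, and `data_input` come from the surrounding script.

```python
# Hedged sketch of the visualization hand-off; `model`, `result` and
# `data_input` are assumed to come from the demo code above.
from mmdet3d.registry import VISUALIZERS

visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = model.dataset_meta
# `img` inside `data_input` may be a single RGB array or, with
# `--cam-type all`, a list of arrays (one per camera view).
visualizer.add_datasample(
    'result',
    data_input,
    data_sample=result,
    draw_gt=False,
    show=True,
    vis_task='multi-modality_det')
```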
6 changes: 6 additions & 0 deletions docs/en/user_guides/inference.md
@@ -78,6 +78,12 @@ Example on SUN RGB-D data using [ImVoteNet model](https://download.openmmlab.com
 python demo/multi_modality_demo.py demo/data/sunrgbd/000017.bin demo/data/sunrgbd/000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py ${CHECKPOINT_FILE} --cam-type CAM0 --show --score-thr 0.6
 ```
 
+Example on NuScenes data using [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link):
+
+```shell
+python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
+```
+
 ### 3D Segmentation
 
 To test a 3D segmentor on point cloud data, simply run:
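For readers who prefer scripting over the demo CLI, a rough Python-API equivalent of the BEVFusion command above looks like the sketch below; `init_model` and `inference_multi_modality_detector` are the public `mmdet3d.apis` entry points used by the demo, and the checkpoint path is a placeholder for `${CHECKPOINT_FILE}`.

```python
# Rough Python-API equivalent of the BEVFusion demo command above.
from mmdet3d.apis import inference_multi_modality_detector, init_model

config_file = 'projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py'
checkpoint_file = 'path/to/bevfusion_checkpoint.pth'  # placeholder
model = init_model(config_file, checkpoint_file, device='cuda:0')

# With cam_type='all', the image argument is the directory that holds the
# six camera views rather than a single image file.
result, data = inference_multi_modality_detector(
    model,
    'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin',
    'demo/data/nuscenes/',
    'demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl',
    cam_type='all')
```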
28 changes: 13 additions & 15 deletions docs/en/user_guides/visualization.md
@@ -42,18 +42,19 @@ We support drawing 3D boxes on point cloud by using `draw_bboxes_3d`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 from mmdet3d.structures import LiDARInstance3DBoxes
 
-points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
+points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
 points = points.reshape(-1, 4)
 visualizer = Det3DLocalVisualizer()
 # set point cloud in visualizer
 visualizer.set_points(points)
-bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
-    [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-      -1.5808]])),
+bboxes_3d = LiDARInstance3DBoxes(
+    torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
+                   -1.5808]]))
 # Draw 3D bboxes
 visualizer.draw_bboxes_3d(bboxes_3d)
 visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
 visualizer.show()
 ```
 
-![mono3d](../../../resources/mono3d.png)
-
 ### Drawing BEV Boxes
 
 We support drawing BEV boxes by using `draw_bev_bboxes`.
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
 visualizer.show()
 ```
 
-<img src="../../../resources/bev.png" width = "50%" />
-
 ### Drawing 3D Semantic Mask
 
 We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 
-points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
+points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
 points = points.reshape(-1, 3)
 visualizer = Det3DLocalVisualizer()
 mask = np.random.rand(points.shape[0], 3)
 points_with_mask = np.concatenate((points, mask), axis=-1)
 # Draw 3D points with mask
 visualizer.set_points(points, pcd_mode=2, vis_mode='add')
 visualizer.draw_seg_mask(points_with_mask)
 visualizer.show()
@@ -168,10 +166,10 @@ This allows the inference and results generation to be done in remote server and
 We also provide scripts to visualize the dataset without inference. You can use `tools/misc/browse_dataset.py` to show loaded data and ground truth online and save them on the disk. Currently we support single-modality 3D detection and 3D segmentation on all the datasets, multi-modality 3D detection on KITTI and SUN RGB-D, as well as monocular 3D detection on nuScenes. To browse the KITTI dataset, you can run the following command:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
 ```
 
-**Notice**: Once specifying `--output-dir`, the images of views specified by users will be saved when pressing `_ESC_` in open3d window.
+**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window. If you want to zoom in/out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.
 
 To verify the data consistency and the effect of data augmentation, you can also add the `--aug` flag to visualize the data after data augmentation, using the command below:
 
@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
 If you also want to show 2D images with 3D bounding boxes projected onto them, you need to find a config that supports multi-modality data loading, and then change the `--task` argument to `multi-modality_det`. An example is shown below:
 
 ```shell
-python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_multi_modality.png)
 
 You can simply browse different datasets using different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_seg.png)
 
 And browsing the nuScenes dataset in the monocular 3D detection task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_mono.png)
6 changes: 6 additions & 0 deletions docs/zh_cn/user_guides/inference.md
@@ -78,6 +78,12 @@ python demo/multi_modality_demo.py demo/data/kitti/000008.bin demo/data/kitti/00
 python demo/multi_modality_demo.py demo/data/sunrgbd/000017.bin demo/data/sunrgbd/000017.jpg demo/data/sunrgbd/sunrgbd_000017_infos.pkl configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py ${CHECKPOINT_FILE} --cam-type CAM0 --show --score-thr 0.6
 ```
 
+An example of testing the [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link) on NuScenes data:
+
+```shell
+python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
+```
+
 ### 3D Segmentation
 
 To test a 3D segmentor on point cloud data, run:
26 changes: 12 additions & 14 deletions docs/zh_cn/user_guides/visualization.md
@@ -42,18 +42,19 @@ visualizer.show()
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 from mmdet3d.structures import LiDARInstance3DBoxes
 
-points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
+points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
 points = points.reshape(-1, 4)
 visualizer = Det3DLocalVisualizer()
 # set point cloud in visualizer
 visualizer.set_points(points)
-bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
-    [[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-      -1.5808]])),
+bboxes_3d = LiDARInstance3DBoxes(
+    torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
+                   -1.5808]]))
 # Draw 3D bboxes
 visualizer.draw_bboxes_3d(bboxes_3d)
 visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
 visualizer.show()
 ```
 
-![mono3d](../../../resources/mono3d.png)
-
 ### Drawing BEV Boxes
 
 We support drawing boxes in BEV by using `draw_bev_bboxes`.
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
 visualizer.show()
 ```
 
-<img src="../../../resources/bev.png" width = "50%" />
-
 ### Drawing 3D Semantic Mask
 
 We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.
 
 ```python
 import torch
+import numpy as np
 
 from mmdet3d.visualization import Det3DLocalVisualizer
 
-points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
+points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
 points = points.reshape(-1, 3)
 visualizer = Det3DLocalVisualizer()
 mask = np.random.rand(points.shape[0], 3)
 points_with_mask = np.concatenate((points, mask), axis=-1)
 # Draw 3D points with mask
 visualizer.set_points(points, pcd_mode=2, vis_mode='add')
 visualizer.draw_seg_mask(points_with_mask)
 visualizer.show()
@@ -171,7 +169,7 @@ python tools/misc/visualize_results.py ${CONFIG_FILE} --result ${RESULTS_PATH} -
 python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
 ```
 
-**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window.
+**Notice**: Once `--output-dir` is specified, the images of the views specified by users will be saved when pressing `_ESC_` in the open3d window. If you want to zoom in/out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.
 
 To verify the data consistency and the effect of data augmentation, you can add the `--aug` flag to visualize the data after augmentation, using the command below:
 
@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
 If you want to show 2D images with projected 3D bounding boxes, you need a config that supports multi-modality data loading, and change the `--task` argument to `multi-modality_det`. An example is shown below:
 
 ```shell
-python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
+python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_multi_modality.png)
 
 You can browse different datasets with different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_seg.png)
 
 And browsing the nuScenes dataset in the monocular 3D detection task:
 
 ```shell
-python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
+python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
 ```
 
 ![](../../../resources/browse_dataset_mono.png)
78 changes: 46 additions & 32 deletions mmdet3d/apis/inference.py
@@ -188,10 +188,10 @@ def inference_multi_modality_detector(model: nn.Module,
         imgs (str, Sequence[str]):
             Either image files or loaded images.
         ann_file (str, Sequence[str]): Annotation files.
-        cam_type (str): Image of Camera chose to infer.
-            For kitti dataset, it should be 'CAM2',
-            and for nuscenes dataset, it should be
-            'CAM_FRONT'. Defaults to 'CAM_FRONT'.
+        cam_type (str): The camera view to infer from. When the detector only
+            uses a single-view image, a camera view must be specified: for the
+            KITTI dataset it should be 'CAM2', and for SUN RGB-D it should be
+            'CAM0'. When the detector uses multi-view images, set it to 'all'.
 
     Returns:
         :obj:`Det3DDataSample` or list[:obj:`Det3DDataSample`]:
@@ -220,37 +220,51 @@ def inference_multi_modality_detector(model: nn.Module,
     data = []
     for index, pcd in enumerate(pcds):
         # get data info containing calib
-        img = imgs[index]
         data_info = data_list[index]
-        img_path = data_info['images'][cam_type]['img_path']
-
-        if osp.basename(img_path) != osp.basename(img):
-            raise ValueError(f'the info file of {img_path} is not provided.')
-
-        # TODO: check the name consistency of
-        # image file and point cloud file
-        # TODO: support multi-view image loading
-        data_ = dict(
-            lidar_points=dict(lidar_path=pcd),
-            img_path=img,
-            box_type_3d=box_type_3d,
-            box_mode_3d=box_mode_3d)
+        img = imgs[index]
 
-        data_info['images'][cam_type]['img_path'] = img
-        if 'cam2img' in data_info['images'][cam_type]:
-            # The data annotation in SRUNRGBD dataset does not contain
-            # `cam2img`
-            data_['cam2img'] = np.array(
-                data_info['images'][cam_type]['cam2img'])
-
-        # LiDAR to image conversion for KITTI dataset
-        if box_mode_3d == Box3DMode.LIDAR:
-            data_['lidar2img'] = np.array(
-                data_info['images'][cam_type]['lidar2img'])
-        # Depth to image conversion for SUNRGBD dataset
-        elif box_mode_3d == Box3DMode.DEPTH:
-            data_['depth2img'] = np.array(
-                data_info['images'][cam_type]['depth2img'])
+        if cam_type != 'all':
+            assert osp.isfile(img), f'{img} must be a file.'
+            img_path = data_info['images'][cam_type]['img_path']
+            if osp.basename(img_path) != osp.basename(img):
+                raise ValueError(
+                    f'the info file of {img_path} is not provided.')
+            data_ = dict(
+                lidar_points=dict(lidar_path=pcd),
+                img_path=img,
+                box_type_3d=box_type_3d,
+                box_mode_3d=box_mode_3d)
+            data_info['images'][cam_type]['img_path'] = img
+            if 'cam2img' in data_info['images'][cam_type]:
+                # The data annotation of the SUN RGB-D dataset does not
+                # contain `cam2img`
+                data_['cam2img'] = np.array(
+                    data_info['images'][cam_type]['cam2img'])
+
+            # LiDAR to image conversion for KITTI dataset
+            if box_mode_3d == Box3DMode.LIDAR:
+                if 'lidar2img' in data_info['images'][cam_type]:
+                    data_['lidar2img'] = np.array(
+                        data_info['images'][cam_type]['lidar2img'])
+            # Depth to image conversion for SUNRGBD dataset
+            elif box_mode_3d == Box3DMode.DEPTH:
+                data_['depth2img'] = np.array(
+                    data_info['images'][cam_type]['depth2img'])
+        else:
+            assert osp.isdir(img), f'{img} must be a directory.'
+            for _, img_info in data_info['images'].items():
+                img_info['img_path'] = osp.join(img, img_info['img_path'])
+                assert osp.isfile(img_info['img_path']
+                                  ), f'{img_info["img_path"]} does not exist.'
+            data_ = dict(
+                lidar_points=dict(lidar_path=pcd),
+                images=data_info['images'],
+                box_type_3d=box_type_3d,
+                box_mode_3d=box_mode_3d)
+
+        if 'timestamp' in data_info:
+            # Using multi-sweeps needs `timestamp`
+            data_['timestamp'] = data_info['timestamp']
 
         data_ = test_pipeline(data_)
         data.append(data_)
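To make the new `cam_type='all'` branch concrete, the sketch below illustrates the annotation structure it assumes: each view in `data_info['images']` stores a path relative to the user-supplied image directory, which the code joins and validates. The camera names follow nuScenes conventions and the file names are hypothetical placeholders, not values from this patch.

```python
# Illustration only: the structure assumed by the cam_type='all' branch.
# Camera names follow nuScenes; file names are placeholders.
import os.path as osp

img_dir = 'demo/data/nuscenes/'  # the `img` argument when cam_type='all'
data_info = {
    'images': {
        'CAM_FRONT': {'img_path': 'front.jpg'},
        'CAM_FRONT_LEFT': {'img_path': 'front_left.jpg'},
        'CAM_BACK': {'img_path': 'back.jpg'},
    }
}

# Same join-and-check logic as the new branch above.
for _, img_info in data_info['images'].items():
    img_info['img_path'] = osp.join(img_dir, img_info['img_path'])
    assert osp.isfile(img_info['img_path']), \
        f"{img_info['img_path']} does not exist."
```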
28 changes: 24 additions & 4 deletions mmdet3d/engine/hooks/visualization_hook.py
@@ -102,8 +102,17 @@ def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
         ]:
             assert 'img_path' in outputs[0], 'img_path is not in outputs[0]'
             img_path = outputs[0].img_path
-            img_bytes = get(img_path, backend_args=self.backend_args)
-            img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+            if isinstance(img_path, list):
+                img = []
+                for single_img_path in img_path:
+                    img_bytes = get(
+                        single_img_path, backend_args=self.backend_args)
+                    single_img = mmcv.imfrombytes(
+                        img_bytes, channel_order='rgb')
+                    img.append(single_img)
+            else:
+                img_bytes = get(img_path, backend_args=self.backend_args)
+                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
             data_input['img'] = img
 
         if self.vis_task in ['lidar_det', 'multi-modality_det', 'lidar_seg']:
@@ -161,10 +170,21 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
                 assert 'img_path' in data_sample, \
                     'img_path is not in data_sample'
                 img_path = data_sample.img_path
-                img_bytes = get(img_path, backend_args=self.backend_args)
-                img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+                if isinstance(img_path, list):
+                    img = []
+                    for single_img_path in img_path:
+                        img_bytes = get(
+                            single_img_path, backend_args=self.backend_args)
+                        single_img = mmcv.imfrombytes(
+                            img_bytes, channel_order='rgb')
+                        img.append(single_img)
+                else:
+                    img_bytes = get(img_path, backend_args=self.backend_args)
+                    img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
                 data_input['img'] = img
             if self.test_out_dir is not None:
+                if isinstance(img_path, list):
+                    img_path = img_path[0]
                 out_file = osp.basename(img_path)
                 out_file = osp.join(self.test_out_dir, out_file)
 
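Since the list-vs-string branch is now duplicated between `after_val_iter` and `after_test_iter`, a small helper along the lines of the following sketch (hypothetical, not part of this commit) could fold both copies into one place.

```python
# Hypothetical refactor sketch, not part of this commit: one helper that
# loads either a single image path or a list of multi-view paths as RGB.
from typing import List, Union

import mmcv
import numpy as np
from mmengine.fileio import get


def _load_rgb(img_path: Union[str, List[str]],
              backend_args: dict = None
              ) -> Union[np.ndarray, List[np.ndarray]]:
    """Read one image path, or each path in a list, as an RGB array."""
    if isinstance(img_path, list):
        return [_load_rgb(p, backend_args) for p in img_path]
    img_bytes = get(img_path, backend_args=backend_args)
    return mmcv.imfrombytes(img_bytes, channel_order='rgb')
```

Both hook methods could then reduce to `data_input['img'] = _load_rgb(img_path, self.backend_args)`.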