diff --git a/.gitignore b/.gitignore index 9fd7ec8eb..053e30cb7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ venv/ *.bin output version.py +.vscode/ diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..7e98067c7 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,373 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Waymo Demo", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + "args": ["--cfg_file", "./cfgs/waymo_models/pv_rcnn.yaml", "--ckpt", "../output/cfgs/waymo_models/pv_rcnn/default/ckpt/latest_model.pth", "--data_path", "../data/waymo/waymo_processed_data_v0_5_0/segment-15832924468527961_1564_160_1584_160_with_camera_labels/0000.npy", "--ext", ".npy"] + }, + { + "name": "Python: Run File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Train KITTI PV_RCNN BADet", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_BADet_car_class_only.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train Waymo PV_RCNN", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/waymo_models/pv_rcnn.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train Waymo PV_RCNN Relation", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/waymo_models/pv_rcnn_relation.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI PV_RCNN", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI PV_RCNN Relation Experiment", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/experiments/post_mlp/a.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + + { + "name": "Python: Train KITTI FROZEN PV_RCNN", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_frozen.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": 
"/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI FROZEN PV_RCNN Relation A", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/experiments/iterative_gnn/OneGNNIteration_added.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI FROZEN PV_RCNN Relation", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_frozen_relation.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Evaluate Waymo", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/waymo_models/pv_rcnn.yaml", "--ckpt", "../output/cfgs/waymo_models/pv_rcnn/default/ckpt/latest_model.pth", "--extra_tag", "default"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Evaluate KITTI PV_RCNN Frozen", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/waymo_modelspv_rcnn.yaml", "--ckpt", "../output/cfgs/kitti_models/pv_rcnn/2023-09-04_09-52-39/ckpt/checkpoint_epoch_78.pth", "--extra_tag", "2023-09-04_09-52-39"], + "cwd": "/root/OpenPCDet/tools/", + }, + + { + "name": "Python: Evaluate KITTI PV_RCNN Frozen", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn.yaml", "--ckpt", "../output/cfgs/kitti_models/pv_rcnn/2023-09-04_09-52-39/ckpt/checkpoint_epoch_78.pth", "--extra_tag", "2023-09-04_09-52-39"], + "cwd": "/root/OpenPCDet/tools/", + }, + + { + "name": "Python: Train KITTI PV_RCNN_Relation Car Class only", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + + { + "name": "Python: Train KITTI PV_RCNN_PlusPlus_Relation Car Class only", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus_relation_car_class_only.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI Voxel-RCNN", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/voxel_rcnn_car.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI Voxel-RCNN-Relation", + "type": "python", + "request": "launch", + "program": "${file}", + "console": 
"integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/voxel_rcnn_relation_car.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI PV_RCNN_PLUSPLUS_RELATION", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus_relation.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + }, + { + "name": "Python: Train KITTI PV_RCNN_RELATION", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_relation.yaml", "--extra_tag", "debug"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI PV_RCNN++", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus.yaml", "--extra_tag", "debug"], + "env": { + "CUDA_VISIBLE_DEVICES": "1" + + }, + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Train KITTI CenterPoint", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/centerpoint.yaml", "--extra_tag", "debug"], + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI CenterPointTwoStage", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/centerpoint_twostage.yaml", "--extra_tag", "debug"], + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI PartA2 Car Class Only", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/PartA2_car_class_only.yaml", "--extra_tag", "debug"], + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Train KITTI PartA2 Relation Car Class Only", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["--cfg_file", "./cfgs/kitti_models/PartA2_relation_car_class_only.yaml", "--extra_tag", "debug"], + "cwd": "/root/OpenPCDet/tools/", + }, + { + "name": "Python: Visualize KITTI PV_RCNN", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + // "env": { + // "CUDA_VISIBLE_DEVICES": "1" + + // }, + // "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn.yaml", "--ckpt", "../output/kitti/pv_rcnn_8369.pth", "--data_path", "../data/kitti/testing/velodyne/000008.bin"], + // "preLaunchTask": "Change working directory", + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Evaluate KITTI PV_RCNN_Relation_CAR_ONLY", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml", "--ckpt", 
"../output/cfgs/kitti_models/pv_rcnn_relation_car_class_only/2023-09-29_07-21-48/ckpt/checkpoint_epoch_80.pth", "--extra_tag", "2023-09-29_07-21-48"], + "cwd": "/root/OpenPCDet/tools/", + "env": { + "CUDA_VISIBLE_DEVICES": "1" + + }, + "justMyCode": false + }, + { + "name": "Python: Evaluate KITTI PV_RCNN++ ALL", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus.yaml", "--batch_size", "8", "--extra_tag", "2023-07-31_15-59-33", "--eval_all"], + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Evaluate KITTI PV_RCNN++-Relatoin ALL", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus_relation.yaml", "--extra_tag", "2023-09-11_09-37-34", "--eval_all"], + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Evaluate KITTI CenterPointTwoStage ALL", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/centerpoint_twostage.yaml", "--extra_tag", "2023-09-12_13-29-56", "--eval_all"], + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Evaluate KITTI Object Relation FC ALL", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_relation_fc.yaml", "--extra_tag", "2023-09-14_13-12-43", "--eval_all"], + "cwd": "/root/OpenPCDet/tools/" + }, + { + "name": "Python: Train PV-RCNN++ with ckpt", + "type": "python", + "request": "launch", + "program": "${file}", + "args": ["--cfg_file", "./cfgs/kitti_models/pv_rcnn_plusplus.yaml", "--extra_tag", "2023-07-29_20-33-11_continued", "--ckpt", "../output/cfgs/kitti_models/pv_rcnn_plusplus/2023-07-29_20-33-11/ckpt/checkpoint_epoch_80.pth"], + "console": "integratedTerminal", + "cwd": "/root/OpenPCDet/tools/" + } + ] +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..28315fdf0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,88 @@ +# This tag does not exist anymore it was changed to the next closest: FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 +FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu18.04 + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +# Install basics +RUN apt-get clean && rm -rf /var/lib/apt/lists/* +RUN apt-get update -y \ + && apt-get install -y build-essential \ + && apt-get install -y apt-utils git curl ca-certificates bzip2 tree htop wget \ + && apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev bmon iotop g++ python3.8 python3.8-dev python3.8-distutils + +# Install cmake v3.13.2 +RUN apt-get purge -y cmake && \ + mkdir /root/temp && \ + cd /root/temp && \ + wget https://github.com/Kitware/CMake/releases/download/v3.13.2/cmake-3.13.2.tar.gz && \ + tar -xzvf cmake-3.13.2.tar.gz && \ + cd cmake-3.13.2 && \ + bash ./bootstrap && \ + make && \ + make install && \ + cmake --version && \ + rm -rf /root/temp + +# Install python +RUN ln -sv /usr/bin/python3.8 /usr/bin/python +RUN wget https://bootstrap.pypa.io/get-pip.py && \ + python get-pip.py && \ + rm get-pip.py + +# Install python packages +RUN PIP_INSTALL="python -m pip --no-cache-dir install" && \ + # $PIP_INSTALL numpy==1.19.3 llvmlite numba + $PIP_INSTALL numpy==1.24.4 llvmlite numba + +# Install torch and torchvision +# See https://pytorch.org/ for other 
options if you use a different version of CUDA
+# RUN pip install --user torch==1.6 torchvision==0.7.0 -f https://download.pytorch.org/whl/cu102/torch_stable.html
+# RUN pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
+RUN pip install torch torchvision torchaudio
+
+# Install python packages
+RUN PIP_INSTALL="python -m pip --no-cache-dir install" && \
+ $PIP_INSTALL tensorboardX easydict pyyaml scikit-image tqdm SharedArray six
+
+WORKDIR /root
+
+# Install Boost geometry
+RUN wget https://jaist.dl.sourceforge.net/project/boost/boost/1.68.0/boost_1_68_0.tar.gz && \
+ tar xzvf boost_1_68_0.tar.gz && \
+ cp -r ./boost_1_68_0/boost /usr/include && \
+ rm -rf ./boost_1_68_0 && \
+ rm -rf ./boost_1_68_0.tar.gz
+
+# A weird problem that hasn't been solved yet
+RUN pip uninstall -y SharedArray && \
+ pip install SharedArray
+
+RUN pip install spconv-cu117
+
+RUN pip install torch_geometric
+
+RUN pip install open3d
+
+RUN pip install mayavi
+
+RUN pip install kornia
+
+RUN apt-get update && apt-get install -y libgl1-mesa-glx
+
+RUN apt-get update && apt-get install -y \
+ openssh-client \
+ git
+
+RUN git clone https://github.com/MarcBrede/OpenPCDet.git
+
+ENV PYTHONPATH /root/OpenPCDet/
+
+RUN pip install av2
+
+RUN pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-2.0.1+cu117.html
+RUN pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-2.0.1+cu117.html
+RUN pip install tensorboard
+
+# RUN ls /root/OpenPCDet/
+
+# RUN python /root/OpenPCDet/setup.py develop \ No newline at end of file
diff --git a/MTObjectRelations.pdf b/MTObjectRelations.pdf new file mode 100644 index 000000000..4197efa61 Binary files /dev/null and b/MTObjectRelations.pdf differ
diff --git a/README.md b/README.md index 779571acb..a630826c5 100644 --- a/README.md +++ b/README.md @@ -1,291 +1,98 @@
-
+# Object Relations for 3D Object Detection
-# OpenPCDet
+This work explores the impact of modeling object relations in two-stage 3D object detection pipelines, aiming to enhance their detection performance. It extends OpenPCDet with a module that models object relations and can be integrated into existing object detectors. To get this project running, please check [OpenPCDet](https://github.com/open-mmlab/OpenPCDet).
+
-`OpenPCDet` is a clear, simple, self-contained open source project for LiDAR-based 3D object detection.
+## Project Structure
-It is also the official code release of [`[PointRCNN]`](https://arxiv.org/abs/1812.04244), [`[Part-A2-Net]`](https://arxiv.org/abs/1907.03670), [`[PV-RCNN]`](https://arxiv.org/abs/1912.13192), [`[Voxel R-CNN]`](https://arxiv.org/abs/2012.15712), [`[PV-RCNN++]`](https://arxiv.org/abs/2102.00463) and [`[MPPNet]`](https://arxiv.org/abs/2205.05979).
+This project extends [OpenPCDet](https://github.com/open-mmlab/OpenPCDet). Some of OpenPCDet's models are extended with an Object Relation Module; see the list below.
-**Highlights**:
-* `OpenPCDet` has been updated to `v0.6.0` (Sep. 2022).
-* The codes of PV-RCNN++ has been supported.
-* The codes of MPPNet has been supported.
-* The multi-modal 3D detection approaches on Nuscenes have been supported.
- -## Overview -- [Changelog](#changelog) -- [Design Pattern](#openpcdet-design-pattern) -- [Model Zoo](#model-zoo) -- [Installation](docs/INSTALL.md) -- [Quick Demo](docs/DEMO.md) -- [Getting Started](docs/GETTING_STARTED.md) -- [Citation](#citation) - - -## Changelog -[2023-06-30] **NEW:** Added support for [`DSVT`](https://arxiv.org/abs/2301.06051), which achieves state-of-the-art performance on large-scale Waymo Open Dataset with real-time inference speed (27HZ with TensorRT). - -[2023-05-13] **NEW:** Added support for the multi-modal 3D object detection models on Nuscenes dataset. -* Support multi-modal Nuscenes detection (See the [GETTING_STARTED.md](docs/GETTING_STARTED.md) to process data). -* Support [TransFusion-Lidar](https://arxiv.org/abs/2203.11496) head, which ahcieves 69.43% NDS on Nuscenes validation dataset. -* Support [`BEVFusion`](https://arxiv.org/abs/2205.13542), which fuses multi-modal information on BEV space and reaches 70.98% NDS on Nuscenes validation dataset. (see the [guideline](docs/guidelines_of_approaches/bevfusion.md) on how to train/test with BEVFusion). - -[2023-04-02] Added support for [`VoxelNeXt`](https://arxiv.org/abs/2303.11301) on Nuscenes, Waymo, and Argoverse2 datasets. It is a fully sparse 3D object detection network, which is a clean sparse CNNs network and predicts 3D objects directly upon voxels. - -[2022-09-02] **NEW:** Update `OpenPCDet` to v0.6.0: -* Official code release of [`MPPNet`](https://arxiv.org/abs/2205.05979) for temporal 3D object detection, which supports long-term multi-frame 3D object detection and ranks 1st place on [3D detection learderboard](https://waymo.com/open/challenges/2020/3d-detection) of Waymo Open Dataset on Sept. 2th, 2022. For validation dataset, MPPNet achieves 74.96%, 75.06% and 74.52% for vehicle, pedestrian and cyclist classes in terms of mAPH@Level_2. (see the [guideline](docs/guidelines_of_approaches/mppnet.md) on how to train/test with MPPNet). -* Support multi-frame training/testing on Waymo Open Dataset (see the [change log](docs/changelog.md) for more details on how to process data). -* Support to save changing training details (e.g., loss, iter, epoch) to file (previous tqdm progress bar is still supported by using `--use_tqdm_to_record`). Please use `pip install gpustat` if you also want to log the GPU related information. -* Support to save latest model every 5 mintues, so you can restore the model training from latest status instead of previous epoch. - -[2022-08-22] Added support for [custom dataset tutorial and template](docs/CUSTOM_DATASET_TUTORIAL.md) - -[2022-07-05] Added support for the 3D object detection backbone network [`Focals Conv`](https://openaccess.thecvf.com/content/CVPR2022/papers/Chen_Focal_Sparse_Convolutional_Networks_for_3D_Object_Detection_CVPR_2022_paper.pdf). - -[2022-02-12] Added support for using docker. Please refer to the guidance in [./docker](./docker). - -[2022-02-07] Added support for Centerpoint models on Nuscenes Dataset. - -[2022-01-14] Added support for dynamic pillar voxelization, following the implementation proposed in [`H^23D R-CNN`](https://arxiv.org/abs/2107.14391) with unique operation and [`torch_scatter`](https://github.com/rusty1s/pytorch_scatter) package. - -[2022-01-05] **NEW:** Update `OpenPCDet` to v0.5.2: -* The code of [`PV-RCNN++`](https://arxiv.org/abs/2102.00463) has been released to this repo, with higher performance, faster training/inference speed and less memory consumption than PV-RCNN. 
-* Add performance of several models trained with full training set of [Waymo Open Dataset](#waymo-open-dataset-baselines). -* Support Lyft dataset, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/720). - - -[2021-12-09] **NEW:** Update `OpenPCDet` to v0.5.1: -* Add PointPillar related baseline configs/results on [Waymo Open Dataset](#waymo-open-dataset-baselines). -* Support Pandaset dataloader, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/396). -* Support a set of new augmentations, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/653). - -[2021-12-01] **NEW:** `OpenPCDet` v0.5.0 is released with the following features: -* Improve the performance of all models on [Waymo Open Dataset](#waymo-open-dataset-baselines). Note that you need to re-prepare the training/validation data and ground-truth database of Waymo Open Dataset (see [GETTING_STARTED.md](docs/GETTING_STARTED.md)). -* Support anchor-free [CenterHead](pcdet/models/dense_heads/center_head.py), add configs of `CenterPoint` and `PV-RCNN with CenterHead`. -* Support lastest **PyTorch 1.1~1.10** and **spconv 1.0~2.x**, where **spconv 2.x** should be easy to install with pip and faster than previous version (see the official update of spconv [here](https://github.com/traveller59/spconv)). -* Support config [`USE_SHARED_MEMORY`](tools/cfgs/dataset_configs/waymo_dataset.yaml) to use shared memory to potentially speed up the training process in case you suffer from an IO problem. -* Support better and faster [visualization script](tools/visual_utils/open3d_vis_utils.py), and you need to install [Open3D](https://github.com/isl-org/Open3D) firstly. - -[2021-06-08] Added support for the voxel-based 3D object detection model [`Voxel R-CNN`](#KITTI-3D-Object-Detection-Baselines). - -[2021-05-14] Added support for the monocular 3D object detection model [`CaDDN`](#KITTI-3D-Object-Detection-Baselines). - -[2020-11-27] Bugfixed: Please re-prepare the validation infos of Waymo dataset (version 1.2) if you would like to -use our provided Waymo evaluation tool (see [PR](https://github.com/open-mmlab/OpenPCDet/pull/383)). -Note that you do not need to re-prepare the training data and ground-truth database. - -[2020-11-10] The [Waymo Open Dataset](#waymo-open-dataset-baselines) has been supported with state-of-the-art results. Currently we provide the -configs and results of `SECOND`, `PartA2` and `PV-RCNN` on the Waymo Open Dataset, and more models could be easily supported by modifying their dataset configs. - -[2020-08-10] Bugfixed: The provided NuScenes models have been updated to fix the loading bugs. Please redownload it if you need to use the pretrained NuScenes models. - -[2020-07-30] `OpenPCDet` v0.3.0 is released with the following features: - * The Point-based and Anchor-Free models ([`PointRCNN`](#KITTI-3D-Object-Detection-Baselines), [`PartA2-Free`](#KITTI-3D-Object-Detection-Baselines)) are supported now. - * The NuScenes dataset is supported with strong baseline results ([`SECOND-MultiHead (CBGS)`](#NuScenes-3D-Object-Detection-Baselines) and [`PointPillar-MultiHead`](#NuScenes-3D-Object-Detection-Baselines)). - * High efficiency than last version, support **PyTorch 1.1~1.7** and **spconv 1.0~1.2** simultaneously. - -[2020-07-17] Add simple visualization codes and a quick demo to test with custom data. - -[2020-06-24] `OpenPCDet` v0.2.0 is released with pretty new structures to support more models and datasets. - -[2020-03-16] `OpenPCDet` v0.1.0 is released. 
-
-
-## Introduction
-
-
-### What does `OpenPCDet` toolbox do?
-
-Note that we have upgrated `PCDet` from `v0.1` to `v0.2` with pretty new structures to support various datasets and models.
+## Installation
-`OpenPCDet` is a general PyTorch-based codebase for 3D object detection from point cloud.
-It currently supports multiple state-of-the-art 3D object detection methods with highly refactored codes for both one-stage and two-stage 3D detection frameworks.
+1. Build the provided Dockerfile
+2. Run the following command in the project root:
+```bash
+python setup.py develop
+```
+3. Prepare data according to [OpenPCDet](https://github.com/open-mmlab/OpenPCDet)
-Based on `OpenPCDet` toolbox, we win the Waymo Open Dataset challenge in [3D Detection](https://waymo.com/open/challenges/3d-detection/),
-[3D Tracking](https://waymo.com/open/challenges/3d-tracking/), [Domain Adaptation](https://waymo.com/open/challenges/domain-adaptation/)
-three tracks among all LiDAR-only methods, and the Waymo related models will be released to `OpenPCDet` soon.
+## Training & Testing
+The following commands should be run in `./tools`.
-We are actively updating this repo currently, and more datasets and models will be supported soon.
-Contributions are also welcomed.
+Train Model:
+```bash
+python train.py --cfg_file {PATH_TO_CONFIG_FILE}
+```
-### `OpenPCDet` design pattern
+Test Model:
+```bash
+python test.py --cfg_file {PATH_TO_CONFIG_FILE} --ckpt {PATH_TO_MODEL}
+```
-* Data-Model separation with unified point cloud coordinate for easily extending to custom datasets:
-
- -
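+As a concrete end-to-end example, the steps above might look as follows. This is a minimal sketch: the Docker image name, the `--extra_tag` value, and the checkpoint path are illustrative and depend on your setup, while the config path comes from the model table below.
+
+```bash
+# Build and start the provided container (the image name is illustrative)
+docker build -t openpcdet-relation .
+docker run --gpus all -it openpcdet-relation bash
+
+# Inside the container: build the project once, then work from ./tools
+cd /root/OpenPCDet && python setup.py develop
+cd tools
+
+# Train PV-RCNN-Relation on KITTI and evaluate one of the resulting checkpoints
+python train.py --cfg_file ./cfgs/kitti_models/pv_rcnn_relation.yaml --extra_tag example_run
+python test.py --cfg_file ./cfgs/kitti_models/pv_rcnn_relation.yaml --extra_tag example_run \
+    --ckpt ../output/cfgs/kitti_models/pv_rcnn_relation/example_run/ckpt/checkpoint_epoch_80.pth
+```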
+## Models -* Unified 3D box definition: (x, y, z, dx, dy, dz, heading). +| Model | Path | Description | Dataset | +| :------------------------------------ | :------------------------------------------------------------------------ | :------------------------------------------------------------------------ | :------ | +| **PV-RCNN-Relation** | `tools/cfgs/kitti_models/pv_rcnn_relation{_car_class_only}.yaml` | PV-RCNN model extended with the Object Relation Module. | KITTI | +| **PV-RCNN-Relation** | `tools/cfgs/waymo_models/pv_rcnn_relation.yaml` | PV-RCNN model extended with the Object Relation Module. | Waymo | +| **PV-RCNN++-Relation** | `tools/cfgs/kitti_models/pv_rcnn_plusplus_relation{_car_class_only}.yaml` | PV-RCNN++ model extended with the Object Relation Module. | KITTI | +| **Voxel-RCNN-Relation Car Class** | `tools/cfgs/kitti_models/voxel_rcnn_relation_car_class_only.yaml` | Voxel-RCNN extended with the Object Relation Module. | KITTI | +| **PartA2-Relation Car Class** | `tools/cfgs/kitti_models/PartA2_relation_car_class_only.yaml` | PartA2 model extended with the Object Relation Module, trained only on the car class. | KITTI | -* Flexible and clear model structure to easily support various 3D detection models: -- -
-* Support various models within one framework as: -- -
+For some models, the suffix `_car_class_only.yaml` can be used to train the model on the car class only (see the training example at the end of this README).
-### Currently Supported Features
-- [x] Support both one-stage and two-stage 3D object detection frameworks
-- [x] Support distributed training & testing with multiple GPUs and multiple machines
-- [x] Support multiple heads on different scales to detect different classes
-- [x] Support stacked version set abstraction to encode various number of points in different scenes
-- [x] Support Adaptive Training Sample Selection (ATSS) for target assignment
-- [x] Support RoI-aware point cloud pooling & RoI-grid point cloud pooling
-- [x] Support GPU version 3D IoU calculation and rotated NMS
-## Model Zoo
-### KITTI 3D Object Detection Baselines
-Selected supported methods are shown in the below table. The results are the 3D detection performance of moderate difficulty on the *val* set of KITTI dataset.
-* All LiDAR-based models are trained with 8 GTX 1080Ti GPUs and are available for download.
-* The training time is measured with 8 TITAN XP GPUs and PyTorch 1.5.
-| | training time | Car@R11 | Pedestrian@R11 | Cyclist@R11 | download |
-|---------------------------------------------|----------:|:-------:|:-------:|:-------:|:---------:|
-| [PointPillar](tools/cfgs/kitti_models/pointpillar.yaml) |~1.2 hours| 77.28 | 52.29 | 62.68 | [model-18M](https://drive.google.com/file/d/1wMxWTpU1qUoY3DsCH31WJmvJxcjFXKlm/view?usp=sharing) |
-| [SECOND](tools/cfgs/kitti_models/second.yaml) | ~1.7 hours | 78.62 | 52.98 | 67.15 | [model-20M](https://drive.google.com/file/d/1-01zsPOsqanZQqIIyy7FpNXStL3y4jdR/view?usp=sharing) |
-| [SECOND-IoU](tools/cfgs/kitti_models/second_iou.yaml) | - | 79.09 | 55.74 | 71.31 | [model-46M](https://drive.google.com/file/d/1AQkeNs4bxhvhDQ-5sEo_yvQUlfo73lsW/view?usp=sharing) |
-| [PointRCNN](tools/cfgs/kitti_models/pointrcnn.yaml) | ~3 hours | 78.70 | 54.41 | 72.11 | [model-16M](https://drive.google.com/file/d/1BCX9wMn-GYAfSOPpyxf6Iv6fc0qKLSiU/view?usp=sharing)|
-| [PointRCNN-IoU](tools/cfgs/kitti_models/pointrcnn_iou.yaml) | ~3 hours | 78.75 | 58.32 | 71.34 | [model-16M](https://drive.google.com/file/d/1V0vNZ3lAHpEEt0MlT80eL2f41K2tHm_D/view?usp=sharing)|
-| [Part-A2-Free](tools/cfgs/kitti_models/PartA2_free.yaml) | ~3.8 hours| 78.72 | 65.99 | 74.29 | [model-226M](https://drive.google.com/file/d/1lcUUxF8mJgZ_e-tZhP1XNQtTBuC-R0zr/view?usp=sharing) |
-| [Part-A2-Anchor](tools/cfgs/kitti_models/PartA2.yaml) | ~4.3 hours| 79.40 | 60.05 | 69.90 | [model-244M](https://drive.google.com/file/d/10GK1aCkLqxGNeX3lVu8cLZyE0G8002hY/view?usp=sharing) |
-| [PV-RCNN](tools/cfgs/kitti_models/pv_rcnn.yaml) | ~5 hours| 83.61 | 57.90 | 70.47 | [model-50M](https://drive.google.com/file/d/1lIOq4Hxr0W3qsX83ilQv0nk1Cls6KAr-/view?usp=sharing) |
-| [Voxel R-CNN (Car)](tools/cfgs/kitti_models/voxel_rcnn_car.yaml) | ~2.2 hours| 84.54 | - | - | [model-28M](https://drive.google.com/file/d/19_jiAeGLz7V0wNjSJw4cKmMjdm5EW5By/view?usp=sharing) |
-| [Focals Conv - F](tools/cfgs/kitti_models/voxel_rcnn_car_focal_multimodal.yaml) | ~4 hours| 85.66 | - | - | [model-30M](https://drive.google.com/file/d/1u2Vcg7gZPOI-EqrHy7_6fqaibvRt2IjQ/view?usp=sharing) |
-||
-| [CaDDN (Mono)](tools/cfgs/kitti_models/CaDDN.yaml) |~15 hours| 21.38 | 13.02 | 9.76 | [model-774M](https://drive.google.com/file/d/1OQTO2PtXT8GGr35W9m2GZGuqgb6fyU1V/view?usp=sharing) |
+## Motivation
+There are four motivations for modeling object relations in the refinement stage of two-stage object detection pipelines.
-### Waymo Open Dataset Baselines -We provide the setting of [`DATA_CONFIG.SAMPLED_INTERVAL`](tools/cfgs/dataset_configs/waymo_dataset.yaml) on the Waymo Open Dataset (WOD) to subsample partial samples for training and evaluation, -so you could also play with WOD by setting a smaller `DATA_CONFIG.SAMPLED_INTERVAL` even if you only have limited GPU resources. +- **Detecting Occlusions:** If information between an occluded and an occluder object is shared, the occluded object can be informed about its occlusion status. This can help the model learn the difference between sparse proposals that are heavily occluded and noise in the data. -By default, all models are trained with **a single frame** of **20% data (~32k frames)** of all the training samples on 8 GTX 1080Ti GPUs, and the results of each cell here are mAP/mAPH calculated by the official Waymo evaluation metrics on the **whole** validation set (version 1.2). +- **Exploiting Patterns:** Traffic scenes often follow specific patterns that can be exploited by the object detector. -| Performance@(train with 20\% Data) | Vec_L1 | Vec_L2 | Ped_L1 | Ped_L2 | Cyc_L1 | Cyc_L2 | -|---------------------------------------------|----------:|:-------:|:-------:|:-------:|:-------:|:-------:| -| [SECOND](tools/cfgs/waymo_models/second.yaml) | 70.96/70.34|62.58/62.02|65.23/54.24 |57.22/47.49| 57.13/55.62 | 54.97/53.53 | -| [PointPillar](tools/cfgs/waymo_models/pointpillar_1x.yaml) | 70.43/69.83 | 62.18/61.64 | 66.21/46.32|58.18/40.64|55.26/51.75|53.18/49.80 | -[CenterPoint-Pillar](tools/cfgs/waymo_models/centerpoint_pillar_1x.yaml)| 70.50/69.96|62.18/61.69|73.11/61.97|65.06/55.00|65.44/63.85|62.98/61.46| -[CenterPoint-Dynamic-Pillar](tools/cfgs/waymo_models/centerpoint_dyn_pillar_1x.yaml)| 70.46/69.93|62.06/61.58|73.92/63.35|65.91/56.33|66.24/64.69|63.73/62.24| -[CenterPoint](tools/cfgs/waymo_models/centerpoint_without_resnet.yaml)| 71.33/70.76|63.16/62.65| 72.09/65.49 |64.27/58.23| 68.68/67.39 |66.11/64.87| -| [CenterPoint (ResNet)](tools/cfgs/waymo_models/centerpoint.yaml)|72.76/72.23|64.91/64.42 |74.19/67.96 |66.03/60.34| 71.04/69.79 |68.49/67.28 | -| [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 74.66/74.12 |65.82/65.32 |71.71/62.24 |62.46/54.06 |66.53/65.18 |64.05/62.75 | -| [PV-RCNN (AnchorHead)](tools/cfgs/waymo_models/pv_rcnn.yaml) | 75.41/74.74 |67.44/66.80 |71.98/61.24 |63.70/53.95 |65.88/64.25 |63.39/61.82 | -| [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 75.95/75.43 |68.02/67.54 |75.94/69.40 |67.66/61.62 |70.18/68.98 |67.73/66.57| -| [Voxel R-CNN (CenterHead)-Dynamic-Voxel](tools/cfgs/waymo_models/voxel_rcnn_with_centerhead_dyn_voxel.yaml) | 76.13/75.66 |68.18/67.74 |78.20/71.98 |69.29/63.59 | 70.75/69.68 |68.25/67.21| -| [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 77.82/77.32| 69.07/68.62| 77.99/71.36| 69.92/63.74| 71.80/70.71| 69.31/68.26| -| [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) |77.61/77.14| 69.18/68.75| 79.42/73.31| 70.88/65.21| 72.50/71.39| 69.84/68.77| +- **Increase of Receptive Fields:** Current object detectors fail to incorporate enough context in the refinement stage because their receptive fields are too small. Object relations can be seen as an efficient mechanism to increase the receptive field. 
-Here we also provide the performance of several models trained on the full training set (refer to the paper of [PV-RCNN++](https://arxiv.org/abs/2102.00463)): +- **Proposal Consensus:** Proposals often form clusters around potential objects. Each proposal might have a different view of the object. Sharing information between these proposals leads to a consensus prediction. -| Performance@(train with 100\% Data) | Vec_L1 | Vec_L2 | Ped_L1 | Ped_L2 | Cyc_L1 | Cyc_L2 | -|-------------------------------------------------------------------------------------------|----------:|:-------:|:-------:|:-------:|:-------:|:-------:| -| [SECOND](tools/cfgs/waymo_models/second.yaml) | 72.27/71.69 | 63.85/63.33 | 68.70/58.18 | 60.72/51.31 | 60.62/59.28 | 58.34/57.05 | -| [CenterPoint-Pillar](tools/cfgs/waymo_models/centerpoint_pillar_1x.yaml) | 73.37/72.86 | 65.09/64.62 | 75.35/65.11 | 67.61/58.25 | 67.76/66.22 | 65.25/63.77 | -| [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 77.05/76.51 | 68.47/67.97 | 75.24/66.87 | 66.18/58.62 | 68.60/67.36 | 66.13/64.93 | -| [VoxelNeXt-2D](tools/cfgs/waymo_models/voxelnext2d_ioubranch.yaml) | 77.94/77.47 |69.68/69.25 |80.24/73.47 |72.23/65.88 |73.33/72.20 |70.66/69.56 | -| [VoxelNeXt](tools/cfgs/waymo_models/voxelnext_ioubranch_large.yaml) | 78.16/77.70 |69.86/69.42 |81.47/76.30 |73.48/68.63 |76.06/74.90 |73.29/72.18 | -| [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 78.00/77.50 | 69.43/68.98 | 79.21/73.03 | 70.42/64.72 | 71.46/70.27 | 68.95/67.79 | -| [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 79.10/78.63 | 70.34/69.91 | 80.62/74.62 | 71.86/66.30 | 73.49/72.38 | 70.70/69.62 | -| [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) | 79.25/78.78 | 70.61/70.18 | 81.83/76.28 | 73.17/68.00 | 73.72/72.66 | 71.21/70.19 | -| [DSVT-Pillar](tools/cfgs/waymo_models/dsvt_pillar.yaml) | 79.44/78.97 | 71.24/70.81 | 83.00/77.22 | 75.45/69.95 | 76.70/75.70 | 73.83/72.86 | -| [DSVT-Voxel](tools/cfgs/waymo_models/dsvt_voxel.yaml) | 79.77/79.31 | 71.67/71.25 | 83.75/78.92 | 76.21/71.57 | 77.57/76.58 | 74.70/73.73 | -| [PV-RCNN++ (ResNet, 2 frames)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet_2frames.yaml) | 80.17/79.70 | 72.14/71.70 | 83.48/80.42 | 75.54/72.61 | 74.63/73.75 | 72.35/71.50 | -| [MPPNet (4 frames)](docs/guidelines_of_approaches/mppnet.md) | 81.54/81.06 | 74.07/73.61 | 84.56/81.94 | 77.20/74.67 | 77.15/76.50 | 75.01/74.38 | -| [MPPNet (16 frames)](docs/guidelines_of_approaches/mppnet.md) | 82.74/82.28 | 75.41/74.96 | 84.69/82.25 | 77.43/75.06 | 77.28/76.66 | 75.13/74.52 | - - - - - - - -We could not provide the above pretrained models due to [Waymo Dataset License Agreement](https://waymo.com/open/terms/), -but you could easily achieve similar performance by training with the default configs. - -### NuScenes 3D Object Detection Baselines -All models are trained with 8 GPUs and are available for download. For training BEVFusion, please refer to the [guideline](docs/guidelines_of_approaches/bevfusion.md). 
- -| | mATE | mASE | mAOE | mAVE | mAAE | mAP | NDS | download | -|----------------------------------------------------------------------------------------------------|-------:|:------:|:------:|:-----:|:-----:|:-----:|:------:|:--------------------------------------------------------------------------------------------------:| -| [PointPillar-MultiHead](tools/cfgs/nuscenes_models/cbgs_pp_multihead.yaml) | 33.87 | 26.00 | 32.07 | 28.74 | 20.15 | 44.63 | 58.23 | [model-23M](https://drive.google.com/file/d/1p-501mTWsq0G9RzroTWSXreIMyTUUpBM/view?usp=sharing) | -| [SECOND-MultiHead (CBGS)](tools/cfgs/nuscenes_models/cbgs_second_multihead.yaml) | 31.15 | 25.51 | 26.64 | 26.26 | 20.46 | 50.59 | 62.29 | [model-35M](https://drive.google.com/file/d/1bNzcOnE3u9iooBFMk2xK7HqhdeQ_nwTq/view?usp=sharing) | -| [CenterPoint-PointPillar](tools/cfgs/nuscenes_models/cbgs_dyn_pp_centerpoint.yaml) | 31.13 | 26.04 | 42.92 | 23.90 | 19.14 | 50.03 | 60.70 | [model-23M](https://drive.google.com/file/d/1UvGm6mROMyJzeSRu7OD1leU_YWoAZG7v/view?usp=sharing) | -| [CenterPoint (voxel_size=0.1)](tools/cfgs/nuscenes_models/cbgs_voxel01_res3d_centerpoint.yaml) | 30.11 | 25.55 | 38.28 | 21.94 | 18.87 | 56.03 | 64.54 | [model-34M](https://drive.google.com/file/d/1Cz-J1c3dw7JAWc25KRG1XQj8yCaOlexQ/view?usp=sharing) | -| [CenterPoint (voxel_size=0.075)](tools/cfgs/nuscenes_models/cbgs_voxel0075_res3d_centerpoint.yaml) | 28.80 | 25.43 | 37.27 | 21.55 | 18.24 | 59.22 | 66.48 | [model-34M](https://drive.google.com/file/d/1XOHAWm1MPkCKr1gqmc3TWi5AYZgPsgxU/view?usp=sharing) | -| [VoxelNeXt (voxel_size=0.075)](tools/cfgs/nuscenes_models/cbgs_voxel0075_voxelnext.yaml) | 30.11 | 25.23 | 40.57 | 21.69 | 18.56 | 60.53 | 66.65 | [model-31M](https://drive.google.com/file/d/1IV7e7G9X-61KXSjMGtQo579pzDNbhwvf/view?usp=share_link) | -| [TransFusion-L*](tools/cfgs/nuscenes_models/transfusion_lidar.yaml) | 27.96 | 25.37 | 29.35 | 27.31 | 18.55 | 64.58 | 69.43 | [model-32M](https://drive.google.com/file/d/1cuZ2qdDnxSwTCsiXWwbqCGF-uoazTXbz/view?usp=share_link) | -| [BEVFusion](tools/cfgs/nuscenes_models/bevfusion.yaml) | 28.03 | 25.43 | 30.19 | 26.76 | 18.48 | 67.75 | 70.98 | [model-157M](https://drive.google.com/file/d/1X50b-8immqlqD8VPAUkSKI0Ls-4k37g9/view?usp=share_link) | - -*: Use the fade strategy, which disables data augmentations in the last several epochs during training. - -### ONCE 3D Object Detection Baselines -All models are trained with 8 GPUs. - -| | Vehicle | Pedestrian | Cyclist | mAP | -| ------------------------------------------------------ | :-----: | :--------: | :-----: | :----: | -| [PointRCNN](tools/cfgs/once_models/pointrcnn.yaml) | 52.09 | 4.28 | 29.84 | 28.74 | -| [PointPillar](tools/cfgs/once_models/pointpillar.yaml) | 68.57 | 17.63 | 46.81 | 44.34 | -| [SECOND](tools/cfgs/once_models/second.yaml) | 71.19 | 26.44 | 58.04 | 51.89 | -| [PV-RCNN](tools/cfgs/once_models/pv_rcnn.yaml) | 77.77 | 23.50 | 59.37 | 53.55 | -| [CenterPoint](tools/cfgs/once_models/centerpoint.yaml) | 78.02 | 49.74 | 67.22 | 64.99 | - -### Argoverse2 3D Object Detection Baselines -All models are trained with 4 GPUs. 
-
-| | mAP | download |
-|---------------------------------------------------------|:----:|:--------------------------------------------------------------------------------------------------:|
-| [VoxelNeXt](tools/cfgs/argo2_models/cbgs_voxel01_voxelnext.yaml) | 30.5 | [model-32M](https://drive.google.com/file/d/1YP2UOz-yO-cWfYQkIqILEu6bodvCBVrR/view?usp=share_link) |
-### Other datasets
-Welcome to support other datasets by submitting pull request.
-## Installation
+| ![Image 1](resources/occlusion.png) | ![Image 2](resources/pattern.png) |
+|:-:|:-:|
+| *Detecting Occlusions* | *Exploiting Patterns* |
+| ![Image 3](resources/radius.png) | ![Image 4](resources/proposal_consensus.png) |
+| *Increase of Receptive Fields* | *Proposal Consensus* |
-Please refer to [INSTALL.md](docs/INSTALL.md) for the installation of `OpenPCDet`.
+# PV-RCNN-Relation
-## Quick Demo
-Please refer to [DEMO.md](docs/DEMO.md) for a quick demo to test with a pretrained model and
-visualize the predicted results on your custom data or the original KITTI data.
+PV-RCNN-Relation is an implementation of an object relations module applied to the PV-RCNN baseline. It beats its baseline on all difficulty levels for the car class.
-## Getting Started
-Please refer to [GETTING_STARTED.md](docs/GETTING_STARTED.md) to learn more usage about this project.
+## Results
+| Model | Easy R11 / R40 | Moderate R11 / R40 | Hard R11 / R40 | All R11 / R40 |
+|-------------------|----------------|--------------------|----------------|---------------|
+| PV-RCNN | 89.39 / 92.02 | 83.63 / 84.80 | 78.86 / 82.58 | 83.91 / 86.45 |
+| PV-RCNN-Relation | 89.59 / 92.53 | 84.56 / 85.22 | 79.04 / 82.99 | 84.35 / 86.90 |
+| *Improvement* | **+0.20 / +0.51** | **+0.93 / +0.42** | **+0.18 / +0.41** | **+0.44 / +0.45** |
-## License
+*Comparison of PV-RCNN and PV-RCNN-Relation on the KITTI validation set. Trained and evaluated only on the car class. All models were trained for 80 epochs and the best-performing epoch per model and metric was chosen. **Both models were trained three times** and the average is reported. The* Improvement *row represents the difference in mAP between the two models.*
-`OpenPCDet` is released under the [Apache 2.0 license](LICENSE).
-## Acknowledgement
-`OpenPCDet` is an open source project for LiDAR-based 3D scene perception that supports multiple
-LiDAR-based perception models as shown above. Some parts of `PCDet` are learned from the official released codes of the above supported methods.
-We would like to thank for their proposed methods and the official implementation.
+| | |
+|:-------------------------:|:-------------------------:|
+| ![Image 1](resources/side.png) | ![Image 2](resources/relation_side.png) |
+| | |
-We hope that this repo could serve as a strong and flexible codebase to benefit the research community by speeding up the process of reimplementing previous works and/or developing new methods.
+Qualitative results for the PV-RCNN baseline and PV-RCNN-Relation on Waymo data. Predictions are shown in green, labels in blue, and edges that connect proposals to share information in red.
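+## Reproducing the KITTI Results
+
+A minimal sketch of the car-class-only comparison above, assuming KITTI data has been prepared as described in the installation section (the `--extra_tag` value is illustrative; the PV-RCNN baseline is trained analogously with its own config):
+
+```bash
+cd tools
+
+# Train PV-RCNN-Relation on the car class only (the comparison above used 80 epochs)
+python train.py --cfg_file ./cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml --extra_tag repro_run
+
+# Evaluate all saved checkpoints and pick the best-performing epoch
+python test.py --cfg_file ./cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml --extra_tag repro_run --eval_all
+```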
-## Citation
-If you find this project useful in your research, please consider cite:
-```
-@misc{openpcdet2020,
- title={OpenPCDet: An Open-source Toolbox for 3D Object Detection from Point Clouds},
- author={OpenPCDet Development Team},
- howpublished = {\url{https://github.com/open-mmlab/OpenPCDet}},
- year={2020}
-}
-```
-## Contribution
-Welcome to be a member of the OpenPCDet development team by contributing to this repo, and feel free to contact us for any potential contributions.
diff --git a/README_OpenPCDet.md b/README_OpenPCDet.md new file mode 100644 index 000000000..779571acb --- /dev/null +++ b/README_OpenPCDet.md @@ -0,0 +1,291 @@
+
+
+# OpenPCDet
+
+`OpenPCDet` is a clear, simple, self-contained open source project for LiDAR-based 3D object detection.
+
+It is also the official code release of [`[PointRCNN]`](https://arxiv.org/abs/1812.04244), [`[Part-A2-Net]`](https://arxiv.org/abs/1907.03670), [`[PV-RCNN]`](https://arxiv.org/abs/1912.13192), [`[Voxel R-CNN]`](https://arxiv.org/abs/2012.15712), [`[PV-RCNN++]`](https://arxiv.org/abs/2102.00463) and [`[MPPNet]`](https://arxiv.org/abs/2205.05979).
+
+**Highlights**:
+* `OpenPCDet` has been updated to `v0.6.0` (Sep. 2022).
+* The codes of PV-RCNN++ have been supported.
+* The codes of MPPNet have been supported.
+* The multi-modal 3D detection approaches on Nuscenes have been supported.
+
+## Overview
+- [Changelog](#changelog)
+- [Design Pattern](#openpcdet-design-pattern)
+- [Model Zoo](#model-zoo)
+- [Installation](docs/INSTALL.md)
+- [Quick Demo](docs/DEMO.md)
+- [Getting Started](docs/GETTING_STARTED.md)
+- [Citation](#citation)
+
+
+## Changelog
+[2023-06-30] **NEW:** Added support for [`DSVT`](https://arxiv.org/abs/2301.06051), which achieves state-of-the-art performance on large-scale Waymo Open Dataset with real-time inference speed (27HZ with TensorRT).
+
+[2023-05-13] **NEW:** Added support for the multi-modal 3D object detection models on Nuscenes dataset.
+* Support multi-modal Nuscenes detection (See the [GETTING_STARTED.md](docs/GETTING_STARTED.md) to process data).
+* Support [TransFusion-Lidar](https://arxiv.org/abs/2203.11496) head, which achieves 69.43% NDS on Nuscenes validation dataset.
+* Support [`BEVFusion`](https://arxiv.org/abs/2205.13542), which fuses multi-modal information on BEV space and reaches 70.98% NDS on Nuscenes validation dataset. (see the [guideline](docs/guidelines_of_approaches/bevfusion.md) on how to train/test with BEVFusion).
+
+[2023-04-02] Added support for [`VoxelNeXt`](https://arxiv.org/abs/2303.11301) on Nuscenes, Waymo, and Argoverse2 datasets. It is a fully sparse 3D object detection network, which is a clean sparse CNNs network and predicts 3D objects directly upon voxels.
+
+[2022-09-02] **NEW:** Update `OpenPCDet` to v0.6.0:
+* Official code release of [`MPPNet`](https://arxiv.org/abs/2205.05979) for temporal 3D object detection, which supports long-term multi-frame 3D object detection and ranks 1st place on [3D detection leaderboard](https://waymo.com/open/challenges/2020/3d-detection) of Waymo Open Dataset on Sept. 2nd, 2022. For validation dataset, MPPNet achieves 74.96%, 75.06% and 74.52% for vehicle, pedestrian and cyclist classes in terms of mAPH@Level_2. (see the [guideline](docs/guidelines_of_approaches/mppnet.md) on how to train/test with MPPNet).
+* Support multi-frame training/testing on Waymo Open Dataset (see the [change log](docs/changelog.md) for more details on how to process data).
+* Support to save changing training details (e.g., loss, iter, epoch) to file (previous tqdm progress bar is still supported by using `--use_tqdm_to_record`). Please use `pip install gpustat` if you also want to log the GPU related information.
+* Support to save latest model every 5 minutes, so you can restore the model training from latest status instead of previous epoch.
+
+[2022-08-22] Added support for [custom dataset tutorial and template](docs/CUSTOM_DATASET_TUTORIAL.md)
+
+[2022-07-05] Added support for the 3D object detection backbone network [`Focals Conv`](https://openaccess.thecvf.com/content/CVPR2022/papers/Chen_Focal_Sparse_Convolutional_Networks_for_3D_Object_Detection_CVPR_2022_paper.pdf).
+
+[2022-02-12] Added support for using docker. Please refer to the guidance in [./docker](./docker).
+
+[2022-02-07] Added support for Centerpoint models on Nuscenes Dataset.
+
+[2022-01-14] Added support for dynamic pillar voxelization, following the implementation proposed in [`H^23D R-CNN`](https://arxiv.org/abs/2107.14391) with unique operation and [`torch_scatter`](https://github.com/rusty1s/pytorch_scatter) package.
+
+[2022-01-05] **NEW:** Update `OpenPCDet` to v0.5.2:
+* The code of [`PV-RCNN++`](https://arxiv.org/abs/2102.00463) has been released to this repo, with higher performance, faster training/inference speed and less memory consumption than PV-RCNN.
+* Add performance of several models trained with full training set of [Waymo Open Dataset](#waymo-open-dataset-baselines).
+* Support Lyft dataset, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/720).
+
+
+[2021-12-09] **NEW:** Update `OpenPCDet` to v0.5.1:
+* Add PointPillar related baseline configs/results on [Waymo Open Dataset](#waymo-open-dataset-baselines).
+* Support Pandaset dataloader, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/396).
+* Support a set of new augmentations, see the pull request [here](https://github.com/open-mmlab/OpenPCDet/pull/653).
+
+[2021-12-01] **NEW:** `OpenPCDet` v0.5.0 is released with the following features:
+* Improve the performance of all models on [Waymo Open Dataset](#waymo-open-dataset-baselines). Note that you need to re-prepare the training/validation data and ground-truth database of Waymo Open Dataset (see [GETTING_STARTED.md](docs/GETTING_STARTED.md)).
+* Support anchor-free [CenterHead](pcdet/models/dense_heads/center_head.py), add configs of `CenterPoint` and `PV-RCNN with CenterHead`.
+* Support latest **PyTorch 1.1~1.10** and **spconv 1.0~2.x**, where **spconv 2.x** should be easy to install with pip and faster than previous version (see the official update of spconv [here](https://github.com/traveller59/spconv)).
+* Support config [`USE_SHARED_MEMORY`](tools/cfgs/dataset_configs/waymo_dataset.yaml) to use shared memory to potentially speed up the training process in case you suffer from an IO problem.
+* Support better and faster [visualization script](tools/visual_utils/open3d_vis_utils.py), and you need to install [Open3D](https://github.com/isl-org/Open3D) firstly.
+
+[2021-06-08] Added support for the voxel-based 3D object detection model [`Voxel R-CNN`](#KITTI-3D-Object-Detection-Baselines).
+
+[2021-05-14] Added support for the monocular 3D object detection model [`CaDDN`](#KITTI-3D-Object-Detection-Baselines).
+
+[2020-11-27] Bugfixed: Please re-prepare the validation infos of Waymo dataset (version 1.2) if you would like to
+use our provided Waymo evaluation tool (see [PR](https://github.com/open-mmlab/OpenPCDet/pull/383)).
+Note that you do not need to re-prepare the training data and ground-truth database.
+
+[2020-11-10] The [Waymo Open Dataset](#waymo-open-dataset-baselines) has been supported with state-of-the-art results. Currently we provide the
+configs and results of `SECOND`, `PartA2` and `PV-RCNN` on the Waymo Open Dataset, and more models could be easily supported by modifying their dataset configs.
+
+[2020-08-10] Bugfixed: The provided NuScenes models have been updated to fix the loading bugs. Please redownload them if you need to use the pretrained NuScenes models.
+
+[2020-07-30] `OpenPCDet` v0.3.0 is released with the following features:
+ * The Point-based and Anchor-Free models ([`PointRCNN`](#KITTI-3D-Object-Detection-Baselines), [`PartA2-Free`](#KITTI-3D-Object-Detection-Baselines)) are supported now.
+ * The NuScenes dataset is supported with strong baseline results ([`SECOND-MultiHead (CBGS)`](#NuScenes-3D-Object-Detection-Baselines) and [`PointPillar-MultiHead`](#NuScenes-3D-Object-Detection-Baselines)).
+ * Higher efficiency than the last version, support **PyTorch 1.1~1.7** and **spconv 1.0~1.2** simultaneously.
+
+[2020-07-17] Add simple visualization codes and a quick demo to test with custom data.
+
+[2020-06-24] `OpenPCDet` v0.2.0 is released with pretty new structures to support more models and datasets.
+
+[2020-03-16] `OpenPCDet` v0.1.0 is released.
+
+
+## Introduction
+
+
+### What does `OpenPCDet` toolbox do?
+
+Note that we have upgraded `PCDet` from `v0.1` to `v0.2` with pretty new structures to support various datasets and models.
+
+`OpenPCDet` is a general PyTorch-based codebase for 3D object detection from point cloud.
+It currently supports multiple state-of-the-art 3D object detection methods with highly refactored codes for both one-stage and two-stage 3D detection frameworks.
+
+Based on `OpenPCDet` toolbox, we win the Waymo Open Dataset challenge in [3D Detection](https://waymo.com/open/challenges/3d-detection/),
+[3D Tracking](https://waymo.com/open/challenges/3d-tracking/), [Domain Adaptation](https://waymo.com/open/challenges/domain-adaptation/)
+three tracks among all LiDAR-only methods, and the Waymo related models will be released to `OpenPCDet` soon.
+
+We are actively updating this repo currently, and more datasets and models will be supported soon.
+Contributions are also welcomed.
+
+### `OpenPCDet` design pattern
+
+* Data-Model separation with unified point cloud coordinate for easily extending to custom datasets:
+
+

+ +* Unified 3D box definition: (x, y, z, dx, dy, dz, heading). + +* Flexible and clear model structure to easily support various 3D detection models: ++ +
+ +* Support various models within one framework as: ++ +
+ + +### Currently Supported Features + +- [x] Support both one-stage and two-stage 3D object detection frameworks +- [x] Support distributed training & testing with multiple GPUs and multiple machines +- [x] Support multiple heads on different scales to detect different classes +- [x] Support stacked version set abstraction to encode various number of points in different scenes +- [x] Support Adaptive Training Sample Selection (ATSS) for target assignment +- [x] Support RoI-aware point cloud pooling & RoI-grid point cloud pooling +- [x] Support GPU version 3D IoU calculation and rotated NMS + + +## Model Zoo + +### KITTI 3D Object Detection Baselines +Selected supported methods are shown in the below table. The results are the 3D detection performance of moderate difficulty on the *val* set of KITTI dataset. +* All LiDAR-based models are trained with 8 GTX 1080Ti GPUs and are available for download. +* The training time is measured with 8 TITAN XP GPUs and PyTorch 1.5. + +| | training time | Car@R11 | Pedestrian@R11 | Cyclist@R11 | download | +|---------------------------------------------|----------:|:-------:|:-------:|:-------:|:---------:| +| [PointPillar](tools/cfgs/kitti_models/pointpillar.yaml) |~1.2 hours| 77.28 | 52.29 | 62.68 | [model-18M](https://drive.google.com/file/d/1wMxWTpU1qUoY3DsCH31WJmvJxcjFXKlm/view?usp=sharing) | +| [SECOND](tools/cfgs/kitti_models/second.yaml) | ~1.7 hours | 78.62 | 52.98 | 67.15 | [model-20M](https://drive.google.com/file/d/1-01zsPOsqanZQqIIyy7FpNXStL3y4jdR/view?usp=sharing) | +| [SECOND-IoU](tools/cfgs/kitti_models/second_iou.yaml) | - | 79.09 | 55.74 | 71.31 | [model-46M](https://drive.google.com/file/d/1AQkeNs4bxhvhDQ-5sEo_yvQUlfo73lsW/view?usp=sharing) | +| [PointRCNN](tools/cfgs/kitti_models/pointrcnn.yaml) | ~3 hours | 78.70 | 54.41 | 72.11 | [model-16M](https://drive.google.com/file/d/1BCX9wMn-GYAfSOPpyxf6Iv6fc0qKLSiU/view?usp=sharing)| +| [PointRCNN-IoU](tools/cfgs/kitti_models/pointrcnn_iou.yaml) | ~3 hours | 78.75 | 58.32 | 71.34 | [model-16M](https://drive.google.com/file/d/1V0vNZ3lAHpEEt0MlT80eL2f41K2tHm_D/view?usp=sharing)| +| [Part-A2-Free](tools/cfgs/kitti_models/PartA2_free.yaml) | ~3.8 hours| 78.72 | 65.99 | 74.29 | [model-226M](https://drive.google.com/file/d/1lcUUxF8mJgZ_e-tZhP1XNQtTBuC-R0zr/view?usp=sharing) | +| [Part-A2-Anchor](tools/cfgs/kitti_models/PartA2.yaml) | ~4.3 hours| 79.40 | 60.05 | 69.90 | [model-244M](https://drive.google.com/file/d/10GK1aCkLqxGNeX3lVu8cLZyE0G8002hY/view?usp=sharing) | +| [PV-RCNN](tools/cfgs/kitti_models/pv_rcnn.yaml) | ~5 hours| 83.61 | 57.90 | 70.47 | [model-50M](https://drive.google.com/file/d/1lIOq4Hxr0W3qsX83ilQv0nk1Cls6KAr-/view?usp=sharing) | +| [Voxel R-CNN (Car)](tools/cfgs/kitti_models/voxel_rcnn_car.yaml) | ~2.2 hours| 84.54 | - | - | [model-28M](https://drive.google.com/file/d/19_jiAeGLz7V0wNjSJw4cKmMjdm5EW5By/view?usp=sharing) | +| [Focals Conv - F](tools/cfgs/kitti_models/voxel_rcnn_car_focal_multimodal.yaml) | ~4 hours| 85.66 | - | - | [model-30M](https://drive.google.com/file/d/1u2Vcg7gZPOI-EqrHy7_6fqaibvRt2IjQ/view?usp=sharing) | +|| +| [CaDDN (Mono)](tools/cfgs/kitti_models/CaDDN.yaml) |~15 hours| 21.38 | 13.02 | 9.76 | [model-774M](https://drive.google.com/file/d/1OQTO2PtXT8GGr35W9m2GZGuqgb6fyU1V/view?usp=sharing) | + +### Waymo Open Dataset Baselines +We provide the setting of [`DATA_CONFIG.SAMPLED_INTERVAL`](tools/cfgs/dataset_configs/waymo_dataset.yaml) on the Waymo Open Dataset (WOD) to subsample partial samples for training and evaluation, +so you could also play with WOD 
by setting a smaller `DATA_CONFIG.SAMPLED_INTERVAL` even if you only have limited GPU resources. + +By default, all models are trained with **a single frame** of **20% data (~32k frames)** of all the training samples on 8 GTX 1080Ti GPUs, and the results of each cell here are mAP/mAPH calculated by the official Waymo evaluation metrics on the **whole** validation set (version 1.2). + +| Performance@(train with 20\% Data) | Vec_L1 | Vec_L2 | Ped_L1 | Ped_L2 | Cyc_L1 | Cyc_L2 | +|---------------------------------------------|----------:|:-------:|:-------:|:-------:|:-------:|:-------:| +| [SECOND](tools/cfgs/waymo_models/second.yaml) | 70.96/70.34|62.58/62.02|65.23/54.24 |57.22/47.49| 57.13/55.62 | 54.97/53.53 | +| [PointPillar](tools/cfgs/waymo_models/pointpillar_1x.yaml) | 70.43/69.83 | 62.18/61.64 | 66.21/46.32|58.18/40.64|55.26/51.75|53.18/49.80 | +[CenterPoint-Pillar](tools/cfgs/waymo_models/centerpoint_pillar_1x.yaml)| 70.50/69.96|62.18/61.69|73.11/61.97|65.06/55.00|65.44/63.85|62.98/61.46| +[CenterPoint-Dynamic-Pillar](tools/cfgs/waymo_models/centerpoint_dyn_pillar_1x.yaml)| 70.46/69.93|62.06/61.58|73.92/63.35|65.91/56.33|66.24/64.69|63.73/62.24| +[CenterPoint](tools/cfgs/waymo_models/centerpoint_without_resnet.yaml)| 71.33/70.76|63.16/62.65| 72.09/65.49 |64.27/58.23| 68.68/67.39 |66.11/64.87| +| [CenterPoint (ResNet)](tools/cfgs/waymo_models/centerpoint.yaml)|72.76/72.23|64.91/64.42 |74.19/67.96 |66.03/60.34| 71.04/69.79 |68.49/67.28 | +| [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 74.66/74.12 |65.82/65.32 |71.71/62.24 |62.46/54.06 |66.53/65.18 |64.05/62.75 | +| [PV-RCNN (AnchorHead)](tools/cfgs/waymo_models/pv_rcnn.yaml) | 75.41/74.74 |67.44/66.80 |71.98/61.24 |63.70/53.95 |65.88/64.25 |63.39/61.82 | +| [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 75.95/75.43 |68.02/67.54 |75.94/69.40 |67.66/61.62 |70.18/68.98 |67.73/66.57| +| [Voxel R-CNN (CenterHead)-Dynamic-Voxel](tools/cfgs/waymo_models/voxel_rcnn_with_centerhead_dyn_voxel.yaml) | 76.13/75.66 |68.18/67.74 |78.20/71.98 |69.29/63.59 | 70.75/69.68 |68.25/67.21| +| [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 77.82/77.32| 69.07/68.62| 77.99/71.36| 69.92/63.74| 71.80/70.71| 69.31/68.26| +| [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) |77.61/77.14| 69.18/68.75| 79.42/73.31| 70.88/65.21| 72.50/71.39| 69.84/68.77| + +Here we also provide the performance of several models trained on the full training set (refer to the paper of [PV-RCNN++](https://arxiv.org/abs/2102.00463)): + +| Performance@(train with 100\% Data) | Vec_L1 | Vec_L2 | Ped_L1 | Ped_L2 | Cyc_L1 | Cyc_L2 | +|-------------------------------------------------------------------------------------------|----------:|:-------:|:-------:|:-------:|:-------:|:-------:| +| [SECOND](tools/cfgs/waymo_models/second.yaml) | 72.27/71.69 | 63.85/63.33 | 68.70/58.18 | 60.72/51.31 | 60.62/59.28 | 58.34/57.05 | +| [CenterPoint-Pillar](tools/cfgs/waymo_models/centerpoint_pillar_1x.yaml) | 73.37/72.86 | 65.09/64.62 | 75.35/65.11 | 67.61/58.25 | 67.76/66.22 | 65.25/63.77 | +| [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 77.05/76.51 | 68.47/67.97 | 75.24/66.87 | 66.18/58.62 | 68.60/67.36 | 66.13/64.93 | +| [VoxelNeXt-2D](tools/cfgs/waymo_models/voxelnext2d_ioubranch.yaml) | 77.94/77.47 |69.68/69.25 |80.24/73.47 |72.23/65.88 |73.33/72.20 |70.66/69.56 | +| [VoxelNeXt](tools/cfgs/waymo_models/voxelnext_ioubranch_large.yaml) | 78.16/77.70 |69.86/69.42 |81.47/76.30 |73.48/68.63 |76.06/74.90 
|73.29/72.18 | +| [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 78.00/77.50 | 69.43/68.98 | 79.21/73.03 | 70.42/64.72 | 71.46/70.27 | 68.95/67.79 | +| [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 79.10/78.63 | 70.34/69.91 | 80.62/74.62 | 71.86/66.30 | 73.49/72.38 | 70.70/69.62 | +| [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) | 79.25/78.78 | 70.61/70.18 | 81.83/76.28 | 73.17/68.00 | 73.72/72.66 | 71.21/70.19 | +| [DSVT-Pillar](tools/cfgs/waymo_models/dsvt_pillar.yaml) | 79.44/78.97 | 71.24/70.81 | 83.00/77.22 | 75.45/69.95 | 76.70/75.70 | 73.83/72.86 | +| [DSVT-Voxel](tools/cfgs/waymo_models/dsvt_voxel.yaml) | 79.77/79.31 | 71.67/71.25 | 83.75/78.92 | 76.21/71.57 | 77.57/76.58 | 74.70/73.73 | +| [PV-RCNN++ (ResNet, 2 frames)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet_2frames.yaml) | 80.17/79.70 | 72.14/71.70 | 83.48/80.42 | 75.54/72.61 | 74.63/73.75 | 72.35/71.50 | +| [MPPNet (4 frames)](docs/guidelines_of_approaches/mppnet.md) | 81.54/81.06 | 74.07/73.61 | 84.56/81.94 | 77.20/74.67 | 77.15/76.50 | 75.01/74.38 | +| [MPPNet (16 frames)](docs/guidelines_of_approaches/mppnet.md) | 82.74/82.28 | 75.41/74.96 | 84.69/82.25 | 77.43/75.06 | 77.28/76.66 | 75.13/74.52 | + + + + + + + +We could not provide the above pretrained models due to [Waymo Dataset License Agreement](https://waymo.com/open/terms/), +but you could easily achieve similar performance by training with the default configs. + +### NuScenes 3D Object Detection Baselines +All models are trained with 8 GPUs and are available for download. For training BEVFusion, please refer to the [guideline](docs/guidelines_of_approaches/bevfusion.md). + +| | mATE | mASE | mAOE | mAVE | mAAE | mAP | NDS | download | +|----------------------------------------------------------------------------------------------------|-------:|:------:|:------:|:-----:|:-----:|:-----:|:------:|:--------------------------------------------------------------------------------------------------:| +| [PointPillar-MultiHead](tools/cfgs/nuscenes_models/cbgs_pp_multihead.yaml) | 33.87 | 26.00 | 32.07 | 28.74 | 20.15 | 44.63 | 58.23 | [model-23M](https://drive.google.com/file/d/1p-501mTWsq0G9RzroTWSXreIMyTUUpBM/view?usp=sharing) | +| [SECOND-MultiHead (CBGS)](tools/cfgs/nuscenes_models/cbgs_second_multihead.yaml) | 31.15 | 25.51 | 26.64 | 26.26 | 20.46 | 50.59 | 62.29 | [model-35M](https://drive.google.com/file/d/1bNzcOnE3u9iooBFMk2xK7HqhdeQ_nwTq/view?usp=sharing) | +| [CenterPoint-PointPillar](tools/cfgs/nuscenes_models/cbgs_dyn_pp_centerpoint.yaml) | 31.13 | 26.04 | 42.92 | 23.90 | 19.14 | 50.03 | 60.70 | [model-23M](https://drive.google.com/file/d/1UvGm6mROMyJzeSRu7OD1leU_YWoAZG7v/view?usp=sharing) | +| [CenterPoint (voxel_size=0.1)](tools/cfgs/nuscenes_models/cbgs_voxel01_res3d_centerpoint.yaml) | 30.11 | 25.55 | 38.28 | 21.94 | 18.87 | 56.03 | 64.54 | [model-34M](https://drive.google.com/file/d/1Cz-J1c3dw7JAWc25KRG1XQj8yCaOlexQ/view?usp=sharing) | +| [CenterPoint (voxel_size=0.075)](tools/cfgs/nuscenes_models/cbgs_voxel0075_res3d_centerpoint.yaml) | 28.80 | 25.43 | 37.27 | 21.55 | 18.24 | 59.22 | 66.48 | [model-34M](https://drive.google.com/file/d/1XOHAWm1MPkCKr1gqmc3TWi5AYZgPsgxU/view?usp=sharing) | +| [VoxelNeXt (voxel_size=0.075)](tools/cfgs/nuscenes_models/cbgs_voxel0075_voxelnext.yaml) | 30.11 | 25.23 | 40.57 | 21.69 | 18.56 | 60.53 | 66.65 | [model-31M](https://drive.google.com/file/d/1IV7e7G9X-61KXSjMGtQo579pzDNbhwvf/view?usp=share_link) | +| 
[TransFusion-L*](tools/cfgs/nuscenes_models/transfusion_lidar.yaml) | 27.96 | 25.37 | 29.35 | 27.31 | 18.55 | 64.58 | 69.43 | [model-32M](https://drive.google.com/file/d/1cuZ2qdDnxSwTCsiXWwbqCGF-uoazTXbz/view?usp=share_link) |
+| [BEVFusion](tools/cfgs/nuscenes_models/bevfusion.yaml) | 28.03 | 25.43 | 30.19 | 26.76 | 18.48 | 67.75 | 70.98 | [model-157M](https://drive.google.com/file/d/1X50b-8immqlqD8VPAUkSKI0Ls-4k37g9/view?usp=share_link) |
+
+*: Uses the fade strategy, which disables data augmentations in the last several epochs during training.
+
+### ONCE 3D Object Detection Baselines
+All models are trained with 8 GPUs.
+
+| | Vehicle | Pedestrian | Cyclist | mAP |
+| ------------------------------------------------------ | :-----: | :--------: | :-----: | :----: |
+| [PointRCNN](tools/cfgs/once_models/pointrcnn.yaml) | 52.09 | 4.28 | 29.84 | 28.74 |
+| [PointPillar](tools/cfgs/once_models/pointpillar.yaml) | 68.57 | 17.63 | 46.81 | 44.34 |
+| [SECOND](tools/cfgs/once_models/second.yaml) | 71.19 | 26.44 | 58.04 | 51.89 |
+| [PV-RCNN](tools/cfgs/once_models/pv_rcnn.yaml) | 77.77 | 23.50 | 59.37 | 53.55 |
+| [CenterPoint](tools/cfgs/once_models/centerpoint.yaml) | 78.02 | 49.74 | 67.22 | 64.99 |
+
+### Argoverse2 3D Object Detection Baselines
+All models are trained with 4 GPUs.
+
+| | mAP | download |
+|---------------------------------------------------------|:----:|:--------------------------------------------------------------------------------------------------:|
+| [VoxelNeXt](tools/cfgs/argo2_models/cbgs_voxel01_voxelnext.yaml) | 30.5 | [model-32M](https://drive.google.com/file/d/1YP2UOz-yO-cWfYQkIqILEu6bodvCBVrR/view?usp=share_link) |
+
+### Other datasets
+Contributions that add support for other datasets are welcome; please submit a pull request.
+
+## Installation
+
+Please refer to [INSTALL.md](docs/INSTALL.md) for the installation of `OpenPCDet`.
+
+
+## Quick Demo
+Please refer to [DEMO.md](docs/DEMO.md) for a quick demo to test with a pretrained model and
+visualize the predicted results on your custom data or the original KITTI data; a minimal programmatic sketch is also provided at the end of this README.
+
+## Getting Started
+
+Please refer to [GETTING_STARTED.md](docs/GETTING_STARTED.md) to learn more about how to use this project.
+
+
+## License
+
+`OpenPCDet` is released under the [Apache 2.0 license](LICENSE).
+
+## Acknowledgement
+`OpenPCDet` is an open source project for LiDAR-based 3D scene perception that supports multiple
+LiDAR-based perception models as shown above. Some parts of `PCDet` are adapted from the officially released code of the supported methods.
+We would like to thank the authors for their proposed methods and official implementations.
+
+We hope this repo can serve as a strong and flexible codebase that benefits the research community by speeding up the reimplementation of previous works and the development of new methods.
+
+
+## Citation
+If you find this project useful in your research, please consider citing:
+
+
+```
+@misc{openpcdet2020,
+    title={OpenPCDet: An Open-source Toolbox for 3D Object Detection from Point Clouds},
+    author={OpenPCDet Development Team},
+    howpublished = {\url{https://github.com/open-mmlab/OpenPCDet}},
+    year={2020}
+}
+```
+
+## Contribution
+You are welcome to join the OpenPCDet development team by contributing to this repo, and feel free to contact us about any potential contributions.
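+
+## Quick Inference Sketch
+For convenience, the snippet below sketches the typical programmatic flow behind the demo and evaluation scripts: load a config, build the dataloader and the network, restore a pretrained checkpoint, and run one forward pass. It mirrors what `tools/demo.py` and `tools/test.py` do, but the config and checkpoint paths here are placeholders and exact signatures may differ between versions, so treat it as a sketch rather than a drop-in script.
+
+```
+import torch
+
+from pcdet.config import cfg, cfg_from_yaml_file
+from pcdet.datasets import build_dataloader
+from pcdet.models import build_network, load_data_to_gpu
+from pcdet.utils import common_utils
+
+# Placeholder paths: point these at a real config and checkpoint.
+cfg_from_yaml_file('tools/cfgs/kitti_models/pv_rcnn.yaml', cfg)
+logger = common_utils.create_logger()
+
+# build_dataloader returns (dataset, dataloader, sampler).
+test_set, test_loader, _ = build_dataloader(
+    dataset_cfg=cfg.DATA_CONFIG, class_names=cfg.CLASS_NAMES,
+    batch_size=1, dist=False, workers=4, logger=logger, training=False
+)
+
+model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=test_set)
+model.load_params_from_file(filename='pv_rcnn.pth', logger=logger, to_cpu=True)
+model.cuda().eval()
+
+with torch.no_grad():
+    batch_dict = next(iter(test_loader))
+    load_data_to_gpu(batch_dict)
+    # In eval mode the detector returns one dict per sample with
+    # 'pred_boxes', 'pred_scores' and 'pred_labels'.
+    pred_dicts, _ = model(batch_dict)
+```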
+ + diff --git a/pcdet/datasets/__init__.py b/pcdet/datasets/__init__.py index 47c3900bf..7bcd7ab17 100644 --- a/pcdet/datasets/__init__.py +++ b/pcdet/datasets/__init__.py @@ -12,7 +12,7 @@ from .pandaset.pandaset_dataset import PandasetDataset from .lyft.lyft_dataset import LyftDataset from .once.once_dataset import ONCEDataset -from .argo2.argo2_dataset import Argo2Dataset +# from .argo2.argo2_dataset import Argo2Dataset from .custom.custom_dataset import CustomDataset __all__ = { @@ -24,7 +24,7 @@ 'LyftDataset': LyftDataset, 'ONCEDataset': ONCEDataset, 'CustomDataset': CustomDataset, - 'Argo2Dataset': Argo2Dataset + # 'Argo2Dataset': Argo2Dataset } diff --git a/pcdet/datasets/augmentor/database_sampler.py b/pcdet/datasets/augmentor/database_sampler.py index 105708a60..c10f78fa0 100644 --- a/pcdet/datasets/augmentor/database_sampler.py +++ b/pcdet/datasets/augmentor/database_sampler.py @@ -5,7 +5,7 @@ import numpy as np from skimage import io import torch -import SharedArray +# import SharedArray import torch.distributed as dist from ...ops.iou3d_nms import iou3d_nms_utils @@ -70,16 +70,17 @@ def __setstate__(self, d): self.__dict__.update(d) def __del__(self): - if self.use_shared_memory: - self.logger.info('Deleting GT database from shared memory') - cur_rank, num_gpus = common_utils.get_dist_info() - sa_key = self.sampler_cfg.DB_DATA_PATH[0] - if cur_rank % num_gpus == 0 and os.path.exists(f"/dev/shm/{sa_key}"): - SharedArray.delete(f"shm://{sa_key}") - - if num_gpus > 1: - dist.barrier() - self.logger.info('GT database has been removed from shared memory') + pass + # if self.use_shared_memory: + # self.logger.info('Deleting GT database from shared memory') + # cur_rank, num_gpus = common_utils.get_dist_info() + # sa_key = self.sampler_cfg.DB_DATA_PATH[0] + # if cur_rank % num_gpus == 0 and os.path.exists(f"/dev/shm/{sa_key}"): + # SharedArray.delete(f"shm://{sa_key}") + + # if num_gpus > 1: + # dist.barrier() + # self.logger.info('GT database has been removed from shared memory') def load_db_to_shared_memory(self): self.logger.info('Loading GT database to shared memory') @@ -379,11 +380,11 @@ def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sa # convert sampled 3D boxes to image plane img_aug_gt_dict = self.initilize_image_aug_dict(data_dict, gt_boxes_mask) - if self.use_shared_memory: - gt_database_data = SharedArray.attach(f"shm://{self.gt_database_data_key}") - gt_database_data.setflags(write=0) - else: - gt_database_data = None + # if self.use_shared_memory: + # gt_database_data = SharedArray.attach(f"shm://{self.gt_database_data_key}") + # gt_database_data.setflags(write=0) + # else: + gt_database_data = None for idx, info in enumerate(total_valid_sampled_dict): if self.use_shared_memory: @@ -392,12 +393,13 @@ def add_sampled_boxes_to_scene(self, data_dict, sampled_gt_boxes, total_valid_sa else: file_path = self.root_path / info['path'] - obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape( - [-1, self.sampler_cfg.NUM_POINT_FEATURES]) + obj_points = np.fromfile(str(file_path), dtype=np.float32).reshape([-1, self.sampler_cfg.NUM_POINT_FEATURES]) if obj_points.shape[0] != info['num_points_in_gt']: obj_points = np.fromfile(str(file_path), dtype=np.float64).reshape(-1, self.sampler_cfg.NUM_POINT_FEATURES) - assert obj_points.shape[0] == info['num_points_in_gt'] + + error_string = 'GT database error for file %s info: %s, %s, %s, %s' % (file_path, info['name'], info['path'], str(info['image_idx']), str(info['gt_idx'])) + assert 
obj_points.shape[0] == info['num_points_in_gt'], error_string obj_points[:, :3] += info['box3d_lidar'][:3].astype(np.float32) if self.sampler_cfg.get('USE_ROAD_PLANE', False): diff --git a/pcdet/datasets/waymo/waymo_dataset.py b/pcdet/datasets/waymo/waymo_dataset.py index 44632d53f..38ca24cf7 100644 --- a/pcdet/datasets/waymo/waymo_dataset.py +++ b/pcdet/datasets/waymo/waymo_dataset.py @@ -9,7 +9,7 @@ import numpy as np import torch import multiprocessing -import SharedArray +# import SharedArray import torch.distributed as dist from tqdm import tqdm from pathlib import Path @@ -149,7 +149,7 @@ def clean_shared_memory(self): if not os.path.exists(f"/dev/shm/{sa_key}"): continue - SharedArray.delete(f"shm://{sa_key}") + # SharedArray.delete(f"shm://{sa_key}") if num_gpus > 1: dist.barrier() @@ -352,11 +352,11 @@ def __getitem__(self, index): input_dict = { 'sample_idx': sample_idx } - if self.use_shared_memory and index < self.shared_memory_file_limit: - sa_key = f'{sequence_name}___{sample_idx}' - points = SharedArray.attach(f"shm://{sa_key}").copy() - else: - points = self.get_lidar(sequence_name, sample_idx) + # if self.use_shared_memory and index < self.shared_memory_file_limit: + # sa_key = f'{sequence_name}___{sample_idx}' + # points = SharedArray.attach(f"shm://{sa_key}").copy() + # else: + points = self.get_lidar(sequence_name, sample_idx) if self.dataset_cfg.get('SEQUENCE_CONFIG', None) is not None and self.dataset_cfg.SEQUENCE_CONFIG.ENABLED: points, num_points_all, sample_idx_pre_list, poses, pred_boxes, pred_scores, pred_labels = self.get_sequence_data( @@ -714,6 +714,8 @@ def create_waymo_infos(dataset_cfg, class_names, data_path, save_path, os.environ["CUDA_VISIBLE_DEVICES"] = "-1" print('---------------Start to generate data infos---------------') + raw_data_tag = 'training' + dataset.set_split(train_split) waymo_infos_train = dataset.get_infos( raw_data_path=data_path / raw_data_tag, @@ -724,6 +726,8 @@ def create_waymo_infos(dataset_cfg, class_names, data_path, save_path, pickle.dump(waymo_infos_train, f) print('----------------Waymo info train file is saved to %s----------------' % train_filename) + raw_data_tag = 'validation' + dataset.set_split(val_split) waymo_infos_val = dataset.get_infos( raw_data_path=data_path / raw_data_tag, diff --git a/pcdet/models/detectors/PartA2_relation_net.py b/pcdet/models/detectors/PartA2_relation_net.py new file mode 100644 index 000000000..30e68dd2d --- /dev/null +++ b/pcdet/models/detectors/PartA2_relation_net.py @@ -0,0 +1,36 @@ +from .detector3d_template import Detector3DTemplate +from ..object_relation import build_object_relation_module + + +class PartA2NetRelation(Detector3DTemplate): + def __init__(self, model_cfg, num_class, dataset): + super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) + self.module_list = self.build_networks() + self.object_relation = build_object_relation_module(model_cfg.OBJECT_RELATION) + + def forward(self, batch_dict): + for cur_module in self.module_list: + batch_dict = cur_module(batch_dict) + + batch_dict = self.object_relation(batch_dict) + batch_dict = self.roi_head.final_predictions(batch_dict) + + if self.training: + loss, tb_dict, disp_dict = self.get_training_loss() + + ret_dict = { + 'loss': loss + } + return ret_dict, tb_dict, disp_dict + else: + pred_dicts, recall_dicts = self.post_processing(batch_dict) + return pred_dicts, recall_dicts + + def get_training_loss(self): + disp_dict = {} + loss_rpn, tb_dict = self.dense_head.get_loss() + loss_point, tb_dict = 
self.point_head.get_loss(tb_dict) + loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) + + loss = loss_rpn + loss_point + loss_rcnn + return loss, tb_dict, disp_dict \ No newline at end of file diff --git a/pcdet/models/detectors/__init__.py b/pcdet/models/detectors/__init__.py index 1af193078..30484ae9c 100644 --- a/pcdet/models/detectors/__init__.py +++ b/pcdet/models/detectors/__init__.py @@ -15,6 +15,11 @@ from .voxelnext import VoxelNeXt from .transfusion import TransFusion from .bevfusion import BevFusion +from .pv_rcnn_relation import PVRCNNRelation +from .pv_rcnn_plusplus_relation import PVRCNNPlusPlusRelation +from .centerpoint_twostage import CenterPointTwoStage +from .PartA2_relation_net import PartA2NetRelation +from .voxel_rcnn_relation import VoxelRCNNRelation __all__ = { 'Detector3DTemplate': Detector3DTemplate, @@ -35,6 +40,11 @@ 'VoxelNeXt': VoxelNeXt, 'TransFusion': TransFusion, 'BevFusion': BevFusion, + 'PVRCNNRelation': PVRCNNRelation, + 'PVRCNNPlusPlusRelation': PVRCNNPlusPlusRelation, + 'CenterPointTwoStage': CenterPointTwoStage, + 'PartA2NetRelation': PartA2NetRelation, + 'VoxelRCNNRelation': VoxelRCNNRelation, } diff --git a/pcdet/models/detectors/centerpoint_twostage.py b/pcdet/models/detectors/centerpoint_twostage.py new file mode 100644 index 000000000..4b19b2c3a --- /dev/null +++ b/pcdet/models/detectors/centerpoint_twostage.py @@ -0,0 +1,56 @@ +from .detector3d_template import Detector3DTemplate + + +class CenterPointTwoStage(Detector3DTemplate): + def __init__(self, model_cfg, num_class, dataset): + super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) + self.module_list = self.build_networks() + + def forward(self, batch_dict): + batch_dict = self.vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.map_to_bev_module(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + batch_dict = self.dense_head(batch_dict) + batch_dict = self.roi_head(batch_dict) + + if self.training: + loss, tb_dict, disp_dict = self.get_training_loss() + + ret_dict = { + 'loss': loss + } + return ret_dict, tb_dict, disp_dict + else: + pred_dicts, recall_dicts = self.post_processing(batch_dict) + return pred_dicts, recall_dicts + + def get_training_loss(self): + disp_dict = {} + loss_rpn, tb_dict = self.dense_head.get_loss() + tb_dict = { + 'loss_rpn': loss_rpn.item(), + **tb_dict + } + + loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) + + loss = loss_rpn + loss_rcnn + return loss, tb_dict, disp_dict + + # def post_processing(self, batch_dict): + # post_process_cfg = self.model_cfg.POST_PROCESSING + # batch_size = batch_dict['batch_size'] + # # final_pred_dict = batch_dict['final_box_dicts'] + # final_preds = batch_dict['batch_box_preds'] + # recall_dict = {} + # for index in range(batch_size): + # pred_boxes = final_pred_dict[index]['pred_boxes'] + + # recall_dict = self.generate_recall_record( + # box_preds=pred_boxes, + # recall_dict=recall_dict, batch_index=index, data_dict=batch_dict, + # thresh_list=post_process_cfg.RECALL_THRESH_LIST + # ) + + # return final_pred_dict, recall_dict diff --git a/pcdet/models/detectors/detector3d_template.py b/pcdet/models/detectors/detector3d_template.py index 91e44bd46..25793f844 100644 --- a/pcdet/models/detectors/detector3d_template.py +++ b/pcdet/models/detectors/detector3d_template.py @@ -160,14 +160,19 @@ def build_point_head(self, model_info_dict): def build_roi_head(self, model_info_dict): if self.model_cfg.get('ROI_HEAD', None) is None: return None, model_info_dict - 
point_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME]( - model_cfg=self.model_cfg.ROI_HEAD, - input_channels=model_info_dict['num_point_features'], - backbone_channels= model_info_dict.get('backbone_channels', None), - point_cloud_range=model_info_dict['point_cloud_range'], - voxel_size=model_info_dict['voxel_size'], - num_class=self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1, - ) + + common_args = { + 'model_cfg': self.model_cfg.ROI_HEAD, + 'input_channels': model_info_dict['num_point_features'], + 'backbone_channels': model_info_dict.get('backbone_channels', None), + 'point_cloud_range': model_info_dict['point_cloud_range'], + 'voxel_size': model_info_dict['voxel_size'], + 'num_class': self.num_class if not self.model_cfg.ROI_HEAD.CLASS_AGNOSTIC else 1, + } + if 'Relation' in self.model_cfg.NAME: + common_args['object_relation_config'] = self.model_cfg.OBJECT_RELATION + + point_head_module = roi_heads.__all__[self.model_cfg.ROI_HEAD.NAME](**common_args) model_info_dict['module_list'].append(point_head_module) return point_head_module, model_info_dict @@ -196,6 +201,7 @@ def post_processing(self, batch_dict): batch_size = batch_dict['batch_size'] recall_dict = {} pred_dicts = [] + edges_list = [] for index in range(batch_size): if batch_dict.get('batch_index', None) is not None: assert batch_dict['batch_box_preds'].shape.__len__() == 2 @@ -255,7 +261,8 @@ def post_processing(self, batch_dict): else: label_preds = label_preds + 1 selected, selected_scores = model_nms_utils.class_agnostic_nms( - box_scores=cls_preds, box_preds=box_preds, + box_scores=cls_preds, box_preds=box_preds, + label_preds=label_preds, nms_config=post_process_cfg.NMS_CONFIG, score_thresh=post_process_cfg.SCORE_THRESH ) @@ -267,6 +274,14 @@ def post_processing(self, batch_dict): final_scores = selected_scores final_labels = label_preds[selected] final_boxes = box_preds[selected] + + include_edges = "OBJECT_RELATION" in self.model_cfg and self.model_cfg.OBJECT_RELATION.NAME == "GNN" + if include_edges: + edges = batch_dict['gnn_edges'] + from_node, to_node = edges + edges_mask = torch.isin(from_node, selected) & torch.isin(to_node, selected) + final_edges = edges[:, edges_mask] + edge_to_pred = {selected[i].item(): i for i in list(range(len(selected)))} recall_dict = self.generate_recall_record( box_preds=final_boxes if 'rois' not in batch_dict else src_box_preds, @@ -277,7 +292,9 @@ def post_processing(self, batch_dict): record_dict = { 'pred_boxes': final_boxes, 'pred_scores': final_scores, - 'pred_labels': final_labels + 'pred_labels': final_labels, + 'gnn_edges_final': final_edges if include_edges else None, + 'edge_to_pred': edge_to_pred if include_edges else None, } pred_dicts.append(record_dict) @@ -327,7 +344,7 @@ def generate_recall_record(box_preds, recall_dict, batch_index, data_dict=None, gt_iou = box_preds.new_zeros(box_preds.shape[0]) return recall_dict - def _load_state_dict(self, model_state_disk, *, strict=True): + def _load_state_dict(self, model_state_disk, *, strict=True, learnable_layer=None): state_dict = self.state_dict() # local cache of state_dict spconv_keys = find_all_spconv_keys(self) @@ -354,11 +371,15 @@ def _load_state_dict(self, model_state_disk, *, strict=True): if strict: self.load_state_dict(update_model_state) else: + if learnable_layer: + for key in list(update_model_state.keys()): + if any([(l in key) for l in learnable_layer]): + del update_model_state[key] state_dict.update(update_model_state) self.load_state_dict(state_dict) return state_dict, 
update_model_state - def load_params_from_file(self, filename, logger, to_cpu=False, pre_trained_path=None): + def load_params_from_file(self, filename, logger, to_cpu=False, pre_trained_path=None, learnable_layer=None): if not os.path.isfile(filename): raise FileNotFoundError @@ -375,7 +396,7 @@ def load_params_from_file(self, filename, logger, to_cpu=False, pre_trained_path if version is not None: logger.info('==> Checkpoint trained from version: %s' % version) - state_dict, update_model_state = self._load_state_dict(model_state_disk, strict=False) + state_dict, update_model_state = self._load_state_dict(model_state_disk, strict=False, learnable_layer=learnable_layer) for key in state_dict: if key not in update_model_state: diff --git a/pcdet/models/detectors/pv_rcnn.py b/pcdet/models/detectors/pv_rcnn.py index 4808513a2..8a1f87c98 100644 --- a/pcdet/models/detectors/pv_rcnn.py +++ b/pcdet/models/detectors/pv_rcnn.py @@ -5,6 +5,7 @@ class PVRCNN(Detector3DTemplate): def __init__(self, model_cfg, num_class, dataset): super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) self.module_list = self.build_networks() + self.frozen = model_cfg.FROZEN if "FROZEN" in model_cfg.keys() else False def forward(self, batch_dict): for cur_module in self.module_list: @@ -27,7 +28,10 @@ def get_training_loss(self): loss_point, tb_dict = self.point_head.get_loss(tb_dict) loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) - loss = loss_rpn + loss_point + loss_rcnn + if self.frozen: + loss = loss_rcnn + else: + loss = loss_rpn + loss_point + loss_rcnn if hasattr(self.backbone_3d, 'get_loss'): loss_backbone3d, tb_dict = self.backbone_3d.get_loss(tb_dict) diff --git a/pcdet/models/detectors/pv_rcnn_plusplus_relation.py b/pcdet/models/detectors/pv_rcnn_plusplus_relation.py new file mode 100644 index 000000000..de373ae2b --- /dev/null +++ b/pcdet/models/detectors/pv_rcnn_plusplus_relation.py @@ -0,0 +1,61 @@ +from .detector3d_template import Detector3DTemplate + +from pcdet.models.object_relation.gnn import GNN + + +class PVRCNNPlusPlusRelation(Detector3DTemplate): + def __init__(self, model_cfg, num_class, dataset): + super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) + self.module_list = self.build_networks() + self.gnn = GNN(model_cfg.OBJECT_RELATION) + + def forward(self, batch_dict): + batch_dict = self.vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.map_to_bev_module(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + batch_dict = self.dense_head(batch_dict) + + batch_dict = self.roi_head.proposal_layer( + batch_dict, nms_config=self.roi_head.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + ) + if self.training: + targets_dict = self.roi_head.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + batch_dict['roi_targets_dict'] = targets_dict + num_rois_per_scene = targets_dict['rois'].shape[1] + if 'roi_valid_num' in batch_dict: + batch_dict['roi_valid_num'] = [num_rois_per_scene for _ in range(batch_dict['batch_size'])] + + batch_dict = self.pfe(batch_dict) + batch_dict = self.point_head(batch_dict) + batch_dict = self.roi_head(batch_dict) + + # GNN: Object relation + batch_dict = self.gnn(batch_dict) + batch_dict = self.roi_head.final_predictions(batch_dict) + + + if self.training: + loss, tb_dict, disp_dict = self.get_training_loss() + + ret_dict = { + 'loss': loss + } + return ret_dict, tb_dict, disp_dict + else: + pred_dicts, 
recall_dicts = self.post_processing(batch_dict) + return pred_dicts, recall_dicts + + def get_training_loss(self): + disp_dict = {} + loss_rpn, tb_dict = self.dense_head.get_loss() + if self.point_head is not None: + loss_point, tb_dict = self.point_head.get_loss(tb_dict) + else: + loss_point = 0 + loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) + + loss = loss_rpn + loss_point + loss_rcnn + return loss, tb_dict, disp_dict diff --git a/pcdet/models/detectors/pv_rcnn_relation.py b/pcdet/models/detectors/pv_rcnn_relation.py new file mode 100644 index 000000000..6ac3cc804 --- /dev/null +++ b/pcdet/models/detectors/pv_rcnn_relation.py @@ -0,0 +1,63 @@ +from .detector3d_template import Detector3DTemplate + +from pcdet.models.object_relation.gnn import GNN +from ..object_relation import build_object_relation_module + + +class PVRCNNRelation(Detector3DTemplate): + def __init__(self, model_cfg, num_class, dataset): + super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) + self.module_list = self.build_networks() + self.object_relation = build_object_relation_module(model_cfg.OBJECT_RELATION) + self.frozen = model_cfg.FROZEN if "FROZEN" in model_cfg.keys() else False + + def forward(self, batch_dict): + # MeanVFE: Voxelisation + batch_dict = self.vfe(batch_dict) + # VoxelBackBone8x: 3D Backbone + batch_dict = self.backbone_3d(batch_dict) + # HeightCompression(): 3D to BEV + batch_dict = self.map_to_bev_module(batch_dict) + # VoxelSetAbstraction: Aggregation of raw points with 3D features and BEV features + batch_dict = self.pfe(batch_dict) + # BaseBEVBackbone: 2D Backbone + batch_dict = self.backbone_2d(batch_dict) + # AnchorHeadSingle: Proposal generation for each voxel + batch_dict = self.dense_head(batch_dict) + # PointHeadSimple: prediction of a + batch_dict = self.point_head(batch_dict) + # PVRCNNHead: Proposal refinement + batch_dict = self.roi_head(batch_dict) + # GNN: Object relation + batch_dict = self.object_relation(batch_dict) + + batch_dict = self.roi_head.final_predictions(batch_dict) + + + if self.training: + loss, tb_dict, disp_dict = self.get_training_loss() + + ret_dict = { + 'loss': loss + } + return ret_dict, tb_dict, disp_dict + else: + pred_dicts, recall_dicts = self.post_processing(batch_dict) + return pred_dicts, recall_dicts + + def get_training_loss(self): + disp_dict = {} + loss_rpn, tb_dict = self.dense_head.get_loss() + loss_point, tb_dict = self.point_head.get_loss(tb_dict) + loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) + + if self.frozen: + loss = loss_rcnn + else: + loss = loss_rpn + loss_point + loss_rcnn + + if hasattr(self.backbone_3d, 'get_loss'): + loss_backbone3d, tb_dict = self.backbone_3d.get_loss(tb_dict) + loss += loss_backbone3d + + return loss, tb_dict, disp_dict diff --git a/pcdet/models/detectors/voxel_rcnn_relation.py b/pcdet/models/detectors/voxel_rcnn_relation.py new file mode 100644 index 000000000..9353936f0 --- /dev/null +++ b/pcdet/models/detectors/voxel_rcnn_relation.py @@ -0,0 +1,42 @@ +from .detector3d_template import Detector3DTemplate +from ..object_relation import build_object_relation_module + + +class VoxelRCNNRelation(Detector3DTemplate): + def __init__(self, model_cfg, num_class, dataset): + super().__init__(model_cfg=model_cfg, num_class=num_class, dataset=dataset) + self.module_list = self.build_networks() + self.object_relation = build_object_relation_module(model_cfg.OBJECT_RELATION) + + def forward(self, batch_dict): + for cur_module in self.module_list: + batch_dict = cur_module(batch_dict) + + 
batch_dict = self.object_relation(batch_dict) + batch_dict = self.roi_head.final_predictions(batch_dict) + + if self.training: + loss, tb_dict, disp_dict = self.get_training_loss() + + ret_dict = { + 'loss': loss + } + return ret_dict, tb_dict, disp_dict + else: + pred_dicts, recall_dicts = self.post_processing(batch_dict) + return pred_dicts, recall_dicts + + def get_training_loss(self): + disp_dict = {} + loss = 0 + + loss_rpn, tb_dict = self.dense_head.get_loss() + loss_rcnn, tb_dict = self.roi_head.get_loss(tb_dict) + + loss = loss + loss_rpn + loss_rcnn + + if hasattr(self.backbone_3d, 'get_loss'): + loss_backbone3d, tb_dict = self.backbone_3d.get_loss(tb_dict) + loss += loss_backbone3d + + return loss, tb_dict, disp_dict diff --git a/pcdet/models/model_utils/model_nms_utils.py b/pcdet/models/model_utils/model_nms_utils.py index 8be1097e9..cd5c5d586 100644 --- a/pcdet/models/model_utils/model_nms_utils.py +++ b/pcdet/models/model_utils/model_nms_utils.py @@ -1,15 +1,26 @@ import torch +from functools import reduce from ...ops.iou3d_nms import iou3d_nms_utils -def class_agnostic_nms(box_scores, box_preds, nms_config, score_thresh=None): +def class_agnostic_nms(box_scores, box_preds, nms_config, label_preds=None, score_thresh=None): src_box_scores = box_scores if score_thresh is not None: - scores_mask = (box_scores >= score_thresh) - box_scores = box_scores[scores_mask] - box_preds = box_preds[scores_mask] - + if isinstance(score_thresh, list): + assert label_preds is not None, 'label preds have to be provided for class specific threshold' + scores_mask_list = [] + for class_index, class_score_threshold in enumerate(score_thresh): + cur_scores_mask = torch.logical_and(label_preds == (class_index + 1), box_scores >= class_score_threshold) + scores_mask_list.append(cur_scores_mask) + scores_mask = reduce(torch.logical_or, scores_mask_list) + box_scores = box_scores[scores_mask] + box_preds = box_preds[scores_mask] + else: + scores_mask = (box_scores >= score_thresh) + box_scores = box_scores[scores_mask] + box_preds = box_preds[scores_mask] + selected = [] if box_scores.shape[0] > 0: box_scores_nms, indices = torch.topk(box_scores, k=min(nms_config.NMS_PRE_MAXSIZE, box_scores.shape[0])) diff --git a/pcdet/models/object_relation/__init__.py b/pcdet/models/object_relation/__init__.py new file mode 100644 index 000000000..7aab8cf60 --- /dev/null +++ b/pcdet/models/object_relation/__init__.py @@ -0,0 +1,17 @@ +from .gnn import GNN +from .fc import CGNLNet +from .gnn_BADet import BARefiner +from .gnn_new import GNN_New + +__all__ = { + 'GNN': GNN, + 'CGNLNet': CGNLNet, + 'GNN_BADET':BARefiner, + 'GNN_NEW': GNN_New, +} + +def build_object_relation_module(model_cfg): + model = __all__[model_cfg.NAME]( + model_cfg + ) + return model \ No newline at end of file diff --git a/pcdet/models/object_relation/cgnl.py b/pcdet/models/object_relation/cgnl.py new file mode 100644 index 000000000..4fdaf19ff --- /dev/null +++ b/pcdet/models/object_relation/cgnl.py @@ -0,0 +1,90 @@ +import torch +import torch.nn as nn + +""" +Code taken from here: https://github.com/NUAAXQ/MLCVNet/blob/master/models/CGNL.py +""" + +class SpatialCGNL(nn.Module): + """Spatial CGNL block with dot production kernel for image classfication. 
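+
+    The block computes attention with a dot-product kernel over features that
+    are flattened across channels and spatial positions (see `kernel`), so
+    correlations are modelled jointly across space and channels; `groups`
+    optionally restricts this to per-group channel blocks.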
+ """ + def __init__(self, inplanes, planes, use_scale=False, groups=None): + self.use_scale = use_scale + self.groups = groups + + super(SpatialCGNL, self).__init__() + # conv theta + self.t = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + # conv phi + self.p = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + # conv g + self.g = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) + # conv z + self.z = nn.Conv2d(planes, inplanes, kernel_size=1, stride=1, + groups=self.groups, bias=False) + self.gn = nn.GroupNorm(num_groups=self.groups, num_channels=inplanes) + + if self.use_scale: + print("=> WARN: SpatialCGNL block uses 'SCALE'", \ + 'yellow') + if self.groups: + print("=> WARN: SpatialCGNL block uses '{}' groups".format(self.groups), \ + 'yellow') + + def kernel(self, t, p, g, b, c, h, w): + """The linear kernel (dot production). + + Args: + t: output of conv theata + p: output of conv phi + g: output of conv g + b: batch size + c: channels number + h: height of featuremaps + w: width of featuremaps + """ + t = t.view(b, 1, c * h * w) + p = p.view(b, 1, c * h * w) + g = g.view(b, c * h * w, 1) + + att = torch.bmm(p, g) + + if self.use_scale: + att = att.div((c*h*w)**0.5) + + x = torch.bmm(att, t) + x = x.view(b, c, h, w) + + return x + + def forward(self, x): + residual = x + + t = self.t(x) + p = self.p(x) + g = self.g(x) + + b, c, h, w = t.size() + + if self.groups and self.groups > 1: + _c = int(c / self.groups) + + ts = torch.split(t, split_size_or_sections=_c, dim=1) + ps = torch.split(p, split_size_or_sections=_c, dim=1) + gs = torch.split(g, split_size_or_sections=_c, dim=1) + + _t_sequences = [] + for i in range(self.groups): + _x = self.kernel(ts[i], ps[i], gs[i], + b, _c, h, w) + _t_sequences.append(_x) + + x = torch.cat(_t_sequences, dim=1) + else: + x = self.kernel(t, p, g, + b, c, h, w) + + x = self.z(x) + x = self.gn(x) + residual + + return x \ No newline at end of file diff --git a/pcdet/models/object_relation/fc.py b/pcdet/models/object_relation/fc.py new file mode 100644 index 000000000..deb11288c --- /dev/null +++ b/pcdet/models/object_relation/fc.py @@ -0,0 +1,75 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F +import math + +from .cgnl import SpatialCGNL +from .utils import build_mlp + +class CGNLNet(nn.Module): + def __init__(self, object_relation_cfg, input_dim=256): + super(CGNLNet, self).__init__() + self.global_information = object_relation_cfg.GLOBAL_INFORMATION if 'GLOBAL_INFORMATION' in object_relation_cfg else None + self.drop_out = object_relation_cfg.DP_RATIO + self.skip_connection = False if 'SKIP_CONNECTION' not in object_relation_cfg else object_relation_cfg.SKIP_CONNECTION + + self.cgnl_input_dim = input_dim + if self.global_information: + self.cgnl_input_dim += object_relation_cfg.GLOBAL_INFORMATION.MLP_LAYERS[-1] if not self.global_information.CONCATENATED else object_relation_cfg.GLOBAL_INFORMATION.MLP_LAYERS[-1] + 8 + # groups = 8 from the CGNL paper + self.cgnl_1 = SpatialCGNL(self.cgnl_input_dim, int(self.cgnl_input_dim / 2), use_scale=False, groups=8) + self.cgnl_2 = SpatialCGNL(self.cgnl_input_dim, int(self.cgnl_input_dim / 2), use_scale=False, groups=8) + self.conv = torch.nn.Conv1d(self.cgnl_input_dim,self.cgnl_input_dim,1) + self.bn = torch.nn.BatchNorm1d(self.cgnl_input_dim) + + if self.global_information: + global_mlp_input_dim = input_dim + 8 if self.global_information.CONCATENATED else 8 + self.global_info_mlp = build_mlp(global_mlp_input_dim, 
self.global_information.MLP_LAYERS, activation='ReLU', bn=True, drop_out=self.drop_out) + + self.init_weights() + + + def forward(self, batch_dict): + (B, N, C) = batch_dict['pooled_features'].shape + assert math.sqrt(N) == int(math.sqrt(N)), "N must be a square number" + pooled_features = batch_dict['pooled_features'] + initial_pooled_features = pooled_features + proposal_boxes = batch_dict['rois'] + proposal_labels = batch_dict['roi_labels'] + + if self.global_information: + global_information = torch.cat((proposal_boxes, proposal_labels.unsqueeze(-1)), dim=-1).view(B*N, -1) + embedded_global_information = self.global_info_mlp(global_information) + pooled_features = torch.cat([pooled_features, embedded_global_information.view(B,N,-1)], dim=-1) + C = pooled_features.shape[-1] + + # permute to form image plane + pooled_features = pooled_features.permute((0,2,1)).contiguous() + pooled_features_plane = pooled_features.view(B, C, int(math.sqrt(N)), int(math.sqrt(N))) + pooled_features_plane = self.cgnl_1(pooled_features_plane) + pooled_features_plane = self.cgnl_2(pooled_features_plane) + + related_features = pooled_features_plane.view(B, C, N) + related_features = F.relu(self.bn(self.conv(related_features))) + # permute back to (B,N,C) + related_features = related_features.permute((0,2,1)).contiguous().view(B,N,C) + + if self.skip_connection: + related_features = torch.cat([related_features, initial_pooled_features], dim=-1) + + batch_dict['related_features'] = related_features + return batch_dict + + def init_weights(self, weight_init='xavier'): + if weight_init == 'xavier': + init_func = nn.init.xavier_uniform_ + if self.global_information: + for m in self.global_info_mlp: + if isinstance(m, nn.Linear): + init_func(m.weight) + + def get_output_dim(self): + return self.input_dim + + + diff --git a/pcdet/models/object_relation/gnn.py b/pcdet/models/object_relation/gnn.py new file mode 100644 index 000000000..7711cce4c --- /dev/null +++ b/pcdet/models/object_relation/gnn.py @@ -0,0 +1,232 @@ +import torch +import torch.nn as nn +import torch_geometric as tg +import torch.nn.functional as F +from .utils import build_mlp +# import torch_scatter + +# custom implementation for EdgeConv +# class EdgeWeightingNetwork(nn.Module): +# def __init__(self, dimension): +# super(EdgeWeightingNetwork, self).__init__() +# self.fc = nn.Linear(dimension*2, dimension) + +# def forward(self, x, edge_index): +# from_node, to_node = edge_index +# x_i, x_j = x[from_node], x[to_node] +# e_ij = torch.cat((x_j - x_i, x_i), dim=-1) +# e_ij = self.fc(e_ij) +# e_ij = F.relu(e_ij) +# # why from node here????? 
+# out, _ = torch_scatter.scatter_max(e_ij, from_node, dim=0) +# return out + +# similar to EdgeConv https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.EdgeConv.html +class EdgeConv(tg.nn.MessagePassing): + def __init__(self, dim_in, dim_out, drop_out=None, skip_connection=False): + super(EdgeConv, self).__init__(aggr='max') + self.skip_connection = skip_connection + self.mlp = build_mlp(dim_in, [dim_out], activation="ReLU", bn=True, drop_out=drop_out) + self.batch_norm = nn.BatchNorm1d(dim_out) + + def forward(self, x, edge_index, edge_attr=None): + out = self.propagate(edge_index, x=x, edge_attr=edge_attr) + out = self.batch_norm(out) + if self.skip_connection: + return out + x + return out + + def message(self, x_i, x_j, edge_attr=None): + if edge_attr is not None: + x = torch.cat((x_j - x_i, x_i, edge_attr), dim=-1) + else: + x = torch.cat((x_j - x_i, x_i), dim=-1) + x = self.mlp(x) + return x + + +class GNN(nn.Module): + def __init__(self, object_relation_cfg, number_classes=3, pooled_feature_dim=256): + super(GNN, self).__init__() + self.graph_cfg = object_relation_cfg.GRAPH + self.graph_conv = object_relation_cfg.GRAPH.CONV + self.gnn_layers = object_relation_cfg.LAYERS + self.in_between_layers = object_relation_cfg.IN_BETWEEN_MLP + # self.in_between_layers = object_relation_cfg.IN_BETWEEN_MLP if 'IN_BETWEEN_MLP' in object_relation_cfg else None + self.global_information = object_relation_cfg.GLOBAL_INFORMATION + # self.global_information = object_relation_cfg.GLOBAL_INFORMATION if 'GLOBAL_INFORMATION' in object_relation_cfg else None + self.number_classes = number_classes + self.drop_out = object_relation_cfg.DP_RATIO + self.skip_connection = object_relation_cfg.SKIP_CONNECTION + self.pooled_feature_dim = pooled_feature_dim + + if self.global_information: + global_mlp_input_dim = pooled_feature_dim + 7 if self.global_information.CONCATENATED else 7 + self.global_info_mlp = build_mlp(global_mlp_input_dim, self.global_information.MLP_LAYERS, activation='ReLU', bn=True, drop_out=self.drop_out) + + if not self.global_information: + gnn_input_dim = pooled_feature_dim + else: + gnn_input_dim = self.global_mlp[-1] if self.global_information.CONCATENATED else (self.global_mlp[-1] + self.pooled_feature_dim) + + conv_layer_list = [] + for i in range(len(self.gnn_layers)): + curr_conv_layer_list = [] + if i == 0: + input_dim = gnn_input_dim + else: + input_dim = self.gnn_layers[i-1] + + edge_dim = (7 if self.graph_conv.EDGE_EMBEDDING else 0) + if self.graph_conv.NAME == "EdgeConv": + curr_conv_layer_list.append(EdgeConv(2*input_dim+edge_dim, self.gnn_layers[i], drop_out=self.drop_out, skip_connection=self.graph_conv.SKIP_CONNECTION)) + elif self.graph_conv.NAME == "GATConv": + # layer according to tg example: https://github.com/pyg-team/pytorch_geometric/blob/master/examples/gat.py + curr_conv_layer_list.append(nn.Dropout(p=self.drop_out)) + curr_conv_layer_list.append(tg.nn.GATConv(input_dim, self.gnn_layers[i], self.graph_conv.HEADS, dropout=self.drop_out, edge_dim=edge_dim, concat=False)) + curr_conv_layer_list.append(nn.ELU()) + if self.in_between_layers: + curr_mlp = build_mlp(self.gnn_layers[i], [self.in_between_layers[i]], activation="ReLU", bn=True, drop_out=True) + curr_conv_layer_list.append(curr_mlp) + conv_layer_list.append(nn.ModuleList(curr_conv_layer_list)) + self.gnn = nn.ModuleList(conv_layer_list) + self.init_weights() + + + def init_weights(self, weight_init='xavier'): + if weight_init == 'xavier': + init_func = nn.init.xavier_uniform_ + 
for seq in self.gnn: + for n in seq: + if isinstance(n, EdgeConv): + for m in n.mlp: + if isinstance(m, nn.Linear): + init_func(m.weight) + elif isinstance(n, nn.Sequential): + for m in n: + if isinstance(m, nn.Linear): + init_func(m.weight) + elif isinstance(n, tg.nn.GATConv): + # automatically initialized + continue + else: + continue + if self.global_information: + for m in self.global_info_mlp: + if isinstance(m, nn.Linear): + init_func(m.weight) + + def forward(self, batch_dict): + (B, N, C) = batch_dict['pooled_features'].shape + + # BxNx7 + proposal_boxes = batch_dict['rois'].view(B*N,7) + # BxN + proposal_labels = batch_dict['roi_labels'].view(B*N) + # BxNxC + pooled_features = batch_dict['pooled_features'].view(B*N,C) + + + if self.global_information: + if self.global_information.CONCATENATED: + pooled_features_with_global_info = torch.cat([pooled_features, proposal_boxes], dim=1) + pooled_features = self.global_info_mlp(pooled_features_with_global_info) + else: + embedded_global_information = self.global_info_mlp(proposal_boxes) + pooled_features = torch.cat([pooled_features, embedded_global_information], dim=1) + + if self.graph_cfg.SPACE == 'R3': + assert self.graph_cfg.DYNAMIC == False, 'Distance should be measured in feature space if the graph is created dynamically' + edge_index = self.get_edges(proposal_boxes[:,:3], proposal_labels, (B, N, C)) + elif self.graph_cfg.SPACE == 'Feature': + edge_index = self.get_edges(pooled_features, proposal_labels, (B, N, C)) + else: + raise NotImplemented('Distance space was {} but should be R3 or FEATURE'.format(self.graph_cfg.SPACE)) + + batch_dict['gnn_edges'] = edge_index + + edge_attr = None + if self.graph_conv.EDGE_EMBEDDING: + from_node, to_node = edge_index + edge_attr = proposal_boxes[from_node] - proposal_boxes[to_node] + + gnn_features = [pooled_features] + x = pooled_features + for module_list in self.gnn: + for module in module_list: + if isinstance(module, (EdgeConv, tg.nn.GATConv)): + x = module(x, edge_index, edge_attr=edge_attr) + else: + x = module(x) + gnn_features.append(x) + if self.graph_cfg.DYNAMIC: + edge_index = self.get_edges(x, proposal_labels, (B, N, None)) + if edge_attr is not None: + from_node, to_node = edge_index + edge_attr = proposal_boxes[from_node] - proposal_boxes[to_node] + + if self.skip_connection: + batch_dict['related_features'] = torch.cat(gnn_features, dim=-1) + else: + batch_dict['related_features'] = x + + return batch_dict + + def get_edges(self, edge_generating_tensor, proposal_labels, shape): + B, N, _ = shape + f = getattr(tg.nn, self.graph_cfg.NAME) + a = (self.graph_cfg.RADIUS if self.graph_cfg.NAME == 'radius_graph' else self.graph_cfg.K) + batch_vector = torch.arange(B, device=edge_generating_tensor.device).repeat_interleave(N) + + if self.graph_cfg.CONNECT_ONLY_SAME_CLASS: + final_edge_indices = [] + for predicted_class in range(1, self.number_classes + 1): + label_indices = torch.where(proposal_labels == predicted_class)[0] + label_edge_generating_tensor = edge_generating_tensor[label_indices] + label_batch_vector = batch_vector[label_indices] + + label_edge_index = f(label_edge_generating_tensor, a, batch=label_batch_vector, loop=False) + label_edge_index[0] = label_indices[label_edge_index[0]] + label_edge_index[1] = label_indices[label_edge_index[1]] + final_edge_indices.append(label_edge_index) + edge_index = torch.cat(final_edge_indices, dim=-1) + else: + edge_index = f(edge_generating_tensor, a, batch=batch_vector, loop=False) + return edge_index + + +if __name__ == '__main__': 
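+    # Minimal smoke test: a single batch with three dummy proposals; builds the
+    # class-wise radius graph and checks the shape of the returned edge index.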
+ from easydict import EasyDict as edict + rois = torch.tensor([ + [[0, 0, 0, 0, 0, 0, 0], [2, 2, 2, 2, 2, 2, 2], [3, 3, 3, 3, 3, 3, 3]] + ], dtype=torch.float32) + # 1x3x512 + pooled_features = torch.rand((1, 3, 256), dtype=torch.float32) + proposal_labels = torch.tensor([ + [0, 0, 1] + ], dtype=torch.int64) + + batch_dict = { + 'rois': rois, # Random positions for 10 batches of 100 proposals each + 'pooled_features': pooled_features, # Random 16-dimensional features for 10 batches of 100 proposals each + 'roi_labels': proposal_labels # Random labels for 10 batches of 100 proposals each + } + + cfg = edict({ + 'GRAPH': { + 'NAME': 'radius_graph', + 'RADIUS': 3, + 'CONNECT_ONLY_SAME_CLASS': True + }, + 'LAYERS': [256, 256, 256], + 'GLOBAL_INFORMATION': { + 'MLP_LAYERS': [256, 256, 256] + } + }) + + model = GNN(cfg) + + batch_dict = model(batch_dict) + edges = batch_dict['gnn_edges'] + assert edges.shape[0] == 2 + assert edges.shape[1] == 6 diff --git a/pcdet/models/object_relation/gnn_BADet.py b/pcdet/models/object_relation/gnn_BADet.py new file mode 100644 index 000000000..2861483d9 --- /dev/null +++ b/pcdet/models/object_relation/gnn_BADet.py @@ -0,0 +1,148 @@ +from torch_geometric.nn import MessagePassing +from torch.nn import Sequential as Seq, Linear, ReLU +import torch +import torch.nn as nn +import torch_geometric as tg +from torch_geometric.data import Data +from torch_geometric.transforms import RadiusGraph + +class StateConvLayer(MessagePassing): + def __init__(self, state_dim): + """ + Graph Layer to perform update of the node states. + """ + super(StateConvLayer, self).__init__(aggr='max') + + # MLP to transform node state into the relative offset to alleviate translation variance. + self.mlp_h = Seq(Linear(state_dim, state_dim//2), + ReLU(inplace=True), + Linear(state_dim//2, state_dim//4), + ReLU(inplace=True), + Linear(state_dim//4, 3)) + + # MLP to compute edge features + self.mlp_f = Seq(Linear(state_dim+3, state_dim//2), + ReLU(inplace=True), + Linear(state_dim//2, state_dim//4), + ReLU(inplace=True), + Linear(state_dim//4, state_dim), + ) + + self.mlp_g = Seq(Linear(state_dim, state_dim//2), + ReLU(inplace=True), + Linear(state_dim//2, state_dim//4), + ReLU(inplace=True), + Linear(state_dim//4,state_dim), + ) + + def forward(self, s, x, edge_index): + return self.propagate(edge_index, s=s, x=x) + + def message(self, x_j, x_i, s_i, s_j): + + # The extended graph update algorithm. + delta_x_i = self.mlp_h(s_i) + tmp = torch.cat([x_j - x_i - delta_x_i, s_j], dim=1) + e_ij = self.mlp_f(tmp) + return e_ij + + def update(self, e_ij, s): + # Update vertex state based on aggregated edge features + return s + self.mlp_g(e_ij) + +def basic_block(in_channel, out_channel): + """ + Create block with linear layer followed by IN and ReLU. + :param in_channel: number of input features + :param out_channel: number of output features + :return: PyTorch Sequential object + """ + return nn.Sequential(Linear(in_channel, out_channel), + nn.InstanceNorm1d(out_channel), + nn.ReLU(inplace=True)) + +class BARefiner(nn.Module): + def __init__(self, object_relation_cfg, number_classes=3): + """ + Boundary-Aware Graph Neural Network, which takes 3D proposals in immediate neighborhood + as inputs for graph construction within a given cut-off distance, associating 3D proposals + in the form of local neighborhood graph, with boundary correlations of an object being + explicitly informed through an information compensation mechanism. 
+ + Args: + :param state_dim: maximum number of state features + :param n_classes: number of classes + :param n_iterations: number of GNN iterations to perform + """ + super(BARefiner, self).__init__() + state_dim = object_relation_cfg.STATE_DIM + n_iterations = object_relation_cfg.ITERATIONS + self.graph_cfg = object_relation_cfg.GRAPH + self.n_classes = 1 + self._num_anchor_per_loc = 1 + self._box_code_size = 7 + + # List of GNN layers + self.graph_layers = nn.ModuleList([StateConvLayer(state_dim) for _ in + range(n_iterations)]) + + # MLP for class prediction + self.mlp_class = Seq(basic_block(state_dim, state_dim), + basic_block(state_dim, state_dim), + Linear(state_dim, self._num_anchor_per_loc * self.n_classes)) + + # Set of MLPs for per-class bounding box regression + self.mlp_loc = Seq(basic_block(state_dim, state_dim), + basic_block(state_dim, state_dim), + Linear(state_dim, self._num_anchor_per_loc * self._box_code_size)) + + def get_edges(self, edge_generating_tensor, proposal_labels, shape): + B, N, _ = shape + f = getattr(tg.nn, self.graph_cfg.NAME) + a = (self.graph_cfg.RADIUS if self.graph_cfg.NAME == 'radius_graph' else self.graph_cfg.K) + batch_vector = torch.arange(B, device=edge_generating_tensor.device).repeat_interleave(N) + + if self.graph_cfg.CONNECT_ONLY_SAME_CLASS: + final_edge_indices = [] + for predicted_class in range(1, self.number_classes + 1): + label_indices = torch.where(proposal_labels == predicted_class)[0] + label_edge_generating_tensor = edge_generating_tensor[label_indices] + label_batch_vector = batch_vector[label_indices] + + label_edge_index = f(label_edge_generating_tensor, a, batch=label_batch_vector, loop=False) + label_edge_index[0] = label_indices[label_edge_index[0]] + label_edge_index[1] = label_indices[label_edge_index[1]] + final_edge_indices.append(label_edge_index) + edge_index = torch.cat(final_edge_indices, dim=-1) + else: + edge_index = f(edge_generating_tensor, a, batch=batch_vector, loop=False) + return edge_index + + + def forward(self, batch_dict): + (B, N, C) = batch_dict['pooled_features'].shape + + # BxNx7 + proposal_boxes = batch_dict['rois'].view(B*N,7) + # BxN + proposal_labels = batch_dict['roi_labels'].view(B*N) + # BxNxC + pooled_features = batch_dict['pooled_features'].view(B*N,C) + + edge_index = self.get_edges(proposal_boxes[:,:3], proposal_labels, (B, N, C)) + + + # Set initial vertex state + # state = batch_data['node_features'] + # Perform GNN computations + for graph_layer in self.graph_layers: + # Update vertex state + x = graph_layer(pooled_features, proposal_boxes[:,:3], edge_index) + + x = x.unsqueeze(0) + cls_pred = self.mlp_class(x) + reg_pred = self.mlp_loc(x) + batch_dict['rcnn_cls'] = cls_pred.view(B*N, self.n_classes) + batch_dict['rcnn_reg'] = reg_pred.view(B*N, self._box_code_size) + return batch_dict + \ No newline at end of file diff --git a/pcdet/models/object_relation/gnn_old.py b/pcdet/models/object_relation/gnn_old.py new file mode 100644 index 000000000..0a12ef666 --- /dev/null +++ b/pcdet/models/object_relation/gnn_old.py @@ -0,0 +1,169 @@ +import torch +import torch.nn as nn +import torch_geometric as tg +import torch.nn.functional as F +# import torch_scatter + +# custom implementation for EdgeConv +# class EdgeWeightingNetwork(nn.Module): +# def __init__(self, dimension): +# super(EdgeWeightingNetwork, self).__init__() +# self.fc = nn.Linear(dimension*2, dimension) + +# def forward(self, x, edge_index): +# from_node, to_node = edge_index +# x_i, x_j = x[from_node], x[to_node] +# e_ij = 
torch.cat((x_j - x_i, x_i), dim=-1) +# e_ij = self.fc(e_ij) +# e_ij = F.relu(e_ij) +# # why from node here????? +# out, _ = torch_scatter.scatter_max(e_ij, from_node, dim=0) +# return out + +# similar to EdgeConv https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.EdgeConv.html +class EdgeConv(tg.nn.MessagePassing): + def __init__(self, in_dim, out_dim): + super(EdgeConv, self).__init__(aggr='max') + self.fc = nn.Linear(in_dim, out_dim) + + def forward(self, x, edge_index): + return self.propagate(edge_index, x=x) + + def message(self, x_i, x_j): + x = torch.cat((x_j - x_i, x_i), dim=-1) + x = self.fc(x) + x = F.relu(x) + return x + + +class GNN(nn.Module): + def __init__(self, object_relation_cfg, number_classes=3): + super(GNN, self).__init__() + self.graph_cfg = object_relation_cfg.GRAPH + self.gnn_layers = object_relation_cfg.LAYERS + self.global_information = object_relation_cfg.GLOBAL_INFORMATION if 'GLOBAL_INFORMATION' in object_relation_cfg else None + self.number_classes = number_classes + + if self.global_information: + self.global_mlp = self.global_information.MLP_LAYERS + mlp_layer_list = [] + for i in range(len(self.global_mlp)): + if i == 0: + mlp_layer_list.append(nn.Linear(7, self.global_mlp[i])) + else: + mlp_layer_list.append(nn.Linear(self.global_mlp[i-1], self.global_mlp[i])) + + mlp_layer_list.append(nn.ReLU()) + self.global_info_mlp = nn.Sequential(*mlp_layer_list) + + self.gnn_input_dim = (self.global_mlp[-1] if self.global_information else 0) + 256 + conv_layer_list = [] + for i in range(len(self.gnn_layers)): + if i == 0: + conv_layer_list.append(EdgeConv(2*self.gnn_input_dim, self.gnn_layers[i])) + else: + conv_layer_list.append(EdgeConv(2*self.gnn_layers[i-1], self.gnn_layers[i])) + self.gnn = nn.ModuleList(conv_layer_list) + self.init_weights() + + + def init_weights(self, weight_init='xavier'): + if weight_init == 'xavier': + init_func = nn.init.xavier_uniform_ + for m in self.gnn: + init_func(m.fc.weight) + if self.global_information: + for m in self.global_info_mlp: + if isinstance(m, nn.Linear): + init_func(m.weight) + + def forward(self, batch_dict): + (B, N, C) = batch_dict['pooled_features'].shape + + # BxNx7 + proposal_boxes = batch_dict['rois'] + # BxN + proposal_labels = batch_dict['roi_labels'] + # BxNxC + pooled_features = batch_dict['pooled_features'] + (B, N, C) = pooled_features.shape + + + if self.global_information: + embedded_global_features = self.global_info_mlp(proposal_boxes) + pooled_features = torch.cat((pooled_features, embedded_global_features), dim=-1) + + pooled_features = pooled_features.view(-1, self.gnn_input_dim) + + edge_index = self.get_edges(proposal_boxes[:,:,:3].view(-1,3), proposal_labels.view(-1), (B, N, C)) + batch_dict['gnn_edges'] = edge_index + + gnn_features = [pooled_features] + x = pooled_features + for i in range(len(self.gnn)): + x = self.gnn[i](x, edge_index) + gnn_features.append(x) + + batch_dict['related_features'] = torch.cat(gnn_features, dim=-1) + + return batch_dict + + def get_edges(self, proposal_boxes, proposal_labels, shape): + B, N, _ = shape + f = getattr(tg.nn, self.graph_cfg.NAME) + a = (self.graph_cfg.RADIUS if self.graph_cfg.NAME == 'radius_graph' else self.graph_cfg.K) + batch_vector = torch.arange(B, device=proposal_boxes.device).repeat_interleave(N) + + if self.graph_cfg.CONNECT_ONLY_SAME_CLASS: + final_edge_indices = [] + for predicted_class in range(1, self.number_classes + 1): + label_indices = torch.where(proposal_labels == predicted_class)[0] + 
label_proposal_boxes = proposal_boxes[label_indices] + label_batch_vector = batch_vector[label_indices] + + label_edge_index = f(label_proposal_boxes, a, batch=label_batch_vector, loop=False) + label_edge_index[0] = label_indices[label_edge_index[0]] + label_edge_index[1] = label_indices[label_edge_index[1]] + final_edge_indices.append(label_edge_index) + edge_index = torch.cat(final_edge_indices, dim=-1) + else: + edge_index = f(proposal_boxes, a, batch=batch_vector, loop=False) + return edge_index + + + +if __name__ == '__main__': + from easydict import EasyDict as edict + rois = torch.tensor([ + [[0, 0, 0, 0, 0, 0, 0], [2, 2, 2, 2, 2, 2, 2], [3, 3, 3, 3, 3, 3, 3]] + ], dtype=torch.float32) + # 1x3x512 + pooled_features = torch.rand((1, 3, 256), dtype=torch.float32) + proposal_labels = torch.tensor([ + [0, 0, 1] + ], dtype=torch.int64) + + batch_dict = { + 'rois': rois, # Random positions for 10 batches of 100 proposals each + 'pooled_features': pooled_features, # Random 16-dimensional features for 10 batches of 100 proposals each + 'roi_labels': proposal_labels # Random labels for 10 batches of 100 proposals each + } + + cfg = edict({ + 'GRAPH': { + 'NAME': 'radius_graph', + 'RADIUS': 3, + 'CONNECT_ONLY_SAME_CLASS': True + }, + 'LAYERS': [256, 256, 256], + 'GLOBAL_INFORMATION': { + 'MLP_LAYERS': [256, 256, 256] + } + }) + + model = GNN(cfg) + + batch_dict = model(batch_dict) + edges = batch_dict['gnn_edges'] + assert edges.shape[0] == 2 + assert edges.shape[1] == 6 diff --git a/pcdet/models/object_relation/utils.py b/pcdet/models/object_relation/utils.py new file mode 100644 index 000000000..796b3b532 --- /dev/null +++ b/pcdet/models/object_relation/utils.py @@ -0,0 +1,15 @@ +import torch.nn as nn + +def build_mlp(input_dim, hidden_dims, activation='ReLU', bn=False, drop_out=None): + mlp_list = [] + for i in range(len(hidden_dims)): + if i == 0: + mlp_list.append(nn.Linear(input_dim, hidden_dims[i])) + else: + mlp_list.append(nn.Linear(hidden_dims[i-1], hidden_dims[i])) + if bn: + mlp_list.append(nn.BatchNorm1d(hidden_dims[i])) + mlp_list.append(getattr(nn, activation)()) + if drop_out: + mlp_list.append(nn.Dropout(drop_out)) + return nn.Sequential(*mlp_list) diff --git a/pcdet/models/roi_heads/__init__.py b/pcdet/models/roi_heads/__init__.py index 693cec426..0be2a8b00 100644 --- a/pcdet/models/roi_heads/__init__.py +++ b/pcdet/models/roi_heads/__init__.py @@ -1,11 +1,16 @@ from .partA2_head import PartA2FCHead from .pointrcnn_head import PointRCNNHead from .pvrcnn_head import PVRCNNHead +from .pvrcnn_head_relation import PVRCNNHeadRelation from .second_head import SECONDHead from .voxelrcnn_head import VoxelRCNNHead from .roi_head_template import RoIHeadTemplate from .mppnet_head import MPPNetHead from .mppnet_memory_bank_e2e import MPPNetHeadE2E +from .roi_head import RoIHead +from .partA2_relation_head import PartA2RelationFCHead +from .voxelrcnn_relation_head import VoxelRCNNRelationHead + __all__ = { 'RoIHeadTemplate': RoIHeadTemplate, @@ -16,4 +21,8 @@ 'VoxelRCNNHead': VoxelRCNNHead, 'MPPNetHead': MPPNetHead, 'MPPNetHeadE2E': MPPNetHeadE2E, + 'PVRCNNHeadRelation': PVRCNNHeadRelation, + 'ROIHead': RoIHead, + 'PartA2RelationFCHead': PartA2RelationFCHead, + 'VoxelRCNNRelationHead': VoxelRCNNRelationHead, } diff --git a/pcdet/models/roi_heads/partA2_relation_head.py b/pcdet/models/roi_heads/partA2_relation_head.py new file mode 100644 index 000000000..dca2471e9 --- /dev/null +++ b/pcdet/models/roi_heads/partA2_relation_head.py @@ -0,0 +1,265 @@ +import numpy as np +import torch 
+import torch.nn as nn + +from ...ops.roiaware_pool3d import roiaware_pool3d_utils +from ...utils.spconv_utils import spconv +from .roi_head_template import RoIHeadTemplate + + +class PartA2RelationFCHead(RoIHeadTemplate): + def __init__(self, input_channels, model_cfg, num_class=1, object_relation_config=None, **kwargs): + super().__init__(num_class=num_class, model_cfg=model_cfg) + self.model_cfg = model_cfg + + self.SA_modules = nn.ModuleList() + block = self.post_act_block + + c0 = self.model_cfg.ROI_AWARE_POOL.NUM_FEATURES // 2 + self.conv_part = spconv.SparseSequential( + block(4, 64, 3, padding=1, indice_key='rcnn_subm1'), + block(64, c0, 3, padding=1, indice_key='rcnn_subm1_1'), + ) + self.conv_rpn = spconv.SparseSequential( + block(input_channels, 64, 3, padding=1, indice_key='rcnn_subm2'), + block(64, c0, 3, padding=1, indice_key='rcnn_subm1_2'), + ) + + shared_fc_list = [] + pool_size = self.model_cfg.ROI_AWARE_POOL.POOL_SIZE + pre_channel = self.model_cfg.ROI_AWARE_POOL.NUM_FEATURES * pool_size * pool_size * pool_size + for k in range(0, self.model_cfg.SHARED_FC.__len__()): + shared_fc_list.extend([ + nn.Conv1d(pre_channel, self.model_cfg.SHARED_FC[k], kernel_size=1, bias=False), + nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.SHARED_FC[k] + + if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + + self.shared_fc_layer = nn.Sequential(*shared_fc_list) + + if object_relation_config.NAME == 'GNN': + if object_relation_config.GLOBAL_INFORMATION: + initial_input_dim = object_relation_config.GLOBAL_INFORMATION.MLP_LAYERS[-1] + if not object_relation_config.GLOBAL_INFORMATION.CONCATENATED: + initial_input_dim += self.model_cfg.SHARED_FC[-1] + else: + initial_input_dim = self.model_cfg.SHARED_FC[-1] + + if object_relation_config.SKIP_CONNECTION: + self.head_input_channels = initial_input_dim + sum(object_relation_config.LAYERS) + else: + self.head_input_channels = object_relation_config.LAYERS[-1] + elif object_relation_config.NAME == 'CGNLNet': + # TODO: update this + self.head_input_channels = self.model_cfg.SHARED_FC[-1] + 256 + 256 + else: + raise NotImplementedError + + self.cls_layers = self.make_fc_layers( + input_channels=self.head_input_channels, output_channels=self.num_class, fc_list=self.model_cfg.CLS_FC + ) + self.reg_layers = self.make_fc_layers( + input_channels=self.head_input_channels, + output_channels=self.box_coder.code_size * self.num_class, + fc_list=self.model_cfg.REG_FC + ) + + self.roiaware_pool3d_layer = roiaware_pool3d_utils.RoIAwarePool3d( + out_size=self.model_cfg.ROI_AWARE_POOL.POOL_SIZE, + max_pts_each_voxel=self.model_cfg.ROI_AWARE_POOL.MAX_POINTS_PER_VOXEL + ) + self.init_weights(weight_init='xavier') + + def init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def post_act_block(self, in_channels, out_channels, kernel_size, indice_key, stride=1, padding=0, conv_type='subm'): + if conv_type 
== 'subm': + m = spconv.SparseSequential( + spconv.SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key), + nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + nn.ReLU(), + ) + elif conv_type == 'spconv': + m = spconv.SparseSequential( + spconv.SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + bias=False, indice_key=indice_key), + nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + nn.ReLU(), + ) + elif conv_type == 'inverseconv': + m = spconv.SparseSequential( + spconv.SparseInverseConv3d(in_channels, out_channels, kernel_size, + indice_key=indice_key, bias=False), + nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01), + nn.ReLU(), + ) + else: + raise NotImplementedError + return m + + def roiaware_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + batch_size = batch_dict['batch_size'] + batch_idx = batch_dict['point_coords'][:, 0] + point_coords = batch_dict['point_coords'][:, 1:4] + point_features = batch_dict['point_features'] + part_features = torch.cat(( + batch_dict['point_part_offset'] if not self.model_cfg.get('DISABLE_PART', False) else point_coords, + batch_dict['point_cls_scores'].view(-1, 1).detach() + ), dim=1) + part_features[part_features[:, -1] < self.model_cfg.SEG_MASK_SCORE_THRESH, 0:3] = 0 + + rois = batch_dict['rois'] + + pooled_part_features_list, pooled_rpn_features_list = [], [] + + for bs_idx in range(batch_size): + bs_mask = (batch_idx == bs_idx) + cur_point_coords = point_coords[bs_mask] + cur_part_features = part_features[bs_mask] + cur_rpn_features = point_features[bs_mask] + cur_roi = rois[bs_idx][:, 0:7].contiguous() # (N, 7) + + pooled_part_features = self.roiaware_pool3d_layer.forward( + cur_roi, cur_point_coords, cur_part_features, pool_method='avg' + ) # (N, out_x, out_y, out_z, 4) + pooled_rpn_features = self.roiaware_pool3d_layer.forward( + cur_roi, cur_point_coords, cur_rpn_features, pool_method='max' + ) # (N, out_x, out_y, out_z, C) + + pooled_part_features_list.append(pooled_part_features) + pooled_rpn_features_list.append(pooled_rpn_features) + + pooled_part_features = torch.cat(pooled_part_features_list, dim=0) # (B * N, out_x, out_y, out_z, 4) + pooled_rpn_features = torch.cat(pooled_rpn_features_list, dim=0) # (B * N, out_x, out_y, out_z, C) + + return pooled_part_features, pooled_rpn_features + + @staticmethod + def fake_sparse_idx(sparse_idx, batch_size_rcnn): + print('Warning: Sparse_Idx_Shape(%s) \r' % (str(sparse_idx.shape)), end='', flush=True) + # at most one sample is non-empty, then fake the first voxels of each sample(BN needs at least + # two values each channel) as non-empty for the below calculation + sparse_idx = sparse_idx.new_zeros((batch_size_rcnn, 3)) + bs_idxs = torch.arange(batch_size_rcnn).type_as(sparse_idx).view(-1, 1) + sparse_idx = torch.cat((bs_idxs, sparse_idx), dim=1) + return sparse_idx + + def forward(self, batch_dict): + """ + Args: + batch_dict: + + Returns: + + """ + targets_dict = self.proposal_layer( + batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + ) + if self.training: + targets_dict = self.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + + B,N,_ = batch_dict['rois'].shape + # 
RoI aware pooling + pooled_part_features, pooled_rpn_features = self.roiaware_pool(batch_dict) + batch_size_rcnn = pooled_part_features.shape[0] # (B * N, out_x, out_y, out_z, 4) + + # transform to sparse tensors + sparse_shape = np.array(pooled_part_features.shape[1:4], dtype=np.int32) + sparse_idx = pooled_part_features.sum(dim=-1).nonzero() # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx] + if sparse_idx.shape[0] < 3: + sparse_idx = self.fake_sparse_idx(sparse_idx, batch_size_rcnn) + if self.training: + # these are invalid samples + targets_dict['rcnn_cls_labels'].fill_(-1) + targets_dict['reg_valid_mask'].fill_(-1) + + part_features = pooled_part_features[sparse_idx[:, 0], sparse_idx[:, 1], sparse_idx[:, 2], sparse_idx[:, 3]] + rpn_features = pooled_rpn_features[sparse_idx[:, 0], sparse_idx[:, 1], sparse_idx[:, 2], sparse_idx[:, 3]] + coords = sparse_idx.int().contiguous() + part_features = spconv.SparseConvTensor(part_features, coords, sparse_shape, batch_size_rcnn) + rpn_features = spconv.SparseConvTensor(rpn_features, coords, sparse_shape, batch_size_rcnn) + + # forward rcnn network + x_part = self.conv_part(part_features) + x_rpn = self.conv_rpn(rpn_features) + + merged_feature = torch.cat((x_rpn.features, x_part.features), dim=1) # (N, C) + shared_feature = spconv.SparseConvTensor(merged_feature, coords, sparse_shape, batch_size_rcnn) + shared_feature = shared_feature.dense().view(batch_size_rcnn, -1, 1) + + shared_feature = self.shared_fc_layer(shared_feature) + batch_dict['pooled_features'] = shared_feature.view(B, N, self.model_cfg.SHARED_FC[-1]) + self.forward_ret_dict = targets_dict + + + # rcnn_cls = self.cls_layers(shared_feature).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + # rcnn_reg = self.reg_layers(shared_feature).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + # if not self.training: + # batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + # batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + # ) + # batch_dict['batch_cls_preds'] = batch_cls_preds + # batch_dict['batch_box_preds'] = batch_box_preds + # batch_dict['cls_preds_normalized'] = False + # else: + # targets_dict['rcnn_cls'] = rcnn_cls + # targets_dict['rcnn_reg'] = rcnn_reg + + # self.forward_ret_dict = targets_dict + return batch_dict + + def final_predictions(self, batch_dict): + shared_features = batch_dict['related_features'] + shared_features = shared_features.view(-1, self.head_input_channels, 1) + rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + if not self.training: + batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + ) + batch_dict['batch_cls_preds'] = batch_cls_preds + batch_dict['batch_box_preds'] = batch_box_preds + batch_dict['cls_preds_normalized'] = False + else: + self.forward_ret_dict['rcnn_cls'] = rcnn_cls + self.forward_ret_dict['rcnn_reg'] = rcnn_reg + + return batch_dict diff --git a/pcdet/models/roi_heads/pvrcnn_head_relation.py b/pcdet/models/roi_heads/pvrcnn_head_relation.py new file mode 100644 index 000000000..d341c07a9 --- /dev/null +++ b/pcdet/models/roi_heads/pvrcnn_head_relation.py @@ -0,0 +1,233 @@ +import torch.nn as nn + +from ...ops.pointnet2.pointnet2_stack import pointnet2_modules as pointnet2_stack_modules 
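+# Worked example for the channel bookkeeping below (assuming the KITTI configs
+# in this diff: SHARED_FC[-1] = 256, LAYERS = [256, 256, 256], SKIP_CONNECTION
+# enabled, no GLOBAL_INFORMATION): the GNN concatenates its input features with
+# every layer output, so head_input_channels = 256 + 3 * 256 = 1024.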
+from ...utils import common_utils +from .roi_head_template import RoIHeadTemplate + + +class PVRCNNHeadRelation(RoIHeadTemplate): + def __init__(self, input_channels, model_cfg, num_class=1, object_relation_config=None, **kwargs): + super().__init__(num_class=num_class, model_cfg=model_cfg) + self.model_cfg = model_cfg + + self.roi_grid_pool_layer, num_c_out = pointnet2_stack_modules.build_local_aggregation_module( + input_channels=input_channels, config=self.model_cfg.ROI_GRID_POOL + ) + + GRID_SIZE = self.model_cfg.ROI_GRID_POOL.GRID_SIZE + pre_channel = GRID_SIZE * GRID_SIZE * GRID_SIZE * num_c_out + + shared_fc_list = [] + for k in range(0, self.model_cfg.SHARED_FC.__len__()): + shared_fc_list.extend([ + nn.Conv1d(pre_channel, self.model_cfg.SHARED_FC[k], kernel_size=1, bias=False), + nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.SHARED_FC[k] + + if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + + self.shared_fc_layer = nn.Sequential(*shared_fc_list) + + if object_relation_config.NAME == 'GNN': + self.skip_head = False + if object_relation_config.GLOBAL_INFORMATION: + initial_input_dim = object_relation_config.GLOBAL_INFORMATION.MLP_LAYERS[-1] + if not object_relation_config.GLOBAL_INFORMATION.CONCATENATED: + initial_input_dim += self.model_cfg.SHARED_FC[-1] + else: + initial_input_dim = self.model_cfg.SHARED_FC[-1] + + if object_relation_config.SKIP_CONNECTION: + self.head_input_channels = initial_input_dim + sum(object_relation_config.LAYERS) + else: + if len(object_relation_config.LAYERS) == 0: + self.head_input_channels = self.model_cfg.SHARED_FC[-1] + else: + self.head_input_channels = object_relation_config.LAYERS[-1] + elif object_relation_config.NAME == 'CGNLNet': + self.skip_head = False + # TODO: update this + self.head_input_channels = self.model_cfg.SHARED_FC[-1] + 256 + 256 + elif object_relation_config.NAME == 'GNN_BADET': + self.head_input_channels = 256 + self.skip_head = True + elif object_relation_config.NAME == 'GNN_NEW': + self.head_input_channels = 256 + self.skip_head = False + else: + raise NotImplementedError + + self.cls_layers = self.make_fc_layers( + input_channels=self.head_input_channels, output_channels=self.num_class, fc_list=self.model_cfg.CLS_FC + ) + self.reg_layers = self.make_fc_layers( + input_channels=self.head_input_channels, + output_channels=self.box_coder.code_size * self.num_class, + fc_list=self.model_cfg.REG_FC + ) + self.init_weights(weight_init='xavier') + + def init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def roi_grid_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + batch_size = batch_dict['batch_size'] + rois = batch_dict['rois'] + point_coords = 
batch_dict['point_coords'] + point_features = batch_dict['point_features'] + + point_features = point_features * batch_dict['point_cls_scores'].view(-1, 1) + + global_roi_grid_points, local_roi_grid_points = self.get_global_grid_points_of_roi( + rois, grid_size=self.model_cfg.ROI_GRID_POOL.GRID_SIZE + ) # (BxN, 6x6x6, 3) + global_roi_grid_points = global_roi_grid_points.view(batch_size, -1, 3) # (B, Nx6x6x6, 3) + + xyz = point_coords[:, 1:4] + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + batch_idx = point_coords[:, 0] + for k in range(batch_size): + xyz_batch_cnt[k] = (batch_idx == k).sum() + + new_xyz = global_roi_grid_points.view(-1, 3) + new_xyz_batch_cnt = xyz.new_zeros(batch_size).int().fill_(global_roi_grid_points.shape[1]) + pooled_points, pooled_features = self.roi_grid_pool_layer( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz, + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=point_features.contiguous(), + ) # (M1 + M2 ..., C) + + pooled_features = pooled_features.view( + -1, self.model_cfg.ROI_GRID_POOL.GRID_SIZE ** 3, + pooled_features.shape[-1] + ) # (BxN, 6x6x6, C) + return pooled_features + + def get_global_grid_points_of_roi(self, rois, grid_size): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def forward(self, batch_dict): + """ + :param input_data: input dict + :return: + """ + + targets_dict = self.proposal_layer( + batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + ) + if self.training: + targets_dict = batch_dict.get('roi_targets_dict', None) + if targets_dict is None: + targets_dict = self.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + + _, N, _ = batch_dict['rois'].shape + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + grid_size = self.model_cfg.ROI_GRID_POOL.GRID_SIZE + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1).\ + contiguous().view(batch_size_rcnn, -1, grid_size, grid_size, grid_size) # (BxN, C, 6, 6, 6) + + pooled_features = self.shared_fc_layer(pooled_features.view(batch_size_rcnn, -1, 1)) + batch_dict['pooled_features'] = pooled_features.view(-1, N, self.model_cfg.SHARED_FC[-1]) + + self.forward_ret_dict = targets_dict + + + # rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + # rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + # if not self.training: + # batch_cls_preds, batch_box_preds = 
self.generate_predicted_boxes( + # batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + # ) + # batch_dict['batch_cls_preds'] = batch_cls_preds + # batch_dict['batch_box_preds'] = batch_box_preds + # batch_dict['cls_preds_normalized'] = False + # else: + # targets_dict['rcnn_cls'] = rcnn_cls + # targets_dict['rcnn_reg'] = rcnn_reg + + # self.forward_ret_dict = targets_dict + + return batch_dict + + def final_predictions(self, batch_dict): + if self.skip_head: + rcnn_cls = batch_dict['rcnn_cls'] + rcnn_reg = batch_dict['rcnn_reg'] + else: + shared_features = batch_dict['related_features'] + shared_features = shared_features.view(-1, self.head_input_channels, 1) + rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + if not self.training: + batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + ) + batch_dict['batch_cls_preds'] = batch_cls_preds + batch_dict['batch_box_preds'] = batch_box_preds + batch_dict['cls_preds_normalized'] = False + else: + self.forward_ret_dict['rcnn_cls'] = rcnn_cls + self.forward_ret_dict['rcnn_reg'] = rcnn_reg + + return batch_dict + diff --git a/pcdet/models/roi_heads/roi_head.py b/pcdet/models/roi_heads/roi_head.py new file mode 100644 index 000000000..cd583798f --- /dev/null +++ b/pcdet/models/roi_heads/roi_head.py @@ -0,0 +1,172 @@ +import torch +import torch.nn as nn +from ..backbones_3d.pfe.voxel_set_abstraction import bilinear_interpolate_torch +from ...utils.box_torch_ops import center_to_corner_box2d +from .roi_head_template import RoIHeadTemplate + +class RoIHead(RoIHeadTemplate): + def __init__(self, model_cfg, voxel_size, point_cloud_range, num_class=1, **kwargs): + # backbone_channels, point_cloud_range + super().__init__(num_class=num_class, model_cfg=model_cfg) + + out_stride = model_cfg.FEATURE_MAP_STRIDE + pc_range_start = point_cloud_range[:2] + self.add_box_param = model_cfg.ADD_BOX_PARAM + self.num_point = model_cfg.NUM_POINTS + self.bev_feature_extractor = BEVFeatureExtractor(pc_range_start, voxel_size, out_stride, self.num_point) + + fc_input_dim = (model_cfg.BEV_FEATURE_DIM * self.num_point) + (self.box_coder.code_size + 1 if self.add_box_param else 0) + shared_fc_list = [] + for k in range(0, self.model_cfg.SHARED_FC.__len__()): + shared_fc_list.extend([ + nn.Conv1d(fc_input_dim if k==0 else self.model_cfg.SHARED_FC[k-1], self.model_cfg.SHARED_FC[k], kernel_size=1, bias=False), + nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.SHARED_FC[k] + + if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + + self.shared_fc_layer = nn.Sequential(*shared_fc_list) + + self.cls_layers = self.make_fc_layers( + input_channels=pre_channel, output_channels=self.num_class, fc_list=self.model_cfg.CLS_FC + ) + self.reg_layers = self.make_fc_layers( + input_channels=pre_channel, + output_channels=self.box_coder.code_size, + fc_list=self.model_cfg.REG_FC + ) + self.init_weights(weight_init='xavier') + + def init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + 
init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, batch_dict): + if self.training: + targets_dict = batch_dict.get('roi_targets_dict', None) + if targets_dict is None: + targets_dict = self.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + batch_dict['roi_scores'] = targets_dict['roi_scores'] + + B,N,_ = batch_dict['rois'].shape + batch_centers = self.get_box_center(batch_dict['rois']) + bev_features = self.bev_feature_extractor(batch_dict['spatial_features_2d'], batch_centers) + first_stage_pred = batch_dict['rois'] + + # target_dict = self.reorder_first_stage_pred_and_feature(batch_dict, first_stage_pred, bev_features) + + # RoI aware pooling + # if self.add_box_param: + if self.add_box_param: + pooled_features = torch.cat([bev_features, batch_dict['rois'], batch_dict['roi_scores'].unsqueeze(-1)], dim=-1) + else: + pooled_features = bev_features + + pooled_features = pooled_features.reshape(B*N,1,-1).contiguous() + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1).contiguous() # (BxN, C, 1) + + shared_features = self.shared_fc_layer(pooled_features.view(batch_size_rcnn, -1, 1)) + rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + if not self.training: + batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + ) + batch_dict['batch_cls_preds'] = batch_cls_preds + batch_dict['batch_box_preds'] = batch_box_preds + batch_dict['cls_preds_normalized'] = False + else: + targets_dict['rcnn_cls'] = rcnn_cls + targets_dict['rcnn_reg'] = rcnn_reg + + self.forward_ret_dict = targets_dict + + return batch_dict + + + def get_box_center(self, boxes): + # box [List] + centers = [] + for box in boxes: + if self.num_point == 1: + centers.append(box[:, :3]) + + elif self.num_point == 5: + center2d = box[:, :2] + height = box[:, 2:3] + dim2d = box[:, 3:5] + rotation_y = box[:, -1] + + corners = center_to_corner_box2d(center2d, dim2d, rotation_y) + + front_middle = torch.cat([(corners[:, 0] + corners[:, 1])/2, height], dim=-1) + back_middle = torch.cat([(corners[:, 2] + corners[:, 3])/2, height], dim=-1) + left_middle = torch.cat([(corners[:, 0] + corners[:, 3])/2, height], dim=-1) + right_middle = torch.cat([(corners[:, 1] + corners[:, 2])/2, height], dim=-1) + + points = torch.cat([box[:,:3], front_middle, back_middle, left_middle, right_middle], dim=0) + + centers.append(points) + else: + raise NotImplementedError() + + return centers + + + + +class BEVFeatureExtractor(nn.Module): + def __init__(self, pc_start, voxel_size, out_stride, num_point): + super().__init__() + self.pc_start = pc_start + self.voxel_size = voxel_size + self.out_stride = out_stride + self.num_point = num_point + + def absl_to_relative(self, absolute): + a1 = (absolute[..., 0] - self.pc_start[0]) / self.voxel_size[0] / self.out_stride + a2 = (absolute[..., 1] - self.pc_start[1]) / self.voxel_size[1] / self.out_stride + + return a1, a2 + + def forward(self, bev_features, 
batch_centers): + + batch_size = len(bev_features) + ret_maps = [] + + for batch_idx in range(batch_size): + xs, ys = self.absl_to_relative(batch_centers[batch_idx]) + + # N x C + feature_map = bilinear_interpolate_torch(bev_features[batch_idx], + xs, ys) + + if self.num_point > 1: + section_size = len(feature_map) // self.num_point + feature_map = torch.cat([feature_map[i*section_size: (i+1)*section_size] for i in range(self.num_point)], dim=1) + + ret_maps.append(feature_map) + + return torch.stack(ret_maps) \ No newline at end of file diff --git a/pcdet/models/roi_heads/voxelrcnn_relation_head.py b/pcdet/models/roi_heads/voxelrcnn_relation_head.py new file mode 100644 index 000000000..5e76ee73d --- /dev/null +++ b/pcdet/models/roi_heads/voxelrcnn_relation_head.py @@ -0,0 +1,304 @@ +import torch +import torch.nn as nn +from ...ops.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules +from ...utils import common_utils +from .roi_head_template import RoIHeadTemplate + + +class VoxelRCNNRelationHead(RoIHeadTemplate): + def __init__(self, backbone_channels, model_cfg, point_cloud_range, voxel_size, num_class=1, object_relation_config=None, **kwargs): + super().__init__(num_class=num_class, model_cfg=model_cfg) + self.model_cfg = model_cfg + self.pool_cfg = model_cfg.ROI_GRID_POOL + LAYER_cfg = self.pool_cfg.POOL_LAYERS + self.point_cloud_range = point_cloud_range + self.voxel_size = voxel_size + + c_out = 0 + self.roi_grid_pool_layers = nn.ModuleList() + for src_name in self.pool_cfg.FEATURES_SOURCE: + mlps = LAYER_cfg[src_name].MLPS + for k in range(len(mlps)): + mlps[k] = [backbone_channels[src_name]] + mlps[k] + pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( + query_ranges=LAYER_cfg[src_name].QUERY_RANGES, + nsamples=LAYER_cfg[src_name].NSAMPLE, + radii=LAYER_cfg[src_name].POOL_RADIUS, + mlps=mlps, + pool_method=LAYER_cfg[src_name].POOL_METHOD, + ) + + self.roi_grid_pool_layers.append(pool_layer) + + c_out += sum([x[-1] for x in mlps]) + + + GRID_SIZE = self.model_cfg.ROI_GRID_POOL.GRID_SIZE + # c_out = sum([x[-1] for x in mlps]) + pre_channel = GRID_SIZE * GRID_SIZE * GRID_SIZE * c_out + + shared_fc_list = [] + for k in range(0, self.model_cfg.SHARED_FC.__len__()): + shared_fc_list.extend([ + nn.Linear(pre_channel, self.model_cfg.SHARED_FC[k], bias=False), + nn.BatchNorm1d(self.model_cfg.SHARED_FC[k]), + nn.ReLU(inplace=True) + ]) + pre_channel = self.model_cfg.SHARED_FC[k] + + if k != self.model_cfg.SHARED_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + shared_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + self.shared_fc_layer = nn.Sequential(*shared_fc_list) + + if object_relation_config.NAME == 'GNN': + if object_relation_config.GLOBAL_INFORMATION: + initial_input_dim = object_relation_config.GLOBAL_INFORMATION.MLP_LAYERS[-1] + if not object_relation_config.GLOBAL_INFORMATION.CONCATENATED: + initial_input_dim += self.model_cfg.SHARED_FC[-1] + else: + initial_input_dim = self.model_cfg.SHARED_FC[-1] + + if object_relation_config.SKIP_CONNECTION: + self.head_input_channels = initial_input_dim + sum(object_relation_config.LAYERS) + else: + self.head_input_channels = object_relation_config.LAYERS[-1] + elif object_relation_config.NAME == 'CGNLNet': + # TODO: update this + self.head_input_channels = self.model_cfg.SHARED_FC[-1] + 256 + 256 + else: + raise NotImplementedError + + cls_fc_list = [] + for k in range(0, self.model_cfg.CLS_FC.__len__()): + cls_fc_list.extend([ + nn.Linear(self.head_input_channels if k==0 else pre_channel, 
self.model_cfg.CLS_FC[k], bias=False), + nn.BatchNorm1d(self.model_cfg.CLS_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.CLS_FC[k] + + if k != self.model_cfg.CLS_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + cls_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + self.cls_fc_layers = nn.Sequential(*cls_fc_list) + self.cls_pred_layer = nn.Linear(pre_channel, self.num_class, bias=True) + + reg_fc_list = [] + for k in range(0, self.model_cfg.REG_FC.__len__()): + reg_fc_list.extend([ + nn.Linear(self.head_input_channels if k==0 else pre_channel, self.model_cfg.REG_FC[k], bias=False), + nn.BatchNorm1d(self.model_cfg.REG_FC[k]), + nn.ReLU() + ]) + pre_channel = self.model_cfg.REG_FC[k] + + if k != self.model_cfg.REG_FC.__len__() - 1 and self.model_cfg.DP_RATIO > 0: + reg_fc_list.append(nn.Dropout(self.model_cfg.DP_RATIO)) + self.reg_fc_layers = nn.Sequential(*reg_fc_list) + self.reg_pred_layer = nn.Linear(pre_channel, self.box_coder.code_size * self.num_class, bias=True) + + self.init_weights() + + def init_weights(self): + init_func = nn.init.xavier_normal_ + for module_list in [self.shared_fc_layer, self.cls_fc_layers, self.reg_fc_layers]: + for m in module_list.modules(): + if isinstance(m, nn.Linear): + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + nn.init.normal_(self.cls_pred_layer.weight, 0, 0.01) + nn.init.constant_(self.cls_pred_layer.bias, 0) + nn.init.normal_(self.reg_pred_layer.weight, mean=0, std=0.001) + nn.init.constant_(self.reg_pred_layer.bias, 0) + + # def _init_weights(self): + # init_func = nn.init.xavier_normal_ + # for m in self.modules(): + # if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear): + # init_func(m.weight) + # if m.bias is not None: + # nn.init.constant_(m.bias, 0) + # nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def roi_grid_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + rois = batch_dict['rois'] + batch_size = batch_dict['batch_size'] + with_vf_transform = batch_dict.get('with_voxel_feature_transform', False) + + roi_grid_xyz, _ = self.get_global_grid_points_of_roi( + rois, grid_size=self.pool_cfg.GRID_SIZE + ) # (BxN, 6x6x6, 3) + # roi_grid_xyz: (B, Nx6x6x6, 3) + roi_grid_xyz = roi_grid_xyz.view(batch_size, -1, 3) + + # compute the voxel coordinates of grid points + roi_grid_coords_x = (roi_grid_xyz[:, :, 0:1] - self.point_cloud_range[0]) // self.voxel_size[0] + roi_grid_coords_y = (roi_grid_xyz[:, :, 1:2] - self.point_cloud_range[1]) // self.voxel_size[1] + roi_grid_coords_z = (roi_grid_xyz[:, :, 2:3] - self.point_cloud_range[2]) // self.voxel_size[2] + # roi_grid_coords: (B, Nx6x6x6, 3) + roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1) + + batch_idx = rois.new_zeros(batch_size, roi_grid_coords.shape[1], 1) + for bs_idx in range(batch_size): + batch_idx[bs_idx, :, 0] = bs_idx + # roi_grid_coords: (B, Nx6x6x6, 4) + # roi_grid_coords = torch.cat([batch_idx, roi_grid_coords], dim=-1) + # roi_grid_coords = roi_grid_coords.int() + roi_grid_batch_cnt = rois.new_zeros(batch_size).int().fill_(roi_grid_coords.shape[1]) + + pooled_features_list = [] + for k, src_name in enumerate(self.pool_cfg.FEATURES_SOURCE): + pool_layer = self.roi_grid_pool_layers[k] + cur_stride = 
batch_dict['multi_scale_3d_strides'][src_name] + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + if with_vf_transform: + cur_sp_tensors = batch_dict['multi_scale_3d_features_post'][src_name] + else: + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + # compute voxel center xyz and batch_cnt + cur_coords = cur_sp_tensors.indices + cur_voxel_xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], + downsample_times=cur_stride, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + # get voxel2point tensor + v2p_ind_tensor = common_utils.generate_voxel2pinds(cur_sp_tensors) + # compute the grid coordinates in this scale, in [batch_idx, x y z] order + cur_roi_grid_coords = roi_grid_coords // cur_stride + cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1) + cur_roi_grid_coords = cur_roi_grid_coords.int() + # voxel neighbor aggregation + pooled_features = pool_layer( + xyz=cur_voxel_xyz.contiguous(), + xyz_batch_cnt=cur_voxel_xyz_batch_cnt, + new_xyz=roi_grid_xyz.contiguous().view(-1, 3), + new_xyz_batch_cnt=roi_grid_batch_cnt, + new_coords=cur_roi_grid_coords.contiguous().view(-1, 4), + features=cur_sp_tensors.features.contiguous(), + voxel2point_indices=v2p_ind_tensor + ) + + pooled_features = pooled_features.view( + -1, self.pool_cfg.GRID_SIZE ** 3, + pooled_features.shape[-1] + ) # (BxN, 6x6x6, C) + pooled_features_list.append(pooled_features) + + ms_pooled_features = torch.cat(pooled_features_list, dim=-1) + + return ms_pooled_features + + + def get_global_grid_points_of_roi(self, rois, grid_size): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def forward(self, batch_dict): + """ + :param input_data: input dict + :return: + """ + + targets_dict = self.proposal_layer( + batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + ) + if self.training: + targets_dict = self.assign_targets(batch_dict) + batch_dict['rois'] = targets_dict['rois'] + batch_dict['roi_labels'] = targets_dict['roi_labels'] + + B, N, C = batch_dict['rois'].shape + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + # Box Refinement + pooled_features = pooled_features.view(pooled_features.size(0), -1) + pooled_features = self.shared_fc_layer(pooled_features) + + batch_dict['pooled_features'] = pooled_features.view(B, N, self.model_cfg.SHARED_FC[-1]) + + 
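# shape check (illustrative, with GRID_SIZE = 6 and c_out summed over FEATURES_SOURCE): roi_grid_pool returns (B*N, 216, c_out), the flatten above gives (B*N, 216 * c_out), shared_fc_layer maps that to (B*N, SHARED_FC[-1]), and the view yields (B, N, SHARED_FC[-1]) + 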
self.forward_ret_dict = targets_dict + + + # rcnn_cls = self.cls_pred_layer(self.cls_fc_layers(shared_features)) + # rcnn_reg = self.reg_pred_layer(self.reg_fc_layers(shared_features)) + + # grid_size = self.model_cfg.ROI_GRID_POOL.GRID_SIZE + # batch_size_rcnn = pooled_features.shape[0] + # pooled_features = pooled_features.permute(0, 2, 1).\ + # contiguous().view(batch_size_rcnn, -1, grid_size, grid_size, grid_size) # (BxN, C, 6, 6, 6) + + # shared_features = self.shared_fc_layer(pooled_features.view(batch_size_rcnn, -1, 1)) + # rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + # rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + + # if not self.training: + # batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + # batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + # ) + # batch_dict['batch_cls_preds'] = batch_cls_preds + # batch_dict['batch_box_preds'] = batch_box_preds + # batch_dict['cls_preds_normalized'] = False + # else: + # targets_dict['rcnn_cls'] = rcnn_cls + # targets_dict['rcnn_reg'] = rcnn_reg + + # self.forward_ret_dict = targets_dict + + return batch_dict + + def final_predictions(self, batch_dict): + shared_features = batch_dict['related_features'] + rcnn_cls = self.cls_pred_layer(self.cls_fc_layers(shared_features)) + rcnn_reg = self.reg_pred_layer(self.reg_fc_layers(shared_features)) + if not self.training: + batch_cls_preds, batch_box_preds = self.generate_predicted_boxes( + batch_size=batch_dict['batch_size'], rois=batch_dict['rois'], cls_preds=rcnn_cls, box_preds=rcnn_reg + ) + batch_dict['batch_cls_preds'] = batch_cls_preds + batch_dict['batch_box_preds'] = batch_box_preds + batch_dict['cls_preds_normalized'] = False + else: + self.forward_ret_dict['rcnn_cls'] = rcnn_cls + self.forward_ret_dict['rcnn_reg'] = rcnn_reg + + return batch_dict \ No newline at end of file diff --git a/pcdet/utils/box_torch_ops.py b/pcdet/utils/box_torch_ops.py new file mode 100644 index 000000000..18a3b12ab --- /dev/null +++ b/pcdet/utils/box_torch_ops.py @@ -0,0 +1,277 @@ +import math +from functools import reduce + +import numpy as np +import torch +from torch import stack as tstack +try: + from det3d.ops.iou3d_nms import iou3d_nms_cuda, iou3d_nms_utils +except: + print("iou3d cuda not built. You don't need this if you use circle_nms. Otherwise, refer to the advanced installation part to build this cuda extension") + +def torch_to_np_dtype(ttype): + type_map = { + torch.float16: np.dtype(np.float16), + torch.float32: np.dtype(np.float32), + torch.float64: np.dtype(np.float64), + torch.int32: np.dtype(np.int32), + torch.int64: np.dtype(np.int64), + torch.uint8: np.dtype(np.uint8), + } + return type_map[ttype] + + +def corners_nd(dims, origin=0.5): + """generate relative box corners based on length per dim and + origin point. + + Args: + dims (float array, shape=[N, ndim]): array of length per dim + origin (list or array or float): origin point relative to the smallest point. + dtype (output dtype, optional): Defaults to np.float32 + + Returns: + float array, shape=[N, 2 ** ndim, ndim]: returned corners. 
+ point layout example: (2d) x0y0, x0y1, x1y0, x1y1; + (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1 + where x0 < x1, y0 < y1, z0 < z1 + """ + ndim = int(dims.shape[1]) + dtype = torch_to_np_dtype(dims.dtype) + if isinstance(origin, float): + origin = [origin] * ndim + corners_norm = np.stack( + np.unravel_index(np.arange(2 ** ndim), [2] * ndim), axis=1 + ).astype(dtype) + # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1 + # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1 + # so need to convert to a format which is convenient to do other computing. + # for 2d boxes, format is clockwise start from minimum point + # for 3d boxes, please draw them by your hand. + if ndim == 2: + # generate clockwise box corners + corners_norm = corners_norm[[0, 1, 3, 2]] + elif ndim == 3: + corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]] + corners_norm = corners_norm - np.array(origin, dtype=dtype) + corners_norm = torch.from_numpy(corners_norm).type_as(dims) + corners = dims.view(-1, 1, ndim) * corners_norm.view(1, 2 ** ndim, ndim) + return corners + + +def corners_2d(dims, origin=0.5): + """generate relative 2d box corners based on length per dim and + origin point. + + Args: + dims (float array, shape=[N, 2]): array of length per dim + origin (list or array or float): origin point relative to the smallest point. + dtype (output dtype, optional): Defaults to np.float32 + + Returns: + float array, shape=[N, 4, 2]: returned corners. + point layout: x0y0, x0y1, x1y1, x1y0 + """ + return corners_nd(dims, origin) + + +def corner_to_standup_nd(boxes_corner): + ndim = boxes_corner.shape[2] + standup_boxes = [] + for i in range(ndim): + standup_boxes.append(torch.min(boxes_corner[:, :, i], dim=1)[0]) + for i in range(ndim): + standup_boxes.append(torch.max(boxes_corner[:, :, i], dim=1)[0]) + return torch.stack(standup_boxes, dim=1) + + +def rotation_3d_in_axis(points, angles, axis=0): + # points: [N, point_size, 3] + # angles: [N] + rot_sin = torch.sin(angles) + rot_cos = torch.cos(angles) + ones = torch.ones_like(rot_cos) + zeros = torch.zeros_like(rot_cos) + if axis == 1: + rot_mat_T = tstack( + [ + tstack([rot_cos, zeros, -rot_sin]), + tstack([zeros, ones, zeros]), + tstack([rot_sin, zeros, rot_cos]), + ] + ) + elif axis == 2 or axis == -1: + rot_mat_T = tstack( + [ + tstack([rot_cos, -rot_sin, zeros]), + tstack([rot_sin, rot_cos, zeros]), + tstack([zeros, zeros, ones]), + ] + ) + elif axis == 0: + rot_mat_T = tstack( + [ + tstack([zeros, rot_cos, -rot_sin]), + tstack([zeros, rot_sin, rot_cos]), + tstack([ones, zeros, zeros]), + ] + ) + else: + raise ValueError("axis should be in range") + # print(points.shape, rot_mat_T.shape) + return torch.einsum("aij,jka->aik", points, rot_mat_T) + +def rotate_points_along_z(points, angle): + """ + Args: + points: (B, N, 3 + C) + angle: (B), angle along z-axis, angle increases x ==> y + Returns: + """ + cosa = torch.cos(angle) + sina = torch.sin(angle) + zeros = angle.new_zeros(points.shape[0]) + ones = angle.new_ones(points.shape[0]) + rot_matrix = torch.stack(( + cosa, -sina, zeros, + sina, cosa, zeros, + zeros, zeros, ones + ), dim=1).view(-1, 3, 3).float() + points_rot = torch.matmul(points[:, :, 0:3], rot_matrix) + points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) + return points_rot + + +def rotation_2d(points, angles): + """rotate 2d points around the origin point, clockwise when angle is positive. + + Args: + points (float array, shape=[N, point_size, 2]): points to be rotated. 
+ angles (float array, shape=[N]): rotation angle. + + Returns: + float array: same shape as points + """ + rot_sin = torch.sin(angles) + rot_cos = torch.cos(angles) + rot_mat_T = torch.stack([tstack([rot_cos, -rot_sin]), tstack([rot_sin, rot_cos])]) + return torch.einsum("aij,jka->aik", (points, rot_mat_T)) + + +def center_to_corner_box3d(centers, dims, angles, origin=(0.5, 0.5, 0.5), axis=1): + """convert kitti locations, dimensions and angles to corners + + Args: + centers (float array, shape=[N, 3]): locations in kitti label file. + dims (float array, shape=[N, 3]): dimensions in kitti label file. + angles (float array, shape=[N]): rotation_y in kitti label file. + origin (list or array or float): origin point relate to smallest point. + use [0.5, 1.0, 0.5] in camera and [0.5, 0.5, 0] in lidar. + axis (int): rotation axis. 1 for camera and 2 for lidar. + Returns: + [type]: [description] + """ + # 'length' in kitti format is in x axis. + # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar) + # center in kitti format is [0.5, 1.0, 0.5] in xyz. + corners = corners_nd(dims, origin=origin) + # corners: [N, 8, 3] + corners = rotation_3d_in_axis(corners, angles, axis=axis) + corners += centers.view(-1, 1, 3) + return corners + + +def center_to_corner_box2d(centers, dims, angles=None, origin=0.5): + """convert kitti locations, dimensions and angles to corners + + Args: + centers (float array, shape=[N, 2]): locations in kitti label file. + dims (float array, shape=[N, 2]): dimensions in kitti label file. + angles (float array, shape=[N]): rotation_y in kitti label file. + + Returns: + [type]: [description] + """ + # 'length' in kitti format is in x axis. + # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar) + # center in kitti format is [0.5, 1.0, 0.5] in xyz. 
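+ # worked example: a single box with center (0, 0), dims (4, 2), angle 0 and the default origin 0.5 yields corners (-2, -1), (-2, 1), (2, 1), (2, -1), i.e. the clockwise layout produced by corners_nd above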
+ corners = corners_nd(dims, origin=origin) + # corners: [N, 4, 2] + if angles is not None: + corners = rotation_2d(corners, angles) + corners += centers.view(-1, 1, 2) + return corners + + +def project_to_image(points_3d, proj_mat): + points_num = list(points_3d.shape)[:-1] + points_shape = np.concatenate([points_num, [1]], axis=0).tolist() + points_4 = torch.cat( + [points_3d, torch.ones(*points_shape).type_as(points_3d)], dim=-1 + ) + # point_2d = points_4 @ tf.transpose(proj_mat, [1, 0]) + point_2d = torch.matmul(points_4, proj_mat.t()) + point_2d_res = point_2d[..., :2] / point_2d[..., 2:3] + return point_2d_res + + +def camera_to_lidar(points, r_rect, velo2cam): + num_points = points.shape[0] + points = torch.cat([points, torch.ones(num_points, 1).type_as(points)], dim=-1) + lidar_points = points @ torch.inverse((r_rect @ velo2cam).t()) + return lidar_points[..., :3] + + +def lidar_to_camera(points, r_rect, velo2cam): + num_points = points.shape[0] + points = torch.cat([points, torch.ones(num_points, 1).type_as(points)], dim=-1) + camera_points = points @ (r_rect @ velo2cam).t() + return camera_points[..., :3] + + +def box_camera_to_lidar(data, r_rect, velo2cam): + xyz = data[..., 0:3] + l, h, w = data[..., 3:4], data[..., 4:5], data[..., 5:6] + r = data[..., 6:7] + xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam) + return torch.cat([xyz_lidar, w, l, h, r], dim=-1) + + +def box_lidar_to_camera(data, r_rect, velo2cam): + xyz_lidar = data[..., 0:3] + w, l, h = data[..., 3:4], data[..., 4:5], data[..., 5:6] + r = data[..., 6:7] + xyz = lidar_to_camera(xyz_lidar, r_rect, velo2cam) + return torch.cat([xyz, l, h, w, r], dim=-1) + + +def rotate_nms_pcdet(boxes, scores, thresh, pre_maxsize=None, post_max_size=None): + """ + :param boxes: (N, 5) [x, y, z, l, w, h, theta] + :param scores: (N) + :param thresh: + :return: + """ + # transform back to pcdet's coordinate + boxes = boxes[:, [0, 1, 2, 4, 3, 5, -1]] + boxes[:, -1] = -boxes[:, -1] - np.pi /2 + + order = scores.sort(0, descending=True)[1] + if pre_maxsize is not None: + order = order[:pre_maxsize] + + boxes = boxes[order].contiguous() + + keep = torch.LongTensor(boxes.size(0)) + + if len(boxes) == 0: + num_out =0 + else: + num_out = iou3d_nms_cuda.nms_gpu(boxes, keep, thresh) + + selected = order[keep[:num_out].cuda()].contiguous() + + if post_max_size is not None: + selected = selected[:post_max_size] + + return selected \ No newline at end of file diff --git a/pcdet/utils/common_utils.py b/pcdet/utils/common_utils.py index af70728db..4dab6b479 100644 --- a/pcdet/utils/common_utils.py +++ b/pcdet/utils/common_utils.py @@ -4,7 +4,7 @@ import random import shutil import subprocess -import SharedArray +# import SharedArray import numpy as np import torch @@ -270,11 +270,11 @@ def generate_voxel2pinds(sparse_tensor): return v2pinds_tensor -def sa_create(name, var): - x = SharedArray.create(name, var.shape, dtype=var.dtype) - x[...] = var[...] - x.flags.writeable = False - return x +# def sa_create(name, var): +# x = SharedArray.create(name, var.shape, dtype=var.dtype) +# x[...] = var[...] 
+# x.flags.writeable = False +# return x class AverageMeter(object): diff --git a/resources/With_and_without_context.jpeg b/resources/With_and_without_context.jpeg new file mode 100644 index 000000000..8b6b4588d Binary files /dev/null and b/resources/With_and_without_context.jpeg differ diff --git a/resources/architecture.jpeg b/resources/architecture.jpeg new file mode 100644 index 000000000..3b6794291 Binary files /dev/null and b/resources/architecture.jpeg differ diff --git a/resources/gnn_architecture.png b/resources/gnn_architecture.png new file mode 100644 index 000000000..fd9699595 Binary files /dev/null and b/resources/gnn_architecture.png differ diff --git a/resources/object_relation.png b/resources/object_relation.png new file mode 100644 index 000000000..9571fe7e3 Binary files /dev/null and b/resources/object_relation.png differ diff --git a/resources/occlusion.png b/resources/occlusion.png new file mode 100644 index 000000000..0c43e96dd Binary files /dev/null and b/resources/occlusion.png differ diff --git a/resources/pattern.png b/resources/pattern.png new file mode 100644 index 000000000..89d3b23fe Binary files /dev/null and b/resources/pattern.png differ diff --git a/resources/proposal_consensus.png b/resources/proposal_consensus.png new file mode 100644 index 000000000..e1092dd74 Binary files /dev/null and b/resources/proposal_consensus.png differ diff --git a/resources/radius.png b/resources/radius.png new file mode 100644 index 000000000..2068cdb94 Binary files /dev/null and b/resources/radius.png differ diff --git a/resources/receptive_field.jpeg b/resources/receptive_field.jpeg new file mode 100644 index 000000000..19d6ab9c9 Binary files /dev/null and b/resources/receptive_field.jpeg differ diff --git a/resources/receptive_field_with_context.jpeg b/resources/receptive_field_with_context.jpeg new file mode 100644 index 000000000..151ec5672 Binary files /dev/null and b/resources/receptive_field_with_context.jpeg differ diff --git a/resources/relation_side.png b/resources/relation_side.png new file mode 100644 index 000000000..c055298e0 Binary files /dev/null and b/resources/relation_side.png differ diff --git a/resources/results.png b/resources/results.png new file mode 100644 index 000000000..5180dc4b0 Binary files /dev/null and b/resources/results.png differ diff --git a/resources/seen_in_context.jpeg b/resources/seen_in_context.jpeg new file mode 100644 index 000000000..f0ea52785 Binary files /dev/null and b/resources/seen_in_context.jpeg differ diff --git a/resources/side.png b/resources/side.png new file mode 100644 index 000000000..2660a5282 Binary files /dev/null and b/resources/side.png differ diff --git a/resources/with_context.jpeg b/resources/with_context.jpeg new file mode 100644 index 000000000..3cdc018ab Binary files /dev/null and b/resources/with_context.jpeg differ diff --git a/resources/without_context.jpeg b/resources/without_context.jpeg new file mode 100644 index 000000000..6f5e06a0a Binary files /dev/null and b/resources/without_context.jpeg differ diff --git a/tools/cfgs/dataset_configs/waymo_dataset.yaml b/tools/cfgs/dataset_configs/waymo_dataset.yaml index ba836e811..98f48e039 100644 --- a/tools/cfgs/dataset_configs/waymo_dataset.yaml +++ b/tools/cfgs/dataset_configs/waymo_dataset.yaml @@ -23,30 +23,30 @@ SHARED_MEMORY_FILE_LIMIT: 35000 # set it based on the size of your shared memor DATA_AUGMENTOR: DISABLE_AUG_LIST: ['placeholder'] AUG_CONFIG_LIST: - - NAME: gt_sampling - USE_ROAD_PLANE: False - DB_INFO_PATH: - - 
waymo_processed_data_v0_5_0_waymo_dbinfos_train_sampled_1.pkl - - USE_SHARED_MEMORY: False # set it to True to speed up (it costs about 15GB shared memory) - DB_DATA_PATH: - - waymo_processed_data_v0_5_0_gt_database_train_sampled_1_global.npy - - BACKUP_DB_INFO: - # if the above DB_INFO cannot be found, will use this backup one - DB_INFO_PATH: waymo_processed_data_v0_5_0_waymo_dbinfos_train_sampled_1_multiframe_-4_to_0.pkl - DB_DATA_PATH: waymo_processed_data_v0_5_0_gt_database_train_sampled_1_multiframe_-4_to_0_global.npy - NUM_POINT_FEATURES: 6 - - PREPARE: { - filter_by_min_points: ['Vehicle:5', 'Pedestrian:5', 'Cyclist:5'], - filter_by_difficulty: [-1], - } - - SAMPLE_GROUPS: ['Vehicle:15', 'Pedestrian:10', 'Cyclist:10'] - NUM_POINT_FEATURES: 5 - REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] - LIMIT_WHOLE_SCENE: True + # - NAME: gt_sampling + # USE_ROAD_PLANE: False + # DB_INFO_PATH: + # - waymo_processed_data_v0_5_0_waymo_dbinfos_train_sampled_1.pkl + + # USE_SHARED_MEMORY: False # set it to True to speed up (it costs about 15GB shared memory) + # DB_DATA_PATH: + # - waymo_processed_data_v0_5_0_gt_database_train_sampled_1_global.npy + + # BACKUP_DB_INFO: + # # if the above DB_INFO cannot be found, will use this backup one + # DB_INFO_PATH: waymo_processed_data_v0_5_0_waymo_dbinfos_train_sampled_1_multiframe_-4_to_0.pkl + # DB_DATA_PATH: waymo_processed_data_v0_5_0_gt_database_train_sampled_1_multiframe_-4_to_0_global.npy + # NUM_POINT_FEATURES: 6 + + # PREPARE: { + # filter_by_min_points: ['Vehicle:5', 'Pedestrian:5', 'Cyclist:5'], + # filter_by_difficulty: [-1], + # } + + # SAMPLE_GROUPS: ['Vehicle:15', 'Pedestrian:10', 'Cyclist:10'] + # NUM_POINT_FEATURES: 5 + # REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + # LIMIT_WHOLE_SCENE: True - NAME: random_world_flip ALONG_AXIS_LIST: ['x', 'y'] diff --git a/tools/cfgs/kitti_models/PartA2_car_class_only.yaml b/tools/cfgs/kitti_models/PartA2_car_class_only.yaml new file mode 100644 index 000000000..d9d19f749 --- /dev/null +++ b/tools/cfgs/kitti_models/PartA2_car_class_only.yaml @@ -0,0 +1,171 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + + +MODEL: + NAME: PartA2Net + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: UNetV2 + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POINT_HEAD: + NAME: PointIntraPartOffsetHead + CLS_FC: [] + PART_FC: [] + CLASS_AGNOSTIC: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + 'point_part_weight': 1.0 + } + + ROI_HEAD: + NAME: 
PartA2FCHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + SEG_MASK_SCORE_THRESH: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_AWARE_POOL: + POOL_SIZE: 12 + NUM_FEATURES: 128 + MAX_POINTS_PER_VOXEL: 128 + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.65 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 4 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/PartA2_relation_car_class_only.yaml b/tools/cfgs/kitti_models/PartA2_relation_car_class_only.yaml new file mode 100644 index 000000000..1c99aacf6 --- /dev/null +++ b/tools/cfgs/kitti_models/PartA2_relation_car_class_only.yaml @@ -0,0 +1,193 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + + +MODEL: + NAME: PartA2NetRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: UNetV2 + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POINT_HEAD: + NAME: PointIntraPartOffsetHead + CLS_FC: [] + PART_FC: [] + CLASS_AGNOSTIC: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + 'point_part_weight': 1.0 + } + + ROI_HEAD: + NAME: PartA2RelationFCHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + SEG_MASK_SCORE_THRESH: 0.3 + + 
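# note (based on roiaware_pool in partA2_relation_head.py): points whose foreground score falls below SEG_MASK_SCORE_THRESH have their part offsets zeroed before RoI-aware pooling + 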
NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_AWARE_POOL: + POOL_SIZE: 12 + NUM_FEATURES: 128 + MAX_POINTS_PER_VOXEL: 128 + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.65 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + SKIP_CONNECTION: True + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + IN_BETWEEN_MLP: False + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 4 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/centerpoint.yaml b/tools/cfgs/kitti_models/centerpoint.yaml new file mode 100644 index 000000000..a20ef9000 --- /dev/null +++ b/tools/cfgs/kitti_models/centerpoint.yaml @@ -0,0 +1,110 @@ +# Taken from here: https://github.com/open-mmlab/OpenPCDet/issues/750 +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + + +MODEL: + NAME: CenterPoint + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelResBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5] + LAYER_STRIDES: [1] + NUM_FILTERS: [128] + UPSAMPLE_STRIDES: [2] + NUM_UPSAMPLE_FILTERS: [256] + + DENSE_HEAD: + NAME: CenterHead + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: False + + SHARED_CONV_CHANNEL: 64 + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: ['center', 'center_z', 'dim', 'rot'] + HEAD_DICT: { + 'center': {'out_channels': 2, 'num_conv': 2}, + 'center_z': {'out_channels': 1, 'num_conv': 2}, + 'dim': {'out_channels': 3, 'num_conv': 2}, + 'rot': {'out_channels': 2, 'num_conv': 2}, + } + + CLASS_NAMES_EACH_HEAD: [ + ['Car', 'Pedestrian', 'Cyclist'] + ] + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 4 + NUM_MAX_OBJS: 500 + # VOXEL_SIZE: [0.05, 0.05, 0.1] + # OUT_SIZE_FACTOR: 4 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + # BOX_CODER: ResidualCoder + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + POST_PROCESSING: + SCORE_THRESH: 0.1 + POST_CENTER_LIMIT_RANGE: [-75.2, -75.2, -2, 75.2, 75.2, 4] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 
0.01 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.01 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 8 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.003 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/cfgs/kitti_models/centerpoint_twostage.yaml b/tools/cfgs/kitti_models/centerpoint_twostage.yaml new file mode 100644 index 000000000..2e299c9ab --- /dev/null +++ b/tools/cfgs/kitti_models/centerpoint_twostage.yaml @@ -0,0 +1,154 @@ +# Taken from here: https://github.com/open-mmlab/OpenPCDet/issues/750 +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + + +MODEL: + NAME: CenterPointTwoStage + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelResBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5] + LAYER_STRIDES: [1] + NUM_FILTERS: [128] + UPSAMPLE_STRIDES: [2] + NUM_UPSAMPLE_FILTERS: [256] + + DENSE_HEAD: + NAME: CenterHead + CLASS_AGNOSTIC: False + USE_DIRECTION_CLASSIFIER: False + ############################## + SHARED_CONV_CHANNEL: 64 + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: ['center', 'center_z', 'dim', 'rot'] + # this is what the head is predicting + HEAD_DICT: { + 'center': {'out_channels': 2, 'num_conv': 2}, + 'center_z': {'out_channels': 1, 'num_conv': 2}, + 'dim': {'out_channels': 3, 'num_conv': 2}, + 'rot': {'out_channels': 2, 'num_conv': 2}, + } + + CLASS_NAMES_EACH_HEAD: [ + ['Car', 'Pedestrian', 'Cyclist'] + ] + ############################## + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 4 + NUM_MAX_OBJS: 500 + # VOXEL_SIZE: [0.05, 0.05, 0.1] + # OUT_SIZE_FACTOR: 4 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + # BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + POST_PROCESSING: + SCORE_THRESH: 0.1 + POST_CENTER_LIMIT_RANGE: [-75.2, -75.2, -2, 75.2, 75.2, 4] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 0.01 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + ROI_HEAD: + NAME: ROIHead + #### Not sure about this #### + BEV_FEATURE_DIM: 352 + FEATURE_MAP_STRIDE: 4 + NUM_POINTS: 5 # anchor points for pooling either 1 or 5 + ADD_BOX_PARAM: False + ############################# + CLASS_AGNOSTIC: True + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + REG_FG_THRESH: 0.55 + + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + 
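# final boxes scoring below SCORE_THRESH are discarded during post-processing +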
OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.01 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.003 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/cfgs/kitti_models/pv_rcnn_BADet_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_BADet_car_class_only.yaml new file mode 100644 index 000000000..2f060eb9a --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_BADet_car_class_only.yaml @@ -0,0 +1,254 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + FROZEN: False + NAME: PVRCNNRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: 
[0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted by score and only the top NMS_PRE_MAXSIZE are kept + NMS_PRE_MAXSIZE: 9000 + # proposals whose overlap with a higher-scoring proposal exceeds NMS_THRESH are removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals kept after sampling + ROI_PER_IMAGE: 128 + # maximum fraction of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # of the background proposals, a HARD_BG_RATIO fraction are hard backgrounds (IoU between CLS_BG_THRESH_LO and CLS_BG_THRESH); the rest are easy backgrounds with near-zero IoU + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN_BADET + STATE_DIM: 256 # 120 in the paper + ITERATIONS: 3 + GRAPH: { + NAME: radius_graph, + RADIUS: 0.1, + CONNECT_ONLY_SAME_CLASS: False + } + + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car, Pedestrian, Cyclist + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 5 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_car_class_only.yaml new file mode 100644 index 000000000..3c078f1e0 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_car_class_only.yaml @@ -0,0 +1,229 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME:
PVRCNN + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + 
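# adam_onecycle drives momentum through the MOMS schedule below; the MOMENTUM value is only used by plain SGD +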
MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_frozen.yaml b/tools/cfgs/kitti_models/pv_rcnn_frozen.yaml new file mode 100644 index 000000000..20f66c454 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_frozen.yaml @@ -0,0 +1,256 @@ +PRE_TRAINED: + MODEL_PATH: ../output/cfgs/kitti_models/pv_rcnn/2023-09-04_09-52-39/ckpt/checkpoint_epoch_78.pth + # LEARNABLE_LAYER: ['roi_head.reg_layers', 'roi_head.cls_layers'] + LEARNABLE_LAYER: ['roi_head'] + + +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNN + FROZEN: True + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 
32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 5 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_frozen_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_frozen_car_class_only.yaml new file mode 100644 index 000000000..25bdd327b --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_frozen_car_class_only.yaml @@ -0,0 +1,235 @@ +PRE_TRAINED: + MODEL_PATH: ../output/cfgs/kitti_models/pv_rcnn_car_class_only/2023-09-21_08-45-44/ckpt/checkpoint_epoch_75.pth + LEARNABLE_LAYER: ['roi_head.reg_layers', 'roi_head.cls_layers'] + + +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNN + FROZEN: True + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + 
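# two BEV stages at strides 1 and 2; both are upsampled and concatenated into a single 512-channel feature map +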
LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 2 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + 
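# gradients are clipped to this global L2 norm before every optimizer step +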
GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_frozen_relation.yaml b/tools/cfgs/kitti_models/pv_rcnn_frozen_relation.yaml new file mode 100644 index 000000000..71f1d29cc --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_frozen_relation.yaml @@ -0,0 +1,299 @@ +PRE_TRAINED: + MODEL_PATH: ../output/cfgs/kitti_models/pv_rcnn/2023-09-04_09-52-39/ckpt/checkpoint_epoch_78.pth + LEARNABLE_LAYER: ['roi_head.reg_layers', 'roi_head.cls_layers', 'object_relation'] + + +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNRelation + FROZEN: True + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] 
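+ # each feature source is pooled at two radii; the MLPS, POOL_RADIUS and NSAMPLE entries correspond group by group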
+ x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted by score and only the top NMS_PRE_MAXSIZE are kept + NMS_PRE_MAXSIZE: 9000 + # proposals whose overlap with a higher-scoring proposal exceeds NMS_THRESH are removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals kept after sampling + ROI_PER_IMAGE: 128 + # maximum fraction of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # of the background proposals, a HARD_BG_RATIO fraction are hard backgrounds (IoU between CLS_BG_THRESH_LO and CLS_BG_THRESH); the rest are easy backgrounds with near-zero IoU + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + # HEADS: 4, + SKIP_CONNECTION: False, + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + # IN_BETWEEN_MLP: [256, 256, 256, 256] + IN_BETWEEN_MLP: False + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car, Pedestrian, Cyclist + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 5 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_frozen_relation_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_frozen_relation_car_class_only.yaml new file mode 100644 index 000000000..519381b25 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_frozen_relation_car_class_only.yaml @@ -0,0 +1,279 @@ +PRE_TRAINED: + MODEL_PATH: ../output/cfgs/kitti_models/pv_rcnn_car_class_only/2023-09-21_08-45-44/ckpt/checkpoint_epoch_75.pth + LEARNABLE_LAYER: ['roi_head.reg_layers', 'roi_head.cls_layers', 'object_relation'] + + +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNRelation + FROZEN: True + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted by score and only the top NMS_PRE_MAXSIZE are kept + NMS_PRE_MAXSIZE: 9000 + # proposals whose overlap with a higher-scoring proposal exceeds NMS_THRESH are removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals kept after sampling + ROI_PER_IMAGE: 128 + # maximum fraction of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # of the background proposals, a HARD_BG_RATIO fraction are hard backgrounds (IoU between CLS_BG_THRESH_LO and CLS_BG_THRESH); the rest are easy backgrounds with near-zero IoU + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + # HEADS: 4, + SKIP_CONNECTION: False, + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + # IN_BETWEEN_MLP: [256, 256, 256, 256] + IN_BETWEEN_MLP: False + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car, Pedestrian, Cyclist + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 5 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation.yaml b/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation.yaml new file mode 100644 index 000000000..15aeeab5d --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation.yaml @@ -0,0 +1,328 @@ +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + 
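# PV-RCNN++ backbone and heads combined with the GNN-based OBJECT_RELATION module configured below +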
NAME: PVRCNNPlusPlusRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: CenterHead + CLASS_AGNOSTIC: False + + CLASS_NAMES_EACH_HEAD: [ + [ 'Car', 'Pedestrian', 'Cyclist' ] + ] + + SHARED_CONV_CHANNEL: 64 + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ] + HEAD_DICT: { + 'center': { 'out_channels': 2, 'num_conv': 2 }, + 'center_z': { 'out_channels': 1, 'num_conv': 2 }, + 'dim': { 'out_channels': 3, 'num_conv': 2 }, + 'rot': { 'out_channels': 2, 'num_conv': 2 }, + } + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 8 + NUM_MAX_OBJS: 500 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + } + + POST_PROCESSING: + SCORE_THRESH: 0.01 + POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 4096 + NUM_OUTPUT_FEATURES: 90 + SAMPLE_METHOD: SPC + SPC_SAMPLING: + NUM_SECTORS: 6 + SAMPLE_RADIUS_WITH_ROI: 1.6 + + FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 1 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 32 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 2, 2, 2 ] + MAX_NEIGHBOR_DISTANCE: 0.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + + x_conv3: + DOWNSAMPLE_FACTOR: 4 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [128] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [3, 3, 3] + MAX_NEIGHBOR_DISTANCE: 1.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [64, 64] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + x_conv4: + DOWNSAMPLE_FACTOR: 8 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 4.8 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: 
True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + SCORE_THRESH: 0.1 + +# NMS_PRE_MAXSIZE: 4096 +# NMS_POST_MAXSIZE: 500 +# NMS_THRESH: 0.85 + + + ROI_GRID_POOL: + GRID_SIZE: 6 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: voxel_random_choice + NUM_REDUCED_CHANNELS: 30 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.8 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 1.6 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: False, + # HEADS: 4, + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + # IN_BETWEEN_MLP: [256, 256, 256, 256] + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + DP_RATIO: 0.3 + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.001 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation_car_class_only.yaml new file mode 100644 index 000000000..f90ec7609 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_plusplus_relation_car_class_only.yaml @@ -0,0 +1,332 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + 
NAME: PVRCNNPlusPlusRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: CenterHead + CLASS_AGNOSTIC: False + + CLASS_NAMES_EACH_HEAD: [ + [ 'Car'] + ] + + SHARED_CONV_CHANNEL: 64 + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ] + HEAD_DICT: { + 'center': { 'out_channels': 2, 'num_conv': 2 }, + 'center_z': { 'out_channels': 1, 'num_conv': 2 }, + 'dim': { 'out_channels': 3, 'num_conv': 2 }, + 'rot': { 'out_channels': 2, 'num_conv': 2 }, + } + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 8 + NUM_MAX_OBJS: 500 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + } + + POST_PROCESSING: + SCORE_THRESH: 0.01 + POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 4096 + NUM_OUTPUT_FEATURES: 90 + SAMPLE_METHOD: SPC + SPC_SAMPLING: + NUM_SECTORS: 6 + SAMPLE_RADIUS_WITH_ROI: 1.6 + + FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 1 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 32 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 2, 2, 2 ] + MAX_NEIGHBOR_DISTANCE: 0.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + + x_conv3: + DOWNSAMPLE_FACTOR: 4 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [128] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [3, 3, 3] + MAX_NEIGHBOR_DISTANCE: 1.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [64, 64] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + x_conv4: + DOWNSAMPLE_FACTOR: 8 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 4.8 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 
256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + SCORE_THRESH: 0.1 + +# NMS_PRE_MAXSIZE: 4096 +# NMS_POST_MAXSIZE: 500 +# NMS_THRESH: 0.85 + + + ROI_GRID_POOL: + GRID_SIZE: 6 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: voxel_random_choice + NUM_REDUCED_CHANNELS: 30 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.8 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 1.6 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + SKIP_CONNECTION: False + # HEADS: 4, + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + # IN_BETWEEN_MLP: [256, 256, 256, 256] + IN_BETWEEN_MLP: False + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.001 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/cfgs/kitti_models/pv_rcnn_plusplus_reproduced_by_community_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_plusplus_reproduced_by_community_car_class_only.yaml new file mode 100644 index 000000000..e1dddf3ee --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_plusplus_reproduced_by_community_car_class_only.yaml @@ -0,0 +1,301 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + 
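# global rotation drawn uniformly from ±0.7854 rad (±45°) about the z-axis +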
WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNPlusPlus + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: CenterHead + CLASS_AGNOSTIC: False + + CLASS_NAMES_EACH_HEAD: [ + [ 'Car'] + ] + + SHARED_CONV_CHANNEL: 64 + USE_BIAS_BEFORE_NORM: True + NUM_HM_CONV: 2 + SEPARATE_HEAD_CFG: + HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ] + HEAD_DICT: { + 'center': { 'out_channels': 2, 'num_conv': 2 }, + 'center_z': { 'out_channels': 1, 'num_conv': 2 }, + 'dim': { 'out_channels': 3, 'num_conv': 2 }, + 'rot': { 'out_channels': 2, 'num_conv': 2 }, + } + + TARGET_ASSIGNER_CONFIG: + FEATURE_MAP_STRIDE: 8 + NUM_MAX_OBJS: 500 + GAUSSIAN_OVERLAP: 0.1 + MIN_RADIUS: 2 + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + } + + POST_PROCESSING: + SCORE_THRESH: 0.01 + POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ] + MAX_OBJ_PER_SAMPLE: 500 + NMS_CONFIG: + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 4096 + NUM_OUTPUT_FEATURES: 90 + SAMPLE_METHOD: SPC + SPC_SAMPLING: + NUM_SECTORS: 6 + SAMPLE_RADIUS_WITH_ROI: 1.6 + + FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 1 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 32 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 2, 2, 2 ] + MAX_NEIGHBOR_DISTANCE: 0.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 32, 32 ] + + x_conv3: + DOWNSAMPLE_FACTOR: 4 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [128] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [3, 3, 3] + MAX_NEIGHBOR_DISTANCE: 1.2 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [64, 64] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + x_conv4: + DOWNSAMPLE_FACTOR: 8 + INPUT_CHANNELS: 64 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: local_interpolation + NUM_REDUCED_CHANNELS: 32 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + FILTER_NEIGHBOR_WITH_ROI: True + RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4 + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 2.4 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 4.8 + NEIGHBOR_NSAMPLE: -1 + POST_MLPS: [ 64, 64 ] + + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 
'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHead + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + SCORE_THRESH: 0.1 + +# NMS_PRE_MAXSIZE: 4096 +# NMS_POST_MAXSIZE: 500 +# NMS_THRESH: 0.85 + + + ROI_GRID_POOL: + GRID_SIZE: 6 + + NAME: VectorPoolAggregationModuleMSG + NUM_GROUPS: 2 + LOCAL_AGGREGATION_TYPE: voxel_random_choice + NUM_REDUCED_CHANNELS: 30 + NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32 + MSG_POST_MLPS: [ 128 ] + + GROUP_CFG_0: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 0.8 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + GROUP_CFG_1: + NUM_LOCAL_VOXEL: [ 3, 3, 3 ] + MAX_NEIGHBOR_DISTANCE: 1.6 + NEIGHBOR_NSAMPLE: 32 + POST_MLPS: [ 64, 64 ] + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.001 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/cfgs/kitti_models/pv_rcnn_relation.yaml b/tools/cfgs/kitti_models/pv_rcnn_relation.yaml new file mode 100644 index 000000000..eff359b58 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_relation.yaml @@ -0,0 +1,293 @@ +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + 
USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted according to scores and then we take the NMS_PRE_MAXSIZE first + NMS_PRE_MAXSIZE: 9000 + # proposals with more than NMS_THRESH overlap with others will be removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals after the sampling + ROI_PER_IMAGE: 128 + # max number of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining the foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # from the background proposals, a fraction HARD_BG_RATIO have very small iou with gt (the remaining 1-HARD_BG_RATIO have larger iou with gt
-> still background) + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + # HEADS: 4, + SKIP_CONNECTION: True + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + # IN_BETWEEN_MLP: [256, 256, 256, 256] + IN_BETWEEN_MLP: False + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car, Pedestrian, Cyclist + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml b/tools/cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml new file mode 100644 index 000000000..837b60e75 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml @@ -0,0 +1,273 @@ +CLASS_NAMES: ['Car'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + 
LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted according to scores and then we take the NMS_PRE_MAXSIZE first + NMS_PRE_MAXSIZE: 9000 + # proposals with more than NMS_THRESH overlap with others will be removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals after the sampling + ROI_PER_IMAGE: 128 + # max number of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining the foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # from the background proposals, a fraction HARD_BG_RATIO have very small iou with gt (the remaining 1-HARD_BG_RATIO have larger iou with gt -> still background) + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + # NAME: radius_graph, + # RADIUS: 6, + # CONNECT_ONLY_SAME_CLASS: False + # } + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + # HEADS: 4, + SKIP_CONNECTION: True + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + # EDGE_EMBEDDING: True, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + # IN_BETWEEN_MLP: [256, 256, 256, 256] + IN_BETWEEN_MLP: False + # GLOBAL_INFORMATION: { + # MLP_LAYERS: [256, 256], + # CONCATENATED: False, + # } + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car only + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 +
OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/kitti_models/pv_rcnn_relation_fc.yaml b/tools/cfgs/kitti_models/pv_rcnn_relation_fc.yaml new file mode 100644 index 000000000..055c5b9b6 --- /dev/null +++ b/tools/cfgs/kitti_models/pv_rcnn_relation_fc.yaml @@ -0,0 +1,271 @@ +CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml + DATA_AUGMENTOR: + DISABLE_AUG_LIST: ['placeholder'] + AUG_CONFIG_LIST: + - NAME: gt_sampling + USE_ROAD_PLANE: True + DB_INFO_PATH: + - kitti_dbinfos_train.pkl + PREPARE: { + filter_by_min_points: ['Car:5', 'Pedestrian:5', 'Cyclist:5'], + filter_by_difficulty: [-1], + } + + SAMPLE_GROUPS: ['Car:15','Pedestrian:10', 'Cyclist:10'] + NUM_POINT_FEATURES: 4 + DATABASE_WITH_FAKELIDAR: False + REMOVE_EXTRA_WIDTH: [0.0, 0.0, 0.0] + LIMIT_WHOLE_SCENE: False + + - NAME: random_world_flip + ALONG_AXIS_LIST: ['x'] + + - NAME: random_world_rotation + WORLD_ROT_ANGLE: [-0.78539816, 0.78539816] + + - NAME: random_world_scaling + WORLD_SCALE_RANGE: [0.95, 1.05] + +MODEL: + NAME: PVRCNNRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: + NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Car', + 'anchor_sizes': [[3.9, 1.6, 1.56]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-1.78], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.6, + 'unmatched_threshold': 0.45 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.8, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.76, 0.6, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [-0.6], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 2048 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv1', 'x_conv2', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 
16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + # the proposals are sorted according to scores and then we take the NMS_PRE_MAXSIZE first + NMS_PRE_MAXSIZE: 9000 + # proposals with more than NMS_THRESH overlap with others will be removed + NMS_THRESH: 0.8 + # the proposals after NMS will be limited to NMS_POST_MAXSIZE + NMS_POST_MAXSIZE: 512 + # values are different for testing + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 1024 + NMS_POST_MAXSIZE: 100 + NMS_THRESH: 0.7 + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + # number of proposals after the sampling + ROI_PER_IMAGE: 121 # from 128 + # max number of foreground proposals + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + # thresholds defining the foreground, easy background and hard background + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + # from the background proposals, a fraction HARD_BG_RATIO have very small iou with gt (the remaining 1-HARD_BG_RATIO have larger iou with gt -> still background) + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: CGNLNet + GLOBAL_INFORMATION: { + MLP_LAYERS: [256, 256], + CONCATENATED: False, + } + SKIP_CONNECTION: True + DP_RATIO: 0.3 + + + POST_PROCESSING: + # IoU Threshold + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + # Car, Pedestrian, Cyclist + SCORE_THRESH: 0.1 + # SCORE_THRESH: 0.0 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: kitti + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + NMS_THRESH: 0.1 + # NMS_THRESH: 1.0 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 80 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.01 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 +# + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 diff --git a/tools/cfgs/waymo_models/pv_rcnn_relation.yaml b/tools/cfgs/waymo_models/pv_rcnn_relation.yaml new file mode 100644 index 000000000..babc9907d --- /dev/null +++ b/tools/cfgs/waymo_models/pv_rcnn_relation.yaml @@ -0,0 +1,250 @@ +CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist'] + +DATA_CONFIG: + _BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml + + +MODEL: + NAME: PVRCNNRelation + + VFE: + NAME: MeanVFE + + BACKBONE_3D: +
NAME: VoxelBackBone8x + + MAP_TO_BEV: + NAME: HeightCompression + NUM_BEV_FEATURES: 256 + + BACKBONE_2D: + NAME: BaseBEVBackbone + + LAYER_NUMS: [5, 5] + LAYER_STRIDES: [1, 2] + NUM_FILTERS: [128, 256] + UPSAMPLE_STRIDES: [1, 2] + NUM_UPSAMPLE_FILTERS: [256, 256] + + DENSE_HEAD: + NAME: AnchorHeadSingle + CLASS_AGNOSTIC: False + + USE_DIRECTION_CLASSIFIER: True + DIR_OFFSET: 0.78539 + DIR_LIMIT_OFFSET: 0.0 + NUM_DIR_BINS: 2 + + ANCHOR_GENERATOR_CONFIG: [ + { + 'class_name': 'Vehicle', + 'anchor_sizes': [[4.7, 2.1, 1.7]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [0], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.55, + 'unmatched_threshold': 0.4 + }, + { + 'class_name': 'Pedestrian', + 'anchor_sizes': [[0.91, 0.86, 1.73]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [0], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + }, + { + 'class_name': 'Cyclist', + 'anchor_sizes': [[1.78, 0.84, 1.78]], + 'anchor_rotations': [0, 1.57], + 'anchor_bottom_heights': [0], + 'align_center': False, + 'feature_map_stride': 8, + 'matched_threshold': 0.5, + 'unmatched_threshold': 0.35 + } + ] + + TARGET_ASSIGNER_CONFIG: + NAME: AxisAlignedTargetAssigner + POS_FRACTION: -1.0 + SAMPLE_SIZE: 512 + NORM_BY_NUM_EXAMPLES: False + MATCH_HEIGHT: False + BOX_CODER: ResidualCoder + + LOSS_CONFIG: + LOSS_WEIGHTS: { + 'cls_weight': 1.0, + 'loc_weight': 2.0, + 'dir_weight': 0.2, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + PFE: + NAME: VoxelSetAbstraction + POINT_SOURCE: raw_points + NUM_KEYPOINTS: 4096 + NUM_OUTPUT_FEATURES: 128 + SAMPLE_METHOD: FPS + + FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points'] + SA_LAYER: + raw_points: + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv1: + DOWNSAMPLE_FACTOR: 1 + MLPS: [[16, 16], [16, 16]] + POOL_RADIUS: [0.4, 0.8] + NSAMPLE: [16, 16] + x_conv2: + DOWNSAMPLE_FACTOR: 2 + MLPS: [[32, 32], [32, 32]] + POOL_RADIUS: [0.8, 1.2] + NSAMPLE: [16, 32] + x_conv3: + DOWNSAMPLE_FACTOR: 4 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [1.2, 2.4] + NSAMPLE: [16, 32] + x_conv4: + DOWNSAMPLE_FACTOR: 8 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [2.4, 4.8] + NSAMPLE: [16, 32] + + POINT_HEAD: + NAME: PointHeadSimple + CLS_FC: [256, 256] + CLASS_AGNOSTIC: True + USE_POINT_FEATURES_BEFORE_FUSION: True + TARGET_CONFIG: + GT_EXTRA_WIDTH: [0.2, 0.2, 0.2] + LOSS_CONFIG: + LOSS_REG: smooth-l1 + LOSS_WEIGHTS: { + 'point_cls_weight': 1.0, + } + + ROI_HEAD: + NAME: PVRCNNHeadRelation + CLASS_AGNOSTIC: True + + SHARED_FC: [256, 256] + CLS_FC: [256, 256] + REG_FC: [256, 256] + DP_RATIO: 0.3 + + NMS_CONFIG: + TRAIN: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False + NMS_PRE_MAXSIZE: 9000 + NMS_POST_MAXSIZE: 512 + NMS_THRESH: 0.8 + TEST: + NMS_TYPE: nms_gpu + MULTI_CLASSES_NMS: False +# NMS_PRE_MAXSIZE: 1024 +# NMS_POST_MAXSIZE: 100 +# NMS_THRESH: 0.7 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 300 + NMS_THRESH: 0.85 + + + ROI_GRID_POOL: + GRID_SIZE: 6 + MLPS: [[64, 64], [64, 64]] + POOL_RADIUS: [0.8, 1.6] + NSAMPLE: [16, 16] + POOL_METHOD: max_pool + + TARGET_CONFIG: + BOX_CODER: ResidualCoder + ROI_PER_IMAGE: 128 + FG_RATIO: 0.5 + + SAMPLE_ROI_BY_EACH_CLASS: True + CLS_SCORE_TYPE: roi_iou + + CLS_FG_THRESH: 0.75 + CLS_BG_THRESH: 0.25 + CLS_BG_THRESH_LO: 0.1 + HARD_BG_RATIO: 0.8 + + REG_FG_THRESH: 0.55 + + LOSS_CONFIG: + CLS_LOSS: BinaryCrossEntropy + REG_LOSS: smooth-l1 + CORNER_LOSS_REGULARIZATION: True + LOSS_WEIGHTS: { + 
'rcnn_cls_weight': 1.0, + 'rcnn_reg_weight': 1.0, + 'rcnn_corner_weight': 1.0, + 'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + + OBJECT_RELATION: + NAME: GNN + GRAPH: { + CONV: { + NAME: EdgeConv, + EDGE_EMBEDDING: True, + SKIP_CONNECTION: False + }, + DYNAMIC: False, + NAME: knn_graph, + K: 16, + CONNECT_ONLY_SAME_CLASS: False, + SPACE: R3, + } + LAYERS: [256, 256, 256, 256] + SKIP_CONNECTION: True + IN_BETWEEN_MLP: False + GLOBAL_INFORMATION: False + DP_RATIO: 0.3 + + POST_PROCESSING: + RECALL_THRESH_LIST: [0.3, 0.5, 0.7] + SCORE_THRESH: 0.1 + OUTPUT_RAW_SCORE: False + + EVAL_METRIC: waymo + + NMS_CONFIG: + MULTI_CLASSES_NMS: False + NMS_TYPE: nms_gpu + # NMS_THRESH: 1.0 + # NMS_THRESH: 0.7 + NMS_THRESH: 0.1 + NMS_PRE_MAXSIZE: 4096 + NMS_POST_MAXSIZE: 500 + + +OPTIMIZATION: + BATCH_SIZE_PER_GPU: 2 + NUM_EPOCHS: 30 + + OPTIMIZER: adam_onecycle + LR: 0.01 + WEIGHT_DECAY: 0.001 + MOMENTUM: 0.9 + + MOMS: [0.95, 0.85] + PCT_START: 0.4 + DIV_FACTOR: 10 + DECAY_STEP_LIST: [35, 45] + LR_DECAY: 0.1 + LR_CLIP: 0.0000001 + + LR_WARMUP: False + WARMUP_EPOCH: 1 + + GRAD_NORM_CLIP: 10 \ No newline at end of file diff --git a/tools/kitti_demo.py b/tools/kitti_demo.py new file mode 100644 index 000000000..91861216a --- /dev/null +++ b/tools/kitti_demo.py @@ -0,0 +1,81 @@ +import os +import torch +import json + +from pcdet.models import build_network, load_data_to_gpu +from pathlib import Path +from pcdet.config import cfg, cfg_from_yaml_file +from pcdet.utils import common_utils +from pcdet.datasets.kitti.kitti_dataset import KittiDataset + +# import mayavi.mlab as mlab +# from visual_utils import visualize_utils as V + +NUMBER_OF_SCENES = 500 + +def main(cfg_path, model_path, save_3d=False, tag=None): + cfg_from_yaml_file(cfg_path, cfg) + logger = common_utils.create_logger() + logger.info('-----------------Creating data for visualization-------------------------') + kitti_dataset = KittiDataset( + dataset_cfg=cfg.DATA_CONFIG, class_names=cfg.CLASS_NAMES, training=False, root_path=Path(cfg.DATA_CONFIG.DATA_PATH), + ) + + logger.info(f'Total number of samples: \t{NUMBER_OF_SCENES}') + + model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=kitti_dataset) + model.load_params_from_file(filename=model_path, logger=logger, to_cpu=True) + model.cuda() + model.eval() + + # create folder for visualization + vis_path = '/'.join(os.path.normpath(model_path).split(os.path.sep)[:-2]) + '/visualization' + tag + os.makedirs(vis_path, exist_ok=True) + + with torch.no_grad(): + for idx, data_dict in enumerate(kitti_dataset): + if idx >= NUMBER_OF_SCENES: + break + logger.info(f'Visualized sample index: \t{idx + 1}') + data_dict = kitti_dataset.collate_batch([data_dict]) + load_data_to_gpu(data_dict) + pred_dicts, _ = model.forward(data_dict) + + if save_3d: + torch.save(data_dict['points'][:,1:], os.path.join(vis_path, 'points_{}.pt'.format(int(data_dict['frame_id'])))) + torch.save(pred_dicts[0]['pred_boxes'], os.path.join(vis_path, 'pred_boxes_{}.pt'.format(int(data_dict['frame_id'])))) + torch.save(pred_dicts[0]['pred_scores'], os.path.join(vis_path, 'pred_scores_{}.pt'.format(int(data_dict['frame_id'])))) + torch.save(pred_dicts[0]['pred_labels'], os.path.join(vis_path, 'pred_labels_{}.pt'.format(int(data_dict['frame_id'])))) + torch.save(data_dict['gt_boxes'], os.path.join(vis_path, 'gt_boxes_{}.pt'.format(int(data_dict['frame_id'])))) + if 'gnn_edges_final' in pred_dicts[0]: + torch.save(pred_dicts[0]['gnn_edges_final'],os.path.join(vis_path, 
'gnn_edges{}.pt'.format(int(data_dict['frame_id'])))) + json.dump(pred_dicts[0]['edge_to_pred'] , open(os.path.join(vis_path, 'edge_to_predict{}.json'.format(int(data_dict['frame_id']))), 'w')) + + else: + # fig = V.draw_scenes( + # points=data_dict['points'][:, 1:], ref_boxes=pred_dicts[0]['pred_boxes'], + # ref_scores=pred_dicts[0]['pred_scores'], ref_labels=pred_dicts[0]['pred_labels'] + # ) + # mlab.savefig(os.path.join(vis_path, 'points_{}.pt'.format(int(data_dict['frame_id'])))) + pass + + logger.info('Demo done.') + +if __name__ == '__main__': + # model_path = '../output/cfgs/kitti_models/pv_rcnn_relation/2023-09-15_10-21-38' + model_path = '../output/cfgs/kitti_models/pv_rcnn_relation_car_class_only/2023-09-29_07-21-48' + # model_path = '../output/cfgs/kitti_models/pv_rcnn_relation/2023-08-25_13-47-22' + # full_model_path = model_path + '/ckpt/checkpoint_epoch_73.pth' + full_model_path = model_path + '/ckpt/checkpoint_epoch_80.pth' + # cfg_path = model_path + '/pv_rcnn_relation.yaml' + cfg_path = '../tools/cfgs/kitti_models/pv_rcnn_relation_car_class_only.yaml' + # /pv_rcnn_relation.yaml + tag = '/epoch_80/' + # tag = '/no_post_processing/' + # tag = '/no_post_processing-94_epoch/' + # tag = '/epoch_100_no_post_processing/' + + # model_path = '../output/cfgs/kitti_models/pv_rcnn/2023-08-01_20-06-45/ckpt/checkpoint_epoch_90.pth' + # model_path = '../output/cfgs/kitti_models/pv_rcnn/debug/ckpt/checkpoint_epoch_2.pth' + # model_path = '../output/kitti/pv_rcnn_8369.pth' + main(cfg_path, full_model_path, save_3d=True, tag=tag) \ No newline at end of file diff --git a/tools/process_tools/logger.py b/tools/process_tools/logger.py new file mode 100644 index 000000000..c699844f3 --- /dev/null +++ b/tools/process_tools/logger.py @@ -0,0 +1,8 @@ +import json +from pathlib import Path + +class CustomEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, Path): + return str(obj) + return super().default(obj) \ No newline at end of file diff --git a/tools/scripts/eval_with_tensorboard.sh b/tools/scripts/eval_with_tensorboard.sh new file mode 100755 index 000000000..566fa9398 --- /dev/null +++ b/tools/scripts/eval_with_tensorboard.sh @@ -0,0 +1,19 @@ +#!/bin/bash +date='2023-09-14_13-12-43' +# architecture='pv_rcnn_relation' +# architecture='centerpoint' +# architecture='pv_rcnn_plusplus_reproduced_by_community' +# architecture='pv_rcnn_plusplus_relation' +architecture='pv_rcnn_relation_fc' + +# tmux kill-server + +# Create the first tmux session and run a command +tmux new-session -d -s Session3 +tmux send-keys -t Session3 "(cd tools/; python test.py --cfg ./cfgs/kitti_models/$architecture.yaml --eval_all --extra_tag $date --max_waiting_mins 1440)" C-m + +sleep 300 + +# Create the second tmux session and run another command +tmux new-session -d -s Session4 +tmux send-keys -t Session4 "(cd output/cfgs/kitti_models/$architecture/$date/eval/eval_all_default/default; tensorboard dev upload --logdir tensorboard_val --name "${architecture//-/ }_KITTI_Evaluation_$date")" C-m diff --git a/tools/scripts/experiments.py b/tools/scripts/experiments.py new file mode 100644 index 000000000..9942bf66d --- /dev/null +++ b/tools/scripts/experiments.py @@ -0,0 +1,63 @@ +import os +import glob +import subprocess +import time + +# Initialize paths and names +experiment_name = 'iterative_gnn_1' + +BASE_PATH = '/root/OpenPCDet/tools' +base_path = "./cfgs/kitti_models" +experiments_input_path = os.path.join(base_path, "experiments", experiment_name) + +# Initialize output paths +kitti_output_path = 
"../output/cfgs/kitti_models" +experiment_output_path = os.path.join(kitti_output_path, "experiments", experiment_name) + +# Read all model config paths from the directory +yaml_files = glob.glob(os.path.join(experiments_input_path, "*.yaml")) +experiments = [os.path.splitext(os.path.basename(f))[0] for f in yaml_files] +# sort the experiments +# experiments = sorted(experiments, key=lambda x: int(x.split('_')[0])) + +# print the experiments +print(f"Running experiments at {experiments_input_path}: {experiments}") + +# create tensorboard directory +os.makedirs(os.path.join(experiment_output_path, 'tensorboard'), exist_ok=True) +cmd = f'(cd {experiment_output_path}; tensorboard dev upload --logdir tensorboard --name "{experiment_name}")' +tensorboard_process = subprocess.Popen(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + +# Run all experiments +for e in experiments: + success = False + while not success: + try: + config_path = os.path.join(experiments_input_path, f"{e}.yaml") + cmd = ["python", "train.py", "--cfg", config_path, "--extra_tag", f"{e}"] + subprocess.run(cmd, check=True, cwd=BASE_PATH) + + source_path = os.path.join(experiment_output_path, f"{e}/{e}/tensorboard") + tensor_file = glob.glob(os.path.join(source_path, "*")) + dest_path = os.path.join(experiment_output_path, 'tensorboard', f"{e}") + os.makedirs(dest_path, exist_ok=True) + + # Get the base name of the source file + file_name = os.path.basename(tensor_file[0]) + + # Create the full destination path including the file name + full_dest_path = os.path.join(dest_path, file_name) + + # Perform the copy + os.system(f"cp -r {tensor_file[0]} {full_dest_path}") + print(f"Finished experiment {e}") + success = True + except Exception as ex: + print(f"Error while running experiment {e}: {ex}") + time.sleep(60) + print(f"Retrying experiment {e}") + +# Kill tensorboard process +# wait for 10 minutes to make sure tensorboard is done +time.sleep(600) +tensorboard_process.terminate() diff --git a/tools/scripts/train_with_tensorboard.sh b/tools/scripts/train_with_tensorboard.sh new file mode 100755 index 000000000..0df8ab9f5 --- /dev/null +++ b/tools/scripts/train_with_tensorboard.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# date=$(date "+%Y-%m-%d_%H-%M-%S") +date=2023-10-24_09-25-41 +# date='2023-10-19_12-02-51' +# architecture='pv_rcnn' +architecture="pv_rcnn_relation" +# architecture='centerpoint' +# architecture='pv_rcnn_plusplus_reproduced_by_community' +# architecture='centerpoint_twostage' +# architecture='pv_rcnn_plusplus_relation' +# architecture='pv_rcnn_relation_fc' +# architecture='pv_rcnn_car_class_only' +# architecture='pv_rcnn_relation_car_class_only' +# architecture='pv_rcnn_plusplus_reproduced_by_community_car_class_only' +# architecture='pv_rcnn_plusplus_relation_car_class_only' +# architecture='PartA2_car_class_only' +# architecture='PartA2_relation_car_class_only' +# architecture='voxel_rcnn_car' +# architecture='voxel_rcnn_relation_car' +# architecture='pv_rcnn_frozen' +# architecture='pv_rcnn_frozen_relation' +# architecture='pv_rcnn_BADet_car_class_only' + +# data='kitti' +data='waymo' + +# tmux kill-server + +# Sending the training command to tmux session 0 +# tmux new-session -d -s Session1 +# tmux send-keys -t Session1 "(cd tools/; python train.py --cfg_file ./cfgs/${data}_models/$architecture.yaml --extra_tag $date)" C-m + +tmux new-session -d -s Session3 +tmux send-keys -t Session3 "(cd tools/; python test.py --cfg ./cfgs/${data}_models/$architecture.yaml --eval_all --extra_tag $date 
--max_waiting_mins 1440)" C-m + + sleep 100 + + # tmux new-session -d -s Session2 + # tmux send-keys -t Session2 "(cd output/cfgs/${data}_models/$architecture/$date/; tensorboard dev upload --logdir tensorboard --name \"${architecture^^} $data $date\")" C-m + + + # Create the second tmux session and run another command + tmux new-session -d -s Session4 + tmux send-keys -t Session4 "(cd output/cfgs/${data}_models/$architecture/$date/eval/eval_all_default/default; tensorboard dev upload --logdir tensorboard_val --name "${architecture//-/ }_${data}_Evaluation_$date")" C-m + + # evaluate the model for one epoch + + # tmux new-session -d -s Session3 + # tmux send-keys -t Session3 "(cd tools/; python test.py --cfg_file ./cfgs/${data}_models/${architecture}.yaml --extra_tag $date --ckpt ../output/cfgs/${data}_models/${architecture}/${date}/ckpt/checkpoint_epoch_30.pth)" C-m diff --git a/tools/test.py b/tools/test.py index 51b7178c6..d6f3e6475 100644 --- a/tools/test.py +++ b/tools/test.py @@ -6,6 +6,7 @@ import re import time from pathlib import Path +import json import numpy as np import torch @@ -16,7 +17,7 @@ from pcdet.datasets import build_dataloader from pcdet.models import build_network from pcdet.utils import common_utils - +from tools.process_tools.logger import CustomEncoder def parse_config(): parser = argparse.ArgumentParser(description='arg parser') @@ -95,6 +96,8 @@ def repeat_eval_ckpt(model, test_loader, args, eval_output_dir, logger, ckpt_dir # tensorboard log if cfg.LOCAL_RANK == 0: tb_log = SummaryWriter(log_dir=str(eval_output_dir / ('tensorboard_%s' % cfg.DATA_CONFIG.DATA_SPLIT['test']))) + tb_log.add_text('Configurations', json.dumps(cfg, cls=CustomEncoder, indent=2)) + total_time = 0 first_eval = True diff --git a/tools/train.py b/tools/train.py index 29a88bde9..47b5dbc33 100644 --- a/tools/train.py +++ b/tools/train.py @@ -5,18 +5,19 @@ import os from pathlib import Path from test import repeat_eval_ckpt +import json import torch import torch.nn as nn from tensorboardX import SummaryWriter -from pcdet.config import cfg, cfg_from_list, cfg_from_yaml_file, log_config_to_file +from pcdet.config import cfg, cfg_from_list, cfg_from_yaml_file, log_config_to_file, second_cfg_from_yaml_file from pcdet.datasets import build_dataloader from pcdet.models import build_network, model_fn_decorator from pcdet.utils import common_utils from train_utils.optimization import build_optimizer, build_scheduler from train_utils.train_utils import train_model - +from tools.process_tools.logger import CustomEncoder def parse_config(): parser = argparse.ArgumentParser(description='arg parser') @@ -49,7 +50,6 @@ def parse_config(): parser.add_argument('--ckpt_save_time_interval', type=int, default=300, help='in terms of seconds') parser.add_argument('--wo_gpu_stat', action='store_true', help='') parser.add_argument('--use_amp', action='store_true', help='use mix precision training') - args = parser.parse_args() @@ -112,6 +112,7 @@ def main(): os.system('cp %s %s' % (args.cfg_file, output_dir)) tb_log = SummaryWriter(log_dir=str(output_dir / 'tensorboard')) if cfg.LOCAL_RANK == 0 else None + if tb_log is not None: tb_log.add_text('Configurations', json.dumps(cfg, cls=CustomEncoder, indent=2)) logger.info("----------- Create dataloader & network & optimizer -----------") train_set, train_loader, train_sampler = build_dataloader( @@ -127,6 +128,7 @@ def main(): ) model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=train_set) + if args.sync_bn: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model.cuda() @@ -138,6 +140,15 @@ def main(): last_epoch = -1 if args.pretrained_model is not None: model.load_params_from_file(filename=args.pretrained_model, to_cpu=dist_train, logger=logger) + + if 'PRE_TRAINED' in cfg: + model.load_params_from_file(filename=cfg.PRE_TRAINED.MODEL_PATH, to_cpu=dist_train, logger=logger, learnable_layer=cfg.PRE_TRAINED.LEARNABLE_LAYER) + # freeze all other layers + for name, param in model.named_parameters(): + if any([(l in name) for l in cfg.PRE_TRAINED.LEARNABLE_LAYER]): + print('learnable layer: ', name) + else: + param.requires_grad = False if args.ckpt is not None: it, start_epoch = model.load_params_with_optimizer(args.ckpt, to_cpu=dist_train, optimizer=optimizer, logger=logger) diff --git a/tools/waymo_demo.py b/tools/waymo_demo.py new file mode 100644 index 000000000..a2a97237f --- /dev/null +++ b/tools/waymo_demo.py @@ -0,0 +1,144 @@ +import argparse +import glob +import json +from pathlib import Path +import os + +# try: +# import open3d +# from visual_utils import open3d_vis_utils as V +# OPEN3D_FLAG = True +# except: +# import mayavi.mlab as mlab +# from visual_utils import visualize_utils as V +# OPEN3D_FLAG = False + +import numpy as np +import torch + +from pcdet.config import cfg, cfg_from_yaml_file +from pcdet.datasets import DatasetTemplate +from pcdet.models import build_network, load_data_to_gpu +from pcdet.utils import common_utils +from pcdet.datasets.waymo.waymo_dataset import WaymoDataset + +NUMBER_OF_SCENES = 100 + + +class DemoDataset(DatasetTemplate): + def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None, ext='.bin'): + """ + Args: + root_path: + dataset_cfg: + class_names: + training: + logger: + """ + super().__init__( + dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger + ) + self.root_path = root_path + self.ext = ext + data_file_list = glob.glob(str(root_path / f'*{self.ext}')) if self.root_path.is_dir() else [self.root_path] + + data_file_list.sort() + self.sample_file_list = data_file_list + + def __len__(self): + return len(self.sample_file_list) + + def __getitem__(self, index): + if self.ext == '.bin': + points = np.fromfile(self.sample_file_list[index], dtype=np.float32).reshape(-1, 4) + elif self.ext == '.npy': + points = np.load(self.sample_file_list[index]) + else: + raise NotImplementedError + + input_dict = { + 'points': points[:, :5], + 'frame_id': index, + } + + data_dict = self.prepare_data(data_dict=input_dict) + return data_dict + + +def parse_config(): + parser = argparse.ArgumentParser(description='arg parser') + parser.add_argument('--cfg_file', type=str, default='cfgs/kitti_models/second.yaml', + help='specify the config for demo') + parser.add_argument('--data_path', type=str, default='demo_data', + help='specify the point cloud data file or directory') + parser.add_argument('--ckpt', type=str, default=None, help='specify the pretrained model') + parser.add_argument('--ext', type=str, default='.bin', help='specify the extension of your point cloud data file') + + args = parser.parse_args() + + cfg_from_yaml_file(args.cfg_file, cfg) + + return args, cfg + + +def main(model_path, cfg_path, tag): + cfg_from_yaml_file(cfg_path, cfg) + logger = common_utils.create_logger() + logger.info('-----------------Quick Demo of OpenPCDet-------------------------') + dataset = WaymoDataset( + dataset_cfg=cfg.DATA_CONFIG, class_names=cfg.CLASS_NAMES, training=False, logger=logger + ) + + logger.info(f'Total number of samples: 
\t{len(dataset)}') + + model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset) + model.load_params_from_file(filename=model_path, logger=logger, to_cpu=True) + model.cuda() + model.eval() + + vis_path = '/'.join(os.path.normpath(model_path).split(os.path.sep)[:-2]) + '/visualization/' + tag + os.makedirs(vis_path, exist_ok=True) + + with torch.no_grad(): + for idx, data_dict in enumerate(dataset): + if idx % 100 != 0: + continue + if idx >= NUMBER_OF_SCENES * 100: + break + logger.info(f'Visualized sample index: \t{idx + 1}') + data_dict = dataset.collate_batch([data_dict]) + load_data_to_gpu(data_dict) + pred_dicts, _ = model.forward(data_dict) + + frame_id = str(data_dict['frame_id'][0]) + + torch.save(data_dict['points'][:,1:], os.path.join(vis_path, 'points_{}.pt'.format(frame_id))) + torch.save(pred_dicts[0]['pred_boxes'], os.path.join(vis_path, 'pred_boxes_{}.pt'.format(frame_id))) + torch.save(pred_dicts[0]['pred_scores'], os.path.join(vis_path, 'pred_scores_{}.pt'.format(frame_id))) + torch.save(pred_dicts[0]['pred_labels'], os.path.join(vis_path, 'pred_labels_{}.pt'.format(frame_id))) + torch.save(data_dict['gt_boxes'], os.path.join(vis_path, 'gt_boxes_{}.pt'.format(frame_id))) + if 'gnn_edges_final' in pred_dicts[0]: + torch.save(pred_dicts[0]['gnn_edges_final'],os.path.join(vis_path, 'gnn_edges{}.pt'.format(frame_id))) + json.dump(pred_dicts[0]['edge_to_pred'] , open(os.path.join(vis_path, 'edge_to_predict{}.json'.format(frame_id)), 'w')) + + # V.draw_scenes( + # points=data_dict['points'][:, 1:], ref_boxes=pred_dicts[0]['pred_boxes'], + # ref_scores=pred_dicts[0]['pred_scores'], ref_labels=pred_dicts[0]['pred_labels'] + # ) + + # if not OPEN3D_FLAG: + # mlab.show(stop=True) + + logger.info('Demo done.') + + +if __name__ == '__main__': + + model_path = "../output/cfgs/waymo_models/pv_rcnn/2023-10-19_12-02-51/ckpt/checkpoint_epoch_30.pth" + cfg_path = "./cfgs/waymo_models/pv_rcnn.yaml" + tag = "epoch_30" + + # model_path = "../output/cfgs/waymo_models/pv_rcnn_relation/2023-10-24_09-25-41/ckpt/checkpoint_epoch_25.pth" + # cfg_path = "./cfgs/waymo_models/pv_rcnn_relation.yaml" + # tag = "epoch_25" + main(model_path, cfg_path, tag)
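
For reference, the OBJECT_RELATION blocks in the configs above (GRAPH.NAME: knn_graph, CONV.NAME: EdgeConv, K: 16, LAYERS: [256, 256, 256, 256], SKIP_CONNECTION: True) describe k-NN message passing over box proposals. Below is a minimal sketch of that idea using torch_geometric; the module name, tensor shapes, and wiring are illustrative assumptions, not the implementation shipped in this PR.

import torch
import torch.nn as nn
from torch_cluster import knn_graph
from torch_geometric.nn import EdgeConv


class ProposalRelationGNN(nn.Module):
    # Hypothetical sketch (not the repo's code): refines pooled RoI features by
    # message passing over a k-NN graph built from proposal box centers.
    def __init__(self, in_channels=256, layers=(256, 256, 256, 256), k=16):
        super().__init__()
        self.k = k
        convs = []
        c = in_channels
        for out_c in layers:
            # EdgeConv feeds [x_i, x_j - x_i] through its MLP, hence 2 * c inputs.
            mlp = nn.Sequential(nn.Linear(2 * c, out_c), nn.ReLU())
            convs.append(EdgeConv(mlp, aggr='max'))
            c = out_c
        self.convs = nn.ModuleList(convs)

    def forward(self, roi_centers, roi_features, batch_idx):
        # roi_centers: (N, 3) box centers (SPACE: R3); roi_features: (N, C) pooled
        # RoI features; batch_idx: (N,) long tensor of sample indices, so edges
        # never connect proposals from different scenes in the batch.
        edge_index = knn_graph(roi_centers, k=self.k, batch=batch_idx, loop=False)
        x = roi_features
        for conv in self.convs:
            x = conv(x, edge_index) + x  # SKIP_CONNECTION: True (equal dims assumed)
        return x, edge_index

With DYNAMIC: False the graph is built once from the initial centers, as above; a dynamic variant would recompute knn_graph from the updated features inside the layer loop.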