diff --git a/README.md b/README.md
index 0525c176..c86e3619 100644
--- a/README.md
+++ b/README.md
@@ -1,60 +1,72 @@
-# SECOND for KITTI object detection
-SECOND detector. Based on my unofficial implementation of VoxelNet with some improvements.
+# PointPillars
 
-ONLY support python 3.6+, pytorch 0.4.1+. Don't support pytorch 0.4.0. Tested in Ubuntu 16.04/18.04.
+Welcome to PointPillars.
 
-* Ubuntu 18.04 have speed problem in my environment and may can't build/usr SparseConvNet.
+This repo demonstrates how to reproduce the results from
+_PointPillars: Fast Encoders for Object Detection from Point Clouds_ on the
+[KITTI dataset](http://www.cvlibs.net/datasets/kitti/) by making the minimum required changes from the preexisting
+open source codebase [SECOND](https://github.com/traveller59/second.pytorch).
 
-### Performance in KITTI validation set (50/50 split, people have problems, need to be tuned.)
+This is not an official nuTonomy codebase, but it can be used to match the published PointPillars results.
 
-```
-Car AP@0.70, 0.70, 0.70:
-bbox AP:90.80, 88.97, 87.52
-bev  AP:89.96, 86.69, 86.11
-3d   AP:87.43, 76.48, 74.66
-aos  AP:90.68, 88.39, 86.57
-Car AP@0.70, 0.50, 0.50:
-bbox AP:90.80, 88.97, 87.52
-bev  AP:90.85, 90.02, 89.36
-3d   AP:90.85, 89.86, 89.05
-aos  AP:90.68, 88.39, 86.57
-```
+![Example Results](https://raw.githubusercontent.com/nutonomy/second.pytorch/master/images/pointpillars_kitti_results.pdf)
+
+
+## Getting Started
+
+This is a fork of [SECOND for KITTI object detection](https://github.com/traveller59/second.pytorch) and the relevant
+subset of the original README is reproduced here.
+
+### Code Support
+
+ONLY supports Python 3.6+ and PyTorch 0.4.1+. The code has only been tested on Ubuntu 16.04/18.04.
 
-## Install
+### Install
 
-### 1. Clone code
+#### 1. Clone code
 
 ```bash
-git clone https://github.com/traveller59/second.pytorch.git
-cd ./second.pytorch/second
+git clone https://github.com/nutonomy/second.pytorch.git
 ```
 
-### 2. Install dependence python packages
+#### 2. Install Python packages
 
-It is recommend to use Anaconda package manager.
+It is recommended to use the Anaconda package manager.
+First, use Anaconda to configure as many packages as possible.
 
 ```bash
-pip install shapely fire pybind11 tensorboardX protobuf scikit-image numba pillow
+conda create -n pointpillars python=3.7 anaconda
+source activate pointpillars
+conda install shapely pybind11 protobuf scikit-image numba pillow
+conda install pytorch torchvision -c pytorch
+conda install google-sparsehash -c bioconda
 ```
 
-If you don't have Anaconda:
-
+Then use pip for the packages missing from Anaconda.
 ```bash
-pip install numba
+pip install --upgrade pip
+pip install fire tensorboardX
 ```
 
-Follow instructions in https://github.com/facebookresearch/SparseConvNet to install SparseConvNet.
+Finally, install SparseConvNet. This is not required for PointPillars, but the general SECOND code base expects this
+to be correctly configured.
+```bash
+git clone git@github.com:facebookresearch/SparseConvNet.git
+cd SparseConvNet/
+bash build.sh
+# NOTE: if bash build.sh fails, try bash develop.sh instead
+```
 
-Install Boost geometry:
+Additionally, you may need to install Boost geometry:
 
 ```bash
 sudo apt-get install libboost-all-dev
 ```
 
-### 3. Setup cuda for numba
+#### 3. Setup CUDA for numba
 
-you need to add following environment variable for numba.cuda, you can add them to ~/.bashrc:
+You need to add the following environment variables for numba to your ~/.bashrc:
 
 ```bash
 export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so
@@ -62,11 +74,13 @@ export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so
 export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice
 ```
 
-### 4. add second.pytorch/ to PYTHONPATH
+#### 4. PYTHONPATH
 
-## Prepare dataset
+Add second.pytorch/ to your PYTHONPATH.
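+For example, you might add an export to your ~/.bashrc (a sketch; adjust the path to wherever you
+cloned the repo):
+
+```bash
+export PYTHONPATH=$PYTHONPATH:/path/to/second.pytorch
+```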
 
-* Dataset preparation
+### Prepare dataset
+
+#### 1. Dataset preparation
 
@@ -85,27 +99,29 @@ Download KITTI dataset and create some directories first:
        └── velodyne_reduced <-- empty directory
 ```
 
-* Create kitti infos:
+Note: PointPillars' protos use ```KITTI_DATASET_ROOT=/data/sets/kitti_second/```.
+
+#### 2. Create kitti infos:
 
 ```bash
 python create_data.py create_kitti_info_file --data_path=KITTI_DATASET_ROOT
 ```
 
-* Create reduced point cloud:
+#### 3. Create reduced point cloud:
 
 ```bash
 python create_data.py create_reduced_point_cloud --data_path=KITTI_DATASET_ROOT
 ```
 
-* Create groundtruth-database infos:
+#### 4. Create groundtruth-database infos:
 
 ```bash
 python create_data.py create_groundtruth_database --data_path=KITTI_DATASET_ROOT
 ```
 
-* Modify config file
+#### 5. Modify config file
 
-There is some path need to be configured in config file:
+The config file needs to be edited to point to the above datasets:
 
 ```bash
 train_input_reader: {
@@ -125,135 +141,28 @@ eval_input_reader: {
   }
 }
 
-## Usage
-### train
+### Train
 
 ```bash
-python ./pytorch/train.py train --config_path=./configs/car.config --model_dir=/path/to/model_dir
+cd ~/second.pytorch/second
+python ./pytorch/train.py train --config_path=./configs/pointpillars/car/xyres_16.proto --model_dir=/path/to/model_dir
 ```
 
-* Make sure "/path/to/model_dir" doesn't exist if you want to train new model. A new directory will be created if the model_dir doesn't exist, otherwise will read checkpoints in it.
+* If you want to train a new model, make sure "/path/to/model_dir" doesn't exist.
+* If "/path/to/model_dir" does exist, training will be resumed from the last checkpoint.
+* Training only supports a single GPU.
+* Training uses a batch size of 2, which should fit in memory on most standard GPUs.
+* On a single 1080Ti, training xyres_16 requires approximately 20 hours for 160 epochs.
 
-* training process use batchsize=3 as default for 1080Ti, you need to reduce batchsize if your GPU has less memory.
-* Currently only support single GPU training, but train a model only needs 20 hours (165 epoch) in a single 1080Ti and only needs 40 epoch to reach 74 AP in car moderate 3D in Kitti validation dateset.
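+
+As a sanity check on the step counts used throughout the configs, they follow from the size of the
+training set (a sketch, assuming the standard 3712-sample KITTI train split):
+
+```Python
+samples_per_epoch = 3712                            # standard KITTI train/val split (3712/3769)
+batch_size = 2
+steps_per_epoch = samples_per_epoch // batch_size   # 1856, as noted in the .proto comments
+print(steps_per_epoch * 160)                        # 296960 total training steps
+print(steps_per_epoch * 15)                         # 27840 steps between learning-rate decays
+```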
+### Evaluate
 
-### evaluate
 ```bash
-python ./pytorch/train.py evaluate --config_path=./configs/car.config --model_dir=/path/to/model_dir
-```
-
-* detection result will saved as a result.pkl file in model_dir/eval_results/step_xxx or save as official KITTI label format if you use --pickle_result=False.
-
-### pretrained model
-
-Before using pretrained model, you need to modify some file in SparseConvNet because the pretrained model doesn't support SparseConvNet master:
-
-* convolution.py
-```Python
-# self.weight = Parameter(torch.Tensor(
-#     self.filter_volume, nIn, nOut).normal_(
-#         0,
-#         std))
-self.weight = Parameter(torch.Tensor(
-    self.filter_volume * nIn, nOut).normal_(
-        0,
-        std))
-# ...
-# output.features = ConvolutionFunction.apply(
-#     input.features,
-#     self.weight,
-output.features = ConvolutionFunction.apply(
-    input.features,
-    self.weight.view(self.filter_volume, self.nIn, self.nOut),
-```
-
-* submanifoldConvolution.py
-```Python
-# self.weight = Parameter(torch.Tensor(
-#     self.filter_volume, nIn, nOut).normal_(
-#         0,
-#         std))
-self.weight = Parameter(torch.Tensor(
-    self.filter_volume * nIn, nOut).normal_(
-        0,
-        std))
-# ...
-# output.features = SubmanifoldConvolutionFunction.apply(
-#     input.features,
-#     self.weight,
-output.features = SubmanifoldConvolutionFunction.apply(
-    input.features,
-    self.weight.view(self.filter_volume, self.nIn, self.nOut),
-```
-
-You can download pretrained models in [google drive](https://drive.google.com/open?id=1eblyuILwbxkJXfIP5QlALW5N_x5xJZhL). The car model is corresponding to car.config, the car_tiny model is corresponding to car.tiny.config and the people model is corresponding to people.config.
-
-## Docker
-
-You can use a prebuilt docker for testing:
+cd ~/second.pytorch/second/
+python pytorch/train.py evaluate --config_path=configs/pointpillars/car/xyres_16.proto --model_dir=/path/to/model_dir
 ```
-docker pull scrin/second-pytorch
-```
-Then run:
-```
-nvidia-docker run -it --rm -v /media/yy/960evo/datasets/:/root/data -v $HOME/pretrained_models:/root/model --ipc=host second-pytorch:latest
-python ./pytorch/train.py evaluate --config_path=./configs/car.config --model_dir=/root/model/car
-...
-```
-
-Currently there is a problem that training and evaluating in docker is very slow.
-
-## Try Kitti Viewer Web
-
-### Major step
-
-1. run ```python ./kittiviewer/backend.py main --port=xxxx``` in your server/local.
-
-2. run ```cd ./kittiviewer/frontend && python -m http.server``` to launch a local web server.
-
-3. open your browser and enter your frontend url (e.g. http://127.0.0.1:8000, default]).
-
-4. input backend url (e.g. http://127.0.0.1:16666)
-
-5. input root path, info path and det path (optional)
-
-6. click load, loadDet (optional), input image index in center bottom of screen and press Enter.
-
-### Inference step
-
-Firstly the load button must be clicked and load successfully.
-
-1. input checkpointPath and configPath.
-
-2. click buildNet.
-
-3. click inference.
-
-![GuidePic](https://raw.githubusercontent.com/traveller59/second.pytorch/master/images/viewerweb.png)
-
-
-
-## Try Kitti Viewer (Deprecated)
-
-You should use kitti viewer based on pyqt and pyqtgraph to check data before training.
-
-run ```python ./kittiviewer/viewer.py```, check following picture to use kitti viewer:
-![GuidePic](https://raw.githubusercontent.com/traveller59/second.pytorch/master/images/simpleguide.png)
-
-## Concepts
-
-
-* Kitti lidar box
-
-A kitti lidar box is consist of 7 elements: [x, y, z, w, l, h, rz], see figure.
-
-![Kitti Box Image](https://raw.githubusercontent.com/traveller59/second.pytorch/master/images/kittibox.png)
-
-All training and inference code use kitti box format. So we need to convert other format to KITTI format before training.
-
-* Kitti camera box
-A kitti camera box is consist of 7 elements: [x, y, z, l, h, w, ry].
+* Detection results will be saved in model_dir/eval_results/step_xxx.
+* By default, results are stored as a result.pkl file. To save in the official KITTI label format, use --pickle_result=False.
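+
+For example, the pickled detections can be inspected directly (a minimal sketch; the step
+directory name and the exact annotation format depend on your run):
+
+```Python
+import pickle
+
+# Illustrative path: substitute your own model_dir and global step.
+with open('/path/to/model_dir/eval_results/step_296960/result.pkl', 'rb') as f:
+    dt_annos = pickle.load(f)  # one KITTI-style annotation dict per frame
+print(len(dt_annos))
+```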
diff --git a/images/pointpillars_kitti_results.pdf b/images/pointpillars_kitti_results.pdf new file mode 100644 index 00000000..e49f0a5a Binary files /dev/null and b/images/pointpillars_kitti_results.pdf differ diff --git a/second/builder/dataset_builder.py b/second/builder/dataset_builder.py index 231e1f2c..bc6d2d4c 100644 --- a/second/builder/dataset_builder.py +++ b/second/builder/dataset_builder.py @@ -85,6 +85,7 @@ def build(input_reader_config, gt_loc_noise_std=list(cfg.groundtruth_localization_noise_std), global_rotation_noise=list(cfg.global_rotation_uniform_noise), global_scaling_noise=list(cfg.global_scaling_uniform_noise), + global_loc_noise_std=(0.2, 0.2, 0.2), global_random_rot_range=list( cfg.global_random_rotation_range_per_object), db_sampler=db_sampler, diff --git a/second/configs/pointpillars/README.md b/second/configs/pointpillars/README.md new file mode 100644 index 00000000..6c9cb8f7 --- /dev/null +++ b/second/configs/pointpillars/README.md @@ -0,0 +1,3 @@ +# PointPillars Configs + +The configuration files in these directories can be used to reproduce the results published in PointPillars. diff --git a/second/configs/pointpillars/car/xyres_16.proto b/second/configs/pointpillars/car/xyres_16.proto new file mode 100644 index 00000000..d2b7bdc1 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -39.68, -3, 69.12, 39.68, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -39.68, -5, 69.12, 39.68, 5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.6, 3.9, 1.56] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.16, -39.52, -1.78] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + 
class_names: ["Car"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Car" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Car" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Car"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/car/xyres_20.proto b/second/configs/pointpillars/car/xyres_20.proto new file mode 100644 index 00000000..0b51cd3c --- /dev/null +++ b/second/configs/pointpillars/car/xyres_20.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -40, -3, 70.4, 40, 1] + voxel_size : [0.2, 0.2, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 
0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -40, -5, 70.4, 40, 5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.6, 3.9, 1.56] # wlh + strides: [0.4, 0.4, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.2, -39.8, -1.78] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Car"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Car" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Car" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Car"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/car/xyres_24.proto b/second/configs/pointpillars/car/xyres_24.proto new file mode 100644 index 00000000..6a402e72 --- /dev/null +++ 
b/second/configs/pointpillars/car/xyres_24.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -40.32, -3, 71.04, 40.32, 1] + voxel_size : [0.24, 0.24, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -40.32, -5, 71.04, 40.32, 5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.6, 3.9, 1.56] # wlh + strides: [0.48, 0.48, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.24, -40.08, -1.78] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Car"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Car" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Car" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + 
initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Car"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/car/xyres_28.proto b/second/configs/pointpillars/car/xyres_28.proto new file mode 100644 index 00000000..fc14bee2 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_28.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -40.32, -3, 71.68, 40.32, 1] + voxel_size : [0.28, 0.28, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -40.32, -5, 71.68, 40.32, 5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.6, 3.9, 1.56] # wlh + strides: [0.56, 0.56, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.28, -40.04, -1.78] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Car"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + 
groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Car" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Car" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Car"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/ped_cycle/xyres_16.proto b/second/configs/pointpillars/ped_cycle/xyres_16.proto new file mode 100644 index 00000000..eb2abcf7 --- /dev/null +++ b/second/configs/pointpillars/ped_cycle/xyres_16.proto @@ -0,0 +1,197 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -19.84, -2.5, 47.36, 19.84, 0.5] + voxel_size : [0.16, 0.16, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 2 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: 
NormByNumPositives + # Postprocess + post_center_limit_range: [0, -19.84, -2.5, 47.36, 19.84, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 1.76, 1.73] # wlh + strides: [0.16, 0.16, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.08, -19.76, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 0.8, 1.73] # wlh + strides: [0.16, 0.16, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.08, -19.76, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Cyclist" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Cyclist" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: 
"/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/ped_cycle/xyres_20.proto b/second/configs/pointpillars/ped_cycle/xyres_20.proto new file mode 100644 index 00000000..816a33d3 --- /dev/null +++ b/second/configs/pointpillars/ped_cycle/xyres_20.proto @@ -0,0 +1,197 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -20, -2.5, 48, 20, 0.5] + voxel_size : [0.2, 0.2, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 2 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -20, -2.5, 48, 20, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 1.76, 1.73] # wlh + strides: [0.2, 0.2, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.1, -19.9, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 0.8, 1.73] # wlh + strides: [0.2, 0.2, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.1, -19.9, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { 
+ name_to_max_num { + key: "Cyclist" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Cyclist" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/ped_cycle/xyres_24.proto b/second/configs/pointpillars/ped_cycle/xyres_24.proto new file mode 100644 index 00000000..a89abbf5 --- /dev/null +++ b/second/configs/pointpillars/ped_cycle/xyres_24.proto @@ -0,0 +1,197 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -19.68, -2.5, 48, 19.68, 0.5] + voxel_size : [0.24, 0.24, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 2 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -19.68, -2.5, 48, 19.68, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 1.76, 1.73] # wlh 
+ strides: [0.24, 0.24, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.12, -19.56, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 0.8, 1.73] # wlh + strides: [0.24, 0.24, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.12, -19.56, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Cyclist" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Cyclist" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 27840 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 296960 # 1856 steps per epoch * 160 epochs + steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + batch_size: 2 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl" + kitti_root_path: "/data/sets/kitti_second" +} diff --git a/second/configs/pointpillars/ped_cycle/xyres_28.proto b/second/configs/pointpillars/ped_cycle/xyres_28.proto new file mode 100644 index 00000000..c75a3fbf --- /dev/null +++ b/second/configs/pointpillars/ped_cycle/xyres_28.proto @@ -0,0 +1,197 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [0, -20.16, -2.5, 47.04, 20.16, 0.5] + voxel_size : [0.28, 0.28, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 2 + 
voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [0, -20.16, -2.5, 47.04, 20.16, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 4 + without_reflectivity: false + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 1.76, 1.73] # wlh + strides: [0.28, 0.28, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.14, -20.02, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + anchor_generators: { + anchor_generator_stride: { + sizes: [0.6, 0.8, 1.73] # wlh + strides: [0.28, 0.28, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [0.14, -20.02, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["Cyclist", "Pedestrian"] + max_num_epochs : 160 + batch_size: 2 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "Cyclist" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "Cyclist" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/data/sets/kitti_second/kitti_infos_train.pkl" + kitti_root_path: "/data/sets/kitti_second" +} + 
+train_config: {
+  optimizer: {
+    adam_optimizer: {
+      learning_rate: {
+        exponential_decay_learning_rate: {
+          initial_learning_rate: 0.0002
+          decay_steps: 27840 # 1856 steps per epoch * 15 epochs
+          decay_factor: 0.8
+          staircase: true
+        }
+      }
+      weight_decay: 0.0001
+    }
+    use_moving_average: false
+
+  }
+  inter_op_parallelism_threads: 4
+  intra_op_parallelism_threads: 4
+  steps: 296960 # 1856 steps per epoch * 160 epochs
+  steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs
+  save_checkpoints_secs : 1800 # half hour
+  save_summary_steps : 10
+  enable_mixed_precision: false
+  loss_scale_factor : 512.0
+  clear_metrics_every_epoch: false
+}
+
+eval_input_reader: {
+  record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord"
+  class_names: ["Cyclist", "Pedestrian"]
+  batch_size: 2
+  max_num_epochs : 160
+  prefetch_size : 25
+  max_number_of_voxels: 12000
+  shuffle_points: false
+  num_workers: 3
+  anchor_area_threshold: 1
+  remove_environment: false
+  kitti_info_path: "/data/sets/kitti_second/kitti_infos_val.pkl"
+  kitti_root_path: "/data/sets/kitti_second"
+}
diff --git a/second/core/preprocess.py b/second/core/preprocess.py
index 3a80e4c1..98e0f41f 100644
--- a/second/core/preprocess.py
+++ b/second/core/preprocess.py
@@ -885,3 +885,20 @@ def box_collision_test(boxes, qboxes, clockwise=True):
                     ret[i, j] = True  # collision.
     return ret
+
+def global_translate(gt_boxes, points, noise_translate_std):
+    """
+    Apply global translation to gt_boxes and points.
+    """
+
+    if not isinstance(noise_translate_std, (list, tuple, np.ndarray)):
+        noise_translate_std = np.array([noise_translate_std, noise_translate_std, noise_translate_std])
+
+    noise_translate = np.array([np.random.normal(0, noise_translate_std[0], 1),
+                                np.random.normal(0, noise_translate_std[1], 1),
+                                np.random.normal(0, noise_translate_std[2], 1)]).T
+
+    points[:, :3] += noise_translate
+    gt_boxes[:, :3] += noise_translate
+
+    return gt_boxes, points
diff --git a/second/data/preprocess.py b/second/data/preprocess.py
index 9e7f4a45..436f55dd 100644
--- a/second/data/preprocess.py
+++ b/second/data/preprocess.py
@@ -59,6 +59,7 @@ def prep_pointcloud(input_dict,
                     gt_loc_noise_std=[1.0, 1.0, 1.0],
                     global_rotation_noise=[-np.pi / 4, np.pi / 4],
                     global_scaling_noise=[0.95, 1.05],
+                    global_loc_noise_std=(0.2, 0.2, 0.2),
                     global_random_rot_range=[0.78, 2.35],
                     generate_bev=False,
                     without_reflectivity=False,
@@ -208,6 +209,9 @@ def prep_pointcloud(input_dict,
         gt_boxes, points = prep.global_scaling_v2(gt_boxes, points,
                                                   *global_scaling_noise)
+
+        # Global translation
+        gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std)
+
         bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
         mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range)
         gt_boxes = gt_boxes[mask]
diff --git a/second/pytorch/models/pointpillars.py b/second/pytorch/models/pointpillars.py
new file mode 100644
index 00000000..3671080d
--- /dev/null
+++ b/second/pytorch/models/pointpillars.py
@@ -0,0 +1,192 @@
+"""
+PointPillars fork from SECOND.
+Code written by Alex Lang and Oscar Beijbom, 2018.
+Licensed under MIT License [see LICENSE].
+"""
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from second.pytorch.utils import get_paddings_indicator
+from torchplus.nn import Empty
+from torchplus.tools import change_default_args
+
+
+class PFNLayer(nn.Module):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 use_norm=True,
+                 last_layer=False):
+        """
+        Pillar Feature Net Layer.
+        The Pillar Feature Net could be composed of a series of these layers, but the PointPillars paper results only
+        used a single PFNLayer. This layer performs a similar role as second.pytorch.voxelnet.VFELayer.
+        :param in_channels: <int>. Number of input channels.
+        :param out_channels: <int>. Number of output channels.
+        :param use_norm: <bool>. Whether to include BatchNorm.
+        :param last_layer: <bool>. If last_layer, there is no concatenation of features.
+        """
+
+        super().__init__()
+        self.name = 'PFNLayer'
+        self.last_vfe = last_layer
+        if not self.last_vfe:
+            out_channels = out_channels // 2
+        self.units = out_channels
+
+        if use_norm:
+            BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
+            Linear = change_default_args(bias=False)(nn.Linear)
+        else:
+            BatchNorm1d = Empty
+            Linear = change_default_args(bias=True)(nn.Linear)
+
+        self.linear = Linear(in_channels, self.units)
+        self.norm = BatchNorm1d(self.units)
+
+    def forward(self, inputs):
+
+        x = self.linear(inputs)
+        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
+        x = F.relu(x)
+
+        x_max = torch.max(x, dim=1, keepdim=True)[0]
+
+        if self.last_vfe:
+            return x_max
+        else:
+            x_repeat = x_max.repeat(1, inputs.shape[1], 1)
+            x_concatenated = torch.cat([x, x_repeat], dim=2)
+            return x_concatenated
+
+
+class PillarFeatureNet(nn.Module):
+    def __init__(self,
+                 num_input_features=4,
+                 use_norm=True,
+                 num_filters=(64,),
+                 with_distance=False,
+                 voxel_size=(0.2, 0.2, 4),
+                 pc_range=(0, -40, -3, 70.4, 40, 1)):
+        """
+        Pillar Feature Net.
+        The network prepares the pillar features and performs forward pass through PFNLayers. This net performs a
+        similar role to SECOND's second.pytorch.voxelnet.VoxelFeatureExtractor.
+        :param num_input_features: <int>. Number of input features, either x, y, z or x, y, z, r.
+        :param use_norm: <bool>. Whether to include BatchNorm.
+        :param num_filters: (<int>: N). Number of features in each of the N PFNLayers.
+        :param with_distance: <bool>. Whether to include Euclidean distance to points.
+        :param voxel_size: (<float>: 3). Size of voxels, only utilize x and y size.
+        :param pc_range: (<float>: 6). Point cloud range, only utilize x and y min.
+ """ + + super().__init__() + self.name = 'PillarFeatureNet' + assert len(num_filters) > 0 + num_input_features += 5 + if with_distance: + num_input_features += 1 + self._with_distance = with_distance + + # Create PillarFeatureNet layers + num_filters = [num_input_features] + list(num_filters) + pfn_layers = [] + for i in range(len(num_filters) - 1): + in_filters = num_filters[i] + out_filters = num_filters[i + 1] + if i < len(num_filters) - 2: + last_layer = False + else: + last_layer = True + pfn_layers.append(PFNLayer(in_filters, out_filters, use_norm, last_layer=last_layer)) + self.pfn_layers = nn.ModuleList(pfn_layers) + + # Need pillar (voxel) size and x/y offset in order to calculate pillar offset + self.vx = voxel_size[0] + self.vy = voxel_size[1] + self.x_offset = self.vx / 2 + pc_range[0] + self.y_offset = self.vy / 2 + pc_range[1] + + def forward(self, features, num_voxels, coors): + + # Find distance of x, y, and z from cluster center + points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) + f_cluster = features[:, :, :3] - points_mean + + # Find distance of x, y, and z from pillar center + f_center = features[:, :, :2] + f_center[:, :, 0] = f_center[:, :, 0] - (coors[:, 3].float().unsqueeze(1) * self.vx + self.x_offset) + f_center[:, :, 1] = f_center[:, :, 1] - (coors[:, 2].float().unsqueeze(1) * self.vy + self.y_offset) + + # Combine together feature decorations + features_ls = [features, f_cluster, f_center] + if self._with_distance: + points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) + features_ls.append(points_dist) + features = torch.cat(features_ls, dim=-1) + + # The feature decorations were calculated without regard to whether pillar was empty. Need to ensure that + # empty pillars remain set to zeros. + voxel_count = features.shape[1] + mask = get_paddings_indicator(num_voxels, voxel_count, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(features) + features *= mask + + # Forward pass through PFNLayers + for pfn in self.pfn_layers: + features = pfn(features) + + return features.squeeze() + + +class PointPillarsScatter(nn.Module): + def __init__(self, + output_shape, + num_input_features=64): + """ + Point Pillar's Scatter. + Converts learned features from dense tensor to sparse pseudo image. This replaces SECOND's + second.pytorch.voxelnet.SparseMiddleExtractor. + :param output_shape: ([int]: 4). Required output shape of features. + :param num_input_features: . Number of input features. + """ + + super().__init__() + self.name = 'PointPillarsScatter' + self.output_shape = output_shape + self.ny = output_shape[2] + self.nx = output_shape[3] + self.nchannels = num_input_features + + def forward(self, voxel_features, coords, batch_size): + + # batch_canvas will be the final output. + batch_canvas = [] + for batch_itt in range(batch_size): + # Create the canvas for this sample + canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype, + device=voxel_features.device) + + # Only include non-empty pillars + batch_mask = coords[:, 0] == batch_itt + this_coords = coords[batch_mask, :] + indices = this_coords[:, 2] * self.nx + this_coords[:, 3] + indices = indices.type(torch.long) + voxels = voxel_features[batch_mask, :] + voxels = voxels.t() + + # Now scatter the blob back to the canvas. + canvas[:, indices] = voxels + + # Append to a list for later stacking. 
+            batch_canvas.append(canvas)
+
+        # Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols)
+        batch_canvas = torch.stack(batch_canvas, 0)
+
+        # Undo the column stacking to final 4-dim tensor
+        batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx)
+
+        return batch_canvas
diff --git a/second/pytorch/models/voxelnet.py b/second/pytorch/models/voxelnet.py
index f85d0db6..f113b34c 100644
--- a/second/pytorch/models/voxelnet.py
+++ b/second/pytorch/models/voxelnet.py
@@ -17,6 +17,8 @@ from second.pytorch.core.losses import (WeightedSigmoidClassificationLoss,
                                         WeightedSmoothL1LocalizationLoss,
                                         WeightedSoftmaxClassificationLoss)
+from second.pytorch.models.pointpillars import PillarFeatureNet, PointPillarsScatter
+from second.pytorch.utils import get_paddings_indicator
 
 
 def _get_pos_neg_loss(cls_loss, labels):
@@ -36,30 +38,6 @@ def _get_pos_neg_loss(cls_loss, labels):
     return cls_pos_loss, cls_neg_loss
 
 
-def get_paddings_indicator(actual_num, max_num, axis=0):
-    """Create boolean mask by actually number of a padded tensor.
-
-    Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
-
-    Returns:
-        [type]: [description]
-    """
-
-    actual_num = torch.unsqueeze(actual_num, axis + 1)
-    # tiled_actual_num: [N, M, 1]
-    max_num_shape = [1] * len(actual_num.shape)
-    max_num_shape[axis + 1] = -1
-    max_num = torch.arange(
-        max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
-    # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
-    # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
-    paddings_indicator = actual_num.int() > max_num
-    # paddings_indicator shape: [batch_size, max_num]
-    return paddings_indicator
-
-
 class VFELayer(nn.Module):
     def __init__(self, in_channels, out_channels, use_norm=True, name='vfe'):
         super(VFELayer, self).__init__()
@@ -121,7 +99,7 @@ def __init__(self,
         # var_torch_init(self.linear.bias)
         self.norm = BatchNorm1d(num_filters[1])
 
-    def forward(self, features, num_voxels):
+    def forward(self, features, num_voxels, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
         # num_voxels: [concated_num_points]
         points_mean = features[:, :, :3].sum(
@@ -183,7 +161,7 @@ def __init__(self,
         # var_torch_init(self.linear.bias)
         self.norm = BatchNorm1d(num_filters[-1])
 
-    def forward(self, features, num_voxels):
+    def forward(self, features, num_voxels, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
         # num_voxels: [concated_num_points]
         points_mean = features[:, :, :3].sum(
@@ -587,6 +565,7 @@ def __init__(self,
         vfe_class_dict = {
             "VoxelFeatureExtractor": VoxelFeatureExtractor,
             "VoxelFeatureExtractorV2": VoxelFeatureExtractorV2,
+            "PillarFeatureNet": PillarFeatureNet
         }
         vfe_class = vfe_class_dict[vfe_class_name]
         self.voxel_feature_extractor = vfe_class(
@@ -594,24 +573,32 @@ def __init__(self,
             use_norm,
             num_filters=vfe_num_filters,
             with_distance=with_distance)
-        mid_class_dict = {
-            "MiddleExtractor": MiddleExtractor,
-            "SparseMiddleExtractor": SparseMiddleExtractor,
-        }
-        mid_class = mid_class_dict[middle_class_name]
-        self.middle_feature_extractor = mid_class(
-            output_shape,
-            use_norm,
-            num_input_features=vfe_num_filters[-1],
-            num_filters_down1=middle_num_filters_d1,
-            num_filters_down2=middle_num_filters_d2)
-        if len(middle_num_filters_d2) == 0:
-            if len(middle_num_filters_d1) == 0:
-                num_rpn_input_filters = vfe_num_filters[-1]
-            else:
-                num_rpn_input_filters = middle_num_filters_d1[-1]
+
+        print("middle_class_name", middle_class_name)
+        if middle_class_name == "PointPillarsScatter":
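+            # PointPillars needs no learned middle layers: the scatter only reshapes the PFN output
+            # into a (C, ny, nx) pseudo image, so the RPN consumes its channel count unchanged.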
+            self.middle_feature_extractor = PointPillarsScatter(output_shape=output_shape,
+                                                                num_input_features=vfe_num_filters[-1])
+            num_rpn_input_filters = self.middle_feature_extractor.nchannels
         else:
-            num_rpn_input_filters = middle_num_filters_d2[-1]
+            mid_class_dict = {
+                "MiddleExtractor": MiddleExtractor,
+                "SparseMiddleExtractor": SparseMiddleExtractor,
+            }
+            mid_class = mid_class_dict[middle_class_name]
+            self.middle_feature_extractor = mid_class(
+                output_shape,
+                use_norm,
+                num_input_features=vfe_num_filters[-1],
+                num_filters_down1=middle_num_filters_d1,
+                num_filters_down2=middle_num_filters_d2)
+            if len(middle_num_filters_d2) == 0:
+                if len(middle_num_filters_d1) == 0:
+                    num_rpn_input_filters = int(vfe_num_filters[-1] * 2)
+                else:
+                    num_rpn_input_filters = int(middle_num_filters_d1[-1] * 2)
+            else:
+                num_rpn_input_filters = int(middle_num_filters_d2[-1] * 2)
+
         rpn_class_dict = {
             "RPN": RPN,
         }
@@ -624,7 +611,7 @@ def __init__(self,
             num_filters=rpn_num_filters,
             upsample_strides=rpn_upsample_strides,
             num_upsample_filters=rpn_num_upsample_filters,
-            num_input_filters=num_rpn_input_filters * 2,
+            num_input_filters=num_rpn_input_filters,
             num_anchor_per_loc=target_assigner.num_anchors_per_location,
             encode_background_as_zeros=encode_background_as_zeros,
             use_direction_classifier=use_direction_classifier,
@@ -666,7 +653,7 @@ def forward(self, example):
         # features: [num_voxels, max_num_points_per_voxel, 7]
         # num_points: [num_voxels]
         # coors: [num_voxels, 4]
-        voxel_features = self.voxel_feature_extractor(voxels, num_points)
+        voxel_features = self.voxel_feature_extractor(voxels, num_points, coors)
         if self._use_sparse_rpn:
             preds_dict = self.sparse_rpn(voxel_features, coors, batch_size_dev)
         else:
diff --git a/second/pytorch/train.py b/second/pytorch/train.py
index 2d2b143c..4fcf0902 100644
--- a/second/pytorch/train.py
+++ b/second/pytorch/train.py
@@ -98,6 +98,8 @@ def train(config_path,
     model_dir = pathlib.Path(model_dir)
     model_dir.mkdir(parents=True, exist_ok=True)
+    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
+    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
     if result_path is None:
         result_path = model_dir / 'results'
     config_file_bkp = "pipeline.config"
@@ -335,6 +337,10 @@ def _worker_init_fn(worker_id):
             total_step_elapsed += steps
             torchplus.train.save_models(model_dir, [net, optimizer],
                                         net.get_global_step())
+
+            # Ensure that all evaluation points are saved forever
+            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer], net.get_global_step(), max_to_keep=100)
+
             net.eval()
             result_path_step = result_path / f"step_{net.get_global_step()}"
             result_path_step.mkdir(parents=True, exist_ok=True)
@@ -379,10 +385,20 @@ def _worker_init_fn(worker_id):
             ]
             if not pickle_result:
                 dt_annos = kitti.get_label_annos(result_path_step)
-            result = get_official_eval_result(gt_annos, dt_annos, class_names)
+            result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(gt_annos, dt_annos, class_names,
+                                                                              return_data=True)
             print(result, file=logf)
             print(result)
             writer.add_text('eval_result', result, global_step)
+
+            for i, class_name in enumerate(class_names):
+                writer.add_scalar('bev_ap:{}'.format(class_name), mAPbev[i, 1, 0], global_step)
+                writer.add_scalar('3d_ap:{}'.format(class_name), mAP3d[i, 1, 0], global_step)
+                writer.add_scalar('aos_ap:{}'.format(class_name), mAPaos[i, 1, 0], global_step)
+            writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
+            writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
+            writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)
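+            # (The mAP arrays above are indexed [class, difficulty, overlap], so [i, 1, 0] and
+            # [:, 1, 0] pick the moderate-difficulty AP at the stricter overlap thresholds.)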
+
             result = get_coco_eval_result(gt_annos, dt_annos, class_names)
             print(result, file=logf)
             print(result)
diff --git a/second/pytorch/utils.py b/second/pytorch/utils.py
new file mode 100644
index 00000000..e49cbadc
--- /dev/null
+++ b/second/pytorch/utils.py
@@ -0,0 +1,25 @@
+import torch
+
+
+def get_paddings_indicator(actual_num, max_num, axis=0):
+    """Create a boolean mask from the actual number of valid entries in a padded tensor.
+
+    Args:
+        actual_num (torch.Tensor): number of valid entries per example, shape [N].
+        max_num (int): padded length along the masked axis.
+
+    Returns:
+        torch.Tensor: boolean mask of shape [N, max_num]; True for real entries, False for padding.
+    """
+
+    actual_num = torch.unsqueeze(actual_num, axis + 1)
+    # tiled_actual_num: [N, M, 1]
+    max_num_shape = [1] * len(actual_num.shape)
+    max_num_shape[axis + 1] = -1
+    max_num = torch.arange(
+        max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
+    # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
+    # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
+    paddings_indicator = actual_num.int() > max_num
+    # paddings_indicator shape: [batch_size, max_num]
+    return paddings_indicator
diff --git a/second/utils/eval.py b/second/utils/eval.py
index bdf6265a..49c7421d 100644
--- a/second/utils/eval.py
+++ b/second/utils/eval.py
@@ -788,12 +788,12 @@ def get_official_eval_result_v1(gt_annos, dt_annos, current_class):
     return result
 
 
-def get_official_eval_result(gt_annos, dt_annos, current_classes, difficultys=[0, 1, 2]):
-    overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7,
-                             0.5, 0.7, 0.7, 0.7], [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
+def get_official_eval_result(gt_annos, dt_annos, current_classes, difficultys=[0, 1, 2], return_data=False):
+    overlap_0_7 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
+                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                             [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]])
-    overlap_0_5 = np.array([[0.7, 0.5, 0.5, 0.7,
-                             0.5, 0.5, 0.5, 0.5], [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5],
+    overlap_0_5 = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5],
+                            [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]])
     min_overlaps = np.stack([overlap_0_7, overlap_0_5], axis=0)  # [2, 3, 5]
     class_to_name = {
@@ -847,8 +847,11 @@ def get_official_eval_result(gt_annos, dt_annos, current_classes, difficultys=[0
             result += print_str((f"aos AP:{mAPaos[j, 0, i]:.2f}, "
                                  f"{mAPaos[j, 1, i]:.2f}, "
                                  f"{mAPaos[j, 2, i]:.2f}"))
+    if return_data:
+        return result, mAPbbox, mAPbev, mAP3d, mAPaos
+    else:
+        return result
 
-    return result
 
 def get_coco_eval_result(gt_annos, dt_annos, current_classes):
     class_to_name = {
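For orientation, here is a minimal smoke test of the two new modules introduced by this patch. It is not part of the diff: a sketch that assumes second.pytorch is on your PYTHONPATH, that output_shape follows SECOND's (1, nz, ny, nx, C) dense-grid convention (the scatter reads ny and nx from indices 2 and 3), and that the default pillar size and point-cloud range from the PillarFeatureNet docstring apply, giving a 352 x 400 x/y grid; all tensor sizes are made up for illustration.

```Python
import torch

from second.pytorch.models.pointpillars import PillarFeatureNet, PointPillarsScatter

# Hypothetical sizes: 100 pillars spread over a batch of 2 samples, at most 35 points
# per pillar, and x, y, z, r point features (num_input_features=4).
pfn = PillarFeatureNet(num_input_features=4, use_norm=True, num_filters=(64,),
                       voxel_size=(0.2, 0.2, 4), pc_range=(0, -40, -3, 70.4, 40, 1))
scatter = PointPillarsScatter(output_shape=[1, 1, 400, 352, 64], num_input_features=64)

voxels = torch.rand(100, 35, 4)                    # [P, N, 4] zero-padded point features
num_points = torch.randint(1, 35, (100,))          # valid (non-padded) points per pillar
coors = torch.cat([torch.randint(0, 2, (100, 1)),     # batch index
                   torch.zeros(100, 1, dtype=torch.long),
                   torch.randint(0, 400, (100, 1)),   # y grid index
                   torch.randint(0, 352, (100, 1))],  # x grid index
                  dim=1)

features = pfn(voxels, num_points, coors)          # [100, 64] learned pillar features
canvas = scatter(features, coors, batch_size=2)    # [2, 64, 400, 352] pseudo image
print(canvas.shape)
```

The resulting pseudo image feeds the unmodified SECOND RPN, which is why the PointPillarsScatter branch in voxelnet.py above sets num_rpn_input_filters directly to nchannels.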