initial code release

OpenGVLab · Nov 30, 2023 · 46a3192 · 46a3192
1 parent 88fcb8f
commit 46a3192
Show file tree

Hide file tree

Showing 104 changed files with 23,591 additions and 1 deletion.
diff --git a/.github/workflows/formatter.yml b/.github/workflows/formatter.yml
@@ -0,0 +1,20 @@
+name: Formatter
+
+on:
+  workflow_dispatch:  # manual trigger
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [opened, reopened, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # keyed by PR number, else by ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+
+jobs:
+  formatter:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable  # Black formatter action
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 PonderV2
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -28,9 +28,28 @@ PonderV2 is a comprehensive 3D pre-training framework designed to facilitate the
     <img src="assets/pipeline.png" alt="pipeline" width="800" />
 </p>
 
-## Highlights:
+## News:
+- *Nov. 2023*: [**Model files**](./ponder/models/ponder/) are released! Usage instructions, complete code and checkpoints are coming soon!
 - *Oct. 2023*: **PonderV2** is released on [arXiv](https://arxiv.org/abs/2310.08586), code will be made public and supported by [Pointcept](https://github.com/Pointcept/Pointcept) soon.
 
+## Example Usage:
+Pre-train PonderV2 on a single Structured3D dataset with 8 GPUs:
+```bash
+bash scripts/train.sh -g 8 -d s3dis -c pretrain-ponder-spunet-v1m1-0-base -n ponderv2-pretrain
+```
+
+More detailed instructions on installation, data pre-processing, pre-training and finetuning will come soon!
+
+For more outdoor pre-training and downstream information, you can also refer to [UniPAD](https://github.com/Nightmare-n/UniPAD). 
+
+## Todo:
+- [ ] add instructions on installation and usage
+- [ ] add ScanNet w. RGB-D dataloader and data pre-processing scripts
+- [ ] add multi-dataset loader and trainer
+- [ ] add multi-dataset point prompt training model
+- [ ] add more pre-training and finetuning scripts
+- [ ] add pre-trained checkpoints
+
 ## Citation
 ```bib
 @misc{zhu2023ponderv2,

diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py
@@ -0,0 +1,37 @@
+weight = None  # path to the model weights to load
+resume = False  # whether to resume a previous training process
+evaluate = True  # whether to evaluate after each training epoch
+test_only = False  # whether to run the test process only
+
+seed = None  # None: the training process picks a random seed and records it
+save_path = "exp/default"
+num_worker = 16  # total number of workers across all GPUs
+batch_size = 16  # total batch size across all GPUs
+batch_size_val = None  # None: auto-adapts to batch size 1 per GPU
+batch_size_test = None  # None: auto-adapts to batch size 1 per GPU
+epoch = 100  # total epochs; data loop = epoch // eval_epoch
+eval_epoch = 100  # scheduled total number of eval & checkpoint epochs
+
+sync_bn = False
+enable_amp = False
+empty_cache = False
+find_unused_parameters = False
+
+mix_prob = 0
+param_dicts = None  # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
+
+# Hooks: each entry is a dict whose "type" names the hook.
+hooks = [
+    dict(type="CheckpointLoader"),
+    dict(type="IterationTimer", warmup_iter=2),
+    dict(type="InformationWriter"),
+    dict(type="SemSegEvaluator"),
+    dict(type="CheckpointSaver", save_freq=None),
+    dict(type="PreciseEvaluator", test_last=False),
+]
+
+# Trainer
+train = dict(type="DefaultTrainer")
+
+# Tester
+test = dict(type="SemSegTester", verbose=True)
diff --git a/configs/nuscenes/pretrain-ponder-spunet-v1m1-0-base-color-amp.py b/configs/nuscenes/pretrain-ponder-spunet-v1m1-0-base-color-amp.py
@@ -0,0 +1,272 @@
+_base_ = ["../_base_/default_runtime.py"]  # base runtime config this file builds on
+
+num_gpu = 4
+# misc custom settings (re-declare names that also appear in the base runtime config)
+batch_size = 4 * num_gpu  # total batch size across all GPUs (4 per GPU)
+num_worker = 8 * num_gpu  # 8 per GPU
+
+mix_prob = 0
+empty_cache = True
+enable_amp = True
+evaluate = False  # the hooks list at the end of this file has no evaluator entries either
+find_unused_parameters = True
+
+# trainer
+train = dict(
+    type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+    type="PonderOutdoor-v2",
+    mask=dict(ratio=0.8, size=8, channel=4),
+    backbone=dict(
+        type="SpUNet-v1m1",
+        in_channels=4,  # NOTE(review): presumably coord (3) + strength (1), cf. feat_keys in data — confirm
+        num_classes=0,
+        channels=(32, 64, 128, 256, 256, 128, 96, 96),
+        layers=(2, 3, 4, 6, 2, 2, 2, 2),
+    ),
+    projection=dict(
+        type="SimpleConv3D-v1m1",
+        in_channels=96,  # equals the last entry of backbone channels
+        out_channels=32,
+    ),
+    renderer=dict(
+        type="NeuSModel",
+        field=dict(
+            type="SDFField",
+            sdf_decoder=dict(
+                in_dim=32,  # equals projection out_channels
+                out_dim=16 + 1,  # NOTE(review): presumably 16 feature dims + 1 SDF value — confirm
+                hidden_size=16,
+                n_blocks=5,
+            ),
+            rgb_decoder=dict(
+                in_dim=32 + 16 + 3 + 3,  # 32 and 16 mirror the sdf_decoder dims; meaning of 3 + 3 is not shown here
+                out_dim=3,
+                hidden_size=16,
+                n_blocks=3,
+            ),
+            beta_init=0.3,
+            use_gradient=True,
+            volume_type="default",
+            padding_mode="zeros",
+            share_volume=True,
+        ),
+        collider=dict(
+            type="AABBBoxCollider",
+            near_plane=0.01,
+            bbox=[0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
+        ),
+        sampler=dict(
+            type="NeuSSampler",
+            initial_sampler="UniformSampler",
+            num_samples=72,
+            num_samples_importance=24,
+            num_upsample_steps=1,
+            train_stratified=True,
+            single_jitter=False,
+        ),
+        loss=dict(
+            sensor_depth_truncation=0.01,
+            weights=dict(
+                depth_loss=10.0,
+                rgb_loss=10.0,
+            ),
+        ),
+    ),
+    scene_bbox=((-54.0, -54.0, -5.0, 54.0, 54.0, 3.0),),  # extent 108 x 108 x 8 if read as (min xyz, max xyz)
+    grid_shape=((180, 180, 5),),  # grid_shape * grid_size = (108, 108, 8), the scene_bbox extent
+    grid_size=((0.6, 0.6, 1.6),),
+    val_ray_split=8192,
+    pool_type="mean",
+    share_volume=True,
+    render_semantic=False,
+    conditions=("nuScenes",),  # same string the data pipeline adds under the "condition" key
+    template="[x]",
+    clip_model="ViT-B/16",
+    # fmt: off
+    class_name=(
+        # nuScenes: 16 free-text names, listed in the same order as data["names"]
+        "barrier", "bicycle", "bus", "car", "construction vehicle",
+        "motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
+        "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
+    ),
+    valid_index=(
+        [i for i in range(16)],  # indices 0..15, i.e. all 16 class names above
+    ),
+)
+
+# scheduler settings
+epoch = 24
+eval_epoch = 24  # equal to epoch
+optimizer = dict(type="AdamW", lr=0.0002, weight_decay=0.01)
+scheduler = dict(
+    type="OneCycleLR",
+    max_lr=optimizer["lr"],  # peak LR is the optimizer lr (0.0002)
+    pct_start=0.4,
+    anneal_strategy="cos",
+    div_factor=10.0,  # NOTE(review): if this maps to torch's OneCycleLR, initial lr = max_lr / 10 — confirm
+    final_div_factor=100.0,  # NOTE(review): likewise, final lr = initial lr / 100 — confirm
+)
+
+data = dict(
+    num_classes=16,
+    ignore_index=-1,
+    names=[  # 16 entries, matching num_classes
+        "barrier",
+        "bicycle",
+        "bus",
+        "car",
+        "construction_vehicle",
+        "motorcycle",
+        "pedestrian",
+        "traffic_cone",
+        "trailer",
+        "truck",
+        "driveable_surface",
+        "other_flat",
+        "sidewalk",
+        "terrain",
+        "manmade",
+        "vegetation",
+    ],
+    train=dict(
+        type="ConcatDataset",
+        datasets=[
+            # nuScenes (the only dataset in this list)
+            dict(
+                type="NuScenesDataset",
+                split="train",
+                data_root="data/nuscenes",
+                transform=[
+                    dict(
+                        type="RandomRotate",
+                        angle=[-0.25, 0.25],
+                        axis="z",
+                        center=[0, 0, 0],
+                        p=0.5,
+                        keys=["lidar2img", "lidar2cam"],  # same keys are passed to every Random* transform below
+                    ),
+                    dict(
+                        type="RandomScale",
+                        scale=[0.9, 1.1],
+                        anisotropic=False,
+                        keys=["lidar2img", "lidar2cam"],
+                    ),
+                    dict(
+                        type="RandomShift",
+                        shift=[0.5, 0.5, 0.5],
+                        keys=["lidar2img", "lidar2cam"],
+                    ),
+                    dict(
+                        type="RandomFlip",
+                        p=0.5,
+                        keys=["lidar2img", "lidar2cam"],
+                    ),
+                    dict(
+                        type="PointRangeFilter",
+                        point_cloud_range=(-54.0, -54.0, -5.0, 54.0, 54.0, 3.0),  # same values as model scene_bbox
+                        padding=0.1,
+                    ),
+                    dict(
+                        type="GridSample",
+                        grid_size=0.1,
+                        hash_type="ravel",
+                        mode="train",
+                        keys=("coord", "strength", "segment"),
+                        return_grid_coord=True,
+                    ),
+                    dict(
+                        type="ProjectOnImage",
+                        filter_overlap=True,
+                        close_radius=3.0,
+                    ),
+                    dict(
+                        type="RaySample",
+                        point_nsample=512,
+                        fetch_color=True,
+                        fetch_segment=True,
+                    ),
+                    dict(type="Add", keys_dict={"condition": "nuScenes"}),  # matches model conditions
+                    dict(type="ToTensor"),
+                    dict(
+                        type="Collect",
+                        keys=(
+                            "coord",
+                            "grid_coord",
+                            "segment",
+                            "condition",
+                            "ray_start",
+                            "ray_end",
+                            "ray_segment",
+                            "ray_color",
+                        ),
+                        offset_keys_dict=dict(offset="coord", ray_offset="ray_start"),
+                        stack_keys=("lidar2img", "lidar2cam", "cam_intrinsic"),
+                        feat_keys=("coord", "strength"),
+                    ),
+                ],
+                test_mode=False,
+                ignore_index=-1,
+                loop=1,
+                use_camera=True,
+            ),
+        ],
+    ),
+    val=dict(
+        type="NuScenesDataset",
+        split="val",
+        data_root="data/nuscenes",
+        transform=[  # unlike train: no Random* transforms and no PointRangeFilter
+            dict(
+                type="GridSample",
+                grid_size=0.1,
+                hash_type="ravel",
+                mode="train",  # NOTE(review): the val pipeline also uses mode="train" — confirm this is intended
+                keys=("coord", "strength", "segment"),
+                return_grid_coord=True,
+            ),
+            dict(
+                type="ProjectOnImage",
+                filter_overlap=True,
+                close_radius=3.0,
+            ),
+            dict(
+                type="RaySample",
+                point_nsample=512,
+                fetch_color=True,
+                fetch_segment=True,
+            ),
+            dict(type="Add", keys_dict={"condition": "nuScenes"}),
+            dict(type="ToTensor"),
+            dict(
+                type="Collect",
+                keys=(
+                    "coord",
+                    "grid_coord",
+                    "segment",
+                    "condition",
+                    "ray_start",
+                    "ray_end",
+                    "ray_segment",
+                    "ray_color",
+                ),
+                offset_keys_dict=dict(offset="coord", ray_offset="ray_start"),
+                stack_keys=("lidar2img", "lidar2cam", "cam_intrinsic"),
+                feat_keys=("coord", "strength"),
+            ),
+        ],
+        test_mode=False,
+        ignore_index=-1,
+        use_camera=True,
+    ),
+)
+
+hooks = [  # the base default_runtime list minus SemSegEvaluator and PreciseEvaluator
+    dict(type="CheckpointLoader"),
+    dict(type="IterationTimer", warmup_iter=2),
+    dict(type="InformationWriter"),
+    dict(type="CheckpointSaver", save_freq=None),
+]