Skip to content

Commit 65f667f

Browse files
bottler and facebook-github-bot
authored and committed
loading llff and blender datasets
Summary: Copy code from NeRF for loading LLFF data and Blender synthetic data, and create dataset objects for them. Reviewed By: shapovalov. Differential Revision: D35581039. fbshipit-source-id: af7a6f3e9a42499700693381b5b147c991f57e5d
1 parent 7978ffd commit 65f667f

16 files changed

+972
-47
lines changed

LICENSE-3RD-PARTY

+23
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,26 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4646
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4747
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4848
SOFTWARE.
49+
50+
51+
NeRF https://github.com/bmild/nerf/
52+
53+
Copyright (c) 2020 bmild
54+
55+
Permission is hereby granted, free of charge, to any person obtaining a copy
56+
of this software and associated documentation files (the "Software"), to deal
57+
in the Software without restriction, including without limitation the rights
58+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
59+
copies of the Software, and to permit persons to whom the Software is
60+
furnished to do so, subject to the following conditions:
61+
62+
The above copyright notice and this permission notice shall be included in all
63+
copies or substantial portions of the Software.
64+
65+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
66+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
67+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
68+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
69+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
70+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
71+
SOFTWARE.

projects/implicitron_trainer/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Implicitron is a PyTorch3D-based framework for new-view synthesis via modeling t
55
# License
66

77
Implicitron is distributed as part of PyTorch3D under the [BSD license](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE).
8-
It includes code from [SRN](http://github.com/vsitzmann/scene-representation-networks) and [IDR](http://github.com/lioryariv/idr) repos.
8+
It includes code from the [NeRF](https://github.com/bmild/nerf), [SRN](http://github.com/vsitzmann/scene-representation-networks) and [IDR](http://github.com/lioryariv/idr) repos.
99
See [LICENSE-3RD-PARTY](https://github.com/facebookresearch/pytorch3d/blob/main/LICENSE-3RD-PARTY) for their licenses.
1010

1111

projects/implicitron_trainer/experiment.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def trainvalidate(
315315
epoch,
316316
loader,
317317
optimizer,
318-
validation,
318+
validation: bool,
319319
bp_var: str = "objective",
320320
metric_print_interval: int = 5,
321321
visualize_interval: int = 100,

projects/implicitron_trainer/tests/experiment.yaml

+47-33
Original file line numberDiff line numberDiff line change
@@ -95,18 +95,18 @@ generic_model_args:
9595
append_coarse_samples_to_fine: true
9696
density_noise_std_train: 0.0
9797
return_weights: false
98-
raymarcher_EmissionAbsorptionRaymarcher_args:
98+
raymarcher_CumsumRaymarcher_args:
9999
surface_thickness: 1
100100
bg_color:
101101
- 0.0
102-
background_opacity: 10000000000.0
102+
background_opacity: 0.0
103103
density_relu: true
104104
blend_output: false
105-
raymarcher_CumsumRaymarcher_args:
105+
raymarcher_EmissionAbsorptionRaymarcher_args:
106106
surface_thickness: 1
107107
bg_color:
108108
- 0.0
109-
background_opacity: 0.0
109+
background_opacity: 10000000000.0
110110
density_relu: true
111111
blend_output: false
112112
renderer_SignedDistanceFunctionRenderer_args:
@@ -157,17 +157,12 @@ generic_model_args:
157157
view_sampler_args:
158158
masked_sampling: false
159159
sampling_mode: bilinear
160-
feature_aggregator_IdentityFeatureAggregator_args:
161-
exclude_target_view: true
162-
exclude_target_view_mask_features: true
163-
concatenate_output: true
164-
feature_aggregator_ReductionFeatureAggregator_args:
160+
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
165161
exclude_target_view: true
166162
exclude_target_view_mask_features: true
167163
concatenate_output: true
168-
reduction_functions:
169-
- AVG
170-
- STD
164+
weight_by_ray_angle_gamma: 1.0
165+
min_ray_angle_weight: 0.1
171166
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
172167
exclude_target_view: true
173168
exclude_target_view_mask_features: true
@@ -177,12 +172,17 @@ generic_model_args:
177172
- STD
178173
weight_by_ray_angle_gamma: 1.0
179174
min_ray_angle_weight: 0.1
180-
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
175+
feature_aggregator_IdentityFeatureAggregator_args:
181176
exclude_target_view: true
182177
exclude_target_view_mask_features: true
183178
concatenate_output: true
184-
weight_by_ray_angle_gamma: 1.0
185-
min_ray_angle_weight: 0.1
179+
feature_aggregator_ReductionFeatureAggregator_args:
180+
exclude_target_view: true
181+
exclude_target_view_mask_features: true
182+
concatenate_output: true
183+
reduction_functions:
184+
- AVG
185+
- STD
186186
implicit_function_IdrFeatureField_args:
187187
feature_vector_size: 3
188188
d_in: 3
@@ -203,42 +203,44 @@ generic_model_args:
203203
n_harmonic_functions_xyz: 0
204204
pooled_feature_dim: 0
205205
encoding_dim: 0
206-
implicit_function_NeuralRadianceFieldImplicitFunction_args:
206+
implicit_function_NeRFormerImplicitFunction_args:
207207
n_harmonic_functions_xyz: 10
208208
n_harmonic_functions_dir: 4
209209
n_hidden_neurons_dir: 128
210210
latent_dim: 0
211211
input_xyz: true
212212
xyz_ray_dir_in_camera_coords: false
213213
color_dim: 3
214-
transformer_dim_down_factor: 1.0
215-
n_hidden_neurons_xyz: 256
216-
n_layers_xyz: 8
214+
transformer_dim_down_factor: 2.0
215+
n_hidden_neurons_xyz: 80
216+
n_layers_xyz: 2
217217
append_xyz:
218-
- 5
219-
implicit_function_NeRFormerImplicitFunction_args:
218+
- 1
219+
implicit_function_NeuralRadianceFieldImplicitFunction_args:
220220
n_harmonic_functions_xyz: 10
221221
n_harmonic_functions_dir: 4
222222
n_hidden_neurons_dir: 128
223223
latent_dim: 0
224224
input_xyz: true
225225
xyz_ray_dir_in_camera_coords: false
226226
color_dim: 3
227-
transformer_dim_down_factor: 2.0
228-
n_hidden_neurons_xyz: 80
229-
n_layers_xyz: 2
227+
transformer_dim_down_factor: 1.0
228+
n_hidden_neurons_xyz: 256
229+
n_layers_xyz: 8
230230
append_xyz:
231-
- 1
232-
implicit_function_SRNImplicitFunction_args:
233-
raymarch_function_args:
231+
- 5
232+
implicit_function_SRNHyperNetImplicitFunction_args:
233+
hypernet_args:
234234
n_harmonic_functions: 3
235235
n_hidden_units: 256
236236
n_layers: 2
237+
n_hidden_units_hypernet: 256
238+
n_layers_hypernet: 1
237239
in_features: 3
238240
out_features: 256
241+
latent_dim_hypernet: 0
239242
latent_dim: 0
240243
xyz_in_camera_coords: false
241-
raymarch_function: null
242244
pixel_generator_args:
243245
n_harmonic_functions: 4
244246
n_hidden_units: 256
@@ -247,18 +249,16 @@ generic_model_args:
247249
in_features: 256
248250
out_features: 3
249251
ray_dir_in_camera_coords: false
250-
implicit_function_SRNHyperNetImplicitFunction_args:
251-
hypernet_args:
252+
implicit_function_SRNImplicitFunction_args:
253+
raymarch_function_args:
252254
n_harmonic_functions: 3
253255
n_hidden_units: 256
254256
n_layers: 2
255-
n_hidden_units_hypernet: 256
256-
n_layers_hypernet: 1
257257
in_features: 3
258258
out_features: 256
259-
latent_dim_hypernet: 0
260259
latent_dim: 0
261260
xyz_in_camera_coords: false
261+
raymarch_function: null
262262
pixel_generator_args:
263263
n_harmonic_functions: 4
264264
n_hidden_units: 256
@@ -282,6 +282,13 @@ solver_args:
282282
data_source_args:
283283
dataset_map_provider_class_type: ???
284284
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
285+
dataset_map_provider_BlenderDatasetMapProvider_args:
286+
base_dir: ???
287+
object_name: ???
288+
path_manager_factory_class_type: PathManagerFactory
289+
n_known_frames_for_test: null
290+
path_manager_factory_PathManagerFactory_args:
291+
silence_logs: true
285292
dataset_map_provider_JsonIndexDatasetMapProvider_args:
286293
category: ???
287294
task_str: singlesequence
@@ -317,6 +324,13 @@ data_source_args:
317324
sort_frames: false
318325
path_manager_factory_PathManagerFactory_args:
319326
silence_logs: true
327+
dataset_map_provider_LlffDatasetMapProvider_args:
328+
base_dir: ???
329+
object_name: ???
330+
path_manager_factory_class_type: PathManagerFactory
331+
n_known_frames_for_test: null
332+
path_manager_factory_PathManagerFactory_args:
333+
silence_logs: true
320334
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
321335
batch_size: 1
322336
num_workers: 0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
8+
import torch
9+
from pytorch3d.implicitron.tools.config import registry
10+
11+
from .load_blender import load_blender_data
12+
from .single_sequence_dataset import (
13+
_interpret_blender_cameras,
14+
SingleSceneDatasetMapProviderBase,
15+
)
16+
17+
18+
@registry.register
class BlenderDatasetMapProvider(SingleSceneDatasetMapProviderBase):
    """
    Dataset map provider for a single scene of the Blender synthetic dataset.
    The actual loading is delegated to load_blender_data in load_blender.py.

    Members:
        base_dir: directory holding the data for the scene.
        object_name: The name of the scene (e.g. "lego"). This only serves as
            a label; typically it equals the name of the directory
            self.base_dir.
        path_manager_factory: Creates the path manager which may be used for
            interpreting paths.
        n_known_frames_for_test: If set, training frames are included in the
            val and test datasets, and this many random training frames are
            added to each test batch. If not set, test batches each contain
            just a single testing frame.
    """

    def _load_data(self) -> None:
        """
        Load the scene from self.base_dir and store the resulting cameras,
        images and split indices as self.poses, self.images and self.i_split.
        """
        path_manager = self.path_manager_factory.get()
        loaded = load_blender_data(
            self.base_dir,
            testskip=1,
            path_manager=path_manager,
        )
        # The third returned value is not needed here and is discarded.
        image_array, raw_poses, _, hwf, split_indices = loaded

        raw_height, raw_width, focal = hwf
        height = int(raw_height)
        width = int(raw_width)
        image_tensor = torch.from_numpy(image_array)

        # pyre-ignore[16]
        self.poses = _interpret_blender_cameras(raw_poses, height, width, focal)
        # pyre-ignore[16]
        self.images = image_tensor
        # pyre-ignore[16]
        self.i_split = split_indices

pytorch3d/implicitron/dataset/data_source.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88

99
from pytorch3d.implicitron.tools.config import ReplaceableBase, run_auto_creation
1010

11-
from . import json_index_dataset_map_provider # noqa
11+
from .blender_dataset_map_provider import BlenderDatasetMapProvider # noqa
1212
from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase
1313
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, Task
14+
from .json_index_dataset_map_provider import JsonIndexDatasetMapProvider # noqa
15+
from .llff_dataset_map_provider import LlffDatasetMapProvider # noqa
1416

1517

1618
class DataSourceBase(ReplaceableBase):

pytorch3d/implicitron/dataset/dataset_base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ class FrameData(Mapping[str, Any]):
3636
Args:
3737
frame_number: The number of the frame within its sequence.
3838
0-based continuous integers.
39-
frame_timestamp: The time elapsed since the start of a sequence in sec.
4039
sequence_name: The unique name of the frame's sequence.
4140
sequence_category: The object category of the sequence.
42-
image_size_hw: The size of the image in pixels; (height, width) tuple.
41+
frame_timestamp: The time elapsed since the start of a sequence in sec.
42+
image_size_hw: The size of the image in pixels; (height, width) tensor
43+
of shape (2,).
4344
image_path: The qualified path to the loaded image (with dataset_root).
4445
image_rgb: A Tensor of shape `(3, H, W)` holding the RGB image
4546
of the frame; elements are floats in [0, 1].
@@ -81,9 +82,9 @@ class FrameData(Mapping[str, Any]):
8182
"""
8283

8384
frame_number: Optional[torch.LongTensor]
84-
frame_timestamp: Optional[torch.Tensor]
8585
sequence_name: Union[str, List[str]]
8686
sequence_category: Union[str, List[str]]
87+
frame_timestamp: Optional[torch.Tensor] = None
8788
image_size_hw: Optional[torch.Tensor] = None
8889
image_path: Union[str, List[str], None] = None
8990
image_rgb: Optional[torch.Tensor] = None
@@ -101,7 +102,7 @@ class FrameData(Mapping[str, Any]):
101102
sequence_point_cloud_path: Union[str, List[str], None] = None
102103
sequence_point_cloud: Optional[Pointclouds] = None
103104
sequence_point_cloud_idx: Optional[torch.Tensor] = None
104-
frame_type: Union[str, List[str], None] = None # seen | unseen
105+
frame_type: Union[str, List[str], None] = None # known | unseen
105106
meta: dict = field(default_factory=lambda: {})
106107

107108
def to(self, *args, **kwargs):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
8+
import numpy as np
9+
import torch
10+
from pytorch3d.implicitron.tools.config import registry
11+
12+
from .load_llff import load_llff_data
13+
14+
from .single_sequence_dataset import (
15+
_interpret_blender_cameras,
16+
SingleSceneDatasetMapProviderBase,
17+
)
18+
19+
20+
@registry.register
class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
    """
    Dataset map provider for a single scene of the LLFF dataset.

    Members:
        base_dir: directory holding the data for the scene.
        object_name: The name of the scene (e.g. "fern"). This only serves as
            a label; typically it equals the name of the directory
            self.base_dir.
        path_manager_factory: Creates the path manager which may be used for
            interpreting paths.
        n_known_frames_for_test: If set, training frames are included in the
            val and test datasets, and this many random training frames are
            added to each test batch. If not set, test batches each contain
            just a single testing frame.
    """

    def _load_data(self) -> None:
        """
        Load the scene from self.base_dir and store the resulting cameras,
        images and split indices as self.poses, self.images and self.i_split.
        """
        path_manager = self.path_manager_factory.get()
        # NOTE(review): factor=8 is presumably the image downsampling factor
        # used by load_llff_data - confirm against load_llff.py.
        image_array, pose_array, _ = load_llff_data(
            self.base_dir, factor=8, path_manager=path_manager
        )
        # The last column of the first pose carries (height, width, focal);
        # the leading 3x4 part of every pose is kept as the camera matrix.
        hwf = pose_array[0, :3, -1]
        pose_array = pose_array[:, :3, :4]

        # Every 8th frame, starting with frame 0, is held out; all remaining
        # frames are used for training.
        frame_ids = np.arange(image_array.shape[0])
        i_test = frame_ids[::8]
        held_out = set(i_test.tolist())
        i_train = np.array([idx for idx in frame_ids if idx not in held_out])
        # The held-out indices fill both the second and third split entries
        # (presumably val and test - verify against the base class).
        split_indices = (i_train, i_test, i_test)

        raw_height, raw_width, focal = hwf
        height = int(raw_height)
        width = int(raw_width)
        image_tensor = torch.from_numpy(image_array)
        pose_tensor = torch.from_numpy(pose_array)

        # pyre-ignore[16]
        self.poses = _interpret_blender_cameras(pose_tensor, height, width, focal)
        # pyre-ignore[16]
        self.images = image_tensor
        # pyre-ignore[16]
        self.i_split = split_indices

0 commit comments

Comments
 (0)