From 34bf44e7f53c0865236fabab448fcde46b9a4618 Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Wed, 14 Aug 2024 13:49:39 +0300 Subject: [PATCH 1/6] add a way to orient based on main direction of the flat surface --- .../data/dataparsers/nerfstudio_dataparser.py | 126 +++++++++++++++--- 1 file changed, 107 insertions(+), 19 deletions(-) diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index e11902c094..088716cd2d 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -53,7 +53,7 @@ class NerfstudioDataParserConfig(DataParserConfig): """How much to downscale images. If not set, images are chosen such that the max dimension is <1600px.""" scene_scale: float = 1.0 """How much to scale the region of interest by.""" - orientation_method: Literal["pca", "up", "vertical", "none"] = "up" + orientation_method: Literal["pca", "up", "vertical", "none", "align"] = "up" """The method to use for orientation.""" center_method: Literal["poses", "focus", "none"] = "poses" """The method to use to center the poses.""" @@ -232,6 +232,8 @@ def _generate_dataparser_outputs(self, split="train"): CONSOLE.log(f"[yellow] Dataset is overriding orientation method to {orientation_method}") else: orientation_method = self.config.orientation_method + if orientation_method == "align": + orientation_method = "up" poses = torch.from_numpy(np.array(poses).astype(np.float32)) poses, transform_matrix = camera_utils.auto_orient_and_center_poses( @@ -298,22 +300,6 @@ def _generate_dataparser_outputs(self, split="train"): if (camera_type in [CameraType.FISHEYE, CameraType.FISHEYE624]) and (fisheye_crop_radius is not None): metadata["fisheye_crop_radius"] = fisheye_crop_radius - cameras = Cameras( - fx=fx, - fy=fy, - cx=cx, - cy=cy, - distortion_params=distortion_params, - height=height, - width=width, - camera_to_worlds=poses[:, :3, :4], - camera_type=camera_type, - metadata=metadata, - ) - - assert self.downscale_factor is not None - cameras.rescale_output_resolution(scaling_factor=1.0 / self.downscale_factor) - # The naming is somewhat confusing, but: # - transform_matrix contains the transformation to dataparser output coordinates from saved coordinates. # - dataparser_transform_matrix contains the transformation to dataparser output coordinates from original data coordinates. @@ -348,6 +334,9 @@ def _generate_dataparser_outputs(self, split="train"): except AttributeError: self.prompted_user = False + alignment_matrix = None + sparse_points = None + # Load 3D points if self.config.load_3D_points: if "ply_file_path" in meta: @@ -399,10 +388,45 @@ def _generate_dataparser_outputs(self, split="train"): if ply_file_path: sparse_points = self._load_3D_points(ply_file_path, transform_matrix, scale_factor) - if sparse_points is not None: - metadata.update(sparse_points) + + if orientation_method == "align": + points3D_xyz = sparse_points["points3D_xyz"] + aligned_points3D, alignment_matrix = self._align_points_to_target_plane( + points3D_xyz, torch.tensor([0, 1, 0], dtype=torch.float32) + ) + sparse_points["points3D_xyz"] = aligned_points3D[:, :3] self.prompted_user = True + if alignment_matrix is not None: + num_poses = poses.shape[0] + bottom_row = torch.tensor([0, 0, 0, 1], dtype=torch.float32).unsqueeze(0).expand(num_poses, -1, -1) + poses_homogeneous = torch.cat([poses, bottom_row], dim=1) # Shape: (num_poses, 4, 4) + + poses = alignment_matrix @ poses_homogeneous + dataparser_transform_matrix = torch.cat( + [dataparser_transform_matrix, torch.tensor([0, 0, 0, 1], dtype=torch.float32).unsqueeze(0)], dim=0 + ) + dataparser_transform_matrix = alignment_matrix @ dataparser_transform_matrix + + cameras = Cameras( + fx=fx, + fy=fy, + cx=cx, + cy=cy, + distortion_params=distortion_params, + height=height, + width=width, + camera_to_worlds=poses[:, :3, :4], + camera_type=camera_type, + metadata=metadata, + ) + + assert self.downscale_factor is not None + cameras.rescale_output_resolution(scaling_factor=1.0 / self.downscale_factor) + + if sparse_points is not None: + metadata.update(sparse_points) + dataparser_outputs = DataparserOutputs( image_filenames=image_filenames, cameras=cameras, @@ -458,6 +482,70 @@ def _load_3D_points(self, ply_file_path: Path, transform_matrix: torch.Tensor, s } return out + @staticmethod + def _align_points_to_target_plane(points: torch.Tensor, target_normal: torch.Tensor): + """Aligns a set of 3D points (in homogeneous coordinates) to a target plane defined by its normal vector. + + Args: + points: A torch tensor of shape (n, 4) representing the 3D points in homogeneous coordinates. + target_normal: A torch tensor of shape (3, ) representing the normal vector of the target plane. + + Returns: + A tuple containing: + - aligned_points: The 3D points aligned to the target plane as a torch tensor of shape (n, 4). + - alignment_matrix: The 4x4 alignment matrix used for alignment. + """ + points_xyz = points[:, :3] # Shape: (n, 3) + + # Calculate the centroid (mean of points) + centroid = torch.mean(points_xyz, dim=0) # Shape: (3,) + + # Center the points around the centroid + centered_points = points_xyz - centroid # Shape: (n, 3) + + # Perform SVD + _, _, vh = torch.linalg.svd(centered_points) # vh shape: (3, 3) + + # The last right singular vector is the normal to the plane + normal = vh[-1] # Shape: (3,) + + # Calculate the rotation axis and angle + rotation_axis = torch.cross(normal, target_normal) # Shape: (3,) + rotation_axis_norm = torch.norm(rotation_axis) + + if rotation_axis_norm != 0: + rotation_axis /= rotation_axis_norm + cos_theta = torch.dot(normal, target_normal) + theta = torch.arccos(cos_theta) + + # Create the rotation matrix using Rodrigues' rotation formula + K = torch.tensor( + [ + [0, -rotation_axis[2], rotation_axis[1]], + [rotation_axis[2], 0, -rotation_axis[0]], + [-rotation_axis[1], rotation_axis[0], 0], + ], + dtype=torch.float32, + ) + rotation_matrix = torch.eye(3) + torch.sin(theta) * K + (1 - torch.cos(theta)) * (K @ K) + else: + rotation_matrix = torch.eye(3) # If the normal is already aligned, no rotation needed + + # Create the 4x4 alignment matrix + alignment_matrix = torch.eye(4, dtype=torch.float32) # Shape: (4, 4) + alignment_matrix[:3, :3] = rotation_matrix # Insert rotation part + alignment_matrix[:3, 3] = -rotation_matrix @ centroid # Apply translation + + # Ensure the points are in homogeneous coordinates + if points.shape[1] == 3: + points = torch.cat([points, torch.ones((points.shape[0], 1), dtype=torch.float32)], dim=1) # Shape: (n, 4) + + # Apply the alignment transformation + aligned_points = alignment_matrix @ points.T # Shape: (4, n) + aligned_points = aligned_points.T # Shape: (n, 4) + + return aligned_points, alignment_matrix + def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="images_") -> Path: """Get the filename of the image file. downsample_folder_prefix can be used to point to auxiliary image data, e.g. masks From 50863bfc4327b1b2e1b59662af5f1cb3152efac1 Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Mon, 26 Aug 2024 13:36:43 +0300 Subject: [PATCH 2/6] add a way to align main flat surface --- .../data/dataparsers/base_dataparser.py | 2 +- .../data/dataparsers/nerfstudio_dataparser.py | 89 ++++++++++++------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py index 5cf1e6bdbf..3e59cf2778 100644 --- a/nerfstudio/data/dataparsers/base_dataparser.py +++ b/nerfstudio/data/dataparsers/base_dataparser.py @@ -57,7 +57,7 @@ class DataparserOutputs: """Camera object storing collection of camera information in dataset.""" alpha_color: Optional[Float[Tensor, "3"]] = None """Color of dataset background.""" - scene_box: SceneBox = field(default_factory=lambda: SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]]))) + scene_box: SceneBox = SceneBox(aabb=torch.tensor([[-0.283, -0.032, -0.328], [0.661, 0.034, 0.370]])) """Scene box of dataset. Used to bound the scene or provide the scene scale depending on model.""" mask_filenames: Optional[List[Path]] = None """Filenames for any masks that are required""" diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 088716cd2d..350cab8dee 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -53,8 +53,16 @@ class NerfstudioDataParserConfig(DataParserConfig): """How much to downscale images. If not set, images are chosen such that the max dimension is <1600px.""" scene_scale: float = 1.0 """How much to scale the region of interest by.""" - orientation_method: Literal["pca", "up", "vertical", "none", "align"] = "up" + orientation_method: Literal[ + "pca", + "up", + "vertical", + "align", + "none", + ] = "vertical" """The method to use for orientation.""" + target_normal: Tuple[float, float, float] = (1.0, 0.0, 0.0) + """The normal vector to align the scene to, represented as a tuple of floats.""" center_method: Literal["poses", "focus", "none"] = "poses" """The method to use to center the poses.""" auto_scale_poses: bool = True @@ -260,6 +268,7 @@ def _generate_dataparser_outputs(self, split="train"): # in x,y,z order # assumes that the scene is centered at the origin + # _ = self.config.scene_scale aabb_scale = self.config.scene_scale scene_box = SceneBox( aabb=torch.tensor( @@ -389,10 +398,11 @@ def _generate_dataparser_outputs(self, split="train"): if ply_file_path: sparse_points = self._load_3D_points(ply_file_path, transform_matrix, scale_factor) - if orientation_method == "align": + if sparse_points is not None and self.config.orientation_method == "align": + target_normal_tensor = torch.tensor(self.config.target_normal, dtype=torch.float32) points3D_xyz = sparse_points["points3D_xyz"] aligned_points3D, alignment_matrix = self._align_points_to_target_plane( - points3D_xyz, torch.tensor([0, 1, 0], dtype=torch.float32) + points3D_xyz, target_normal_tensor ) sparse_points["points3D_xyz"] = aligned_points3D[:, :3] self.prompted_user = True @@ -483,53 +493,64 @@ def _load_3D_points(self, ply_file_path: Path, transform_matrix: torch.Tensor, s return out @staticmethod - def _align_points_to_target_plane(points: torch.Tensor, target_normal: torch.Tensor): + def _align_points_to_target_plane( + points: torch.Tensor, + target_normal: Tuple[float, float, float], + target_point: Tuple[float, float, float], + threshold: float = 1.0, + max_iterations: int = 5, + ) -> Tuple[torch.Tensor, torch.Tensor]: """Aligns a set of 3D points (in homogeneous coordinates) to a target plane defined by its normal vector. Args: points: A torch tensor of shape (n, 4) representing the 3D points in homogeneous coordinates. target_normal: A torch tensor of shape (3, ) representing the normal vector of the target plane. + threshold: The distance threshold for identifying inliers. + max_iterations: The maximum number of iterations for refining inliers. Returns: A tuple containing: - aligned_points: The 3D points aligned to the target plane as a torch tensor of shape (n, 4). - alignment_matrix: The 4x4 alignment matrix used for alignment. """ - points_xyz = points[:, :3] # Shape: (n, 3) - # Calculate the centroid (mean of points) - centroid = torch.mean(points_xyz, dim=0) # Shape: (3,) + def filter_outliers(points_xyz, threshold, max_iterations): + inlier_mask = torch.ones(points_xyz.size(0), dtype=torch.bool) + + for _ in range(max_iterations): + current_inliers = points_xyz[inlier_mask] + centroid = torch.mean(current_inliers, dim=0) + + centered_points = current_inliers - centroid + _, _, vh = torch.linalg.svd(centered_points) + + normal = vh[-1] + + distances = torch.abs((points_xyz - centroid) @ normal) + new_inlier_mask = distances < threshold + + inlier_mask = inlier_mask & new_inlier_mask - # Center the points around the centroid - centered_points = points_xyz - centroid # Shape: (n, 3) + threshold *= 0.9 # Reduce threshold for more aggressiveness - # Perform SVD - _, _, vh = torch.linalg.svd(centered_points) # vh shape: (3, 3) + return inlier_mask - # The last right singular vector is the normal to the plane + points_xyz = points[:, :3] # Shape: (n, 3) + inlier_mask = filter_outliers(points_xyz, threshold, max_iterations) + inliers = points_xyz[inlier_mask] + + # Calculate the centroid using only inliers + centroid = torch.mean(inliers, dim=0) # Shape: (3,) + + # Center the inlier points around the centroid + centered_inliers = inliers - centroid # Shape: (m, 3) where m <= n + + # Perform SVD on inliers to find the normal + _, _, vh = torch.linalg.svd(centered_inliers) # vh shape: (3, 3) normal = vh[-1] # Shape: (3,) - # Calculate the rotation axis and angle - rotation_axis = torch.cross(normal, target_normal) # Shape: (3,) - rotation_axis_norm = torch.norm(rotation_axis) - - if rotation_axis_norm != 0: - rotation_axis /= rotation_axis_norm - cos_theta = torch.dot(normal, target_normal) - theta = torch.arccos(cos_theta) - - # Create the rotation matrix using Rodrigues' rotation formula - K = torch.tensor( - [ - [0, -rotation_axis[2], rotation_axis[1]], - [rotation_axis[2], 0, -rotation_axis[0]], - [-rotation_axis[1], rotation_axis[0], 0], - ], - dtype=torch.float32, - ) - rotation_matrix = torch.eye(3) + torch.sin(theta) * K + (1 - torch.cos(theta)) * (K @ K) - else: - rotation_matrix = torch.eye(3) # If the normal is already aligned, no rotation needed + # Use the provided helper function to get the rotation matrix + rotation_matrix = camera_utils.rotation_matrix_between(normal, target_normal) # Create the 4x4 alignment matrix alignment_matrix = torch.eye(4, dtype=torch.float32) # Shape: (4, 4) @@ -540,7 +561,7 @@ def _align_points_to_target_plane(points: torch.Tensor, target_normal: torch.Ten if points.shape[1] == 3: points = torch.cat([points, torch.ones((points.shape[0], 1), dtype=torch.float32)], dim=1) # Shape: (n, 4) - # Apply the alignment transformation + # Apply the alignment transformation to all points aligned_points = alignment_matrix @ points.T # Shape: (4, n) aligned_points = aligned_points.T # Shape: (n, 4) From 39236adb105a38eb035208f8050992bf782d7ae7 Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Mon, 26 Aug 2024 13:37:47 +0300 Subject: [PATCH 3/6] use default param for scene box --- nerfstudio/data/dataparsers/base_dataparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py index 3e59cf2778..5cf1e6bdbf 100644 --- a/nerfstudio/data/dataparsers/base_dataparser.py +++ b/nerfstudio/data/dataparsers/base_dataparser.py @@ -57,7 +57,7 @@ class DataparserOutputs: """Camera object storing collection of camera information in dataset.""" alpha_color: Optional[Float[Tensor, "3"]] = None """Color of dataset background.""" - scene_box: SceneBox = SceneBox(aabb=torch.tensor([[-0.283, -0.032, -0.328], [0.661, 0.034, 0.370]])) + scene_box: SceneBox = field(default_factory=lambda: SceneBox(aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]]))) """Scene box of dataset. Used to bound the scene or provide the scene scale depending on model.""" mask_filenames: Optional[List[Path]] = None """Filenames for any masks that are required""" From 72006e14748f98480348e6e389f8d6d6f91c55cb Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Mon, 26 Aug 2024 13:39:42 +0300 Subject: [PATCH 4/6] annotate _load_3D_points output --- nerfstudio/data/dataparsers/nerfstudio_dataparser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 350cab8dee..09d40fc38f 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -453,7 +453,9 @@ def _generate_dataparser_outputs(self, split="train"): ) return dataparser_outputs - def _load_3D_points(self, ply_file_path: Path, transform_matrix: torch.Tensor, scale_factor: float): + def _load_3D_points( + self, ply_file_path: Path, transform_matrix: torch.Tensor, scale_factor: float + ) -> Optional[Dict[str, torch.Tensor]]: """Loads point clouds positions and colors from .ply Args: From d0c97f23737cd7c18f8f7eaf4545748e59e1d520 Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Mon, 26 Aug 2024 13:44:29 +0300 Subject: [PATCH 5/6] change arg type --- nerfstudio/data/dataparsers/nerfstudio_dataparser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 09d40fc38f..7d2d9d543c 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -497,8 +497,7 @@ def _load_3D_points( @staticmethod def _align_points_to_target_plane( points: torch.Tensor, - target_normal: Tuple[float, float, float], - target_point: Tuple[float, float, float], + target_normal: Float[Tensor, "3"], threshold: float = 1.0, max_iterations: int = 5, ) -> Tuple[torch.Tensor, torch.Tensor]: From a3fceabbb96f7096719cbb6af2662cf64dc893a5 Mon Sep 17 00:00:00 2001 From: Hoanh Le Date: Mon, 26 Aug 2024 13:52:12 +0300 Subject: [PATCH 6/6] add missing types --- nerfstudio/data/dataparsers/nerfstudio_dataparser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 7d2d9d543c..226449e5fd 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -17,11 +17,13 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Literal, Optional, Tuple, Type +from typing import Dict, Literal, Optional, Tuple, Type import numpy as np import torch +from jaxtyping import Float from PIL import Image +from torch import Tensor from nerfstudio.cameras import camera_utils from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType