Skip to content

Commit

Permalink
Merge pull request #6 from hbb1/align
Browse files Browse the repository at this point in the history
Better camera alignment to 3DGS
  • Loading branch information
hbb1 authored May 13, 2024
2 parents 661d224 + 7a3657a commit 362a17a
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 138 deletions.
152 changes: 71 additions & 81 deletions cuda_rasterizer/backward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ renderCUDA(
const uint2 pix_max = { min(pix_min.x + BLOCK_X, W), min(pix_min.y + BLOCK_Y , H) };
const uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y };
const uint32_t pix_id = W * pix.y + pix.x;
const float2 pixf = { (float)pix.x + 0.5, (float)pix.y + 0.5};
const float2 pixf = {(float)pix.x, (float)pix.y};

const bool inside = pix.x < W&& pix.y < H;
const uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x];
Expand Down Expand Up @@ -453,79 +453,66 @@ inline __device__ void computeTransMat(
const glm::vec4 & quat,
const glm::vec2 & scale,
const float* viewmat,
const float4 & intrins,
float tan_fovx,
float tan_fovy,
const float* projmat,
const int W,
const int H,
const float* transMat,
const float* dL_dtransMat,
const float* dL_dnormal3D,
glm::vec3 & dL_dmean3D,
glm::vec2 & dL_dscale,
glm::vec4 & dL_drot
) {
// camera information
const glm::mat3 W = glm::mat3(
viewmat[0],viewmat[1],viewmat[2],
viewmat[4],viewmat[5],viewmat[6],
viewmat[8],viewmat[9],viewmat[10]
); // viewmat

const glm::vec3 cam_pos = glm::vec3(viewmat[12], viewmat[13], viewmat[14]); // camera center
const glm::mat4 P = glm::mat4(
intrins.x, 0.0, 0.0, 0.0,
0.0, intrins.y, 0.0, 0.0,
intrins.z, intrins.w, 1.0, 1.0,
0.0, 0.0, 0.0, 0.0
glm::mat4 world2ndc = glm::mat4(
projmat[0], projmat[4], projmat[8], projmat[12],
projmat[1], projmat[5], projmat[9], projmat[13],
projmat[2], projmat[6], projmat[10], projmat[14],
projmat[3], projmat[7], projmat[11], projmat[15]
);

glm::mat3 S = scale_to_mat({scale.x, scale.y, 1.0f}, 1.0f);
glm::mat3 R = quat_to_rotmat(quat);
glm::mat3 RS = R * S;
glm::vec3 p_view = W * p_world + cam_pos;
glm::mat3 M = glm::mat3(W * RS[0], W * RS[1], p_view);
glm::mat3x4 ndc2pix = glm::mat3x4(
glm::vec4(float(W) / 2.0, 0.0, 0.0, float(W-1) / 2.0),
glm::vec4(0.0, float(H) / 2.0, 0.0, float(H-1) / 2.0),
glm::vec4(0.0, 0.0, 0.0, 1.0)
);

glm::mat3x4 P = world2ndc * ndc2pix;

glm::mat4x3 dL_dT = glm::mat4x3(
glm::mat3 dL_dT = glm::mat3(
dL_dtransMat[0], dL_dtransMat[1], dL_dtransMat[2],
dL_dtransMat[3], dL_dtransMat[4], dL_dtransMat[5],
dL_dtransMat[6], dL_dtransMat[7], dL_dtransMat[8],
0.0, 0.0, 0.0
dL_dtransMat[6], dL_dtransMat[7], dL_dtransMat[8]
);

glm::mat3x4 dL_dM_aug = glm::transpose(P) * glm::transpose(dL_dT);
glm::mat3 dL_dM = glm::mat3(
glm::vec3(dL_dM_aug[0]),
glm::vec3(dL_dM_aug[1]),
glm::vec3(dL_dM_aug[2])
);
glm::mat3x4 dL_dsplat = P * glm::transpose(dL_dT);

const glm::mat3 R = quat_to_rotmat(quat);

glm::mat3 W_t = glm::transpose(W);
glm::mat3 dL_dRS = W_t * dL_dM;
glm::vec3 dL_dRS0 = dL_dRS[0];
glm::vec3 dL_dRS1 = dL_dRS[1];
glm::vec3 dL_dpw = dL_dRS[2];
glm::vec3 dL_dtn = W_t * glm::vec3(dL_dnormal3D[0], dL_dnormal3D[1], dL_dnormal3D[2]);

#if DUAL_VISIABLE
glm::vec3 tn = W*R[2];
float cos = glm::dot(-tn, M[2]);
float multiplier = cos > 0 ? 1 : -1;
dL_dtn *= multiplier;
float multiplier = 1;

#if DUAL_VISIABLE
float3 normal = transformVec4x3({R[2].x, R[2].y, R[2].z}, viewmat);
multiplier = normal.z < 0 ? 1: -1;
#endif

glm::mat3 dL_dR = glm::mat3(
dL_dRS0 * glm::vec3(scale.x),
dL_dRS1 * glm::vec3(scale.y),
dL_dtn
float3 dL_dtn = transformVec4x3Transpose({dL_dnormal3D[0],dL_dnormal3D[1],dL_dnormal3D[2]}, viewmat);
glm::mat3 dL_dRS = glm::mat3(
glm::vec3(dL_dsplat[0]),
glm::vec3(dL_dsplat[1]),
multiplier * glm::vec3(dL_dtn.x, dL_dtn.y, dL_dtn.z)
);

// propagate to scale and quat, mean
glm::mat3 dL_dR = glm::mat3(
dL_dRS[0] * glm::vec3(scale.x),
dL_dRS[1] * glm::vec3(scale.y),
dL_dRS[2]);

dL_dmean3D = glm::vec3(dL_dsplat[2]);
dL_drot = quat_to_rotmat_vjp(quat, dL_dR);
dL_dscale = glm::vec2(
(float)glm::dot(dL_dRS0, R[0]),
(float)glm::dot(dL_dRS1, R[1])
);

dL_dmean3D = dL_dpw;
(float)glm::dot(dL_dRS[0], R[0]),
(float)glm::dot(dL_dRS[1], R[1]));
}


Expand Down Expand Up @@ -562,35 +549,38 @@ __global__ void preprocessCUDA(
if (idx >= P || !(radii[idx] > 0))
return;

const float* transMat = &(transMats[9 * idx]);
const float* dL_dtransMat = &(dL_dtransMats[9 * idx]);
const float* dL_dnormal3D = &(dL_dnormal3Ds[3 * idx]);

glm::vec3 p_world = glm::vec3(means3D[idx].x, means3D[idx].y, means3D[idx].z);
float4 intrins = {focal_x, focal_y, focal_x * tan_fovx, focal_y * tan_fovy};

glm::vec3 dL_dmean3D;
glm::vec2 dL_dscale;
glm::vec4 dL_drot;
computeTransMat(
p_world,
rotations[idx],
scales[idx],
viewmatrix,
intrins,
tan_fovx,
tan_fovy,
transMat,
dL_dtransMat,
dL_dnormal3D,
dL_dmean3D,
dL_dscale,
dL_drot
);
// update
dL_dmean3Ds[idx] = dL_dmean3D;
dL_dscales[idx] = dL_dscale;
dL_drots[idx] = dL_drot;
if (scales) {
const float* transMat = &(transMats[9 * idx]);
const float* dL_dtransMat = &(dL_dtransMats[9 * idx]);
const float* dL_dnormal3D = &(dL_dnormal3Ds[3 * idx]);

glm::vec3 p_world = glm::vec3(means3D[idx].x, means3D[idx].y, means3D[idx].z);

const int W = int(focal_x * tan_fovx * 2);
const int H = int(focal_y * tan_fovy * 2);
glm::vec3 dL_dmean3D;
glm::vec2 dL_dscale;
glm::vec4 dL_drot;
computeTransMat(
p_world,
rotations[idx],
scales[idx],
viewmatrix,
projmatrix,
W,
H,
transMat,
dL_dtransMat,
dL_dnormal3D,
dL_dmean3D,
dL_dscale,
dL_drot
);
// update
dL_dmean3Ds[idx] = dL_dmean3D;
dL_dscales[idx] = dL_dscale;
dL_drots[idx] = dL_drot;
}

if (shs)
computeColorFromSH(idx, D, M, (glm::vec3*)means3D, *campos, shs, clamped, (glm::vec3*)dL_dcolors, (glm::vec3*)dL_dmean3Ds, (glm::vec3*)dL_dshs);
Expand Down
86 changes: 37 additions & 49 deletions cuda_rasterizer/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,47 +72,33 @@ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const

// Compute a 2D-to-2D mapping matrix from a tangent plane into a image plane
// given a 2D gaussian parameters.
__device__ bool computeTransMat(const glm::vec3 &p_world, const glm::vec4 &quat, const glm::vec2 &scale, const float *viewmat, const float4 &intrins, float tan_fovx, float tan_fovy, float* transMat, float3 &normal) {
// Setup cameras
// Currently only support ideal pinhole camera
// but more advanced intrins can be implemented
const glm::mat3 W = glm::mat3(
viewmat[0],viewmat[1],viewmat[2],
viewmat[4],viewmat[5],viewmat[6],
viewmat[8],viewmat[9],viewmat[10]
);
const glm::vec3 cam_pos = glm::vec3(viewmat[12], viewmat[13], viewmat[14]); // camera center
const glm::mat4 P = glm::mat4(
intrins.x, 0.0, 0.0, 0.0,
0.0, intrins.y, 0.0, 0.0,
intrins.z, intrins.w, 1.0, 1.0,
0.0, 0.0, 0.0, 0.0
__device__ void computeTransMat(const glm::vec3 &p_world, const glm::vec4 &quat, const glm::vec2 &scale, const float *viewmat, const float*projmat, const int W, const int H, float* transMat, float3 &normal) {
// setup camera
// can be fatored out to reduce computations
glm::mat4 world2ndc = glm::mat4(
projmat[0], projmat[4], projmat[8], projmat[12],
projmat[1], projmat[5], projmat[9], projmat[13],
projmat[2], projmat[6], projmat[10], projmat[14],
projmat[3], projmat[7], projmat[11], projmat[15]
);

glm::mat3x4 ndc2pix = glm::mat3x4(
glm::vec4(float(W) / 2.0, 0.0, 0.0, float(W-1) / 2.0),
glm::vec4(0.0, float(H) / 2.0, 0.0, float(H-1) / 2.0),
glm::vec4(0.0, 0.0, 0.0, 1.0)
);

glm::mat3x4 P = world2ndc * ndc2pix;
// Make the geometry of 2D Gaussian as a Homogeneous transformation matrix
// under the camera view, See Eq. (5) in 2DGS' paper.
glm::vec3 p_view = W * p_world + cam_pos;
glm::mat3 R = quat_to_rotmat(quat) * scale_to_mat({scale.x, scale.y, 1.0f}, 1.0f);
glm::mat3 M = glm::mat3(W * R[0], W * R[1], p_view);
glm::vec3 tn = W*R[2];
float cos = glm::dot(-tn, p_view);

#if BACKFACE_CULL
if (cos == 0.0f) return false;
#endif

#if RENDER_AXUTILITY and DUAL_VISIABLE
// This means a 2D Gaussian is dual visiable.
// Experimentally, turning off the dual visiable works eqully.
float multiplier = cos > 0 ? 1 : -1;
tn *= multiplier;
#endif
// projection into screen space, see Eq. (7)
glm::mat4x3 T = glm::transpose(P * glm::mat3x4(
glm::vec4(M[0], 0.0),
glm::vec4(M[1], 0.0),
glm::vec4(M[2], 1.0)
));
glm::mat3 RS = quat_to_rotmat(quat) * scale_to_mat({scale.x, scale.y, 1.0f}, 1.0f);
glm::mat3x4 splat2world = glm::mat3x4(
glm::vec4(RS[0], 0.0),
glm::vec4(RS[1], 0.0),
glm::vec4(p_world, 1.0)
);
// projection into screen space, see Eq. (7) in 2DGS
glm::mat3 T = glm::transpose(splat2world) * P;

transMat[0] = T[0].x;
transMat[1] = T[0].y;
Expand All @@ -123,8 +109,15 @@ __device__ bool computeTransMat(const glm::vec3 &p_world, const glm::vec4 &quat,
transMat[6] = T[2].x;
transMat[7] = T[2].y;
transMat[8] = T[2].z;
normal = {tn.x, tn.y, tn.z};
return true;

normal = transformVec4x3({RS[2].x, RS[2].y, RS[2].z}, viewmat);

#if DUAL_VISIABLE
// This means a 2D Gaussian is dual visiable.
// Experimentally, turning off the dual visiable works eqully.
float multiplier = normal.z < 0 ? 1: -1;
normal = {multiplier * normal.x, multiplier * normal.y, multiplier * normal.z};
#endif
}

// Computing the bounding box of the 2D Gaussian and its center,
Expand Down Expand Up @@ -205,34 +198,29 @@ __global__ void preprocessCUDA(int P, int D, int M,
if (!in_frustum(idx, orig_points, viewmatrix, projmatrix, prefiltered, p_view))
return;

float4 intrins = {focal_x, focal_y, float(W)/2.0, float(H)/2.0};
glm::vec2 scale = scales[idx];
glm::vec4 quat = rotations[idx];

const float* transMat;
bool ok;
float3 normal;
if (transMat_precomp != nullptr)
{
transMat = transMat_precomp + idx * 9;
normal = {0.f, 0.f, 0.f}; // not support precomp normal
}
else
{
ok = computeTransMat(p_world, quat, scale, viewmatrix, intrins, tan_fovx, tan_fovy, transMats + idx * 9, normal);
if (!ok) return;
computeTransMat(p_world, rotations[idx], scales[idx], viewmatrix, projmatrix, W, H, transMats + idx * 9, normal);
transMat = transMats + idx * 9;
}

// compute center and extent
float2 center;
float2 extent;
ok = computeAABB(transMat, center, extent);
bool ok = computeAABB(transMat, center, extent);
if (!ok) return;

// add the bounding of countour
#if TIGHTBBOX // no use in the paper, but it indeed help speeds.
// the effective extent is now depended on the opacity of gaussian.
float truncated_R = sqrtf(max(9.f + logf(opacities[idx]), 0.000001));
float truncated_R = sqrtf(max(9.f + 2.f * logf(opacities[idx]), 0.000001));
#else
float truncated_R = 3.f;
#endif
Expand Down Expand Up @@ -287,7 +275,7 @@ renderCUDA(
uint2 pix_max = { min(pix_min.x + BLOCK_X, W), min(pix_min.y + BLOCK_Y , H) };
uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y };
uint32_t pix_id = W * pix.y + pix.x;
float2 pixf = { (float)pix.x + 0.5, (float)pix.y + 0.5};
float2 pixf = { (float)pix.x, (float)pix.y};

// Check if this thread is associated with a valid pixel or outside.
bool inside = pix.x < W&& pix.y < H;
Expand Down
7 changes: 0 additions & 7 deletions rasterize_points.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,6 @@ RasterizeGaussiansCUDA(
AT_ERROR("means3D must have dimensions (num_points, 3)");
}

if (scales.ndimension() != 2 || scales.size(1) != 2) {
AT_ERROR("scales must have dimensions (num_points, 2)");
}

if (rotations.ndimension() != 2 || rotations.size(1) != 4) {
AT_ERROR("rotations must have dimensions (num_points, 4)");
}

const int P = means3D.size(0);
const int H = image_height;
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
setup(
name="diff_surfel_rasterization",
packages=['diff_surfel_rasterization'],
version='0.0.0',
version='0.0.1',
ext_modules=[
CUDAExtension(
name="diff_surfel_rasterization._C",
Expand Down

0 comments on commit 362a17a

Please sign in to comment.