
Question about coordinate system between lidar and camera. #2

Open
SubChange opened this issue Dec 20, 2024 · 2 comments

Comments


SubChange commented Dec 20, 2024

For object detection research, I dived into this dataset. I can successfully parse the label file in JSON format. However, when I tried to relate the point clouds to the images, I found that the coordinate-system information in the JSON file appears to be inaccurate, which is holding up my work. I would greatly appreciate any advice you could give me.

I put the whole Python script at the bottom; anyone can run it directly. The code depends on Open3D, opencv-python, and pyntcloud, all of which are easy to install via `pip install open3d opencv-python pyntcloud`.

The problems are as follows:

  1. I know the cuboid label is in the format (x, y, z, rx, ry, rz, sx, sy, sz), as commented in the Python code below. However, it has a wrong pose when I draw it in the Open3D viewer.
  2. The coordinate-system calibration between lidar and camera seems to be inaccurate: when I project bboxes_3d onto the image, there is a large deviation between the actual object and the projected bbox_2d. So I want to know whether the calibration matrix is accurate enough or whether there is a bug in my Python code (see the projection sketch right after this list).
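For reference, the projection I use is the standard pinhole model (this is my assumption, matching the `lidar2cam` / `camera2img` functions in the script at the bottom): transform the box corners from the lidar frame to the camera frame with the extrinsic matrix, apply the intrinsics, and divide by depth:

$$ \tilde{\mathbf{x}}_{cam} = T_{lidar \rightarrow cam}\,\tilde{\mathbf{x}}_{lidar}, \qquad \begin{bmatrix} u \\ v \\ w \end{bmatrix} = K_{3\times3}\,\mathbf{x}_{cam}, \qquad (u', v') = \left(\frac{u}{w},\ \frac{v}{w}\right) $$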

The following are some samples:

  1. SOSDaR24_part_02/023_urban_01_01_00, timestamp=1717494400449

In this sample, the rotation of the label (bbox_3d) is not only around the z-axis. It can also be seen that the position of this label is inaccurate, because the box does not contain the target point cloud.

[screenshots: SOSDaR24_part_02_023_urban_01_01_00_lidar2img_1717494400449]

  2. SOSDaR24_part_04/075_urban_05_02_04, timestamp=1717660765081

In this sample, although the position is accurate in the point cloud, it is not accurate after being projected onto the image.

[screenshots: SOSDaR24_part_04_075_urban_05_02_04_lidar2img_1717660765081]

  3. SOSDaR24_part_03/047_urban_01_02_00, timestamp=1717569731559

This sample shows the same problems as the two above.

[screenshots: SOSDaR24_part_03_047_urban_01_02_00_lidar2img_1717569731559]

There are many more samples like the above. I therefore see two possibilities: either the labels contain mistakes, or my code has a wrong implementation. I really hope someone can give me some hints or tips. Thank you very much.

Python code

import os
# os.environ['WEBRTC_IP'] = '127.0.0.1'
# os.environ['WEBRTC_PORT'] = '18889'
import sys
import time
import json
from typing import List, Optional, Tuple, Union
import re
import colorsys

import numpy as np
from scipy.spatial.transform import Rotation as sci_rot

import pandas as pd
import matplotlib as mpl
# import matplotlib.cm as mcm
# import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

import cv2 as cv
import open3d as o3d

from pyntcloud import PyntCloud

HAS_EULER = True
try:
    from eulerangles import euler2matrix as euler2mtx
except ImportError:
    HAS_EULER = False
    

class SOSDaR24Utils(object):
    def __init__(self) -> None:
        super(SOSDaR24Utils, self).__init__()

    @staticmethod
    def get_frame2ts_map(json_data):
        """In returned dict, `timestamp` is of type of `int`"""
        frame2ts = {}
        frames = json_data['openlabel']['frames']
        for k, v in frames.items():
            streams = v['frame_properties']['streams']
            tele15_ts = os.path.splitext(os.path.basename(streams['tele15']['stream_properties']['uri']))[0]
            pandar64_ts = os.path.splitext(os.path.basename(streams['pandar64']['stream_properties']['uri']))[0]
            camera_ts = os.path.splitext(os.path.basename(streams['camera']['stream_properties']['uri']))[0]
            assert tele15_ts==pandar64_ts and pandar64_ts==camera_ts, "sample timestamp not the same"
            frame2ts[k] = int(tele15_ts)
        return frame2ts
    
    @staticmethod
    def get_sensor_coord_sys(json_data):
        coordinate_systems = json_data['openlabel']['coordinate_systems']

        tele15_sys = np.array(coordinate_systems['tele15']['pose_wrt_parent']['matrix4x4']).reshape(4,4)
        pandar64_sys = np.array(coordinate_systems['pandar64']['pose_wrt_parent']['matrix4x4']).reshape(4,4)
        camera_sys = np.array(coordinate_systems['camera']['pose_wrt_parent']['matrix4x4']).reshape(4,4)

        # `pose_wrt_parent` is the sensor pose w.r.t. its parent; the inverse maps `LIDAR` points into the `CAMERA` frame
        coord_sys = dict(
            tele15 = np.linalg.inv(tele15_sys), # NOTE to be confirmed
            pandar64 = np.linalg.inv(pandar64_sys),
            camera = np.linalg.inv(camera_sys)
        )

        return coord_sys
    
    @staticmethod
    def get_camera_intrinsic_mtx(json_data):
        streams = json_data['openlabel']['streams']
        # 3x4
        cam_k = np.array(streams['camera']['stream_properties']['intrinsics_pinhole']['camera_matrix_3x4']).reshape(3,4)
        return cam_k[:,:3]
    
    @staticmethod
    def get_cluster_based_on_id(points, iden_col=-2):

        ori_ids = points[:,iden_col].astype(int)

        object_pcd_ids = np.unique(ori_ids)
        ids = object_pcd_ids[object_pcd_ids!=0] # 0 is background

        clusters = []
        for id in ids:
            idx = ori_ids==id
            pts = points[idx,:]

            try:
                obb = o3d.geometry.OrientedBoundingBox.create_from_points(o3d.utility.Vector3dVector(pts[:,:3]))
                center = np.asarray(obb.get_center()).flatten()
            except Exception:  # fall back when Open3D cannot build an OBB (e.g. too few points)
                obb = None
                center = np.mean(pts[:,:3], axis=0)
            
            clusters.append(
                dict(
                    oid = id,
                    points=pts,
                    obb = obb,
                    center=center
                )
            )

        return clusters

    @staticmethod
    def fix_bbox_3d_anno(bbox_3d:list, clusters:List[dict]):
        """update label according to `cluster`, only move position (in try)
        
        The idea is that there are 
        1. lebels in annotation
        6. object_id in point cloud scalar feilds

        therefore, for each label (bbox_3d), we try to find the nearest object cluster in the point cloud.
        when found, we move the labelled bbox_3d to the cluster center        

        """

        cluster_centers = np.array([it_['center'] for it_ in clusters]) # n x 3

        loc = np.array(bbox_3d[:3]) # 3,
        dim = np.array(bbox_3d[3:6])
        yaw = bbox_3d[-1]
        center = loc.copy()  # copy so that modifying `center` does not also modify `loc`
        center[-1] = center[-1] + dim[-1] / 2  # bottom center -> geometric center

        # if any centers of cluster in bboxes_3d, if no, do not move
        rot_mat = np.array([[np.cos(yaw), np.sin(yaw), 0.],
                            [-np.sin(yaw), np.cos(yaw), 0.],
                            [0., 0., 1.]])
        rot_diff = (cluster_centers-center) @ rot_mat.T # n x 3
        idx1 = (np.abs(rot_diff[:,0])-dim[0]/2) < 0
        idx2 = (np.abs(rot_diff[:,1])-dim[1]/2) < 0
        idx3 = (np.abs(rot_diff[:,2])-dim[2]/2) < 0
        index = np.logical_and(np.logical_and(idx1,idx2), idx3) #
        dst = np.sqrt(np.sum(rot_diff**2, axis=1))
        
        filtered_dst = dst[index] # distance of cluster center to the center of the bbox_3d
        if len(filtered_dst)==0:
            r = np.argsort(dst)
            tar_idx = r[0]
            if dst[tar_idx]<=np.sqrt(np.sum(dim**2)):
                # only move to the neighboring labeled cluster
                new_loc = cluster_centers[tar_idx,...].flatten().tolist()
                new_loc[-1] = new_loc[-1] - dim[-1] / 2 # to gravity center
                return new_loc + dim.tolist() + [yaw,]
        
        return bbox_3d[:]


def rand_cmap(nlabels, type='bright', first_color_black=True, last_color_black=False, verbose=False):
    """Creates a random colormap to be used together with matplotlib. Useful for segmentation tasks

    Args:
        nlabels (int): Number of labels (size of colormap).
        type (str, optional): 'bright' for strong colors, 'soft' for pastel colors.
        first_color_black (bool, optional): Option to use first color as black, True or False.
        last_color_black (bool, optional): Option to use last color as black, True or False.
        verbose (bool, optional): Prints the number of labels and shows the colormap. True or False.

    Returns:
        random_colormap: colormap for matplotlib (matplotlib.colors.LinearSegmentedColormap)
    """

    if type not in ('bright', 'soft'):
        print ('Please choose "bright" or "soft" for type')
        return

    if verbose:
        print('Number of labels: ' + str(nlabels))

    # Generate color map for bright colors, based on hsv
    if type == 'bright':
        randHSVcolors = [(np.random.uniform(low=0.0, high=1),
                          np.random.uniform(low=0.2, high=1),
                          np.random.uniform(low=0.9, high=1)) for _ in range(nlabels)]

        # Convert HSV list to RGB
        randRGBcolors = []
        for HSVcolor in randHSVcolors:
            randRGBcolors.append(colorsys.hsv_to_rgb(HSVcolor[0], HSVcolor[1], HSVcolor[2]))

        if first_color_black:
            randRGBcolors[0] = [0, 0, 0]

        if last_color_black:
            randRGBcolors[-1] = [0, 0, 0]

        random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels)

    # Generate soft pastel colors, by limiting the RGB spectrum
    if type == 'soft':
        low = 0.6
        high = 0.95
        randRGBcolors = [(np.random.uniform(low=low, high=high),
                          np.random.uniform(low=low, high=high),
                          np.random.uniform(low=low, high=high)) for _ in range(nlabels)]

        if first_color_black:
            randRGBcolors[0] = [0, 0, 0]

        if last_color_black:
            randRGBcolors[-1] = [0, 0, 0]
        random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels)

    # Display colorbar
    if verbose:
        from matplotlib import colors, colorbar
        from matplotlib import pyplot as plt
        fig, ax = plt.subplots(1, 1, figsize=(15, 0.5))

        bounds = np.linspace(0, nlabels, nlabels + 1)
        norm = colors.BoundaryNorm(bounds, nlabels)

        cb = colorbar.ColorbarBase(ax, cmap=random_colormap, norm=norm, spacing='proportional', ticks=None,
                                   boundaries=bounds, format='%1i', orientation=u'horizontal')

    return random_colormap


def map_scalar_to_discrete_color(scalars):

    sc = np.array(scalars).astype(np.int64)
    usc = np.unique(sc).tolist()
    sc2id_map = dict([(usc[i_], i_) for i_ in range(len(usc))])

    discrete_colors = []

    cmap_ = rand_cmap(len(usc))
    
    colors = [cmap_(sc2id_map[s_.item()]) for s_ in sc]

    discrete_colors = np.clip(np.array(colors), 0., 1.)

    return discrete_colors


def mm3d_lidar_inst_eight_pts_to_mesh(points, color=(1.0, 0.0, 0.0)):
    """
    convert bbox of `mmdet3d` to open3d

    Convert boxes to corners in clockwise order, in the form of 
    (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).
                                   up z
                    front x           ^
                         /            |
                        /             |
          (x1, y0, z1) + -----------  + (x1, y1, z1)
                      /|            / |
                     / |           /  |
       (x0, y0, z1) + ----------- +   + (x1, y1, z0)
                    |  /      .   |  /
                    | / origin    | /
    left y <------- + ----------- + (x0, y1, z0)
        (x0, y0, z0)

    
           ^ z   x            5 -------------- 6
           |   /             / |             / |
           |  /             1 -|------------ 2 |   
    y      | /              |  |             | | 
    <------|o               | 4 -------------| 7
                            |/       o       |/    
                            0 -------------- 3 

    """

    bidx = [[0,4,1],[1,4,5],
            [1,2,5],[5,2,6],
            [2,3,6],[6,3,7],
            [3,0,7],[0,4,7],
            [0,2,1],[0,3,2],
            [4,6,5],[4,7,6]]
    
    lidx = [[0,1],[1,2],[2,3],[3,0],
            [4,5],[5,6],[6,7],[7,4],
            [0,4],[1,5],[2,6],[3,7]]
    
    box = o3d.geometry.TriangleMesh(o3d.utility.Vector3dVector(points),
                                    o3d.utility.Vector3iVector(bidx)
                                    )
    edge = o3d.geometry.LineSet(o3d.utility.Vector3dVector(points),
                                o3d.utility.Vector2iVector(lidx)
                                )
    box.paint_uniform_color(color)
    edge.paint_uniform_color(color)

    return box, edge


def lidar2cam(points:np.array, calib_dict:dict):

    assert 'lidar2cam' in calib_dict.keys(), "missing calib for lidar2cam"

    lidar2cam = calib_dict['lidar2cam'] # 4x4
    homo_pts = np.pad(points, ((0, 0), (0, 1)), mode='constant', constant_values=1)
    cam_pts = homo_pts @ lidar2cam.T # (lidar2cam @ homo_pts.T).T

    return cam_pts[:,:-1]


def camera2img(points:np.array, calib_dict:dict):
    assert 'cam2img' in calib_dict.keys(), "missing calib for cam2img"

    cam2img = calib_dict['cam2img'] # camera intrinsic matrix 3x3
    homo_img_pts = points @ cam2img.T # (cam2img @ points.T).T

    img_pts = homo_img_pts / homo_img_pts[:,-1:]

    return img_pts[:,:-1]


def bbox_3d_to_bbox_img(bbox3d:List[np.array], calib_dict:dict, image_shape:Union[tuple, list]=None):
    """_summary_

    Args:
        bbox3d (List[np.array]): nx8x3
        image_shape (tuple): w, h
        calib_dict (dict): _description_

    Returns:
        _type_: _description_
    """

    assert bbox3d[0].shape[0] == 8 and bbox3d[0].shape[1] == 3, "bbox3d shape: nx8x3"

    bbox3d_stacked = np.vstack(bbox3d)

    cam_pts = lidar2cam(bbox3d_stacked, calib_dict)
    img_pts = camera2img(cam_pts, calib_dict)

    img_pts = img_pts.reshape(-1,8,2)

    cliped_shape = [0, np.inf]
    if image_shape is not None:
        if image_shape[0]>cliped_shape[0]:
            cliped_shape[0] = image_shape[0]
        if image_shape[1]<cliped_shape[1]:
            cliped_shape[1] = image_shape[1]

    bboxes_img = []
    for bbox in img_pts:
        max_pt = np.max(bbox, axis=0)
        min_pt = np.min(bbox, axis=0)
        bboxes_img.append(np.array([min_pt[0], min_pt[1], max_pt[0], max_pt[1]]))

    bboxes_img = np.array(bboxes_img)

    # u1 u2
    bboxes_img[bboxes_img[:,0]<0, 0] = 0
    bboxes_img[bboxes_img[:,2]<0, 2] = 0
    bboxes_img[bboxes_img[:,0]>cliped_shape[0], 0] = cliped_shape[0]
    bboxes_img[bboxes_img[:,2]>cliped_shape[0], 2] = cliped_shape[0]
    # v1 v2
    bboxes_img[bboxes_img[:,1]<0, 1] = 0 # h
    bboxes_img[bboxes_img[:,3]<0, 3] = 0 # h
    bboxes_img[bboxes_img[:,1]>cliped_shape[1], 1] = cliped_shape[1] # h
    bboxes_img[bboxes_img[:,3]>cliped_shape[1], 3] = cliped_shape[1] # h

    bboxes_img[:,2:] = bboxes_img[:,2:] - bboxes_img[:,:2]

    return bboxes_img


def euler2matrix(euler_angles: np.ndarray,
                 axes: str = 'zyx',
                 intrinsic: bool = True,
                 right_handed_rotation: bool = True) -> np.ndarray:
    if not intrinsic:
        raise NotImplementedError("only intrinsic rotations are supported")
    if not right_handed_rotation:
        raise NotImplementedError("only right-handed rotations are supported")

    if axes != "zyx":
        raise NotImplementedError("only the 'zyx' axes order is supported")
    
    alpha, beta, theta = euler_angles

    Rx = np.array([[1., 0., 0.],
                   [0., np.cos(alpha), np.sin(alpha)],
                   [0., -np.sin(alpha), np.cos(alpha)]])
    
    Ry = np.array([[np.cos(beta), 0., -np.sin(beta)],
                   [0., 1., 0.],
                   [np.sin(beta), 0., np.cos(beta)]])
    
    Rz = np.array([[np.cos(theta), np.sin(theta), 0.],
                   [-np.sin(theta), np.cos(theta), 0.],
                   [0., 0., 1.]])
    
    # intrinsic rotation zy'x'' = extrinsic rotation xyz
    mtx = Rz.T @ Ry.T @ Rx.T # (Rx @ Ry @ Rz).T

    return mtx


__DETCLASSES__ = {
    "person": 0,
    "road_vehicle": 1,
    "obstacle": 1
}

if __name__ == "__main__":

    # o3d.visualization.webrtc_server.enable_webrtc()

    data_root = 'F:/data/SOSDaR24/'

    # line_pth = data_root + 'SOSDaR24_part_04/074_urban_04_02_06/'
    # timestamp = 1717660252585 # 1717660249955

    # line_pth = data_root + 'SOSDaR24_part_02/023_urban_01_01_00/'
    # timestamp = 1717494400449

    # line_pth = data_root + 'SOSDaR24_part_03/055_urban_07_02_00/'
    # timestamp = 1717572439421

    # line_pth = data_root + 'SOSDaR24_part_03/058_urban_10_02_00/'
    # timestamp = 1717573298891 # 1717573299181

    line_pth = data_root + 'SOSDaR24_part_03/047_urban_01_02_00/'
    timestamp = 1717569731559 # 1717573299181

    # line_pth = data_root + 'SOSDaR24_part_04/071_urban_01_02_12/'
    # timestamp = 1717657424660 # overlap

    # line_pth = data_root + 'SOSDaR24_part_04/075_urban_05_02_04/'
    # timestamp = 1717660765081 # 3 overlap


    with open(line_pth + line_pth.rstrip('/').split('/')[-1] + '.json', 'r') as f:
        labl24_json = json.load(f)

    camera_streams_path = os.path.join(line_pth, 'streams/camera/')
    pandar64_streams_path = os.path.join(line_pth, 'streams/pandar64/')
    tele15_streams_path = os.path.join(line_pth, 'streams/tele15/')

    # point cloud
    pandar64_pcd_pth = os.path.join(pandar64_streams_path, str(timestamp)+'.pcd')
    pandar64_cloud = PyntCloud.from_file(pandar64_pcd_pth)
    pandar64_points = pandar64_cloud.points.loc[:,['x','y','z','object_id','object_tag']].to_numpy().astype(np.float32)

    tele15_pcd_pth = os.path.join(tele15_streams_path, str(timestamp)+'.pcd')
    tele15_cloud = PyntCloud.from_file(tele15_pcd_pth)
    tele15_points = tele15_cloud.points.loc[:,['x','y','z','object_id','object_tag']].to_numpy().astype(np.float32)

    all_points = np.vstack((pandar64_points, tele15_points))
    colors = map_scalar_to_discrete_color(all_points[:,-2])

    object_ids = labl24_json['openlabel']['objects'].keys()
    object_pcd_ids = np.unique(all_points[:,-2].astype(int))
    object_pcd_tag = np.unique(all_points[:,-1].astype(int))

    # center of point cloud object id
    object_clusters = SOSDaR24Utils.get_cluster_based_on_id(all_points, iden_col=-2)
    if len(object_clusters)==0:
        print("no labeled object")

    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(all_points[:,:3]))
    pcd.colors = o3d.utility.Vector3dVector(colors[:,:3])

    # image
    image = cv.imread(camera_streams_path + str(timestamp)+'.png', cv.IMREAD_COLOR)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    
    lidar_axes = o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.5)
    vis.add_geometry(lidar_axes)

    # coordinate system
    coord_sys = SOSDaR24Utils.get_sensor_coord_sys(labl24_json)
    cam_k = SOSDaR24Utils.get_camera_intrinsic_mtx(labl24_json)
    calib_dict = dict(
        lidar2cam=coord_sys['camera'],
        cam2img=cam_k
    )

    frame2ts = SOSDaR24Utils.get_frame2ts_map(labl24_json)
    ts2frame = dict([(v, k) for k, v in frame2ts.items()])

    frame_id = ts2frame[timestamp]
    frame_now = labl24_json['openlabel']['frames'][frame_id]

    bboxes_3d = []
    annos_name, annos_type, annos_loc, annos_dim, annos_rotz = [], [], [], [], []
    for oid, oinfo in frame_now['objects'].items():
        odata = oinfo['object_data'] # {text, cuboid, boolean}
        if 'cuboid' in odata.keys():
            cuboid = odata['cuboid']
            cu1 = cuboid[0]['val']
            # List of values encoding the position, rotation and dimensions. 
            # Two options are supported, using 9 or 10 values. If 9 values are used, 
            # the format is (x, y, z, rx, ry, rz, sx, sy, sz), 
            # where (x, y, z) encodes the position, 
            # (rx, ry, rz) encodes the Euler angles that encode the rotation, and 
            # (sx, sy, sz) are the dimensions of the cuboid in its object coordinate system. 
            # 
            # If 10 values are used, then the format is (x, y, z, qx, qy, qz, qw, sx, sy, sz) 
            # with the only difference of the rotation values which are the 4 values of a quaternion.
            # intrinsic Z->Y'->X'' equals to extrinsic XYZ (rpy)
            tmp_loc = cu1[:3]
            if tmp_loc[0]>130:
                continue # skip objects that are too far away (x > 130 m)
            rxyz = cu1[3:-3] # rx=roll, ry=pitch, rz=yaw

            # only for verifying the rotation matrix
            ro_mtx_tmp = euler2matrix(rxyz)
            o3d_ro_mtx_tmp = o3d.geometry.OrientedBoundingBox.get_rotation_matrix_from_zyx(np.array(rxyz[::-1])) # radians
            if HAS_EULER:
                ro_mtx = euler2mtx(np.array(rxyz[::-1]) / np.pi * 180, axes='zyx', intrinsic=True, right_handed_rotation=True) # degrees
            else:
                ro_mtx = euler2matrix(rxyz)
            # assert np.sum((ro_mtx-ro_mtx_tmp)**2)<1e-6, "rotation matrix error"
            # assert rxyz[0]==0 and rxyz[1]==0, "yaw is not only around Z axis"
            if rxyz[0]!=0 or rxyz[1]!=0:
                print(f"yaw is not only around Z axis, rxyz: {rxyz}")

            tmp_yaw = rxyz[-1]
            # tmp_yaw = 0 # temporarily ignore the `yaw` angle
            tmp_dim = cu1[-3:]
            tmp_dim[0] = tmp_dim[0] * 1.5 # enlarge length and width by 1.5x (height unchanged)
            tmp_dim[1] = tmp_dim[1] * 1.5
            tmp_loc[-1] = tmp_loc[-1] - tmp_dim[-1] / 2 # move the center to the `bottom center`

            tmp_anno_bbox_3d = tmp_loc + tmp_dim + [tmp_yaw,]
            # fixed_anno_bbox_3d = SOSDaR24Utils.fix_bbox_3d_anno(tmp_anno_bbox_3d, object_clusters)
            fixed_anno_bbox_3d = tmp_anno_bbox_3d[:]

            # for mmdet3d use
            # lidar_inst_bbox_3d = LiDARInstance3DBoxes(np.array(fixed_anno_bbox_3d).reshape(1,-1), box_dim=7, with_yaw=True)
            # eight_pts = lidar_inst_bbox_3d.corners.numpy()
            # bbox3d_, edge_ = mm3d_lidar_inst_eight_pts_to_mesh(eight_pts[0,...], color=(1.0, 0.0, 0.0))

            # for open3d use
            bbox3d = o3d.geometry.OrientedBoundingBox(np.array(tmp_loc) + np.array([0,0,tmp_dim[-1]/2]),
                                                    ro_mtx[:3,:3],
                                                    np.array(tmp_dim))
            bbox3d.color = [1.0, 0.0, 0.0]
            eight_pts = np.asarray(bbox3d.get_box_points())
            bboxes_3d.append(eight_pts) # for open3d use
            # bboxes_3d.append(eight_pts[0,...]) # for mmdet3d use

            annos_loc.append(fixed_anno_bbox_3d[:3])
            annos_dim.append(fixed_anno_bbox_3d[3:6])
            annos_rotz.append(fixed_anno_bbox_3d[-1])

            # vis.add_geometry(edge_) # for mmdet3d use
            vis.add_geometry(bbox3d) # for open3d use
            vis.poll_events()
            vis.update_renderer()
    
    vis.add_geometry(pcd)
    vis.poll_events()
    vis.update_renderer()

    if len(annos_loc)==0:
        # no bboxes_3d of objects
        pass
    annos_loc = np.array(annos_loc)
    annos_dim = np.array(annos_dim)
    annos_rotz = np.array(annos_rotz).reshape(-1,1)

    # # label bboxes overlap, only uncomment it when mmdet3d is ready
    # if len(annos_loc)>1:
    #     bbox_bev = np.hstack((annos_loc, annos_dim, annos_rotz))[:,[0,1,3,4,6]]
    #     bbox_bev[:,-1]*=-1 # `iou` calculation need clockwise angle
    #     iou_vals = iou_bev(bbox_bev, bbox_bev)
    #     np.fill_diagonal(iou_vals, 0)
    #     # assert np.all(iou_vals<=1/3), f"{line_folder}:{timestamp} label overlap"
    #     if np.any(iou_vals>1/3):
    #         print(f"{line_pth}:{timestamp} label overlap")

    bboxes_img = bbox_3d_to_bbox_img(bboxes_3d, calib_dict, image_shape=image.shape[:2][::-1])
    bboxes_img = bboxes_img.astype(int)
    for bbox_img in bboxes_img:
        cv.rectangle(image, bbox_img[:2].tolist(), (bbox_img[:2]+bbox_img[2:]).tolist(), (255,0,0), 1)

    os.makedirs("./data/imgs", exist_ok=True)
    cv.imwrite(f"./data/imgs/{line_pth.rstrip('/').split('/')[-2]}_{line_pth.rstrip('/').split('/')[-1]}_lidar2img_{timestamp}.jpg", image)

    vis.run()
    vis.destroy_window()
    

    print("ooooo!")
@aiglesias-vicom (Collaborator) commented

Hi @SubChange,
Thanks for trying our dataset.

In this sample, the rotation of the label (bbox_3d) is not only around the z-axis. Meanwhile, it can be seen that the position of this label is also not accurate, because it doesn't contain the target point cloud.

We are aware of the problem with the rotation and position of some objects. We believe it is related to a bug in CARLA (carla-simulator/carla#8226). We are currently working on resolving it. In the meantime, I recommend applying a convex hull or a similar method to the points of the objects and using that instead.
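For example, something along these lines with Open3D (a minimal sketch, assuming the object's points have already been selected by their `object_id` field; the function name is just for illustration):

```python
import numpy as np
import open3d as o3d

def box_from_object_points(points_xyz: np.ndarray) -> o3d.geometry.OrientedBoundingBox:
    """Derive a box from the labelled points themselves instead of the cuboid annotation."""
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points_xyz))
    hull, _ = pcd.compute_convex_hull()        # convex hull of the object's points
    return hull.get_oriented_bounding_box()    # tight oriented box around the hull

# e.g. for one object id `oid`, using the column layout from the script above:
# obb = box_from_object_points(all_points[all_points[:, -2].astype(int) == oid, :3])
```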

In this sample, though the position is accurate in the point cloud, it's not accurate after projecting it onto the image.

The purpose of the images is to provide a more understandable way to visualize the scene. However, the primary purpose of the dataset is for use with point clouds; the labels were not designed for images. There shouldn't be a calibration error, as the positions of the three sensors are the same.

We are working on a new dataset that includes images, and I’ll notify you once it is published. I’ll also keep you updated on fixes for the current errors.

@SubChange (Author) commented

Hi,
Thank you for your reply.

I recommend applying a convex hull or a similar method to the points of the objects and using that instead.

Yes, I agree that this is a good way to correct the label accuracy. I will try it.

the labels were not designed for images. There shouldn't be a calibration error, as the positions of the three sensors are the same.

Though the primary purpose of the dataset is for use with point clouds, it would be even better with an accurate calibration matrix between the lidar and camera coordinate systems. What do you mean by "there should be no calibration errors"? I list an example from the label file below (SOSDaR24_part_02/023_urban_01_01_00, timestamp=1717494400449).

According to the label file (json), the pose of each sensor is:

Extrinsics

$$ P_{egotrain} = P_{Tele15} = P_{pandar64} = \begin{bmatrix} 1.0 & 0.0 & 0.0 & 0.0 \\ 0.0 & 1.0 & 0.0 & 0.0 \\ 0.0 & 0.0 & 1.0 & 0.0 \\ 0.0 & 0.0 & 0.0 & 1.0 \end{bmatrix} $$

$$ P_{Camera} = \begin{bmatrix} 6.123233995736766e-17 & 6.123233995736766e-17 & 1.0 & 0.0 \\ -1.0 & 3.749399456654644e-33 & 6.123233995736766e-17 & 0.0 \\ 0.0 & -1.0 & 6.123233995736766e-17 & 0.0 \\ 0.0 & 0.0 & 0.0 & 1.0 \end{bmatrix} = \begin{bmatrix} 0.0 & 0.0 & 1.0 & 0.0 \\ -1.0 & 0.0 & 0.0 & 0.0 \\ 0.0 & -1.0 & 0.0 & 0.0 \\ 0.0 & 0.0 & 0.0 & 1.0 \end{bmatrix} $$

Intrinsics

$$ K_{intrinsicspinhole} = \begin{bmatrix} 747.90462424 & 0.0 & 623.79287665 & 0.0 \\ 0.0 & 794.63662007 & 498.12602172 & 0.0 \\ 0.0 & 0.0 & 1.0 & 0.0 \end{bmatrix} $$

From the above, it can be seen that there are no translations between the Tele15, pandar64, and camera coordinate systems, but a rotation between the camera and ego_train coordinate systems does exist (which is, of course, common). In my understanding, the lidar and camera axes can be illustrated as follows:


LIDAR:
            Z     X
            ^    ^
            |   /
            |  /
            | /
            |/
Y  <------ 0

CAMERA:
             Z
             ^
            /
           /
          /
         /
        0 ------> X
        |
        |
        |
        |
        Y

However, based on the above settings, the labels (bboxes_3d) given in the label file cannot be accurately projected onto the image; this can be validated with my Python code. If all sensors are located at the same position in your simulator, then the only remaining source of projection error is the camera intrinsics matrix. If the intrinsics matrix is also accurate, I think it would be helpful to check each setting in your simulator. (A minimal self-contained check of this chain is sketched below.)
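Here is that check (my own sketch, using the matrices copied from the label file above, and assuming the inverse of `pose_wrt_parent` maps LiDAR/ego points into the camera frame):

```python
import numpy as np

# Camera pose w.r.t. ego_train (== LiDAR frame here), copied from the label file above.
cam_pose = np.array([[ 0.0,  0.0, 1.0, 0.0],
                     [-1.0,  0.0, 0.0, 0.0],
                     [ 0.0, -1.0, 0.0, 0.0],
                     [ 0.0,  0.0, 0.0, 1.0]])
lidar2cam = np.linalg.inv(cam_pose)   # ego/LiDAR -> camera

K = np.array([[747.90462424, 0.0,          623.79287665],
              [0.0,          794.63662007, 498.12602172],
              [0.0,          0.0,          1.0]])

# A point 20 m straight ahead of the train, on the LiDAR x-axis.
pt_lidar = np.array([20.0, 0.0, 0.0, 1.0])
pt_cam = lidar2cam @ pt_lidar         # -> (0, 0, 20): directly in front of the camera
uv = K @ pt_cam[:3]
print(pt_cam[:3], uv[:2] / uv[2])     # should land on the principal point (~623.8, ~498.1)
```

With this chain, a labelled box sitting on the LiDAR x-axis should project close to the image centre; when it does not, either the label or the intrinsics must be off.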

We are working on a new dataset that includes images

Thank you very much. I'm looking forward to the new dataset release. Meanwhile, I'm also interested in the methods you used to generate this dataset. If you can recommend any of your papers or open-source tools, please let me know; I believe they are excellent work and I'm interested in them.

I’ll also keep you updated on fixes for the current errors.

If you need me to run any tests on this dataset, please feel free to contact me. I'd be very glad to contribute to it.


Appendix: part of the label JSON of the scene discussed above (SOSDaR24_part_02/023_urban_01_01_00, timestamp=1717494400449)

{
  "openlabel": {
    "metadata": {
      "schema_version": "1.0.0",
      "rec_file": "023_urban_01_01_00.log"
    },
    "coordinate_systems": {
      "odom": {
        "type": "scene_cs",
        "parent": "",
        "children": ["ego_train"]
      },
      "ego_train": {
        "type": "local_cs",
        "parent": "odom",
        "children": ["tele15", "pandar64", "camera"],
        "pose_wrt_parent": {
          "matrix4x4": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]
        }
      },
      "tele15": {
        "type": "sensor_cs",
        "parent": "ego_train",
        "children": [],
        "pose_wrt_parent": {
          "matrix4x4": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]
        }
      },
      "pandar64": {
        "type": "sensor_cs",
        "parent": "ego_train",
        "children": [],
        "pose_wrt_parent": {
          "matrix4x4": [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]
        }
      },
      "camera": {
        "type": "sensor_cs",
        "parent": "ego_train",
        "children": [],
        "pose_wrt_parent": {
          "matrix4x4": [6.123233995736766e-17, 6.123233995736766e-17, 1.0, 0.0, -1.0, 3.749399456654644e-33, 6.123233995736766e-17, 0.0, 0.0, -1.0, 6.123233995736766e-17, 0.0, 0.0, 0.0, 0.0, 1.0]
        }
      }
    },
    "streams": {
      "tele15": {
        "description": "lidar",
        "uri": "",
        "type": "lidar",
        "stream_properties": {
          "intrinsics_custom": {
            "lidar_type": "Risley_prism",
            "name": "tele"
          }
        }
      },
      "pandar64": {
        "description": "lidar",
        "uri": "",
        "type": "lidar",
        "stream_properties": {
          "intrinsics_custom": {
            "lidar_type": "Surround",
            "name": "pandar64"
          }
        }
      },
      "camera": {
        "description": "pinhole_camera",
        "uri": "",
        "type": "camera",
        "stream_properties": {
          "intrinsics_pinhole": {
            "width_px": 1232,
            "height_px": 1028,
            "camera_matrix_3x4": [747.90462424, 0.0, 623.79287665, 0.0, 0.0, 794.63662007, 498.12602172, 0.0, 0.0, 0.0, 1.0, 0.0],
            "distortion_coeffs_1xN": []
          }
        }
      }
    }
  }
}
