diff --git a/pykitti/tracking.py b/pykitti/tracking.py
index 26ffc7d..d833fd4 100644
--- a/pykitti/tracking.py
+++ b/pykitti/tracking.py
@@ -1,24 +1,18 @@
 """Provides 'tracking', which loads and parses tracking benchmark data."""
 
-import datetime as dt
 import glob
 import os
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 
 import pandas as pd
-
 import numpy as np
 
 import pykitti.utils as utils
-import cv2
 
 try:
     xrange
 except NameError:
     xrange = range
-__author__ = "Sidney zhang"
-__email__ = "sidney@sidazhang.com"
-
 
 class tracking:
     """Load and parse tracking benchmark data into a usable format."""
@@ -32,15 +26,19 @@ def __init__(self, base_path, sequence, **kwargs):
         # Default image file extension is 'png'
         self.imtype = kwargs.get('imtype', 'png')
 
+        # Exclude DontCare objects from dataset
+        self.ignore_dontcare = kwargs.get('ignore_dontcare', False)
+
         # Find all the data files
         self._get_file_lists()
-        print('files', len(self.cam2_files))
+
         # Pre-load data that isn't returned as a generator
-        # self._load_calib()
+        self._load_calib()
+        self._load_objects()
 
     def __len__(self):
         """Return the number of frames loaded."""
-        return len(self.timestamps)
+        return len(self.cam2_files)
 
     @property
     def cam2(self):
@@ -60,16 +58,6 @@ def get_cam3(self, idx):
         """Read image file for cam3 (RGB right) at the specified index."""
         return utils.load_image(self.cam3_files[idx], mode='RGB')
 
-    @property
-    def gray(self):
-        """Generator to read monochrome stereo pairs from file.
-        """
-        return zip(self.cam0, self.cam1)
-
-    def get_gray(self, idx):
-        """Read monochrome stereo pair at the specified index."""
-        return (self.get_cam0(idx), self.get_cam1(idx))
-
     @property
     def rgb(self):
         """Generator to read RGB stereo pairs from file.
@@ -90,6 +78,11 @@ def velo(self):
     def get_velo(self, idx):
         """Read velodyne [x,y,z,reflectance] scan at the specified index."""
         return utils.load_velo_scan(self.velo_files[idx])
+
+    def get_objects(self, idx):
+        """Return a list of objects visible at the specified index."""
+        return self.objects[idx]
+
 
     def _get_file_lists(self):
         """Find and list data files for each sensor."""
@@ -111,10 +104,6 @@ def _get_file_lists(self):
 
         # Subselect the chosen range of frames, if any
         if self.frames is not None:
-            self.cam0_files = utils.subselect_files(
-                self.cam0_files, self.frames)
-            self.cam1_files = utils.subselect_files(
-                self.cam1_files, self.frames)
             self.cam2_files = utils.subselect_files(
                 self.cam2_files, self.frames)
             self.cam3_files = utils.subselect_files(
@@ -129,7 +118,8 @@ def _load_calib(self):
         data = {}
 
         # Load the calibration file
-        calib_filepath = os.path.join(self.sequence_path + '.txt', 'calib.txt')
+        calib_filepath = os.path.join(
+            self.base_path, 'calib/{}.txt'.format(self.sequence))
         filedata = utils.read_calib_file(calib_filepath)
 
         # Create 3x4 projection matrices
@@ -143,6 +133,11 @@
         data['P_rect_20'] = P_rect_20
         data['P_rect_30'] = P_rect_30
 
+        # Create 4x4 matrices from the rectifying rotation matrices
+        R_rect_00 = np.eye(4)
+        R_rect_00[0:3, 0:3] = np.reshape(filedata['R_rect'], (3, 3))
+        data['R_rect_00'] = R_rect_00
+
         # Compute the rectified extrinsics from cam0 to camN
         T1 = np.eye(4)
         T1[0, 3] = P_rect_10[0, 3] / P_rect_10[0, 0]
@@ -151,14 +146,15 @@
         T3 = np.eye(4)
         T3[0, 3] = P_rect_30[0, 3] / P_rect_30[0, 0]
 
-        # Compute the velodyne to rectified camera coordinate transforms
-        data['T_cam0_velo'] = np.reshape(filedata['Tr'], (3, 4))
-        data['T_cam0_velo'] = np.vstack([data['T_cam0_velo'], [0, 0, 0, 1]])
-        data['T_cam1_velo'] = T1.dot(data['T_cam0_velo'])
-        data['T_cam2_velo'] = T2.dot(data['T_cam0_velo'])
-        data['T_cam3_velo'] = T3.dot(data['T_cam0_velo'])
+        # Compute the velodyne to rectified camera coordinate transforms
+        T_cam_velo = filedata['Tr_velo_cam'].reshape((3, 4))
+        T_cam_velo = np.vstack([T_cam_velo, [0, 0, 0, 1]])
+        data['T_cam0_velo'] = R_rect_00.dot(T_cam_velo)
+        data['T_cam1_velo'] = T1.dot(R_rect_00.dot(T_cam_velo))
+        data['T_cam2_velo'] = T2.dot(R_rect_00.dot(T_cam_velo))
+        data['T_cam3_velo'] = T3.dot(R_rect_00.dot(T_cam_velo))
 
         # Compute the camera intrinsics
         data['K_cam0'] = P_rect_00[0:3, 0:3]
         data['K_cam1'] = P_rect_10[0:3, 0:3]
         data['K_cam2'] = P_rect_20[0:3, 0:3]
@@ -168,205 +164,55 @@
         # each camera frame into the velodyne frame and computing the distances
         # between them
         p_cam = np.array([0, 0, 0, 1])
-        p_velo0 = np.linalg.inv(data['T_cam0_velo']).dot(p_cam)
-        p_velo1 = np.linalg.inv(data['T_cam1_velo']).dot(p_cam)
         p_velo2 = np.linalg.inv(data['T_cam2_velo']).dot(p_cam)
         p_velo3 = np.linalg.inv(data['T_cam3_velo']).dot(p_cam)
-
-        data['b_gray'] = np.linalg.norm(p_velo1 - p_velo0)  # gray baseline
         data['b_rgb'] = np.linalg.norm(p_velo3 - p_velo2)  # rgb baseline
 
         self.calib = namedtuple('CalibData', data.keys())(*data.values())
-
-def to_array_list(df, length=None, by_id=True):
-    """Converts a dataframe to a list of arrays, with one array for every unique index entry.
-    Index is assumed to be 0-based contiguous. If there is a missing index entry, an empty
-    numpy array is returned for it.
-    Elements in the arrays are sorted by their id.
-    :param df:
-    :param length:
-    :return:
-    """
-
-    if by_id:
-        assert 'id' in df.columns
-
-        # if `id` is the only column, don't sort it (and don't remove it)
-        if len(df.columns) == 1:
-            by_id = False
-
-    idx = df.index.unique()
-    if length is None:
-        length = max(idx) + 1
-
-    l = [np.empty(0) for _ in xrange(length)]
-    for i in idx:
-        a = df.loc[i]
-        if by_id:
-            if isinstance(a, pd.Series):
-                a = a[1:]
+
+
+    def _load_objects(self):
+        """Parse object tracklets."""
+
+        # Load tracklet data as a pandas dataframe
+        track_filename = os.path.join(
+            self.base_path, 'label_02/{}.txt'.format(self.sequence))
+        columns = ('fid tid type trunc occ alpha x1 y1 x2 y2 '
+                   'h w l x y z ry score').split()
+        df = pd.read_csv(track_filename, sep=' ', header=None, names=columns,
+                         index_col=None, skip_blank_lines=True)
+
+        self.objects = [list() for _ in self.cam2_files]
+
+        # Iterate over the entries in the csv file
+        for data in df.itertuples(index=False):
+
+            # Skip DontCare objects if requested
+            if self.ignore_dontcare and data.type == 'DontCare':
+                continue
+
+            # Extract 2D bounding box, dimensions and location from dataframe
+            bbox = utils.BoundingBox(data.x1, data.y1, data.x2, data.y2)
+            dims = utils.Dimensions(data.l, data.h, data.w)
+            loc = utils.Location(data.x, data.y, data.z)
+
+            # Construct object data
+            obj_data = utils.ObjectData(
+                data.tid, data.type, data.trunc, data.occ, data.alpha,
+                bbox, dims, loc, data.ry, data.score
+            )
+
+            # Assign object to frame with index fid
+            if self.frames is not None:
+                if data.fid in self.frames:
+                    index = self.frames.index(data.fid)
+                    self.objects[index].append(obj_data)
             else:
-                a = a.copy().set_index('id').sort_index()
-
-        l[i] = a.values.reshape((-1, a.shape[-1]))
-    return np.asarray(l)
-
-
-# TODO: Acknowledge this is from HART
-class KittiTrackingLabels(object):
-    """Kitt Tracking Label parser.
-    It can limit the maximum number of objects per track,
-    filter out objects with class "DontCare", or retain only those objects present
-    in a given frame.
-    """
-
-    columns = 'id class truncated occluded alpha x1 y1 x2 y2 xd yd zd x y z roty score'.split()
-    classes = 'Car Van Truck Pedestrian Person_sitting Cyclist Tram Misc DontCare'.split()
-
-
-    def __init__(self, path_or_df, bbox_with_size=True, remove_dontcare=True, split_on_reappear=True):
-
-        if isinstance(path_or_df, pd.DataFrame):
-            self._df = path_or_df
-        else:
-            if not os.path.exists(path_or_df):
-                raise ValueError('File {} doesn\'t exist'.format(path_or_df))
-
-            self._df = pd.read_csv(path_or_df, sep=' ', header=None,
-                                   index_col=0, skip_blank_lines=True)
-
-            # Detection files have 1 more column than label files
-            # label file has 16 columns
-            # detection file has 17 columns (the last column is score)
-            # Here it checks the number of columns the df has and sets the
-            # column names on the df accordingly
-            self._df.columns = self.columns[:len(self._df.columns)]
-
-        self.bbox_with_size = bbox_with_size
-
-        if remove_dontcare:
-            self._df = self._df[self._df['class'] != 'DontCare']
-
-        for c in self._df.columns:
-            self._convert_type(c, np.float32, np.float64)
-            self._convert_type(c, np.int32, np.int64)
-
-        # TODO: Add occlusion filtering back in
-        truncated_threshold=(0, 2.)
-        occluded_threshold=(0, 3.)
-        # if not nest.is_sequence(occluded_threshold):
-        #     occluded_threshold = (0, occluded_threshold)
-        #
-        # if not nest.is_sequence(truncated_threshold):
-        #     truncated_threshold = (0, truncated_threshold)
-        # TODO: Add occlusion filteringinback in
-        # self._df = self._df[self._df['occluded'] >= occluded_threshold[0]]
-        # self._df = self._df[self._df['occluded'] <= occluded_threshold[1]]
-        #
-        # self._df = self._df[self._df['truncated'] >= truncated_threshold[0]]
-        # self._df = self._df[self._df['truncated'] <= truncated_threshold[1]]
-
-        # make 0-based contiguous ids
-        ids = self._df.id.unique()
-        offset = max(ids) + 1
-        id_map = {id: new_id for id, new_id in zip(ids, np.arange(offset, len(ids) + offset))}
-        self._df.replace({'id': id_map}, inplace=True)
-        self._df.id -= offset
-
-        self.ids = list(self._df.id.unique())
-        self.max_objects = len(self.ids)
-        self.index = self._df.index.unique()
-
-        if split_on_reappear:
-            added_ids = self._split_on_reappear(self._df, self.presence, self.ids[-1])
-            self.ids.extend(added_ids)
-            self.max_objects += len(added_ids)
-
-    def _convert_type(self, column, dest_type, only_from_type=None):
-        cond = only_from_type is None or self._df[column].dtype == only_from_type
-        if cond:
-            self._df[column] = self._df[column].astype(dest_type)
-
-    @property
-    def bbox(self):
-        bbox = self._df[['id', 'x1', 'y1', 'x2', 'y2']].copy()
-        # TODO: Fix this to become x, y, w, h
-        if self.bbox_with_size:
-            bbox['y2'] -= bbox['y1']
-            bbox['x2'] -= bbox['x1']
-
-        """Converts a dataframe to a list of arrays
-        :param df:
-        :param length:
-        :return:
-        """
-
-        return to_array_list(bbox)
-
-    @property
-    def presence(self):
-        return self._presence(self._df, self.index, self.max_objects)
-
-    @property
-    def num_objects(self):
-        ns = self._df.id.groupby(self._df.index).count()
-        absent = list(set(range(len(self))) - set(self.index))
-        other = pd.DataFrame([0] * len(absent), absent)
-        ns = ns.append(other)
-        ns.sort_index(inplace=True)
-        return ns.as_matrix().squeeze()
-
-    @property
-    def cls(self):
-        return to_array_list(self._df[['id', 'class']])
-
-    @property
-    def occlusion(self):
-        return to_array_list(self._df[['id', 'occluded']])
-
-    @property
-    def id(self):
-        return to_array_list(self._df['id'])
-
-    def __len__(self):
-        return self.index[-1] - self.index[0] + 1
-
-    @classmethod
-    def _presence(cls, df, index, n_objects):
-        p = np.zeros((index[-1] + 1, n_objects), dtype=bool)
-        for i, row in df.iterrows():
-            p[i, row.id] = True
-        return p
-
-    @classmethod
-    def _split_on_reappear(cls, df, p, id_offset):
-        """Assign a new identity to an objects that appears after disappearing previously.
-        Works on `df` in-place.
-        :param df: data frame
-        :param p: presence
-        :param id_offset: offset added to new ids
-        :return:
-        """
-
-        next_id = id_offset + 1
-        added_ids = []
-        nt = p.sum(0)
-        start = np.argmax(p, 0)
-        end = np.argmax(np.cumsum(p, 0), 0)
-        diff = end - start + 1
-        is_contiguous = np.equal(nt, diff)
-        for id, contiguous in enumerate(is_contiguous):
-            if not contiguous:
-
-                to_change = df[df.id == id]
-                index = to_change.index
-                diff = index[1:] - index[:-1]
-                where = np.where(np.greater(diff, 1))[0]
-                for w in where:
-                    to_change.loc[w + 1:, 'id'] = next_id
-                    added_ids.append(next_id)
-                    next_id += 1
-
-                df[df.id == id] = to_change
-
-        return added_ids
+                self.objects[data.fid].append(obj_data)
+
+
+    def _load_oxts(self):
+        """Load OXTS data from file."""
+        oxt_filename = os.path.join(
+            self.base_path, 'oxts/{}.txt'.format(self.sequence))
+        self.oxts = utils.load_oxts_packets_and_poses([oxt_filename])
diff --git a/pykitti/utils.py b/pykitti/utils.py
index 23d111b..47d81e8 100644
--- a/pykitti/utils.py
+++ b/pykitti/utils.py
@@ -22,6 +22,15 @@
 # Bundle into an easy-to-access structure
 OxtsData = namedtuple('OxtsData', 'packet, T_w_imu')
 
+# Data structures for object and tracking data
+ObjectData = namedtuple('ObjectData',
+                        'track_id, type, truncated, ' +
+                        'occluded, alpha, bbox, dimensions, ' +
+                        'location, rotation_y, score')
+BoundingBox = namedtuple('BoundingBox', 'left, top, right, bottom')
+Dimensions = namedtuple('Dimensions', 'length, height, width')
+Location = namedtuple('Location', 'x, y, z')
+
 
 def subselect_files(files, indices):
     try:
@@ -71,7 +80,8 @@ def read_calib_file(filepath):
 
     with open(filepath, 'r') as f:
         for line in f.readlines():
-            key, value = line.split(':', 1)
+            key, value = line.split(' ', 1)
+            key = key.rstrip(':')
             # The only non-float values in these files are dates, which
             # we don't care about anyway
             try:
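
Reviewer notes, not part of the patch. First, a minimal usage sketch of the reworked loader. It assumes the standard KITTI tracking layout (image_02/<sequence>/, velodyne/<sequence>/, calib/<sequence>.txt and label_02/<sequence>.txt under one training root), that the class is exported as pykitti.tracking in the same way as the existing raw and odometry loaders, and a purely hypothetical dataset path:

    import pykitti

    # Hypothetical root and sequence; ignore_dontcare is the new kwarg
    data = pykitti.tracking('/data/kitti/tracking/training', '0000',
                            ignore_dontcare=True)

    print(len(data))                 # frame count, from the cam2 file list
    img3 = data.get_cam3(0)          # RGB right image for frame 0
    scan = data.get_velo(0)          # Nx4 [x, y, z, reflectance] array
    for obj in data.get_objects(0):  # ObjectData namedtuples for frame 0
        print(obj.track_id, obj.type, obj.bbox.left, obj.location.z)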
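
Second, the calibration chain built in _load_calib composes the velodyne-to-cam0 extrinsic with the rectifying rotation and the per-camera baseline shift, T_camN_velo = TN . R_rect_00 . Tr_velo_cam, so velodyne points can be projected into camera pixels using only the loaded matrices. A sketch reusing the hypothetical data object from above; the sample point is made up:

    import numpy as np

    p_velo = np.array([5.0, 0.5, -1.0, 1.0])     # homogeneous velodyne point
    p_cam2 = data.calib.T_cam2_velo.dot(p_velo)  # into the rectified cam2 frame
    if p_cam2[2] > 0:                            # only points in front project
        uvw = data.calib.K_cam2.dot(p_cam2[:3])  # apply the 3x3 intrinsics
        u, v = uvw[0] / uvw[2], uvw[1] / uvw[2]  # pixel coordinates in cam2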
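
Finally, the likely motivation for the read_calib_file change: the tracking calibration files separate keys from values with a space and are not consistent about trailing colons (P2: versus R_rect), so splitting on the first space and stripping any colon accepts both spellings. A standalone illustration of the new parsing rule, with a truncated, made-up calibration line:

    line = 'P2: 7.215377e+02 0.000000e+00 6.095593e+02'
    key, value = line.split(' ', 1)             # split on the first space, not ':'
    key = key.rstrip(':')                       # 'P2:' and 'P2' both yield 'P2'
    values = [float(x) for x in value.split()]  # numeric payload, as in pykitti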