diff --git a/Dockerfile b/Dockerfile index 9572048c..72e148d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,37 @@ # tagged aloception-oss:cuda-11.3.1-pytorch1.10.1-lightning1.4.1 + FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04 +#FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04 + +ARG py=3.9 +ARG pytorch=1.13.1 +ARG torchvision=0.14.1 +ARG torchaudio=0.13.1 +ARG pytorch_lightning=1.9.0 +ARG pycuda=11.7 + ENV TZ=Europe/Paris ENV DEBIAN_FRONTEND=noninteractive + RUN apt-get update RUN apt-get install -y build-essential nano git wget libgl1-mesa-glx + # Usefull for scipy RUN apt-get install -y gfortran +# required for aloscene +RUN apt-get install -y libglib2.0-0 + + RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh RUN bash Miniconda3-latest-Linux-x86_64.sh -b -p /miniconda ENV PATH=$PATH:/miniconda/condabin:/miniconda/bin RUN /bin/bash -c "source activate base" ENV HOME /workspace WORKDIR /workspace -RUN conda install python=3.9 pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=11.3 opencv=4.5.3 -c pytorch -c conda-forge -COPY requirements.txt /install/requirements.txt -RUN pip install -r /install/requirements.txt + +# Pytorch & pytorch lightning +RUN conda install pytorch==${pytorch} torchvision==${torchvision} torchaudio==${torchaudio} pytorch-cuda=${pycuda} -c pytorch -c nvidia +RUN pip install pytorch_lightning==${pytorch_lightning} + +COPY requirements-torch1.13.1.txt /install/requirements-torch1.13.1.txt +RUN pip install -r /install/requirements-torch1.13.1.txt diff --git a/README.md b/README.md index 22155779..ac8ce3e7 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@
- +
Documentation -[![Conventional Commits](https://img.shields.io/badge/Conventional%20Commits-0.3.0-green.svg)](https://conventionalcommits.org) +[![Conventional Commits](https://img.shields.io/badge/Conventional%20Commits-0.5.0-green.svg)](https://conventionalcommits.org) # Aloception open source software @@ -75,6 +75,26 @@ training pipelines with **augmented tensors**. ## Installation +### Docker install + +``` +docker build -t aloception-oss:cuda-11.3.1-pytorch1.13.1-lightning1.9.0 . +``` + +``` +docker run --gpus all -it -v /YOUR/WORKSPACE/:/workspace --privileged -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix aloception-oss:cuda-11.3.1-pytorch1.13.1-lightning1.9.0 +``` + +Or without building the image + +``` +docker run --gpus all -it -v /YOUR/WORKSPACE/:/workspace --privileged -e DISPLAY=$DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix visualbehaviorofficial/aloception-oss:cuda-11.3.1-pytorch1.13.1-lightning1.9.0 +``` + + + +### Pip install + You first need to install PyTorch 1.10.1 based on your hardware and environment configuration. Please refer to the [pytorch website](https://pytorch.org/get-started/locally/) for this installation. @@ -90,7 +110,7 @@ Alternatively, you can clone the repository and use: pip install -e aloception-oss/ ``` -Or setup the repo yourself in your env and install the dependencies +Or setup the repo yourself in your env and install the dependencies ```sh pip install -r requirements.txt diff --git a/alodataset/__init__.py b/alodataset/__init__.py index 928c2ecd..f3d81dc4 100644 --- a/alodataset/__init__.py +++ b/alodataset/__init__.py @@ -14,5 +14,6 @@ from .sintel_flow_dataset import SintelFlowDataset from .sintel_disparity_dataset import SintelDisparityDataset from .sintel_multi_dataset import SintelMultiDataset +from .from_directory_dataset import FromDirectoryDataset from .woodScape_dataset import WooodScapeDataset from .woodScape_split_dataset import WoodScapeSplitDataset diff --git a/alodataset/base_dataset.py b/alodataset/base_dataset.py index 5be311f2..9974775a 100644 --- a/alodataset/base_dataset.py +++ b/alodataset/base_dataset.py @@ -56,7 +56,7 @@ def stream_loader(dataset, num_workers=2): return data_loader -def train_loader(dataset, batch_size=1, num_workers=2, sampler=torch.utils.data.RandomSampler): +def train_loader(dataset, batch_size=1, num_workers=2, sampler=torch.utils.data.RandomSampler, sampler_kwargs={}): """Get training loader from the dataset Parameters @@ -69,14 +69,15 @@ def train_loader(dataset, batch_size=1, num_workers=2, sampler=torch.utils.data. 
Number of workers, by default 2 sampler : torch.utils.data, optional Callback to sampler the dataset, by default torch.utils.data.RandomSampler + Or instance of any class inheriting from torch.utils.data.Sampler Returns ------- torch.utils.data.DataLoader A generator """ - sampler = sampler(dataset) if sampler is not None else None - + if sampler is not None and not isinstance(sampler, torch.utils.data.Sampler): + sampler = sampler(dataset, **sampler_kwargs) data_loader = torch.utils.data.DataLoader( dataset, # batch_sampler=batch_sampler, @@ -332,7 +333,7 @@ def stream_loader(self, num_workers=2): """ return stream_loader(self, num_workers=num_workers) - def train_loader(self, batch_size=1, num_workers=2, sampler=torch.utils.data.RandomSampler): + def train_loader(self, batch_size=1, num_workers=2, sampler=torch.utils.data.RandomSampler, sampler_kwargs={}): """Get training loader from the dataset Parameters ---------- @@ -351,7 +352,7 @@ def train_loader(self, batch_size=1, num_workers=2, sampler=torch.utils.data.Ran torch.utils.data.DataLoader A generator """ - return train_loader(self, batch_size=batch_size, num_workers=num_workers, sampler=sampler) + return train_loader(self, batch_size=batch_size, num_workers=num_workers, sampler=sampler, sampler_kwargs=sampler_kwargs) def prepare(self): """Prepare the dataset. Not all child class need to implement this method. diff --git a/alodataset/coco_base_dataset.py b/alodataset/coco_base_dataset.py index e0e5a673..890777a4 100644 --- a/alodataset/coco_base_dataset.py +++ b/alodataset/coco_base_dataset.py @@ -6,13 +6,15 @@ import os import numpy as np import torch + +from alodataset import BaseDataset +from aloscene import BoundingBoxes2D, Frame, Labels, Mask from collections import defaultdict +from pathlib import Path from pycocotools import mask as coco_mask from pycocotools.coco import COCO from typing import Dict, Union -from alodataset import BaseDataset -from aloscene import BoundingBoxes2D, Frame, Labels, Mask class CocoBaseDataset(BaseDataset): @@ -78,10 +80,17 @@ def __init__( return else: assert img_folder is not None, "When sample = False, img_folder must be given." - assert ann_file is not None, "When sample = False, ann_file must be given." + assert ann_file is not None or "test" in img_folder, "When sample = False and the test split is not used, ann_file must be given."
+ # Create properties self.img_folder = os.path.join(self.dataset_dir, img_folder) + + if "test" in img_folder: + #get a list of indices that don't rely on the annotation file + self.items = [int(Path(os.path.join(self.img_folder, f)).stem) for f in os.listdir(self.img_folder) if os.path.isfile(os.path.join(self.img_folder, f))] + return + self.coco = COCO(os.path.join(self.dataset_dir, ann_file)) self.items = list(sorted(self.coco.imgs.keys())) @@ -231,7 +240,12 @@ def getitem(self, idx): return BaseDataset.__getitem__(self, idx) image_id = self.items[idx] + if "test" in self.img_folder: + #get the filename from image_id without relying on annotation file + return Frame(os.path.join(self.img_folder, f"{str(image_id).zfill(12)}.jpg")) + frame = Frame(os.path.join(self.img_folder, self.coco.loadImgs(image_id)[0]["file_name"])) + target = self.coco.loadAnns(self.coco.getAnnIds(image_id)) target = {"image_id": image_id, "annotations": target} _, target = self.prepare(frame, target) @@ -341,7 +355,12 @@ def __call__(self, image, target): if __name__ == "__main__": - coco_dataset = CocoBaseDataset(sample=True) + coco_dataset = CocoBaseDataset(sample=False, img_folder="test2017") + #checking if regular getitem works + frame = coco_dataset[0] + frame.get_view().render() + + #check if dataloader works for f, frames in enumerate(coco_dataset.train_loader(batch_size=2)): frames = Frame.batch_list(frames) frames.get_view().render() diff --git a/alodataset/crowd_human_dataset.py b/alodataset/crowd_human_dataset.py index e9857950..61feb330 100644 --- a/alodataset/crowd_human_dataset.py +++ b/alodataset/crowd_human_dataset.py @@ -36,7 +36,18 @@ def __init__( return else: assert img_folder is not None, "When sample = False, img_folder must be given." - assert ann_file is not None, "When sample = False, ann_file must be given." + assert ann_file is not None or "test" in img_folder, "When sample = False and the test split is not used, ann_file must be given." + + if "test" in img_folder: + self._img_folder = img_folder + self.img_folder = os.path.join(self.dataset_dir, img_folder, "images_test") + + self.items = [] + for f in os.listdir(self.img_folder): + if os.path.isfile(os.path.join(self.img_folder, f)): + self.items.append({"ID": Path(os.path.join(self.img_folder, f)).stem}) + + return assert type(img_folder) == type(ann_file), "img_folder & ann_file must be the same type." @@ -121,9 +132,14 @@ def getitem(self, idx): return BaseDataset.__getitem__(self, idx) record = self.items[idx] - ann_id = record["ann_id"] image_id = record["ID"] + if "test" in self.img_folder: + #get the filename from image_id without relying on annotation file + return Frame(os.path.join(self.img_folder, image_id + ".jpg")) + + ann_id = record["ann_id"] + image_path = os.path.join(self.img_folder[ann_id], image_id + ".jpg") frame = Frame(image_path) @@ -272,6 +288,9 @@ def prepare(self): if self.sample is not None and self.sample is not False: # Nothing to do. 
Samples are ready return + if "test" in self.img_folder: + return # The code for preparing test datasets exists, but it is not used for now + if self.dataset_dir.endswith("_prepared") and not os.path.exists(self.dataset_dir.replace("_prepared", "")): return @@ -294,7 +313,9 @@ def prepare(self): def main(): """Main""" - crowd_human_dataset = CrowdHumanDataset(img_folder="CrowdHuman_train", ann_file="annotation_train.odgt") + crowd_human_dataset = CrowdHumanDataset(img_folder="CrowdHuman_test") + stuff = crowd_human_dataset[0] + stuff.get_view().render() crowd_human_dataset.prepare() for i, frames in enumerate(crowd_human_dataset.train_loader(batch_size=2, sampler=None, num_workers=0)): diff --git a/alodataset/from_directory_dataset.py b/alodataset/from_directory_dataset.py new file mode 100644 index 00000000..ed12c605 --- /dev/null +++ b/alodataset/from_directory_dataset.py @@ -0,0 +1,153 @@ +import aloscene +from alodataset import BaseDataset + +import os +import cv2 +import glob +import numpy as np +from typing import List, Union, Dict + + +class FromDirectoryDataset(BaseDataset): + """Data iterator from directory + + Parameters + ---------- + dirs : Union[List[str], Dict] + List of directory paths to load images from, or a dict mapping keys to lists of directory paths. Default None. + + slice : List[float] + Used for restricting the number of samples. Default [0, 1]. + Example: slice = [0.2, 0.4] will only load the samples between 20% and 40% of the dataset. + + Raises + ------ + Exception + All directories are empty. + + Exception + One of the directories does not exist. + + Exception + One of the paths is not a directory. + + Examples + -------- + >>> # from list of paths. + >>> path1 = "/PATH/TO/DATA/DIR1" + >>> path2 = "/PATH/TO/DATA/DIR2" + + >>> data = FromDirectoryDataset(dirs=[path1, path2]) + >>> for i in range(len(data)): + >>> print(data[i].shape) + + >>> # from dict of list of paths. + >>> path0 = "/PATH/TO/DATA/DIR0" + >>> path1 = "/PATH/TO/DATA/DIR1" + >>> path2 = "/PATH/TO/DATA/DIR2" + >>> path3 = "/PATH/TO/DATA/DIR3" + + >>> data = FromDirectoryDataset(dirs={"key1": [path0, path1], "key2": [path2, path3]}) + >>> for i in range(len(data)): + >>> print(data[i]["key1"].shape) + >>> print(data[i]["key2"].shape) + + """ + def __init__( + self, + dirs : Union[List[str], Dict] = None, + slice : list = [0, 1], + name : str = "from_dir", + **kwargs + ): + super().__init__( + name, + **kwargs + ) + assert not self.sample, "Can not sample this dataset" + assert dirs not in [None, [], {}], "List of directories not provided" + + assert len(slice) == 2, "Slice arg should be a list of 2 elements" + assert slice[0] < slice[1], "Element at index 1 should be greater than element at index 0." + assert slice[0] >= 0 and slice[1] <= 1, "Invalid slice, values should be between 0 and 1" + + if isinstance(dirs, list): + self.items = self._extract_dir_path(dirs) + + elif isinstance(dirs, dict): + titems = [] + keys = list(dirs.keys()) + for key in keys: + p_list = dirs[key] + d_path = {key: self._extract_dir_path(p_list)} + titems.append(d_path) + # Hypothesis: all dirs have the same number of elements + for i in range(len(titems[0][keys[0]])): + sample = {} + for key in keys: + sample[key] = titems[keys.index(key)][key][i] + self.items.append(sample) + + # Slicing + length = len(self.items) + end = int(length * slice[1]) + start = int(length * slice[0]) + self.items = self.items[start:end] + + # Samples check.
+ if not len(self.items): + raise Exception("No .png or .jpg files found in the given directories") + + def _extract_dir_path(self, dirs): + items = [] + for dir in dirs: + if not os.path.exists(dir): + raise Exception(f"Directory not found: {dir}") + if not os.path.isdir(dir): + raise Exception(f"{dir} is not a directory") + + for dir in dirs: + gpath = os.path.join(dir, "*") + files = sorted(glob.glob(gpath)) + files = list(filter(self._filter_img_path, files)) + items += files + return items + + @staticmethod + def _filter_img_path(path): + ends = (".png", ".jpg") + return path.endswith(ends) + + @staticmethod + def _load_frame(path): + if path.endswith((".png", ".jpg")): + frame = cv2.imread(path) + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = np.moveaxis(frame, 2, 0) + frame = aloscene.Frame(frame, names=tuple("CHW")) + return frame + else: + raise Exception(f"Unknown extension for file {path}") + + def getitem(self, idx): + item = self.items[idx] + if isinstance(item, str): + frame = self._load_frame(item) + elif isinstance(item, dict): + frame = {k: self._load_frame(path) for k, path in item.items()} + else: + raise Exception("Unknown item format") + return frame + + def set_dataset_dir(self, dataset_dir: str): + """Override parent function, this class has no dir""" + pass + + +if __name__ == "__main__": + path1 = "amusement/amusement/Easy/P001/image_right" + path2 = "amusement/amusement/Easy/P001/image_left" + + data = FromDirectoryDataset(dirs={"right": [path1, path1], "left": [path2, path2]}, slice=[0.2, 0.3]) + for i in range(len(data)): + print(data[i]) diff --git a/alodataset/kitti_object.py b/alodataset/kitti_object.py index efd34392..5407731a 100644 --- a/alodataset/kitti_object.py +++ b/alodataset/kitti_object.py @@ -11,6 +11,26 @@ class KittiObjectDataset(BaseDataset, SplitMixin): + """ + Object Task from KITTI dataset. + Parameters + ---------- + name : str + Name of the dataset. + right_frame : bool + If True, load the right frame. + context_images : int + Number of images before the main frame to load (max 3). + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`. + + Examples + -------- + >>> # Get all the training samples: + >>> dataset = KittiObjectDataset(right_frame=True, context_images=3, split=Split.TRAIN) + >>> # Get the annotated image from the left camera of the testing set: + >>> dataset = KittiObjectDataset(right_frame=False, context_images=0, split=Split.TEST) + """ SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} LABELS = ["Car", "Van", "Truck", "Pedestrian", "Person_sitting", "Cyclist", "Tram", "Misc", "DontCare"] diff --git a/alodataset/kitti_odometry.py b/alodataset/kitti_odometry.py index 3c709a30..9c528e76 100644 --- a/alodataset/kitti_odometry.py +++ b/alodataset/kitti_odometry.py @@ -10,6 +10,35 @@ class KittiOdometryDataset(BaseDataset, SplitMixin): + """ + Odometry Task from KITTI dataset. + Parameters + ---------- + name : str + Name of the dataset + sequences : str | List[str] | None + List of sequences to load. If None, all sequences are loaded. + grayscale : bool + If True, load grayscale images. Otherwise, load RGB images. + right_frame : bool + If True, load right frame images. + sequence_size : int + Number of images per item. + skip : int + Number of images to skip between each image in the sequence. + sequence_skip : int + Number of sequences to skip between each item. + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`.
+ + Examples + -------- + >>> # Load all training sequences with 3 images per item and the right camera images + >>> dataset = KittiOdometryDataset(split=Split.TRAIN, sequence_size=3, right_frame=True) + >>> # Load training sequences 00 and 03 with 5 images per item + >>> dataset = KittiOdometryDataset(split=Split.TRAIN, sequences=["00", "03"], sequence_size=5, right_frame=False) + """ + def __init__( self, name="kitti_odometry", diff --git a/alodataset/kitti_road.py b/alodataset/kitti_road.py index 7d296725..970873d0 100644 --- a/alodataset/kitti_road.py +++ b/alodataset/kitti_road.py @@ -9,7 +9,34 @@ from alodataset.utils.kitti import load_calib_cam_to_cam + class KittiRoadDataset(BaseDataset, SplitMixin): + """ + Road task from Kitti dataset. + Parameters + ---------- + name : str + Name of the dataset. + right_frame : bool + If True, load the right frame. + grayscale : bool + If True, load the grayscale images. + environement : str + Environment to load. Must be in ['um', 'umm', 'uu']. + (urban marked, urban unmarked multiple lanes, urban unmarked) + obj : str + Can be set to 'lane' if environement is 'um'. + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`. + + Examples + -------- + >>> # Load urban marked environment with lane object: + >>> dataset = KittiRoadDataset(environement="um", obj="lane") + >>> # Load urban unmarked environment with road object and grayscale images: + >>> dataset = KittiRoadDataset(environement="uu", obj="road", grayscale=True) + """ + SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} def __init__(self, name="kitti_road", right_frame=False, grayscale=False, environement="um", obj="road", **kwargs): diff --git a/alodataset/kitti_semantic.py b/alodataset/kitti_semantic.py index f4cee566..ed3b15a3 100644 --- a/alodataset/kitti_semantic.py +++ b/alodataset/kitti_semantic.py @@ -5,11 +5,26 @@ from typing import Union from alodataset import BaseDataset, Split, SplitMixin -from aloscene import Frame, Mask, Labels, BoundingBoxes2D -from aloscene.camera_calib import CameraIntrinsic, CameraExtrinsic +from aloscene import Frame, Mask, Labels class KittiSemanticDataset(BaseDataset, SplitMixin): + """ + Semantic task from Kitti dataset. + Parameters + ---------- + name : str + Name of the dataset. + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`. + + Examples + -------- + >>> # Load kitti training semantic dataset + >>> dataset = KittiSemanticDataset(split=Split.TRAIN) + >>> # Load kitti testing semantic dataset + >>> dataset = KittiSemanticDataset(split=Split.TEST) + """ SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} def __init__(self, name="kitti_semantic", **kwargs): diff --git a/alodataset/kitti_stereo_flow2012.py b/alodataset/kitti_stereo_flow2012.py index 49214546..5b839909 100644 --- a/alodataset/kitti_stereo_flow2012.py +++ b/alodataset/kitti_stereo_flow2012.py @@ -12,6 +12,40 @@ class KittiStereoFlow2012(BaseDataset, SplitMixin): + """ + Stereo, Flow tasks from Kitti 2012 dataset. + Parameters + ---------- + name : str + Name of the dataset + grayscale : bool + If True, load images in grayscale + sequence_start : int + 20 images are available for each item. Only image 10 is annotated. + sequence_start is the first image to load. + sequence_end : int + sequence_end is the last image to load. + load : list + List of data to load.
Available data are: + - right: right image + - disp_noc: disparity map without occlusions + - disp_occ: disparity map with occlusions + - disp_refl_noc: disparity map without occlusions and reflections + - disp_refl_occ: disparity map with occlusions and reflections + - flow_noc: flow map without occlusions + - flow_occ: flow map with occlusions + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`. + + Examples + -------- + >>> # Load only annotated images with all available data + >>> kitti_stereo = KittiStereoFlow2012(sequence_start=10, sequence_end=10, split=Split.TRAIN) + >>> # Load only annotated images with no data except right image + >>> kitti_stereo = KittiStereoFlow2012(load=["right"], sequence_start=10, sequence_end=10, split=Split.TRAIN) + >>> # Load all training images with all available data + >>> kitti_stereo = KittiStereoFlow2012(sequence_start=0, sequence_end=20, split=Split.TRAIN) + """ SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} def __init__( diff --git a/alodataset/kitti_stereo_flow_sflow2015.py b/alodataset/kitti_stereo_flow_sflow2015.py index 0916aa0a..872a8474 100644 --- a/alodataset/kitti_stereo_flow_sflow2015.py +++ b/alodataset/kitti_stereo_flow_sflow2015.py @@ -12,6 +12,40 @@ class KittiStereoFlowSFlow2015(BaseDataset, SplitMixin): + """ + Stereo, Flow, SceneFlow Tasks from Kitti 2015 dataset. + Parameters + ---------- + name : str + Name of the dataset + sequence_start : int + 20 images are available for each item. Only images 10 and 11 are annotated. + sequence_start is the first image to load. + sequence_end : int + sequence_end is the last image to load. + grayscale : bool + If True, load images in grayscale. + load : List[str] + List of data to load. Available data are: + - right: right image + - disp_noc: disparity map without occlusions + - disp_occ: disparity map with occlusions + - flow_occ: flow map with occlusions + - flow_noc: flow map without occlusions + - scene_flow: scene flow map + - obj_map: object map + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`. + + Examples + -------- + >>> # Load dataset with only the 2 annotated images + >>> dataset = KittiStereoFlowSFlow2015(sequence_start=10, sequence_end=11) + >>> # Load dataset with 3 context images before the 2 annotated images + >>> dataset = KittiStereoFlowSFlow2015(sequence_start=7, sequence_end=11) + >>> # Load dataset with all the context images + >>> dataset = KittiStereoFlowSFlow2015(sequence_start=0, sequence_end=20) + """ SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} def __init__( diff --git a/alodataset/kitti_tracking.py b/alodataset/kitti_tracking.py index 08f095b8..58e04a4c 100644 --- a/alodataset/kitti_tracking.py +++ b/alodataset/kitti_tracking.py @@ -11,6 +11,32 @@ class KittiTrackingDataset(BaseDataset, SplitMixin): + """ + Tracking task from KITTI dataset. + Parameters + ---------- + name : str + Name of the dataset + sequences : int | list[str] | None + List of sequences to load. If None, all sequences are loaded. + right_frame : bool + If True, load the right frame. + sequence_size : int + Number of frames in a sequence. + skip : int + Number of frames to skip between two frames in a sequence. + sequence_skip : int + Number of sequences to skip between each item. + split : Split + Split of the dataset. Can be `Split.TRAIN` or `Split.TEST`.
+ + Examples + -------- + >>> # Load the first sequence without the right frame + >>> dataset = KittiTrackingDataset(sequences=0, right_frame=False) + >>> # Load sequences 0, 1 and 2 with 4 images per item + >>> dataset = KittiTrackingDataset(sequences=["0000", "0001", "0002"], sequence_size=4) + """ SPLIT_FOLDERS = {Split.TRAIN: "training", Split.TEST: "testing"} LABELS = ["Car", "Van", "Truck", "Pedestrian", "Person_sitting", "Cyclist", "Tram", "Misc", "DontCare"] diff --git a/alodataset/sintel_base_dataset.py b/alodataset/sintel_base_dataset.py index 7f159243..d8e0225d 100644 --- a/alodataset/sintel_base_dataset.py +++ b/alodataset/sintel_base_dataset.py @@ -2,13 +2,13 @@ import torch import os -from aloscene.io.disparity import load_disp_png +from alodataset import BaseDataset, SequenceMixin, Split, SplitMixin from aloscene import Frame, Flow, Mask, Disparity +from aloscene.io.disparity import load_disp_png from aloscene.utils.data_utils import DLtoLD -from alodataset import BaseDataset, SequenceMixin -class SintelBaseDataset(BaseDataset, SequenceMixin): +class SintelBaseDataset(BaseDataset, SplitMixin, SequenceMixin): """ Abstract Base Class for MPI Sintel datasets @@ -29,6 +29,8 @@ class SintelBaseDataset(BaseDataset, SequenceMixin): LABELS = NotImplemented PASSES = NotImplemented + SPLIT_FOLDERS = {Split.VAL: "validation", Split.TRAIN: "training", Split.TEST: "testing"} + SINTEL_SEQUENCES = [ "alley_1", "alley_2", @@ -67,9 +69,7 @@ def __init__(self, cameras=None, labels=None, passes=None, sintel_sequences=None self.sintel_sequences = sintel_sequences if sintel_sequences is not None else self.SINTEL_SEQUENCES if self.sample: return - self._assert_inputs() - self.items = self._get_sequences() def _assert_inputs(self): @@ -98,7 +98,7 @@ def _split_cameras(self, my_dict): def _get_folder(self, sintel_seq, feature_or_label): dset_dir = self.dataset_dir - return os.path.join(dset_dir, "training", feature_or_label, sintel_seq) + return os.path.join(dset_dir, self.get_split_folder(), feature_or_label, sintel_seq) @property def _left_img_dir(self, sintel_pass=None): diff --git a/alodataset/sintel_disparity_dataset.py b/alodataset/sintel_disparity_dataset.py index ada75044..1118a90f 100644 --- a/alodataset/sintel_disparity_dataset.py +++ b/alodataset/sintel_disparity_dataset.py @@ -105,7 +105,9 @@ def _get_camera_frames(self, sequence_data, camera): if __name__ == "__main__": - dataset = SintelDisparityDataset(sample=True) + dataset = SintelDisparityDataset(sample=False) + banana = dataset.getitem(0) + banana["right"].get_view().render() for f, frames in enumerate(dataset.train_loader(batch_size=2)): frames = Frame.batch_list(frames) diff --git a/alodataset/transforms.py b/alodataset/transforms.py index 03dd2af7..1734f2fc 100644 --- a/alodataset/transforms.py +++ b/alodataset/transforms.py @@ -14,7 +14,7 @@ class AloTransform(object): - def __init__(self, same_on_sequence: bool = True, same_on_frames: bool = False, p: float = 0.5): + def __init__(self, same_on_sequence: bool = True, same_on_frames: bool = False, p: float = 1.0): """Alo Transform. Each transform in the project should inhert from this class. @@ -561,24 +561,28 @@ class Rotate(AloTransform): - def __init__(self, angle: float, *args, **kwargs): - """Rotate the given frame using the given rotation angle. + def __init__(self, angle: float, center=None, *args, **kwargs): + """Rotate the given frame using the given rotation angle around the given rotation center.
Parameters ---------- angle: float, between 0 and 360 + center: list or tuple of coordinates. + Coordinates should be in absolute format (in range [0, W] and [0, H]). Default is the center of the frame. """ assert isinstance(angle, float) self.angle = angle + self.center = center super().__init__(*args, **kwargs) def sample_params(self): """Sample an `angle` from the list of possible `angles`""" - return (self.angle,) + return (self.angle, self.center) - def set_params(self, angle): + def set_params(self, angle, center): """Given predefined params, set the params on the class""" self.angle = angle + self.center = center def apply(self, frame: Frame): """Apply the transformation @@ -588,7 +592,7 @@ def apply(self, frame: Frame): frame: Frame Frame to apply the transformation on """ - frame = frame.rotate(self.angle) + frame = frame.rotate(self.angle, self.center) return frame @@ -768,7 +772,6 @@ def apply(self, frame: Frame): ------- n_frame: aloscene.Frame """ - n_frame = frame.norm01() frame_data = n_frame.data.as_tensor() diff --git a/alonet/callbacks/object_detector_callback.py b/alonet/callbacks/object_detector_callback.py index 27feb33f..589eb995 100644 --- a/alonet/callbacks/object_detector_callback.py +++ b/alonet/callbacks/object_detector_callback.py @@ -66,15 +66,7 @@ def log_boxes_2d(self, frames: list, preds_boxes: list, trainer: pl.trainer.trai else: target_boxes = frames.boxes2d[b] - frame = frames[b] - frame = frame.detach() - frame = frame.norm255() - frame = frame.cpu() - frame = frame.type(torch.uint8) - frame = frame.rename(None) - frame = frame.permute([1, 2, 0]) - frame = frame.contiguous() - frame = frame.numpy() + frame = frames[b].as_image() # wandb_img = wandb.Image(frame, boxes=boxes) # images.append(wandb_img) @@ -175,8 +167,7 @@ def log_masks(self, frames: list, pred_masks: list, trainer: pl.trainer.trainer. if id < len(labels_names) } - frame = frames[b].detach().norm255().cpu().type(torch.uint8).rename(None) - frame = frame.permute([1, 2, 0]).contiguous().numpy() + frame = frames[b].as_image() # Get panoptic view target_masks = target_masks.mask2id(return_cats=self.one_color_per_class) diff --git a/alonet/common/pl_helpers.py b/alonet/common/pl_helpers.py index dee86cb8..054108ba 100644 --- a/alonet/common/pl_helpers.py +++ b/alonet/common/pl_helpers.py @@ -10,13 +10,16 @@ parser = ArgumentParser() -def vb_folder(): +def vb_folder(create_if_not_found=False): home = os.getenv("HOME") alofolder = os.path.join(home, ".aloception") if not os.path.exists(alofolder): - raise Exception( - f"{alofolder} do not exist. Please, create the folder with the appropriate files. (Checkout documentation)" - ) + if create_if_not_found: + os.mkdir(alofolder) + else: + raise Exception( + f"{alofolder} does not exist. Please create the folder with the appropriate files. 
(Check out the documentation)" + ) return alofolder @@ -251,6 +254,8 @@ def run_pl_training( if args.log is not None: if args.log == "wandb": logger = WandbLogger(name=expe_name, project=project, id=expe_name) + logger.log_hyperparams(args) + elif args.log == "tensorboard": logger = TensorBoardLogger(save_dir="tensorboard/", name=expe_name, sub_dir=expe_name) else: diff --git a/alonet/common/weights.py b/alonet/common/weights.py index fa88f60b..c115ea4b 100644 --- a/alonet/common/weights.py +++ b/alonet/common/weights.py @@ -1,6 +1,7 @@ import torch import requests import os +from alonet.common.pl_helpers import vb_folder, checkpoint_handler WEIGHT_NAME_TO_FILES = { "detr-r50": ["https://storage.googleapis.com/visualbehavior-publicweights/detr-r50/detr-r50.pth"], @@ -30,15 +31,16 @@ } -def vb_fodler(): - home = os.getenv("HOME") - alofolder = os.path.join(home, ".aloception") - if not os.path.exists(alofolder): - os.mkdir(alofolder) - return alofolder - - -def load_weights(model, weights, device, strict_load_weights=True): +def load_weights( + model, + weights=None, + run_id=None, + project_run_id=None, + checkpoint="best", + monitor="val_loss", + device=torch.device("cpu"), + strict_load_weights=True, + ): """Load and/or download weights from public cloud Parameters ---------- @@ -50,10 +52,19 @@ device: torch.device Device to load the weights into """ - weights_dir = os.path.join(vb_fodler(), "weights") + assert run_id is not None or weights is not None, "run_id or weights must be set." - if not os.path.exists(weights_dir): - os.makedirs(weights_dir) + if weights is None: + if project_run_id is None: + raise Exception( + "project_run_id needs to be set when loading a model from run_id." + ) + run_id_project_dir = os.path.join(vb_folder(), f"project_{project_run_id}", run_id) + ckpt_path = checkpoint_handler(checkpoint, run_id_project_dir, monitor) + weights = os.path.join(run_id_project_dir, ckpt_path) + if not os.path.exists(weights): + raise Exception(f"Impossible to load the ckpt at the following destination: {weights}") + print(f"Loading ckpt from {run_id} at {weights}") if os.path.splitext(weights.lower())[1] == ".pth": checkpoint = torch.load(weights, map_location=device) @@ -67,7 +78,7 @@ model.load_state_dict(checkpoint, strict=strict_load_weights) print(f"Weights loaded from {weights}") elif weights in WEIGHT_NAME_TO_FILES: - weights_dir = os.path.join(weights_dir, weights) + weights_dir = os.path.join(vb_folder(create_if_not_found=True), "weights", weights) if not os.path.exists(weights_dir): os.makedirs(weights_dir) for f in WEIGHT_NAME_TO_FILES[weights]: diff --git a/alonet/deformable_detr/README.md b/alonet/deformable_detr/README.md index ac224012..fcb9c432 100644 --- a/alonet/deformable_detr/README.md +++ b/alonet/deformable_detr/README.md @@ -61,5 +61,10 @@ Evaluation on 1000 images COCO with box refinement ## Exportation ```bash -python trt_exporter.py --refinement --HW 320 480 --verbose --ignore_adapt_graph +python trt_exporter.py --refinement --HW 320 480 --verbose +``` +or (for preprocessing included) + +```bash +python trt_exporter.py --refinement --HW 320 480 --verbose ``` \ No newline at end of file diff --git a/alonet/deformable_detr/deformable_detr.py b/alonet/deformable_detr/deformable_detr.py index 6f8b675b..ec5f235d 100644 --- a/alonet/deformable_detr/deformable_detr.py +++ b/alonet/deformable_detr/deformable_detr.py @@ -85,17 +85,20 @@ def __init__(
return_intermediate_dec: bool = True, strict_load_weights: bool = True, tracing=False, + include_preprocessing=False, ): + print("WARNING: you are using DeformableDETR or a class inheriting from it. Please launch aloception-oss/alonet/deformable_detr/ops/make.sh before proceeding with training. Please refer to the README for more info") super().__init__() self.device = device self.num_feature_levels = num_feature_levels - self.transformer = transformer + self.backbone = backbone self.num_queries = num_queries self.return_intermediate_dec = return_intermediate_dec self.hidden_dim = transformer.d_model self.return_dec_outputs = return_dec_outputs self.return_enc_outputs = return_enc_outputs self.return_bb_outputs = return_bb_outputs + self.include_preprocessing = include_preprocessing if activation_fn not in ["sigmoid", "softmax"]: raise Exception(f"activation_fn = {activation_fn} must be one of this two values: 'sigmoid' or 'softmax'.") @@ -104,9 +107,6 @@ def __init__( self.background_class = num_classes if self.activation_fn == "softmax" else None num_classes += 1 if self.activation_fn == "softmax" else 0 # Add bg class - self.class_embed = nn.Linear(self.hidden_dim, num_classes) - self.bbox_embed = MLP(self.hidden_dim, self.hidden_dim, 4, 3) - self.query_embed = nn.Embedding(num_queries, self.hidden_dim * 2) # Projection for Multi-scale features if num_feature_levels > 1: num_backbone_outs = len(backbone.strides) - 1 # Ignore backbone.layer1 @@ -137,8 +137,11 @@ def __init__( ) ] ) + self.query_embed = nn.Embedding(num_queries, self.hidden_dim * 2) + self.transformer = transformer + self.class_embed = nn.Linear(self.hidden_dim, num_classes) + self.bbox_embed = MLP(self.hidden_dim, self.hidden_dim, 4, 3) - self.backbone = backbone self.aux_loss = aux_loss self.with_box_refine = with_box_refine self.tracing = tracing @@ -191,6 +194,22 @@ def tracing(self): def tracing(self, is_tracing): self._tracing = is_tracing self.backbone.tracing = is_tracing + + @staticmethod + def in_img_preprocess(frames): + frames = frames.permute(0, 3, 1, 2) + frames = frames.div(255) + + n_shape = [1] * len(frames.shape) + n_shape[1] = 3 + + mean_std = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) + std_tensor = torch.tensor(mean_std[1], device=frames.device).view(tuple(n_shape)) + mean_tensor = torch.tensor(mean_std[0], device=frames.device).view(tuple(n_shape)) + + frames = frames - mean_tensor + frames = frames / std_tensor + return frames @assert_and_export_onnx(check_mean_std=True, input_mean_std=INPUT_MEAN_STD) def forward(self, frames: aloscene.Frame, **kwargs): @@ -222,16 +241,22 @@ def forward(self, frames: aloscene.Frame, **kwargs): - :attr:`dec_outputs`: Optional, only returned when transformer decoder outputs are activated.
""" - # ==== Backbone - features, pos = self.backbone(frames, **kwargs) - - assert next(self.parameters()).is_cuda, "DeformableDETR cannot run on CPU (due to MSdeformable op)" - if self.tracing: - frame_masks = frames[:, 3:4] + if self.include_preprocessing: + frame_masks = torch.zeros((1, 1, *frames.shape[1:3]), dtype=torch.float32) + frame_masks = frame_masks.to(frames.device) + frames = self.in_img_preprocess(frames) + else: + frame_masks = torch.zeros((1, 1, *frames.shape[-2:]), dtype=torch.float32) + frame_masks = frame_masks.to(frames.device) + frames = torch.cat([frames, frame_masks], dim=1) else: + assert next(self.parameters()).is_cuda, "DeformableDETR cannot run on CPU (due to MSdeformable op)" frame_masks = frames.mask.as_tensor() + # ==== Backbone + features, pos = self.backbone(frames, **kwargs) + # ==== Transformer srcs = [] masks = [] @@ -616,11 +641,15 @@ def build_decoder_layer( n_points=dec_n_points, ) - def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = True): + def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = True, hidden_dim: int = 256, num_feature_levels: int = 4): """Build decoder layer Parameters ---------- + hidden_dim : int, optional + Hidden dimension size, by default 256 + num_feature_levels : int, optional + Number of feature levels, by default 4 dec_layers : int, optional Number of decoder layers, by default 6 return_intermediate_dec : bool, optional @@ -631,7 +660,7 @@ def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = Tru :class:`~alonet.deformable.deformable_transformer.DeformableTransformerDecoder` Transformer decoder """ - decoder_layer = self.build_decoder_layer() + decoder_layer = self.build_decoder_layer(hidden_dim=hidden_dim, num_feature_levels=num_feature_levels) return DeformableTransformerDecoder(decoder_layer, dec_layers, return_intermediate_dec) @@ -678,7 +707,7 @@ def build_transformer( :mod:`Transformer