diff --git a/alodataset/coco_panoptic_dataset.py b/alodataset/coco_panoptic_dataset.py
index 7bfd2060..be48f297 100644
--- a/alodataset/coco_panoptic_dataset.py
+++ b/alodataset/coco_panoptic_dataset.py
@@ -9,6 +9,7 @@
 import numpy as np
 import torch
 from PIL import Image
+from typing import Union
 
 from alodataset.utils.panoptic_utils import rgb2id
 from alodataset.utils.panoptic_utils import masks_to_boxes
@@ -70,7 +71,7 @@ def __init__(
         split=Split.TRAIN,
         return_masks: bool = True,
         classes: list = None,
-        fix_classes_len: int = 250,  # Match with pre-trained weights
+        fix_classes_len: int = None,  # Set to e.g. 250 to match pre-trained weights
         **kwargs,
     ):
         super(CocoPanopticDataset, self).__init__(name=name, split=split, **kwargs)
@@ -82,7 +83,7 @@ def __init__(
         self.ann_folder = os.path.join(self.dataset_dir, self.get_split_ann_folder())
         self.ann_file = os.path.join(self.dataset_dir, self.get_split_ann_file())
         self.return_masks = return_masks
-        self.label_names = None
+        self.label_names, self.label_types, self.label_types_names = None, None, None
         self.items = self._get_sequences()
 
         # Fix classes if it is desired
@@ -110,6 +111,15 @@ def __init__(
                 items.append(self.items[i])
             self.items = items
 
+        # Fix label_types: if `classes` is given, remove types that do not include these classes and fix indices
+        if self.label_types is not None:
+            for ltype, vtype in self.label_types.items():
+                vtype = [x for b, x in enumerate(vtype) if self._ids_renamed[b] != -1]
+                ltn = list(sorted(set([self.label_types_names[ltype][vt] for vt in vtype])))
+                index = {b: ltn.index(p) for b, p in enumerate(self.label_types_names[ltype]) if p in ltn}
+                self.label_types[ltype] = [index[idx] for idx in vtype]
+                self.label_types_names[ltype] = ltn
+
         # Fix number of label names if desired
         if fix_classes_len is not None:
             if fix_classes_len > len(self.label_names):
@@ -146,9 +156,26 @@ def _get_sequences(self):
         if "categories" in coco:
             nb_category = max(cat["id"] for cat in coco["categories"])
             self.label_names = ["N/A"] * (nb_category + 1)
+
+            # Get type names
+            self.label_types_names = {
+                k: list(sorted(set([cat[k] for cat in coco["categories"]]))) + ["N/A"]
+                for k in coco["categories"][0].keys()
+                if k not in ["id", "name"]
+            }
+
+            # Map each category id to its index within each type
+            self.label_types = {
+                k: [len(self.label_types_names[k]) - 1] * (nb_category + 1) for k in self.label_types_names
+            }
+            if "isthing" in self.label_types_names:
+                self.label_types_names["isthing"] = ["stuff", "thing", "N/A"]
             for cat in coco["categories"]:
                 self.label_names[cat["id"]] = cat["name"]
-        print("Done")
+                for k in self.label_types:
+                    self.label_types[k][cat["id"]] = (
+                        cat[k] if k == "isthing" else self.label_types_names[k].index(cat[k])
+                    )
         return items
 
     def get_split_ann_folder(self):
@@ -173,6 +200,18 @@ def get_split_ann_file(self):
         assert self.split in self.SPLIT_ANN_FILES
         return self.SPLIT_ANN_FILES[self.split]
 
+    def _append_type_labels(self, element: Union[BoundingBoxes2D, Mask], labels):
+        if self.label_types is not None:
+            for ktype in self.label_types:
+                label_types = torch.as_tensor(self.label_types[ktype])[labels]
+                label_types = Labels(
+                    label_types.to(torch.float32),
+                    labels_names=self.label_types_names[ktype],
+                    names=("N"),
+                    encoding="id",
+                )
+                element.append_labels(label_types, name=ktype)
+
     def getitem(self, idx):
         """Get the :mod:`Frame ` corresponding to the *idx* index
 
@@ -213,28 +252,32 @@ def getitem(self, idx):
 
         # Make aloscene.frame
         frame = Frame(img_path)
         labels_2d = Labels(labels.to(torch.float32), labels_names=self.label_names, names=("N"), encoding="id")
         boxes_2d = BoundingBoxes2D(
-            masks_to_boxes(masks),
-            boxes_format="xyxy",
-            absolute=True,
-            frame_size=frame.HW,
-            names=("N", None),
-            labels=labels_2d,
+            masks_to_boxes(masks), boxes_format="xyxy", absolute=True, frame_size=frame.HW, names=("N", None),
         )
+        boxes_2d.append_labels(labels_2d, name="category")
+        self._append_type_labels(boxes_2d, labels)
         frame.append_boxes2d(boxes_2d)
 
         if self.return_masks:
-            masks_2d = Mask(masks, names=("N", "H", "W"), labels=labels_2d)
+            masks_2d = Mask(masks, names=("N", "H", "W"))
+            masks_2d.append_labels(labels_2d, name="category")
+            self._append_type_labels(masks_2d, labels)
             frame.append_segmentation(masks_2d)
 
         return frame
 
 
 if __name__ == "__main__":
-    coco_seg = CocoPanopticDataset(sample=True)
+    coco_seg = CocoPanopticDataset(sample=False)
     for f, frames in enumerate(coco_seg.train_loader(batch_size=2)):
         frames = Frame.batch_list(frames)
-        frames.get_view().render()
+        labels_set = "category" if isinstance(frames.boxes2d[0].labels, dict) else None
+        views = [fr.boxes2d.get_view(fr, labels_set=labels_set) for fr in frames]
+        if hasattr(frames, "segmentation"):
+            views += [fr.segmentation.get_view(fr, labels_set=labels_set) for fr in frames]
+        frames.get_view(views).render()
+        # frames.get_view(labels_set=labels_set).render()
+        if f > 1:
+            break
diff --git a/alodataset/utils/panoptic_utils.py b/alodataset/utils/panoptic_utils.py
index e3d8dd62..e30be0f7 100644
--- a/alodataset/utils/panoptic_utils.py
+++ b/alodataset/utils/panoptic_utils.py
@@ -1,11 +1,17 @@
+from typing import Union
 import numpy as np
 import torch
 
+# from alonet.metrics.compute_pq import VOID
+
+VOID_CLASS_ID = -1
 GLOBAL_COLOR_SET = np.random.uniform(0, 1, (300, 3))
+GLOBAL_COLOR_SET[VOID_CLASS_ID] = [0, 0, 0]
+OFFSET = 256 * 256 * 256
 
 
 # Function get from PanopticAPI: https://github.com/cocodataset/panopticapi/blob/master/panopticapi/utils.py
-def rgb2id(color):
+def rgb2id(color: Union[list, np.ndarray]):
     if isinstance(color, np.ndarray) and len(color.shape) == 3:
         if color.dtype == np.uint8:
             color = color.astype(np.int32)
@@ -14,9 +20,9 @@
 
 
 # Function get from PanopticAPI: https://github.com/cocodataset/panopticapi/blob/master/panopticapi/utils.py
-def id2rgb(id_map, random_color=True):
+def id2rgb(id_map: np.ndarray, random_color: bool = True):
     if random_color:
-        return (256 * GLOBAL_COLOR_SET[id_map]).astype(np.uint8)
+        return GLOBAL_COLOR_SET[id_map]
     if isinstance(id_map, np.ndarray):
         id_map_copy = id_map.copy()
         rgb_shape = tuple(list(id_map.shape) + [3])
@@ -24,10 +30,10 @@
         for i in range(3):
             rgb_map[..., i] = id_map_copy % 256
             id_map_copy //= 256
-        return rgb_map
+        return rgb_map / 255.0
     color = []
     for _ in range(3):
-        color.append(id_map % 256)
+        color.append((id_map % 256) / 255.0)
         id_map //= 256
     return color
diff --git a/alonet/callbacks/__init__.py b/alonet/callbacks/__init__.py
index 90275da0..94d0b92e 100644
--- a/alonet/callbacks/__init__.py
+++ b/alonet/callbacks/__init__.py
@@ -1,3 +1,5 @@
 from .object_detector_callback import ObjectDetectorCallback
 from .metrics_callback import MetricsCallback
+from .base_metrics_callback import InstancesBaseMetricsCallback
 from .map_metrics_callback import ApMetricsCallback
+from .pq_metrics_callback import PQMetricsCallback
diff --git a/alonet/callbacks/base_metrics_callback.py b/alonet/callbacks/base_metrics_callback.py
new file mode 100644
index 00000000..f66b718b
--- /dev/null
+++ b/alonet/callbacks/base_metrics_callback.py
@@ -0,0 +1,154 @@
+import pytorch_lightning as pl
+import aloscene
+from alonet import metrics
+from pytorch_lightning.utilities import rank_zero_only
+
+# import wandb
+
+
+class InstancesBaseMetricsCallback(pl.Callback):
+    def __init__(self, base_metric: metrics, *args, **kwargs):
+        self.metrics = []
+        self.base_metric = base_metric
+        super().__init__(*args, **kwargs)
+
+    def inference(self, pl_module: pl.LightningModule, m_outputs: dict, **kwargs):
+        """This method will call the `inference` method of the module's model and will expect to receive the
+        predicted boxes2D and/or Masks.
+
+        Parameters
+        ----------
+        pl_module : pl.LightningModule
+            Pytorch lightning module with inference function
+        m_outputs : dict
+            Forward outputs
+
+        Returns
+        -------
+        :mod:`~aloscene.bounding_boxes_2d`, :mod:`~aloscene.Mask`
+            Boxes and masks predicted from inference function
+
+        Notes
+        -----
+        If `m_outputs` does not contain a "pred_masks" key, a [None]*B list will be returned by default
+        """
+        b_pred_masks = None
+        if "pred_masks" in m_outputs:
+            b_pred_boxes, b_pred_masks = pl_module.inference(m_outputs, **kwargs)
+        else:
+            b_pred_boxes = pl_module.inference(m_outputs, **kwargs)
+        if not isinstance(m_outputs, list):
+            b_pred_boxes = [b_pred_boxes]
+            b_pred_masks = [b_pred_masks]
+        elif b_pred_masks is None:
+            b_pred_masks = [None] * len(b_pred_boxes)
+        return b_pred_boxes, b_pred_masks
+
+    @rank_zero_only
+    def on_validation_batch_end(
+        self,
+        trainer: pl.Trainer,
+        pl_module: pl.LightningModule,
+        outputs: dict,
+        batch: list,
+        batch_idx: int,
+        dataloader_idx: int,
+    ):
+        """Method called after each validation batch. This class is a pytorch lightning callback, therefore
+        this method will be called automatically by PL.
+
+        This method will call the `inference` method of the module's model and will expect to receive the
+        predicted boxes2D and/or Masks. These elements will be aggregated to compute the different metrics in the
+        `on_validation_end` method.
+        The inference method will be called using the `m_outputs` key from the outputs dict. If `m_outputs` is a
+        list, then the list will be considered as a temporal list. Therefore, this callback will aggregate the
+        predictions for each element of the sequence and will log the final results with the timestep prefix
+        val/t/ instead of simply val/
+
+        Parameters
+        ----------
+        trainer: pl.Trainer
+            Pytorch lightning trainer
+        pl_module: pl.LightningModule
+            Pytorch lightning module. The "m_outputs" key is expected for this callback to work properly.
+        outputs:
+            Training/Validation step outputs of the pl.LightningModule class.
+        batch: list
+            Batch coming from the dataloader. Usually, a list of frames.
+        batch_idx: int
+            Id of the batch
+        dataloader_idx: int
+            Dataloader batch ID.
+ """ + if isinstance(batch, list): # Resize frames for mask procedure + batch = batch[0].batch_list(batch) + + b_pred_boxes, b_pred_masks = self.inference(pl_module, outputs["m_outputs"]) + is_temporal = isinstance(outputs["m_outputs"], list) + for b, (t_pred_boxes, t_pred_masks) in enumerate(zip(b_pred_boxes, b_pred_masks)): + + # Retrieve the matching GT boxes at the same time step + t_gt_boxes = batch[b].boxes2d + t_gt_masks = batch[b].segmentation + + if not is_temporal: + t_gt_boxes = [t_gt_boxes] + t_gt_masks = [t_gt_masks] + + if t_pred_masks is None: + t_pred_masks = [None] * len(t_gt_masks) + + # Add the samples to metrics for each batch of the current sequence + for t, (gt_boxes, pred_boxes, gt_masks, pred_masks) in enumerate( + zip(t_gt_boxes, t_pred_boxes, t_gt_masks, t_pred_masks) + ): + if t + 1 > len(self.metrics): + self.metrics.append(self.base_metric()) + self.add_sample(self.metrics[t], pred_boxes, gt_boxes, pred_masks, gt_masks) + + @rank_zero_only + def add_sample( + self, + base_metric: metrics, + pred_boxes: aloscene.BoundingBoxes2D, + gt_boxes: aloscene.BoundingBoxes2D, + pred_masks: aloscene.Mask = None, + gt_masks: aloscene.Mask = None, + ): + """Add a smaple to some `alonet.metrics` class. One might want to inhert this method + to edit the `pred_boxes` and `gt_boxes` boxes before to add them to the ApMetrics class. + + Parameters + ---------- + ap_metrics: Union[:mod:`~alonet.metrics.ApMetrics`, :mod:`~alonet.metrics.PQMetrics` + ApMetrics intance. + pred_boxes: :mod:`~aloscene.BoundingBoxes2D` + Predicted boxes2D. + gt_boxes: :mod:`~aloscene.BoundingBoxes2D` + GT boxes2d. + pred_masks: :mod:`~aloscene.Mask` + Predicted Masks for segmentation task + gt_masks: :mod:`~aloscene.Mask` + GT masks in segmentation task. + """ + base_metric.add_sample(p_bbox=pred_boxes, t_bbox=gt_boxes, p_mask=pred_masks, t_mask=gt_masks) + + @rank_zero_only + def on_validation_end(self, trainer, pl_module): + """Method call at the end of each validation epoch. The method will use all the aggregate + data over the epoch to log the final metrics on wandb. + This class is a pytorch lightning callback, therefore this method will by automaticly call by pl. + + This method is currently a WIP since some metrics are not logged due to some wandb error when loading + Table. 
+
+        Parameters
+        ----------
+        trainer: pl.Trainer
+            Pytorch lightning trainer
+        pl_module: pl.LightningModule
+            Pytorch lightning module
+        """
+        if trainer.logger is None:
+            return
+        raise Exception("To be implemented in a child class")
diff --git a/alonet/callbacks/map_metrics_callback.py b/alonet/callbacks/map_metrics_callback.py
index 7252d999..b8f97c91 100644
--- a/alonet/callbacks/map_metrics_callback.py
+++ b/alonet/callbacks/map_metrics_callback.py
@@ -1,139 +1,23 @@
-import pytorch_lightning as pl
-import aloscene
-import alonet
 import matplotlib.pyplot as plt
 from pytorch_lightning.utilities import rank_zero_only
 from alonet.common.logger import log_figure, log_scalar
 
-# import wandb
+from alonet.metrics import ApMetrics
+from alonet.callbacks import InstancesBaseMetricsCallback
 
 
-class ApMetricsCallback(pl.Callback):
+class ApMetricsCallback(InstancesBaseMetricsCallback):
     def __init__(self, *args, **kwargs):
-        self.ap_metrics = []
-        super().__init__(*args, **kwargs)
-
-    def inference(self, pl_module: pl.LightningModule, m_outputs: dict, **kwargs):
-        b_pred_masks = None
-        if "pred_masks" in m_outputs:
-            b_pred_boxes, b_pred_masks = pl_module.inference(m_outputs, **kwargs)
-        else:
-            b_pred_boxes = pl_module.inference(m_outputs, **kwargs)
-        if not isinstance(m_outputs, list):
-            b_pred_boxes = [b_pred_boxes]
-            b_pred_masks = [b_pred_masks]
-        return b_pred_boxes, b_pred_masks
-
-    @rank_zero_only
-    def on_validation_batch_end(
-        self,
-        trainer: pl.Trainer,
-        pl_module: pl.LightningModule,
-        outputs: dict,
-        batch: list,
-        batch_idx: int,
-        dataloader_idx: int,
-    ):
-        """Method call after each validation batch. This class is a pytorch lightning callback, therefore
-        this method will by automaticly call by pl.
-
-        This method will call the `infernece` method of the module's model and will expect to receive the
-        predicted boxes2D. Theses boxes will be aggregate to compute the AP metrics in the `on_validation_end` method.
-        The infernece method will be call using the `m_outputs` key from the outputs dict. If `m_outputs` is a list,
-        then the list will be consider as an temporal list. Therefore, this callback will aggregate the predicted boxes
-        for each element of the sequence and will log the final results with the timestep prefix val/t/ instead of
-        simply /val/
-
-        Parameters
-        ----------
-        trainer: pl.Trainer
-            Pytorch lightning trainer
-        pl_module: pl.LightningModule
-            Pytorch lightning module. The "m_outputs" key is expected for this this callback to work properly.
-        outputs:
-            Training/Validation step outputs of the pl.LightningModule class.
-        batch: list
-            Batch comming from the dataloader. Usually, a list of frame.
-        batch_idx: int
-            Id the batch
-        dataloader_idx: int
-            Dataloader batch ID.
- """ - if isinstance(batch, list): # Resize frames for mask procedure - batch = batch[0].batch_list(batch) - - b_pred_boxes, b_pred_masks = self.inference(pl_module, outputs["m_outputs"]) - is_temporal = isinstance(outputs["m_outputs"], list) - for b, (t_pred_boxes, t_pred_masks) in enumerate(zip(b_pred_boxes, b_pred_masks)): - - # Retrieve the matching GT boxes at the same time step - t_gt_boxes = batch[b].boxes2d - t_gt_masks = batch[b].segmentation - - if not is_temporal: - t_gt_boxes = [t_gt_boxes] - t_gt_masks = [t_gt_masks] - - if t_pred_masks is None: - t_pred_masks = [None] * len(t_gt_masks) - - # Add the samples to to the AP metrics for each batch of the current sequence - for t, (gt_boxes, pred_boxes, gt_masks, pred_masks) in enumerate( - zip(t_gt_boxes, t_pred_boxes, t_gt_masks, t_pred_masks) - ): - if t + 1 > len(self.ap_metrics): - self.ap_metrics.append(alonet.metrics.ApMetrics()) - self.add_sample(self.ap_metrics[t], pred_boxes, gt_boxes, pred_masks, gt_masks) - - @rank_zero_only - def add_sample( - self, - ap_metrics: alonet.metrics.ApMetrics, - pred_boxes: aloscene.BoundingBoxes2D, - gt_boxes: aloscene.BoundingBoxes2D, - pred_masks: aloscene.Mask = None, - gt_masks: aloscene.Mask = None, - ): - """Add a smple to some `alonet.metrics.ApMetrics()` class. One might want to inhert this method - to edit the `pred_boxes` and `gt_boxes` boxes before to add them to the ApMetrics class. - - Parameters - ---------- - ap_metrics: alonet.metrics.ApMetrics - ApMetrics intance. - pred_boxes: aloscene.BoundingBoxes2D - Predicted boxes2D. - gt_boxes: aloscene.BoundingBoxes2D - GT boxes2d. - pred_masks: aloscene.Mask - Predicted Masks for segmentation task - gt_masks: aloscene.Mask - GT masks in segmentation task. - """ - ap_metrics.add_sample(pred_boxes, gt_boxes, pred_masks, gt_masks) + super().__init__(*args, base_metric=ApMetrics, **kwargs) @rank_zero_only def on_validation_end(self, trainer, pl_module): - """Method call at the end of each validation epoch. The method will use all the aggregate - data over the epoch to log the final metrics on wandb. - This class is a pytorch lightning callback, therefore this method will by automaticly call by pl. - - This method is currently a WIP since some metrics are not logged due to some wandb error when loading - Table. 
-
-        Parameters
-        ----------
-        trainer: pl.Trainer
-            Pytorch lightning trainer
-        pl_module: pl.LightningModule
-            Pytorch lightning module
-        """
         if trainer.logger is None:
             return
 
-        for t, ap_metrics in enumerate(self.ap_metrics):
+        for t, ap_metrics in enumerate(self.metrics):
 
-            prefix = f"val/{t}/" if len(self.ap_metrics) > 1 else "val/"
+            prefix = f"val/{t}/" if len(self.metrics) > 1 else "val/"
             # step = trainer.global_step
 
             (
@@ -260,4 +144,4 @@
         log_scalar(trainer, f"{prefix}map_bbox", all_maps["box"]["all"])
         log_scalar(trainer, f"{prefix}map_mask", all_maps["mask"]["all"])
 
-        self.ap_metrics = []
+        self.metrics = []
diff --git a/alonet/callbacks/object_detector_callback.py b/alonet/callbacks/object_detector_callback.py
index 241f78d7..4341ba8a 100644
--- a/alonet/callbacks/object_detector_callback.py
+++ b/alonet/callbacks/object_detector_callback.py
@@ -1,15 +1,11 @@
 import torch
 import pytorch_lightning as pl
 import wandb
-from typing import *
+from typing import Union
 
 import aloscene
-import alodataset
-import alonet
 from pytorch_lightning.utilities import rank_zero_only
 import numpy as np
-import matplotlib.pyplot as plt
-from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
 
 from alonet.common.logger import log_image
 
@@ -156,8 +152,8 @@ def log_masks(self, frames: list, pred_masks: list, trainer: pl.trainer.trainer.
             frame = frame.permute([1, 2, 0]).contiguous().numpy()
 
             # Get panoptic view
-            target_masks = target_masks.masks2panoptic()
-            p_mask = p_mask.masks2panoptic()
+            target_masks = target_masks.mask2id()
+            p_mask = p_mask.mask2id()
             target_masks[target_masks == -1] = len(labels_names)  # Background N/A
             p_mask[p_mask == -1] = len(labels_names)  # Background N/A
             target_masks = target_masks.astype(np.uint8)
diff --git a/alonet/callbacks/pq_metrics_callback.py b/alonet/callbacks/pq_metrics_callback.py
new file mode 100644
index 00000000..18f23a13
--- /dev/null
+++ b/alonet/callbacks/pq_metrics_callback.py
@@ -0,0 +1,42 @@
+import matplotlib.pyplot as plt
+from pytorch_lightning.utilities import rank_zero_only
+from alonet.common.logger import log_figure, log_scalar
+
+from alonet.metrics import PQMetrics
+from alonet.callbacks import InstancesBaseMetricsCallback
+
+
+class PQMetricsCallback(InstancesBaseMetricsCallback):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, base_metric=PQMetrics, **kwargs)
+
+    @rank_zero_only
+    def on_validation_end(self, trainer, pl_module):
+        if trainer.logger is None:
+            return
+
+        for t, pq_metrics in enumerate(self.metrics):
+
+            prefix = f"val/{t}/" if len(self.metrics) > 1 else "val/"
+            all_maps, all_maps_per_class = pq_metrics.calc_map(print_result=False)
+
+            log_scalar(trainer, f"{prefix}PQ", all_maps["all"]["pq"])
+            log_scalar(trainer, f"{prefix}SQ", all_maps["all"]["sq"])
+            log_scalar(trainer, f"{prefix}RQ", all_maps["all"]["rq"])
+
+            # One bar per PQ class
+            plt.style.use("ggplot")
+            for cat in ["thing", "stuff"] if len(all_maps_per_class) > 1 else ["all"]:
+                x_set, y_set = zip(*all_maps_per_class[cat].items())
+                y_set = [y["pq"] for y in y_set]
+
+                _, ax = plt.subplots()
+                ax.barh(x_set, y_set)
+                ax.set_xlabel("Panoptic Quality metric")
+                ax.set_ylabel("Category")
+                ax.set_title("PQ metric per category")
+                log_figure(trainer, f"{prefix}pq_{cat}_per_class", plt.gcf())
+                plt.clf()
+                plt.cla()
+
+        self.metrics = []
diff --git a/alonet/common/logger.py b/alonet/common/logger.py
index 0a2d79c7..c8e608f6 100644
--- a/alonet/common/logger.py
+++ b/alonet/common/logger.py
@@ -119,7 +119,7 @@ def log_image(trainer, key, images):
         if masks is not None:
             for m in masks:
-                img = (id2rgb(m["masks"]) * 0.8 + image * 0.2).transpose(2, 0, 1).astype(np.uint8)
+                img = (256 * id2rgb(m["masks"]) * 0.8 + image * 0.2).transpose(2, 0, 1).astype(np.uint8)
                 trainer.logger.experiment.add_image(f"{batch_el_key}_{m['name']}", img, trainer.global_step)
         if boxes is not None:
             image = aloscene.Frame(np.transpose(image, (2, 0, 1)), names=["C", "H", "W"])
diff --git a/alonet/detr/data_modules/coco_panoptic2detr.py b/alonet/detr/data_modules/coco_panoptic2detr.py
index 4d041dfd..e3251cf0 100644
--- a/alonet/detr/data_modules/coco_panoptic2detr.py
+++ b/alonet/detr/data_modules/coco_panoptic2detr.py
@@ -12,17 +12,21 @@ class CocoPanoptic2Detr(Data2Detr):
-    def setup(self, stage: Optional[str] = None):
+    def setup(self, stage: Optional[str] = None, fix_classes_len: int = 250):
         if stage == "fit" or stage is None:
             # Setup train/val loaders
             self.train_dataset = alodataset.CocoPanopticDataset(
                 transform_fn=self.val_transform if self.train_on_val else self.train_transform,
                 sample=self.sample,
                 split=alodataset.Split.VAL if self.train_on_val else alodataset.Split.TRAIN,
+                fix_classes_len=fix_classes_len,
             )
             self.sample = self.train_dataset.sample or self.sample  # Update sample if user prompt is given
             self.val_dataset = alodataset.CocoPanopticDataset(
-                transform_fn=self.val_transform, sample=self.sample, split=alodataset.Split.VAL,
+                transform_fn=self.val_transform,
+                sample=self.sample,
+                split=alodataset.Split.VAL,
+                fix_classes_len=fix_classes_len,
             )
             self.sample = self.val_dataset.sample or self.sample  # Update sample if user prompt is given
             self.label_names = self.val_dataset.label_names if hasattr(self.val_dataset, "label_names") else None
diff --git a/alonet/detr/detr.py b/alonet/detr/detr.py
index a91d0807..a79cc7fb 100644
--- a/alonet/detr/detr.py
+++ b/alonet/detr/detr.py
@@ -96,7 +96,7 @@ def __init__(
 
         if weights is not None:
             if weights == "detr-r50" or ".pth" in weights or ".ckpt" in weights:
-                alonet.common.load_weights(self, "detr-r50", device, strict_load_weights=strict_load_weights)
+                alonet.common.load_weights(self, weights, device, strict_load_weights=strict_load_weights)
             else:
                 raise ValueError(f"Unknown weights: '{weights}'")
 
@@ -329,9 +329,7 @@ def build_decoder_layer(
         )
 
     def build_decoder(
-        self,
-        hidden_dim: int = 256,
-        num_decoder_layers: int = 6,
+        self, hidden_dim: int = 256, num_decoder_layers: int = 6,
     ):
 
         decoder_layer = self.build_decoder_layer()
diff --git a/alonet/detr_panoptic/__init__.py b/alonet/detr_panoptic/__init__.py
index 782ad5de..a56e9e41 100644
--- a/alonet/detr_panoptic/__init__.py
+++ b/alonet/detr_panoptic/__init__.py
@@ -1,4 +1,6 @@
 from .detr_panoptic import PanopticHead
+from .detr_r50_panoptic_finetune import DetrR50PanopticFinetune
 from .criterion import PanopticCriterion
 from .callbacks import PanopticObjectDetectorCallback
+from .callbacks import PanopticApMetricsCallbacks
 from .train import LitPanopticDetr
diff --git a/alonet/detr_panoptic/callbacks.py b/alonet/detr_panoptic/callbacks.py
index 6eeb9168..91fbff3a 100644
--- a/alonet/detr_panoptic/callbacks.py
+++ b/alonet/detr_panoptic/callbacks.py
@@ -1,12 +1,21 @@
 import aloscene
+from typing import Union
 
-from alonet.callbacks import ObjectDetectorCallback
+from alonet.callbacks import ObjectDetectorCallback, ApMetricsCallback
+from alonet import metrics
 from pytorch_lightning.utilities import rank_zero_only
+from alonet.detr_panoptic.utils import get_base_model_frame
 
 
 class PanopticObjectDetectorCallback(ObjectDetectorCallback):
     """Panoptic Detr Callback for object detection training that uses aloscene.Frame as GT."""
 
+    def __init__(self, val_frames: Union[list, aloscene.Frame]):
+        # Batch the list of frames if needed
+        if isinstance(val_frames, list):
+            val_frames = aloscene.Frame.batch_list(val_frames)
+        super().__init__(val_frames=get_base_model_frame(val_frames))
+
     @rank_zero_only
     def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
         """ """
@@ -20,12 +29,8 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data
         assert isinstance(outputs, dict)
         assert "m_outputs" in outputs
 
-        if isinstance(batch, list):
-            frames = aloscene.Frame.batch_list(batch)
-        else:
-            frames = batch
 
         pred_boxes, pred_masks = pl_module.inference(outputs["m_outputs"])
+        frames = get_base_model_frame(batch)
         self.log_boxes_2d(frames=frames, preds_boxes=pred_boxes, trainer=trainer, name="train/frame_obj_detector")
         self.log_masks(frames=frames, pred_masks=pred_masks, trainer=trainer, name="train/frame_seg_detector")
 
@@ -44,3 +49,21 @@ def on_validation_epoch_end(self, trainer, pl_module):
             frames=self.val_frames, preds_boxes=pred_boxes, trainer=trainer, name="val/frame_obj_detector"
         )
         self.log_masks(frames=self.val_frames, pred_masks=pred_masks, trainer=trainer, name="val/frame_seg_detector")
+
+
+class PanopticApMetricsCallbacks(ApMetricsCallback):
+    def add_sample(
+        self,
+        base_metric: metrics,
+        pred_boxes: aloscene.BoundingBoxes2D,
+        gt_boxes: aloscene.BoundingBoxes2D,
+        pred_masks: aloscene.Mask = None,
+        gt_masks: aloscene.Mask = None,
+    ):
+        if isinstance(gt_boxes.labels, dict):
+            gt_boxes = gt_boxes.clone()
+            gt_boxes.labels = gt_boxes.labels["category"]
+        if isinstance(gt_masks.labels, dict):
+            gt_masks = gt_masks.clone()
+            gt_masks.labels = gt_masks.labels["category"]
+        return super().add_sample(base_metric, pred_boxes, gt_boxes, pred_masks=pred_masks, gt_masks=gt_masks)
diff --git a/alonet/detr_panoptic/detr_r50_panoptic_finetune.py b/alonet/detr_panoptic/detr_r50_panoptic_finetune.py
new file mode 100644
index 00000000..ed2a7b49
--- /dev/null
+++ b/alonet/detr_panoptic/detr_r50_panoptic_finetune.py
@@ -0,0 +1,71 @@
+"""Module to create a custom :mod:`PanopticHead ` model using
+:mod:`DetrR50 ` as base model, which allows loading chosen pretrained weights and
+changing the number of outputs in the :attr:`class_embed` layer, in order to train custom classes.
+"""
+
+from torch import nn
+from argparse import Namespace
+from alonet.detr_panoptic import PanopticHead
+from alonet.detr import DetrR50Finetune
+from alonet.common.weights import load_weights
+
+
+class DetrR50PanopticFinetune(PanopticHead):
+    """Premade helpful class to finetune the :mod:`DetrR50 ` and use a pretrained
+    :mod:`PanopticHead `.
+
+    Parameters
+    ----------
+    num_classes : int
+        Number of classes in the :attr:`class_embed` output layer
+    background_class : int, optional
+        Background class, by default None
+    base_weights : str, optional
+        Load weights from original :mod:`DetrR50 ` + :mod:`PanopticHead `,
+        by default "/home/johan/.aloception/weights/detr-r50-panoptic/detr-r50-panoptic.pth"
+    freeze_detr : bool, optional
+        Freeze :mod:`DetrR50 ` weights, by default False
+    weights : str, optional
+        Weights for the finetuned model, by default None
+
+    Raises
+    ------
+    ValueError
+        :attr:`weights` must be a '.pth' or '.ckpt' file
+    """
+
+    def __init__(
+        self,
+        num_classes: int,
+        background_class: int = None,
+        base_weights: str = "/home/johan/.aloception/weights/detr-r50-panoptic/detr-r50-panoptic.pth",
+        freeze_detr: bool = False,
+        weights: str = None,
+        *args: Namespace,
+        **kwargs: dict,
+    ):
+        """Init method"""
+        base_model = DetrR50Finetune(*args, background_class=background_class, num_classes=250, **kwargs)
+        super().__init__(*args, DETR_module=base_model, freeze_detr=freeze_detr, weights=base_weights, **kwargs)
+
+        self.detr.num_classes = num_classes
+        # Replace the class_embed layer with a new one once the detr-r50 weights are loaded
+        # + 1 to include the background class.
+        self.detr.background_class = self.detr.num_classes if background_class is None else background_class
+        self.detr.num_classes = num_classes + 1
+        self.detr.class_embed = nn.Linear(self.detr.hidden_dim, self.detr.num_classes)
+        self.detr.class_embed = self.detr.class_embed.to(self.device)
+
+        # Load weights procedure
+        if weights is not None:
+            if ".pth" in weights or ".ckpt" in weights:
+                load_weights(self, weights, self.device)
+            else:
+                raise ValueError(f"Unknown weights: '{weights}'")
+
+
+if __name__ == "__main__":
+    # Set up a new Detr model with 2 classes; the background class defaults to num_classes.
+    # Additionally, we load the pretrained detr-r50-panoptic weights.
+    panoptic_finetune = DetrR50PanopticFinetune(num_classes=2)
diff --git a/alonet/detr_panoptic/train.py b/alonet/detr_panoptic/train.py
index 9b04449f..184ae1db 100644
--- a/alonet/detr_panoptic/train.py
+++ b/alonet/detr_panoptic/train.py
@@ -1,6 +1,5 @@
-from alonet.detr_panoptic.utils import get_mask_queries
+from alonet.detr_panoptic.utils import get_mask_queries, get_base_model_frame
 import alonet
-import aloscene
 
 
 class LitPanopticDetr(alonet.detr.LitDetr):
@@ -47,25 +46,8 @@ def add_argparse_args(parent_parser, parser=None):
         return parent_parser
 
     def training_step(self, frames, batch_idx):
-        """Train the model for one step
-
-        Parameters
-        ----------
-        frames: list | aloscene.Frame
-            List of aloscene.Frame without batch dimension or a Frame with the batch dimension
-        batch_idx: int
-            Batch id given by Lightning
-
-        Returns
-        -------
-        outptus: dict
-            dict with the `loss` to optimize and the `metrics` to log.
- """ - # Batch list of frame if needed - if isinstance(frames, list): - frames = aloscene.Frame.batch_list(frames) - - # Assert inputs content + # Get correct set of labels and assert inputs content + frames = get_base_model_frame(frames) self.assert_input(frames) get_filter_fn = lambda *args, **kwargs: get_mask_queries( *args, model=self.model.detr, matcher=self.matcher, **kwargs @@ -79,10 +61,15 @@ def training_step(self, frames, batch_idx): outputs.update({"m_outputs": m_outputs}) return outputs + def validation_step(self, frames, batch_idx): + # Get correct set of labels + frames = get_base_model_frame(frames) + return super().validation_step(frames, batch_idx) + def build_model(self, num_classes=250, aux_loss=True, weights=None): """Build model with default parameters""" if self.model_name == "detr-r50-panoptic": - detr_model = alonet.detr.DetrR50Finetune(num_classes=num_classes, aux_loss=aux_loss, background_class=250) + detr_model = alonet.detr.DetrR50(num_classes=num_classes, aux_loss=aux_loss, background_class=250) elif self.model_name == "deformable-detr-r50-panoptic": detr_model = alonet.deformable_detr.DeformableDetrR50Refinement( num_classes=num_classes, aux_loss=aux_loss, activation_fn="softmax", background_class=250, @@ -122,8 +109,9 @@ def callbacks(self, data_loader): val_frames=next(iter(data_loader.val_dataloader())) ) metrics_callback = alonet.callbacks.MetricsCallback() - ap_metrics_callback = alonet.callbacks.ApMetricsCallback() - return [obj_detection_callback, metrics_callback, ap_metrics_callback] + ap_metrics_callback = alonet.detr_panoptic.PanopticApMetricsCallbacks() + pq_metrics_callback = alonet.callbacks.PQMetricsCallback() + return [obj_detection_callback, metrics_callback, ap_metrics_callback, pq_metrics_callback] def run_train(self, data_loader, args, project="panoptic-detr", expe_name=None, callbacks: list = None): expe_name = expe_name or self.model_name diff --git a/alonet/detr_panoptic/utils.py b/alonet/detr_panoptic/utils.py index 69f1eedc..ffc5273d 100644 --- a/alonet/detr_panoptic/utils.py +++ b/alonet/detr_panoptic/utils.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict, List, Union import torch import aloscene @@ -7,13 +7,32 @@ def get_mask_queries( frames: aloscene.frame, m_outputs: Dict, model: torch.nn, matcher: torch.nn = None, filters: List = None, **kwargs ): + """Mask process filter throught matcher or our_filter function + Parameters + ---------- + frames : aloscene.frame + Input frames + m_outputs : Dict + Forward output + model : torch.nn + model with inference function + matcher : torch.nn, optional + Matcher between GT and pred elements, by default None + filters : List, optional + Boolean mask for each batch, by default None + + Returns + ------- + torch.Tensor, List + Mask reduced from (M,H,W) to (N,H,W) with boolean mask per batch (M >= N) + """ dec_outputs = m_outputs["dec_outputs"][-1] device = dec_outputs.device if filters is None: if matcher is None: if "threshold" not in kwargs: - kwargs.update({"threshold": 0.5}) + kwargs.update({"threshold": 0.85}) # Take from original paper filters = model.get_outs_filter(m_outputs=m_outputs, **kwargs) else: nq = dec_outputs.size(1) @@ -31,3 +50,31 @@ def get_mask_queries( for b, (idx, fs) in enumerate(zip(filters, fsizes)) ] return torch.cat(dec_outputs, dim=0), filters + + +def get_base_model_frame(frames: Union[list, aloscene.Frame], cat: str = "category") -> aloscene.Frame: + """Get frames with correct labels for criterion process + + Parameters + ---------- + 
+    frames : Union[list, aloscene.Frame]
+        Frames on which to set the labels
+    cat : str, optional
+        Name of the label set to use, by default "category"
+
+    Returns
+    -------
+    aloscene.Frame
+        Frames with the correct set of labels
+    """
+    if isinstance(frames, list):
+        frames = aloscene.Frame.batch_list(frames)
+
+    frames = frames.clone()
+
+    def criterion(b):
+        b.labels = b.labels[cat]
+
+    if isinstance(frames.boxes2d[0].labels, dict):
+        frames.apply_on_label(frames.boxes2d, criterion)
+    if isinstance(frames.segmentation[0].labels, dict):
+        frames.apply_on_label(frames.segmentation, criterion)
+    return frames
diff --git a/alonet/metrics/__init__.py b/alonet/metrics/__init__.py
index 47800de2..cac374bc 100644
--- a/alonet/metrics/__init__.py
+++ b/alonet/metrics/__init__.py
@@ -1,2 +1,3 @@
 from .compute_map import ApMetrics
 from .compute_map_3d import ApMetrics3D
+from .compute_pq import PQMetrics
diff --git a/alonet/metrics/compute_map_3d.py b/alonet/metrics/compute_map_3d.py
index d430e5ae..2181ac1c 100644
--- a/alonet/metrics/compute_map_3d.py
+++ b/alonet/metrics/compute_map_3d.py
@@ -275,7 +275,13 @@ def _populate_ap_objects_all_range(self, ap_breakdowns, classes, gt_classes, iou
             else:
                 ap_obj.push(score_func(i), False)
 
-    def add_sample(self, p_bbox: BoundingBoxes3D, t_bbox: BoundingBoxes3D):
+    def add_sample(
+        self,
+        p_bbox: BoundingBoxes3D,
+        t_bbox: BoundingBoxes3D,
+        p_mask: aloscene.Mask = None,
+        t_mask: aloscene.Mask = None,
+    ):
         assert isinstance(p_bbox, BoundingBoxes3D)
         assert isinstance(t_bbox, BoundingBoxes3D)
         assert isinstance(p_bbox.labels, aloscene.Labels)
diff --git a/alonet/metrics/compute_pq.py b/alonet/metrics/compute_pq.py
index ff6d6622..b939a2f0 100644
--- a/alonet/metrics/compute_pq.py
+++ b/alonet/metrics/compute_pq.py
@@ -7,9 +7,7 @@
 from typing import Dict, Tuple
 
 import aloscene
-
-VOID = -1
-OFFSET = 256 * 256 * 256
+from alodataset.utils.panoptic_utils import VOID_CLASS_ID, OFFSET
 
 
 class PQStatCat(object):
@@ -28,12 +26,14 @@ def __iadd__(self, pq_stat_cat):
         return self
 
 
-class PQmetrics(object):
+class PQMetrics(object):
     """Compute Panoptic, Segmentation and Recognition Qualities Metrics."""
 
     def __init__(self):
        self.pq_per_cat = defaultdict(PQStatCat)
        self.class_names = None
+        self.isfull = False
+        self.categories = dict()
 
     def __getitem__(self, label_id: int):
         return self.pq_per_cat[label_id]
@@ -43,7 +43,7 @@ def __iadd__(self, pq_stat):
             self.pq_per_cat[label] += pq_stat_cat
         return self
 
-    def init_data_objects(self, class_names: list):
+    def update_data_objects(self, cat_labels: aloscene.Labels, isthing_labels: aloscene.Labels):
         """Update the data objects used to compute the PQ metrics, given the category labels and
         the per-category thing/stuff labels
 
         Parameters
         ----------
         cat_labels: aloscene.Labels
             Category labels; their `labels_names` attribute defines the class names
         isthing_labels: aloscene.Labels, optional
             Thing/stuff label for each category; if None, every category is treated as a thing
         """
-        self.class_names = class_names
-        self.categories = {id: {"category": cname, "isthing": True} for id, cname in enumerate(class_names)}  # TODO
-        self.pq_per_cat.fromkeys(range(len(class_names)))
+        self.class_names = cat_labels.labels_names
+        if isthing_labels is not None:
+            try:
+                thing_id = isthing_labels.labels_names.index("thing")
+            except ValueError:
+                thing_id = len(isthing_labels.labels_names)
+            self.categories.update(
+                {
+                    id: {"category": self.class_names[id], "isthing": it == thing_id}
+                    for id, it in zip(list(cat_labels.numpy().astype(int)), list(isthing_labels.numpy().astype(int)))
+                }
+            )
+            self.isfull = True
+        else:
+            self.categories.update(
+                {
+                    id: {"category": self.class_names[id], "isthing": True}
+                    for id in list(cat_labels.numpy().astype(int))
+                }
+            )
+            self.isfull = False
     def pq_average(self, isthing: bool = None, print_result: bool = False) -> Tuple[Dict, Dict]:
         """Calculate SQ, RQ and PQ metrics from the categories, and thing/stuff/all if desired
@@ -77,7 +95,7 @@ def pq_average(self, isthing: bool = None, print_result: bool = False) -> Tuple[
         per_class_results = {}
         for label, label_info in self.categories.items():
             if isthing is not None:
-                cat_isthing = label_info["isthing"] == 1
+                cat_isthing = label_info["isthing"]
                 if isthing != cat_isthing:
                     continue
             iou = self.pq_per_cat[label].iou
@@ -85,23 +103,30 @@ def pq_average(self, isthing: bool = None, print_result: bool = False) -> Tuple[
             fp = self.pq_per_cat[label].fp
             fn = self.pq_per_cat[label].fn
             if tp + fp + fn == 0:
-                per_class_results[label] = {"pq": 0.0, "sq": 0.0, "rq": 0.0}
+                per_class_results[label_info["category"]] = {"pq": 0.0, "sq": 0.0, "rq": 0.0}
                 continue
             n += 1
             pq_class = iou / (tp + 0.5 * fp + 0.5 * fn)
             sq_class = iou / tp if tp != 0 else 0
             rq_class = tp / (tp + 0.5 * fp + 0.5 * fn)
-            per_class_results[label] = {"pq": pq_class, "sq": sq_class, "rq": rq_class}
+            per_class_results[label_info["category"]] = {"pq": pq_class, "sq": sq_class, "rq": rq_class}
             pq += pq_class
             sq += sq_class
             rq += rq_class
 
         result = {"pq": pq / n, "sq": sq / n, "rq": rq / n, "n": n}
         if print_result:
-            self.print_map(result, per_class_results)
+            suffix = ""
+            if isthing is not None and isthing:
+                suffix = "th"
+            elif isthing is not None and not isthing:
+                suffix = "st"
+            self.print_map(result, per_class_results, suffix=suffix)
         return result, per_class_results
 
-    def add_sample(self, p_mask: aloscene.Mask, t_mask: aloscene.Mask):
+    def add_sample(
+        self, p_mask: aloscene.Mask, t_mask: aloscene.Mask, **kwargs,
+    ):
         """Add new prediction and target masks to the PQ metrics estimation process
 
         Parameters
@@ -116,21 +141,33 @@ def add_sample(self, p_mask: aloscene.Mask, t_mask: aloscene.Mask):
             Exception
             p_mask and t_mask must be aloscene.Mask objects, and must have a labels attribute
         """
-        assert isinstance(p_mask, aloscene.Mask)
-        assert isinstance(t_mask, aloscene.Mask)
-        assert isinstance(p_mask.labels, aloscene.Labels)
-        assert isinstance(t_mask.labels, aloscene.Labels)
-        assert isinstance(p_mask.labels.scores, torch.Tensor)
-        assert hasattr(t_mask.labels, "labels_names")
+        assert isinstance(p_mask, aloscene.Mask) and isinstance(t_mask, aloscene.Mask)
+        assert isinstance(p_mask.labels, aloscene.Labels) and isinstance(t_mask.labels, (dict, aloscene.Labels))
         p_mask = p_mask.to(torch.device("cpu"))
         t_mask = t_mask.to(torch.device("cpu"))
 
-        if self.class_names is None:
-            self.class_names = t_mask.labels.labels_names
-
-        pan_pred = p_mask.masks2panoptic()
-        pan_gt = t_mask.masks2panoptic()
+        label_set = None
+        if isinstance(t_mask.labels, aloscene.Labels):
+            assert hasattr(t_mask.labels, "labels_names")
+            self.update_data_objects(t_mask.labels, None)
+        else:
+            assert "category" in t_mask.labels and hasattr(t_mask.labels["category"], "labels_names")
+            if "isthing" in t_mask.labels:
+                assert hasattr(t_mask.labels["isthing"], "labels_names")
+                assert len(t_mask.labels["category"]) == len(t_mask.labels["isthing"])
+                self.update_data_objects(t_mask.labels["category"], t_mask.labels["isthing"])
+                label_set = "category"
+            else:
+                self.update_data_objects(t_mask.labels["category"], None)
+
+        # Get positional ID by object
+        pan_pred = p_mask.mask2id(return_cats=False) - VOID_CLASS_ID
+        pred_lbl = p_mask.labels.numpy().astype("int")
+        pan_gt = t_mask.mask2id(labels_set=label_set, return_cats=False) - VOID_CLASS_ID
+        gt_lbl = t_mask.labels.numpy() if label_set is None else t_mask.labels[label_set].numpy()
+        gt_lbl = gt_lbl.astype("int")
+        VOID = 0  # The VOID class is in the first position
 
         # ground truth segments area calculation
         gt_segms = {}
@@ -138,8 +175,11 @@
         for label, label_cnt in zip(labels, labels_cnt):
             if label == VOID:  # Ignore pixels without category
                 continue
-            assert label < len(self.class_names)
-            gt_segms[label] = label_cnt  # Get area for each class
+            assert gt_lbl[label - 1] < len(self.class_names)
+            gt_segms[label] = {
+                "area": label_cnt,  # Get area for each object
+                "cat_id": gt_lbl[label - 1],  # Decode category class
+            }
 
         # predicted segments area calculation
         pred_segms = {}
@@ -147,68 +187,121 @@
         for label, label_cnt in zip(labels, labels_cnt):
             if label == VOID:  # Ignore pixels without category
                 continue
-            assert label < len(self.class_names)
-            pred_segms[label] = label_cnt  # Get area for each class
+            assert pred_lbl[label - 1] < len(self.class_names)
+            pred_segms[label] = {
+                "area": label_cnt,  # Get area for each object
+                "cat_id": pred_lbl[label - 1],  # Decode category class
+            }
 
-        # confusion matrix calculation
-        aux_off = VOID if VOID < 0 else 0
-        pan_gt = pan_gt - aux_off
-        pan_pred = pan_pred - aux_off
-
-        pan_gt_pred = pan_gt.astype(np.uint64) * OFFSET + pan_pred.astype(np.uint64)
+        # confusion matrix calculation, if the views are not empty
         gt_pred_map = {}
-        labels, labels_cnt = np.unique(pan_gt_pred, return_counts=True)
-        for label, intersection in zip(labels, labels_cnt):
-            gt_id = label // OFFSET + aux_off
-            pred_id = label % OFFSET + aux_off
-            gt_pred_map[(gt_id, pred_id)] = intersection
+        if len(gt_segms) > 0 and len(pred_segms) > 0:
+            pan_gt_pred = pan_gt.astype(np.uint64) * OFFSET + pan_pred.astype(np.uint64)
+            labels, labels_cnt = np.unique(pan_gt_pred, return_counts=True)
+            for label, intersection in zip(labels, labels_cnt):
+                gt_id = label // OFFSET
+                pred_id = label % OFFSET
+                gt_pred_map[(gt_id, pred_id)] = intersection
 
         # count all matched pairs
-        matched = set()
+        pred_matched, gt_matched = set(), set()
         for label_tuple, intersection in gt_pred_map.items():
             gt_label, pred_label = label_tuple
-            if gt_label != pred_label:
+            if gt_label not in gt_segms:
                 continue
-
-            union = pred_segms[pred_label] + gt_segms[gt_label] - intersection - gt_pred_map.get((VOID, pred_label), 0)
+            if pred_label not in pred_segms:
+                continue
+            if gt_segms[gt_label]["cat_id"] != pred_segms[pred_label]["cat_id"]:
+                continue
+            union = (
+                pred_segms[pred_label]["area"]
+                + gt_segms[gt_label]["area"]
+                - intersection
+                - gt_pred_map.get((VOID, pred_label), 0)
+            )
             iou = intersection / union
-            if iou > 0.5:
-                self.pq_per_cat[gt_label].tp += 1
-                self.pq_per_cat[gt_label].iou += iou
-                matched.add(pred_label)
-
-        # count false positives
-        for gt_label in gt_segms:
-            if gt_label in matched:
+            if iou > 0.5:  # Count as a match above this IoU (threshold taken from the original paper)
+                self.pq_per_cat[gt_segms[gt_label]["cat_id"]].tp += 1
+                self.pq_per_cat[gt_segms[gt_label]["cat_id"]].iou += iou
+                gt_matched.add(gt_label)
+                pred_matched.add(pred_label)
+
+        # count false negatives
+        for gt_label, gt_info in gt_segms.items():
+            if gt_label in gt_matched:
                continue
-            self.pq_per_cat[gt_label].fn += 1
+            self.pq_per_cat[gt_info["cat_id"]].fn += 1
 
         # count false positives
-        for pred_label, pred_area in pred_segms.items():
-            if pred_label in matched:
+        for pred_label, pred_info in pred_segms.items():
+            if pred_label in pred_matched:
                 continue
             # intersection of the segment with VOID
             intersection = gt_pred_map.get((VOID, pred_label), 0)
             # predicted segment is ignored if more than half of the segment corresponds to VOID regions
-            if intersection / pred_area > 0.5:
+            if intersection / pred_info["area"] > 0.5:
                 continue
-            self.pq_per_cat[pred_label].fp += 1
+            self.pq_per_cat[pred_info["cat_id"]].fp += 1
 
     def calc_map(self, print_result: bool = False):
-        if print_result:
-            print("TOTAL PQ: ")
-        pq_total, _ = self.pq_average(None, print_result)
-        if print_result:
-            print("THINGS PQ: ")
-        pq_things, _ = self.pq_average(True, print_result)
-        if print_result:
-            print("TOTAL PQ: ")
-        pq_stuff, _ = self.pq_average(False, print_result)
-        return pq_total, pq_things, pq_stuff
+        all_maps = dict()
+        all_maps_per_class = dict()
+        if self.isfull:
+            keys, cats = ["stuff", "thing", "all"], [False, True, None]
+        else:
+            keys, cats = ["all"], [None]
+        for key, cat in zip(keys, cats):
+            if cat is not None or not self.isfull:
+                all_maps[key], all_maps_per_class[key] = self.pq_average(cat, print_result)
+            else:
+                all_maps[key], all_maps_per_class[key] = self.pq_average(cat)
+
+        if print_result and self.isfull:
+            self.print_head()
+            self.print_body(all_maps["all"], {})
+
+        return all_maps, all_maps_per_class
+
+    def print_map(self, average_pq: Dict, pq_per_class: Dict, suffix: str = ""):
+        self.print_head(suffix)
+        self.print_body(average_pq, pq_per_class)
+
+    @staticmethod
+    def print_head(suffix: str = ""):
+        make_row = lambda vals: (" %5s |" * len(vals)) % tuple(vals)
+        make_sep = lambda n: ("-------+" * (n + 1))
+
+        print()
+        print(make_sep(5))
+        print(" " * 23 + "|" + make_row([v + suffix for v in ["PQ", "SQ", "RQ"]]))
+        print(make_sep(5))
 
     @staticmethod
-    def print_map(average_pq: Dict, pq_per_class: Dict):  # TODO
-        print("AVERAGE PQ:")
-        print(average_pq)
-        print("PQ PER CLASS:")
-        print(pq_per_class)
+    def print_body(average_pq: Dict, pq_per_class: Dict):
+        make_row = lambda vals: (" %5s |" * len(vals)) % tuple(vals)
+        make_sep = lambda n: ("-------+" * (n + 1))
+
+        for cat, metrics in pq_per_class.items():
+            print(
+                make_row(
+                    [
+                        cat[:21] if len(cat) > 20 else cat + " " * (21 - len(cat)),
+                        "%.3f" % metrics["pq"],
+                        "%.3f" % metrics["sq"],
+                        "%.3f" % metrics["rq"],
+                    ]
+                )
+            )
+        print(make_sep(5))
+        n = "%d" % average_pq["n"]
+        print(
+            make_row(
+                [
+                    "total = %s" % n + " " * (13 - len(n)),
+                    "%.3f" % average_pq["pq"],
+                    "%.3f" % average_pq["sq"],
+                    "%.3f" % average_pq["rq"],
+                ]
+            )
+        )
+        print(make_sep(5))
diff --git a/aloscene/bounding_boxes_2d.py b/aloscene/bounding_boxes_2d.py
index 9c8f6502..65fbab24 100644
--- a/aloscene/bounding_boxes_2d.py
+++ b/aloscene/bounding_boxes_2d.py
@@ -1,16 +1,14 @@
 from __future__ import annotations
 import torch
 from torch import Tensor
-import torchvision
-from typing import *
+from typing import Union
 import numpy as np
 import cv2
 
 import aloscene
 from aloscene.renderer import View
 from aloscene.labels import Labels
 
-import torchvision
 from torchvision.ops.boxes import nms
 
@@ -119,13 +117,7 @@ def boxes2xyxy(tensor):
         if tensor.boxes_format == "xcyc":
             labels = tensor.drop_labels()
             # Convert from xcyc to xyxy
-            n_tensor = torch.cat(
-                [
-                    tensor[:, :2] - (tensor[:, 2:] / 2),
-                    tensor[:, :2] + (tensor[:, 2:] / 2),
-                ],
-                dim=1,
-            )
+            n_tensor = torch.cat([tensor[:, :2] - (tensor[:, 2:] / 2), tensor[:, :2] + (tensor[:, 2:] / 2)], dim=1,)
             n_tensor.boxes_format = "xyxy"
             n_tensor.set_labels(labels)
             return n_tensor
@@ -135,13 +127,7 @@ def boxes2xyxy(tensor):
             labels = tensor.drop_labels()
             tensor.rename_(None)
             # Convert from yxyx to xyxy
-            n_tensor = torch.cat(
-                [
-                    tensor[:, :2].flip([1]),
-                    tensor[:, 2:].flip([1]),
-                ],
-                dim=1,
-            )
+            n_tensor = torch.cat([tensor[:, :2].flip([1]), tensor[:, 2:].flip([1])], dim=1,)
             tensor.reset_names()
             n_tensor.reset_names()
             n_tensor.boxes_format = "xyxy"
@@ -181,13 +167,7 @@ def boxes2yxyx(tensor):
             labels = tensor.drop_labels()
             tensor.rename_(None)
             # Convert from xyxy to yxyx
-            yxyx_boxes = torch.cat(
-                [
-                    tensor[:, :2].flip([1]),
-                    tensor[:, 2:].flip([1]),
-                ],
-                dim=1,
-            )
+            yxyx_boxes = torch.cat([tensor[:, :2].flip([1]), tensor[:, 2:].flip([1])], dim=1,)
             yxyx_boxes.reset_names()
             tensor.reset_names()
             yxyx_boxes.boxes_format = "yxyx"
@@ -413,12 +393,12 @@ def get_view(self, frame: Tensor = None, size: tuple = None, labels_set: str = N
         labels = boxes_abs.labels if isinstance(boxes_abs.labels, aloscene.Labels) else [None] * len(boxes_abs)
         if labels_set is not None and not isinstance(boxes_abs.labels, dict):
             raise Exception(
-                f"Trying to display a set of boxes labels ({labels_set}) while the boxes do not have multiple set of labels"
+                f"Trying to display a boxes labels set ({labels_set}) while boxes do not have multiple sets of labels"
             )
         elif labels_set is not None and isinstance(boxes_abs.labels, dict) and labels_set not in boxes_abs.labels:
             raise Exception(
-                f"Trying to display a set of boxes labels ({labels_set}) while the boxes no not have this set. Avaiable set ("
-                + [key for key in boxes_abs.labels]
+                f"Trying to display a boxes labels set ({labels_set}) while boxes do not have this set. Available sets ("
+                + f"{[key for key in boxes_abs.labels]}"
                 + ") "
             )
         elif labels_set is not None:
@@ -587,7 +567,8 @@ def nms(self, scores: torch.Tensor, iou_threshold: float = 0.5):
 
         Returns
         -------
-        int64 tensor with the indices of the elements that have been kept by NMS, sorted in decreasing order of scores
+        int64 tensor
+            The indices of the elements that have been kept by NMS, sorted in decreasing order of scores
         """
         nms_boxes = self.xyxy()
diff --git a/aloscene/mask.py b/aloscene/mask.py
index b2409b31..7f401314 100644
--- a/aloscene/mask.py
+++ b/aloscene/mask.py
@@ -27,12 +27,9 @@ def __new__(cls, x, labels: Union[dict, Labels] = None, *args, **kwargs):
             x = load_mask(x)
             kwargs["names"] = ("N", "H", "W")
         tensor = super().__new__(cls, x, *args, **kwargs)
-        tensor.add_label("labels", labels, align_dim=["N"], mergeable=False)
+        tensor.add_label("labels", labels, align_dim=["N"], mergeable=True)
         return tensor
 
-    def __init__(self, x, *args, **kwargs):
-        super().__init__(x)
-
     def append_labels(self, labels: Labels, name: str = None):
         """Attach a set of labels to the masks.
@@ -46,86 +43,37 @@ def append_labels(self, labels: Labels, name: str = None):
         """
         self._append_label("labels", labels, name)
 
-    GLOBAL_COLOR_SET = np.random.uniform(0, 1, (300, 3))
-
-    def __get_view__(self, title=None):
-        """Create a view of the frame"""
-        assert self.names[0] != "T" and self.names[1] != "B"
-        frame = self.cpu().rename(None).permute([1, 2, 0]).detach().contiguous().numpy()
-
-        # Try to retrieve the associated label ID (if any)
-        labels = self.labels if isinstance(self.labels, aloscene.Labels) else [None] * len(self)
-        annotations = []
-        if isinstance(self.labels, aloscene.Labels) and len(self) > 0:
-            assert self.labels.encoding == "id"
-
-            frame = np.concatenate([np.zeros_like(frame[..., [0]]), frame], axis=-1)  # Add background class as 0
-            frame = np.argmax(frame, axis=-1).astype("int")  # Get one mask by ID
-
-            assert len(labels) == len(self)  # Required to plot labels
-            for i, label in enumerate(labels):  # Add ID in text and use same color by object ID
-                if label is not None:
-                    # Change ID if labels are defined
-                    label = int(label)
-                    frame[frame == i + 1] = (label + 1) % len(self.GLOBAL_COLOR_SET)
-
-                    # Get mass center to put text in frame
-                    feat = self[i].cpu().detach().contiguous().numpy()  # Get i_mask
-                    mass_y, mass_x = np.where(feat > 0.5)
-                    x, y = np.average(mass_x), np.average(mass_y)
-                    x = 0 if np.isnan(x) else x
-                    y = 0 if np.isnan(y) else y
-                    color = self.GLOBAL_COLOR_SET[(label + 1) % len(self.GLOBAL_COLOR_SET)]
-                    color = (0, 0, 0)
-                    text = str(label) if labels.labels_names is None else labels.labels_names[label]
-                    annotations.append({"color": color, "x": int(x), "y": int(y), "text": text})
-
-        # Frame construction by segmentation masks
-        frame = self.GLOBAL_COLOR_SET[frame]
-
-        # Add relative text in frame
-        for anno in annotations:
-            cv2.putText(
-                frame,
-                anno["text"],
-                (anno["x"], anno["y"]),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5,
-                anno["color"],
-                1,
-                cv2.LINE_AA,
-            )
-        return View(frame, title=title)
+    def iou_with(self, mask2) -> torch.Tensor:
+        """IoU calculation between itself and mask2
 
-    def masks2panoptic(self):
-        """Create a panoptic view of the frame, where each pixel represent one class
+        Parameters
+        ----------
+        mask2 : aloscene.Mask
+            Masks with size (M,H,W)
 
         Returns
         -------
-        np.array
-            Array of (H,W) dimensions, where each value represent one class
+        torch.Tensor
+            IoU matrix of size (N,M)
         """
-        """"""
-        assert self.names[0] != "T" and self.names[1] != "B"
-        frame = self.cpu().rename(None).permute([1, 2, 0]).detach().contiguous().numpy()
-
-        # Try to retrieve the associated label ID (if any)
-        labels = self.labels if isinstance(self.labels, aloscene.Labels) else [None] * len(self)
-        if isinstance(self.labels, aloscene.Labels) and len(self) > 0:
-            assert self.labels.encoding == "id"
-
-            frame = np.concatenate([np.zeros_like(frame[..., [0]]), frame], axis=-1)  # Add background class with ID=-1
-            frame = np.argmax(frame, axis=-1).astype("int") - 1  # Get one mask by ID
-
-            assert len(labels) == len(self)  # Required to plot labels
-            for i, label in enumerate(labels):  # Add ID in text and use same color by object ID
-                if label is not None:
-                    # Change ID if labels are defined
-                    label = int(label)
-                    frame[frame == i] = label
-        return frame
+        if len(self) == 0 and len(mask2) == 0:
+            return torch.rand(0, 0)
+        elif len(self) == 0:
+            return torch.rand(0, len(mask2))
+        elif len(mask2) == 0:
+            return torch.rand(len(self), 0)
+        mask1 = self.flatten(["H", "W"], "features").rename(None)  # Binary mask (N, f=WxH)
"features").rename(None) # Binary mask (M, f=WxH) + assert mask1.shape[-1] == mask2.shape[-1] + intersection = mask1.matmul(mask2.transpose(0, 1)) # (N, M) + mask1, mask2 = mask1.sum(-1, keepdim=True), mask2.sum(-1, keepdim=True) + union = mask1.repeat(1, len(mask2)) + mask2.transpose(0, 1) # (N, M) + union[union == 0] = 0.001 # Avoid divide by 0 + return intersection / (union - intersection) - def get_view(self, frame: Tensor = None, size: tuple = None, labels_set: str = None, **kwargs): + def get_view( + self, frame: Tensor = None, size: tuple = None, labels_set: str = None, color_by_cat: bool = False, **kwargs + ): """Get view of segmentation mask and used it in a input Frame Parameters @@ -135,7 +83,9 @@ def get_view(self, frame: Tensor = None, size: tuple = None, labels_set: str = N size : tuple, optional Size of a desired masks, by default not-resize labels_set : str, optional - TODO set of labels to show in segmentation, by default all + Set of labels to show in segmentation when multiple labels are defined, by default None + color_by_cat : bool, optional + Set same color by category ID, by default False Returns ------- @@ -149,7 +99,7 @@ def get_view(self, frame: Tensor = None, size: tuple = None, labels_set: str = N """ from aloscene import Frame - if not isinstance(self.labels, aloscene.Labels): + if not (hasattr(self, "labels") and isinstance(self.labels, (aloscene.Labels, dict))): return super().get_view(size=size, frame=frame, **kwargs) if frame is not None: @@ -161,36 +111,108 @@ def get_view(self, frame: Tensor = None, size: tuple = None, labels_set: str = N frame = torch.zeros(3, *size) frame = Frame(frame, names=("C", "H", "W"), normalization="01") - masks = self.__get_view__(**kwargs).image + masks = self.__get_view__(labels_set=labels_set, color_by_cat=color_by_cat, **kwargs).image frame = frame.norm01().cpu().rename(None).permute([1, 2, 0]).detach().contiguous().numpy() frame = cv2.resize(frame, (self.shape[-1], self.shape[-2])) if masks.shape[-1] > 0: - frame = 0.4 * frame + 0.6 * masks + frame = 0.2 * frame + 0.8 * masks return View(frame, **kwargs) - def iou_with(self, mask2) -> torch.Tensor: - """ IoU calculation between mask2 and itself + def __get_view__(self, labels_set: str = None, title: str = None, color_by_cat: bool = False, **kwargs): + """Create a view of the frame""" + from alodataset.utils.panoptic_utils import id2rgb + + frame, annotations = self.mask2id(labels_set=labels_set, return_ann=True, return_cats=color_by_cat) + + # Frame construction by segmentation masks + if hasattr(self, "labels") and self.labels is not None and len(self) > 0: + frame = id2rgb(frame) + + # Add relative text in frame + for anno in annotations: + cv2.putText( + frame, + anno["text"], + (anno["x"], anno["y"]), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + anno["color"], + 1, + cv2.LINE_AA, + ) + return View(frame, title=title) + + def mask2id(self, labels_set: str = None, return_ann: bool = False, return_cats: bool = False): + """Create a panoptic view of the frame, where each pixel represent one class Parameters ---------- - mask2 : aloscene.Mask - Masks with size (M,H,W) + labels_set : str, optional + If multilabels are handled, get mask_id by a set of label desired, by default None + return_ann : bool, optional + Return annotations to get_view function, by default False + return_cats : bool, optional + Return categories ID instance ID, by default False. 
 
         Returns
         -------
-        torch.Tensor
-            IoU matrix of size (N,M)
+        np.array
+            Array of (H,W) dimensions, where each value represents one class
         """
-        if len(self) == 0 and len(mask2) == 0:
-            return torch.rand(0, 0)
-        elif len(self) == 0:
-            return torch.rand(0, len(mask2))
-        elif len(mask2) == 0:
-            return torch.rand(len(self), 0)
-        mask1 = self.flatten(["H", "W"], "features").rename(None)  # (N, f=WxH)
-        mask2 = mask2.flatten(["H", "W"], "features").rename(None)  # (M, f=WxH)
-        intersection = mask1.matmul(mask2.transpose(0, 1))  # (N, M)
-        mask1, mask2 = mask1.sum(-1, keepdim=True), mask2.sum(-1, keepdim=True)
-        union = mask1.repeat(1, len(mask2)) + mask2.transpose(0, 1)  # (N, M)
-        union[union == 0] = 0.001  # Avoid divide by 0
-        return intersection / (union - intersection)
+        from alodataset.utils.panoptic_utils import VOID_CLASS_ID
+
+        assert self.names[0] != "T" and self.names[1] != "B"
+        frame = self.cpu().rename(None).permute([1, 2, 0]).detach().contiguous().numpy()
+
+        # Try to retrieve the associated label ID (if any)
+        labels = self._get_set_labels(labels_set=labels_set)
+        annotations = []
+        if hasattr(self, "labels") and self.labels is not None and len(labels) > 0:
+            assert len(labels) == len(self)  # Required to make the panoptic view
+
+            frame = np.concatenate([np.zeros_like(frame[..., [0]]), frame], axis=-1)  # Add BG class with ID=VOID
+            frame = np.argmax(frame, axis=-1).astype("int") + VOID_CLASS_ID  # Get one mask by ID
+            copy_frame = frame.copy()
+
+            for i, label in enumerate(labels):  # Add ID in text and use same color by object ID
+                # Change ID if labels are defined
+                if label is not None:
+                    label = int(label)
+
+                    if return_cats:
+                        frame[copy_frame == (i + VOID_CLASS_ID + 1)] = label
+
+                    if return_ann:
+                        feat = self[i].cpu().detach().contiguous().numpy()  # Get i_mask
+                        mass_y, mass_x = np.where(feat > 0.5)
+                        x, y = np.average(mass_x), np.average(mass_y)
+                        x = 0 if np.isnan(x) else x
+                        y = 0 if np.isnan(y) else y
+                        text = str(label) if labels.labels_names is None else labels.labels_names[label]
+                        annotations.append({"color": (0, 0, 0), "x": int(x), "y": int(y), "text": text})
+        if return_ann:
+            return frame, annotations
+        return frame
+
+    def _get_set_labels(self, labels_set: str = None):
+        if not (labels_set is None or isinstance(self.labels, dict)):
+            raise Exception(
+                f"Trying to display a set of labels ({labels_set}) while masks do not have multiple sets of labels"
+            )
+        elif labels_set is not None and isinstance(self.labels, dict) and labels_set not in self.labels:
+            raise Exception(
+                f"Trying to display a set of labels ({labels_set}) while masks do not have this set. Available sets: ("
+                + f"{[key for key in self.labels]}"
+                + ") "
+            )
+        elif not hasattr(self, "labels"):
+            labels = [None] * len(self)
+        elif labels_set is not None and isinstance(self.labels, dict):
+            labels = self.labels[labels_set]
+            assert isinstance(labels, aloscene.Labels) and labels.encoding == "id"
+        elif isinstance(self.labels, aloscene.Labels):
+            labels = self.labels
+            assert labels.encoding == "id"
+        else:
+            labels = [None] * len(self)
+        return labels
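
Usage sketch (reviewer note, not part of the diff): a minimal sanity check wiring the new PQMetrics API to dataset ground truth. It only uses calls introduced in this PR (dict label sets on masks, add_sample, calc_map) and assumes a local COCO panoptic sample reachable through CocoPanopticDataset(sample=True), with frames that contain both thing and stuff segments. Feeding the GT masks back as the "prediction" (restricted to the single "category" label set, as PanopticApMetricsCallbacks does) should yield PQ = SQ = RQ = 1.0 for every annotated class, which validates the metric plumbing end to end:

from alodataset import CocoPanopticDataset
from alonet.metrics import PQMetrics

coco = CocoPanopticDataset(sample=True)  # sample=True avoids the full COCO download
pq = PQMetrics()
for idx in range(2):
    frame = coco.getitem(idx)
    gt_masks = frame.segmentation  # Mask carrying the {"category", "isthing"} label sets
    p_masks = gt_masks.clone()
    if isinstance(p_masks.labels, dict):
        p_masks.labels = p_masks.labels["category"]  # predictions carry a single Labels set
    pq.add_sample(p_mask=p_masks, t_mask=gt_masks)
all_maps, per_class = pq.calc_map(print_result=True)
assert abs(all_maps["all"]["pq"] - 1.0) < 1e-6  # perfect "predictions" -> PQ of 1.0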