diff --git a/alonet/deformable_detr/deformable_detr.py b/alonet/deformable_detr/deformable_detr.py
index 6bcd9554..76cc3f04 100644
--- a/alonet/deformable_detr/deformable_detr.py
+++ b/alonet/deformable_detr/deformable_detr.py
@@ -90,7 +90,7 @@ def __init__(
         super().__init__()
         self.device = device
         self.num_feature_levels = num_feature_levels
-        self.transformer = transformer
+        self.backbone = backbone
         self.num_queries = num_queries
         self.return_intermediate_dec = return_intermediate_dec
         self.hidden_dim = transformer.d_model
@@ -105,9 +105,6 @@ def __init__(
         self.background_class = num_classes if self.activation_fn == "softmax" else None
         num_classes += 1 if self.activation_fn == "softmax" else 0  # Add bg class

-        self.class_embed = nn.Linear(self.hidden_dim, num_classes)
-        self.bbox_embed = MLP(self.hidden_dim, self.hidden_dim, 4, 3)
-        self.query_embed = nn.Embedding(num_queries, self.hidden_dim * 2)
         # Projection for Multi-scale features
         if num_feature_levels > 1:
             num_backbone_outs = len(backbone.strides) - 1  # Ignore backbone.layer1
@@ -138,8 +135,11 @@ def __init__(
                     )
                 ]
             )
+        self.query_embed = nn.Embedding(num_queries, self.hidden_dim * 2)
+        self.transformer = transformer
+        self.class_embed = nn.Linear(self.hidden_dim, num_classes)
+        self.bbox_embed = MLP(self.hidden_dim, self.hidden_dim, 4, 3)

-        self.backbone = backbone
         self.aux_loss = aux_loss
         self.with_box_refine = with_box_refine
         self.tracing = tracing
@@ -617,11 +617,15 @@ def build_decoder_layer(
             n_points=dec_n_points,
         )

-    def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = True):
+    def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = True, hidden_dim: int = 256, num_feature_levels: int = 4):
         """Build decoder layer

         Parameters
         ----------
+        hidden_dim : int, optional
+            Hidden dimension size, by default 256
+        num_feature_levels : int, optional
+            Number of feature levels, by default 4
         dec_layers : int, optional
             Number of decoder layers, by default 6
         return_intermediate_dec : bool, optional
@@ -632,7 +636,7 @@ def build_decoder(self, dec_layers: int = 6, return_intermediate_dec: bool = Tru
         :class:`~alonet.deformable.deformable_transformer.DeformableTransformerDecoder`
             Transformer decoder
         """
-        decoder_layer = self.build_decoder_layer()
+        decoder_layer = self.build_decoder_layer(hidden_dim=hidden_dim, num_feature_levels=num_feature_levels)

         return DeformableTransformerDecoder(decoder_layer, dec_layers, return_intermediate_dec)

@@ -679,7 +683,7 @@ def build_transformer(
         :mod:`Transformer <alonet.deformable_detr.deformable_transformer>`
             Transformer module
         """
-        decoder = self.build_decoder()
+        decoder = self.build_decoder(hidden_dim=hidden_dim, num_feature_levels=num_feature_levels)

         return DeformableTransformer(
             decoder=decoder,
diff --git a/alonet/detr/data_modules/coco_detection2detr.py b/alonet/detr/data_modules/coco_detection2detr.py
index 08816ea5..30a34719 100644
--- a/alonet/detr/data_modules/coco_detection2detr.py
+++ b/alonet/detr/data_modules/coco_detection2detr.py
@@ -88,20 +88,18 @@ def setup(self, stage: Optional[str] = None):

 if __name__ == "__main__":
     # setup data
     loader_kwargs = dict(
-        name="rabbits",
-        train_folder="train",
-        train_ann="train/_annotations.coco.json",
-        val_folder="valid",
-        val_ann="valid/_annotations.coco.json",
+        name="coco",
+        train_folder="train2017",
+        train_ann="annotations/instances_train2017.json",
+        val_folder="val2017",
+        val_ann="annotations/instances_val2017.json",
     )
     args = CocoDetection2Detr.add_argparse_args(ArgumentParser()).parse_args()  # Help provider
     coco = CocoDetection2Detr(args, **loader_kwargs)
     coco.prepare_data()
     coco.setup()
-
-    samples = next(iter(coco.train_dataloader()))
-    samples[0].get_view().render()
-
-    samples = next(iter(coco.val_dataloader()))
-    samples[0].get_view().render()
+    iterator = iter(coco.train_dataloader())
+    for i in range(2):
+        samples = next(iterator)
+        samples[0].get_view().render()
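Aside on the __init__ reordering in deformable_detr.py: nn.Module registers submodules in attribute-assignment order, so assigning self.backbone before self.transformer changes the iteration order of named_parameters() and state_dict() while leaving the keys themselves untouched. A minimal, standalone sketch of that PyTorch behavior (illustrative class names, not part of the patch):

    import torch.nn as nn

    class TransformerFirst(nn.Module):
        def __init__(self):
            super().__init__()
            self.transformer = nn.Linear(4, 4)  # registered first
            self.backbone = nn.Linear(4, 4)     # registered second

    class BackboneFirst(nn.Module):
        def __init__(self):
            super().__init__()
            self.backbone = nn.Linear(4, 4)     # registered first
            self.transformer = nn.Linear(4, 4)  # registered second

    # Same parameter names, different iteration order:
    print([n for n, _ in TransformerFirst().named_parameters()])
    # ['transformer.weight', 'transformer.bias', 'backbone.weight', 'backbone.bias']
    print([n for n, _ in BackboneFirst().named_parameters()])
    # ['backbone.weight', 'backbone.bias', 'transformer.weight', 'transformer.bias']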
diff --git a/alonet/detr/data_modules/data2detr.py b/alonet/detr/data_modules/data2detr.py
index 878eefa8..5037db17 100644
--- a/alonet/detr/data_modules/data2detr.py
+++ b/alonet/detr/data_modules/data2detr.py
@@ -16,6 +16,7 @@

 import alonet
 import aloscene
+from torch.utils.data.sampler import RandomSampler, SequentialSampler


 class Data2Detr(pl.LightningDataModule):
@@ -122,6 +123,9 @@ def add_argparse_args(parent_parser: ArgumentParser, parser: _ArgumentGroup = No
         parser.add_argument(
             "--sample", action="store_true", help="Download a sample for train/val process (Default: %(default)s)"
         )
+        parser.add_argument(
+            "--sequential", action="store_true", help="Use a sequential sampler for the train dataloader (Default: %(default)s)"
+        )
         return parent_parser

     def train_transform(self, frame: aloscene.Frame, same_on_sequence: bool = True, same_on_frames: bool = False):
@@ -217,7 +221,10 @@ def train_dataloader(self):
            torch.utils.data.DataLoader
                Dataloader for training process
        """
-        return self.train_dataset.train_loader(batch_size=self.batch_size, num_workers=self.num_workers)
+        return self.train_dataset.train_loader(
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            sampler=SequentialSampler if self.sequential else RandomSampler)

     def val_dataloader(self, sampler: torch.utils.data = None):
         """Get val dataloader
diff --git a/alonet/detr/detr.py b/alonet/detr/detr.py
index 3b5c7171..cac6c5f5 100644
--- a/alonet/detr/detr.py
+++ b/alonet/detr/detr.py
@@ -74,7 +74,12 @@ def __init__(
         tracing: bool = False,
     ):
         super().__init__()
+        self.backbone = backbone
         self.num_queries = num_queries
+        hidden_dim = transformer.d_model
+        self.hidden_dim = hidden_dim
+        self.query_embed = nn.Embedding(num_queries, hidden_dim)
+        self.input_proj = nn.Conv2d(backbone.num_channels, hidden_dim, kernel_size=1)
         self.transformer = transformer
         self.num_decoder_layers = transformer.decoder.num_layers
         self.num_classes = num_classes
@@ -87,15 +92,9 @@ def __init__(
         self.background_class = self.num_classes if background_class is None else background_class
         self.num_classes += 1

-        hidden_dim = transformer.d_model
-        self.hidden_dim = hidden_dim
-
         self.class_embed = self.build_class_embed()
         self.bbox_embed = self.build_bbox_embed()

-        self.query_embed = nn.Embedding(num_queries, hidden_dim)
-        self.input_proj = nn.Conv2d(backbone.num_channels, hidden_dim, kernel_size=1)
-        self.backbone = backbone
         self.aux_loss = aux_loss
         self.tracing = tracing

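Aside on the sampler change in data2detr.py: SequentialSampler and RandomSampler are passed as classes, not instances, so this assumes alodataset's train_loader instantiates the given class with the dataset. A standalone sketch of the equivalent logic (make_train_loader and Squares are hypothetical names for illustration, not the alodataset API):

    from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler

    class Squares(Dataset):
        def __len__(self):
            return 5

        def __getitem__(self, i):
            return i * i

    def make_train_loader(dataset, sequential=False, batch_size=1, num_workers=0):
        # The sampler arrives as a class and is instantiated with the dataset,
        # mirroring how train_loader above is assumed to consume its argument.
        sampler_cls = SequentialSampler if sequential else RandomSampler
        return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, sampler=sampler_cls(dataset))

    print([b.item() for b in make_train_loader(Squares(), sequential=True)])
    # [0, 1, 4, 9, 16] -- dataset order; sequential=False would shuffle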
diff --git a/aloscene/mask.py b/aloscene/mask.py
index bc03066a..b5222600 100644
--- a/aloscene/mask.py
+++ b/aloscene/mask.py
@@ -234,3 +234,22 @@ def _get_set_children(self, labels_set: Union[str, None] = None):
         else:
             labels = [None] * len(self)
         return labels
+
+    def _spatial_shift(self, shift_y: float, shift_x: float, **kwargs):
+        """Spatially shift the Mask.
+
+        This method is currently a no-op: the first implementation caused issues, so its only purpose is to stop triggering the warning message.
+
+        Parameters
+        ----------
+        shift_y : float
+            Shift percentage on the y axis. Can be negative or positive.
+        shift_x : float
+            Shift percentage on the x axis. Can be negative or positive.
+
+        Returns
+        -------
+        shifted_tensor : aloscene.AugmentedTensor
+            The mask itself, currently unshifted.
+        """
+        return self
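Aside on the new Mask._spatial_shift: it returns the mask unchanged, so spatial-shift augmentations applied to a Frame with attached masks no longer trigger the not-implemented warning, at the cost of the mask not actually moving yet. A usage sketch (the Mask constructor call and the dim names are assumptions and may differ between aloscene versions):

    import torch
    import aloscene

    # Assumed construction: a one-channel mask with named dims (N, H, W).
    mask = aloscene.Mask(torch.zeros(1, 8, 8), names=("N", "H", "W"))

    # No-op for now: the same, unshifted mask comes back and no warning is raised.
    shifted = mask._spatial_shift(shift_y=0.1, shift_x=-0.2)
    assert shifted is mask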