From f7f06ffbb59f518631bbfa80e7375a6875a64fe2 Mon Sep 17 00:00:00 2001 From: aaprasad Date: Mon, 29 Apr 2024 13:50:33 -0700 Subject: [PATCH 1/6] implement fixed sinusoidal temporal embedding. update docstrings for `model_utils.get_boxes_time` to be more clear about the output --- biogtr/models/embedding.py | 52 ++++++++++++++++++++++++++++++------ biogtr/models/model_utils.py | 4 +-- tests/test_models.py | 23 ++++++++++------ 3 files changed, 61 insertions(+), 18 deletions(-) diff --git a/biogtr/models/embedding.py b/biogtr/models/embedding.py index 95a555dd..0adab350 100644 --- a/biogtr/models/embedding.py +++ b/biogtr/models/embedding.py @@ -83,7 +83,7 @@ def __init__( if self.emb_type == "pos": self._emb_func = self._sine_box_embedding elif self.emb_type == "temp": - pass # TODO Implement fixed sine temporal embedding + self._emb_func = self._sine_temp_embedding def _check_init_args(self, emb_type: str, mode: str): """Check whether the correct arguments were passed to initialization. @@ -108,8 +108,8 @@ def _check_init_args(self, emb_type: str, mode: str): f"Embedding `mode` must be one of {self.EMB_MODES} not {mode}" ) - if mode == "fixed" and emb_type == "temp": - raise NotImplementedError("TODO: Implement Fixed Sinusoidal Temp Embedding") + # if mode == "fixed" and emb_type == "temp": + # raise NotImplementedError("TODO: Implement Fixed Sinusoidal Temp Embedding") def forward(self, seq_positions: torch.Tensor) -> torch.Tensor: """Get the sequence positional embeddings. @@ -141,13 +141,17 @@ def _torch_int_div( def _sine_box_embedding(self, boxes: torch.Tensor) -> torch.Tensor: """Compute sine positional embeddings for boxes using given parameters. - Args: - boxes: the input boxes of shape N x 4 or B x N x 4 - where the last dimension is the bbox coords in [y1, x1, y2, x2]. - (Note currently `B=batch_size=1`). + Args: + boxes: the input boxes of shape N x 4 or B x N x 4 + where the last dimension is the bbox coords in [y1, x1, y2, x2]. + (Note currently `B=batch_size=1`). Returns: - torch.Tensor, the sine positional embeddings. + torch.Tensor, the sine positional embeddings + (embedding[:, 4i] = sin(x) + embedding[:, 4i+1] = cos(x) + embedding[4i+2] = sin(y) + embedding[4i+3) = cos(y) """ if self.scale is not None and self.normalize is False: raise ValueError("normalize should be True if scale is passed") @@ -176,6 +180,38 @@ def _sine_box_embedding(self, boxes: torch.Tensor) -> torch.Tensor: return pos_emb + def _sine_temp_embedding(self, times: torch.Tensor) -> torch.Tensor: + """Compute fixed sine temporal embeddings. + + Args: + times: the input times of shape (N,) or (N,1) where N = (sum(instances_per_frame)) + which is the frame index of the instance relative + to the batch size + (e.g. `torch.tensor([0, 0, ..., 0, 1, 1, ..., 1, 2, 2, ..., 2,..., B, B, ...B])`). + + Returns: + an n_instances x D embedding representing the temporal embedding. 
+ """ + T = times.int().max().item() + 1 + d = self.features + n = self.temperature + + positions = torch.arange(0, T).unsqueeze(1) + temp_lookup = torch.zeros(T, d, device=times.device) + + denominators = torch.pow( + n, 2 * torch.arange(0, d // 2) / d + ) # 10000^(2i/d_model), i is the index of embedding + temp_lookup[:, 0::2] = torch.sin( + positions / denominators + ) # sin(pos/10000^(2i/d_model)) + temp_lookup[:, 1::2] = torch.cos( + positions / denominators + ) # cos(pos/10000^(2i/d_model)) + + temp_emb = temp_lookup[times.int()] + return temp_emb # .view(len(times), self.features) + def _learned_pos_embedding(self, boxes: torch.Tensor) -> torch.Tensor: """Compute learned positional embeddings for boxes using given parameters. diff --git a/biogtr/models/model_utils.py b/biogtr/models/model_utils.py index 34ec02fc..e695017c 100644 --- a/biogtr/models/model_utils.py +++ b/biogtr/models/model_utils.py @@ -14,8 +14,8 @@ def get_boxes_times(frames: List[Frame]) -> Tuple[torch.Tensor, torch.Tensor]: Returns: Tuple[torch.Tensor, torch.Tensor]: A tuple of two tensors containing the - bounding boxes and corresponding frame - indices, respectively. + bounding boxes normalized by the height and width of the image + and corresponding frame indices, respectively. """ boxes, times = [], [] _, h, w = frames[0].img_shape.flatten() diff --git a/tests/test_models.py b/tests/test_models.py index 0e8c3c29..ec8e4b3a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -94,9 +94,6 @@ def test_embedding_validity(): with pytest.raises(Exception): _ = Embedding(emb_type="temporal", mode="learn", features=128) - with pytest.raises(Exception): - _ = Embedding(emb_type="temp", mode="fixed", features=128) - _ = Embedding(emb_type="temp", mode="learned", features=128) _ = Embedding(emb_type="pos", mode="learned", features=128) @@ -185,21 +182,28 @@ def test_embedding_kwargs(): N = frames * objects - boxes = torch.rand(size=(N, 4)) - times = torch.rand(size=(N,)) + boxes = torch.rand(N, 2) * (1024 - 128) + boxes = torch.concat([boxes, boxes + 128], axis=-1) + print(boxes) + # times = torch.rand(size=(N,)) # sine embedding - sine_no_args = Embedding("pos", "fixed", 128)(boxes) - sine_args = { "temperature": objects, "scale": frames, "normalize": True, } + sine_no_args = Embedding("pos", "fixed", 128, temperature=10000) + sine_with_args = Embedding("pos", "fixed", 128, temperature=10) + + assert sine_no_args.temperature != sine_with_args.temperature - sine_with_args = Embedding("pos", "fixed", 128, **sine_args)(boxes) + sine_no_args = sine_no_args(boxes) + sine_with_args = sine_with_args(boxes) + print(sine_with_args) + print(sine_no_args) assert not torch.equal(sine_no_args, sine_with_args) # learned pos embedding @@ -336,6 +340,9 @@ def test_transformer_embedding(): return_embedding=True, ) + assert transformer.pos_emb.mode == "learned" + assert transformer.temp_emb.mode == "learned" + asso_preds, embedding = transformer(frames) assert asso_preds[0].size() == (num_detected * num_frames,) * 2 From 2d4b1edd277dd20940d8a028b5f42828c8fc1edd Mon Sep 17 00:00:00 2001 From: aaprasad Date: Mon, 29 Apr 2024 13:52:55 -0700 Subject: [PATCH 2/6] store poses as node, array dictionaries instead of as arrays --- biogtr/data_structures.py | 2 +- biogtr/datasets/sleap_dataset.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/biogtr/data_structures.py b/biogtr/data_structures.py index a3190dd3..b79f98a8 100644 --- a/biogtr/data_structures.py +++ b/biogtr/data_structures.py @@ -21,7 +21,7 @@ def 
__init__( point_scores: ArrayLike = None, instance_score: float = -1.0, skeleton: sio.Skeleton = None, - pose: dict[str, ArrayLike] = np.array([]), + pose: dict[str, ArrayLike] = {}, device: str = None, ): """Initialize Instance. diff --git a/biogtr/datasets/sleap_dataset.py b/biogtr/datasets/sleap_dataset.py index 73ef5be0..4ecaec90 100644 --- a/biogtr/datasets/sleap_dataset.py +++ b/biogtr/datasets/sleap_dataset.py @@ -290,7 +290,7 @@ def get_instances(self, label_idx: List[int], frame_idx: List[int]) -> list[dict crop=crop, bbox=bbox, skeleton=skeleton, - pose=np.array(list(poses[j].values())), + pose=poses[j], point_scores=point_scores[j], instance_score=instance_score[j], ) From 3f21a6eefbd66aedb65de55d0b8bf1324c1ced39 Mon Sep 17 00:00:00 2001 From: aaprasad Date: Mon, 29 Apr 2024 14:06:45 -0700 Subject: [PATCH 3/6] use kwargs variable instead of hardcoded override --- tests/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index ec8e4b3a..47b5b84f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -194,8 +194,8 @@ def test_embedding_kwargs(): "scale": frames, "normalize": True, } - sine_no_args = Embedding("pos", "fixed", 128, temperature=10000) - sine_with_args = Embedding("pos", "fixed", 128, temperature=10) + sine_no_args = Embedding("pos", "fixed", 128) + sine_with_args = Embedding("pos", "fixed", 128, **sine_args) assert sine_no_args.temperature != sine_with_args.temperature From ec3639fc19838ee46e9689f9114a6b8c0f6d3f12 Mon Sep 17 00:00:00 2001 From: aaprasad Date: Tue, 30 Apr 2024 10:57:29 -0700 Subject: [PATCH 4/6] add changes requested by @talmo's code review * remove unnecessary comments/print statements * use `None` instead of empty mutables. --- biogtr/data_structures.py | 2 +- tests/test_models.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/biogtr/data_structures.py b/biogtr/data_structures.py index b79f98a8..a6d50803 100644 --- a/biogtr/data_structures.py +++ b/biogtr/data_structures.py @@ -21,7 +21,7 @@ def __init__( point_scores: ArrayLike = None, instance_score: float = -1.0, skeleton: sio.Skeleton = None, - pose: dict[str, ArrayLike] = {}, + pose: dict[str, ArrayLike] = None, device: str = None, ): """Initialize Instance. 
diff --git a/tests/test_models.py b/tests/test_models.py index 47b5b84f..4eb61768 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -183,9 +183,7 @@ def test_embedding_kwargs(): N = frames * objects boxes = torch.rand(N, 2) * (1024 - 128) - boxes = torch.concat([boxes, boxes + 128], axis=-1) - print(boxes) - # times = torch.rand(size=(N,)) + boxes = torch.concat([boxes / 1024, (boxes + 128) / 1024], axis=-1) # sine embedding @@ -202,8 +200,6 @@ def test_embedding_kwargs(): sine_no_args = sine_no_args(boxes) sine_with_args = sine_with_args(boxes) - print(sine_with_args) - print(sine_no_args) assert not torch.equal(sine_no_args, sine_with_args) # learned pos embedding From bf3ff18ffcce5f4bc6a0b33d18cfb78f21599a33 Mon Sep 17 00:00:00 2001 From: aaprasad Date: Tue, 30 Apr 2024 10:58:27 -0700 Subject: [PATCH 5/6] add changes requested by @talmo's code review * remove unnecessary comment --- biogtr/models/embedding.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/biogtr/models/embedding.py b/biogtr/models/embedding.py index 0adab350..4629f73b 100644 --- a/biogtr/models/embedding.py +++ b/biogtr/models/embedding.py @@ -108,9 +108,6 @@ def _check_init_args(self, emb_type: str, mode: str): f"Embedding `mode` must be one of {self.EMB_MODES} not {mode}" ) - # if mode == "fixed" and emb_type == "temp": - # raise NotImplementedError("TODO: Implement Fixed Sinusoidal Temp Embedding") - def forward(self, seq_positions: torch.Tensor) -> torch.Tensor: """Get the sequence positional embeddings. From e3f577b547a84d8797f6b58de307287169e69c5b Mon Sep 17 00:00:00 2001 From: aaprasad Date: Tue, 30 Apr 2024 11:04:20 -0700 Subject: [PATCH 6/6] fix documentation typo --- biogtr/models/embedding.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/biogtr/models/embedding.py b/biogtr/models/embedding.py index 4629f73b..f0f8fa36 100644 --- a/biogtr/models/embedding.py +++ b/biogtr/models/embedding.py @@ -147,8 +147,9 @@ def _sine_box_embedding(self, boxes: torch.Tensor) -> torch.Tensor: torch.Tensor, the sine positional embeddings (embedding[:, 4i] = sin(x) embedding[:, 4i+1] = cos(x) - embedding[4i+2] = sin(y) - embedding[4i+3) = cos(y) + embedding[:, 4i+2] = sin(y) + embedding[:, 4i+3] = cos(y) + ) """ if self.scale is not None and self.normalize is False: raise ValueError("normalize should be True if scale is passed")
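
For reference, a minimal usage sketch of the fixed sinusoidal temporal embedding introduced in PATCH 1/6. This is not part of the patches above; the constructor signature mirrors the calls in tests/test_models.py, and the import path is assumed from the file layout shown in the diffs (biogtr/models/embedding.py).

    # Rough sketch (not part of the patch series): calling the new fixed
    # sinusoidal temporal embedding on per-instance frame indices.
    # Assumes `Embedding` is importable from `biogtr.models.embedding`.
    import torch

    from biogtr.models.embedding import Embedding

    # one frame index per instance, e.g. 3 instances in each of frames 0, 1, 2
    times = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2])

    temp_emb = Embedding(emb_type="temp", mode="fixed", features=128)
    out = temp_emb(times)

    # one 128-d sin/cos vector per instance, looked up by frame index
    assert out.shape == (len(times), 128)
    # instances from the same frame share the same embedding row
    assert torch.equal(out[0], out[1])

Because `_sine_temp_embedding` builds a T x D sin/cos lookup table and indexes it with `times.int()`, instances belonging to the same frame are guaranteed to receive identical temporal embeddings, unlike the learned temporal embedding, which also behaves this way but must be trained.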