-
Notifications
You must be signed in to change notification settings - Fork 68
PTDT-3807: Add temporal audio annotation support #2013
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
e4fd630
dbcc7bf
dbb592f
ff298d4
16896fd
7a666cc
ac58ad0
67dd14a
a1600e5
b4d2f42
fadb14e
1e12596
c2a7b4c
26a35fd
b16f2ea
943cb73
a838513
0ca9cd6
7861537
6c3c50a
68773cf
58b30f7
0a63def
538ba66
9675c73
327800b
1174ad8
2361ca3
59f0cd8
b186359
e63b306
6b54e26
ccad765
735bb09
db3fb5e
b0d5ee4
1266338
66e4c44
471c618
478fb23
82e90e1
fb8df4a
f202586
1e424ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from .classification import Checklist, ClassificationAnswer, Radio, Text | ||
from .classification import Checklist, ClassificationAnswer, Radio, Text, FrameLocation |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,10 @@ | |
from .metrics import ScalarMetric, ConfusionMatrixMetric | ||
from .video import VideoClassificationAnnotation | ||
from .video import VideoObjectAnnotation, VideoMaskAnnotation | ||
from .temporal import ( | ||
TemporalClassificationText, | ||
TemporalClassificationQuestion, | ||
) | ||
from .mmc import MessageEvaluationTaskAnnotation | ||
from pydantic import BaseModel, field_validator | ||
|
||
|
@@ -44,6 +48,8 @@ class Label(BaseModel): | |
ClassificationAnnotation, | ||
ObjectAnnotation, | ||
VideoMaskAnnotation, | ||
TemporalClassificationText, | ||
TemporalClassificationQuestion, | ||
ScalarMetric, | ||
ConfusionMatrixMetric, | ||
RelationshipAnnotation, | ||
|
@@ -75,15 +81,43 @@ def _get_annotations_by_type(self, annotation_type): | |
|
||
def frame_annotations(
    self,
) -> Dict[
    int,
    List[
        Union[
            VideoObjectAnnotation,
            VideoClassificationAnnotation,
            TemporalClassificationText,
            TemporalClassificationQuestion,
        ]
    ],
]:
    """Get temporal annotations organized by frame.

    Returns:
        Dict[int, List]: Dictionary mapping frame (milliseconds) to the
        list of temporal annotations present at that frame.

    Example:
        >>> label.frame_annotations()
        {2500: [VideoClassificationAnnotation(...), TemporalClassificationText(...)]}

    Note:
        TemporalClassificationText/Question annotations may span multiple
        discontinuous frame ranges; each such annotation is keyed by the
        start of its first frame range only.
    """
    frame_dict = defaultdict(list)
    for annotation in self.annotations:
        if isinstance(
            annotation,
            (VideoObjectAnnotation, VideoClassificationAnnotation),
        ):
            # Video annotations carry a single frame index directly.
            # NOTE: the loop must process *all* annotations before
            # returning (an early return here would drop the rest).
            frame_dict[annotation.frame].append(annotation)
        elif isinstance(annotation, TemporalClassificationText):
            if annotation.value:
                # value entries are (start_frame, end_frame, text);
                # key by the first range's start frame.
                frame_dict[annotation.value[0][0]].append(annotation)
        elif isinstance(annotation, TemporalClassificationQuestion):
            if annotation.value and annotation.value[0].frames:
                # frames entries are (start_frame, end_frame);
                # key by the first answer's first start frame.
                frame_dict[annotation.value[0].frames[0][0]].append(annotation)
    # Return a plain dict so both code paths yield the same type.
    return dict(frame_dict)
rishisurana-labelbox marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Audio Annotations Use Invalid KeysThe |
||
|
||
def add_url_to_masks(self, signer) -> "Label": | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
""" | ||
Temporal classification annotations for audio, video, and other time-based media. | ||
|
||
These classes provide a unified, recursive structure for temporal annotations with | ||
frame-level precision. All temporal classifications support nested hierarchies. | ||
""" | ||
|
||
from typing import List, Optional, Tuple, Union | ||
from pydantic import Field | ||
|
||
from labelbox.data.annotation_types.annotation import ClassificationAnnotation | ||
from labelbox.data.annotation_types.classification.classification import ( | ||
ClassificationAnswer, | ||
FrameLocation, | ||
) | ||
|
||
|
||
class TemporalClassificationAnswer(ClassificationAnswer):
    """A single Radio/Checklist answer option with temporal extent.

    The answer may be active over several discontinuous time ranges and
    may carry nested temporal classifications of its own.

    Attributes:
        frames: ``(start_frame, end_frame)`` pairs, in milliseconds.
        classifications: Optional nested temporal text/question
            classifications scoped to this answer.

    Example:
        >>> TemporalClassificationAnswer(
        ...     name="user",
        ...     frames=[(200, 1600)],
        ...     classifications=[
        ...         TemporalClassificationQuestion(
        ...             name="tone",
        ...             answers=[
        ...                 TemporalClassificationAnswer(
        ...                     name="professional",
        ...                     frames=[(1000, 1600)],
        ...                 )
        ...             ],
        ...         )
        ...     ],
        ... )
    """

    frames: List[Tuple[int, int]] = Field(
        default_factory=list,
        description="List of (start_frame, end_frame) tuples in milliseconds",
    )
    classifications: Optional[
        List[Union["TemporalClassificationText", "TemporalClassificationQuestion"]]
    ] = None
|
||
|
||
class TemporalClassificationText(ClassificationAnnotation):
    """Temporal free-text classification.

    Holds one or more text values, each attached to its own
    ``(start_frame, end_frame)`` range in milliseconds, with optional
    recursively nested temporal classifications.

    Attributes:
        value: ``(start_frame, end_frame, text_value)`` triples;
            overrides the parent class's ``value`` field.
        classifications: Optional nested temporal classifications.

    Example:
        >>> TemporalClassificationText(
        ...     name="transcription",
        ...     value=[
        ...         (1600, 2000, "Hello, how can I help you?"),
        ...         (2500, 3000, "Thank you for calling!"),
        ...     ],
        ... )
    """

    # Overrides ClassificationAnnotation.value with a temporal variant.
    value: List[Tuple[int, int, str]] = Field(
        default_factory=list,
        description="List of (start_frame, end_frame, text_value) tuples",
    )
    classifications: Optional[
        List[Union["TemporalClassificationText", "TemporalClassificationQuestion"]]
    ] = None
|
||
|
||
class TemporalClassificationQuestion(ClassificationAnnotation):
    """Temporal Radio/Checklist question.

    Carries one or more :class:`TemporalClassificationAnswer` options,
    each with its own frame ranges. A single answer represents a Radio
    question; several answers represent a Checklist — the serializer
    distinguishes the two by the answer count.

    Attributes:
        value: Temporal answer options; overrides the parent class's
            ``value`` field.
        classifications: Optional nested temporal classifications.

    Example:
        >>> # Checklist (multiple answers, possibly discontinuous ranges)
        >>> TemporalClassificationQuestion(
        ...     name="audio_quality",
        ...     answers=[
        ...         TemporalClassificationAnswer(
        ...             name="background_noise",
        ...             frames=[(0, 1500), (2000, 3000)],
        ...         ),
        ...         TemporalClassificationAnswer(
        ...             name="echo",
        ...             frames=[(2200, 2900)],
        ...         ),
        ...     ],
        ... )
    """

    # Overrides ClassificationAnnotation.value with a temporal variant.
    value: List[TemporalClassificationAnswer] = Field(
        default_factory=list,
        description="List of temporal answer options",
    )
    classifications: Optional[
        List[Union["TemporalClassificationText", "TemporalClassificationQuestion"]]
    ] = None
|
||
|
||
# Resolve the forward references used by the mutually recursive models.
for _temporal_model in (
    TemporalClassificationAnswer,
    TemporalClassificationText,
    TemporalClassificationQuestion,
):
    _temporal_model.model_rebuild()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: Missing List Wrapper in Type Annotation
The
Label.frame_annotations
method's return type annotation is missing aList
wrapper. Because the method usesdefaultdict(list)
and appends annotations, it actually returnsDict[int, List[Union[...]]]
, notDict[int, Union[...]]
as currently typed. The docstring correctly describes the list of annotations.