Skip to content

Commit b186359

Browse files
chore: update temporal logic to be 1:1 with v3 script
1 parent 59f0cd8 commit b186359

File tree

1 file changed

+105
-18
lines changed
  • libs/labelbox/src/labelbox/data/serialization/ndjson

1 file changed

+105
-18
lines changed

libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py

Lines changed: 105 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -183,76 +183,163 @@ def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[
183183

184184
class HierarchyBuilder(Generic[TemporalAnnotation]):
185185
"""Builds hierarchical nested classifications from temporal annotations."""
186-
186+
187187
def __init__(self, group_manager: AnnotationGroupManager[TemporalAnnotation], value_grouper: ValueGrouper[TemporalAnnotation]):
    """Store the collaborators and precompute the child -> parent group mapping.

    Args:
        group_manager: Provides the annotation groups, the root groups and the
            frame extractor that the hierarchy is built from.
        value_grouper: Converts a list of annotations into answer entries.
    """
    self.group_manager, self.value_grouper = group_manager, value_grouper

    # Computed last: the assignment pass reads self.group_manager.
    self.parent_assignments = self._compute_parent_assignments()
191+
192+
def _compute_parent_assignments(self) -> Dict[str, str]:
    """
    Compute the parent group of every non-root group.

    A group P is a candidate parent of a group C when every annotation of C
    is temporally contained (``TemporalFrame.contains``) by at least one
    annotation of P. Among the candidates the winner is picked by, in order:

    1. name similarity (the parent key is a substring of the child key),
    2. greater depth in the hierarchy built so far; a candidate that has not
       been placed yet counts as depth 999 and therefore outranks every
       placed group,
    3. smaller average annotation length.

    Groups are visited in the insertion order of ``group_manager.groups``.
    The depth table is updated while groups are being visited, so the visit
    order influences rule 2; using a fixed order keeps the result
    reproducible across runs instead of depending on set iteration order.

    Returns:
        Mapping of child_group_key -> parent_group_key. Root groups and
        groups that no other group contains are absent from the mapping.
    """
    manager = self.group_manager
    groups = manager.groups
    root_groups = manager.root_groups
    extract = manager.frame_extractor

    # Extract every annotation's (start, end) once instead of once per
    # (child, parent) pair.
    spans = {key: [extract(ann) for ann in anns] for key, anns in groups.items()}
    frames = {
        key: [TemporalFrame(start, end) for start, end in group_spans]
        for key, group_spans in spans.items()
    }

    assignments: Dict[str, str] = {}
    depth_of = {root_key: 0 for root_key in root_groups}  # 0 = root

    remaining = set(groups) - root_groups

    # Each round either assigns at least one group (shrinking `remaining`)
    # or breaks, so the loop always terminates without an iteration cap.
    while remaining:
        assigned_this_round = set()

        # Fixed visiting order: see the docstring.
        for child_key in [key for key in groups if key in remaining]:
            child_frames = frames[child_key]
            candidates = []

            for parent_key, parent_frames in frames.items():
                # A group is never its own parent, and a group without
                # annotations cannot contain anything (it would also make
                # the average below divide by zero).
                if parent_key == child_key or not parent_frames:
                    continue

                fully_contained = all(
                    any(parent_frame.contains(child_frame) for parent_frame in parent_frames)
                    for child_frame in child_frames
                )
                if not fully_contained:
                    continue

                parent_spans = spans[parent_key]
                avg_size = sum(end - start for start, end in parent_spans) / len(parent_spans)

                # NOTE(review): unplaced candidates default to 999 and the
                # ranking below prefers the HIGHER depth, so they win over
                # already placed groups. Confirm this matches the reference
                # behaviour this method is meant to mirror.
                parent_depth = depth_of.get(parent_key, 999)

                # Name similarity heuristic, e.g. "sub_radio_question_2"
                # contains "sub_radio_question".
                name_similarity = 1 if parent_key in child_key else 0

                candidates.append((parent_key, avg_size, parent_depth, name_similarity))

            if not candidates:
                continue

            # Rank: name similarity first, then higher depth, then smaller
            # average size. min() keeps the first candidate on ties, exactly
            # like a stable sort followed by taking element 0.
            best_parent = min(candidates, key=lambda c: (-c[3], -c[2], c[1]))[0]
            assignments[child_key] = best_parent
            depth_of[child_key] = depth_of.get(best_parent, 0) + 1
            assigned_this_round.add(child_key)

        if not assigned_this_round:
            break
        remaining -= assigned_this_round

    return assignments
273+
191274
def build_hierarchy(self) -> List[Dict[str, Any]]:
    """Assemble one top-level classification block per root group.

    Returns:
        A list of ``{"name": ..., "answer": [...]}`` dicts, one per root
        group. An answer entry gains a ``"classifications"`` key when nested
        blocks were built for its frames.
    """
    manager = self.group_manager
    hierarchy = []

    for root_key in manager.root_groups:
        answers = self.value_grouper.group_by_value(manager.groups[root_key])

        for answer in answers:
            # Nested blocks are looked up per answer entry, using that
            # entry's own frames as the containing frames.
            spans = [TemporalFrame(item["start"], item["end"]) for item in answer.get("frames", [])]
            children = self._build_nested_for_frames(spans, root_key)
            if not children:
                continue
            answer["classifications"] = children

        hierarchy.append({
            "name": manager.get_group_display_name(root_key),
            "answer": answers,
        })

    return hierarchy
212295

213-
def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], parent_group_key: str) -> List[Dict[str, Any]]:
    """Recursively build the nested classification blocks under one parent.

    Args:
        parent_frames: Frames of the parent entry; a child annotation is kept
            only if at least one of these frames contains it.
        parent_group_key: Key of the parent group. Only groups whose entry in
            ``self.parent_assignments`` equals this key are considered.

    Returns:
        A list of ``{"name": ..., "answer": [...]}`` dicts, one per child
        group that has at least one immediate child annotation.
    """
    manager = self.group_manager
    blocks = []

    def inside_parent(ann) -> bool:
        # True when any parent frame contains the annotation's frame.
        start, end = manager.frame_extractor(ann)
        span = TemporalFrame(start, end)
        return any(frame.contains(span) for frame in parent_frames)

    # Everything located within the parent frames; used further down to
    # tell immediate children apart from deeper descendants.
    within_parent = manager.get_annotations_within_frames(parent_frames, parent_group_key)

    for key, members in manager.groups.items():
        if key == parent_group_key or key in manager.root_groups:
            continue

        # Skip groups that were assigned to a different parent (or to none).
        if self.parent_assignments.get(key) != parent_group_key:
            continue

        candidates = [ann for ann in members if inside_parent(ann)]
        if not candidates:
            continue

        direct_children = self._filter_immediate_children(candidates, within_parent)
        if not direct_children:
            continue

        entries = self.value_grouper.group_by_value(direct_children)

        # Descend: each entry's own frames become the parent frames of the
        # next level.
        for entry in entries:
            spans = [TemporalFrame(item["start"], item["end"]) for item in entry.get("frames", [])]
            deeper = self._build_nested_for_frames(spans, key)
            if deeper:
                entry["classifications"] = deeper

        blocks.append({
            "name": manager.get_group_display_name(key),
            "answer": entries,
        })

    return blocks
257344

258345
def _filter_immediate_children(self, candidates: List[TemporalAnnotation],

0 commit comments

Comments
 (0)