chore: update temporal logic to be 1:1 with v3 script

rishisurana-labelbox · rishisurana-labelbox · commit b1863595b4b0 · 2025-09-30T09:11:01.000-07:00
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py
@@ -183,76 +183,163 @@ def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[
 
 class HierarchyBuilder(Generic[TemporalAnnotation]):
     """Builds hierarchical nested classifications from temporal annotations."""
-    
+
     def __init__(self, group_manager: AnnotationGroupManager[TemporalAnnotation], value_grouper: ValueGrouper[TemporalAnnotation]):
         self.group_manager = group_manager
         self.value_grouper = value_grouper
-    
+        self.parent_assignments = self._compute_parent_assignments()
+
+    def _compute_parent_assignments(self) -> Dict[str, str]:
+        """
+        Compute best parent assignment for each group based on temporal containment, then ranked by name similarity, hierarchy depth, and average container size.
+        Returns mapping of child_group_key -> parent_group_key.
+        """
+        assignments = {}
+        assignment_depth = {}  # Track depth of each assignment (0 = root)
+
+        # Assign depth 0 to roots
+        for root_key in self.group_manager.root_groups:
+            assignment_depth[root_key] = 0
+
+        # Build assignments level by level
+        remaining_groups = set(self.group_manager.groups.keys()) - self.group_manager.root_groups
+
+        max_iterations = len(remaining_groups) + 1  # Prevent infinite loops
+        iteration = 0
+
+        while remaining_groups and iteration < max_iterations:
+            iteration += 1
+            assigned_this_round = set()
+
+            for child_key in remaining_groups:
+                child_anns = self.group_manager.groups[child_key]
+
+                # Find all potential parents (groups that contain this child's annotations)
+                potential_parents = []
+
+                for parent_key, parent_anns in self.group_manager.groups.items():
+                    if parent_key == child_key:
+                        continue
+
+                    # Check that every child annotation is contained by at least one annotation of this candidate parent
+                    all_contained = True
+                    for child_ann in child_anns:
+                        child_start, child_end = self.group_manager.frame_extractor(child_ann)
+                        child_frame = TemporalFrame(child_start, child_end)
+
+                        contained_by_parent = False
+                        for parent_ann in parent_anns:
+                            parent_start, parent_end = self.group_manager.frame_extractor(parent_ann)
+                            parent_frame = TemporalFrame(parent_start, parent_end)
+                            if parent_frame.contains(child_frame):
+                                contained_by_parent = True
+                                break
+
+                        if not contained_by_parent:
+                            all_contained = False
+                            break
+
+                    if all_contained:
+                        # Calculate average container size for this parent
+                        avg_size = sum((self.group_manager.frame_extractor(ann)[1] - self.group_manager.frame_extractor(ann)[0])
+                                       for ann in parent_anns) / len(parent_anns)
+
+                        # Depth of this parent (0 = root; 999 if no depth assigned yet). NOTE: the sort below prefers HIGHER depth.
+                        parent_depth = assignment_depth.get(parent_key, 999)
+
+                        # Name similarity heuristic: if child name contains parent name as prefix/substring,
+                        # it's likely related (e.g., "sub_radio_question_2" contains "sub_radio_question")
+                        name_similarity = 1 if parent_key in child_key else 0
+
+                        potential_parents.append((parent_key, avg_size, parent_depth, name_similarity))
+
+                # Choose best parent: prefer name similarity, then higher depth, then smallest size
+                if potential_parents:
+                    # Sort by: 1) prefer name similarity, 2) prefer higher depth, 3) smallest size
+                    potential_parents.sort(key=lambda x: (-x[3], -x[2], x[1]))
+                    best_parent = potential_parents[0][0]
+                    assignments[child_key] = best_parent
+                    assignment_depth[child_key] = assignment_depth.get(best_parent, 0) + 1
+                    assigned_this_round.add(child_key)
+
+            # Remove assigned groups from remaining
+            remaining_groups -= assigned_this_round
+
+            # If no progress, break to avoid infinite loop
+            if not assigned_this_round:
+                break
+
+        return assignments
+
     def build_hierarchy(self) -> List[Dict[str, Any]]:
         """Build the complete hierarchical structure."""
         results = []
-        
+
         for group_key in self.group_manager.root_groups:
             group_anns = self.group_manager.groups[group_key]
             top_entries = self.value_grouper.group_by_value(group_anns)
-            
+
             # Attach nested classifications to each top-level entry
             for entry in top_entries:
                 frames = [TemporalFrame(f["start"], f["end"]) for f in entry.get("frames", [])]
                 nested = self._build_nested_for_frames(frames, group_key)
                 if nested:
                     entry["classifications"] = nested
-            
+
             results.append({
                 "name": self.group_manager.get_group_display_name(group_key),
                 "answer": top_entries,
             })
-        
+
         return results
     
-    def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], exclude_group: str) -> List[Dict[str, Any]]:
+    def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], parent_group_key: str) -> List[Dict[str, Any]]:
         """Recursively build nested classifications for specific parent frames."""
         nested = []
-        
+
         # Get all annotations within parent frames
-        all_contained = self.group_manager.get_annotations_within_frames(parent_frames, exclude_group)
-        
+        all_contained = self.group_manager.get_annotations_within_frames(parent_frames, parent_group_key)
+
         # Group by classification type and process each group
         for group_key, group_anns in self.group_manager.groups.items():
-            if group_key == exclude_group or group_key in self.group_manager.root_groups:
+            if group_key == parent_group_key or group_key in self.group_manager.root_groups:
                 continue
-            
+
+            # Only process groups that are assigned to this parent
+            if self.parent_assignments.get(group_key) != parent_group_key:
+                continue
+
             # Filter annotations that are contained by parent frames
             candidate_anns = []
             for ann in group_anns:
                 start, end = self.group_manager.frame_extractor(ann)
                 ann_frame = TemporalFrame(start, end)
                 if any(frame.contains(ann_frame) for frame in parent_frames):
                     candidate_anns.append(ann)
-            
+
             if not candidate_anns:
                 continue
-            
+
             # Keep only immediate children (not strictly contained by other contained annotations)
             child_anns = self._filter_immediate_children(candidate_anns, all_contained)
             if not child_anns:
                 continue
-            
+
             # Build this child classification block
             child_entries = self.value_grouper.group_by_value(child_anns)
-            
+
             # Recursively attach further nested classifications
             for entry in child_entries:
                 entry_frames = [TemporalFrame(f["start"], f["end"]) for f in entry.get("frames", [])]
                 child_nested = self._build_nested_for_frames(entry_frames, group_key)
                 if child_nested:
                     entry["classifications"] = child_nested
-            
+
             nested.append({
                 "name": self.group_manager.get_group_display_name(group_key),
                 "answer": child_entries,
             })
-        
+
         return nested
     
     def _filter_immediate_children(self, candidates: List[TemporalAnnotation],