@@ -183,76 +183,163 @@ def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[
183
183
184
184
class HierarchyBuilder (Generic [TemporalAnnotation ]):
185
185
"""Builds hierarchical nested classifications from temporal annotations."""
186
-
186
+
187
187
def __init__(
    self,
    group_manager: AnnotationGroupManager[TemporalAnnotation],
    value_grouper: ValueGrouper[TemporalAnnotation],
):
    """Store the collaborators and precompute the child -> parent group mapping.

    Args:
        group_manager: Supplies the annotation groups, the root groups and
            the frame extractor that the builder reads.
        value_grouper: Turns a list of annotations into value-grouped
            entries via ``group_by_value``.
    """
    self.value_grouper = value_grouper
    self.group_manager = group_manager
    # Must come after group_manager is stored: the computation reads it
    # through self.
    self.parent_assignments = self._compute_parent_assignments()
191
+
192
def _compute_parent_assignments(self) -> Dict[str, str]:
    """
    Compute the best parent group for every non-root group.

    A group P is a candidate parent of group C when P has at least one
    annotation and every annotation of C is temporally contained
    (``TemporalFrame.contains``) by at least one annotation of P.  Among
    the candidates the winner is chosen by, in order:

    1. name similarity (the parent key is a substring of the child key),
    2. greatest hierarchy depth (the most deeply nested container),
    3. smallest average annotation length (``end - start``),

    with any remaining tie going to the group that appears first in
    ``group_manager.groups``.

    The depth of every candidate is resolved *before* candidates are
    ranked, so the result does not depend on the order in which groups are
    visited, and a group is never assigned to a group that is itself being
    placed underneath it (no assignment cycles).

    Returns:
        Mapping of child_group_key -> parent_group_key.  Root groups and
        groups that no other group contains are absent from the mapping.
    """
    manager = self.group_manager
    groups = manager.groups
    root_groups = manager.root_groups
    extract = manager.frame_extractor

    # Derive each group's frames and mean annotation length once, instead
    # of re-extracting them inside the pairwise comparison loops.
    frames: Dict[str, list] = {}
    mean_length: Dict[str, float] = {}
    for key, anns in groups.items():
        spans = [extract(ann) for ann in anns]
        frames[key] = [TemporalFrame(start, end) for start, end in spans]
        if spans:
            mean_length[key] = sum(end - start for start, end in spans) / len(spans)

    def covers(parent_key: str, child_key: str) -> bool:
        """True when every child frame lies inside some parent frame."""
        parent_frames = frames[parent_key]
        if not parent_frames:
            # A group without annotations cannot contain anything; this
            # also keeps mean_length lookups safe for every candidate.
            return False
        return all(
            any(parent_frame.contains(child_frame) for parent_frame in parent_frames)
            for child_frame in frames[child_key]
        )

    depth: Dict[str, int] = {root_key: 0 for root_key in root_groups}
    assignments: Dict[str, str] = {}
    in_progress = set()  # groups on the current resolution chain

    def resolve_depth(group_key: str) -> int:
        """Return the depth of group_key, assigning its parent on first use."""
        if group_key in depth:
            return depth[group_key]

        in_progress.add(group_key)
        best_parent = None
        best_rank = None
        for parent_key in groups:
            # Skip the group itself and anything already on the chain:
            # picking a group that is waiting on this one would create a
            # cycle (this only arises for mutually containing groups).
            if parent_key == group_key or parent_key in in_progress:
                continue
            if not covers(parent_key, group_key):
                continue
            rank = (
                0 if parent_key in group_key else 1,  # name similarity first
                -resolve_depth(parent_key),           # then deepest parent
                mean_length[parent_key],              # then tightest container
            )
            # Strict '<' keeps the first candidate on ties (insertion order).
            if best_rank is None or rank < best_rank:
                best_parent, best_rank = parent_key, rank
        in_progress.discard(group_key)

        if best_parent is None:
            # Nothing contains this group: treat it as a top-level node so
            # it does not outrank real, deeper containers.
            depth[group_key] = 0
        else:
            assignments[group_key] = best_parent
            depth[group_key] = depth[best_parent] + 1
        return depth[group_key]

    # Walk the dict (insertion order) rather than a set so the outcome is
    # reproducible regardless of string hash randomization.
    for group_key in groups:
        if group_key not in root_groups:
            resolve_depth(group_key)

    return assignments
273
+
191
274
def build_hierarchy(self) -> List[Dict[str, Any]]:
    """Assemble one result block per root group, nesting child classifications.

    Returns:
        A list with one ``{"name": ..., "answer": ...}`` dict per root
        group, in the iteration order of ``group_manager.root_groups``.
        Each answer entry gains a ``"classifications"`` key when nested
        classifications exist for its frames.
    """
    manager = self.group_manager
    hierarchy = []

    for root_key in manager.root_groups:
        answers = self.value_grouper.group_by_value(manager.groups[root_key])

        # Hang the nested classifications off every top-level answer.
        for answer in answers:
            answer_frames = [
                TemporalFrame(span["start"], span["end"])
                for span in answer.get("frames", [])
            ]
            children = self._build_nested_for_frames(answer_frames, root_key)
            if children:
                answer["classifications"] = children

        block = {
            "name": manager.get_group_display_name(root_key),
            "answer": answers,
        }
        hierarchy.append(block)

    return hierarchy
212
295
213
def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], parent_group_key: str) -> List[Dict[str, Any]]:
    """Recursively build the classification blocks nested under one parent.

    Args:
        parent_frames: Frames of the parent entry; only annotations contained
            by at least one of them are considered.
        parent_group_key: Key of the parent group.  Only groups whose entry
            in ``self.parent_assignments`` equals this key are emitted.

    Returns:
        A list of ``{"name": ..., "answer": ...}`` dicts, one per child group
        that has at least one immediate child annotation.
    """
    manager = self.group_manager

    # Every annotation inside the parent frames; passed to the
    # immediate-children filter for each child group.
    within_parent = manager.get_annotations_within_frames(parent_frames, parent_group_key)

    def inside_parent(ann) -> bool:
        """True when some parent frame contains the annotation's frame."""
        start, end = self.group_manager.frame_extractor(ann)
        ann_frame = TemporalFrame(start, end)
        return any(frame.contains(ann_frame) for frame in parent_frames)

    blocks = []
    for group_key, group_anns in manager.groups.items():
        # Only non-root groups, other than the parent itself, that are
        # assigned to this parent are processed here.
        belongs_here = (
            group_key != parent_group_key
            and group_key not in self.group_manager.root_groups
            and self.parent_assignments.get(group_key) == parent_group_key
        )
        if not belongs_here:
            continue

        candidates = [ann for ann in group_anns if inside_parent(ann)]
        if not candidates:
            continue

        # Keep only the immediate children of this parent.
        immediate = self._filter_immediate_children(candidates, within_parent)
        if not immediate:
            continue

        answers = self.value_grouper.group_by_value(immediate)

        # Descend: each answer's own frames become the next parent frames.
        for answer in answers:
            answer_frames = [
                TemporalFrame(span["start"], span["end"])
                for span in answer.get("frames", [])
            ]
            deeper = self._build_nested_for_frames(answer_frames, group_key)
            if deeper:
                answer["classifications"] = deeper

        blocks.append({
            "name": self.group_manager.get_group_display_name(group_key),
            "answer": answers,
        })

    return blocks
257
344
258
345
def _filter_immediate_children (self , candidates : List [TemporalAnnotation ],
0 commit comments