Skip to content

Commit

Permalink
Fix two problems with the task segmentation algorithm
Browse files Browse the repository at this point in the history
1. When a task has non-zero overlap and exactly as many frames as needed
   to create 1 or more complete segments, the current algorithm generates a
   redundant segment at the end. For example, if size is 5, segment size is
   3, and overlap is 1, it generates segments (0, 2), (2, 4), and (4, 4).

   The algorithm attempts to compensate for this, but it only works in the
   case where the segment size is unspecified (and defaults to the total
   size).

   Update the algorithm to handle this correctly in the general case.

2. The algorithm selects a default overlap size of 5 if the media file is a
   video. However, this might not be a valid value if the task has a very
   small segment size. In this case, a range of undesirable behaviors may
   occur, depending on the segment size:

   * segments getting generated such that more than 2 segments cover a
     single frame;

   * task creation crashing with an exception;

   * a task being created with no segments at all.

   Fix this by clamping the default overlap size the same way as a
   user-specified one.
  • Loading branch information
SpecLad committed Mar 26, 2024
1 parent eb5b612 commit 0478408
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 12 deletions.
18 changes: 6 additions & 12 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import itertools
import fnmatch
import os
import sys
from typing import Any, Dict, Iterator, List, NamedTuple, Optional, Union, Iterable
from rest_framework.serializers import ValidationError
import rq
Expand Down Expand Up @@ -132,23 +131,18 @@ def _segments():
data_size = db_task.data.size

segment_size = db_task.segment_size
segment_step = segment_size
if segment_size == 0 or segment_size > data_size:
segment_size = data_size

# Segment step must be more than segment_size + overlap in single-segment tasks
# Otherwise a task contains an extra segment
segment_step = sys.maxsize

overlap = 5 if db_task.mode == 'interpolation' else 0
if db_task.overlap is not None:
overlap = min(db_task.overlap, segment_size // 2)

segment_step -= overlap
overlap = min(
db_task.overlap if db_task.overlap is not None
else 5 if db_task.mode == 'interpolation' else 0,
segment_size // 2,
)

segments = (
SegmentParams(start_frame, min(start_frame + segment_size - 1, data_size - 1))
for start_frame in range(0, data_size, segment_step)
for start_frame in range(0, data_size - overlap, segment_size - overlap)
)

return SegmentsParams(segments, segment_size, overlap)
Expand Down
61 changes: 61 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,67 @@ def test_can_create_task_with_defined_start_and_stop_frames(self):
(task, _) = api_client.tasks_api.retrieve(task_id)
assert task.size == 4

def test_default_overlap_for_small_segment_size(self):
# Create a task from a server-side video with a very small segment_size and
# no explicit "overlap", then verify how the task was split into jobs.
task_spec = {
"name": f"test {self._USERNAME} with default overlap and small segment_size",
"labels": [{"name": "car"}],
# "overlap" is intentionally not specified here, so the default is exercised
"segment_size": 2,
}

task_data = {
"image_quality": 75,
"server_files": ["videos/video_1.mp4"],
}

task_id, _ = create_task(self._USERNAME, task_spec, task_data)

# Fetch the jobs of the created task and order them by their start frame
with make_api_client(self._USERNAME) as api_client:
paginated_job_list, _ = api_client.jobs_api.list(task_id=task_id)

jobs = paginated_job_list.results
jobs.sort(key=lambda job: job.start_frame)

# Exactly two jobs are expected, (0, 1) and (1, 2): neighbouring jobs share
# a single frame (presumably the video has 3 frames - confirm against the asset)
assert len(jobs) == 2
assert jobs[0].start_frame == 0
assert jobs[0].stop_frame == 1
assert jobs[1].start_frame == 1
assert jobs[1].stop_frame == 2

# Each case is (number of uploaded images, expected list of
# (start_frame, stop_frame) pairs) for segment_size=3 and overlap=1.
@pytest.mark.parametrize(
"size,expected_segments",
[
(2, [(0, 1)]),
(3, [(0, 2)]),
(4, [(0, 2), (2, 3)]),
# 5 frames fill two segments exactly; no redundant trailing (4, 4) segment
(5, [(0, 2), (2, 4)]),
(6, [(0, 2), (2, 4), (4, 5)]),
],
)
def test_task_segmentation(self, size, expected_segments):
# Create an image task of `size` frames with a fixed segment size and overlap,
# then compare the frame ranges of the resulting jobs with the expected ones.
task_spec = {
"name": f"test {self._USERNAME} to check segmentation into jobs",
"labels": [{"name": "car"}],
"segment_size": 3,
"overlap": 1,
}

task_data = {
"image_quality": 75,
"client_files": generate_image_files(size),
}

task_id, _ = create_task(self._USERNAME, task_spec, task_data)

# Fetch the jobs of the created task and order them by their start frame
with make_api_client(self._USERNAME) as api_client:
paginated_job_list, _ = api_client.jobs_api.list(task_id=task_id)

jobs = paginated_job_list.results
jobs.sort(key=lambda job: job.start_frame)

# The (start_frame, stop_frame) pairs must match the expected segmentation exactly
assert [(j.start_frame, j.stop_frame) for j in jobs] == expected_segments

def test_can_create_task_with_exif_rotated_images(self):
task_spec = {
"name": f"test {self._USERNAME} to create a task with exif rotated images",
Expand Down

0 comments on commit 0478408

Please sign in to comment.