Skip to content

Commit

Permalink
[scene_manager] Add ability to crop input
Browse files Browse the repository at this point in the history
  • Loading branch information
Breakthrough committed Oct 27, 2024
1 parent 3fb8d8a commit eef0ade
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 23 deletions.
4 changes: 4 additions & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ Options

Path to config file. See :ref:`config file reference <scenedetect_cli-config_file>` for details.

.. option:: --crop X0 Y0 X1 Y1

Crop input video. Specified as two points representing top left and bottom right corner of crop region. 0 0 is top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).

.. option:: -s CSV, --stats CSV

Stats file (.csv) to write frame metrics. Existing files will be overwritten. Used for tuning detection parameters and data analysis.
Expand Down
4 changes: 4 additions & 0 deletions scenedetect.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
# Video backend interface, must be one of: opencv, pyav.
#backend = opencv

# Crop input video to area. Specified as two points in the form X0 Y0 X1 Y1 or
# as (X0 Y0), (X1 Y1). Coordinate (0, 0) is the top-left corner.
#crop = 100 100 200 250

# Downscale frame using a ratio of N. Set to 1 for no downscaling. If unset,
# applied automatically based on input video resolution. Must be an integer value.
#downscale = 1
Expand Down
12 changes: 11 additions & 1 deletion scenedetect/_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,14 @@ def _print_command_help(ctx: click.Context, command: click.Command):
help="Backend to use for video input. Backend options can be set using a config file (-c/--config). [available: %s]%s"
% (", ".join(AVAILABLE_BACKENDS.keys()), USER_CONFIG.get_help_string("global", "backend")),
)
@click.option(
"--crop",
metavar="X0 Y0 X1 Y1",
type=(int, int, int, int),
default=None,
help="Crop input video. Specified as two points representing top left and bottom right corner of crop region. 0 0 is top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).%s"
% (USER_CONFIG.get_help_string("global", "crop", show_default=False)),
)
@click.option(
"--downscale",
"-d",
Expand Down Expand Up @@ -284,6 +292,7 @@ def scenedetect(
drop_short_scenes: ty.Optional[bool],
merge_last_scene: ty.Optional[bool],
backend: ty.Optional[str],
crop: ty.Optional[ty.Tuple[int, int, int, int]],
downscale: ty.Optional[int],
frame_skip: ty.Optional[int],
verbosity: ty.Optional[str],
Expand Down Expand Up @@ -324,12 +333,13 @@ def scenedetect(
output=output,
framerate=framerate,
stats_file=stats,
downscale=downscale,
frame_skip=frame_skip,
min_scene_len=min_scene_len,
drop_short_scenes=drop_short_scenes,
merge_last_scene=merge_last_scene,
backend=backend,
crop=crop,
downscale=downscale,
quiet=quiet,
logfile=logfile,
config=config,
Expand Down
6 changes: 6 additions & 0 deletions scenedetect/_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,12 @@ def format(self, timecode: FrameTimecode) -> str:
},
"global": {
"backend": "opencv",
#
#
# FIXME: This should be a tuple of 4 valid ints similar to ScoreWeightsValue.
#
#
"crop": None,
"default-detector": "detect-adaptive",
"downscale": 0,
"downscale-method": Interpolation.LINEAR,
Expand Down
11 changes: 9 additions & 2 deletions scenedetect/_cli/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,13 @@ def handle_options(
output: ty.Optional[ty.AnyStr],
framerate: float,
stats_file: ty.Optional[ty.AnyStr],
downscale: ty.Optional[int],
frame_skip: int,
min_scene_len: str,
drop_short_scenes: ty.Optional[bool],
merge_last_scene: ty.Optional[bool],
backend: ty.Optional[str],
crop: ty.Optional[ty.Tuple[int, int, int, int]],
downscale: ty.Optional[int],
quiet: bool,
logfile: ty.Optional[ty.AnyStr],
config: ty.Optional[ty.AnyStr],
Expand Down Expand Up @@ -287,6 +288,7 @@ def handle_options(
logger.debug(str(ex))
raise click.BadParameter(str(ex), param_hint="downscale factor") from None
scene_manager.interpolation = self.config.get_value("global", "downscale-method")
scene_manager.crop = self.config.get_value("global", "crop", crop)

self.scene_manager = scene_manager

Expand Down Expand Up @@ -545,7 +547,12 @@ def _open_video_stream(
framerate=framerate,
backend=backend,
)
logger.debug("Video opened using backend %s", type(self.video_stream).__name__)
logger.debug(f"""Video information:
Backend: {type(self.video_stream).__name__}
Resolution: {self.video_stream.frame_size}
Framerate: {self.video_stream.frame_rate}
Duration: {self.video_stream.duration} ({self.video_stream.duration.frame_num} frames)""")

except FrameRateUnavailable as ex:
raise click.BadParameter(
"Failed to obtain framerate for input video. Manually specify framerate with the"
Expand Down
93 changes: 73 additions & 20 deletions scenedetect/scene_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ def on_new_scene(frame_img: numpy.ndarray, frame_num: int):
CutList = List[FrameTimecode]
"""Type hint for a list of cuts, where each timecode represents the first frame of a new shot."""

CropRegion = Tuple[int, int, int, int]
"""Type hint for rectangle of the form X0 Y0 X1 Y1 for cropping frames. Coordinates are relative
to source frame without downscaling.
"""

# TODO: This value can and should be tuned for performance improvements as much as possible,
# until accuracy falls, on a large enough dataset. This has yet to be done, but the current
# value doesn't seem to have caused any issues at least.
Expand Down Expand Up @@ -143,7 +148,7 @@ class Interpolation(Enum):
"""Lanczos interpolation over 8x8 neighborhood."""


def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> int:
def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> float:
"""Get the optimal default downscale factor based on a video's resolution (currently only
the width in pixels is considered).
Expand All @@ -157,10 +162,10 @@ def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MI
Returns:
float: The default downscale factor to use to achieve at least the target effective_width (1 if the frame is already narrower than effective_width).
"""
assert not (frame_width < 1 or effective_width < 1)
assert frame_width > 0 and effective_width > 0
if frame_width < effective_width:
return 1
return frame_width // effective_width
return frame_width / float(effective_width)


def get_scenes_from_cuts(
Expand Down Expand Up @@ -651,6 +656,7 @@ def __init__(

self._frame_buffer = []
self._frame_buffer_size = 0
self._crop = None

@property
def interpolation(self) -> Interpolation:
Expand All @@ -666,6 +672,35 @@ def stats_manager(self) -> Optional[StatsManager]:
"""Getter for the StatsManager associated with this SceneManager, if any."""
return self._stats_manager

@property
def crop(self) -> Optional[CropRegion]:
    """Region of each frame to crop to, or None when no crop is set.

    Returned as a tuple of 4 ints (X0, Y0, X1, Y1), where (X0, Y0) is one point and (X1, Y1)
    is another, together describing a rectangle inside of the frame. Coordinates start from 0
    and are inclusive. For example, with a 100x100 pixel video, (0, 0, 99, 99) covers the
    entire frame.
    """
    stored = self._crop
    if stored is None:
        return None
    # The second point is kept one-past-the-end internally (slice bounds); convert it back to
    # the inclusive form that callers assigned.
    left, top, right_end, bottom_end = stored
    return (left, top, right_end - 1, bottom_end - 1)

@crop.setter
def crop(self, value: Optional[CropRegion]):
    """Set the crop region, or disable cropping by assigning None.

    Arguments:
        value: Four ints (X0, Y0, X1, Y1) giving two opposite corners of the crop rectangle
            in inclusive, zero-based coordinates. The two points may be given in either
            order. None disables cropping.

    Raises:
        TypeError: `value` is not a sequence of exactly 4 ints.
        ValueError: A coordinate is negative.
    """
    if value is None:
        self._crop = None
        return
    # Materialize once so that non-sequences (e.g. a bare int) fail with our own message
    # instead of whatever `len()` happens to raise.
    try:
        coords = tuple(value)
    except TypeError:
        raise TypeError("crop region must be tuple of 4 ints") from None
    if not (len(coords) == 4 and all(isinstance(v, int) for v in coords)):
        raise TypeError("crop region must be tuple of 4 ints")
    if any(coordinate < 0 for coordinate in coords):
        raise ValueError("crop coordinates must be >= 0")
    (x0, y0, x1, y1) = coords
    # Order the corners so the stored region always runs top-left -> bottom-right. The stored
    # values are used directly as slice bounds, and a reversed pair would select an empty image.
    left, right = min(x0, x1), max(x0, x1)
    top, bottom = min(y0, y1), max(y0, y1)
    # Internally we store the value in the form used to de-reference the image, which must be
    # one-past the end.
    self._crop = (left, top, right + 1, bottom + 1)

@property
def downscale(self) -> int:
"""Factor to downscale each frame by. Will always be >= 1, where 1
Expand Down Expand Up @@ -892,6 +927,33 @@ def detect_scenes(
if end_time is not None and isinstance(end_time, (int, float)) and end_time < 0:
raise ValueError("end_time must be greater than or equal to 0!")

effective_frame_size = video.frame_size
if self._crop:
logger.debug(f"Crop set: {self.crop}")
x0, y0, x1, y1 = self._crop
min_x, min_y = (min(x0, x1), min(y0, y1))
max_x, max_y = (max(x0, x1), max(y0, y1))
frame_width, frame_height = video.frame_size
if min_x >= frame_width or min_y >= frame_height:
raise ValueError("crop starts outside video boundary")
if max_x >= frame_width or max_y >= frame_height:
logger.warning("Warning: crop ends outside of video boundary.")
effective_frame_size = (
1 + min(max_x, frame_width) - min_x,
1 + min(max_y, frame_height) - min_y,
)
# Calculate downscale factor and log effective resolution.
if self.auto_downscale:
downscale_factor = compute_downscale_factor(max(effective_frame_size))
else:
downscale_factor = self.downscale
logger.debug(
"Processing resolution: %d x %d, downscale: %1.1f",
int(effective_frame_size[0] / downscale_factor),
int(effective_frame_size[1] / downscale_factor),
downscale_factor,
)

self._base_timecode = video.base_timecode

# TODO: Figure out a better solution for communicating framerate to StatsManager.
Expand All @@ -911,19 +973,6 @@ def detect_scenes(
else:
total_frames = video.duration.get_frames() - start_frame_num

# Calculate the desired downscale factor and log the effective resolution.
if self.auto_downscale:
downscale_factor = compute_downscale_factor(frame_width=video.frame_size[0])
else:
downscale_factor = self.downscale
if downscale_factor > 1:
logger.info(
"Downscale factor set to %d, effective resolution: %d x %d",
downscale_factor,
video.frame_size[0] // downscale_factor,
video.frame_size[1] // downscale_factor,
)

progress_bar = None
if show_progress:
progress_bar = tqdm(
Expand Down Expand Up @@ -980,7 +1029,7 @@ def _decode_thread(
self,
video: VideoStream,
frame_skip: int,
downscale_factor: int,
downscale_factor: float,
end_time: FrameTimecode,
out_queue: queue.Queue,
):
Expand Down Expand Up @@ -1021,12 +1070,16 @@ def _decode_thread(
# Skip processing frames that have an incorrect size.
continue

if downscale_factor > 1:
if self._crop:
(x0, y0, x1, y1) = self._crop
frame_im = frame_im[y0:y1, x0:x1]

if downscale_factor > 1.0:
frame_im = cv2.resize(
frame_im,
(
round(frame_im.shape[1] / downscale_factor),
round(frame_im.shape[0] / downscale_factor),
max(1, round(frame_im.shape[1] / downscale_factor)),
max(1, round(frame_im.shape[0] / downscale_factor)),
),
interpolation=self._interpolation.value,
)
Expand Down
5 changes: 5 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ def test_cli_default_detector():
assert invoke_scenedetect("-i {VIDEO} time {TIME}", config_file=None) == 0


def test_cli_crop():
    """Run `scenedetect` with the `--crop` option and expect `invoke_scenedetect` to return 0."""
    command = "-i {VIDEO} --crop 0 0 256 256 time {TIME}"
    result = invoke_scenedetect(command, config_file=None)
    assert result == 0


@pytest.mark.parametrize("info_command", ["help", "about", "version"])
def test_cli_info_command(info_command):
"""Test `scenedetect` info commands (e.g. help, about)."""
Expand Down
35 changes: 35 additions & 0 deletions tests/test_scene_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import os.path
from typing import List

import pytest

from scenedetect.backends.opencv import VideoStreamCv2
from scenedetect.detectors import AdaptiveDetector, ContentDetector
from scenedetect.frame_timecode import FrameTimecode
Expand Down Expand Up @@ -255,3 +257,36 @@ def test_detect_scenes_callback_adaptive(test_video_file):
scene_list = sm.get_scene_list()
assert [start for start, end in scene_list] == TEST_VIDEO_START_FRAMES_ACTUAL
assert fake_callback.scene_list == TEST_VIDEO_START_FRAMES_ACTUAL[1:]


def test_detect_scenes_crop(test_video_file):
    """Run scene detection from 00:00:05 to 00:00:15 with a crop region and auto downscale set.

    The start of every detected scene must match TEST_VIDEO_START_FRAMES_ACTUAL.
    """
    video = VideoStreamCv2(test_video_file)
    manager = SceneManager()
    manager.crop = (10, 10, 1900, 1000)
    manager.add_detector(ContentDetector())

    fps = video.frame_rate
    seek_target = FrameTimecode("00:00:05", fps)
    stop_at = FrameTimecode("00:00:15", fps)
    video.seek(seek_target)
    manager.auto_downscale = True

    # The return value of detect_scenes is not needed; results are read from the manager.
    manager.detect_scenes(video=video, end_time=stop_at)
    scene_starts = [first for first, _last in manager.get_scene_list()]
    assert scene_starts == TEST_VIDEO_START_FRAMES_ACTUAL


def test_crop_invalid():
    """Check which values the `SceneManager.crop` setter accepts and which it rejects."""
    manager = SceneManager()

    # These assignments must all succeed without raising.
    for accepted in (None, (0, 0, 0, 0), (1, 1, 0, 0), (0, 0, 1, 1)):
        manager.crop = accepted

    # Anything that is not a 4-element region is rejected with TypeError.
    for wrong_shape in (1, (1, 1), (1, 1, 1)):
        with pytest.raises(TypeError):
            manager.crop = wrong_shape

    # Negative coordinates are rejected with ValueError.
    with pytest.raises(ValueError):
        manager.crop = (1, 1, 1, -1)
3 changes: 3 additions & 0 deletions website/pages/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -588,3 +588,6 @@ Development
- [bugfix] Fix `ContentDetector` crash when using callbacks [#416](https://github.com/Breakthrough/PySceneDetect/issues/416) [#420](https://github.com/Breakthrough/PySceneDetect/issues/420)
- [general] Timecodes of the form `MM:SS[.nnn]` are now processed correctly [#443](https://github.com/Breakthrough/PySceneDetect/issues/443)
- [api] The `save_to_csv` function now works correctly with paths from the `pathlib` module
- [feature] Add ability to crop input video before processing [#302](https://github.com/Breakthrough/PySceneDetect/issues/302) [#449](https://github.com/Breakthrough/PySceneDetect/issues/449)
- [cli] Add `--crop` option to `scenedetect` command and config file to crop video frames before scene detection
- [api] Add `crop` property to `SceneManager` to crop video frames before scene detection

0 comments on commit eef0ade

Please sign in to comment.