Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Real-ESRGAN, plus various fixes (hangs, async video, etc.) #1133

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ WORKDIR /video2x
RUN apt-get install -y --no-install-recommends \
python3.9 python3-pip python3.9-dev \
python3-opencv python3-pil \
mesa-vulkan-drivers cuda-drivers ffmpeg \
mesa-vulkan-drivers cuda-drivers ffmpeg libomp5 \
&& python3.9 -m pip install --no-cache-dir --no-index -f /wheels '.[all]' \
&& apt-get clean \
&& rm -rf /wheels /video2x /var/lib/apt/lists/*
Expand Down
465 changes: 253 additions & 212 deletions pdm.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pdm.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[venv]
backend = "venv"
15 changes: 8 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@ classifiers = [
"Topic :: Scientific/Engineering :: Image Processing",
]
dependencies = [
"ffmpeg-python>=0.2.0",
"loguru>=0.6.0",
"opencv-python>=4.9.0.80",
"pillow>=9.1.0",
"pynput>=1.7.6",
"rich>=12.0.0",
"numpy>=1.26.4",
"ffmpeg-python>=0.2.0",
"loguru>=0.6.0",
"pillow>=9.1.0",
"pynput>=1.7.6",
"rich>=12.0.0",
"numpy>=1.26.4",
]
dynamic = ["version"]

Expand All @@ -46,13 +45,15 @@ all = [
"rife-ncnn-vulkan-python>=1.2.1",
"realcugan-ncnn-vulkan-python>=1.0.2",
"anime4k-python>=1.1.3",
"realesrgan-ncnn-py>=2.0.0",
]
waifu2x = ["waifu2x-ncnn-vulkan-python>=1.0.4"]
srmd = ["srmd-ncnn-vulkan-python>=1.0.2"]
realsr = ["realsr-ncnn-vulkan-python>=1.0.6"]
rife = ["rife-ncnn-vulkan-python>=1.2.1"]
realcugan = ["realcugan-ncnn-vulkan-python>=1.0.2"]
anime4k = ["anime4k-python>=1.1.3"]
realesrgan = ["realesrgan-ncnn-py>=2.0.0"]

[project.urls]
homepage = "https://github.com/k4yt3x/video2x/"
Expand Down
48 changes: 48 additions & 0 deletions shell.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Nix expression that builds an FHS user environment named "video2x-env"
# and evaluates to its `.env` attribute.
#
# Arguments (all optional):
#   rev, sha256 - nixpkgs revision and the hash of the tarball fetched for it
#   nixpkgs     - package set source; defaults to a tarball fetched at `rev`
#   command     - passed as the environment's runScript (default: "bash")
#
# NOTE(review): the default tarball URL points at the arximboldi/nixpkgs
# repository rather than NixOS/nixpkgs — confirm this is intended.
{
rev ? "89c49874fb15f4124bf71ca5f42a04f2ee5825fd", # nixos-24.05
sha256 ? "07mr5xmdba3i5qw68kvxs0w1l70pv6pg636dqqxi6s91hiazv4n8",
nixpkgs ? builtins.fetchTarball {
name = "nixpkgs-${rev}";
url = "https://github.com/arximboldi/nixpkgs/archive/${rev}.tar.gz";
sha256 = sha256;
},
command ? "bash",
}:

# Bring the attributes of the imported nixpkgs set into scope for the rest
# of the expression.
with import nixpkgs {};

let
# Derivation whose only content is lib/libomp.so.5, a symlink to the
# libomp.so provided by llvmPackages.openmp.
# NOTE(review): presumably needed by a prebuilt binary that looks for the
# "libomp.so.5" name — confirm which dependency requires it.
libomp-5 = runCommand "libomp-5" {} ''
mkdir -p $out/lib
ln -s ${llvmPackages.openmp}/lib/libomp.so $out/lib/libomp.so.5
'';

in
(pkgs.buildFHSUserEnv {
name = "video2x-env";
# Packages made available inside the environment.
targetPkgs = pkgs: (with pkgs; [
python3
swig
pdm
vulkan-headers
vulkan-tools
vulkan-loader
linuxHeaders
glslang
shaderc
mesa
mesa.drivers
libGL
glib
libomp-5
llvmPackages.openmp
]);
# Shell snippet used as the environment profile: exports CC/CXX and appends
# /usr/lib to LD_LIBRARY_PATH.
profile = ''
# evdev fails to build when these are not set
export CC=cc
export CXX=c++

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib
'';
# Run the caller-supplied `command` inside the environment.
runScript = command;
}).env
23 changes: 15 additions & 8 deletions video2x/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from rich import print as rich_print

from . import __version__
from .video2x import LOGURU_FORMAT, Video2X
from .video2x import setup_logger, Video2X

LEGAL_INFO = f"""Video2X\t\t{__version__}
Author:\t\tK4YT3X
Expand All @@ -38,7 +38,8 @@
Contact:\ti@k4yt3x.com"""

# algorithms available for upscaling tasks
UPSCALING_ALGORITHMS = ["waifu2x", "srmd", "realsr", "realcugan", "anime4k"]
UPSCALING_ALGORITHMS = ["waifu2x", "srmd", "realsr", "realcugan", "anime4k",
"realesr-animevideov3", "realesrgan-x4plus-anime", "realesrgan-x4plus"]

# algorithms available for frame interpolation tasks
INTERPOLATION_ALGORITHMS = ["rife"]
Expand Down Expand Up @@ -80,6 +81,13 @@ def parse_arguments() -> argparse.Namespace:
choices=["trace", "debug", "info", "success", "warning", "error", "critical"],
default="info",
)
parser.add_argument(
"-L",
"--loglevel-ffmpeg",
choices=["trace", "debug", "info", "success", "warning", "error", "critical"],
default="info",
help="log level for ffmpeg processes",
)

# upscaler arguments
action = parser.add_subparsers(
Expand Down Expand Up @@ -177,12 +185,11 @@ def main() -> int:
# set logger level
if os.environ.get("LOGURU_LEVEL") is None:
os.environ["LOGURU_LEVEL"] = args.loglevel.upper()
if os.environ.get("LOGURU_FFMPEG_LEVEL") is None:
os.environ["LOGURU_FFMPEG_LEVEL"] = args.loglevel_ffmpeg.upper()

# remove default handler
logger.remove()

# add new sink with custom handler
logger.add(sys.stderr, colorize=True, format=LOGURU_FORMAT)
# Set the logger up
setup_logger()

# print package version and copyright notice
logger.opt(colors=True).info(f"<magenta>Video2X {__version__}</magenta>")
Expand Down Expand Up @@ -229,4 +236,4 @@ def main() -> int:


if __name__ == "__main__":
sys.exit(main())
os._exit(main())
43 changes: 34 additions & 9 deletions video2x/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(
frame_rate: float,
pil_ignore_max_image_pixels: bool = True,
) -> None:
self.is_done = False
self.input_path = input_path
self.input_width = input_width
self.input_height = input_height
Expand All @@ -76,15 +77,39 @@ def __init__(

self.decoder = subprocess.Popen(
ffmpeg.compile(
ffmpeg.input(input_path, r=frame_rate)["v"]
.output("pipe:1", format="rawvideo", pix_fmt="rgb24")
ffmpeg.input(
input_path,
thread_queue_size='128',
)["v"]
# Some files (particularly mkv), even if they have a
# more or less stable frame rate, have some frames
# whose timestamps are misaligned. With this filter we
# fully stabilize the frame rate, which is required to
# avoid issues since we lose frame timing information
# when converting to `rawvideo`.
#
# Note that we do not pass r=frame_rate to the input.
# If we did, this filter would not see the
# misalignment between the target frame rate and the
# frames and could not fix it!
.filter(
"fps",
fps=frame_rate,
)
.output(
"pipe:1",
format="rawvideo",
pix_fmt="rgb24",
fps_mode="cfr",
r=frame_rate,
)
.global_args("-hide_banner")
.global_args("-nostats")
.global_args("-nostdin")
.global_args(
"-loglevel",
LOGURU_FFMPEG_LOGLEVELS.get(
os.environ.get("LOGURU_LEVEL", "INFO").lower()
os.environ.get("LOGURU_FFMPEG_LEVEL", "INFO").lower()
),
),
overwrite_output=True,
Expand Down Expand Up @@ -118,10 +143,11 @@ def __iter__(self):
buffer = self.decoder.stdout.read(3 * self.input_width * self.input_height)

# automatically self-join and clean up after iterations are done
self.join()
self.is_done = True

def kill(self):
self.decoder.send_signal(signal.SIGKILL)
self.pipe_printer.stop()

def join(self):
# close PIPEs to prevent process from getting stuck
Expand Down Expand Up @@ -152,18 +178,17 @@ def run(self):
previous_frame = None
for frame_index, frame in enumerate(self.decoder):
while True:
# check for the stop signal
if self.running is False:
self.decoder.join()
return

with contextlib.suppress(Full):
self.tasks_queue.put(
(frame_index, previous_frame, frame, self.processing_settings),
timeout=0.1,
)
break

# check for the stop signal
if self.running is False:
return

previous_frame = frame

def stop(self):
Expand Down
14 changes: 10 additions & 4 deletions video2x/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,21 @@ def __init__(
output_height: int,
copy_audio: bool = True,
copy_subtitle: bool = True,
copy_data: bool = False,
copy_attachments: bool = False,
copy_data: bool = True,
copy_attachments: bool = True,
) -> None:
# create FFmpeg input for the original input video
original = ffmpeg.input(input_path)
original = ffmpeg.input(
input_path,
thread_queue_size="128",
)

# define frames as input
frames = ffmpeg.input(
"pipe:0",
format="rawvideo",
pix_fmt="rgb24",
thread_queue_size="128",
s=f"{output_width}x{output_height}",
r=frame_rate,
)
Expand Down Expand Up @@ -93,14 +97,15 @@ def __init__(
# cutoff=18000,
r=frame_rate,
map_metadata=1,
max_interleave_delta='0',
metadata="comment=Processed with Video2X",
)
.global_args("-hide_banner")
.global_args("-nostats")
.global_args(
"-loglevel",
LOGURU_FFMPEG_LOGLEVELS.get(
os.environ.get("LOGURU_LEVEL", "INFO").lower()
os.environ.get("LOGURU_FFMPEG_LEVEL", "INFO").lower()
),
),
overwrite_output=True,
Expand All @@ -116,6 +121,7 @@ def __init__(

def kill(self):
self.encoder.send_signal(signal.SIGKILL)
self.pipe_printer.stop()

def write(self, frame: Image.Image) -> None:
"""
Expand Down
Loading