k4yt3x · arximboldi · Feb 21, 2024 · Feb 28, 2024 · Jun 26, 2024 · Jun 27, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -35,7 +35,7 @@ WORKDIR /video2x
 RUN apt-get install -y --no-install-recommends \
         python3.9 python3-pip python3.9-dev \
         python3-opencv python3-pil \
-        mesa-vulkan-drivers cuda-drivers ffmpeg \
+        mesa-vulkan-drivers cuda-drivers ffmpeg libomp5 \
     && python3.9 -m pip install --no-cache-dir --no-index -f /wheels '.[all]' \
     && apt-get clean \
     && rm -rf /wheels /video2x /var/lib/apt/lists/*

diff --git a/pdm.lock b/pdm.lock
diff --git a/pdm.toml b/pdm.toml
@@ -0,0 +1,2 @@
+[venv]
+backend = "venv"
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,13 +28,12 @@ classifiers = [
   "Topic :: Scientific/Engineering :: Image Processing",
 ]
 dependencies = [
-  "ffmpeg-python>=0.2.0",
-  "loguru>=0.6.0",
-  "opencv-python>=4.9.0.80",
-  "pillow>=9.1.0",
-  "pynput>=1.7.6",
-  "rich>=12.0.0",
-  "numpy>=1.26.4",
+    "ffmpeg-python>=0.2.0",
+    "loguru>=0.6.0",
+    "pillow>=9.1.0",
+    "pynput>=1.7.6",
+    "rich>=12.0.0",
+    "numpy>=1.26.4",
 ]
 dynamic = ["version"]
 
@@ -46,13 +45,15 @@ all = [
   "rife-ncnn-vulkan-python>=1.2.1",
   "realcugan-ncnn-vulkan-python>=1.0.2",
   "anime4k-python>=1.1.3",
+  "realesrgan-ncnn-py>=2.0.0",
 ]
 waifu2x = ["waifu2x-ncnn-vulkan-python>=1.0.4"]
 srmd = ["srmd-ncnn-vulkan-python>=1.0.2"]
 realsr = ["realsr-ncnn-vulkan-python>=1.0.6"]
 rife = ["rife-ncnn-vulkan-python>=1.2.1"]
 realcugan = ["realcugan-ncnn-vulkan-python>=1.0.2"]
 anime4k = ["anime4k-python>=1.1.3"]
+realesrgan = ["realesrgan-ncnn-py>=2.0.0"]
 
 [project.urls]
 homepage = "https://github.com/k4yt3x/video2x/"

diff --git a/shell.nix b/shell.nix
@@ -0,0 +1,48 @@
+{
+  rev     ? "89c49874fb15f4124bf71ca5f42a04f2ee5825fd", # nixos-24.05
+  sha256  ? "07mr5xmdba3i5qw68kvxs0w1l70pv6pg636dqqxi6s91hiazv4n8",
+  nixpkgs ? builtins.fetchTarball {
+    name   = "nixpkgs-${rev}";
+    url    = "https://github.com/arximboldi/nixpkgs/archive/${rev}.tar.gz";
+    sha256 = sha256;
+  },
+  command ? "bash",
+}:
+
+with import nixpkgs {};
+
+let
+  libomp-5 = runCommand "libomp-5" {} ''
+    mkdir -p $out/lib
+    ln -s ${llvmPackages.openmp}/lib/libomp.so $out/lib/libomp.so.5
+  '';
+
+in
+(pkgs.buildFHSUserEnv {
+  name = "video2x-env";
+  targetPkgs = pkgs: (with pkgs; [
+    python3
+    swig
+    pdm
+    vulkan-headers
+    vulkan-tools
+    vulkan-loader
+    linuxHeaders
+    glslang
+    shaderc
+    mesa
+    mesa.drivers
+    libGL
+    glib
+    libomp-5
+    llvmPackages.openmp
+  ]);
+  profile = ''
+    # evdev fails to build when these are not set
+    export CC=cc
+    export CXX=c++
+
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib
+  '';
+  runScript = command;
+}).env
diff --git a/video2x/__main__.py b/video2x/__main__.py
@@ -29,7 +29,7 @@
 from rich import print as rich_print
 
 from . import __version__
-from .video2x import LOGURU_FORMAT, Video2X
+from .video2x import setup_logger, Video2X
 
 LEGAL_INFO = f"""Video2X\t\t{__version__}
 Author:\t\tK4YT3X
@@ -38,7 +38,8 @@
 Contact:\ti@k4yt3x.com"""
 
 # algorithms available for upscaling tasks
-UPSCALING_ALGORITHMS = ["waifu2x", "srmd", "realsr", "realcugan", "anime4k"]
+UPSCALING_ALGORITHMS = ["waifu2x", "srmd", "realsr", "realcugan", "anime4k",
+                        "realesr-animevideov3", "realesrgan-x4plus-anime", "realesrgan-x4plus"]
 
 # algorithms available for frame interpolation tasks
 INTERPOLATION_ALGORITHMS = ["rife"]
@@ -80,6 +81,13 @@ def parse_arguments() -> argparse.Namespace:
         choices=["trace", "debug", "info", "success", "warning", "error", "critical"],
         default="info",
     )
+    parser.add_argument(
+        "-L",
+        "--loglevel-ffmpeg",
+        choices=["trace", "debug", "info", "success", "warning", "error", "critical"],
+        default="info",
+        help="log level for ffmpeg processes",
+    )
 
     # upscaler arguments
     action = parser.add_subparsers(
@@ -177,12 +185,11 @@ def main() -> int:
         # set logger level
         if os.environ.get("LOGURU_LEVEL") is None:
             os.environ["LOGURU_LEVEL"] = args.loglevel.upper()
+        if os.environ.get("LOGURU_FFMPEG_LEVEL") is None:
+            os.environ["LOGURU_FFMPEG_LEVEL"] = args.loglevel_ffmpeg.upper()
 
-        # remove default handler
-        logger.remove()
-
-        # add new sink with custom handler
-        logger.add(sys.stderr, colorize=True, format=LOGURU_FORMAT)
+        # Set the logger up
+        setup_logger()
 
         # print package version and copyright notice
         logger.opt(colors=True).info(f"<magenta>Video2X {__version__}</magenta>")
@@ -229,4 +236,4 @@ def main() -> int:
 
 
 if __name__ == "__main__":
-    sys.exit(main())
+    os._exit(main())
diff --git a/video2x/decoder.py b/video2x/decoder.py
@@ -66,6 +66,7 @@ def __init__(
         frame_rate: float,
         pil_ignore_max_image_pixels: bool = True,
     ) -> None:
+        self.is_done = False
         self.input_path = input_path
         self.input_width = input_width
         self.input_height = input_height
@@ -76,15 +77,39 @@ def __init__(
 
         self.decoder = subprocess.Popen(
             ffmpeg.compile(
-                ffmpeg.input(input_path, r=frame_rate)["v"]
-                .output("pipe:1", format="rawvideo", pix_fmt="rgb24")
+                ffmpeg.input(
+                    input_path,
+                    thread_queue_size='128',
+                )["v"]
+                # Some files (particularly mkv), even if they have a
+                # more or less stable frame-rate, have some
+                # time-misaligned frames. With this filter we fully
+                # stabilize the frame-rate, which is required to
+                # avoid issues since we lose frame timing information
+                # when converting to `rawvideo`.
+                #
+                # Note that we do not pass r=frame_rate to the input.
+                # If we do, this filter does not see the
+                # misalignment between the target frame rate and the
+                # frames and can't fix them!
+                .filter(
+                    "fps",
+                    fps=frame_rate,
+                )
+                .output(
+                    "pipe:1",
+                    format="rawvideo",
+                    pix_fmt="rgb24",
+                    fps_mode="cfr",
+                    r=frame_rate,
+                )
                 .global_args("-hide_banner")
                 .global_args("-nostats")
                 .global_args("-nostdin")
                 .global_args(
                     "-loglevel",
                     LOGURU_FFMPEG_LOGLEVELS.get(
-                        os.environ.get("LOGURU_LEVEL", "INFO").lower()
+                        os.environ.get("LOGURU_FFMPEG_LEVEL", "INFO").lower()
                     ),
                 ),
                 overwrite_output=True,
@@ -118,10 +143,11 @@ def __iter__(self):
             buffer = self.decoder.stdout.read(3 * self.input_width * self.input_height)
 
         # automatically self-join and clean up after iterations are done
-        self.join()
+        self.is_done = True
 
     def kill(self):
         self.decoder.send_signal(signal.SIGKILL)
+        self.pipe_printer.stop()
 
     def join(self):
         # close PIPEs to prevent process from getting stuck
@@ -152,18 +178,17 @@ def run(self):
         previous_frame = None
         for frame_index, frame in enumerate(self.decoder):
             while True:
-                # check for the stop signal
-                if self.running is False:
-                    self.decoder.join()
-                    return
-
                 with contextlib.suppress(Full):
                     self.tasks_queue.put(
                         (frame_index, previous_frame, frame, self.processing_settings),
                         timeout=0.1,
                     )
                     break
 
+                # check for the stop signal
+                if self.running is False:
+                    return
+
             previous_frame = frame
 
     def stop(self):

diff --git a/video2x/encoder.py b/video2x/encoder.py
@@ -52,17 +52,21 @@ def __init__(
         output_height: int,
         copy_audio: bool = True,
         copy_subtitle: bool = True,
-        copy_data: bool = False,
-        copy_attachments: bool = False,
+        copy_data: bool = True,
+        copy_attachments: bool = True,
     ) -> None:
         # create FFmpeg input for the original input video
-        original = ffmpeg.input(input_path)
+        original = ffmpeg.input(
+            input_path,
+            thread_queue_size="128",
+        )
 
         # define frames as input
         frames = ffmpeg.input(
             "pipe:0",
             format="rawvideo",
             pix_fmt="rgb24",
+            thread_queue_size="128",
             s=f"{output_width}x{output_height}",
             r=frame_rate,
         )
@@ -93,14 +97,15 @@ def __init__(
                     # cutoff=18000,
                     r=frame_rate,
                     map_metadata=1,
+                    max_interleave_delta='0',
                     metadata="comment=Processed with Video2X",
                 )
                 .global_args("-hide_banner")
                 .global_args("-nostats")
                 .global_args(
                     "-loglevel",
                     LOGURU_FFMPEG_LOGLEVELS.get(
-                        os.environ.get("LOGURU_LEVEL", "INFO").lower()
+                        os.environ.get("LOGURU_FFMPEG_LEVEL", "INFO").lower()
                     ),
                 ),
                 overwrite_output=True,
@@ -116,6 +121,7 @@ def __init__(
 
     def kill(self):
         self.encoder.send_signal(signal.SIGKILL)
+        self.pipe_printer.stop()
 
     def write(self, frame: Image.Image) -> None:
         """