# upscale.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import io
import logging
import sys
import time
from collections import OrderedDict, defaultdict
from contextlib import nullcontext
from enum import Enum
from pathlib import Path
from threading import Lock, Thread
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
import torch
from fs import zipfs
from rich import print
from rich.progress import (
    BarColumn,
    Progress,
    SpinnerColumn,
    TaskID,
    TimeRemainingColumn,
)
from wand.image import Image as WandImage

import utils.architecture as arch
import utils.dataops as ops


class SeamlessOptions(str, Enum):
    """Border strategies used to pad an image before upscaling.

    Each member selects how the 16-pixel border added by ``Upscale.image``
    is filled; the border is cropped away again after upscaling.
    """

    # Border wraps around to the opposite edge (cv2.BORDER_WRAP)
    tile = "tile"
    # Border mirrors the image content (cv2.BORDER_REFLECT_101)
    mirror = "mirror"
    # Border repeats the outermost pixels (cv2.BORDER_REPLICATE)
    replicate = "replicate"
    # Border is a constant all-zero value (cv2.BORDER_CONSTANT)
    alpha_pad = "alpha_pad"


class AlphaOptions(str, Enum):
    """Strategies for handling a 4-channel image with a 3-channel model.

    The member is consulted by ``Upscale.upscale`` only when the input has
    four channels and the model takes and produces three.
    """

    # Drop the alpha channel; the result gets an alpha channel added back by cv2
    no_alpha = "no_alpha"
    # Upscale the image composited with its alpha in two ways and derive
    # alpha from the difference of the two results
    bas = "bas"
    # Upscale the alpha channel on its own as a 3-channel image
    alpha_separately = "alpha_separately"
    # Upscale channels 0-2 and channels 1-3 and take alpha from the second pass
    swapping = "swapping"


class ModelInfo:
    """Record tying a loaded network to a device and its channel counts."""

    # Device the network is meant to run on
    device: torch.device
    # The instantiated network
    model: Union[arch.nn.Module, arch.RRDBNet, arch.SPSRNet]
    # Number of input / output channels of the network
    in_nc: int
    out_nc: int

    def __init__(self, device, model, in_nc, out_nc) -> None:
        """Store the given values unchanged on the instance."""
        self.device, self.model = device, model
        self.in_nc, self.out_nc = in_nc, out_nc


class Upscale:
    """Upscales images (single arrays or whole folders) with a chain of models."""

    # --- Options copied from the constructor arguments ---
    model_str: str = None
    seamless: SeamlessOptions = None
    cpu: bool = None
    fp16: bool = None
    # device_id: int = None
    multi_gpu: bool = None
    cache_max_split_depth: bool = None
    binary_alpha: bool = None
    ternary_alpha: bool = None
    alpha_threshold: float = None
    alpha_boundary_offset: float = None
    alpha_mode: AlphaOptions = None
    imagemagick: bool = None
    jpg: bool = None
    resize: int = None
    zip: bool = None
    log: logging.Logger = None

    # --- Runtime state ---
    # NOTE(review): `devices`, `model_chain` and `models` are mutable objects
    # defined at class level, so they are shared by every instance unless an
    # instance rebinds them.
    # Maps each usable device to the locks guarding it
    devices: Dict[torch.device, List[Lock]] = {}
    # Model paths (or "path@amount&path@amount" interpolations) applied in order
    model_chain: List[str] = []
    # Channel counts and scale of the most recently loaded model
    in_nc: int = None
    out_nc: int = None
    last_scale: int = 1
    models: Dict[str, List[ModelInfo]] = defaultdict(
        list
    )  # {model_path: [model_info,...],...}
    # Key into `models` of the model currently selected for upscaling
    current_model: str = None

    def __init__(
        self,
        model: str,
        seamless: Optional[SeamlessOptions] = None,
        cpu: bool = False,
        fp16: bool = False,
        device_id: int = 0,
        multi_gpu: bool = False,  # TODO Change to a list of device ids
        cache_max_split_depth: bool = False,
        binary_alpha: bool = False,
        ternary_alpha: bool = False,
        alpha_threshold: float = 0.5,
        alpha_boundary_offset: float = 0.2,
        alpha_mode: AlphaOptions = AlphaOptions.alpha_separately,
        imagemagick: bool = False,
        jpg: bool = False,
        resize: int = 100,
        zip: bool = False,
        log: logging.Logger = logging.getLogger(),
    ) -> None:
        """
        Store the options, register the devices and resolve the model chain.

                Parameters:
                        model: Model chain; models are separated by "+" or ">",
                            interpolated models by "|" or "&" with the amount
                            after "@" or ":" (e.g. "4xBox:25&4xPSNR:75")
                        seamless: Border mode applied around each image, if any
                        cpu: Use the CPU instead of a CUDA device
                        fp16: Switch torch's default tensor type to half precision
                        device_id: CUDA device index used when multi_gpu is off
                        multi_gpu: Register every visible CUDA device
                        cache_max_split_depth: Forwarded to the split logic in image()
                        binary_alpha / ternary_alpha: Quantise the upscaled alpha
                        alpha_threshold / alpha_boundary_offset: Quantisation bounds
                        alpha_mode: How a 4-channel image is fed to a 3-channel
                            model; a falsy value selects alpha_separately
                        imagemagick: Post-process/save through Wand (disabled
                            automatically in multi GPU mode)
                        jpg: Write JPEG instead of PNG
                        resize: Output size in percent of the upscaled size
                        zip: Write folder results into a zip archive
                        log: Logger used for all messages

        Exits the process (via __check_model_path) when a model file is missing.
        """
        # The class-level defaults for these containers are shared mutable
        # objects; give every instance its own so that devices and cached
        # models cannot leak from one Upscale instance into another.
        self.devices = {}
        self.models = defaultdict(list)

        self.model_str = model
        self.seamless = seamless
        self.cpu = cpu
        self.fp16 = fp16
        self.multi_gpu = multi_gpu
        if self.multi_gpu:
            for i in range(torch.cuda.device_count()):
                self.devices[torch.device(f"cuda:{i}")] = [Lock()]
                # Uncomment to upscale 2 images per device
                # self.devices[torch.device(f"cuda:{i}")] = [Lock(), Lock()]
            # Uncomment to use the cpu
            # self.devices[torch.device("cpu")] = [Lock()]
        else:
            single_device = torch.device("cpu" if self.cpu else f"cuda:{device_id}")
            self.devices[single_device] = [Lock()]
        self.cache_max_split_depth = cache_max_split_depth
        self.binary_alpha = binary_alpha
        self.ternary_alpha = ternary_alpha
        self.alpha_threshold = alpha_threshold
        self.alpha_boundary_offset = alpha_boundary_offset
        # The fallback must come from the enum class: the original looked it
        # up on the (falsy, e.g. None) argument and raised AttributeError.
        self.alpha_mode = alpha_mode if alpha_mode else AlphaOptions.alpha_separately
        self.imagemagick = imagemagick
        self.log = log
        if self.imagemagick and self.multi_gpu:
            self.log.warning("Multi GPU mode detected. ImageMagick mode disabled.")
            self.imagemagick = False
        self.jpg = jpg
        self.resize = resize
        self.zip = zip
        if self.fp16:
            torch.set_default_tensor_type(
                torch.HalfTensor if self.cpu else torch.cuda.HalfTensor
            )

        chain_separator = "+" if "+" in self.model_str else ">"
        self.model_chain = self.model_str.split(chain_separator)

        for idx, model in enumerate(self.model_chain):
            interpolations = (
                model.split("|") if "|" in self.model_str else model.split("&")
            )

            if len(interpolations) > 1:
                for i, interpolation in enumerate(interpolations):
                    amount_separator = "@" if "@" in interpolation else ":"
                    # Split on the LAST separator so that a path which itself
                    # contains the separator (e.g. a drive letter "C:") works.
                    interp_model, interp_amount = interpolation.rsplit(
                        amount_separator, 1
                    )
                    interp_model = self.__check_model_path(interp_model)
                    # Normalised form understood by load_model
                    interpolations[i] = f"{interp_model}@{interp_amount}"
                self.model_chain[idx] = "&".join(interpolations)
            else:
                self.model_chain[idx] = self.__check_model_path(model)
        print(
            'Model{:s}: "{:s}"'.format(
                "s" if len(self.model_chain) > 1 else "",
                ", ".join(self.model_chain),
            )
        )

    def get_available_device(
        self, sleep_time=0.25, first_lock=True
    ) -> Tuple[torch.device, int]:
        device: torch.device = None
        while device == None:
            for d, locks in self.devices.items():
                num_lock = 0
                if first_lock:
                    lock = locks[0]
                else:
                    lock = None
                    for n in range(len(locks)):
                        if not locks[n].locked():
                            lock = locks[n]
                            break
                        num_lock += 1
                if lock != None and not lock.locked():
                    device = d
                    lock.acquire()
                    break
            if device == None:
                # self.log.warning(f"No GPU available. Waiting...")
                time.sleep(sleep_time)
        return device, num_lock

    def image(
        self,
        img: np.ndarray,
        device: torch.device = None,
        # progress: Progress = None,
        # progress_text: str = "",
        multi_gpu_release_device=True,
    ) -> np.ndarray:
        """
        Run one image through every model of the chain.

                Parameters:
                        img: The image to upscale; a 2-D array is converted
                            to 3 channels first
                        device: Device to use. When omitted, a device is
                            acquired in multi GPU mode, otherwise the single
                            registered device is used
                        multi_gpu_release_device: In multi GPU mode, release
                            the first lock of the device when done

                Returns:
                        img: The upscaled image as uint8, with the seamless
                        border (if any) cropped away
        """
        self.in_nc = None
        self.out_nc = None

        if device is None:
            if self.multi_gpu:
                device, _ = self.get_available_device()
            else:
                device = list(self.devices.keys())[0]

        # Maximum split depth found for each (device, position in chain).
        # Kept on the instance so that later images can reuse it; a per-call
        # dict can never produce a cache hit.
        # NOTE(review): assumes a depth found for one image is valid for the
        # following ones (i.e. images of similar size) - confirm.
        split_depths = self.__dict__.setdefault("_split_depth_cache", {})

        try:
            if len(img.shape) < 3:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

            # Seamless modes: surround the image with a 16 pixel border
            if self.seamless == SeamlessOptions.tile:
                img = cv2.copyMakeBorder(img, 16, 16, 16, 16, cv2.BORDER_WRAP)
            elif self.seamless == SeamlessOptions.mirror:
                img = cv2.copyMakeBorder(img, 16, 16, 16, 16, cv2.BORDER_REFLECT_101)
            elif self.seamless == SeamlessOptions.replicate:
                img = cv2.copyMakeBorder(img, 16, 16, 16, 16, cv2.BORDER_REPLICATE)
            elif self.seamless == SeamlessOptions.alpha_pad:
                img = cv2.copyMakeBorder(
                    img, 16, 16, 16, 16, cv2.BORDER_CONSTANT, value=[0, 0, 0, 0]
                )
            final_scale: int = 1

            for i, model_path in enumerate(self.model_chain):
                # Load the model so we can access the scale
                # NOTE(review): load_model sets self.current_model, which is
                # shared by all threads - concurrent calls with a multi-model
                # chain may interfere; confirm before relying on it.
                self.load_model(model_path)

                cache_key = (device, i)
                # Only pass max_depth when a depth is known for this entry;
                # indexing unconditionally raised KeyError on the second
                # model of a chain.
                if self.cache_max_split_depth and cache_key in split_depths:
                    rlt, depth = ops.auto_split_upscale(
                        img,
                        device,
                        self.upscale,
                        self.last_scale,
                        max_depth=split_depths[cache_key],
                    )
                else:
                    rlt, depth = ops.auto_split_upscale(
                        img, device, self.upscale, self.last_scale
                    )
                    split_depths[cache_key] = depth

                final_scale *= self.last_scale

                # This is for model chaining
                img = rlt.astype("uint8")

            if self.seamless:
                img = self.crop_seamless(img, final_scale)
        finally:
            # Always hand the device back, even when upscaling failed;
            # otherwise every thread waiting for this device blocks forever.
            if self.multi_gpu and multi_gpu_release_device:
                self.devices[device][0].release()

        return img

    def __folder_thread_func(
        self,
        img: np.ndarray,
        zip_fs: zipfs.WriteZipFS,
        img_output_path_rel: Path,
        device: torch.device = None,
        task_upscaling: TaskID = None,
        progress: Progress = None,
        progress_text: str = "",
        output_zip_path: Path = None,
    ):
        """
        Upscale one image and store the result (worker for folder()).

                Parameters:
                        img: The image to upscale
                        zip_fs: Open zip filesystem, used only in zip mode
                        img_output_path_rel: Destination path of the result
                        device: Device to upscale on; None lets image() choose
                        task_upscaling: Progress task to advance when done
                        progress: Progress display; may be None
                        progress_text: Currently unused
                        output_zip_path: Path of the zip archive (zip mode);
                            entries are stored relative to its parent folder
        """
        if device is None:
            device_name = "auto-selected device"
        elif device.type == "cuda":
            device_name = torch.cuda.get_device_name(device.index)
        else:
            device_name = "CPU"
        # Announce the work before it starts rather than after it finished
        self.log.info(f'Upscaling "{img_output_path_rel.name}" using "{device_name}"')

        # img = self.image(img, device, progress, progress_text)
        img = self.image(img, device)

        if self.imagemagick:
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)
            img = WandImage.from_array(img)
            if self.jpg:
                # https://developers.google.com/speed/docs/insights/OptimizeImages
                # img.format = "jpg"
                img.sampling_factors = "4:2:0"
                img.interlace_scheme = "jpeg"
                img.colorspace = "srgb"
            else:
                img.format = "png"
            img.strip()
            if self.resize != 100:
                img.transform(resize=f"{self.resize}%")
        elif self.resize != 100:
            width = int(img.shape[1] * self.resize / 100)
            height = int(img.shape[0] * self.resize / 100)
            img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

        if self.zip:
            if self.imagemagick:
                buffer = io.BytesIO()
                img.save(file=buffer)
                buffer.seek(0)
            else:
                is_success, encoded = cv2.imencode(
                    ".jpg" if self.jpg else ".png", img
                )
                # Do not store a bogus entry when encoding failed
                buffer = io.BytesIO(encoded) if is_success else None
            if buffer is None:
                self.log.error(f'Failed to encode "{img_output_path_rel.name}"')
            else:
                img_output_path_rel = img_output_path_rel.relative_to(
                    output_zip_path.parent
                )
                parent_dir = img_output_path_rel.parent.as_posix()
                if not zip_fs.isdir(parent_dir):
                    zip_fs.makedirs(parent_dir)
                zip_fs.writefile(
                    img_output_path_rel.as_posix(),
                    buffer,
                )
        else:
            destination = str(img_output_path_rel.absolute())
            if self.imagemagick:
                img.save(filename=destination)
            elif not cv2.imwrite(destination, img):
                # imwrite reports failure through its return value
                self.log.error(f'Failed to write "{destination}"')
        if progress is not None:
            progress.advance(task_upscaling)

    def folder(
        self,
        input: Path,
        output: Path,
        skip_existing: bool = False,
        reverse: bool = False,
        delete_input: bool = False,
    ) -> None:
        """
        Upscale every image found (recursively) below a folder.

                Parameters:
                        input: Folder that is searched for images
                        output: Folder the results (or the zip archive) go to;
                            created when missing
                        skip_existing: Skip images whose output file exists,
                            or exit when the zip archive already exists
                        reverse: Process the images in reverse sorted order
                        delete_input: Delete each input file once handled

        Exits the process with status 1 when input is missing or is a file,
        or when output is a file.
        """
        # TODO preserve folder structure on/off
        input = input.resolve()
        output = output.resolve()
        if not input.exists():
            self.log.error(f'Folder "{input}" does not exist.')
            sys.exit(1)
        elif input.is_file():
            self.log.error(f'Folder "{input}" is a file.')
            sys.exit(1)
        elif output.is_file():
            self.log.error(f'Folder "{output}" is a file.')
            sys.exit(1)
        elif not output.exists():
            output.mkdir(parents=True)

        images: List[Path] = []
        for ext in ["png", "jpg", "jpeg", "gif", "bmp", "tiff", "tga"]:
            images.extend(input.glob(f"**/*.{ext}"))
        images = sorted(images, reverse=reverse)
        index_width = len(str(len(images)))

        output_zip_path = None
        if self.zip:
            output_zip_path = output.joinpath(
                f"{input.stem}_{'_'.join([Path(x).stem for x in self.model_chain])}.zip"
            )
            if skip_existing and output_zip_path.is_file():
                self.log.warning(f"Zip {output_zip_path.stem} already exists, skipping")
                # sys.exit instead of the builtin exit(), which is only
                # installed by the site module
                sys.exit()
        with Progress(
            # SpinnerColumn(),
            "[progress.description]{task.description}",
            BarColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%",
            TimeRemainingColumn(),
        ) as progress:
            task_upscaling = progress.add_task("Upscaling", total=len(images))
            if self.zip:
                cm = zipfs.WriteZipFS(output_zip_path)
            else:
                cm = nullcontext()
            with cm as zip_fs:
                threads = []
                for idx, img_path in enumerate(images, 1):
                    img_input_path_rel = img_path.relative_to(input)
                    output_dir = output.joinpath(img_input_path_rel).parent
                    img_output_path_rel = output_dir.joinpath(
                        f"{img_path.stem}.{'jpg' if self.jpg else 'png'}"
                    )
                    if not self.zip:
                        output_dir.mkdir(parents=True, exist_ok=True)
                    if not self.zip and skip_existing and img_output_path_rel.is_file():
                        self.log.warning("Already exists, skipping")
                        if delete_input:
                            img_path.unlink(missing_ok=True)
                        progress.advance(task_upscaling)
                        continue
                    # read image
                    if img_path.suffix.lower() in (".bmp", ".tga"):
                        with WandImage(filename=str(img_path.absolute())) as wimg:
                            if wimg.format == "TGA":
                                wimg.flip()
                            img = np.array(wimg)
                            # Swaps channels 0 and 2 of the Wand array
                            img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA)
                    else:
                        img = cv2.imread(str(img_path.absolute()), cv2.IMREAD_UNCHANGED)

                    if img is None:
                        # cv2.imread signals an unreadable/unsupported file by
                        # returning None. Skip it BEFORE a device is acquired:
                        # a worker failing on None would never release its
                        # device lock and the loop would wait forever. The
                        # input file is deliberately kept.
                        self.log.warning(
                            f'Could not read "{img_input_path_rel}", skipping'
                        )
                        progress.advance(task_upscaling)
                        continue

                    progress_text = (
                        f'{str(idx).zfill(index_width)} - "{img_input_path_rel}"'
                    )
                    if self.multi_gpu:
                        device, _ = self.get_available_device()
                    else:
                        device = list(self.devices.keys())[0]

                    folder_thread_func_args = {
                        "img": img,
                        "zip_fs": zip_fs,
                        "img_output_path_rel": img_output_path_rel,
                        "device": device,
                        "task_upscaling": task_upscaling,
                        "progress": progress,
                        "progress_text": progress_text,
                        "output_zip_path": output_zip_path,
                    }
                    if self.multi_gpu:
                        worker = Thread(
                            target=self.__folder_thread_func,
                            kwargs=folder_thread_func_args,
                        )
                        threads.append(worker)
                        worker.daemon = True
                        worker.start()
                    else:
                        self.__folder_thread_func(**folder_thread_func_args)

                    # NOTE(review): in multi GPU mode the input is deleted as
                    # soon as the worker has started, i.e. before its result
                    # is known to be written - confirm this is acceptable.
                    if delete_input:
                        img_path.unlink(missing_ok=True)

                # Keep the zip filesystem open until every worker is done
                for thread in threads:
                    thread.join()

    def __check_model_path(self, model_path: str) -> str:
        if Path(model_path).is_file():
            return model_path
        elif Path("./models/").joinpath(model_path).is_file():
            return str(Path("./models/").joinpath(model_path))
        else:
            self.log.error(f'Model "{model_path}" does not exist.')
            sys.exit(1)

    # This code is a somewhat modified version of BlueAmulet's fork of ESRGAN by Xinntao
    def process(self, img: np.ndarray, device: torch.device) -> np.ndarray:
        """
        Does the processing part of ESRGAN. This method only exists because the same block of code needs to be ran twice for images with transparency.

                Parameters:
                        img (array): The image to process

                Returns:
                        rlt (array): The processed image
        """
        if img.shape[2] == 3:
            img = img[:, :, [2, 1, 0]]
        elif img.shape[2] == 4:
            img = img[:, :, [2, 1, 0, 3]]
        img = torch.from_numpy(np.transpose(img, (2, 0, 1))).float()
        if self.fp16:
            img = img.half()
        img_LR = img.unsqueeze(0)
        # img_LR = img_LR.to(list(self.devices.keys())[0])
        img_LR = img_LR.to(device)

        # model_tuple = [m for m in self.models if m[0] == self.current_model][0]
        # model = model_tuple[1]
        model_info = [
            mi for mi in self.models[self.current_model] if mi.device == device
        ][0]
        # I don't know why but it is necessary to add .to(device)
        model = model_info.model.to(device)

        output = (
            model(img_LR).data.squeeze(0).to(device).float().cpu().clamp_(0, 1).numpy()
        )
        if output.shape[0] == 3:
            output = output[[2, 1, 0], :, :]
        elif output.shape[0] == 4:
            output = output[[2, 1, 0, 3], :, :]
        output = np.transpose(output, (1, 2, 0))
        # device_lock = self.devices[device][0]
        # device_lock.release()
        return output

    def load_model(self, model_path: str):
        """
        Make `model_path` the current model, loading it on first use.

        Sets self.current_model, self.in_nc, self.out_nc and self.last_scale.
        On first use the network is built from the state dict and one
        ModelInfo per registered device is stored in self.models.

                Parameters:
                        model_path: Path of a model file, or an interpolation
                            of two models written "pathA@amount&pathB@amount"
                            (amounts in percent)
        """
        import copy  # local import: only needed for the per-device copies

        self.current_model = model_path
        cached = self.models[model_path]
        if cached:
            # Restore the values that belong to THIS model. Without this,
            # last_scale keeps the scale of whichever model was loaded last,
            # which is wrong for chains of models with different scales.
            info = cached[0]
            self.in_nc = info.in_nc
            self.out_nc = info.out_nc
            self.last_scale = getattr(info, "scale", self.last_scale)
            return

        # NOTE: torch.load unpickles the file - only load models from
        # sources you trust. map_location="cpu" lets checkpoints saved on
        # another (or missing) CUDA device load; the model is moved to the
        # target devices below.
        # interpolating OTF, example: 4xBox:25&4xPSNR:75
        if (":" in model_path or "@" in model_path) and (
            "&" in model_path or "|" in model_path
        ):
            interps = model_path.split("&")[:2]
            path_1, amount_1 = interps[0].rsplit("@", 1)
            path_2, amount_2 = interps[1].rsplit("@", 1)
            model_1 = torch.load(path_1, map_location="cpu")
            model_2 = torch.load(path_2, map_location="cpu")
            weight_1 = int(amount_1) / 100
            weight_2 = int(amount_2) / 100
            state_dict = OrderedDict()
            for k, v_1 in model_1.items():
                state_dict[k] = weight_1 * v_1 + weight_2 * model_2[k]
        else:
            state_dict = torch.load(model_path, map_location="cpu")

        if "conv_first.weight" in state_dict:
            # Rename the keys of a new-format state dict to the old layout
            self.log.info("Attempting to convert and load a new-format model")
            old_net = {}

            old_net["model.0.weight"] = state_dict["conv_first.weight"]
            old_net["model.0.bias"] = state_dict["conv_first.bias"]

            for k in state_dict:
                if "RDB" in k:
                    ori_k = k.replace("RRDB_trunk.", "model.1.sub.")
                    if ".weight" in k:
                        ori_k = ori_k.replace(".weight", ".0.weight")
                    elif ".bias" in k:
                        ori_k = ori_k.replace(".bias", ".0.bias")
                    old_net[ori_k] = state_dict[k]

            old_net["model.1.sub.23.weight"] = state_dict["trunk_conv.weight"]
            old_net["model.1.sub.23.bias"] = state_dict["trunk_conv.bias"]
            old_net["model.3.weight"] = state_dict["upconv1.weight"]
            old_net["model.3.bias"] = state_dict["upconv1.bias"]
            old_net["model.6.weight"] = state_dict["upconv2.weight"]
            old_net["model.6.bias"] = state_dict["upconv2.bias"]
            old_net["model.8.weight"] = state_dict["HRconv.weight"]
            old_net["model.8.bias"] = state_dict["HRconv.bias"]
            old_net["model.10.weight"] = state_dict["conv_last.weight"]
            old_net["model.10.bias"] = state_dict["conv_last.bias"]
            state_dict = old_net

        # extract model information
        scale2 = 0
        max_part = 0
        plus = False
        nb = 0  # stays 0 only if no "x.y.sub.N.z" key exists
        if "f_HR_conv1.0.weight" in state_dict:
            kind = "SPSR"
            scalemin = 4
        else:
            kind = "ESRGAN"
            scalemin = 6
        for part in list(state_dict):
            parts = part.split(".")
            n_parts = len(parts)
            if n_parts == 5 and parts[2] == "sub":
                nb = int(parts[3])
            elif n_parts == 3:
                part_num = int(parts[1])
                if (
                    part_num > scalemin
                    and parts[0] == "model"
                    and parts[2] == "weight"
                ):
                    scale2 += 1
                if part_num > max_part:
                    max_part = part_num
                    self.out_nc = state_dict[part].shape[0]
            if "conv1x1" in part and not plus:
                plus = True

        upscale: int = 2 ** scale2
        self.in_nc = state_dict["model.0.weight"].shape[1]
        if kind == "SPSR":
            self.out_nc = state_dict["f_HR_conv1.0.weight"].shape[0]
        nf = state_dict["model.0.weight"].shape[0]

        model: Union[arch.nn.Module, arch.RRDBNet, arch.SPSRNet] = None
        if kind == "ESRGAN":
            model = arch.RRDBNet(
                self.in_nc,
                self.out_nc,
                nf,
                nb,
                gc=32,
                upscale=upscale,
                norm_type=None,
                act_type="leakyrelu",
                mode="CNA",
                upsample_mode="upconv",
                plus=plus,
            )
        elif kind == "SPSR":
            model = arch.SPSRNet(
                self.in_nc,
                self.out_nc,
                nf,
                nb,
                gc=32,
                upscale=upscale,
                norm_type=None,
                act_type="leakyrelu",
                mode="CNA",
                upsample_mode="upconv",
            )
        self.last_scale = upscale

        model.load_state_dict(state_dict, strict=True)
        del state_dict
        model.eval()
        for _, parameter in model.named_parameters():
            parameter.requires_grad = False

        devices = list(self.devices.keys())
        for device in devices:
            # Module.to() moves the module in place and returns the same
            # object, so with several devices each one needs its own copy;
            # otherwise all entries share one module that ends up on the
            # last device and is shuffled around by concurrent workers.
            device_model = copy.deepcopy(model) if len(devices) > 1 else model
            device_model = device_model.to(device)
            info = ModelInfo(device, device_model, self.in_nc, self.out_nc)
            # Remembered so that a later cache hit can restore last_scale
            info.scale = upscale
            self.models[model_path].append(info)

    # This code is a somewhat modified version of BlueAmulet's fork of ESRGAN by Xinntao
    def upscale(self, img: np.ndarray, device: torch.device) -> np.ndarray:
        """
        Upscales the image passed in with the specified model

                Parameters:
                        img: The image to upscale
                        device: The device to use

                Returns:
                        output: The processed image
        """

        img = img * 1.0 / np.iinfo(img.dtype).max

        model_info = [
            mi for mi in self.models[self.current_model] if mi.device == device
        ][0]
        last_in_nc = model_info.in_nc
        last_out_nc = model_info.out_nc

        if img.ndim == 3 and img.shape[2] == 4 and last_in_nc == 3 and last_out_nc == 3:
            # Fill alpha with white and with black, remove the difference
            if self.alpha_mode == AlphaOptions.bas:
                img1 = np.copy(img[:, :, :3])
                img2 = np.copy(img[:, :, :3])
                for c in range(3):
                    img1[:, :, c] *= img[:, :, 3]
                    img2[:, :, c] = (img2[:, :, c] - 1) * img[:, :, 3] + 1

                output1 = self.process(img1, device)
                output2 = self.process(img2, device)
                alpha = 1 - np.mean(output2 - output1, axis=2)
                output = np.dstack((output1, alpha))
                output = np.clip(output, 0, 1)
            # Upscale the alpha channel itself as its own image
            elif self.alpha_mode == AlphaOptions.alpha_separately:
                img1 = np.copy(img[:, :, :3])
                img2 = cv2.merge((img[:, :, 3], img[:, :, 3], img[:, :, 3]))
                output1 = self.process(img1, device)
                output2 = self.process(img2, device)
                output = cv2.merge(
                    (
                        output1[:, :, 0],
                        output1[:, :, 1],
                        output1[:, :, 2],
                        output2[:, :, 0],
                    )
                )
            # Use the alpha channel like a regular channel
            elif self.alpha_mode == AlphaOptions.swapping:
                img1 = cv2.merge((img[:, :, 0], img[:, :, 1], img[:, :, 2]))
                img2 = cv2.merge((img[:, :, 1], img[:, :, 2], img[:, :, 3]))
                output1 = self.process(img1, device)
                output2 = self.process(img2, device)
                output = cv2.merge(
                    (
                        output1[:, :, 0],
                        output1[:, :, 1],
                        output1[:, :, 2],
                        output2[:, :, 2],
                    )
                )
            # Remove alpha
            elif self.alpha_mode == AlphaOptions.no_alpha:
                img1 = np.copy(img[:, :, :3])
                output = self.process(img1, device)
                output = cv2.cvtColor(output, cv2.COLOR_BGR2BGRA)

            if self.binary_alpha:
                alpha = output[:, :, 3]
                threshold = self.alpha_threshold
                _, alpha = cv2.threshold(alpha, threshold, 1, cv2.THRESH_BINARY)
                output[:, :, 3] = alpha
            elif self.ternary_alpha:
                alpha = output[:, :, 3]
                half_transparent_lower_bound = (
                    self.alpha_threshold - self.alpha_boundary_offset
                )
                half_transparent_upper_bound = (
                    self.alpha_threshold + self.alpha_boundary_offset
                )
                alpha = np.where(
                    alpha < half_transparent_lower_bound,
                    0,
                    np.where(alpha <= half_transparent_upper_bound, 0.5, 1),
                )
                output[:, :, 3] = alpha
        else:
            if img.ndim == 2:
                img = np.tile(np.expand_dims(img, axis=2), (1, 1, min(last_in_nc, 3)))
            if img.shape[2] > last_in_nc:  # remove extra channels
                self.log.warning("Truncating image channels")
                img = img[:, :, :last_in_nc]
            # pad with solid alpha channel
            elif img.shape[2] == 3 and last_in_nc == 4:
                img = np.dstack((img, np.full(img.shape[:-1], 1.0)))
            output = self.process(img, device)

        output = (output * 255.0).round()

        return output

    def crop_seamless(self, img: np.ndarray, scale: int) -> np.ndarray:
        img_height, img_width = img.shape[:2]
        y, x = 16 * scale, 16 * scale
        h, w = img_height - (32 * scale), img_width - (32 * scale)
        img = img[y : y + h, x : x + w]
        return img