fixup #1: AttributeError
Kahsolt committed Jan 3, 2023
1 parent 33776e0 commit e89bdda
Showing 2 changed files with 54 additions and 45 deletions.
26 changes: 22 additions & 4 deletions README.md
@@ -1,9 +1,19 @@
# stable-diffusion-webui-sonar

Wrapped k-diffusion samplers with tricks to improve the generated image quality, extension script for AUTOMATIC1111/stable-diffusion-webui
Wrapped k-diffusion samplers with tricks to improve the generated image quality (maybe?), extension script for AUTOMATIC1111/stable-diffusion-webui

----

<p align="left">
<a href="https://github.com/Kahsolt/stable-diffusion-webui-sonar/commits"><img alt="Last Commit" src="https://img.shields.io/github/last-commit/Kahsolt/stable-diffusion-webui-sonar"></a>
<a href="https://github.com/Kahsolt/stable-diffusion-webui-sonar/issues"><img alt="GitHub issues" src="https://img.shields.io/github/issues/Kahsolt/stable-diffusion-webui-sonar"></a>
<a href="https://github.com/Kahsolt/stable-diffusion-webui-sonar/stargazers"><img alt="GitHub stars" src="https://img.shields.io/github/stars/Kahsolt/stable-diffusion-webui-sonar"></a>
<a href="https://github.com/Kahsolt/stable-diffusion-webui-sonar/network"><img alt="GitHub forks" src="https://img.shields.io/github/forks/Kahsolt/stable-diffusion-webui-sonar"></a>
<img alt="Language" src="https://img.shields.io/github/languages/top/Kahsolt/stable-diffusion-webui-sonar">
<img alt="License" src="https://img.shields.io/github/license/Kahsolt/stable-diffusion-webui-sonar">
<br/>
</p>

ℹ This is the sister repo of [https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel](https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel); it focuses on **single prompt optimization** rather than traveling between multiple prompts.

The core idea of Sonar is to search for similar (yet even better!) images in the **neighborhood** of some known image generated by a normal denoising process.
@@ -21,10 +31,18 @@ to get image latents with higher quality (~perhaps!), and just pray again for go
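
As a rough illustration, the shared trick behind these wrapped samplers is a momentum update: the denoiser's step direction is blended with a running history of past directions. A minimal sketch under an assumed formulation (the actual update and its `momentum` / `momentum_hist` sliders live in `scripts/sonar.py`; the values here are illustrative):

```python
def momentum_step(d, history_d, momentum=0.95, momentum_hist=0.75):
    # blend the current denoiser direction with the accumulated history
    blended_d = momentum * d + (1.0 - momentum) * history_d
    # update the running history of directions
    history_d = momentum_hist * history_d + (1.0 - momentum_hist) * d
    return blended_d, history_d
```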

### Change Log

⚪ Features

- 2022/11/27: add momentum on `Euler`, add hard ref-image guidance on `Naive`
- 2022/11/20: add an Euler-like `Naive`, the simplest difference-estimation-based sampler with momentum & gradient
- 2022/11/18: add momentum on `Euler a`

⚪ Fixups

- 2023/01/03: fix issue #1 caused by webui's recent updates (error `AttributeError: 'StableDiffusionProcessingTxt2Img' object has no attribute 'firstphase_height'`); see the sketch below

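A minimal sketch of the failure mode and the workaround (hypothetical snippet; the actual fix in `scripts/sonar.py` derives the hires target size from the newer `hr_scale` field):

```python
# older webui exposed p.firstphase_width / p.firstphase_height; newer builds
# removed them, so reading them raises AttributeError -- compute the target
# size from the fields that still exist instead
target_width  = int(p.width  * p.hr_scale)
target_height = int(p.height * p.hr_scale)
```
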

### Examples

⚪ momentum

@@ -102,12 +120,12 @@ This repo allows you to quickly implement your own k-diffusion samplers, follow

Easiest way to install it is to:
1. Go to the "Extensions" tab in the webui, switch to the "Install from URL" tab
2. Paste https://github.com/Kahsolt/stable-diffusion-webui-sonar.git into "URL for extension's git repository", click "install" button
3. Go to the "Installed" tab, click "Apply and Restart UI" button
2. Paste https://github.com/Kahsolt/stable-diffusion-webui-sonar.git into "URL for extension's git repository" and click install
3. (Optional) Restart the webui so that newly installed dependencies take effect; otherwise the extension may not work properly

Manual install:
1. Copy this repo folder to the 'extensions' folder of https://github.com/AUTOMATIC1111/stable-diffusion-webui
2. Go to the "Settings" tab, click "Restart Gradio and Refresh components" button
2. (Optional) Restart the webui

----

73 changes: 32 additions & 41 deletions scripts/sonar.py
@@ -5,6 +5,7 @@

import gradio as gr
import torch
from torch import Tensor
import numpy as np
from tqdm.auto import trange

@@ -13,6 +14,7 @@
from modules.processing import *
from modules.sd_samplers import *
from modules.prompt_parser import ScheduledPromptConditioning, MulticondLearnedConditioning
from modules.processing import process_images
from k_diffusion.sampling import to_d, get_ancestral_step
from ldm.models.diffusion.ddpm import LatentDiffusion

@@ -28,16 +30,16 @@
DEFAULT_GRAD_X_ALPHA = -0.02
DEFAULT_GRAD_FUZZY = False
DEFAULT_REF_METH = 'linear'
DEFAULT_REF_HGF = 0.02
DEFAULT_REF_HGF = 0.01
DEFAULT_REF_MIN_STEP = 0.0
DEFAULT_REF_MAX_STEP = 0.5
DEFAULT_REF_MAX_STEP = 0.75
DEFAULT_REF_IMG = None

CHOICE_MOMENTUM_SIGN = ['pos', 'neg', 'rand']
CHOICE_MOMENTUM_HIST_INIT = ['zero', 'rand_init', 'rand_new']
CHOICE_REF_METH = ['linear', 'euler']

# debug save latent featmap (when `euler a`)
# debug save latent featmap (when `Euler a`)
#FEAT_MAP_PATH = 'C:\sd-webui_featmaps'
FEAT_MAP_PATH = None

@@ -64,25 +66,8 @@
'ref_img': DEFAULT_REF_IMG,
}

Tensor = torch.Tensor

# ↓↓↓ the following is modified from 'modules/processing.py' ↓↓↓

def process_images(p: StableDiffusionProcessing) -> Processed:
stored_opts = {k: opts.data[k] for k in p.override_settings.keys()}

try:
for k, v in p.override_settings.items():
setattr(opts, k, v) # we don't call onchange for simplicity which makes changing model, hypernet impossible

res = process_images_inner(p)

finally:
for k, v in stored_opts.items():
setattr(opts, k, v)

return res

def process_images_inner(p: StableDiffusionProcessing) -> Processed:
"""this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""

@@ -179,10 +164,9 @@ def infotext(iteration=0, position_in_batch=0):
else: raise ValueError
samples_ddim = sample_func(p, conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, prompts=prompts)

samples_ddim = samples_ddim.to(devices.dtype_vae)
x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
x_samples_ddim = [decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
x_samples_ddim = torch.stack(x_samples_ddim).float()
x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
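# NOTE (editor's sketch, assumption): decoding the latents one at a time and
# moving each result to CPU keeps only a single sample on the GPU, trading a
# little speed for lower peak VRAM than the old batched decode above.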

del samples_ddim

if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
@@ -263,18 +247,22 @@ def StableDiffusionProcessingTxt2Img_sample(self:StableDiffusionProcessingTxt2Im
# hijack the sampler~
self.sampler = create_sampler(self.sd_model)

latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_default_mode
if self.enable_hr and latent_scale_mode is None:
assert len([x for x in shared.sd_upscalers if x.name == self.hr_upscaler]) > 0, f"could not find upscaler named {self.hr_upscaler}"
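# NOTE (editor's sketch): `latent_scale_mode` is non-None when the chosen hires
# upscaler operates in latent space; otherwise the samples are decoded to image
# space further below and resized with a regular upscaler.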

x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))

if not self.enable_hr:
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
return samples

x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x, self.firstphase_width, self.firstphase_height))

samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
target_width = int(self.width * self.hr_scale)
target_height = int(self.height * self.hr_scale)
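# NOTE (editor's sketch): newer webui builds expose `hr_scale` and no longer
# define `firstphase_width`/`firstphase_height`, which is exactly the
# AttributeError this commit fixes.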

"""saves image before applying hires fix, if enabled in options; takes as an arguyment either an image or batch with latent space images"""
def save_intermediate(image, index):
"""saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images"""

if not opts.save or self.do_not_save_samples or not opts.save_images_before_highres_fix:
return

@@ -283,13 +271,13 @@ def save_intermediate(image, index):

images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, suffix="-before-highres-fix")

if opts.use_scale_latent_for_hires_fix:
if latent_scale_mode is not None:
for i in range(samples.shape[0]):
save_intermediate(samples, i)

samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode)

# Avoid making the inpainting conditioning unless necessary as
# Avoid making the inpainting conditioning unless necessary as
# this does need some extra compute to decode / encode the image again.
if getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
image_conditioning = self.img2img_image_conditioning(decode_first_stage(self.sd_model, samples), samples)
@@ -307,7 +295,7 @@ def save_intermediate(image, index):

save_intermediate(image, i)

image = images.resize_image(0, image, self.width, self.height)
image = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler)
image = np.array(image).astype(np.float32) / 255.0
image = np.moveaxis(image, 2, 0)
batch_images.append(image)
@@ -325,7 +313,7 @@ def save_intermediate(image, index):
# hijack the sampler~
self.sampler = create_sampler(self.sd_model)

noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self)

# GC now before running the next img2img to prevent running out of memory
x = None
@@ -338,6 +326,10 @@ def save_intermediate(image, index):
def StableDiffusionProcessingImg2Img_sample(self:StableDiffusionProcessingImg2Img, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)

if self.initial_noise_multiplier != 1.0:
self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
x *= self.initial_noise_multiplier

# hijack the sampler~
self.sampler = create_sampler(self.sd_model)
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
@@ -446,8 +438,8 @@ def sample_naive_ex(model:CFGDenoiser, x:Tensor, sigmas:List, extra_args={}, cal
with devices.autocast():
latent_ref = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(x_ref)) # [B, C=4, H=64, W=64]

avg_s = latent_ref.mean(dim=[1, 2, 3], keepdim=True)
std_s = latent_ref.std (dim=[1, 2, 3], keepdim=True)
avg_s = latent_ref.mean(dim=[2, 3], keepdim=True)
std_s = latent_ref.std (dim=[2, 3], keepdim=True)
ref_img_norm = (latent_ref - avg_s) / std_s
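# NOTE (editor's sketch): dims [2, 3] give per-channel statistics of shape
# [B, C, 1, 1]; the old dims [1, 2, 3] collapsed the channels into a single
# per-image scalar of shape [B, 1, 1, 1].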

# stochastics in gradient optimizing
@@ -567,7 +559,6 @@ def sample_euler_ex(model:CFGDenoiser, x:Tensor, sigmas:List, extra_args={}, cal
momentum_hist = settings['momentum_hist']
momentum_hist_init = settings['momentum_hist_init']

# inertia term that records the gradient history
if momentum_hist_init == 'zero': history_d = 0
elif momentum_hist_init == 'rand_init': history_d = x
elif momentum_hist_init == 'rand_new': history_d = torch.randn_like(x)
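# NOTE (editor's sketch): 'zero' starts with no inertia, 'rand_init' reuses
# the initial noise x as the history, 'rand_new' draws fresh Gaussian noise.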
@@ -818,7 +809,7 @@ def sample_img2img(self, p:StableDiffusionProcessing, x:Tensor, noise:Tensor,


def get_latent_loss(self:LatentDiffusion, x:Tensor, t:Tensor, c:Tensor, noise:Tensor) -> Tensor:
# this t seems to be the number of ImageNet classes from pretraining; nudging it toward an arbitrary class apparently does not affect the result at all
# this t is the time embedding (time embed), which determines the denoising strength (apparently a reversed sigma schedule); it seems to have little effect on the result
t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long() if t is None else t # [B=1]

x_start = x
@@ -882,11 +873,11 @@ def ui(self, is_img2img):
momentum_sign = gr.Radio(label='Momentum sign', value=lambda: DEFAULT_MOMENTUM_SIGN, choices=CHOICE_MOMENTUM_SIGN)
momentum_hist_init = gr.Radio(label='Momentum history init', value=lambda: DEFAULT_MOMENTUM_HIST_INIT, choices=CHOICE_MOMENTUM_HIST_INIT)

with gr.Group() as tab_gradient:
with gr.Group(visible=False) as tab_gradient:
with gr.Row():
grad_c_iter = gr.Slider(label='Optimize cond step count', value=lambda: DEFAULT_GRAD_C_ITER, minimum=0, maximum=10, step=1)
grad_c_alpha = gr.Slider(label='Optimize cond step size', value=lambda: DEFAULT_GRAD_C_ALPHA, minimum=-0.01, maximum=0.01, step=0.001)
grad_c_skip = gr.Slider(label='Skip the first n-steps', value=lambda: DEFAULT_GRAD_C_SKIP, minimum=0, maximum=100, step=1)
grad_c_skip = gr.Slider(label='Skip the first n-steps', value=lambda: DEFAULT_GRAD_C_SKIP, minimum=0, maximum=100, step=1)
with gr.Row():
grad_x_iter = gr.Slider(label='Optimize latent step count', value=lambda: DEFAULT_GRAD_X_ITER, minimum=0, maximum=40, step=1)
grad_x_alpha = gr.Slider(label='Optimize latent step size', value=lambda: DEFAULT_GRAD_X_ALPHA, minimum=-0.1, maximum=0.1, step=0.01)
