Add Cog/Replicate demo and API #179

Open · wants to merge 3 commits into base: main
9 changes: 8 additions & 1 deletion .gitignore
@@ -128,9 +128,16 @@ dmypy.json
# Pyre type checker
.pyre/

# Cog/Replicate generated files
test
.cog
diffusers-cache
cog_out

# Extra stuff to ignore
dreams
images
run.py
test_outputs
examples/music
.vscode
15 changes: 15 additions & 0 deletions README.md
@@ -2,6 +2,8 @@

Try it yourself in Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/stable-diffusion-videos/blob/main/stable_diffusion_videos.ipynb)

Try it on Replicate: [![Replicate](https://replicate.com/nateraw/stable-diffusion-videos/badge)](https://replicate.com/nateraw/stable-diffusion-videos)

TPU version (~6x faster than standard Colab GPUs): [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/stable-diffusion-videos/blob/main/flax_stable_diffusion_videos.ipynb)

**Example** - morphing between "blueberry spaghetti" and "strawberry spaghetti"
@@ -123,6 +125,19 @@ interface = Interface(pipeline)
interface.launch()
```

#### Run with Cog

First, download the pre-trained weights:
```bash
cog run python scripts/download_weights.py
```

Run a prediction, separating each prompt with a `|`:
```bash
cog predict -i prompts="a cat | a dog | a horse"
```
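
Once the model is pushed to Replicate, the same predictor can be called through the hosted API. Below is a minimal sketch using the `replicate` Python client against the model linked in the badge above; the version hash is a placeholder to copy from the model page, and it assumes `replicate` is installed and `REPLICATE_API_TOKEN` is set.

```python
# Illustrative only: calling the hosted model via the Replicate Python client.
import replicate

output = replicate.run(
    "nateraw/stable-diffusion-videos:<version-hash>",  # placeholder; copy the version from the model page
    input={
        "prompts": "a cat | a dog | a horse",  # same '|'-separated format as `cog predict`
        "num_steps": 5,                         # keep small for a quick test
        "fps": 15,
    },
)
print(output)  # URL of the generated mp4
```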


## Credits

This work built off of [a script](https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355
22 changes: 22 additions & 0 deletions cog.yaml
@@ -0,0 +1,22 @@
build:
  gpu: true
  cuda: "11.6.2"
  python_version: "3.10"
  python_packages:
    - "torch==1.13.1 --extra-index-url=https://download.pytorch.org/whl/cu116"
    - "ftfy==6.1.1"
    - "scipy==1.9.0"
    - "transformers==4.21.1"
    - "diffusers==0.11.1"
    - "fire==0.4.0"
    - "ipython==8.5.0"
    - "gradio"
    - "librosa"
    - "av<10.0.0"
    - "realesrgan==0.2.5.0"
    - "accelerate==0.15.0"
  run:
    - apt-get update && apt-get install -y software-properties-common
    - add-apt-repository ppa:ubuntu-toolchain-r/test
    - apt update -y && apt-get install ffmpeg -y
predict: "predict.py:Predictor"
145 changes: 145 additions & 0 deletions predict.py
@@ -0,0 +1,145 @@
import os
import shutil
import torch
from diffusers import DDIMScheduler, PNDMScheduler
from cog import BasePredictor, Input, Path
from diffusers.models import AutoencoderKL
from diffusers.schedulers import LMSDiscreteScheduler

from stable_diffusion_videos import StableDiffusionWalkPipeline

MODEL_ID = "runwayml/stable-diffusion-v1-5"
MODEL_VAE = "stabilityai/sd-vae-ft-ema"
MODEL_CACHE = "diffusers-cache"


class Predictor(BasePredictor):
    def setup(self):
        """Load the model into memory to make running multiple predictions efficient"""
        print("Loading pipeline...")

        vae = AutoencoderKL.from_pretrained(MODEL_VAE, cache_dir=MODEL_CACHE, local_files_only=True)

        self.pipeline = StableDiffusionWalkPipeline.from_pretrained(
            MODEL_ID,
            vae=vae,
            torch_dtype=torch.float16,
            revision="fp16",
            safety_checker=None,
            cache_dir=MODEL_CACHE,
            local_files_only=True,
            scheduler=LMSDiscreteScheduler(
                beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
            ),
        ).to("cuda")

        default_scheduler = PNDMScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        )
        ddim_scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
        )
        klms_scheduler = LMSDiscreteScheduler(
            beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
        )
        self.SCHEDULERS = dict(
            default=default_scheduler, ddim=ddim_scheduler, klms=klms_scheduler
        )

    @torch.inference_mode()
    @torch.cuda.amp.autocast()
    def predict(
        self,
        prompts: str = Input(
            description="Input prompts, separate each prompt with '|'.",
            default="a cat | a dog | a horse",
        ),
        seeds: str = Input(
            description="Random seeds, separated with '|' to use a different seed for each of the prompts provided above. Leave blank to randomize the seeds.",
            default=None,
        ),
        scheduler: str = Input(
            description="Choose the scheduler",
            choices=["default", "ddim", "klms"],
            default="klms",
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps for each image generated from the prompt",
            ge=1,
            le=500,
            default=50,
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", ge=1, le=20, default=7.5
        ),
        num_steps: int = Input(
            description="Steps for generating the interpolation video. Recommended to set to 3 or 5 for testing, then increase to 60-200 for better results.",
            default=50,
        ),
        fps: int = Input(
            description="Frame rate for the video.", default=15, ge=5, le=60
        ),
    ) -> Path:
        """Run a single prediction on the model"""

        prompts = [p.strip() for p in prompts.split("|")]

        # Parse the provided seeds, or generate one random seed per prompt.
        if seeds is None:
            print("Setting Random seeds.")
            seeds = [int.from_bytes(os.urandom(2), "big") for _ in range(len(prompts))]
        else:
            seeds = [s.strip() for s in seeds.split("|")]
            for s in seeds:
                assert s.isdigit(), "Please provide integer seeds."
            seeds = [int(s) for s in seeds]

            # Trim extra seeds, or pad with random ones so every prompt has a seed.
            if len(seeds) > len(prompts):
                seeds = seeds[: len(prompts)]
            else:
                seeds_not_set = len(prompts) - len(seeds)
                print("Setting Random seeds.")
                seeds = seeds + [
                    int.from_bytes(os.urandom(2), "big") for _ in range(seeds_not_set)
                ]

        print("Seeds used for prompts are:")
        for prompt, seed in zip(prompts, seeds):
            print(f"{prompt}: {seed}")

        # Use the default settings for the demo.
        height = 512
        width = 512
        disable_tqdm = False

        self.pipeline.set_progress_bar_config(disable=disable_tqdm)
        self.pipeline.scheduler = self.SCHEDULERS[scheduler]

        # Start from a clean output directory for each prediction.
        outdir = "cog_out/out"
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        os.makedirs(outdir)

        self.pipeline.walk(
            prompts=prompts,
            seeds=seeds,
            num_interpolation_steps=num_steps,
            output_dir="cog_out",
            name="out",
            fps=fps,
            batch_size=1,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            margin=1.0,
            smooth=0.2,
            height=height,
            width=width,
        )

        video_path = "cog_out/out/out.mp4"

        return Path(video_path)
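
For local debugging outside `cog predict`, the predictor can also be exercised directly from Python. A minimal sketch, assuming the weights have already been pulled into `diffusers-cache` by the download script and a CUDA GPU is available; everything other than the `Predictor` API shown in this diff is illustrative:

```python
# Hypothetical smoke test for predict.py; only Predictor/setup/predict come from this PR.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads the fp16 pipeline from the local diffusers-cache

video_path = predictor.predict(
    prompts="a cat | a dog",
    seeds="42 | 1337",
    scheduler="klms",
    num_inference_steps=50,
    guidance_scale=7.5,
    num_steps=5,  # small for a quick test; 60-200 for real runs
    fps=15,
)
print(video_path)  # cog_out/out/out.mp4
```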
29 changes: 29 additions & 0 deletions scripts/download_weights.py
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import os
import shutil
import sys

from diffusers import StableDiffusionPipeline
from diffusers.models import AutoencoderKL

# append project directory to path so predict.py can be imported
sys.path.append('.')

from predict import MODEL_CACHE, MODEL_ID, MODEL_VAE

# if os.path.exists(MODEL_CACHE):
# shutil.rmtree(MODEL_CACHE)
os.makedirs(MODEL_CACHE, exist_ok=True)

vae = AutoencoderKL.from_pretrained(
MODEL_VAE,
cache_dir=MODEL_CACHE,
)

pipe = StableDiffusionPipeline.from_pretrained(
MODEL_ID,
vae=vae,
cache_dir=MODEL_CACHE,
revision="fp16",
)
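
As a quick sanity check that the cache produced by this script works offline (mirroring the `local_files_only=True` loading in `predict.py`), something like the following could be run afterwards; illustrative only:

```python
# Should succeed without network access once download_weights.py has populated diffusers-cache.
from diffusers.models import AutoencoderKL

from predict import MODEL_CACHE, MODEL_VAE

vae = AutoencoderKL.from_pretrained(MODEL_VAE, cache_dir=MODEL_CACHE, local_files_only=True)
print(type(vae).__name__)  # AutoencoderKL
```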