Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add image tiling nodes to enable Tiled Upscaling #5142

Merged
merged 16 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
9da7888
Add nodes for tile splitting and merging. The main motivation for the…
RyanJDick Nov 17, 2023
3fd882d
Add unit tests for tile paste(...) util function.
RyanJDick Nov 20, 2023
6c959f0
Add unit tests for calc_tiles_with_overlap(...) and fix a bug in its …
RyanJDick Nov 20, 2023
0277815
Add unit tests for merge_tiles_with_linear_blending(...).
RyanJDick Nov 20, 2023
078b8eb
Tidy up tiles invocations, add documentation.
RyanJDick Nov 20, 2023
7a0cc75
(minor) Add 'Invocation' suffix to all tiling node classes.
RyanJDick Nov 23, 2023
281e8e9
Copy CropLatentsInvocation from https://github.com/skunkworxdark/XYGr…
RyanJDick Nov 27, 2023
ef52b4b
Use LATENT_SCALE_FACTOR = 8 constant in CropLatentsInvocation.
RyanJDick Nov 27, 2023
d757f68
Improve documentation of CropLatentsInvocation.
RyanJDick Nov 27, 2023
27a67ae
Rename CropLatentsInvocation -> CropLatentsCoreInvocation to prevent …
RyanJDick Nov 27, 2023
8dd5354
Update tiling nodes to use width-before-height field ordering convent…
RyanJDick Nov 27, 2023
ce0b0af
Add width and height fields to TileToPropertiesInvocation output to a…
RyanJDick Nov 27, 2023
fee53fc
Infer a tight-fitting output image size from the passed tiles in Merg…
RyanJDick Nov 27, 2023
9d288e1
Re-organize merge_tiles_with_linear_blending(...) to merge rows horiz…
RyanJDick Nov 28, 2023
2387825
Change input field ordering of CropLatentsCoreInvocation to match Ima…
RyanJDick Nov 29, 2023
972b854
(minor) Tweak field ordering and field names for tiling nodes.
RyanJDick Nov 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 69 additions & 6 deletions invokeai/app/invocations/latent.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@

SAMPLER_NAME_VALUES = Literal[tuple(SCHEDULER_MAP.keys())]

# HACK: Many nodes are currently hard-coded to use a fixed latent scale factor of 8. This is fragile, and will need to
# be addressed if future models use a different latent scale factor. Also, note that there may be places where the scale
# factor is hard-coded to a literal '8' rather than using this constant.
# The ratio of image:latent dimensions is LATENT_SCALE_FACTOR:1, or 8:1.
LATENT_SCALE_FACTOR = 8


@invocation_output("scheduler_output")
class SchedulerOutput(BaseInvocationOutput):
Expand Down Expand Up @@ -394,9 +400,9 @@ def prep_control_data(
exit_stack: ExitStack,
do_classifier_free_guidance: bool = True,
) -> List[ControlNetData]:
# assuming fixed dimensional scaling of 8:1 for image:latents
control_height_resize = latents_shape[2] * 8
control_width_resize = latents_shape[3] * 8
# Assuming fixed dimensional scaling of LATENT_SCALE_FACTOR.
control_height_resize = latents_shape[2] * LATENT_SCALE_FACTOR
control_width_resize = latents_shape[3] * LATENT_SCALE_FACTOR
if control_input is None:
control_list = None
elif isinstance(control_input, list) and len(control_input) == 0:
Expand Down Expand Up @@ -909,12 +915,12 @@ class ResizeLatentsInvocation(BaseInvocation):
)
width: int = InputField(
ge=64,
multiple_of=8,
multiple_of=LATENT_SCALE_FACTOR,
description=FieldDescriptions.width,
)
height: int = InputField(
ge=64,
multiple_of=8,
multiple_of=LATENT_SCALE_FACTOR,
description=FieldDescriptions.width,
)
mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode)
Expand All @@ -928,7 +934,7 @@ def invoke(self, context: InvocationContext) -> LatentsOutput:

resized_latents = torch.nn.functional.interpolate(
latents.to(device),
size=(self.height // 8, self.width // 8),
size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR),
mode=self.mode,
antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False,
)
Expand Down Expand Up @@ -1166,3 +1172,60 @@ def slerp(t, v0, v1, DOT_THRESHOLD=0.9995):
# context.services.latents.set(name, resized_latents)
context.services.latents.save(name, blended_latents)
return build_latents_output(latents_name=name, latents=blended_latents)


# The Crop Latents node was copied from @skunkworxdark's implementation here:
# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80
@invocation(
"crop_latents",
title="Crop Latents",
tags=["latents", "crop"],
category="latents",
version="1.0.0",
)
# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`.
# Currently, if the class names conflict then 'GET /openapi.json' fails.
class CropLatentsCoreInvocation(BaseInvocation):
"""Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be
divisible by the latent scale factor of 8.
"""

latents: LatentsField = InputField(
description=FieldDescriptions.latents,
input=Input.Connection,
)
x: int = InputField(
ge=0,
multiple_of=LATENT_SCALE_FACTOR,
description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
)
y: int = InputField(
ge=0,
multiple_of=LATENT_SCALE_FACTOR,
description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
)
width: int = InputField(
ge=1,
multiple_of=LATENT_SCALE_FACTOR,
description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
)
height: int = InputField(
ge=1,
multiple_of=LATENT_SCALE_FACTOR,
description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.",
)

def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.services.latents.get(self.latents.latents_name)

x1 = self.x // LATENT_SCALE_FACTOR
y1 = self.y // LATENT_SCALE_FACTOR
x2 = x1 + (self.width // LATENT_SCALE_FACTOR)
y2 = y1 + (self.height // LATENT_SCALE_FACTOR)

cropped_latents = latents[..., y1:y2, x1:x2]

name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, cropped_latents)

return build_latents_output(latents_name=name, latents=cropped_latents)
181 changes: 181 additions & 0 deletions invokeai/app/invocations/tiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import numpy as np
from PIL import Image
from pydantic import BaseModel

from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
InputField,
InvocationContext,
OutputField,
WithMetadata,
WithWorkflow,
invocation,
invocation_output,
)
from invokeai.app.invocations.primitives import ImageField, ImageOutput
from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin
from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending
from invokeai.backend.tiles.utils import Tile


class TileWithImage(BaseModel):
tile: Tile
image: ImageField


@invocation_output("calculate_image_tiles_output")
class CalculateImageTilesOutput(BaseInvocationOutput):
tiles: list[Tile] = OutputField(description="The tiles coordinates that cover a particular image shape.")


@invocation("calculate_image_tiles", title="Calculate Image Tiles", tags=["tiles"], category="tiles", version="1.0.0")
class CalculateImageTilesInvocation(BaseInvocation):
"""Calculate the coordinates and overlaps of tiles that cover a target image shape."""

image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.")
image_height: int = InputField(
ge=1, default=1024, description="The image height, in pixels, to calculate tiles for."
)
tile_width: int = InputField(ge=1, default=576, description="The tile width, in pixels.")
tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.")
overlap: int = InputField(
ge=0,
default=128,
description="The target overlap, in pixels, between adjacent tiles. Adjacent tiles will overlap by at least this amount",
)

def invoke(self, context: InvocationContext) -> CalculateImageTilesOutput:
tiles = calc_tiles_with_overlap(
image_height=self.image_height,
image_width=self.image_width,
tile_height=self.tile_height,
tile_width=self.tile_width,
overlap=self.overlap,
)
return CalculateImageTilesOutput(tiles=tiles)


@invocation_output("tile_to_properties_output")
class TileToPropertiesOutput(BaseInvocationOutput):
coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.")
coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.")
coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.")
coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.")

# HACK: The width and height fields are 'meta' fields that can easily be calculated from the other fields on this
# object. Including redundant fields that can cheaply/easily be re-calculated goes against conventional API design
# principles. These fields are included, because 1) they are often useful in tiled workflows, and 2) they are
# difficult to calculate in a workflow (even though it's just a couple of subtraction nodes the graph gets
# surprisingly complicated).
width: int = OutputField(description="The width of the tile. Equal to coords_right - coords_left.")
height: int = OutputField(description="The height of the tile. Equal to coords_bottom - coords_top.")

overlap_top: int = OutputField(description="Overlap between this tile and its top neighbor.")
overlap_bottom: int = OutputField(description="Overlap between this tile and its bottom neighbor.")
overlap_left: int = OutputField(description="Overlap between this tile and its left neighbor.")
overlap_right: int = OutputField(description="Overlap between this tile and its right neighbor.")


@invocation("tile_to_properties", title="Tile to Properties", tags=["tiles"], category="tiles", version="1.0.0")
class TileToPropertiesInvocation(BaseInvocation):
"""Split a Tile into its individual properties."""

tile: Tile = InputField(description="The tile to split into properties.")

def invoke(self, context: InvocationContext) -> TileToPropertiesOutput:
return TileToPropertiesOutput(
coords_left=self.tile.coords.left,
coords_right=self.tile.coords.right,
coords_top=self.tile.coords.top,
coords_bottom=self.tile.coords.bottom,
width=self.tile.coords.right - self.tile.coords.left,
height=self.tile.coords.bottom - self.tile.coords.top,
overlap_top=self.tile.overlap.top,
overlap_bottom=self.tile.overlap.bottom,
overlap_left=self.tile.overlap.left,
overlap_right=self.tile.overlap.right,
)


@invocation_output("pair_tile_image_output")
class PairTileImageOutput(BaseInvocationOutput):
tile_with_image: TileWithImage = OutputField(description="A tile description with its corresponding image.")


@invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0")
class PairTileImageInvocation(BaseInvocation):
"""Pair an image with its tile properties."""

# TODO(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve
# order. Can this be fixed?

image: ImageField = InputField(description="The tile image.")
tile: Tile = InputField(description="The tile properties.")

def invoke(self, context: InvocationContext) -> PairTileImageOutput:
return PairTileImageOutput(
tile_with_image=TileWithImage(
tile=self.tile,
image=self.image,
)
)


@invocation("merge_tiles_to_image", title="Merge Tiles to Image", tags=["tiles"], category="tiles", version="1.0.0")
class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow):
"""Merge multiple tile images into a single image."""

# Inputs
tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.")
blend_amount: int = InputField(
ge=0,
description="The amount to blend adjacent tiles in pixels. Must be <= the amount of overlap between adjacent tiles.",
)

def invoke(self, context: InvocationContext) -> ImageOutput:
images = [twi.image for twi in self.tiles_with_images]
tiles = [twi.tile for twi in self.tiles_with_images]

# Infer the output image dimensions from the max/min tile limits.
height = 0
width = 0
for tile in tiles:
height = max(height, tile.coords.bottom)
width = max(width, tile.coords.right)

# Get all tile images for processing.
# TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly
# existed in memory at an earlier point in the graph.
tile_np_images: list[np.ndarray] = []
for image in images:
pil_image = context.services.images.get_pil_image(image.image_name)
pil_image = pil_image.convert("RGB")
tile_np_images.append(np.array(pil_image))

# Prepare the output image buffer.
# Check the first tile to determine how many image channels are expected in the output.
channels = tile_np_images[0].shape[-1]
dtype = tile_np_images[0].dtype
np_image = np.zeros(shape=(height, width, channels), dtype=dtype)

merge_tiles_with_linear_blending(
dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount
)
pil_image = Image.fromarray(np_image)

image_dto = context.services.images.create(
image=pil_image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
metadata=self.metadata,
workflow=self.workflow,
)
return ImageOutput(
image=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)
Empty file.
Loading