Merge branch 'invoke-ai:main' into lora_loader_optional
skunkworxdark authored Jan 31, 2025
2 parents b69b3d8 + 64475b8 · commit 4f6b7d2
Showing 37 changed files with 3,071 additions and 171 deletions.
12 changes: 12 additions & 0 deletions invokeai/app/api/routers/model_manager.py
@@ -858,6 +858,18 @@ async def get_stats() -> Optional[CacheStats]:
     return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats


+@model_manager_router.post(
+    "/empty_model_cache",
+    operation_id="empty_model_cache",
+    status_code=200,
+)
+async def empty_model_cache() -> None:
+    """Drop all models from the model cache to free RAM/VRAM. 'Locked' models that are in active use will not be dropped."""
+    # Request 1000GB of room in order to force the cache to drop all models.
+    ApiDependencies.invoker.services.logger.info("Emptying model cache.")
+    ApiDependencies.invoker.services.model_manager.load.ram_cache.make_room(1000 * 2**30)
+
+
 class HFTokenStatus(str, Enum):
     VALID = "valid"
     INVALID = "invalid"
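
The endpoint works by asking the cache for more room than any machine has, so every unlocked model gets evicted. A minimal standalone sketch of that eviction idea (SimpleRAMCache and its fields are hypothetical stand-ins; the real ram_cache internals are not part of this diff):

from collections import OrderedDict


class SimpleRAMCache:
    """Hypothetical stand-in for the model RAM cache (not the real ram_cache)."""

    def __init__(self, max_bytes: int) -> None:
        self.max_bytes = max_bytes
        self.entries: OrderedDict[str, int] = OrderedDict()  # key -> model size in bytes
        self.locked: set[str] = set()  # models in active use; never evicted

    def make_room(self, needed_bytes: int) -> None:
        # Evict least-recently-used, unlocked models until `needed_bytes` would fit.
        # Requesting an impossible amount (e.g. 1000 GiB) therefore drops everything
        # evictable -- the trick the endpoint above relies on.
        used = sum(self.entries.values())
        for key in list(self.entries):
            if used + needed_bytes <= self.max_bytes:
                break
            if key in self.locked:
                continue
            used -= self.entries.pop(key)


cache = SimpleRAMCache(max_bytes=8 * 2**30)
cache.entries["flux-transformer"] = 6 * 2**30
cache.entries["t5-encoder"] = 1 * 2**30
cache.make_room(1000 * 2**30)
print(list(cache.entries))  # [] -- all unlocked models dropped
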
34 changes: 32 additions & 2 deletions invokeai/app/invocations/flux_lora_loader.py
@@ -8,7 +8,7 @@
     invocation_output,
 )
 from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
-from invokeai.app.invocations.model import CLIPField, LoRAField, ModelIdentifierField, TransformerField
+from invokeai.app.invocations.model import CLIPField, LoRAField, ModelIdentifierField, T5EncoderField, TransformerField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.config import BaseModelType

@@ -21,14 +21,17 @@ class FluxLoRALoaderOutput(BaseInvocationOutput):
         default=None, description=FieldDescriptions.transformer, title="FLUX Transformer"
     )
     clip: Optional[CLIPField] = OutputField(default=None, description=FieldDescriptions.clip, title="CLIP")
+    t5_encoder: Optional[T5EncoderField] = OutputField(
+        default=None, description=FieldDescriptions.t5_encoder, title="T5 Encoder"
+    )


 @invocation(
     "flux_lora_loader",
     title="FLUX LoRA",
     tags=["lora", "model", "flux"],
     category="model",
-    version="1.1.0",
+    version="1.2.0",
     classification=Classification.Prototype,
 )
 class FluxLoRALoaderInvocation(BaseInvocation):
@@ -50,6 +53,12 @@ class FluxLoRALoaderInvocation(BaseInvocation):
         description=FieldDescriptions.clip,
         input=Input.Connection,
     )
+    t5_encoder: T5EncoderField | None = InputField(
+        default=None,
+        title="T5 Encoder",
+        description=FieldDescriptions.t5_encoder,
+        input=Input.Connection,
+    )

     def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
         lora_key = self.lora.key
@@ -62,6 +71,8 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
             raise ValueError(f'LoRA "{lora_key}" already applied to transformer.')
         if self.clip and any(lora.lora.key == lora_key for lora in self.clip.loras):
             raise ValueError(f'LoRA "{lora_key}" already applied to CLIP encoder.')
+        if self.t5_encoder and any(lora.lora.key == lora_key for lora in self.t5_encoder.loras):
+            raise ValueError(f'LoRA "{lora_key}" already applied to T5 encoder.')

         output = FluxLoRALoaderOutput()

@@ -82,6 +93,14 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
                     weight=self.weight,
                 )
             )
+        if self.t5_encoder is not None:
+            output.t5_encoder = self.t5_encoder.model_copy(deep=True)
+            output.t5_encoder.loras.append(
+                LoRAField(
+                    lora=self.lora,
+                    weight=self.weight,
+                )
+            )

         return output

@@ -113,6 +132,12 @@ class FLUXLoRACollectionLoader(BaseInvocation):
         description=FieldDescriptions.clip,
         input=Input.Connection,
     )
+    t5_encoder: T5EncoderField | None = InputField(
+        default=None,
+        title="T5 Encoder",
+        description=FieldDescriptions.t5_encoder,
+        input=Input.Connection,
+    )

     def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
         output = FluxLoRALoaderOutput()
@@ -145,4 +170,9 @@ def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
             if self.clip is not None and output.clip is not None:
                 output.clip.loras.append(lora)

+            if self.t5_encoder is not None:
+                if output.t5_encoder is None:
+                    output.t5_encoder = self.t5_encoder.model_copy(deep=True)
+                output.t5_encoder.loras.append(lora)
+
         return output
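
Both loaders use the same copy-then-append pattern: deep-copy the incoming field before touching its loras list, so the upstream node's output is never mutated when several loaders share it. A minimal sketch of why the deep copy matters (EncoderField and LoRARef are illustrative stand-ins for T5EncoderField and LoRAField):

from pydantic import BaseModel


class LoRARef(BaseModel):  # stand-in for LoRAField
    key: str
    weight: float = 1.0


class EncoderField(BaseModel):  # stand-in for T5EncoderField
    name: str
    loras: list[LoRARef] = []


upstream = EncoderField(name="t5", loras=[LoRARef(key="base")])

# Copy first, then append: the upstream field is left untouched.
patched = upstream.model_copy(deep=True)
patched.loras.append(LoRARef(key="style", weight=0.75))
print(len(upstream.loras), len(patched.loras))  # 1 2

# Appending without the deep copy would mutate the shared upstream list.
aliased = upstream
aliased.loras.append(LoRARef(key="oops"))
print(len(upstream.loras))  # 2 -- upstream silently changed
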
4 changes: 2 additions & 2 deletions invokeai/app/invocations/flux_model_loader.py
@@ -40,7 +40,7 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
     title="Flux Main Model",
     tags=["model", "flux"],
     category="model",
-    version="1.0.4",
+    version="1.0.5",
     classification=Classification.Prototype,
 )
 class FluxModelLoaderInvocation(BaseInvocation):
@@ -87,7 +87,7 @@ def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput:
         return FluxModelLoaderOutput(
             transformer=TransformerField(transformer=transformer, loras=[]),
             clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
-            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
+            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder, loras=[]),
             vae=VAEField(vae=vae),
             max_seq_len=max_seq_lengths[transformer_config.config_path],
         )
43 changes: 41 additions & 2 deletions invokeai/app/invocations/flux_text_encoder.py
@@ -19,7 +19,7 @@
 from invokeai.backend.flux.modules.conditioner import HFEncoder
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.patches.layer_patcher import LayerPatcher
-from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_CLIP_PREFIX
+from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_CLIP_PREFIX, FLUX_LORA_T5_PREFIX
 from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo

@@ -71,13 +71,45 @@ def invoke(self, context: InvocationContext) -> FluxConditioningOutput:
     def _t5_encode(self, context: InvocationContext) -> torch.Tensor:
         prompt = [self.prompt]

+        t5_encoder_info = context.models.load(self.t5_encoder.text_encoder)
+        t5_encoder_config = t5_encoder_info.config
+        assert t5_encoder_config is not None
+
         with (
-            context.models.load(self.t5_encoder.text_encoder) as t5_text_encoder,
+            t5_encoder_info.model_on_device() as (cached_weights, t5_text_encoder),
             context.models.load(self.t5_encoder.tokenizer) as t5_tokenizer,
+            ExitStack() as exit_stack,
         ):
             assert isinstance(t5_text_encoder, T5EncoderModel)
             assert isinstance(t5_tokenizer, (T5Tokenizer, T5TokenizerFast))

+            # Determine if the model is quantized.
+            # If the model is quantized, then we need to apply the LoRA weights as sidecar layers. This results in
+            # slower inference than direct patching, but is agnostic to the quantization format.
+            if t5_encoder_config.format in [ModelFormat.T5Encoder, ModelFormat.Diffusers]:
+                model_is_quantized = False
+            elif t5_encoder_config.format in [
+                ModelFormat.BnbQuantizedLlmInt8b,
+                ModelFormat.BnbQuantizednf4b,
+                ModelFormat.GGUFQuantized,
+            ]:
+                model_is_quantized = True
+            else:
+                raise ValueError(f"Unsupported model format: {t5_encoder_config.format}")
+
+            # Apply LoRA models to the T5 encoder.
+            # Note: We apply the LoRA after the encoder has been moved to its target device for faster patching.
+            exit_stack.enter_context(
+                LayerPatcher.apply_smart_model_patches(
+                    model=t5_text_encoder,
+                    patches=self._t5_lora_iterator(context),
+                    prefix=FLUX_LORA_T5_PREFIX,
+                    dtype=t5_text_encoder.dtype,
+                    cached_weights=cached_weights,
+                    force_sidecar_patching=model_is_quantized,
+                )
+            )
+
             t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len)

             context.util.signal_progress("Running T5 encoder")
@@ -132,3 +164,10 @@ def _clip_lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]:
             assert isinstance(lora_info.model, ModelPatchRaw)
             yield (lora_info.model, lora.weight)
             del lora_info
+
+    def _t5_lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]:
+        for lora in self.t5_encoder.loras:
+            lora_info = context.models.load(lora.lora)
+            assert isinstance(lora_info.model, ModelPatchRaw)
+            yield (lora_info.model, lora.weight)
+            del lora_info
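
Two details of this patching flow are worth calling out: _t5_lora_iterator yields LoRAs lazily, so only one patch's weights are loaded at a time, and ExitStack keeps the patches applied exactly for the lifetime of the with block, reverting them on exit. A simplified sketch of that shape (Patch and apply_patches are hypothetical stand-ins; the real behavior of LayerPatcher.apply_smart_model_patches is not reproduced here):

from contextlib import ExitStack, contextmanager
from typing import Dict, Iterator, Tuple


class Patch:  # stand-in for a loaded LoRA (ModelPatchRaw)
    def __init__(self, name: str) -> None:
        self.name = name


@contextmanager
def apply_patches(model: Dict[str, Tuple[str, float]], patches: Iterator[Tuple[Patch, float]], sidecar: bool):
    # Stand-in for the patcher: apply on entry, revert on exit.
    applied = []
    for patch, weight in patches:  # pulls from the generator, loading patches one at a time
        model[patch.name] = ("sidecar" if sidecar else "direct", weight)
        applied.append(patch.name)
    try:
        yield
    finally:
        for name in applied:  # unpatch so the cached model weights stay clean
            del model[name]


def lora_iterator() -> Iterator[Tuple[Patch, float]]:
    # Mirrors _t5_lora_iterator: load, yield, then release each LoRA in turn.
    for name, weight in [("style", 0.8), ("detail", 0.5)]:
        yield Patch(name), weight


model: Dict[str, Tuple[str, float]] = {}
quantized = True  # quantized formats force sidecar layers instead of in-place weight edits
with ExitStack() as stack:
    stack.enter_context(apply_patches(model, lora_iterator(), sidecar=quantized))
    print(model)  # patches active while encoding would run here
print(model)      # {} -- reverted on exit
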
11 changes: 10 additions & 1 deletion invokeai/app/invocations/mask.py
@@ -86,7 +86,7 @@ def invoke(self, context: InvocationContext) -> MaskOutput:
     title="Invert Tensor Mask",
     tags=["conditioning"],
     category="conditioning",
-    version="1.0.0",
+    version="1.1.0",
     classification=Classification.Beta,
 )
 class InvertTensorMaskInvocation(BaseInvocation):
@@ -96,6 +96,15 @@ class InvertTensorMaskInvocation(BaseInvocation):

     def invoke(self, context: InvocationContext) -> MaskOutput:
         mask = context.tensors.load(self.mask.tensor_name)
+
+        # Verify dtype and shape.
+        assert mask.dtype == torch.bool
+        assert mask.dim() in [2, 3]
+
+        # Unsqueeze the channel dimension if it is missing. The MaskOutput type expects a single channel.
+        if mask.dim() == 2:
+            mask = mask.unsqueeze(0)
+
         inverted = ~mask

         return MaskOutput(
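
The new guard normalizes both accepted layouts to MaskOutput's single-channel contract before inverting. The same logic as a standalone function (invert_mask is illustrative, not part of the diff):

import torch


def invert_mask(mask: torch.Tensor) -> torch.Tensor:
    # Same checks as above: bool dtype, and either [H, W] or [1, H, W].
    assert mask.dtype == torch.bool
    assert mask.dim() in [2, 3]
    if mask.dim() == 2:
        mask = mask.unsqueeze(0)  # add the channel dim MaskOutput expects
    return ~mask  # bitwise NOT on a bool tensor flips every element


m = torch.zeros(4, 4, dtype=torch.bool)
m[1:3, 1:3] = True
out = invert_mask(m)
print(out.shape, int(out.sum()))  # torch.Size([1, 4, 4]) 12
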
1 change: 1 addition & 0 deletions invokeai/app/invocations/model.py
@@ -68,6 +68,7 @@ class CLIPField(BaseModel):
 class T5EncoderField(BaseModel):
     tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
     text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
+    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


 class VAEField(BaseModel):
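
Because the new loras field is declared without a default, pydantic now rejects any T5EncoderField constructed without it; that is why flux_model_loader.py above and sd3_model_loader.py below were updated to pass loras=[]. A toy reproduction (T5Field is a stand-in):

from pydantic import BaseModel, Field, ValidationError


class T5Field(BaseModel):  # stand-in for T5EncoderField
    text_encoder: str
    loras: list[str] = Field(description="LoRAs to apply on model loading")  # no default: required


try:
    T5Field(text_encoder="t5_xxl")  # the old call sites would now fail like this
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "missing"

T5Field(text_encoder="t5_xxl", loras=[])  # the updated call sites
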
1 change: 1 addition & 0 deletions invokeai/app/invocations/primitives.py
@@ -416,6 +416,7 @@ def invoke(self, context: InvocationContext) -> ColorOutput:
 class MaskOutput(BaseInvocationOutput):
     """A torch mask tensor."""

+    # shape: [1, H, W], dtype: bool
     mask: TensorField = OutputField(description="The mask.")
     width: int = OutputField(description="The width of the mask in pixels.")
     height: int = OutputField(description="The height of the mask in pixels.")
2 changes: 1 addition & 1 deletion invokeai/app/invocations/sd3_model_loader.py
@@ -99,6 +99,6 @@ def invoke(self, context: InvocationContext) -> Sd3ModelLoaderOutput:
             transformer=TransformerField(transformer=transformer, loras=[]),
             clip_l=CLIPField(tokenizer=tokenizer_l, text_encoder=clip_encoder_l, loras=[], skipped_layers=0),
             clip_g=CLIPField(tokenizer=tokenizer_g, text_encoder=clip_encoder_g, loras=[], skipped_layers=0),
-            t5_encoder=T5EncoderField(tokenizer=tokenizer_t5, text_encoder=t5_encoder),
+            t5_encoder=T5EncoderField(tokenizer=tokenizer_t5, text_encoder=t5_encoder, loras=[]),
             vae=VAEField(vae=vae),
         )
6 changes: 4 additions & 2 deletions invokeai/app/invocations/segment_anything.py
@@ -49,7 +49,7 @@ def to_list(self) -> list[list[int]]:
     title="Segment Anything",
     tags=["prompt", "segmentation"],
     category="segmentation",
-    version="1.1.0",
+    version="1.2.0",
 )
 class SegmentAnythingInvocation(BaseInvocation):
     """Runs a Segment Anything Model."""
@@ -96,8 +96,10 @@ def invoke(self, context: InvocationContext) -> MaskOutput:
         # masks contains bool values, so we merge them via max-reduce.
         combined_mask, _ = torch.stack(masks).max(dim=0)

+        # Unsqueeze the channel dimension.
+        combined_mask = combined_mask.unsqueeze(0)
         mask_tensor_name = context.tensors.save(combined_mask)
-        height, width = combined_mask.shape
+        _, height, width = combined_mask.shape
         return MaskOutput(mask=TensorField(tensor_name=mask_tensor_name), width=width, height=height)

     @staticmethod
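
The fix pairs two changes: the merged mask gains the channel dimension that MaskOutput now documents, and the shape unpacking grows a third name to match (two-name unpacking of a 3-D shape raises ValueError). A standalone sketch of the merge-and-reshape:

import torch

# Three [H, W] bool masks; max-reduce across the stack is an elementwise OR.
masks = [torch.rand(8, 8) > 0.5 for _ in range(3)]
combined, _ = torch.stack(masks).max(dim=0)  # still [H, W], dtype bool
combined = combined.unsqueeze(0)             # [1, H, W], as MaskOutput expects

_, height, width = combined.shape            # two names here would raise ValueError
print(height, width)  # 8 8
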