From 2ec9ffa6354629a18536d6a8389509263e3fe89b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20J=C3=B6rg?=
Date: Thu, 13 Mar 2025 12:37:20 +0100
Subject: [PATCH] Fix: dtype mismatch of prompt embeddings in sd3 controlnet training

---
 examples/controlnet/train_controlnet_sd3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/controlnet/train_controlnet_sd3.py b/examples/controlnet/train_controlnet_sd3.py
index f4aadc2577f7..ffe460d72de8 100644
--- a/examples/controlnet/train_controlnet_sd3.py
+++ b/examples/controlnet/train_controlnet_sd3.py
@@ -1283,8 +1283,8 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                 noisy_model_input = (1.0 - sigmas) * model_input + sigmas * noise
 
                 # Get the text embedding for conditioning
-                prompt_embeds = batch["prompt_embeds"]
-                pooled_prompt_embeds = batch["pooled_prompt_embeds"]
+                prompt_embeds = batch["prompt_embeds"].to(dtype=weight_dtype)
+                pooled_prompt_embeds = batch["pooled_prompt_embeds"].to(dtype=weight_dtype)
 
                 # controlnet(s) inference
                 controlnet_image = batch["conditioning_pixel_values"].to(dtype=weight_dtype)