@@ -18,7 +18,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from einops import rearrange
 from torch.nn.utils.rnn import pad_sequence
 
 from ...configuration_utils import ConfigMixin, register_to_config
@@ -429,9 +428,12 @@ def unpatchify(self, x: List[torch.Tensor], size: List[Tuple], patch_size, f_patch_size
         for i in range(bsz):
             F, H, W = size[i]
             ori_len = (F // pF) * (H // pH) * (W // pW)
-            x[i] = rearrange(
-                x[i][:ori_len].view(F // pF, H // pH, W // pW, pF, pH, pW, self.out_channels),
-                "f h w pf ph pw c -> c (f pf) (h ph) (w pw)",
+            # "f h w pf ph pw c -> c (f pf) (h ph) (w pw)"
+            x[i] = (
+                x[i][:ori_len]
+                .view(F // pF, H // pH, W // pW, pF, pH, pW, self.out_channels)
+                .permute(6, 0, 3, 1, 4, 2, 5)
+                .reshape(self.out_channels, F, H, W)
             )
         return x
 
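The `permute(6, 0, 3, 1, 4, 2, 5)` order comes from reading the einops pattern left to right: the output axes `c (f pf) (h ph) (w pw)` correspond to input dimensions `c=6, f=0, pf=3, h=1, ph=4, w=2, pw=5`. A minimal sketch to sanity-check the equivalence on a dummy tensor; the shape values are made up for illustration, and einops is imported only for the comparison, not by the patched module:

```python
import torch
from einops import rearrange

# Illustrative sizes only: pF/pH/pW are patch sizes, C the channel count.
pF, pH, pW, C = 1, 2, 2, 16
F, H, W = 4, 8, 8

# A flat token sequence, as unpatchify receives it, reshaped into patch blocks.
tokens = torch.randn((F // pF) * (H // pH) * (W // pW), pF * pH * pW * C)
blocks = tokens.view(F // pF, H // pH, W // pW, pF, pH, pW, C)

expected = rearrange(blocks, "f h w pf ph pw c -> c (f pf) (h ph) (w pw)")
actual = blocks.permute(6, 0, 3, 1, 4, 2, 5).reshape(C, F, H, W)
torch.testing.assert_close(actual, expected)  # same data movement, exact match
```

Note that `.reshape` rather than `.view` is needed after the permute, because the permuted tensor is no longer contiguous.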
@@ -497,7 +499,8 @@ def patchify_and_embed(
             F_tokens, H_tokens, W_tokens = F // pF, H // pH, W // pW
 
             image = image.view(C, F_tokens, pF, H_tokens, pH, W_tokens, pW)
-            image = rearrange(image, "c f pf h ph w pw -> (f h w) (pf ph pw c)")
+            # "c f pf h ph w pw -> (f h w) (pf ph pw c)"
+            image = image.permute(1, 3, 5, 2, 4, 6, 0).reshape(F_tokens * H_tokens * W_tokens, pF * pH * pW * C)
 
             image_ori_len = len(image)
             image_padding_len = (-image_ori_len) % SEQ_MULTI_OF
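The patchify direction follows the same recipe in reverse: the output axes `(f h w) (pf ph pw c)` map to input dimensions `f=1, h=3, w=5, pf=2, ph=4, pw=6, c=0`, hence `permute(1, 3, 5, 2, 4, 6, 0)`. Again a sketch with made-up shapes, using einops only as the reference:

```python
import torch
from einops import rearrange

C, F, H, W = 16, 4, 8, 8  # illustrative channel/frame/spatial sizes
pF, pH, pW = 1, 2, 2      # illustrative patch sizes
F_tokens, H_tokens, W_tokens = F // pF, H // pH, W // pW

# Split each frame/spatial dim of a dummy image into (tokens, patch) pairs.
image = torch.randn(C, F, H, W).view(C, F_tokens, pF, H_tokens, pH, W_tokens, pW)

expected = rearrange(image, "c f pf h ph w pw -> (f h w) (pf ph pw c)")
actual = image.permute(1, 3, 5, 2, 4, 6, 0).reshape(
    F_tokens * H_tokens * W_tokens, pF * pH * pW * C
)
torch.testing.assert_close(actual, expected)
```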