aigc-apps
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎README.md
100644100755
+174-69 b/‎README.md
100644100755
+174-69
diff --git a/‎README_ja-JP.md
100644100755
+169-66 b/‎README_ja-JP.md
100644100755
+169-66
diff --git a/‎README_zh-CN.md
100644100755
+170-65 b/‎README_zh-CN.md
100644100755
+170-65
diff --git a/‎app.py
+8-4 b/‎app.py
+8-4
diff --git a/‎asset/0a3b5fb184936a83.txt
+175 b/‎asset/0a3b5fb184936a83.txt
+175
diff --git a/‎asset/1.mp4
456 KB b/‎asset/1.mp4
456 KB
diff --git a/‎asset/pose.mp4
282 KB b/‎asset/pose.mp4
282 KB
diff --git a/‎comfyui/README.md
+69-31 b/‎comfyui/README.md
+69-31
@@ -8,6 +8,7 @@ _*
 __pycache__/
 *.py[cod]
 *$py.class
+scripts_demo*
 
 # C extensions
 *.so
 
@@ -19,7 +19,11 @@
     # 
     # "sequential_cpu_offload" means that each layer of the model will be moved to the CPU after use, 
     # resulting in slower speeds but saving a large amount of GPU memory.
-    GPU_memory_mode = "model_cpu_offload"
+    # 
+    # EasyAnimateV1, V2 and V3 support "model_cpu_offload" "sequential_cpu_offload"
+    # EasyAnimateV4, V5 support "model_cpu_offload" "model_cpu_offload_and_qfloat8" "sequential_cpu_offload"
+    # EasyAnimateV5.1 supports "model_cpu_offload" "model_cpu_offload_and_qfloat8"
+    GPU_memory_mode = "model_cpu_offload_and_qfloat8"
     # Use torch.float16 if GPU does not support torch.bfloat16
     # Some graphics cards, such as v100, 2080ti, do not support torch.bfloat16
     weight_dtype = torch.bfloat16
@@ -29,11 +33,11 @@
     server_port = 7860
 
     # Params below are used when ui_mode = "modelscope"
-    edition = "v5"
+    edition = "v5.1"
     # Config
-    config_path = "config/easyanimate_video_v5_magvit_multi_text_encoder.yaml"
+    config_path = "config/easyanimate_video_v5.1_magvit_qwen.yaml"
     # Model path of the pretrained model
-    model_name = "models/Diffusion_Transformer/EasyAnimateV5-12b-zh-InP"
+    model_name = "models/Diffusion_Transformer/EasyAnimateV5.1-12b-zh-InP"
     # "Inpaint" or "Control"
     model_type = "Inpaint"
     # Save dir
 
@@ -6,16 +6,16 @@ Easily use EasyAnimate inside ComfyUI!
 [![Modelscope Studio](https://img.shields.io/badge/Modelscope-Studio-blue)](https://modelscope.cn/studios/PAI/EasyAnimate/summary)
 [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-yellow)](https://huggingface.co/spaces/alibaba-pai/EasyAnimate)
 
-- [Installation](#1-installation)
+English | [简体中文](./README_zh-CN.md)
+
+- [Installation](#installation)
 - [Node types](#node-types)
 - [Example workflows](#example-workflows)
-    - [Image to video](#image-to-video)
-    - [Image to video generation (high FPS w/ frame interpolation)](#image-to-video-generation-high-fps-w-frame-interpolation)
 
-## 1. Installation
+## Installation
 
 ### Option 1: Install via ComfyUI Manager
-TBD
+![](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/ComfyUI_Manager.jpg)
 
 ### Option 2: Install manually
 The EasyAnimate repository needs to be placed at `ComfyUI/custom_nodes/EasyAnimate/`.
@@ -28,37 +28,50 @@ git clone https://github.com/aigc-apps/EasyAnimate.git
 
 # Git clone the video output node
 git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
+git clone https://github.com/kijai/ComfyUI-KJNodes.git
 
 cd EasyAnimate/
 pip install -r comfyui/requirements.txt
 ```
 
-### 2. Download models into `ComfyUI/models/EasyAnimate/`
+### Download models into `ComfyUI/models/EasyAnimate/`
+
+EasyAnimateV5.1:
+
+12B:
+| Name | Type | Storage Space | Hugging Face | Model Scope | Description |
+|--|--|--|--|--|--|
+| EasyAnimateV5.1-12b-zh-InP | EasyAnimateV5.1 | 39 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-InP) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5.1-12b-zh-InP) | Official image-to-video weights. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports multilingual prediction. |
+| EasyAnimateV5.1-12b-zh-Control | EasyAnimateV5.1 | 39 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-Control) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5.1-12b-zh-Control) | Official video control weights, supporting various control conditions such as Canny, Depth, Pose, MLSD, and trajectory control. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports multilingual prediction. |
+| EasyAnimateV5.1-12b-zh-Control-Camera | EasyAnimateV5.1 | 39 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-Control-Camera) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5.1-12b-zh-Control-Camera) | Official video camera control weights, supporting direction generation control by inputting camera motion trajectories. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports multilingual prediction. |
+| EasyAnimateV5.1-12b-zh | EasyAnimateV5.1 | 39 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5.1-12b-zh) | Official text-to-video weights. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports multilingual prediction. |
 
-EasyAnimateV5:
+<details>
+  <summary>(Obsolete) EasyAnimateV5:</summary>
 
 | Name | Type | Storage Space | Hugging Face | Model Scope | Description |
 |--|--|--|--|--|--|
 | EasyAnimateV5-12b-zh-InP | EasyAnimateV5 | 34 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5-12b-zh-InP) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5-12b-zh-InP) | Official image-to-video weights. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports bilingual prediction in Chinese and English. |
 | EasyAnimateV5-12b-zh-Control | EasyAnimateV5 | 34 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5-12b-zh-Control) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5-12b-zh-Control) | Official video control weights, supporting various control conditions such as Canny, Depth, Pose, MLSD, etc. Supports video prediction at multiple resolutions (512, 768, 1024) and is trained with 49 frames at 8 frames per second. Bilingual prediction in Chinese and English is supported. |
 | EasyAnimateV5-12b-zh | EasyAnimateV5 | 34 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV5-12b-zh) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV5-12b-zh) | Official text-to-video weights. Supports video prediction at multiple resolutions (512, 768, 1024), trained with 49 frames at 8 frames per second, and supports bilingual prediction in Chinese and English. |
+</details>
 
 <details>
   <summary>(Obsolete) EasyAnimateV4:</summary>
 
-| Name | Type | Storage Space | Url | Hugging Face | Description |
+| Name | Type | Storage Space | Hugging Face | Model Scope | Description |
 |--|--|--|--|--|--|
-| EasyAnimateV4-XL-2-InP.tar.gz | EasyAnimateV4 | Before extraction: 8.9 GB \/ After extraction: 14.0 GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV4-XL-2-InP.tar.gz) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV4-XL-2-InP)| Our official graph-generated video model is capable of predicting videos at multiple resolutions (512, 768, 1024, 1280) and has been trained on 144 frames at a rate of 24 frames per second. |
+| EasyAnimateV4-XL-2-InP | EasyAnimateV4 | Before extraction: 8.9 GB \/ After extraction: 14.0 GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV4-XL-2-InP) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV4-XL-2-InP) | Our official image-to-video model is capable of predicting videos at multiple resolutions (512, 768, 1024, 1280) and has been trained on 144 frames at a rate of 24 frames per second. |
 </details>
 
 <details>
   <summary>(Obsolete) EasyAnimateV3:</summary>
 
-| Name | Type | Storage Space | Url | Hugging Face | Description |
+| Name | Type | Storage Space | Hugging Face | Model Scope | Description |
 |--|--|--|--|--|--|
-| EasyAnimateV3-XL-2-InP-512x512.tar | EasyAnimateV3 | 18.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV3-XL-2-InP-512x512.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-512x512) | EasyAnimateV3 official weights for 512x512 text and image to video resolution. Training with 144 frames and fps 24 |
-| EasyAnimateV3-XL-2-InP-768x768.tar | EasyAnimateV3 | 18.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV3-XL-2-InP-768x768.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-768x768) | EasyAnimateV3 official weights for 768x768 text and image to video resolution. Training with 144 frames and fps 24 |
-| EasyAnimateV3-XL-2-InP-960x960.tar | EasyAnimateV3 | 18.2GB | [Download](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/Diffusion_Transformer/EasyAnimateV3-XL-2-InP-960x960.tar) | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-960x960) | EasyAnimateV3 official weights for 960x960 text and  image to video resolution. Training with 144 frames and fps 24 |
+| EasyAnimateV3-XL-2-InP-512x512 | EasyAnimateV3 | 18.2GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-512x512)| [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV3-XL-2-InP-512x512) | EasyAnimateV3 official weights for 512x512 text and image to video resolution. Training with 144 frames and fps 24 |
+| EasyAnimateV3-XL-2-InP-768x768 | EasyAnimateV3 | 18.2GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-768x768) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV3-XL-2-InP-768x768) | EasyAnimateV3 official weights for 768x768 text and image to video resolution. Training with 144 frames and fps 24 |
+| EasyAnimateV3-XL-2-InP-960x960 | EasyAnimateV3 | 18.2GB | [🤗Link](https://huggingface.co/alibaba-pai/EasyAnimateV3-XL-2-InP-960x960) | [😄Link](https://modelscope.cn/models/PAI/EasyAnimateV3-XL-2-InP-960x960) | EasyAnimateV3 official weights for 960x960 text and  image to video resolution. Training with 144 frames and fps 24 |
 </details>
 
 ## Node types
@@ -75,27 +88,52 @@ EasyAnimateV5:
 
 ## Example workflows
 
-### Video to video generation
-Our ui is shown as follow, this is the [download link](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_v2v.json) of the json:
-![workflow graph](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_v2v.jpg)
+### Text to Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_t2v.json):
+
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_t2v.jpg)
+
+### Image to Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_i2v.json):
+
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_i2v.jpg)
+
+You can run a demo using the following photo:
+
+![Demo Image](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1/firework.png)
+
+### Video to Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_v2v.json):
+
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_v2v.jpg)
+
+You can run a demo using the following video:
+
+[Demo Video](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1/play_guitar.mp4)
+
+### Camera Control Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_control_camera.json):
+
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_control_camera.jpg)
+
+You can run a demo using the following photo:
+
+![Demo Image](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1/firework.png)
+
+### Trajectory Control Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_control_trajectory.json):
+
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_control_trajectory.jpg)
 
-You can run the demo using following video:
-[demo video](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1/play_guitar.mp4)
+You can run a demo using the following photo:
 
-### Control video generation
-Our ui is shown as follow, this is the [download link](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_v2v_control.json) of the json:
-![workflow graph](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_v2v_control.jpg)
+![Demo Image](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/dog.png)
 
-You can run the demo using following video:
-[demo video](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1.1/pose.mp4)
+### Control Video Generation
+Our user interface is shown as follows, this is the [json](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_v2v_control.json):
 
-### Image to video generation
-Our ui is shown as follow, this is the [download link](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_i2v.json) of the json:
-![workflow graph](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_i2v.jpg)
+![Workflow Diagram](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5.1/easyanimatev5.1_workflow_v2v_control.jpg)
 
-You can run the demo using following photo:
-![demo image](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1/firework.png)
+You can run a demo using the following video:
 
-### Text to video generation
-Our ui is shown as follow, this is the [download link](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_t2v.json) of the json:
-![workflow graph](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/easyanimate/asset/v5/easyanimatev5_workflow_t2v.jpg)
+[Demo Video](https://pai-aigc-photog.oss-cn-hangzhou.aliyuncs.com/cogvideox_fun/asset/v1.1/pose.mp4)