From 3bb28081abf258b9be07ab3ff9da2f941df64976 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Mon, 1 Jul 2024 18:44:23 +0400 Subject: [PATCH 01/16] Remove transformers and hf hub dependencies from SD dependencies --- image_generation/stable_diffusion_1_5/cpp/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt index dd5faeb7d..c548f648b 100644 --- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt +++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -transformers==4.39.3 +# transformers==4.39.3 optimum-intel[openvino]==1.17.0 -huggingface_hub[cli]==0.22.2 +# huggingface_hub[cli]==0.22.2 From edf46493ed8aa60a2172c870ead2ef7fbb548f3c Mon Sep 17 00:00:00 2001 From: yatarkan Date: Mon, 1 Jul 2024 19:54:48 +0400 Subject: [PATCH 02/16] Try using samples requirements for SD workflow --- .github/workflows/stable_diffusion_1_5_cpp.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index a369a2e2f..6ee086491 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -49,7 +49,8 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r requirements.txt + # python -m pip install -r requirements.txt + python -m pip install -r ../../../samples/requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} @@ -92,7 +93,8 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r requirements.txt + # 
python -m pip install -r requirements.txt + python -m pip install -r ../../../samples/requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} From 63d9cc4c03f1e947f987ec66b5e85e85d7ec9901 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 19:13:34 +0400 Subject: [PATCH 03/16] Move common requirements for SD and LCM to image_generation dir --- .github/workflows/lcm_dreamshaper_cpp.yml | 4 ++-- .github/workflows/stable_diffusion_1_5_cpp.yml | 6 ++---- image_generation/lcm_dreamshaper_v7/cpp/README.md | 2 +- .../{lcm_dreamshaper_v7/cpp => }/requirements.txt | 0 image_generation/stable_diffusion_1_5/cpp/README.md | 2 +- image_generation/stable_diffusion_1_5/cpp/requirements.txt | 6 ------ 6 files changed, 6 insertions(+), 14 deletions(-) rename image_generation/{lcm_dreamshaper_v7/cpp => }/requirements.txt (100%) delete mode 100644 image_generation/stable_diffusion_1_5/cpp/requirements.txt diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index ca2f1ebac..57570a0e9 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -50,7 +50,7 @@ jobs: run: | conda activate openvino_lcm_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r requirements.txt + python -m pip install -r ../../requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} @@ -95,7 +95,7 @@ jobs: run: | conda activate openvino_lcm_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r requirements.txt + python -m pip install -r ../../requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 6ee086491..010d115ce 
100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -49,8 +49,7 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - # python -m pip install -r requirements.txt - python -m pip install -r ../../../samples/requirements.txt + python -m pip install -r ../../requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} @@ -93,8 +92,7 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - # python -m pip install -r requirements.txt - python -m pip install -r ../../../samples/requirements.txt + python -m pip install -r ../../requirements.txt - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index 7432be681..c93b56256 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -32,7 +32,7 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ```shell git submodule update --init conda activate openvino_lcm_cpp - python -m pip install -r requirements.txt + python -m pip install -r ../../requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] ``` diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/requirements.txt similarity index 100% rename from image_generation/lcm_dreamshaper_v7/cpp/requirements.txt rename to image_generation/requirements.txt diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 4a553d4cc..ae3b6ed9a 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ 
b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -32,7 +32,7 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH git submodule update --init # Reactivate Conda environment after installing dependencies and setting env vars conda activate openvino_sd_cpp -python -m pip install -r requirements.txt +python -m pip install -r ../../requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] ``` 2. Download a huggingface SD v1.5 model like: diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt deleted file mode 100644 index c548f648b..000000000 --- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cpu -torch==2.2.2+cpu -diffusers==0.27.2 -# transformers==4.39.3 -optimum-intel[openvino]==1.17.0 -# huggingface_hub[cli]==0.22.2 From 82c47abf902bc68e4e709c5484ee145501c62642 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 19:16:06 +0400 Subject: [PATCH 04/16] Add LCM link to image generation readme --- image_generation/README.md | 3 ++- image_generation/lcm_dreamshaper_v7/cpp/README.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/image_generation/README.md b/image_generation/README.md index d6163e4a3..509887734 100644 --- a/image_generation/README.md +++ b/image_generation/README.md @@ -1,4 +1,4 @@ -## Image generation +## Image Generation The current folder contains: - Common folder with: @@ -6,3 +6,4 @@ The current folder contains: - [imwrite](./common/imwrite) library to dump `ov::Tensor` to `.bmp` image - Image generation samples: - [Stable Diffuison (with LoRA) C++ image generation pipeline](./stable_diffusion_1_5/cpp) + - [OpenVINO Latent Consistency Model C++ image generation pipeline](./lcm_dreamshaper_v7/cpp) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md 
b/image_generation/lcm_dreamshaper_v7/cpp/README.md index c93b56256..4a53e6a9b 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -1,4 +1,4 @@ -# OpenVINO Latent Consistency Model C++ image generation pipeline +# OpenVINO Latent Consistency Model C++ image generation pipeline The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent Consistency Model with LCM Scheduler. It includes advanced features like LoRA integration with safetensors and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. 
> [!NOTE] From 175f368c2cb32740fc540556a6db703b7bdfc4ff Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 19:18:07 +0400 Subject: [PATCH 05/16] Align optimum intel version --- image_generation/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index e86e1c2eb..05f543dff 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -optimum-intel[openvino]==1.17.0 +optimum-intel[openvino]==1.20.0 From c9d52cc53dc73729da9ba979fc667c3eeef770ea Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 19:37:07 +0400 Subject: [PATCH 06/16] Add extra index url to install command for SD and LCM --- .github/workflows/lcm_dreamshaper_cpp.yml | 8 ++++---- .github/workflows/stable_diffusion_1_5_cpp.yml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 57570a0e9..8bf89bba5 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -49,8 +49,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r ../../requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} @@ -94,8 +94,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - python -m pip 
install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r ../../requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 010d115ce..0d77d1f69 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -48,8 +48,8 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - python -m pip install -r ../../requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ../../requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - name: Download and convert model and tokenizer working-directory: ${{ env.working_directory }} From 3d9c36d3e54dc93f850d615aa06f9be0731c41a2 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 20:04:49 +0400 Subject: [PATCH 07/16] Fix optimum intel version --- image_generation/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index 05f543dff..6eb4d9639 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -optimum-intel[openvino]==1.20.0 
+optimum-intel[openvino]==1.18.0 From 9df7c54b75723cf73726d567d9173c64e10564e6 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 20:47:09 +0400 Subject: [PATCH 08/16] Include image generation requirements to samples requirements --- samples/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/requirements.txt b/samples/requirements.txt index d16301ad3..73e5deb1d 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -2,3 +2,4 @@ optimum[openvino]==1.20.0 einops==0.8.0 # For Qwen transformers_stream_generator==0.0.5 # For Qwen +-r ../image_generation/requirements.txt From e6729b3c18bc71a17e1a09fce708cc2ffee54867 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Tue, 2 Jul 2024 20:49:01 +0400 Subject: [PATCH 09/16] Rework LoRA section in SD --- .../lcm_dreamshaper_v7/cpp/README.md | 9 ++--- .../stable_diffusion_1_5/cpp/README.md | 34 ++++++++++++++----- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index 4a53e6a9b..d415d8f58 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -1,5 +1,5 @@ # OpenVINO Latent Consistency Model C++ image generation pipeline -The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent Consistency Model with LCM Scheduler. It includes advanced features like LoRA integration with safetensors and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. 
There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. +The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent Consistency Model with LCM Scheduler. It includes advanced features like [LoRA](https://huggingface.co/docs/peft/main/en/conceptual_guides/lora#lora) integration with [safetensors](https://huggingface.co/docs/safetensors/index#format) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. > [!NOTE] > This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. @@ -46,6 +46,7 @@ If https://huggingface.co/ is down, the script won't be able to download the mod > Only static model is currently supported for this sample. ### LoRA enabling with safetensors + Refer to [python pipeline blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline). The safetensor model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen Lib` and inserted into the LCM model with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). 
@@ -94,7 +95,7 @@ Example: Positive prompt: a beautiful pink unicorn -Read the numpy latent input and noise for scheduler instead of C++ std lib for the alignment with Python pipeline. +To read the numpy latent input and noise for scheduler instead of C++ std lib for the alignment with Python pipeline, use `-r, --readNPLatent` argument. * Generate image with random data generated by Python: `./build/lcm_dreamshaper -r` @@ -104,7 +105,7 @@ Read the numpy latent input and noise for scheduler instead of C++ std lib for t ![image](./cpp_random.bmp) -* Generate image with soulcard lora and C++ generated latent and noise: `./stable_diffusion -r -l path/to/soulcard.safetensors` +* Generate image with soulcard lora and C++ generated latent and noise: `./build/lcm_dreamshaper -l path/to/soulcard.safetensors` ![image](./lora_cpp_random.bmp) @@ -119,7 +120,7 @@ For the generation quality, C++ random generation with MT19937 results is differ Guidance scale controls how similar the generated image will be to the prompt. A higher guidance scale means the model will try to generate an image that follows the prompt more strictly. A lower guidance scale means the model will have more creativity. `guidance_scale` is a way to increase the adherence to the conditional signal that guides the generation (text, in this case) as well as overall sample quality. It is also known as [classifier-free guidance](https://arxiv.org/abs/2207.12598). -#### Negative prompt +#### Negative Prompt Negative prompts don't work with LCM because they don’t have any effect on the denoising process. When a LCM is distilled from an LDM via latent consistency distillation (Algorithm 1) with guided distillation, the forward pass of the LCM learns to approximate sampling from the LDM using CFG with the unconditional prompt "" (the empty string). 
diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index ae3b6ed9a..0f2a28640 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -1,5 +1,5 @@ # OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline -The pure C++ text-to-image pipeline, driven by the OpenVINO native C++ API for Stable Diffusion v1.5 with LMS Discrete Scheduler, supports both static and dynamic model inference. It includes advanced features like [LoRA](https://huggingface.co/docs/peft/conceptual_guides/lora) integration with [safetensors](https://huggingface.co/docs/safetensors/index#format) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. The sample uses [diffusers](../../common/diffusers) for image generation and [imwrite](../../common/imwrite) for saving `.bmp` images. This demo has been tested on Windows and Unix platforms. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/stable-diffusion-text-to-image) which provides an example of image generation in Python. +The pure C++ text-to-image pipeline, driven by the OpenVINO native C++ API for Stable Diffusion v1.5 with LMS Discrete Scheduler, supports both static and dynamic model inference. It includes advanced features like [LoRA](https://huggingface.co/docs/peft/main/en/conceptual_guides/lora#lora) integration with [safetensors](https://huggingface.co/docs/safetensors/index#format) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. The sample uses [diffusers](../../common/diffusers) for image generation and [imwrite](../../common/imwrite) for saving `.bmp` images. This demo has been tested on Windows and Unix platforms. 
There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/stable-diffusion-text-to-image) which provides an example of image generation in Python. > [!NOTE] >This tutorial assumes that the current working directory is `/image_generation/stable_diffusion_1_5/cpp/` and all paths are relative to this folder. @@ -52,14 +52,26 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] > [!NOTE] > Now the pipeline support batch size = 1 only, i.e. static model `(1, 3, 512, 512)` -### LoRA enabling with safetensors +### (Optional) Enable LoRA Weights with Safetensors -Refer to [python pipeline blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline). -The safetensor model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen` library and inserted into the SD models with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). +Low-Rank Adaptation(LoRA) is a novel technique introduced to deal with the problem of fine-tuning Diffusers and Large Language Models (LLMs). In the case of Stable Diffusion fine-tuning, LoRA can be applied to the cross-attention layers for the image representations with the latent described. -SD model [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) and LoRA [soulcard](https://civitai.com/models/67927?modelVersionId=72591) are tested in this pipeline. +LoRA weights can be enabled for Unet model of Stable Diffusion pipeline to generate images with different styles. + +#### LoRA Weights Format + +In this sample LoRA weights are used in [safetensors]((https://huggingface.co/docs/safetensors/index#format)) format. 
+Safetensors is a serialization format developed by Hugging Face that is specifically designed for efficiently storing and loading large tensors. It provides a lightweight and efficient way to serialize tensors, making it easier to store and load machine learning models. + +The LoRA safetensors model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen` library and inserted into the SD models with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). + +#### Using LoRA Weights + +SD model [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) and LoRA [soulcard model](https://civitai.com/models/67927?modelVersionId=72591) are tested in this pipeline. + +There are various LoRA models on https://civitai.com/tag/lora and on HuggingFace, you can consider to choose your own LoRA model in safetensor format. +Download and put LoRA safetensors model into the models directory. When running the built sample provide the path to the LoRA model with `-l, --loraPath arg` argument. -Download and put safetensors and model IR into the models folder. ## Step 3: Build the SD application @@ -104,13 +116,13 @@ Positive prompt: cyberpunk cityscape like Tokyo New York with tall buildings at Negative prompt: (empty, check the [Notes](#negative-prompt) for details) -Read the numpy latent instead of C++ std lib for the alignment with Python pipeline +To read the numpy latent instead of C++ std lib for the alignment with Python pipeline, use `-r, --readNPLatent` argument. 
* Generate image without lora `./build/stable_diffusion -r` ![](./without_lora.bmp) -* Generate image with soulcard lora `./build/stable_diffusion -r` +* Generate image with soulcard lora `./build/stable_diffusion -r -l path/to/soulcard.safetensors` ![](./soulcard_lora.bmp) @@ -127,7 +139,7 @@ For the generation quality, be careful with the negative prompt and random laten Guidance scale controls how similar the generated image will be to the prompt. A higher guidance scale means the model will try to generate an image that follows the prompt more strictly. A lower guidance scale means the model will have more creativity. `guidance_scale` is a way to increase the adherence to the conditional signal that guides the generation (text, in this case) as well as overall sample quality. It is also known as [classifier-free guidance](https://arxiv.org/abs/2207.12598). -#### Negative prompt +#### Negative Prompt To improve image generation quality, model supports negative prompting. Technically, positive prompt steers the diffusion toward the images associated with it, while negative prompt steers the diffusion away from it. In other words, negative prompt declares undesired concepts for generation image, e.g. if we want to have colorful and bright image, gray scale image will be result which we want to avoid, in this case gray scale can be treated as negative prompt. @@ -135,3 +147,7 @@ The positive and negative prompt are in equal footing. You can always use one wi > [!NOTE] > Negative prompting is applicable only for high guidance scale (at least > 1). + +#### LoRA Weights Enabling + +Refer to the [OpenVINO blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline) to get more information on enabling LoRA weights. 
From c1f74dd0ef7b450d50201ec308b36d5570e63f8c Mon Sep 17 00:00:00 2001 From: yatarkan Date: Wed, 3 Jul 2024 17:01:19 +0400 Subject: [PATCH 10/16] Add platform specific requirements for image generation --- image_generation/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index 6eb4d9639..8468873f3 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.2.2+cpu +torch==2.2.2+cpu; sys_platform != 'darwin' +torch==2.2.2; sys_platform == 'darwin' diffusers==0.27.2 optimum-intel[openvino]==1.18.0 From 07c0f0fcec4224162208d4bbb320dc46f1c23a88 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Jul 2024 12:06:41 +0400 Subject: [PATCH 11/16] Include samples requirements in image generation --- image_generation/requirements.txt | 3 ++- samples/requirements.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index 8468873f3..d57fe4e0b 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,5 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu; sys_platform != 'darwin' -torch==2.2.2; sys_platform == 'darwin' +torch==2.2.2; sys_platform == 'darwin' # Torch wheel for 2.2.2+cpu does not exist for macOS, using 2.2.2 instead diffusers==0.27.2 optimum-intel[openvino]==1.18.0 +-r ../samples/requirements.txt diff --git a/samples/requirements.txt b/samples/requirements.txt index 73e5deb1d..d16301ad3 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -2,4 +2,3 @@ optimum[openvino]==1.20.0 einops==0.8.0 # For Qwen transformers_stream_generator==0.0.5 # For Qwen --r ../image_generation/requirements.txt From 5988f3e4b078755d2a7dff7277ba6bd96c0fa767 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 
Jul 2024 13:29:17 +0400 Subject: [PATCH 12/16] Rework models and lora sections --- .../stable_diffusion_1_5/cpp/README.md | 60 +++++++++---------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 0f2a28640..d8fa0cd73 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -1,20 +1,21 @@ -# OpenVINO Stable Diffusion (with LoRA) C++ image generation pipeline +# OpenVINO Stable Diffusion (with LoRA) C++ Image Generation Pipeline + The pure C++ text-to-image pipeline, driven by the OpenVINO native C++ API for Stable Diffusion v1.5 with LMS Discrete Scheduler, supports both static and dynamic model inference. It includes advanced features like [LoRA](https://huggingface.co/docs/peft/main/en/conceptual_guides/lora#lora) integration with [safetensors](https://huggingface.co/docs/safetensors/index#format) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. The sample uses [diffusers](../../common/diffusers) for image generation and [imwrite](../../common/imwrite) for saving `.bmp` images. This demo has been tested on Windows and Unix platforms. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/stable-diffusion-text-to-image) which provides an example of image generation in Python. > [!NOTE] >This tutorial assumes that the current working directory is `/image_generation/stable_diffusion_1_5/cpp/` and all paths are relative to this folder. 
-## Step 1: Prepare build environment +## Step 1: Prepare Build Environment Prerequisites: - Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)) - C++ Packages: * [CMake](https://cmake.org/download/): Cross-platform build tool * [OpenVINO](https://docs.openvino.ai/install): Model inference. `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. Prepare a python environment and install dependencies: + ```shell conda create -n openvino_sd_cpp python==3.10 conda activate openvino_sd_cpp @@ -23,57 +24,52 @@ conda install -c conda-forge openvino=2024.2.0 c-compiler cxx-compiler git make conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` -## Step 2: Convert Stable Diffusion v1.5 and Tokenizer models - -### Stable Diffusion v1.5 model: +## Step 2: Obtain Stable Diffusion Model 1. Install dependencies to import models from HuggingFace: -```shell -git submodule update --init -# Reactivate Conda environment after installing dependencies and setting env vars -conda activate openvino_sd_cpp -python -m pip install -r ../../requirements.txt -python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] -``` -2. Download a huggingface SD v1.5 model like: -- [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) -- [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters. 
- Example command for downloading and exporting FP16 model: + ```shell + git submodule update --init + # Reactivate Conda environment after installing dependencies and setting env vars + conda activate openvino_sd_cpp + python -m pip install -r ../../requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + ``` - `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16` +2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). - You can also choose other precision and export FP32 or INT8 model. + Example models to download: + - [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) + - [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) - Please, refer to the official website for [🤗 Optimum](https://huggingface.co/docs/optimum/main/en/index) and [optimum-intel](https://github.com/huggingface/optimum-intel) to read more details. + Example command for downloading [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) model and exporting it with FP16 precision: - If https://huggingface.co/ is down, the script won't be able to download the model. + `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16` + + You can also choose other precision and export FP32 or INT8 model. + + Please, refer to the official website for [🤗 Optimum](https://huggingface.co/docs/optimum/main/en/index) and [optimum-intel](https://github.com/huggingface/optimum-intel) to read more details. + + If https://huggingface.co/ is down, the script won't be able to download the model. > [!NOTE] > Now the pipeline support batch size = 1 only, i.e. 
static model `(1, 3, 512, 512)` ### (Optional) Enable LoRA Weights with Safetensors -Low-Rank Adaptation(LoRA) is a novel technique introduced to deal with the problem of fine-tuning Diffusers and Large Language Models (LLMs). In the case of Stable Diffusion fine-tuning, LoRA can be applied to the cross-attention layers for the image representations with the latent described. +Low-Rank Adaptation (LoRA) is a technique introduced to deal with the problem of fine-tuning Diffusers and Large Language Models (LLMs). In the case of Stable Diffusion fine-tuning, LoRA can be applied to the cross-attention layers for the image representations with the latent described. LoRA weights can be enabled for Unet model of Stable Diffusion pipeline to generate images with different styles. -#### LoRA Weights Format - In this sample LoRA weights are used in [safetensors]((https://huggingface.co/docs/safetensors/index#format)) format. Safetensors is a serialization format developed by Hugging Face that is specifically designed for efficiently storing and loading large tensors. It provides a lightweight and efficient way to serialize tensors, making it easier to store and load machine learning models. The LoRA safetensors model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen` library and inserted into the SD models with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). -#### Using LoRA Weights - -SD model [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) and LoRA [soulcard model](https://civitai.com/models/67927?modelVersionId=72591) are tested in this pipeline. - -There are various LoRA models on https://civitai.com/tag/lora and on HuggingFace, you can consider to choose your own LoRA model in safetensor format. 
+There are various LoRA models on https://civitai.com/tag/lora and on HuggingFace; consider choosing your own LoRA model in safetensors format. For example, you can use LoRA [soulcard model](https://civitai.com/models/67927?modelVersionId=72591). Download and put LoRA safetensors model into the models directory. When running the built sample, provide the path to the LoRA model with `-l, --loraPath arg` argument. - -## Step 3: Build the SD application +## Step 3: Build the SD Application ```shell conda activate openvino_sd_cpp @@ -132,7 +128,7 @@ To read the numpy latent instead of C++ std lib for the alignment with Python pi ## Notes -For the generation quality, be careful with the negative prompt and random latent generation. C++ random generation with MT19937 results is differ from `numpy.random.randn()`. Hence, please use `-r, --readNPLatent` for the alignment with Python (this latent file is for output image 512X512 only). +For the generation quality, be careful with the negative prompt and random latent generation. C++ random generation with MT19937 produces results that differ from `numpy.random.randn()`. Hence, please use `-r, --readNPLatent` for the alignment with Python (this latent file is for output image 512X512 only). 
#### Guidance Scale From efbf620131863779bd57bf61bbf618f75546125d Mon Sep 17 00:00:00 2001 From: yatarkan Date: Fri, 5 Jul 2024 13:29:40 +0400 Subject: [PATCH 13/16] Add updated lora section to LCM readme --- .../lcm_dreamshaper_v7/cpp/README.md | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index d415d8f58..c5c0b08cd 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -1,10 +1,11 @@ -# OpenVINO Latent Consistency Model C++ image generation pipeline +# OpenVINO Latent Consistency Model C++ Image Generation Pipeline + The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent Consistency Model with LCM Scheduler. It includes advanced features like [LoRA](https://huggingface.co/docs/peft/main/en/conceptual_guides/lora#lora) integration with [safetensors](https://huggingface.co/docs/safetensors/index#format) and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. > [!NOTE] > This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. 
-## Step 1: Prepare build environment +## Step 1: Prepare Build Environment Prerequisites: - Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)) @@ -14,6 +15,7 @@ C++ Packages: * [OpenVINO](https://docs.openvino.ai/2024/get-started/install-openvino.html): Model inference Prepare a python environment and install dependencies: + ```shell conda create -n openvino_lcm_cpp python==3.10 conda activate openvino_lcm_cpp @@ -23,9 +25,7 @@ conda install -c conda-forge openvino=2024.2.0 c-compiler cxx-compiler git make conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` -## Step 2: Latent Consistency Model and Tokenizer models - -### Latent Consistency Model model +## Step 2: Obtain Latent Consistency Model 1. Install dependencies to import models from HuggingFace: @@ -36,26 +36,33 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] ``` -2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model: +2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). + + Example command for downloading [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) model and exporting it with FP16 precision: `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16` -If https://huggingface.co/ is down, the script won't be able to download the model. + You can also choose other precision and export FP32 or INT8 model. -> [!NOTE] -> Only static model is currently supported for this sample. 
+ Please, refer to the official website for [🤗 Optimum](https://huggingface.co/docs/optimum/main/en/index) and [optimum-intel](https://github.com/huggingface/optimum-intel) to read more details. -### LoRA enabling with safetensors - + If https://huggingface.co/ is down, the script won't be able to download the model. -Refer to [python pipeline blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline). -The safetensor model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen Lib` and inserted into the LCM model with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). +### (Optional) Enable LoRA Weights with Safetensors -LCM model [lcm_dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) and Lora [soulcard](https://civitai.com/models/67927?modelVersionId=72591) are tested in this pipeline. +Low-Rank Adaptation (LoRA) is a technique introduced to deal with the problem of fine-tuning Diffusers and Large Language Models (LLMs). In the case of Stable Diffusion fine-tuning, LoRA can be applied to the cross-attention layers for the image representations with the latent described. -Download and put safetensors and model IR into the models folder. +LoRA weights can be enabled for the Unet model of the Stable Diffusion pipeline to generate images with different styles. -## Step 3: Build the LCM application +In this sample LoRA weights are used in [safetensors](https://huggingface.co/docs/safetensors/index#format) format. +Safetensors is a serialization format developed by Hugging Face that is specifically designed for efficiently storing and loading large tensors. It provides a lightweight and efficient way to serialize tensors, making it easier to store and load machine learning models. 
+ +The LoRA safetensors model is loaded via [safetensors.h](https://github.com/hsnyder/safetensors.h). The layer name and weight are modified with `Eigen` library and inserted into the LCM model with `ov::pass::MatcherPass` in the file [common/diffusers/src/lora.cpp](https://github.com/openvinotoolkit/openvino.genai/blob/master/image_generation/common/diffusers/src/lora.cpp). + +There are various LoRA models on https://civitai.com/tag/lora and on HuggingFace; consider choosing your own LoRA model in safetensors format. For example, you can use LoRA [soulcard model](https://civitai.com/models/67927?modelVersionId=72591). +Download and put LoRA safetensors model into the models directory. When running the built sample, provide the path to the LoRA model with `-l, --loraPath arg` argument. + +## Step 3: Build the LCM Application ```shell conda activate openvino_lcm_cpp @@ -111,7 +118,7 @@ To read the numpy latent input and noise for scheduler instead of C++ std lib fo ## Benchmark: -For the generation quality, C++ random generation with MT19937 results is differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor`. Hence, please use `-r, --readNPLatent` for the alignment with Python (this latent file is for output image 512X512 only) +For the generation quality, C++ random generation with MT19937 produces results that differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor`. Hence, please use `-r, --readNPLatent` for the alignment with Python (this latent file is for output image 512X512 only). ## Notes @@ -125,3 +132,7 @@ Guidance scale controls how similar the generated image will be to the prompt. A Negative prompts don't work with LCM because they don’t have any effect on the denoising process. When a LCM is distilled from an LDM via latent consistency distillation (Algorithm 1) with guided distillation, the forward pass of the LCM learns to approximate sampling from the LDM using CFG with the unconditional prompt "" (the empty string). 
Due to this, LCMs currently do not support negative prompts. + +#### LoRA Weights Enabling + +Refer to the [OpenVINO blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline) to get more information on enabling LoRA weights. From 59bff2be166e6775e538dadf33e18d51742f38b0 Mon Sep 17 00:00:00 2001 From: yatarkan Date: Mon, 8 Jul 2024 13:50:21 +0400 Subject: [PATCH 14/16] Test skip optimum-intel and torch deps from image generation --- image_generation/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index d57fe4e0b..776be5b10 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.2.2+cpu; sys_platform != 'darwin' -torch==2.2.2; sys_platform == 'darwin' # Torch wheel for 2.2.2+cpu does not exist for macOS, using 2.2.2 instead +# torch==2.2.2+cpu; sys_platform != 'darwin' +# torch==2.2.2; sys_platform == 'darwin' # Torch wheel for 2.2.2+cpu does not exist for macOS, using 2.2.2 instead diffusers==0.27.2 -optimum-intel[openvino]==1.18.0 +# optimum-intel[openvino]==1.18.0 -r ../samples/requirements.txt From 942458f1c3577d2249f21b1772c2948e3c76909f Mon Sep 17 00:00:00 2001 From: yatarkan Date: Mon, 8 Jul 2024 14:30:43 +0400 Subject: [PATCH 15/16] Remove all deps from image generation --- image_generation/requirements.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index 776be5b10..61b6ebe49 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1,6 +1 @@ ---extra-index-url https://download.pytorch.org/whl/cpu -# torch==2.2.2+cpu; sys_platform != 'darwin' -# torch==2.2.2; sys_platform == 'darwin' # Torch wheel for 2.2.2+cpu does not exist for macOS, using 2.2.2 instead -diffusers==0.27.2 -# 
optimum-intel[openvino]==1.18.0 -r ../samples/requirements.txt From 7705af5ef0a364847af5ba2900550a7db84ece3b Mon Sep 17 00:00:00 2001 From: yatarkan Date: Mon, 8 Jul 2024 15:36:34 +0400 Subject: [PATCH 16/16] Add diffusers dependency to image generation --- image_generation/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/image_generation/requirements.txt b/image_generation/requirements.txt index 61b6ebe49..795dd10cb 100644 --- a/image_generation/requirements.txt +++ b/image_generation/requirements.txt @@ -1 +1,2 @@ -r ../samples/requirements.txt +diffusers==0.27.2