Merge pull request #277 from rupeshs/bump-openvino-2024.4.0
Add Intel Core Ultra Series 2 (Lunar Lake) NPU support
rupeshs authored Nov 3, 2024
2 parents b994476 + ecc685a commit 22fd084
Showing 6 changed files with 30 additions and 15 deletions.
11 changes: 9 additions & 2 deletions Readme.md
@@ -296,7 +296,13 @@ For GPU mode `set device=GPU` and run webui. FastSD GPU benchmark on AI PC as sh
 ### NPU
 
 FastSD CPU now supports the power-efficient NPU (Neural Processing Unit) that comes with Intel Core Ultra processors.
-Please note that NPU support is experimental; currently it supports [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
+
+FastSD has been tested with the NPUs of the following Intel processors:
+
+- Intel Core Ultra Series 1 (Meteor Lake)
+- Intel Core Ultra Series 2 (Lunar Lake)
+
+Currently FastSD supports this model on the NPU: [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
 
 Supports the following modes on NPU:

@@ -308,7 +314,8 @@ To run model in NPU follow these steps (Please make sure that your AI PC's NPU d
 
 - Start webui
 - Select LCM-OpenVINO mode
-- Select the models settings tab and select OpenVINO model `sd15-lcm-square-openvino-int8`
+- Select the models settings tab and select the OpenVINO model `rupeshs/sd15-lcm-square-openvino-int8`
+- Set the device environment variable: `set DEVICE=NPU`
 - Now it will run on the NPU
 
 This is heterogeneous computing, since the text encoder and UNet run on the NPU while the VAE runs on the GPU. Thanks to OpenVINO.
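A minimal Python sketch of that device split, assuming a hypothetical `resolve_devices` helper and a `[text encoder, UNet, VAE]` ordering (FastSD's actual `OvHcLatentConsistency` internals may differ):

```python
import os

# Hypothetical helper, not FastSD's API: returns the target device for
# [text encoder, UNet, VAE], mirroring the behaviour described above.
def resolve_devices() -> list[str]:
    if os.environ.get("DEVICE", "").upper() == "NPU":
        return ["NPU", "NPU", "NPU"]  # run every stage on the NPU
    return ["NPU", "NPU", "GPU"]  # assumed default split: VAE stays on the GPU
```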
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@
 diffusers==0.30.0
 transformers==4.41.2
 PyQt5
 Pillow==9.4.0
-openvino==2024.3.0
+openvino==2024.4.0
 optimum-intel==1.18.2
 onnx==1.16.0
 onnxruntime==1.17.3
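After upgrading the pin, the installed runtime can be confirmed from Python (a quick sanity check; `get_version` is OpenVINO's standard version query):

```python
import openvino as ov

# Expect a 2024.4.x build string after the bump.
print(ov.get_version())
```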
19 changes: 11 additions & 8 deletions src/backend/lcm_text_to_image.py
@@ -121,7 +121,11 @@ def _is_hetero_pipeline(self) -> bool:
 
     def _load_ov_hetero_pipeline(self):
         print("Loading Heterogeneous Compute pipeline")
-        self.pipeline = OvHcLatentConsistency(self.ov_model_id)
+        if DEVICE.upper() == "NPU":
+            device = ["NPU", "NPU", "NPU"]
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id, device)
+        else:
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id)
 
     def _generate_images_hetero_compute(
         self,
@@ -405,16 +409,15 @@ def generate(
         else:
             seeds = [random.randint(0, 999999999) for i in range(lcm_diffusion_setting.number_of_images)]
 
-        if self.use_openvino and not self._is_hetero_pipeline():
+        if self.use_openvino:
             # no support for generators; try at least to ensure reproducible results for single images
             np.random.seed(seeds[0])
 
-        if self._is_hetero_pipeline():
-            torch.manual_seed(seeds[0])
-            lcm_diffusion_setting.seed = seeds[0]
-        else:
-            pipeline_extra_args['generator'] = [
-                torch.Generator(device=self.device).manual_seed(s) for s in seeds]
+        if lcm_diffusion_setting.use_seed and self._is_hetero_pipeline():
+            torch.manual_seed(cur_seed)
+        else:
+            pipeline_extra_args['generator'] = [
+                torch.Generator(device=self.device).manual_seed(s) for s in seeds]
 
         is_openvino_pipe = lcm_diffusion_setting.use_openvino and is_openvino_device()
         if is_openvino_pipe and not self._is_hetero_pipeline():
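A self-contained sketch of the two seeding paths above (illustrative seed values): the heterogeneous pipeline exposes no per-image generator argument, so one global seed is set, while the regular path builds one `torch.Generator` per image.

```python
import torch

seeds = [42, 123, 999]

# Hetero (NPU) path: no generator argument, so seed the global RNG once.
torch.manual_seed(seeds[0])

# Regular path: one generator per requested image keeps each image in a
# batch individually reproducible.
generators = [torch.Generator(device="cpu").manual_seed(s) for s in seeds]
```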
4 changes: 1 addition & 3 deletions src/backend/openvino/stable_diffusion_engine.py
@@ -1115,9 +1115,7 @@ def __init__(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
     def load_model(self, model, model_name, device):
-        if "NPU" in device:
-            with open(os.path.join(model, f"{model_name}.blob"), "rb") as f:
-                return self.core.import_model(f.read(), device)
+        print(f"Compiling the {model_name} to {device} ...")
         return self.core.compile_model(os.path.join(model, f"{model_name}.xml"), device)
 
     def get_timesteps(self, num_inference_steps: int, strength: float, scheduler):
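This simplification works presumably because, with the bumped OpenVINO 2024.4 runtime, NPU targets can compile directly from the IR (`.xml`) like any other device, so the precompiled `.blob` import branch is no longer needed. A standalone sketch (the model path is illustrative):

```python
import os
import openvino as ov

core = ov.Core()
model_dir = "models/sd15-lcm-square-openvino-int8"  # illustrative local path
# One compile_model call now covers CPU, GPU, and NPU targets alike.
unet = core.compile_model(os.path.join(model_dir, "unet.xml"), "NPU")
```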
2 changes: 1 addition & 1 deletion src/constants.py
@@ -2,7 +2,7 @@
 
 cpu_cores = cpu_count()
 cpus = cpu_cores // 2 if cpu_cores else 0
-APP_VERSION = "v1.0.0 beta 64"
+APP_VERSION = "v1.0.0 beta 90"
 LCM_DEFAULT_MODEL = "stabilityai/sd-turbo"
 LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/sd-turbo-openvino"
 APP_NAME = "FastSD CPU"
7 changes: 7 additions & 0 deletions src/frontend/webui/ui.py
@@ -1,4 +1,5 @@
 import gradio as gr
+from backend.device import get_device_name
 from constants import APP_VERSION
 from frontend.webui.text_to_image_ui import get_text_to_image_ui
 from frontend.webui.image_to_image_ui import get_image_to_image_ui
@@ -11,6 +12,7 @@
 from paths import FastStableDiffusionPaths
 from state import get_settings
 
+
 app_settings = get_settings()
 
 
@@ -43,11 +45,16 @@ def change_mode(mode):
     if app_settings.settings.lcm_diffusion_setting.controlnet:
         app_settings.settings.lcm_diffusion_setting.controlnet.enabled = False
 
+
 with gr.Blocks(
     css=FastStableDiffusionPaths.get_css_path(),
     title="FastSD CPU",
 ) as fastsd_web_ui:
     gr.HTML("<center><H1>FastSD CPU</H1></center>")
+    gr.Markdown(
+        f"**Processor : {get_device_name()}**",
+        elem_id="processor",
+    )
     current_mode = "LCM"
     if app_settings.settings.lcm_diffusion_setting.use_openvino:
         current_mode = "LCM-OpenVINO"
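The `backend.device.get_device_name` implementation is not shown in this diff; one plausible sketch using OpenVINO's standard device properties (hypothetical implementation, the real module may differ):

```python
import openvino as ov

# Hypothetical sketch of a device-name helper; FastSD's actual
# backend.device.get_device_name may be implemented differently.
def get_device_name() -> str:
    core = ov.Core()
    # FULL_DEVICE_NAME is a standard OpenVINO device property.
    return core.get_property("CPU", "FULL_DEVICE_NAME")

print(get_device_name())  # rendered as "Processor : ..." in the web UI
```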
