Merge pull request #277 from rupeshs/bump-openvino-2024.4.0
Add Intel Core Ultra Series 2 (Lunar Lake) NPU support
rupeshs authored Nov 3, 2024
2 parents b994476 + ecc685a commit 22fd084
Showing 6 changed files with 30 additions and 15 deletions.
11 changes: 9 additions & 2 deletions Readme.md
@@ -296,7 +296,13 @@ For GPU mode `set device=GPU` and run webui. FastSD GPU benchmark on AI PC as sh
 ### NPU
 
 FastSD CPU now supports the power-efficient NPU (Neural Processing Unit) that comes with Intel Core Ultra processors.
-Please note that NPU support is experimental; currently it supports [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
+
+FastSD has been tested with the NPUs of the following Intel processors:
+
+- Intel Core Ultra Series 1 (Meteor Lake)
+- Intel Core Ultra Series 2 (Lunar Lake)
+
+Currently FastSD supports this model on the NPU: [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
 
 Supports the following modes on NPU:

@@ -308,7 +314,8 @@ To run model in NPU follow these steps (Please make sure that your AI PC's NPU d
 
 - Start webui
 - Select LCM-OpenVINO mode
-- Select the models settings tab and select OpenVINO model `sd15-lcm-square-openvino-int8`
+- Select the models settings tab and select the OpenVINO model `rupeshs/sd15-lcm-square-openvino-int8`
+- Set the device environment variable: `set DEVICE=NPU`
 - Now it will run on the NPU
 
 This is heterogeneous computing, since the text encoder and UNet run on the NPU while the VAE runs on the GPU. Thanks to OpenVINO.
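A minimal Python sketch of that device split, assuming a hypothetical `resolve_devices` helper and a `[text encoder, UNet, VAE]` ordering (FastSD's actual `OvHcLatentConsistency` internals may differ):

```python
import os

# Hypothetical helper, not FastSD's API: returns the target device for
# [text encoder, UNet, VAE], mirroring the behaviour described above.
def resolve_devices() -> list[str]:
    if os.environ.get("DEVICE", "").upper() == "NPU":
        return ["NPU", "NPU", "NPU"]  # run every stage on the NPU
    return ["NPU", "NPU", "GPU"]  # assumed default split: VAE stays on the GPU
```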
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@
 diffusers==0.30.0
 transformers==4.41.2
 PyQt5
 Pillow==9.4.0
-openvino==2024.3.0
+openvino==2024.4.0
 optimum-intel==1.18.2
 onnx==1.16.0
 onnxruntime==1.17.3
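After upgrading the pin, the installed runtime can be confirmed from Python (a quick sanity check; `get_version` is OpenVINO's standard version query):

```python
import openvino as ov

# Expect a 2024.4.x build string after the bump.
print(ov.get_version())
```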
19 changes: 11 additions & 8 deletions src/backend/lcm_text_to_image.py
@@ -121,7 +121,11 @@ def _is_hetero_pipeline(self) -> bool:
 
     def _load_ov_hetero_pipeline(self):
         print("Loading Heterogeneous Compute pipeline")
-        self.pipeline = OvHcLatentConsistency(self.ov_model_id)
+        if DEVICE.upper() == "NPU":
+            device = ["NPU", "NPU", "NPU"]
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id, device)
+        else:
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id)
 
     def _generate_images_hetero_compute(
         self,
@@ -405,16 +409,15 @@ def generate(
         else:
             seeds = [random.randint(0, 999999999) for i in range(lcm_diffusion_setting.number_of_images)]
 
-        if self.use_openvino and not self._is_hetero_pipeline():
+        if self.use_openvino:
             # no support for generators; try at least to ensure reproducible results for single images
             np.random.seed(seeds[0])
 
-        if self._is_hetero_pipeline():
-            torch.manual_seed(seeds[0])
-            lcm_diffusion_setting.seed = seeds[0]
-        else:
-            pipeline_extra_args['generator'] = [
-                torch.Generator(device=self.device).manual_seed(s) for s in seeds]
+        if lcm_diffusion_setting.use_seed and self._is_hetero_pipeline():
+            torch.manual_seed(cur_seed)
+        else:
+            pipeline_extra_args['generator'] = [
+                torch.Generator(device=self.device).manual_seed(s) for s in seeds]
 
         is_openvino_pipe = lcm_diffusion_setting.use_openvino and is_openvino_device()
         if is_openvino_pipe and not self._is_hetero_pipeline():
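A self-contained sketch of the two seeding paths above (illustrative seed values): the heterogeneous pipeline exposes no per-image generator argument, so one global seed is set, while the regular path builds one `torch.Generator` per image.

```python
import torch

seeds = [42, 123, 999]

# Hetero (NPU) path: no generator argument, so seed the global RNG once.
torch.manual_seed(seeds[0])

# Regular path: one generator per requested image keeps each image in a
# batch individually reproducible.
generators = [torch.Generator(device="cpu").manual_seed(s) for s in seeds]
```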
4 changes: 1 addition & 3 deletions src/backend/openvino/stable_diffusion_engine.py
@@ -1115,9 +1115,7 @@ def __init__(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
     def load_model(self, model, model_name, device):
-        if "NPU" in device:
-            with open(os.path.join(model, f"{model_name}.blob"), "rb") as f:
-                return self.core.import_model(f.read(), device)
+        print(f"Compiling the {model_name} to {device} ...")
         return self.core.compile_model(os.path.join(model, f"{model_name}.xml"), device)
 
     def get_timesteps(self, num_inference_steps: int, strength: float, scheduler):
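This simplification works presumably because, with the bumped OpenVINO 2024.4 runtime, NPU targets can compile directly from the IR (`.xml`) like any other device, so the precompiled `.blob` import branch is no longer needed. A standalone sketch (the model path is illustrative):

```python
import os
import openvino as ov

core = ov.Core()
model_dir = "models/sd15-lcm-square-openvino-int8"  # illustrative local path
# One compile_model call now covers CPU, GPU, and NPU targets alike.
unet = core.compile_model(os.path.join(model_dir, "unet.xml"), "NPU")
```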
2 changes: 1 addition & 1 deletion src/constants.py
@@ -2,7 +2,7 @@
 
 cpu_cores = cpu_count()
 cpus = cpu_cores // 2 if cpu_cores else 0
-APP_VERSION = "v1.0.0 beta 64"
+APP_VERSION = "v1.0.0 beta 90"
 LCM_DEFAULT_MODEL = "stabilityai/sd-turbo"
 LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/sd-turbo-openvino"
 APP_NAME = "FastSD CPU"
7 changes: 7 additions & 0 deletions src/frontend/webui/ui.py
@@ -1,4 +1,5 @@
 import gradio as gr
+from backend.device import get_device_name
 from constants import APP_VERSION
 from frontend.webui.text_to_image_ui import get_text_to_image_ui
 from frontend.webui.image_to_image_ui import get_image_to_image_ui
@@ -11,6 +12,7 @@
 from paths import FastStableDiffusionPaths
 from state import get_settings
 
+
 app_settings = get_settings()
 
 
@@ -43,11 +45,16 @@ def change_mode(mode):
     if app_settings.settings.lcm_diffusion_setting.controlnet:
         app_settings.settings.lcm_diffusion_setting.controlnet.enabled = False
 
+
 with gr.Blocks(
     css=FastStableDiffusionPaths.get_css_path(),
     title="FastSD CPU",
 ) as fastsd_web_ui:
     gr.HTML("<center><H1>FastSD CPU</H1></center>")
+    gr.Markdown(
+        f"**Processor : {get_device_name()}**",
+        elem_id="processor",
+    )
     current_mode = "LCM"
     if app_settings.settings.lcm_diffusion_setting.use_openvino:
         current_mode = "LCM-OpenVINO"
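The `backend.device.get_device_name` implementation is not shown in this diff; one plausible sketch using OpenVINO's standard device properties (hypothetical implementation, the real module may differ):

```python
import openvino as ov

# Hypothetical sketch of a device-name helper; FastSD's actual
# backend.device.get_device_name may be implemented differently.
def get_device_name() -> str:
    core = ov.Core()
    # FULL_DEVICE_NAME is a standard OpenVINO device property.
    return core.get_property("CPU", "FULL_DEVICE_NAME")

print(get_device_name())  # rendered as "Processor : ..." in the web UI
```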
