diff --git a/Readme.md b/Readme.md
index cd9ecb0..650c1c5 100644
--- a/Readme.md
+++ b/Readme.md
@@ -296,7 +296,13 @@ For GPU mode `set device=GPU` and run webui. FastSD GPU benchmark on AI PC as sh
 ### NPU
 
 FastSD CPU now supports power efficient NPU (Neural Processing Unit) that comes with Intel Core Ultra processors.
-Please note that NPU support is experimental currently support [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
+
+FastSD has been tested on the NPUs of the following Intel processors:
+
+- Intel Core Ultra Series 1 (Meteor Lake)
+- Intel Core Ultra Series 2 (Lunar Lake)
+
+Currently FastSD supports the following model on the NPU: [rupeshs/sd15-lcm-square-openvino-int8](https://huggingface.co/rupeshs/sd15-lcm-square-openvino-int8).
 
 Supports following modes on NPU :
 
@@ -308,7 +314,8 @@ To run model in NPU follow these steps (Please make sure that your AI PC's NPU d
 
 - Start webui
 - Select LCM-OpenVINO mode
-- Select the models settings tab and select OpenVINO model `sd15-lcm-square-openvino-int8`
+- Select the models settings tab and select the OpenVINO model `rupeshs/sd15-lcm-square-openvino-int8`
+- Set the device environment variable: `set DEVICE=NPU`
 - Now it will run on the NPU
 
 This is heterogeneous computing since text encoder and Unet will use NPU and VAE will use GPU for processing. Thanks to OpenVINO.
diff --git a/requirements.txt b/requirements.txt
index 405169f..fdccf14 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ diffusers==0.30.0
 transformers==4.41.2
 PyQt5
 Pillow==9.4.0
-openvino==2024.3.0
+openvino==2024.4.0
 optimum-intel==1.18.2
 onnx==1.16.0
 onnxruntime==1.17.3
diff --git a/src/backend/lcm_text_to_image.py b/src/backend/lcm_text_to_image.py
index 18bc512..b1160f1 100644
--- a/src/backend/lcm_text_to_image.py
+++ b/src/backend/lcm_text_to_image.py
@@ -121,7 +121,11 @@ def _is_hetero_pipeline(self) -> bool:
 
     def _load_ov_hetero_pipeline(self):
         print("Loading Heterogeneous Compute pipeline")
-        self.pipeline = OvHcLatentConsistency(self.ov_model_id)
+        if DEVICE.upper() == "NPU":
+            device = ["NPU", "NPU", "NPU"]
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id, device)
+        else:
+            self.pipeline = OvHcLatentConsistency(self.ov_model_id)
 
     def _generate_images_hetero_compute(
         self,
@@ -405,16 +409,15 @@ def generate(
         else:
             seeds = [random.randint(0,999999999) for i in range(lcm_diffusion_setting.number_of_images)]
 
-        if self.use_openvino and not self._is_hetero_pipeline():
+        if self.use_openvino:
             # no support for generators; try at least to ensure reproducible results for single images
             np.random.seed(seeds[0])
-
+            if self._is_hetero_pipeline():
+                torch.manual_seed(seeds[0])
+                lcm_diffusion_setting.seed = seeds[0]
         else:
-            if lcm_diffusion_setting.use_seed and self._is_hetero_pipeline():
-                torch.manual_seed(cur_seed)
-            else:
-                pipeline_extra_args['generator'] = [
-                    torch.Generator(device=self.device).manual_seed(s) for s in seeds]
+            pipeline_extra_args['generator'] = [
+                torch.Generator(device=self.device).manual_seed(s) for s in seeds]
 
         is_openvino_pipe = lcm_diffusion_setting.use_openvino and is_openvino_device()
         if is_openvino_pipe and not self._is_hetero_pipeline():
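Note on the `generate` hunk above: it simplifies the seeding strategy so that all OpenVINO pipelines seed the global RNGs (the heterogeneous pipeline exposes no `generator` argument), while regular pipelines get one `torch.Generator` per requested image. A minimal standalone sketch of the resulting logic; the function name and arguments are illustrative, since the real code reads these values from `self` and `lcm_diffusion_setting`:

```python
import random

import numpy as np
import torch


def seed_for_generation(use_openvino: bool, is_hetero: bool, num_images: int, device: str = "cpu"):
    """Best-effort reproducibility matching the hunk above (names are illustrative)."""
    seeds = [random.randint(0, 999999999) for _ in range(num_images)]
    extra_args = {}
    if use_openvino:
        # OpenVINO pipelines accept no generators; seed the global RNGs so
        # at least single-image runs are reproducible.
        np.random.seed(seeds[0])
        if is_hetero:
            torch.manual_seed(seeds[0])
    else:
        # One generator per image keeps every image in a batch reproducible.
        extra_args["generator"] = [
            torch.Generator(device=device).manual_seed(s) for s in seeds
        ]
    return seeds, extra_args
```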
diff --git a/src/backend/openvino/stable_diffusion_engine.py b/src/backend/openvino/stable_diffusion_engine.py
index 7fd75ac..3546db2 100644
--- a/src/backend/openvino/stable_diffusion_engine.py
+++ b/src/backend/openvino/stable_diffusion_engine.py
@@ -1115,9 +1115,7 @@ def __init__(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
     def load_model(self, model, model_name, device):
-        if "NPU" in device:
-            with open(os.path.join(model, f"{model_name}.blob"), "rb") as f:
-                return self.core.import_model(f.read(), device)
+        print(f"Compiling the {model_name} to {device} ...")
         return self.core.compile_model(os.path.join(model, f"{model_name}.xml"), device)
 
     def get_timesteps(self, num_inference_steps:int, strength:float, scheduler):
diff --git a/src/constants.py b/src/constants.py
index c5b90cd..f04d0f2 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -2,7 +2,7 @@
 
 cpu_cores = cpu_count()
 cpus = cpu_cores // 2 if cpu_cores else 0
-APP_VERSION = "v1.0.0 beta 64"
+APP_VERSION = "v1.0.0 beta 90"
 LCM_DEFAULT_MODEL = "stabilityai/sd-turbo"
 LCM_DEFAULT_MODEL_OPENVINO = "rupeshs/sd-turbo-openvino"
 APP_NAME = "FastSD CPU"
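The `load_model` change above drops the special-cased import of precompiled `.blob` files: with the openvino 2024.4 pin, every device, including the NPU, goes through `compile_model`, which compiles the IR on the fly. A minimal sketch of the simplified loader; the model layout mirrors the diff, while the directory path and standalone function form are illustrative:

```python
import os

from openvino import Core  # openvino==2024.4.0, as pinned in requirements.txt

core = Core()


def load_model(model_dir: str, model_name: str, device: str):
    # Compile the OpenVINO IR (.xml + .bin) directly for the target device;
    # no precompiled NPU .blob is needed anymore.
    print(f"Compiling the {model_name} to {device} ...")
    return core.compile_model(os.path.join(model_dir, f"{model_name}.xml"), device)
```

Presumably OpenVINO's own NPU compilation (and model cache, when configured) is what lets the manual blob path be retired here.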

diff --git a/src/frontend/webui/ui.py b/src/frontend/webui/ui.py
index cd0c40c..f85033c 100644
--- a/src/frontend/webui/ui.py
+++ b/src/frontend/webui/ui.py
@@ -1,4 +1,5 @@
 import gradio as gr
+from backend.device import get_device_name
 from constants import APP_VERSION
 from frontend.webui.text_to_image_ui import get_text_to_image_ui
 from frontend.webui.image_to_image_ui import get_image_to_image_ui
@@ -11,6 +12,7 @@
 from paths import FastStableDiffusionPaths
 from state import get_settings
 
+
 app_settings = get_settings()
 
 
@@ -43,11 +45,16 @@ def change_mode(mode):
     if app_settings.settings.lcm_diffusion_setting.controlnet:
         app_settings.settings.lcm_diffusion_setting.controlnet.enabled = False
 
+
 with gr.Blocks(
     css=FastStableDiffusionPaths.get_css_path(),
     title="FastSD CPU",
 ) as fastsd_web_ui:
     gr.HTML("<center><H1>FastSD CPU</H1></center>")
+    gr.Markdown(
+        f"**Processor : {get_device_name()}**",
+        elem_id="processor",
+    )
     current_mode = "LCM"
     if app_settings.settings.lcm_diffusion_setting.use_openvino:
         current_mode = "LCM-OpenVINO"