From 50e23dd1180b9950bd896e904e20119b33045434 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Fri, 23 Feb 2024 18:44:51 +0100
Subject: [PATCH 01/34] feat(intel): add diffusers support

---
 backend/python/diffusers/Makefile            |  4 ++
 backend/python/diffusers/diffusers-intel.yml | 65 ++++++++++++++++++++
 backend/python/diffusers/run.sh              |  5 ++
 3 files changed, 74 insertions(+)
 create mode 100644 backend/python/diffusers/diffusers-intel.yml

diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile
index 70a62b60daa9..1b1fbff84e45 100644
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -4,6 +4,10 @@ ifeq ($(BUILD_TYPE), hipblas)
 export CONDA_ENV_PATH = "diffusers-rocm.yml"
 endif
 
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export CONDA_ENV_PATH = "diffusers-intel.yml"
+endif
+
 .PHONY: diffusers
 diffusers:
 	@echo "Installing $(CONDA_ENV_PATH)..."
diff --git a/backend/python/diffusers/diffusers-intel.yml b/backend/python/diffusers/diffusers-intel.yml
new file mode 100644
index 000000000000..910ebf42c64b
--- /dev/null
+++ b/backend/python/diffusers/diffusers-intel.yml
@@ -0,0 +1,65 @@
+name: diffusers
+channels:
+  - defaults
+  - intel
+  - conda-forge
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.11=h7f8727e_2
+  - pip=23.2.1=py311h06a4308_0
+  - python=3.11.5=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py311h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2023c=h04d1e81_0
+  - wheel=0.41.2=py311h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - intel-extension-for-pytorch=2.1.10
+  - pytorch=2.1.0 
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+      - accelerate>=0.11.0
+      - certifi==2023.7.22
+      - charset-normalizer==3.3.0
+      - compel==2.0.2
+      - diffusers==0.24.0
+      - filelock==3.12.4
+      - fsspec==2023.9.2
+      - grpcio==1.59.0
+      - huggingface-hub>=0.19.4
+      - idna==3.4
+      - importlib-metadata==6.8.0
+      - jinja2==3.1.2
+      - markupsafe==2.1.3
+      - mpmath==1.3.0
+      - networkx==3.1
+      - numpy==1.26.0
+      - omegaconf
+      - packaging==23.2
+      - pillow==10.0.1
+      - protobuf==4.24.4
+      - psutil==5.9.5
+      - pyparsing==3.1.1
+      - pyyaml==6.0.1
+      - regex==2023.10.3
+      - requests==2.31.0
+      - safetensors==0.4.0
+      - sympy==1.12
+      - tqdm==4.66.1
+      - transformers>=4.25.1
+      - triton==2.1.0
+      - typing-extensions==4.8.0
+      - urllib3==2.0.6
+      - zipp==3.17.0
+prefix: /opt/conda/envs/diffusers
diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index 8e3e1bbfbfdd..8080230bd9f4 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -8,6 +8,11 @@ export PATH=$PATH:/opt/conda/bin
 # Activate conda environment
 source activate diffusers
 
+if [ -d "/opt/intel" ]; then
+    source /opt/intel/oneapi/compiler/latest/env/vars.sh
+    source /opt/intel/oneapi/mkl/latest/env/vars.sh
+fi
+
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 

From a544b8f67d714f976c2718a4f439b910514a113e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 24 Feb 2024 00:41:08 +0100
Subject: [PATCH 02/34] try to consume upstream container image

---
 Makefile                                      |  7 +++++++
 backend/python/diffusers/backend_diffusers.py |  7 +++++++
 backend/python/diffusers/run.sh               | 12 +++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index b24ed7972151..1c5cf1c131b2 100644
--- a/Makefile
+++ b/Makefile
@@ -557,3 +557,10 @@ docker-image-intel:
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
+
+docker-image-intel-xpu:
+	docker build \
+		--build-arg BASE_IMAGE=intel/intel-extension-for-pytorch:2.1.10-xpu \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="none" \
+		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
\ No newline at end of file
diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py
index 6780cae626a6..eef12011f4fa 100755
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
@@ -29,6 +29,7 @@
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 COMPEL=os.environ.get("COMPEL", "1") == "1"
+XPU=os.environ.get("XPU", "1") == "1"
 CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
 SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
 CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
@@ -36,6 +37,10 @@
 DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
 FRAMES=os.environ.get("FRAMES", "64")
 
+if XPU:
+    import intel_extension_for_pytorch as ipex
+    print(ipex.xpu.get_device_name(0))
+
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
 
@@ -247,6 +252,8 @@ def LoadModel(self, request, context):
                 self.pipe.to('cuda')
                 if self.controlnet:
                     self.controlnet.to('cuda')
+            if XPU:
+                self.pipe = self.pipe.to("xpu")
             # Assume directory from request.ModelFile.
             # Only if request.LoraAdapter it's not an absolute path
             if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index 8080230bd9f4..c4a0b6c79b93 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -5,12 +5,14 @@
 
 export PATH=$PATH:/opt/conda/bin
 
-# Activate conda environment
-source activate diffusers
-
 if [ -d "/opt/intel" ]; then
-    source /opt/intel/oneapi/compiler/latest/env/vars.sh
-    source /opt/intel/oneapi/mkl/latest/env/vars.sh
+    # Assumes we are using the Intel oneAPI container image
+    #source /opt/intel/oneapi/compiler/latest/env/vars.sh
+    #source /opt/intel/oneapi/mkl/latest/env/vars.sh
+    export XPU=1
+else
+    # Activate conda environment
+    source activate diffusers
 fi
 
 # get the directory where the bash script is located

From 67865aa1bc2e6e0df58736125a091087cc2c2f78 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 24 Feb 2024 00:43:53 +0100
Subject: [PATCH 03/34] Dockerfile: run the requirements-core stage as root (debug)

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index a04a866ec7d1..903acdb13055 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,8 @@ ARG BASE_IMAGE=ubuntu:22.04
 # extras or core
 FROM ${BASE_IMAGE} as requirements-core
 
+USER root
+
 ARG GO_VERSION=1.21.7
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11

From 0c23fbafbc674749eb3dcdeb5bb58f2aa0aeedf8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 24 Feb 2024 23:51:23 +0100
Subject: [PATCH 04/34] Manually install deps

---
 backend/python/diffusers/install.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index 0429826e3f4d..0ea8f08fee21 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -14,6 +14,16 @@ else
     echo "Virtual environment already exists."
 fi
 
+if [ -d "/opt/intel" ]; then
+    pip install --upgrade google-api-python-client
+    pip install --upgrade grpcio
+    pip install --upgrade grpcio-tools
+    pip install diffusers==0.24.0
+    pip install transformers>=4.25.1
+    pip install accelerate
+    pip install compel==2.0.2
+fi
+
 if [ "$PIP_CACHE_PURGE" = true ] ; then
     export PATH=$PATH:/opt/conda/bin
 

From 7115277a3f1a5faf120e10379ea831e790ff1941 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Sat, 24 Feb 2024 23:51:37 +0100
Subject: [PATCH 05/34] Map transformers/hf cache dir to modelpath if not
 specified

---
 pkg/model/initializers.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index fce44fe15469..1e2af8f9d287 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -69,6 +69,13 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 			return fmt.Sprintf("127.0.0.1:%d", port), nil
 		}
 
+		// If no specific model path is set for transformers/HF, set it to the model path
+		for _, env := range []string{"HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} {
+			if os.Getenv(env) == "" {
+				os.Setenv(env, ml.ModelPath)
+			}
+		}
+
 		// Check if the backend is provided as external
 		if uri, ok := o.externalBackends[backend]; ok {
 			log.Debug().Msgf("Loading external backend: %s", uri)

From e5fdcbe6145e80e32081adf075a2a1415e24b17f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:37:04 +0100
Subject: [PATCH 06/34] fix(compel): update initialization, pass all gRPC
 options

---
 backend/python/diffusers/backend_diffusers.py | 14 ++++++++++----
 core/backend/image.go                         | 18 +++---------------
 core/backend/options.go                       | 18 +++++++++++++-----
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py
index eef12011f4fa..e85684fa72c9 100755
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
@@ -21,7 +21,7 @@
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
 from diffusers.utils import load_image,export_to_video
-from compel import Compel
+from compel import Compel, ReturnedEmbeddingsType
 
 from transformers import CLIPTextModel
 from safetensors.torch import load_file
@@ -237,7 +237,12 @@ def LoadModel(self, request, context):
                 self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
                 
             if not self.img2vid:
-                self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
+                self.compel = Compel(
+                    tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], 
+                    text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+                    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                    requires_pooled=[False, True]
+                    )
 
 
             if request.ControlNet:
@@ -393,8 +398,9 @@ def GenerateImage(self, request, context):
 
         image = {}
         if COMPEL:
-            conditioning = self.compel.build_conditioning_tensor(prompt)
-            kwargs["prompt_embeds"]= conditioning
+            conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
+            kwargs["prompt_embeds"] = conditioning
+            kwargs["pooled_prompt_embeds"] = pooled
             # pass the kwargs dictionary to the self.pipe method
             image = self.pipe(
                 guidance_scale=self.cfg_scale,
diff --git a/core/backend/image.go b/core/backend/image.go
index 60db48f96ba1..f7209f9d2a7c 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -9,26 +9,14 @@ import (
 
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
 
+	gRPCOpts := gRPCModelOpts(backendConfig)
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithThreads(uint32(appConfig.Threads)),
 		model.WithContext(appConfig.Context),
 		model.WithModel(backendConfig.Model),
-		model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
-			CUDA:          backendConfig.CUDA || backendConfig.Diffusers.CUDA,
-			SchedulerType: backendConfig.Diffusers.SchedulerType,
-			PipelineType:  backendConfig.Diffusers.PipelineType,
-			CFGScale:      backendConfig.Diffusers.CFGScale,
-			LoraAdapter:   backendConfig.LoraAdapter,
-			LoraScale:     backendConfig.LoraScale,
-			LoraBase:      backendConfig.LoraBase,
-			IMG2IMG:       backendConfig.Diffusers.IMG2IMG,
-			CLIPModel:     backendConfig.Diffusers.ClipModel,
-			CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder,
-			CLIPSkip:      int32(backendConfig.Diffusers.ClipSkip),
-			ControlNet:    backendConfig.Diffusers.ControlNet,
-		}),
+		model.WithLoadGRPCLoadModelOpts(gRPCOpts),
 	})
 
 	inferenceModel, err := loader.BackendLoader(
diff --git a/core/backend/options.go b/core/backend/options.go
index d2bbb2b88e6d..3af6f6797a67 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -40,11 +40,23 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	}
 
 	return &pb.ModelOptions{
+		CUDA:                 c.CUDA || c.Diffusers.CUDA,
+		SchedulerType:        c.Diffusers.SchedulerType,
+		PipelineType:         c.Diffusers.PipelineType,
+		CFGScale:             c.Diffusers.CFGScale,
+		LoraAdapter:          c.LoraAdapter,
+		LoraScale:            c.LoraScale,
+		F16Memory:            c.F16,
+		LoraBase:             c.LoraBase,
+		IMG2IMG:              c.Diffusers.IMG2IMG,
+		CLIPModel:            c.Diffusers.ClipModel,
+		CLIPSubfolder:        c.Diffusers.ClipSubFolder,
+		CLIPSkip:             int32(c.Diffusers.ClipSkip),
+		ControlNet:           c.Diffusers.ControlNet,
 		ContextSize:          int32(c.ContextSize),
 		Seed:                 int32(c.Seed),
 		NBatch:               int32(b),
 		NoMulMatQ:            c.NoMulMatQ,
-		CUDA:                 c.CUDA, // diffusers, transformers
 		DraftModel:           c.DraftModel,
 		AudioPath:            c.VallE.AudioPath,
 		Quantization:         c.Quantization,
@@ -58,12 +70,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		YarnAttnFactor:       c.YarnAttnFactor,
 		YarnBetaFast:         c.YarnBetaFast,
 		YarnBetaSlow:         c.YarnBetaSlow,
-		LoraAdapter:          c.LoraAdapter,
-		LoraBase:             c.LoraBase,
-		LoraScale:            c.LoraScale,
 		NGQA:                 c.NGQA,
 		RMSNormEps:           c.RMSNormEps,
-		F16Memory:            c.F16,
 		MLock:                c.MMlock,
 		RopeFreqBase:         c.RopeFreqBase,
 		RopeScaling:          c.RopeScaling,

From 465b029e848984032f9eaf0aa560782a8b0e0c28 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:44:18 +0100
Subject: [PATCH 07/34] fix: add dependencies, implement transformers for xpu

---
 backend/python/diffusers/install.sh           | 17 ++++++++-------
 backend/python/diffusers/run.sh               |  3 +--
 backend/python/transformers/Makefile          |  1 +
 backend/python/transformers/install.sh        | 18 ++++++++++++++++
 backend/python/transformers/run.sh            | 10 +++++++--
 .../transformers/transformers_server.py       | 21 ++++++++++++++++++-
 6 files changed, 58 insertions(+), 12 deletions(-)
 create mode 100755 backend/python/transformers/install.sh

diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index 0ea8f08fee21..4f095912a131 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -15,13 +15,16 @@ else
 fi
 
 if [ -d "/opt/intel" ]; then
-    pip install --upgrade google-api-python-client
-    pip install --upgrade grpcio
-    pip install --upgrade grpcio-tools
-    pip install diffusers==0.24.0
-    pip install transformers>=4.25.1
-    pip install accelerate
-    pip install compel==2.0.2
+    # If the directory exists, we assume we are using the intel image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel-extension-for-pytorch==2.0.120+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl-aitools/
+    pip install google-api-python-client \
+                grpcio \
+                grpcio-tools \
+                diffusers==0.24.0 \
+                transformers>=4.25.1 \
+                accelerate \
+                compel==2.0.2
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index c4a0b6c79b93..7ff71e3988a1 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -7,8 +7,7 @@ export PATH=$PATH:/opt/conda/bin
 
 if [ -d "/opt/intel" ]; then
     # Assumes we are using the Intel oneAPI container image
-    #source /opt/intel/oneapi/compiler/latest/env/vars.sh
-    #source /opt/intel/oneapi/mkl/latest/env/vars.sh
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
     export XPU=1
 else
     # Activate conda environment
diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile
index 4eeb9ad54136..b957b10e1f15 100644
--- a/backend/python/transformers/Makefile
+++ b/backend/python/transformers/Makefile
@@ -1,6 +1,7 @@
 .PHONY: transformers
 transformers:
 	$(MAKE) -C ../common-env/transformers
+	bash install.sh
 
 .PHONY: run
 run:
diff --git a/backend/python/transformers/install.sh b/backend/python/transformers/install.sh
new file mode 100755
index 000000000000..6d930f2de267
--- /dev/null
+++ b/backend/python/transformers/install.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -ex
+
+if [ -d "/opt/intel" ]; then
+    # If the directory exists, we assume we are using the intel image
+    # (no conda env)
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    pip install intel-extension-for-transformers
+fi
+
+if [ "$PIP_CACHE_PURGE" = true ] ; then
+    export PATH=$PATH:/opt/conda/bin
+
+    # Activate conda environment
+    source activate diffusers
+
+    pip cache purge
+fi
\ No newline at end of file
diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh
index e6a42b7e1b6d..409cb7c8a7aa 100755
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@@ -5,8 +5,14 @@
 
 export PATH=$PATH:/opt/conda/bin
 
-# Activate conda environment
-source activate transformers
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+else
+    # Activate conda environment
+    source activate transformers
+fi
 
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index fe0b815a2226..3ebccd233760 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -16,7 +16,15 @@
 import grpc
 import torch
 import torch.cuda
-from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
+
+XPU=os.environ.get("XPU", "1") == "1"
+if XPU:
+    import intel_extension_for_pytorch as ipex
+    from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
+    from transformers import AutoTokenizer, AutoModel, set_seed
+else:
+    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed
+
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -75,6 +83,17 @@ def LoadModel(self, request, context):
 
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.CUDA = False
+            self.XPU = False
+
+            if XPU:
+                try:
+                    print("Loading model", model_name, "to XPU.", file=sys.stderr)
+                    device_map = "xpu"
+                    self.model = self.model.to(device_map)
+                    self.XPU = True
+                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map)
+                except Exception as err:
+                    print("Not using XPU:", err, file=sys.stderr)
 
             if request.CUDA or torch.cuda.is_available():
                 try:

From 94789f527cb1e3bdf9dc3b39e1fda0beebac1220 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:44:43 +0100
Subject: [PATCH 08/34] base it on the oneAPI image

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1c5cf1c131b2..324aedfd8d2a 100644
--- a/Makefile
+++ b/Makefile
@@ -560,7 +560,7 @@ docker-image-intel:
 
 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/intel-extension-for-pytorch:2.1.10-xpu \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
\ No newline at end of file

From 9aacbc77aa56bf46b133534ada550fd632df021c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:46:06 +0100
Subject: [PATCH 09/34] Add pillow

---
 backend/python/diffusers/install.sh | 3 ++-
 core/backend/image.go               | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index 4f095912a131..c8e8b4518b01 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -24,7 +24,8 @@ if [ -d "/opt/intel" ]; then
                 diffusers==0.24.0 \
                 transformers>=4.25.1 \
                 accelerate \
-                compel==2.0.2
+                compel==2.0.2 \
+                Pillow
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/core/backend/image.go b/core/backend/image.go
index f7209f9d2a7c..478c32fdb6d6 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -13,7 +13,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithThreads(uint32(appConfig.Threads)),
+		model.WithThreads(uint32(backendConfig.Threads)),
 		model.WithContext(appConfig.Context),
 		model.WithModel(backendConfig.Model),
 		model.WithLoadGRPCLoadModelOpts(gRPCOpts),

From 3878eda59cb7ad5435b947cf045ad4bd51135ea8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:54:44 +0100
Subject: [PATCH 10/34] set threads if specified when launching the API

---
 core/backend/image.go | 7 +++++--
 core/backend/llm.go   | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/core/backend/image.go b/core/backend/image.go
index 478c32fdb6d6..79b8d4ba15c4 100644
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -8,12 +8,15 @@ import (
 )
 
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
-
+	threads := backendConfig.Threads
+	if threads == 0 && appConfig.Threads != 0 {
+		threads = appConfig.Threads
+	}
 	gRPCOpts := gRPCModelOpts(backendConfig)
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithThreads(uint32(backendConfig.Threads)),
+		model.WithThreads(uint32(threads)),
 		model.WithContext(appConfig.Context),
 		model.WithModel(backendConfig.Model),
 		model.WithLoadGRPCLoadModelOpts(gRPCOpts),
diff --git a/core/backend/llm.go b/core/backend/llm.go
index f16878c0f588..54e261889b8c 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -28,7 +28,10 @@ type TokenUsage struct {
 
 func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model
-
+	threads := c.Threads
+	if threads == 0 && o.Threads != 0 {
+		threads = o.Threads
+	}
 	grpcOpts := gRPCModelOpts(c)
 
 	var inferenceModel grpc.Backend
@@ -36,7 +39,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
 
 	opts := modelOpts(c, o, []model.Option{
 		model.WithLoadGRPCLoadModelOpts(grpcOpts),
-		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
+		model.WithThreads(uint32(threads)), // some models uses this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModel(modelFile),
 		model.WithContext(o.Context),

From 70880fc7d037a747cd1b001cac94b55dc2439ad4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 18:57:36 +0100
Subject: [PATCH 11/34] Skip conda install if intel

---
 backend/python/diffusers/Makefile            |  2 +-
 backend/python/diffusers/diffusers-intel.yml | 65 --------------------
 backend/python/diffusers/install.sh          | 16 +++--
 3 files changed, 11 insertions(+), 72 deletions(-)
 delete mode 100644 backend/python/diffusers/diffusers-intel.yml

diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile
index 1b1fbff84e45..df3ee4da9f04 100644
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -5,7 +5,7 @@ export CONDA_ENV_PATH = "diffusers-rocm.yml"
 endif
 
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-export CONDA_ENV_PATH = "diffusers-intel.yml"
+export SKIP=1
 endif
 
 .PHONY: diffusers
diff --git a/backend/python/diffusers/diffusers-intel.yml b/backend/python/diffusers/diffusers-intel.yml
deleted file mode 100644
index 910ebf42c64b..000000000000
--- a/backend/python/diffusers/diffusers-intel.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: diffusers
-channels:
-  - defaults
-  - intel
-  - conda-forge
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.08.22=h06a4308_0
-  - ld_impl_linux-64=2.38=h1181459_1
-  - libffi=3.4.4=h6a678d5_0
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libstdcxx-ng=11.2.0=h1234567_1
-  - libuuid=1.41.5=h5eee18b_0
-  - ncurses=6.4=h6a678d5_0
-  - openssl=3.0.11=h7f8727e_2
-  - pip=23.2.1=py311h06a4308_0
-  - python=3.11.5=h955ad1f_0
-  - readline=8.2=h5eee18b_0
-  - setuptools=68.0.0=py311h06a4308_0
-  - sqlite=3.41.2=h5eee18b_0
-  - tk=8.6.12=h1ccaba5_0
-  - tzdata=2023c=h04d1e81_0
-  - wheel=0.41.2=py311h06a4308_0
-  - xz=5.4.2=h5eee18b_0
-  - intel-extension-for-pytorch=2.1.10
-  - pytorch=2.1.0 
-  - zlib=1.2.13=h5eee18b_0
-  - pip:
-      - accelerate>=0.11.0
-      - certifi==2023.7.22
-      - charset-normalizer==3.3.0
-      - compel==2.0.2
-      - diffusers==0.24.0
-      - filelock==3.12.4
-      - fsspec==2023.9.2
-      - grpcio==1.59.0
-      - huggingface-hub>=0.19.4
-      - idna==3.4
-      - importlib-metadata==6.8.0
-      - jinja2==3.1.2
-      - markupsafe==2.1.3
-      - mpmath==1.3.0
-      - networkx==3.1
-      - numpy==1.26.0
-      - omegaconf
-      - packaging==23.2
-      - pillow==10.0.1
-      - protobuf==4.24.4
-      - psutil==5.9.5
-      - pyparsing==3.1.1
-      - pyyaml==6.0.1
-      - regex==2023.10.3
-      - requests==2.31.0
-      - safetensors==0.4.0
-      - sympy==1.12
-      - tqdm==4.66.1
-      - transformers>=4.25.1
-      - triton==2.1.0
-      - typing-extensions==4.8.0
-      - urllib3==2.0.6
-      - zipp==3.17.0
-prefix: /opt/conda/envs/diffusers
diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index c8e8b4518b01..f4de8ae9bb6e 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -6,12 +6,16 @@ conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
 }
 
-if conda_env_exists "diffusers" ; then
-    echo "Creating virtual environment..."
-    conda env create --name diffusers --file $1
-    echo "Virtual environment created."
-else 
-    echo "Virtual environment already exists."
+if [ $SKIP == 1 ]; then
+    echo "Skipping conda environment installation"
+else
+    if conda_env_exists "diffusers" ; then
+        echo "Creating virtual environment..."
+        conda env create --name diffusers --file $1
+        echo "Virtual environment created."
+    else 
+        echo "Virtual environment already exists."
+    fi
 fi
 
 if [ -d "/opt/intel" ]; then

From 3dfb665b8001b80f265cb1ee86831fecfe39d28f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 19:16:02 +0100
Subject: [PATCH 12/34] default XPU to disabled (non-Intel)

---
 backend/python/diffusers/backend_diffusers.py      | 2 +-
 backend/python/transformers/transformers_server.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py
index e85684fa72c9..fbcfc2a19a14 100755
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
@@ -29,7 +29,7 @@
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 COMPEL=os.environ.get("COMPEL", "1") == "1"
-XPU=os.environ.get("XPU", "1") == "1"
+XPU=os.environ.get("XPU", "0") == "1"
 CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
 SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
 CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 3ebccd233760..70474587ff0c 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -17,7 +17,7 @@
 import torch
 import torch.cuda
 
-XPU=os.environ.get("XPU", "1") == "1"
+XPU=os.environ.get("XPU", "0") == "1"
 if XPU:
     import intel_extension_for_pytorch as ipex
     from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM

From 373ddd2d3221296023c7989184de5363f8ad60f4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 19:18:40 +0100
Subject: [PATCH 13/34] ci: add to pipelines

---
 .github/workflows/image-pr.yml | 10 +++++++++-
 .github/workflows/image.yml    | 16 ++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 527a8479ee39..2e9a0afee511 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -59,6 +59,14 @@ jobs:
             image-type: 'extras'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            tag-suffix: 'sycl-f16-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -105,4 +113,4 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:22.04"
\ No newline at end of file
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index a9620baa5643..2a7fac27a377 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -120,6 +120,22 @@ jobs:
             image-type: 'extras'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            tag-suffix: '-sycl-f16-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f32'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
+            tag-suffix: '-sycl-f32-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
           # Core images
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'

From 8dc6669d17ac53c07294a67b628ded2f1fa43976 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 19:32:20 +0100
Subject: [PATCH 14/34] prepare compel only if enabled

---
 backend/python/diffusers/backend_diffusers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py
index fbcfc2a19a14..94237deaabd0 100755
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
@@ -236,7 +236,7 @@ def LoadModel(self, request, context):
             if request.SchedulerType != "":
                 self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
                 
-            if not self.img2vid:
+            if COMPEL:
                 self.compel = Compel(
                     tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], 
                     text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],

From 2a2b4f6ba43b4c4395c517f6c114d975d2b55b2f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 19:36:52 +0100
Subject: [PATCH 15/34] Skip conda install if intel

---
 .../python/common-env/transformers/Makefile   |  4 ++++
 .../python/common-env/transformers/install.sh | 23 ++++++++++++++-----
 backend/python/diffusers/Makefile             |  2 +-
 backend/python/diffusers/install.sh           |  2 +-
 backend/python/transformers/Makefile          |  1 -
 backend/python/transformers/install.sh        | 18 ---------------
 6 files changed, 23 insertions(+), 27 deletions(-)
 delete mode 100755 backend/python/transformers/install.sh

diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile
index 1cd71ab177d3..b81b1ad7550e 100644
--- a/backend/python/common-env/transformers/Makefile
+++ b/backend/python/common-env/transformers/Makefile
@@ -8,6 +8,10 @@ ifeq ($(BUILD_TYPE), hipblas)
 	CONDA_ENV_PATH = "transformers-rocm.yml"
 endif
 
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
 .PHONY: transformers
 transformers:
 	@echo "Installing $(CONDA_ENV_PATH)..."
diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 42965bdbc68e..2f800b456ca8 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -6,12 +6,23 @@ conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
 }
 
-if conda_env_exists "transformers" ; then
-    echo "Creating virtual environment..."
-    conda env create --name transformers --file $1
-    echo "Virtual environment created."
-else 
-    echo "Virtual environment already exists."
+if [ $SKIP_CONDA == 1 ]; then
+    echo "Skipping conda environment installation"
+else
+    if conda_env_exists "transformers" ; then
+        echo "Creating virtual environment..."
+        conda env create --name transformers --file $1
+        echo "Virtual environment created."
+    else 
+        echo "Virtual environment already exists."
+    fi
+fi
+
+if [ -d "/opt/intel" ]; then
+    # If the directory exists, we assume we are using the intel image
+    # (no conda env)
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    pip install intel-extension-for-transformers
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile
index df3ee4da9f04..a3901dafcaa1 100644
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -5,7 +5,7 @@ export CONDA_ENV_PATH = "diffusers-rocm.yml"
 endif
 
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-export SKIP=1
+export SKIP_CONDA=1
 endif
 
 .PHONY: diffusers
diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index f4de8ae9bb6e..a95ba29e8d7f 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -6,7 +6,7 @@ conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
 }
 
-if [ $SKIP == 1 ]; then
+if [ $SKIP_CONDA == 1 ]; then
     echo "Skipping conda environment installation"
 else
     if conda_env_exists "diffusers" ; then
diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile
index b957b10e1f15..4eeb9ad54136 100644
--- a/backend/python/transformers/Makefile
+++ b/backend/python/transformers/Makefile
@@ -1,7 +1,6 @@
 .PHONY: transformers
 transformers:
 	$(MAKE) -C ../common-env/transformers
-	bash install.sh
 
 .PHONY: run
 run:
diff --git a/backend/python/transformers/install.sh b/backend/python/transformers/install.sh
deleted file mode 100755
index 6d930f2de267..000000000000
--- a/backend/python/transformers/install.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-set -ex
-
-if [ -d "/opt/intel" ]; then
-    # If the directory exists, we assume we are using the intel image
-    # (no conda env)
-    # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers
-fi
-
-if [ "$PIP_CACHE_PURGE" = true ] ; then
-    export PATH=$PATH:/opt/conda/bin
-
-    # Activate conda environment
-    source activate diffusers
-
-    pip cache purge
-fi
\ No newline at end of file

From 8cabe0d8ebcfd721ffeedc07b00e9b208d58f326 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 21:34:45 +0100
Subject: [PATCH 16/34] fix cleanup

---
 backend/python/common-env/transformers/install.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 2f800b456ca8..99f7c1862d67 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -28,8 +28,10 @@ fi
 if [ "$PIP_CACHE_PURGE" = true ] ; then
     export PATH=$PATH:/opt/conda/bin
 
-    # Activate conda environment
-    source activate transformers
+    if [ ! -d "/opt/intel" ]; then
+        # Activate conda environment
+        source activate transformers
+    fi
 
     pip cache purge
 fi
\ No newline at end of file

From 8010e907dba728224671567d30f296ddd8142542 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 21:52:51 +0100
Subject: [PATCH 17/34] Disable compel by default

---
 backend/python/diffusers/backend_diffusers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py
index 94237deaabd0..ec2dea6050e0 100755
--- a/backend/python/diffusers/backend_diffusers.py
+++ b/backend/python/diffusers/backend_diffusers.py
@@ -28,7 +28,7 @@
 
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
-COMPEL=os.environ.get("COMPEL", "1") == "1"
+COMPEL=os.environ.get("COMPEL", "0") == "1"
 XPU=os.environ.get("XPU", "0") == "1"
 CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
 SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"

From aa5acfc1a42320bce715061c7a066460268df47a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Mon, 4 Mar 2024 22:51:35 +0100
Subject: [PATCH 18/34] Install torch 2.1.0 with Intel

---
 backend/python/diffusers/install.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index a95ba29e8d7f..b14574ba3f33 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -21,7 +21,11 @@ fi
 if [ -d "/opt/intel" ]; then
     # If the directory exists, we assume we are using the intel image
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel-extension-for-pytorch==2.0.120+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl-aitools/
+    pip install torch==2.1.0a0 \
+                torchvision==0.16.0a0 \
+                torchaudio==2.1.0a0 \
+                intel-extension-for-pytorch==2.1.10+xpu \
+                --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
     pip install google-api-python-client \
                 grpcio \
                 grpcio-tools \
@@ -35,8 +39,12 @@ fi
 if [ "$PIP_CACHE_PURGE" = true ] ; then
     export PATH=$PATH:/opt/conda/bin
 
-    # Activate conda environment
-    source activate diffusers
+    if [ $SKIP_CONDA == 1 ]; then
+        echo "Not activating conda environment."
+    else
+        # Activate conda environment
+        source activate diffusers
+    fi
 
     pip cache purge
 fi
\ No newline at end of file

From 09c8822e778a4dd5511f76f0a341637732b654fb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 10:43:36 +0100
Subject: [PATCH 19/34] Skip conda on some setups

---
 backend/python/common-env/transformers/install.sh |  8 +++++---
 backend/python/diffusers/install.sh               | 10 +++++-----
 backend/python/exllama2/Makefile                  |  4 ++++
 backend/python/exllama2/install.sh                |  9 +++++++--
 backend/python/vall-e-x/Makefile                  |  4 ++++
 backend/python/vall-e-x/install.sh                |  9 ++++++---
 6 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 99f7c1862d67..0471595c7211 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -1,12 +1,14 @@
 #!/bin/bash
 set -ex
 
+SKIP_CONDA=${SKIP_CONDA:-0}
+
 # Check if environment exist
 conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
 }
 
-if [ $SKIP_CONDA == 1 ]; then
+if [ $SKIP_CONDA -eq 1 ]; then
     echo "Skipping conda environment installation"
 else
     if conda_env_exists "transformers" ; then
@@ -19,7 +21,7 @@ else
 fi
 
 if [ -d "/opt/intel" ]; then
-    # If the directory exists, we assume we are using the intel image
+    # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
     pip install intel-extension-for-transformers
@@ -28,7 +30,7 @@ fi
 if [ "$PIP_CACHE_PURGE" = true ] ; then
     export PATH=$PATH:/opt/conda/bin
 
-    if [ ! -d "/opt/intel" ]; then
+    if [ $SKIP_CONDA -ne 1 ]; then
         # Activate conda environment
         source activate transformers
     fi
diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index b14574ba3f33..87a8a8afb9c7 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -1,12 +1,14 @@
 #!/bin/bash
 set -ex
 
+SKIP_CONDA=${SKIP_CONDA:-0}
+
 # Check if environment exist
 conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
 }
 
-if [ $SKIP_CONDA == 1 ]; then
+if [ $SKIP_CONDA -eq 1 ]; then
     echo "Skipping conda environment installation"
 else
     if conda_env_exists "diffusers" ; then
@@ -19,7 +21,7 @@ else
 fi
 
 if [ -d "/opt/intel" ]; then
-    # If the directory exists, we assume we are using the intel image
+    # Intel GPU: If the directory exists, we assume we are using the intel image
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
     pip install torch==2.1.0a0 \
                 torchvision==0.16.0a0 \
@@ -39,9 +41,7 @@ fi
 if [ "$PIP_CACHE_PURGE" = true ] ; then
     export PATH=$PATH:/opt/conda/bin
 
-    if [ $SKIP_CONDA == 1 ]; then
-        echo "Not activating conda environment."
-    else
+    if [ $SKIP_CONDA -ne 1 ]; then
         # Activate conda environment
         source activate diffusers
     fi
diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile
index 2415815155d3..d2a133b60139 100644
--- a/backend/python/exllama2/Makefile
+++ b/backend/python/exllama2/Makefile
@@ -1,3 +1,7 @@
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
 .PHONY: exllama2
 exllama2:
 	$(MAKE) -C ../common-env/transformers
diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index a6df3d37630b..58246b256e7f 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -5,8 +5,13 @@ set -e
 export PATH=$PATH:/opt/conda/bin
 export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
 
-# Activate conda environment
-source activate transformers
+SKIP_CONDA=${SKIP_CONDA:-0}
+
+if [ $SKIP_CONDA -ne 1 ]; then
+    source activate transformers
+else
+    CONDA_PREFIX=$PWD
+fi
 
 echo $CONDA_PREFIX
 
diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile
index 4804f12ff521..8f34f559ee3a 100644
--- a/backend/python/vall-e-x/Makefile
+++ b/backend/python/vall-e-x/Makefile
@@ -1,3 +1,7 @@
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
 .PHONY: ttsvalle
 ttsvalle:
 	$(MAKE) -C ../common-env/transformers
diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh
index 26ccdccd0789..a3b65ed9baba 100644
--- a/backend/python/vall-e-x/install.sh
+++ b/backend/python/vall-e-x/install.sh
@@ -5,10 +5,13 @@
 export PATH=$PATH:/opt/conda/bin
 export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4
 
-# Activate conda environment
-source activate transformers
+SKIP_CONDA=${SKIP_CONDA:-0}
 
-echo $CONDA_PREFIX
+if [ $SKIP_CONDA -ne 1 ]; then
+    source activate transformers
+else
+    CONDA_PREFIX=$PWD
+fi
 
 git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && popd
 

From 96c5fd488041de612c9ea76b220d17d6bcde9d9a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 13:06:14 +0100
Subject: [PATCH 20/34] Detect python

---
 backend/python/diffusers/run.sh    | 14 +++++++++++++-
 backend/python/transformers/run.sh | 14 +++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index 7ff71e3988a1..52a79072ef18 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -17,4 +17,16 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-python $DIR/backend_diffusers.py $@
+# Intel image: If there is no "python" command, try "python3"
+if ! [ -x "$(command -v python)" ]; then
+  if [ -x "$(command -v python3)" ]; then
+    export PYTHON=python3
+  else
+    echo 'Error: python is not installed.' >&2
+    exit 1
+  fi
+else
+  export PYTHON=python
+fi
+
+$PYTHON $DIR/backend_diffusers.py $@
diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh
index 409cb7c8a7aa..0d506dd177a8 100755
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@@ -17,4 +17,16 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-python $DIR/transformers_server.py $@
+# Intel image: If there is no "python" command, try "python3"
+if ! [ -x "$(command -v python)" ]; then
+  if [ -x "$(command -v python3)" ]; then
+    export PYTHON=python3
+  else
+    echo 'Error: python is not installed.' >&2
+    exit 1
+  fi
+else
+  export PYTHON=python
+fi
+
+$PYTHON $DIR/transformers_server.py $@

From 1bd4e66d0781bbe77e970424c38941eeaf6927cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 18:50:42 +0100
Subject: [PATCH 21/34] Quiet output

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 903acdb13055..7d59e760e5c8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ RUN apt-get update && \
     apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
 
 # Install Go
-RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
+RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
 ENV PATH $PATH:/usr/local/go/bin
 
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/

From 364e36e6cae2e559d30d5e2a3665241eac0c9f60 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 18:52:26 +0100
Subject: [PATCH 22/34] Do not override system python with conda

---
 Dockerfile                                    | 26 +++++++++----------
 .../python/common-env/transformers/install.sh |  3 +--
 backend/python/diffusers/install.sh           |  3 +--
 backend/python/diffusers/run.sh               |  3 +--
 backend/python/exllama2/install.sh            |  2 +-
 backend/python/mamba/install.sh               |  3 ++-
 backend/python/petals/Makefile                |  2 +-
 backend/python/petals/install.sh              |  5 ++++
 backend/python/transformers/run.sh            |  2 +-
 backend/python/vall-e-x/install.sh            |  2 +-
 10 files changed, 27 insertions(+), 24 deletions(-)
 create mode 100644 backend/python/petals/install.sh

diff --git a/Dockerfile b/Dockerfile
index 7d59e760e5c8..04d6e21c4b90 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -168,43 +168,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/
 
 ## Duplicated from Makefile to avoid having a big layer that's hard to push
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
+	 make -C backend/python/autogptq \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
+	 make -C backend/python/bark \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
+	 make -C backend/python/diffusers \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
+	 make -C backend/python/vllm \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \
+	 make -C backend/python/mamba \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
+	 make -C backend/python/sentencetransformers \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
+	 make -C backend/python/transformers \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
+	 make -C backend/python/vall-e-x \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
+	 make -C backend/python/exllama \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
+     make -C backend/python/exllama2 \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
+	 make -C backend/python/petals \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
+	 make -C backend/python/transformers-musicgen \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
+	 make -C backend/python/coqui \
     ; fi
 
 # Make sure the models directory exists
diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 0471595c7211..805603746434 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -11,6 +11,7 @@ conda_env_exists(){
 if [ $SKIP_CONDA -eq 1 ]; then
     echo "Skipping conda environment installation"
 else
+    export PATH=$PATH:/opt/conda/bin
     if conda_env_exists "transformers" ; then
         echo "Creating virtual environment..."
         conda env create --name transformers --file $1
@@ -28,8 +29,6 @@ if [ -d "/opt/intel" ]; then
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
-    export PATH=$PATH:/opt/conda/bin
-
     if [ $SKIP_CONDA -ne 1 ]; then
         # Activate conda environment
         source activate transformers
diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index 87a8a8afb9c7..4b0d263365ff 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -11,6 +11,7 @@ conda_env_exists(){
 if [ $SKIP_CONDA -eq 1 ]; then
     echo "Skipping conda environment installation"
 else
+    export PATH=$PATH:/opt/conda/bin
     if conda_env_exists "diffusers" ; then
         echo "Creating virtual environment..."
         conda env create --name diffusers --file $1
@@ -39,8 +40,6 @@ if [ -d "/opt/intel" ]; then
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
-    export PATH=$PATH:/opt/conda/bin
-
     if [ $SKIP_CONDA -ne 1 ]; then
         # Activate conda environment
         source activate diffusers
diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index 52a79072ef18..aeb3a3898079 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -3,13 +3,12 @@
 ##
 ## A bash script wrapper that runs the diffusers server with conda
 
-export PATH=$PATH:/opt/conda/bin
-
 if [ -d "/opt/intel" ]; then
     # Assumes we are using the Intel oneAPI container image
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
     export XPU=1
 else
+    export PATH=$PATH:/opt/conda/bin
     # Activate conda environment
     source activate diffusers
 fi
diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index 58246b256e7f..bbffa837b542 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -2,7 +2,6 @@
 set -e
 ##
 ## A bash script installs the required dependencies of VALL-E-X and prepares the environment
-export PATH=$PATH:/opt/conda/bin
 export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
 
 SKIP_CONDA=${SKIP_CONDA:-0}
@@ -10,6 +9,7 @@ SKIP_CONDA=${SKIP_CONDA:-0}
 if [ $SKIP_CONDA -ne 1 ]; then
     source activate transformers
 else
+    export PATH=$PATH:/opt/conda/bin
     CONDA_PREFIX=$PWD
 fi
 
diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh
index e56b83c2d31a..4ef26ece1c30 100755
--- a/backend/python/mamba/install.sh
+++ b/backend/python/mamba/install.sh
@@ -2,13 +2,14 @@
 set -e
 ##
 ## A bash script installs the required dependencies of VALL-E-X and prepares the environment
-export PATH=$PATH:/opt/conda/bin
 
 if [ "$BUILD_TYPE" != "cublas" ]; then
     echo "[mamba] Attention!!! nvcc is required - skipping installation"
     exit 0
 fi
 
+export PATH=$PATH:/opt/conda/bin
+
 # Activate conda environment
 source activate transformers
 
diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile
index 4bd07b112827..aa7778e15b29 100644
--- a/backend/python/petals/Makefile
+++ b/backend/python/petals/Makefile
@@ -1,7 +1,7 @@
 .PHONY: petals
 petals:
 	@echo "Creating virtual environment..."
-	@conda env create --name petals --file petals.yml
+	bash install.sh "petals.yml"
 	@echo "Virtual environment created."
 
 .PHONY: run
diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh
new file mode 100644
index 000000000000..97bcbb8af209
--- /dev/null
+++ b/backend/python/petals/install.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+export PATH=$PATH:/opt/conda/bin
+
+conda env create --name petals --file $1
\ No newline at end of file
diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh
index 0d506dd177a8..cde474dfa936 100755
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@@ -3,13 +3,13 @@
 ##
 ## A bash script wrapper that runs the transformers server with conda
 
-export PATH=$PATH:/opt/conda/bin
 
 if [ -d "/opt/intel" ]; then
     # Assumes we are using the Intel oneAPI container image
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
     export XPU=1
 else
+    export PATH=$PATH:/opt/conda/bin
     # Activate conda environment
     source activate transformers
 fi
diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh
index a3b65ed9baba..a9c4117e5cc0 100644
--- a/backend/python/vall-e-x/install.sh
+++ b/backend/python/vall-e-x/install.sh
@@ -2,7 +2,6 @@
 
 ##
 ## A bash script installs the required dependencies of VALL-E-X and prepares the environment
-export PATH=$PATH:/opt/conda/bin
 export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4
 
 SKIP_CONDA=${SKIP_CONDA:-0}
@@ -10,6 +9,7 @@ SKIP_CONDA=${SKIP_CONDA:-0}
 if [ $SKIP_CONDA -ne 1 ]; then
+    export PATH=$PATH:/opt/conda/bin  # conda must be on PATH before "source activate"
     source activate transformers
 else
     CONDA_PREFIX=$PWD
 fi
 

From 5c9fa5dc0bdd820bbae3319abb3042f5b04cbb45 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 19:53:47 +0100
Subject: [PATCH 23/34] Prefer python3

---
 backend/python/diffusers/run.sh    | 10 +++++-----
 backend/python/transformers/run.sh | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index aeb3a3898079..d995884a1876 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -16,16 +16,16 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-# Intel image: If there is no "python" command, try "python3"
-if ! [ -x "$(command -v python)" ]; then
-  if [ -x "$(command -v python3)" ]; then
-    export PYTHON=python3
+# Intel image: If there is no "python3" command, try "python"
+if ! [ -x "$(command -v python3)" ]; then
+  if [ -x "$(command -v python)" ]; then
+    export PYTHON=python
   else
     echo 'Error: python is not installed.' >&2
     exit 1
   fi
 else
-  export PYTHON=python
+  export PYTHON=python3
 fi
 
 $PYTHON $DIR/backend_diffusers.py $@
diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh
index cde474dfa936..83ed1a3e9c2a 100755
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@@ -17,16 +17,16 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-# Intel image: If there is no "python" command, try "python3"
-if ! [ -x "$(command -v python)" ]; then
-  if [ -x "$(command -v python3)" ]; then
-    export PYTHON=python3
+# Intel image: If there is no "python3" command, try "python"
+if ! [ -x "$(command -v python3)" ]; then
+  if [ -x "$(command -v python)" ]; then
+    export PYTHON=python
   else
     echo 'Error: python is not installed.' >&2
     exit 1
   fi
 else
-  export PYTHON=python
+  export PYTHON=python3
 fi
 
 $PYTHON $DIR/transformers_server.py $@

From 235370888e5062f0c9d04a1fd27f4ab24a853e7c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 21:55:42 +0100
Subject: [PATCH 24/34] Fixups

---
 backend/python/common-env/transformers/install.sh | 2 +-
 backend/python/diffusers/install.sh               | 3 ++-
 backend/python/exllama2/install.sh                | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 805603746434..4df682abd404 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -29,7 +29,7 @@ if [ -d "/opt/intel" ]; then
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
-    if [ $SKIP_CONDA -ne 1 ]; then
+    if [ $SKIP_CONDA -eq 0 ]; then
         # Activate conda environment
         source activate transformers
     fi
diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index 4b0d263365ff..d83ec0be0b3b 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -22,13 +22,14 @@ else
 fi
 
 if [ -d "/opt/intel" ]; then
-    # Intel GPU: If the directory exists, we assume we are using the intel image
+    # Intel GPU: If the directory exists, we assume we are using the Intel image
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
     pip install torch==2.1.0a0 \
                 torchvision==0.16.0a0 \
                 torchaudio==2.1.0a0 \
                 intel-extension-for-pytorch==2.1.10+xpu \
                 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+    
     pip install google-api-python-client \
                 grpcio \
                 grpcio-tools \
diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index bbffa837b542..bcd2331d0b58 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -6,10 +6,10 @@ export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
 
 SKIP_CONDA=${SKIP_CONDA:-0}
 
-if [ $SKIP_CONDA -ne 1 ]; then
+if [ $SKIP_CONDA -eq 0 ]; then
+    export PATH=$PATH:/opt/conda/bin
     source activate transformers
 else
-    export PATH=$PATH:/opt/conda/bin
     CONDA_PREFIX=$PWD
 fi
 

From a044a188f3610a0a4ff113107f08a5c11169b870 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 22:14:38 +0100
Subject: [PATCH 25/34] exllama2: do not install without conda (overrides
 pytorch version)

---
 backend/python/exllama2/install.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index bcd2331d0b58..2c7427cedaf3 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -10,7 +10,9 @@ if [ $SKIP_CONDA -eq 0 ]; then
     export PATH=$PATH:/opt/conda/bin
     source activate transformers
 else
-    CONDA_PREFIX=$PWD
+    # exllama2 is supported only with a conda environment
+    echo "[exllama2] Attention!!! conda is required - skipping installation"
+    exit 0
 fi
 
 echo $CONDA_PREFIX

From 293be13015ad23f13290efe9b7e3d1065b9290af Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 5 Mar 2024 22:18:36 +0100
Subject: [PATCH 26/34] exllama/exllama2: do not install if not using cuda

---
 backend/python/exllama/install.sh  | 5 +++++
 backend/python/exllama2/install.sh | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/backend/python/exllama/install.sh b/backend/python/exllama/install.sh
index 702bb1fbefb4..320e7f4dfac7 100755
--- a/backend/python/exllama/install.sh
+++ b/backend/python/exllama/install.sh
@@ -3,6 +3,11 @@ set -ex
 
 export PATH=$PATH:/opt/conda/bin
 
+if [ "$BUILD_TYPE" != "cublas" ]; then
+    echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
+    exit 0
+fi
+
 # Check if environment exist
 conda_env_exists(){
     ! conda list --name "${@}" >/dev/null 2>/dev/null
diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index 2c7427cedaf3..d8e779dea8b6 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -6,6 +6,11 @@ export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
 
 SKIP_CONDA=${SKIP_CONDA:-0}
 
+if [ "$BUILD_TYPE" != "cublas" ]; then
+    echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
+    exit 0
+fi
+
 if [ $SKIP_CONDA -eq 0 ]; then
     export PATH=$PATH:/opt/conda/bin
     source activate transformers

From 72d1cd257b2a805620ba667888fad5b1f84c383d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 6 Mar 2024 17:11:03 +0100
Subject: [PATCH 27/34] Add missing dataset dependency

---
 backend/python/common-env/transformers/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 4df682abd404..169a223197fd 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then
     # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers
+    pip install intel-extension-for-transformers datasets
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then

From 0e32ba01824ae09b1660403fe416318e28afdb33 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 6 Mar 2024 21:58:47 +0100
Subject: [PATCH 28/34] Small fixups, symlink to python, add requirements

---
 Dockerfile                                         | 4 ++++
 backend/python/common-env/transformers/install.sh  | 2 +-
 backend/python/transformers/transformers_server.py | 6 +++++-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 04d6e21c4b90..fd3659629395 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -81,6 +81,10 @@ RUN pip install --upgrade pip
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
 RUN apt-get install -y espeak-ng espeak && apt-get clean
 
+RUN if [ ! -e /usr/bin/python ]; then \
+	  ln -s /usr/bin/python3 /usr/bin/python \
+    ; fi
+
 ###################################
 ###################################
 
diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index 169a223197fd..dc0e66b93f06 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then
     # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets
+    pip install intel-extension-for-transformers datasets sentencepiece tiktoken
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 70474587ff0c..e3c9ad0d9da9 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -77,7 +77,11 @@ def LoadModel(self, request, context):
         model_name = request.Model
         try:
             if request.Type == "AutoModelForCausalLM":
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
+                if XPU:
+                    self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
+                                              device_map="xpu", load_in_4bit=True)
+                else:
+                    self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
             else:
                 self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
 

From dc8ae9244f42286840c8ae45ea006e12e4acd5f5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 6 Mar 2024 22:11:15 +0100
Subject: [PATCH 29/34] Add neural_speed to the deps

---
 backend/python/common-env/transformers/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh
index dc0e66b93f06..e268fcc88370 100644
--- a/backend/python/common-env/transformers/install.sh
+++ b/backend/python/common-env/transformers/install.sh
@@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then
     # Intel GPU: If the directory exists, we assume we are using the intel image
     # (no conda env)
     # https://github.com/intel/intel-extension-for-pytorch/issues/538
-    pip install intel-extension-for-transformers datasets sentencepiece tiktoken
+    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
 fi
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then

From 7f7cd68082bc7b5119ec6179912c59444ae3fe6e Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 6 Mar 2024 22:13:49 +0100
Subject: [PATCH 30/34] correctly handle model offloading

---
 backend/python/transformers/transformers_server.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index e3c9ad0d9da9..0d6932b59920 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -90,11 +90,9 @@ def LoadModel(self, request, context):
             self.XPU = False
 
             if XPU:
+                self.XPU = True
                 try:
-                    print("Loading model", model_name, "to XPU.", file=sys.stderr)
-                    device_map = "xpu"
-                    self.model = self.model.to(device_map)
-                    self.XPU = True
+                    print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
                     self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map)
                 except Exception as err:
                     print("Not using XPU:", err, file=sys.stderr)

From f0bcfba1fbd6da5e90d3a4e57f0bdaf737ed21d6 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 6 Mar 2024 23:27:00 +0100
Subject: [PATCH 31/34] fix: device_map == xpu

---
 backend/python/transformers/transformers_server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 0d6932b59920..d83f2ad30976 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -93,7 +93,7 @@ def LoadModel(self, request, context):
                 self.XPU = True
                 try:
                     print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
-                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map)
+                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device="xpu")
                 except Exception as err:
                     print("Not using XPU:", err, file=sys.stderr)
 

From 2ee4fd2d56c18530e0cd394285569eba682eaa1c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 7 Mar 2024 01:03:17 +0100
Subject: [PATCH 32/34] go back to calling python, fixed at dockerfile level

---
 backend/python/diffusers/run.sh    | 14 +-------------
 backend/python/transformers/run.sh | 14 +-------------
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh
index d995884a1876..69b25d507a62 100755
--- a/backend/python/diffusers/run.sh
+++ b/backend/python/diffusers/run.sh
@@ -16,16 +16,4 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-# Intel image: If there is no "python3" command, try "python"
-if ! [ -x "$(command -v python3)" ]; then
-  if [ -x "$(command -v python)" ]; then
-    export PYTHON=python
-  else
-    echo 'Error: python is not installed.' >&2
-    exit 1
-  fi
-else
-  export PYTHON=python3
-fi
-
-$PYTHON $DIR/backend_diffusers.py $@
+python $DIR/backend_diffusers.py $@
diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh
index 83ed1a3e9c2a..d09c1f5c0b47 100755
--- a/backend/python/transformers/run.sh
+++ b/backend/python/transformers/run.sh
@@ -17,16 +17,4 @@ fi
 # get the directory where the bash script is located
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-# Intel image: If there is no "python3" command, try "python"
-if ! [ -x "$(command -v python3)" ]; then
-  if [ -x "$(command -v python)" ]; then
-    export PYTHON=python
-  else
-    echo 'Error: python is not installed.' >&2
-    exit 1
-  fi
-else
-  export PYTHON=python3
-fi
-
-$PYTHON $DIR/transformers_server.py $@
+python $DIR/transformers_server.py $@

From 9ff1cf87935e84d8d874bfb87cd0df64fdff0331 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 7 Mar 2024 01:08:12 +0100
Subject: [PATCH 33/34] Exllama2 restricted to only nvidia gpus

---
 backend/python/common-env/transformers/Makefile |  3 +++
 backend/python/diffusers/Makefile               |  3 +++
 backend/python/exllama2/Makefile                |  4 ----
 backend/python/exllama2/install.sh              | 12 ++----------
 4 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile
index b81b1ad7550e..797af0832ef2 100644
--- a/backend/python/common-env/transformers/Makefile
+++ b/backend/python/common-env/transformers/Makefile
@@ -8,6 +8,9 @@ ifeq ($(BUILD_TYPE), hipblas)
 	CONDA_ENV_PATH = "transformers-rocm.yml"
 endif
 
+# For Intel GPUs, dependencies are expected to be installed in the main Python
+# environment, so we skip the conda installation for SYCL builds.
+# See https://github.com/intel/intel-extension-for-pytorch/issues/538
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 export SKIP_CONDA=1
 endif
diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile
index a3901dafcaa1..40e1d1a7e888 100644
--- a/backend/python/diffusers/Makefile
+++ b/backend/python/diffusers/Makefile
@@ -4,6 +4,9 @@ ifeq ($(BUILD_TYPE), hipblas)
 export CONDA_ENV_PATH = "diffusers-rocm.yml"
 endif
 
+# For Intel GPUs, dependencies are expected to be installed in the main Python
+# environment, so we skip the conda installation for SYCL builds.
+# See https://github.com/intel/intel-extension-for-pytorch/issues/538
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 export SKIP_CONDA=1
 endif
diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile
index d2a133b60139..2415815155d3 100644
--- a/backend/python/exllama2/Makefile
+++ b/backend/python/exllama2/Makefile
@@ -1,7 +1,3 @@
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-export SKIP_CONDA=1
-endif
-
 .PHONY: exllama2
 exllama2:
 	$(MAKE) -C ../common-env/transformers
diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh
index d8e779dea8b6..858685b07eec 100755
--- a/backend/python/exllama2/install.sh
+++ b/backend/python/exllama2/install.sh
@@ -4,21 +4,13 @@ set -e
 ## A bash script installs the required dependencies of VALL-E-X and prepares the environment
 export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
 
-SKIP_CONDA=${SKIP_CONDA:-0}
-
 if [ "$BUILD_TYPE" != "cublas" ]; then
     echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
     exit 0
 fi
 
-if [ $SKIP_CONDA -eq 0 ]; then
-    export PATH=$PATH:/opt/conda/bin
-    source activate transformers
-else
-    # exllama2 is supported only with a conda environment
-    echo "[exllama2] Attention!!! conda is required - skipping installation"
-    exit 0
-fi
+export PATH=$PATH:/opt/conda/bin
+source activate transformers
 
 echo $CONDA_PREFIX
 

From 95fb025dcef5db9d250d7ad2ec12e3c130a1b5d3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Thu, 7 Mar 2024 09:31:54 +0100
Subject: [PATCH 34/34] Tokenizer to xpu

---
 backend/python/transformers/transformers_server.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index d83f2ad30976..41112c44f6e5 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -93,7 +93,7 @@ def LoadModel(self, request, context):
                 self.XPU = True
                 try:
                     print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
-                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device="xpu")
+                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
                 except Exception as err:
                     print("Not using XPU:", err, file=sys.stderr)
 
@@ -160,6 +160,8 @@ def Predict(self, request, context):
         inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids
         if self.CUDA:
             inputs = inputs.to("cuda")
+        if XPU:
+            inputs = inputs.to("xpu")
 
         outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)