From 50e23dd1180b9950bd896e904e20119b33045434 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 23 Feb 2024 18:44:51 +0100 Subject: [PATCH 01/34] feat(intel): add diffusers support --- backend/python/diffusers/Makefile | 4 ++ backend/python/diffusers/diffusers-intel.yml | 65 ++++++++++++++++++++ backend/python/diffusers/run.sh | 5 ++ 3 files changed, 74 insertions(+) create mode 100644 backend/python/diffusers/diffusers-intel.yml diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 70a62b60daa9..1b1fbff84e45 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -4,6 +4,10 @@ ifeq ($(BUILD_TYPE), hipblas) export CONDA_ENV_PATH = "diffusers-rocm.yml" endif +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export CONDA_ENV_PATH = "diffusers-intel.yml" +endif + .PHONY: diffusers diffusers: @echo "Installing $(CONDA_ENV_PATH)..." diff --git a/backend/python/diffusers/diffusers-intel.yml b/backend/python/diffusers/diffusers-intel.yml new file mode 100644 index 000000000000..910ebf42c64b --- /dev/null +++ b/backend/python/diffusers/diffusers-intel.yml @@ -0,0 +1,65 @@ +name: diffusers +channels: + - defaults + - intel + - conda-forge +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2023.08.22=h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.11=h7f8727e_2 + - pip=23.2.1=py311h06a4308_0 + - python=3.11.5=h955ad1f_0 + - readline=8.2=h5eee18b_0 + - setuptools=68.0.0=py311h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - tzdata=2023c=h04d1e81_0 + - wheel=0.41.2=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - intel-extension-for-pytorch=2.1.10 + - pytorch=2.1.0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - accelerate>=0.11.0 + - certifi==2023.7.22 + - charset-normalizer==3.3.0 + - compel==2.0.2 + - diffusers==0.24.0 + - filelock==3.12.4 + - fsspec==2023.9.2 + - grpcio==1.59.0 + - huggingface-hub>=0.19.4 + - idna==3.4 + - importlib-metadata==6.8.0 + - jinja2==3.1.2 + - markupsafe==2.1.3 + - mpmath==1.3.0 + - networkx==3.1 + - numpy==1.26.0 + - omegaconf + - packaging==23.2 + - pillow==10.0.1 + - protobuf==4.24.4 + - psutil==5.9.5 + - pyparsing==3.1.1 + - pyyaml==6.0.1 + - regex==2023.10.3 + - requests==2.31.0 + - safetensors==0.4.0 + - sympy==1.12 + - tqdm==4.66.1 + - transformers>=4.25.1 + - triton==2.1.0 + - typing-extensions==4.8.0 + - urllib3==2.0.6 + - zipp==3.17.0 +prefix: /opt/conda/envs/diffusers diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index 8e3e1bbfbfdd..8080230bd9f4 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -8,6 +8,11 @@ export PATH=$PATH:/opt/conda/bin # Activate conda environment source activate diffusers +if [ -d "/opt/intel" ]; then + source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/mkl/latest/env/vars.sh +fi + # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" From a544b8f67d714f976c2718a4f439b910514a113e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 24 Feb 2024 00:41:08 +0100 Subject: [PATCH 02/34] try to consume upstream container image --- Makefile | 7 +++++++ backend/python/diffusers/backend_diffusers.py | 7 +++++++ backend/python/diffusers/run.sh | 12 +++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b24ed7972151..1c5cf1c131b2 100644 --- a/Makefile +++ b/Makefile @@ -557,3 +557,10 @@ docker-image-intel: --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . + +docker-image-intel-xpu: + docker build \ + --build-arg BASE_IMAGE=intel/intel-extension-for-pytorch:2.1.10-xpu \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS="none" \ + --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . \ No newline at end of file diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index 6780cae626a6..eef12011f4fa 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -29,6 +29,7 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24 COMPEL=os.environ.get("COMPEL", "1") == "1" +XPU=os.environ.get("XPU", "1") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1" CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8") @@ -36,6 +37,10 @@ DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1" FRAMES=os.environ.get("FRAMES", "64") +if XPU: + import intel_extension_for_pytorch as ipex + print(ipex.xpu.get_device_name(0)) + # If MAX_WORKERS are specified in the environment use it, otherwise default to 1 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) @@ -247,6 +252,8 @@ def LoadModel(self, request, context): self.pipe.to('cuda') if self.controlnet: self.controlnet.to('cuda') + if XPU: + self.pipe = self.pipe.to("xpu") # Assume directory from request.ModelFile. # Only if request.LoraAdapter it's not an absolute path if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index 8080230bd9f4..c4a0b6c79b93 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -5,12 +5,14 @@ export PATH=$PATH:/opt/conda/bin -# Activate conda environment -source activate diffusers - if [ -d "/opt/intel" ]; then - source /opt/intel/oneapi/compiler/latest/env/vars.sh - source /opt/intel/oneapi/mkl/latest/env/vars.sh + # Assumes we are using the Intel oneAPI container image + #source /opt/intel/oneapi/compiler/latest/env/vars.sh + #source /opt/intel/oneapi/mkl/latest/env/vars.sh + export XPU=1 +else + # Activate conda environment + source activate diffusers fi # get the directory where the bash script is located From 67865aa1bc2e6e0df58736125a091087cc2c2f78 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 24 Feb 2024 00:43:53 +0100 Subject: [PATCH 03/34] Debug --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index a04a866ec7d1..903acdb13055 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,8 @@ ARG BASE_IMAGE=ubuntu:22.04 # extras or core FROM ${BASE_IMAGE} as requirements-core +USER root + ARG GO_VERSION=1.21.7 ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=11 From 0c23fbafbc674749eb3dcdeb5bb58f2aa0aeedf8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 24 Feb 2024 23:51:23 +0100 Subject: [PATCH 04/34] Manually install deps --- backend/python/diffusers/install.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 0429826e3f4d..0ea8f08fee21 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -14,6 +14,16 @@ else echo "Virtual environment already exists." fi +if [ -d "/opt/intel" ]; then + pip install --upgrade google-api-python-client + pip install --upgrade grpcio + pip install --upgrade grpcio-tools + pip install diffusers==0.24.0 + pip install transformers>=4.25.1 + pip install accelerate + pip install compel==2.0.2 +fi + if [ "$PIP_CACHE_PURGE" = true ] ; then export PATH=$PATH:/opt/conda/bin From 7115277a3f1a5faf120e10379ea831e790ff1941 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 24 Feb 2024 23:51:37 +0100 Subject: [PATCH 05/34] Map transformers/hf cache dir to modelpath if not specified --- pkg/model/initializers.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index fce44fe15469..1e2af8f9d287 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -69,6 +69,13 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string return fmt.Sprintf("127.0.0.1:%d", port), nil } + // If no specific model path is set for transformers/HF, set it to the model path + for _, env := range []string{"HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE"} { + if os.Getenv(env) == "" { + os.Setenv(env, ml.ModelPath) + } + } + // Check if the backend is provided as external if uri, ok := o.externalBackends[backend]; ok { log.Debug().Msgf("Loading external backend: %s", uri) From e5fdcbe6145e80e32081adf075a2a1415e24b17f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:37:04 +0100 Subject: [PATCH 06/34] fix(compel): update initialization, pass by all gRPC options --- backend/python/diffusers/backend_diffusers.py | 14 ++++++++++---- core/backend/image.go | 18 +++--------------- core/backend/options.go | 18 +++++++++++++----- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index eef12011f4fa..e85684fa72c9 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -21,7 +21,7 @@ from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.utils import load_image,export_to_video -from compel import Compel +from compel import Compel, ReturnedEmbeddingsType from transformers import CLIPTextModel from safetensors.torch import load_file @@ -237,7 +237,12 @@ def LoadModel(self, request, context): self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) if not self.img2vid: - self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder) + self.compel = Compel( + tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], + text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], + returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, + requires_pooled=[False, True] + ) if request.ControlNet: @@ -393,8 +398,9 @@ def GenerateImage(self, request, context): image = {} if COMPEL: - conditioning = self.compel.build_conditioning_tensor(prompt) - kwargs["prompt_embeds"]= conditioning + conditioning, pooled = self.compel.build_conditioning_tensor(prompt) + kwargs["prompt_embeds"] = conditioning + kwargs["pooled_prompt_embeds"] = pooled # pass the kwargs dictionary to the self.pipe method image = self.pipe( guidance_scale=self.cfg_scale, diff --git a/core/backend/image.go b/core/backend/image.go index 60db48f96ba1..f7209f9d2a7c 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -9,26 +9,14 @@ import ( func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { + gRPCOpts := gRPCModelOpts(backendConfig) opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(backendConfig.Threads)), + model.WithThreads(uint32(appConfig.Threads)), model.WithContext(appConfig.Context), model.WithModel(backendConfig.Model), - model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{ - CUDA: backendConfig.CUDA || backendConfig.Diffusers.CUDA, - SchedulerType: backendConfig.Diffusers.SchedulerType, - PipelineType: backendConfig.Diffusers.PipelineType, - CFGScale: backendConfig.Diffusers.CFGScale, - LoraAdapter: backendConfig.LoraAdapter, - LoraScale: backendConfig.LoraScale, - LoraBase: backendConfig.LoraBase, - IMG2IMG: backendConfig.Diffusers.IMG2IMG, - CLIPModel: backendConfig.Diffusers.ClipModel, - CLIPSubfolder: backendConfig.Diffusers.ClipSubFolder, - CLIPSkip: int32(backendConfig.Diffusers.ClipSkip), - ControlNet: backendConfig.Diffusers.ControlNet, - }), + model.WithLoadGRPCLoadModelOpts(gRPCOpts), }) inferenceModel, err := loader.BackendLoader( diff --git a/core/backend/options.go b/core/backend/options.go index d2bbb2b88e6d..3af6f6797a67 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -40,11 +40,23 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { } return &pb.ModelOptions{ + CUDA: c.CUDA || c.Diffusers.CUDA, + SchedulerType: c.Diffusers.SchedulerType, + PipelineType: c.Diffusers.PipelineType, + CFGScale: c.Diffusers.CFGScale, + LoraAdapter: c.LoraAdapter, + LoraScale: c.LoraScale, + F16Memory: c.F16, + LoraBase: c.LoraBase, + IMG2IMG: c.Diffusers.IMG2IMG, + CLIPModel: c.Diffusers.ClipModel, + CLIPSubfolder: c.Diffusers.ClipSubFolder, + CLIPSkip: int32(c.Diffusers.ClipSkip), + ControlNet: c.Diffusers.ControlNet, ContextSize: int32(c.ContextSize), Seed: int32(c.Seed), NBatch: int32(b), NoMulMatQ: c.NoMulMatQ, - CUDA: c.CUDA, // diffusers, transformers DraftModel: c.DraftModel, AudioPath: c.VallE.AudioPath, Quantization: c.Quantization, @@ -58,12 +70,8 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { YarnAttnFactor: c.YarnAttnFactor, YarnBetaFast: c.YarnBetaFast, YarnBetaSlow: c.YarnBetaSlow, - LoraAdapter: c.LoraAdapter, - LoraBase: c.LoraBase, - LoraScale: c.LoraScale, NGQA: c.NGQA, RMSNormEps: c.RMSNormEps, - F16Memory: c.F16, MLock: c.MMlock, RopeFreqBase: c.RopeFreqBase, RopeScaling: c.RopeScaling, From 465b029e848984032f9eaf0aa560782a8b0e0c28 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:44:18 +0100 Subject: [PATCH 07/34] fix: add dependencies, implement transformers for xpu --- backend/python/diffusers/install.sh | 17 ++++++++------- backend/python/diffusers/run.sh | 3 +-- backend/python/transformers/Makefile | 1 + backend/python/transformers/install.sh | 18 ++++++++++++++++ backend/python/transformers/run.sh | 10 +++++++-- .../transformers/transformers_server.py | 21 ++++++++++++++++++- 6 files changed, 58 insertions(+), 12 deletions(-) create mode 100755 backend/python/transformers/install.sh diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 0ea8f08fee21..4f095912a131 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -15,13 +15,16 @@ else fi if [ -d "/opt/intel" ]; then - pip install --upgrade google-api-python-client - pip install --upgrade grpcio - pip install --upgrade grpcio-tools - pip install diffusers==0.24.0 - pip install transformers>=4.25.1 - pip install accelerate - pip install compel==2.0.2 + # If the directory exists, we assume we are using the intel image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel-extension-for-pytorch==2.0.120+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl-aitools/ + pip install google-api-python-client \ + grpcio \ + grpcio-tools \ + diffusers==0.24.0 \ + transformers>=4.25.1 \ + accelerate \ + compel==2.0.2 fi if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index c4a0b6c79b93..7ff71e3988a1 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -7,8 +7,7 @@ export PATH=$PATH:/opt/conda/bin if [ -d "/opt/intel" ]; then # Assumes we are using the Intel oneAPI container image - #source /opt/intel/oneapi/compiler/latest/env/vars.sh - #source /opt/intel/oneapi/mkl/latest/env/vars.sh + # https://github.com/intel/intel-extension-for-pytorch/issues/538 export XPU=1 else # Activate conda environment diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile index 4eeb9ad54136..b957b10e1f15 100644 --- a/backend/python/transformers/Makefile +++ b/backend/python/transformers/Makefile @@ -1,6 +1,7 @@ .PHONY: transformers transformers: $(MAKE) -C ../common-env/transformers + bash install.sh .PHONY: run run: diff --git a/backend/python/transformers/install.sh b/backend/python/transformers/install.sh new file mode 100755 index 000000000000..6d930f2de267 --- /dev/null +++ b/backend/python/transformers/install.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -ex + +if [ -d "/opt/intel" ]; then + # If the directory exists, we assume we are using the intel image + # (no conda env) + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + pip install intel-extension-for-transformers +fi + +if [ "$PIP_CACHE_PURGE" = true ] ; then + export PATH=$PATH:/opt/conda/bin + + # Activate conda environment + source activate diffusers + + pip cache purge +fi \ No newline at end of file diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index e6a42b7e1b6d..409cb7c8a7aa 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -5,8 +5,14 @@ export PATH=$PATH:/opt/conda/bin -# Activate conda environment -source activate transformers +if [ -d "/opt/intel" ]; then + # Assumes we are using the Intel oneAPI container image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + export XPU=1 +else + # Activate conda environment + source activate transformers +fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index fe0b815a2226..3ebccd233760 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -16,7 +16,15 @@ import grpc import torch import torch.cuda -from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed + +XPU=os.environ.get("XPU", "1") == "1" +if XPU: + import intel_extension_for_pytorch as ipex + from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM + from transformers import AutoTokenizer, AutoModel, set_seed +else: + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed + _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -75,6 +83,17 @@ def LoadModel(self, request, context): self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.CUDA = False + self.XPU = False + + if XPU: + try: + print("Loading model", model_name, "to XPU.", file=sys.stderr) + device_map = "xpu" + self.model = self.model.to(device_map) + self.XPU = True + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map) + except Exception as err: + print("Not using XPU:", err, file=sys.stderr) if request.CUDA or torch.cuda.is_available(): try: From 94789f527cb1e3bdf9dc3b39e1fda0beebac1220 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:44:43 +0100 Subject: [PATCH 08/34] base it from the oneapi image --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1c5cf1c131b2..324aedfd8d2a 100644 --- a/Makefile +++ b/Makefile @@ -560,7 +560,7 @@ docker-image-intel: docker-image-intel-xpu: docker build \ - --build-arg BASE_IMAGE=intel/intel-extension-for-pytorch:2.1.10-xpu \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="none" \ --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) . \ No newline at end of file From 9aacbc77aa56bf46b133534ada550fd632df021c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:46:06 +0100 Subject: [PATCH 09/34] Add pillow --- backend/python/diffusers/install.sh | 3 ++- core/backend/image.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 4f095912a131..c8e8b4518b01 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -24,7 +24,8 @@ if [ -d "/opt/intel" ]; then diffusers==0.24.0 \ transformers>=4.25.1 \ accelerate \ - compel==2.0.2 + compel==2.0.2 \ + Pillow fi if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/core/backend/image.go b/core/backend/image.go index f7209f9d2a7c..478c32fdb6d6 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -13,7 +13,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(appConfig.Threads)), + model.WithThreads(uint32(backendConfig.Threads)), model.WithContext(appConfig.Context), model.WithModel(backendConfig.Model), model.WithLoadGRPCLoadModelOpts(gRPCOpts), From 3878eda59cb7ad5435b947cf045ad4bd51135ea8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:54:44 +0100 Subject: [PATCH 10/34] set threads if specified when launching the API --- core/backend/image.go | 7 +++++-- core/backend/llm.go | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/core/backend/image.go b/core/backend/image.go index 478c32fdb6d6..79b8d4ba15c4 100644 --- a/core/backend/image.go +++ b/core/backend/image.go @@ -8,12 +8,15 @@ import ( ) func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) { - + threads := backendConfig.Threads + if threads == 0 && appConfig.Threads != 0 { + threads = appConfig.Threads + } gRPCOpts := gRPCModelOpts(backendConfig) opts := modelOpts(backendConfig, appConfig, []model.Option{ model.WithBackendString(backendConfig.Backend), model.WithAssetDir(appConfig.AssetsDestination), - model.WithThreads(uint32(backendConfig.Threads)), + model.WithThreads(uint32(threads)), model.WithContext(appConfig.Context), model.WithModel(backendConfig.Model), model.WithLoadGRPCLoadModelOpts(gRPCOpts), diff --git a/core/backend/llm.go b/core/backend/llm.go index f16878c0f588..54e261889b8c 100644 --- a/core/backend/llm.go +++ b/core/backend/llm.go @@ -28,7 +28,10 @@ type TokenUsage struct { func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { modelFile := c.Model - + threads := c.Threads + if threads == 0 && o.Threads != 0 { + threads = o.Threads + } grpcOpts := gRPCModelOpts(c) var inferenceModel grpc.Backend @@ -36,7 +39,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode opts := modelOpts(c, o, []model.Option{ model.WithLoadGRPCLoadModelOpts(grpcOpts), - model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup + model.WithThreads(uint32(threads)), // some models uses this to allocate threads during startup model.WithAssetDir(o.AssetsDestination), model.WithModel(modelFile), model.WithContext(o.Context), From 70880fc7d037a747cd1b001cac94b55dc2439ad4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 18:57:36 +0100 Subject: [PATCH 11/34] Skip conda install if intel --- backend/python/diffusers/Makefile | 2 +- backend/python/diffusers/diffusers-intel.yml | 65 -------------------- backend/python/diffusers/install.sh | 16 +++-- 3 files changed, 11 insertions(+), 72 deletions(-) delete mode 100644 backend/python/diffusers/diffusers-intel.yml diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index 1b1fbff84e45..df3ee4da9f04 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -5,7 +5,7 @@ export CONDA_ENV_PATH = "diffusers-rocm.yml" endif ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export CONDA_ENV_PATH = "diffusers-intel.yml" +export SKIP=1 endif .PHONY: diffusers diff --git a/backend/python/diffusers/diffusers-intel.yml b/backend/python/diffusers/diffusers-intel.yml deleted file mode 100644 index 910ebf42c64b..000000000000 --- a/backend/python/diffusers/diffusers-intel.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: diffusers -channels: - - defaults - - intel - - conda-forge -dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2023.08.22=h06a4308_0 - - ld_impl_linux-64=2.38=h1181459_1 - - libffi=3.4.4=h6a678d5_0 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libstdcxx-ng=11.2.0=h1234567_1 - - libuuid=1.41.5=h5eee18b_0 - - ncurses=6.4=h6a678d5_0 - - openssl=3.0.11=h7f8727e_2 - - pip=23.2.1=py311h06a4308_0 - - python=3.11.5=h955ad1f_0 - - readline=8.2=h5eee18b_0 - - setuptools=68.0.0=py311h06a4308_0 - - sqlite=3.41.2=h5eee18b_0 - - tk=8.6.12=h1ccaba5_0 - - tzdata=2023c=h04d1e81_0 - - wheel=0.41.2=py311h06a4308_0 - - xz=5.4.2=h5eee18b_0 - - intel-extension-for-pytorch=2.1.10 - - pytorch=2.1.0 - - zlib=1.2.13=h5eee18b_0 - - pip: - - accelerate>=0.11.0 - - certifi==2023.7.22 - - charset-normalizer==3.3.0 - - compel==2.0.2 - - diffusers==0.24.0 - - filelock==3.12.4 - - fsspec==2023.9.2 - - grpcio==1.59.0 - - huggingface-hub>=0.19.4 - - idna==3.4 - - importlib-metadata==6.8.0 - - jinja2==3.1.2 - - markupsafe==2.1.3 - - mpmath==1.3.0 - - networkx==3.1 - - numpy==1.26.0 - - omegaconf - - packaging==23.2 - - pillow==10.0.1 - - protobuf==4.24.4 - - psutil==5.9.5 - - pyparsing==3.1.1 - - pyyaml==6.0.1 - - regex==2023.10.3 - - requests==2.31.0 - - safetensors==0.4.0 - - sympy==1.12 - - tqdm==4.66.1 - - transformers>=4.25.1 - - triton==2.1.0 - - typing-extensions==4.8.0 - - urllib3==2.0.6 - - zipp==3.17.0 -prefix: /opt/conda/envs/diffusers diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index c8e8b4518b01..f4de8ae9bb6e 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -6,12 +6,16 @@ conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if conda_env_exists "diffusers" ; then - echo "Creating virtual environment..." - conda env create --name diffusers --file $1 - echo "Virtual environment created." -else - echo "Virtual environment already exists." +if [ $SKIP == 1 ]; then + echo "Skipping conda environment installation" +else + if conda_env_exists "diffusers" ; then + echo "Creating virtual environment..." + conda env create --name diffusers --file $1 + echo "Virtual environment created." + else + echo "Virtual environment already exists." + fi fi if [ -d "/opt/intel" ]; then From 3dfb665b8001b80f265cb1ee86831fecfe39d28f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 19:16:02 +0100 Subject: [PATCH 12/34] defaults to non-intel --- backend/python/diffusers/backend_diffusers.py | 2 +- backend/python/transformers/transformers_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index e85684fa72c9..fbcfc2a19a14 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -29,7 +29,7 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24 COMPEL=os.environ.get("COMPEL", "1") == "1" -XPU=os.environ.get("XPU", "1") == "1" +XPU=os.environ.get("XPU", "0") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1" CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8") diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 3ebccd233760..70474587ff0c 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -17,7 +17,7 @@ import torch import torch.cuda -XPU=os.environ.get("XPU", "1") == "1" +XPU=os.environ.get("XPU", "0") == "1" if XPU: import intel_extension_for_pytorch as ipex from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM From 373ddd2d3221296023c7989184de5363f8ad60f4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 19:18:40 +0100 Subject: [PATCH 13/34] ci: add to pipelines --- .github/workflows/image-pr.yml | 10 +++++++++- .github/workflows/image.yml | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 527a8479ee39..2e9a0afee511 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -59,6 +59,14 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: 'sycl-f16-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -105,4 +113,4 @@ jobs: ffmpeg: 'true' image-type: 'core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:22.04" \ No newline at end of file diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index a9620baa5643..2a7fac27a377 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -120,6 +120,22 @@ jobs: image-type: 'extras' base-image: "rocm/dev-ubuntu-22.04:6.0-complete" runs-on: 'arc-runner-set' + - build-type: 'sycl_f16' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: '-sycl-f16-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' + - build-type: 'sycl_f32' + platforms: 'linux/amd64' + tag-latest: 'false' + base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04" + tag-suffix: '-sycl-f32-ffmpeg' + ffmpeg: 'true' + image-type: 'extras' + runs-on: 'arc-runner-set' # Core images - build-type: 'sycl_f16' platforms: 'linux/amd64' From 8dc6669d17ac53c07294a67b628ded2f1fa43976 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 19:32:20 +0100 Subject: [PATCH 14/34] prepare compel only if enabled --- backend/python/diffusers/backend_diffusers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index fbcfc2a19a14..94237deaabd0 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -236,7 +236,7 @@ def LoadModel(self, request, context): if request.SchedulerType != "": self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) - if not self.img2vid: + if COMPEL: self.compel = Compel( tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], From 2a2b4f6ba43b4c4395c517f6c114d975d2b55b2f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 19:36:52 +0100 Subject: [PATCH 15/34] Skip conda install if intel --- .../python/common-env/transformers/Makefile | 4 ++++ .../python/common-env/transformers/install.sh | 23 ++++++++++++++----- backend/python/diffusers/Makefile | 2 +- backend/python/diffusers/install.sh | 2 +- backend/python/transformers/Makefile | 1 - backend/python/transformers/install.sh | 18 --------------- 6 files changed, 23 insertions(+), 27 deletions(-) delete mode 100755 backend/python/transformers/install.sh diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile index 1cd71ab177d3..b81b1ad7550e 100644 --- a/backend/python/common-env/transformers/Makefile +++ b/backend/python/common-env/transformers/Makefile @@ -8,6 +8,10 @@ ifeq ($(BUILD_TYPE), hipblas) CONDA_ENV_PATH = "transformers-rocm.yml" endif +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: transformers transformers: @echo "Installing $(CONDA_ENV_PATH)..." diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 42965bdbc68e..2f800b456ca8 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -6,12 +6,23 @@ conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if conda_env_exists "transformers" ; then - echo "Creating virtual environment..." - conda env create --name transformers --file $1 - echo "Virtual environment created." -else - echo "Virtual environment already exists." +if [ $SKIP_CONDA == 1 ]; then + echo "Skipping conda environment installation" +else + if conda_env_exists "transformers" ; then + echo "Creating virtual environment..." + conda env create --name transformers --file $1 + echo "Virtual environment created." + else + echo "Virtual environment already exists." + fi +fi + +if [ -d "/opt/intel" ]; then + # If the directory exists, we assume we are using the intel image + # (no conda env) + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + pip install intel-extension-for-transformers fi if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index df3ee4da9f04..a3901dafcaa1 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -5,7 +5,7 @@ export CONDA_ENV_PATH = "diffusers-rocm.yml" endif ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP=1 +export SKIP_CONDA=1 endif .PHONY: diffusers diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index f4de8ae9bb6e..a95ba29e8d7f 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -6,7 +6,7 @@ conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if [ $SKIP == 1 ]; then +if [ $SKIP_CONDA == 1 ]; then echo "Skipping conda environment installation" else if conda_env_exists "diffusers" ; then diff --git a/backend/python/transformers/Makefile b/backend/python/transformers/Makefile index b957b10e1f15..4eeb9ad54136 100644 --- a/backend/python/transformers/Makefile +++ b/backend/python/transformers/Makefile @@ -1,7 +1,6 @@ .PHONY: transformers transformers: $(MAKE) -C ../common-env/transformers - bash install.sh .PHONY: run run: diff --git a/backend/python/transformers/install.sh b/backend/python/transformers/install.sh deleted file mode 100755 index 6d930f2de267..000000000000 --- a/backend/python/transformers/install.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -ex - -if [ -d "/opt/intel" ]; then - # If the directory exists, we assume we are using the intel image - # (no conda env) - # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers -fi - -if [ "$PIP_CACHE_PURGE" = true ] ; then - export PATH=$PATH:/opt/conda/bin - - # Activate conda environment - source activate diffusers - - pip cache purge -fi \ No newline at end of file From 8cabe0d8ebcfd721ffeedc07b00e9b208d58f326 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 21:34:45 +0100 Subject: [PATCH 16/34] fix cleanup --- backend/python/common-env/transformers/install.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 2f800b456ca8..99f7c1862d67 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -28,8 +28,10 @@ fi if [ "$PIP_CACHE_PURGE" = true ] ; then export PATH=$PATH:/opt/conda/bin - # Activate conda environment - source activate transformers + if [ ! -d "/opt/intel" ]; then + # Activate conda environment + source activate transformers + fi pip cache purge fi \ No newline at end of file From 8010e907dba728224671567d30f296ddd8142542 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 21:52:51 +0100 Subject: [PATCH 17/34] Disable compel by default --- backend/python/diffusers/backend_diffusers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/diffusers/backend_diffusers.py b/backend/python/diffusers/backend_diffusers.py index 94237deaabd0..ec2dea6050e0 100755 --- a/backend/python/diffusers/backend_diffusers.py +++ b/backend/python/diffusers/backend_diffusers.py @@ -28,7 +28,7 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24 -COMPEL=os.environ.get("COMPEL", "1") == "1" +COMPEL=os.environ.get("COMPEL", "0") == "1" XPU=os.environ.get("XPU", "0") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1" From aa5acfc1a42320bce715061c7a066460268df47a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 4 Mar 2024 22:51:35 +0100 Subject: [PATCH 18/34] Install torch 2.1.0 with Intel --- backend/python/diffusers/install.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index a95ba29e8d7f..b14574ba3f33 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -21,7 +21,11 @@ fi if [ -d "/opt/intel" ]; then # If the directory exists, we assume we are using the intel image # https://github.com/intel/intel-extension-for-pytorch/issues/538 - python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel-extension-for-pytorch==2.0.120+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl-aitools/ + pip install torch==2.1.0a0 \ + torchvision==0.16.0a0 \ + torchaudio==2.1.0a0 \ + intel-extension-for-pytorch==2.1.10+xpu \ + --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ pip install google-api-python-client \ grpcio \ grpcio-tools \ @@ -35,8 +39,12 @@ fi if [ "$PIP_CACHE_PURGE" = true ] ; then export PATH=$PATH:/opt/conda/bin - # Activate conda environment - source activate diffusers + if [ $SKIP_CONDA == 1 ]; then + echo "Not activating conda environment." + else + # Activate conda environment + source activate diffusers + fi pip cache purge fi \ No newline at end of file From 09c8822e778a4dd5511f76f0a341637732b654fb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 10:43:36 +0100 Subject: [PATCH 19/34] Skip conda on some setups --- backend/python/common-env/transformers/install.sh | 8 +++++--- backend/python/diffusers/install.sh | 10 +++++----- backend/python/exllama2/Makefile | 4 ++++ backend/python/exllama2/install.sh | 9 +++++++-- backend/python/vall-e-x/Makefile | 4 ++++ backend/python/vall-e-x/install.sh | 9 ++++++--- 6 files changed, 31 insertions(+), 13 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 99f7c1862d67..0471595c7211 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -1,12 +1,14 @@ #!/bin/bash set -ex +SKIP_CONDA=${SKIP_CONDA:-0} + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if [ $SKIP_CONDA == 1 ]; then +if [ $SKIP_CONDA -eq 1 ]; then echo "Skipping conda environment installation" else if conda_env_exists "transformers" ; then @@ -19,7 +21,7 @@ else fi if [ -d "/opt/intel" ]; then - # If the directory exists, we assume we are using the intel image + # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 pip install intel-extension-for-transformers @@ -28,7 +30,7 @@ fi if [ "$PIP_CACHE_PURGE" = true ] ; then export PATH=$PATH:/opt/conda/bin - if [ ! -d "/opt/intel" ]; then + if [ $SKIP_CONDA -ne 1 ]; then # Activate conda environment source activate transformers fi diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index b14574ba3f33..87a8a8afb9c7 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -1,12 +1,14 @@ #!/bin/bash set -ex +SKIP_CONDA=${SKIP_CONDA:-0} + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null } -if [ $SKIP_CONDA == 1 ]; then +if [ $SKIP_CONDA -eq 1 ]; then echo "Skipping conda environment installation" else if conda_env_exists "diffusers" ; then @@ -19,7 +21,7 @@ else fi if [ -d "/opt/intel" ]; then - # If the directory exists, we assume we are using the intel image + # Intel GPU: If the directory exists, we assume we are using the intel image # https://github.com/intel/intel-extension-for-pytorch/issues/538 pip install torch==2.1.0a0 \ torchvision==0.16.0a0 \ @@ -39,9 +41,7 @@ fi if [ "$PIP_CACHE_PURGE" = true ] ; then export PATH=$PATH:/opt/conda/bin - if [ $SKIP_CONDA == 1 ]; then - echo "Not activating conda environment." - else + if [ $SKIP_CONDA -ne 1 ]; then # Activate conda environment source activate diffusers fi diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile index 2415815155d3..d2a133b60139 100644 --- a/backend/python/exllama2/Makefile +++ b/backend/python/exllama2/Makefile @@ -1,3 +1,7 @@ +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: exllama2 exllama2: $(MAKE) -C ../common-env/transformers diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index a6df3d37630b..58246b256e7f 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -5,8 +5,13 @@ set -e export PATH=$PATH:/opt/conda/bin export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f -# Activate conda environment -source activate transformers +SKIP_CONDA=${SKIP_CONDA:-0} + +if [ $SKIP_CONDA -ne 1 ]; then + source activate transformers +else + CONDA_PREFIX=$PWD +fi echo $CONDA_PREFIX diff --git a/backend/python/vall-e-x/Makefile b/backend/python/vall-e-x/Makefile index 4804f12ff521..8f34f559ee3a 100644 --- a/backend/python/vall-e-x/Makefile +++ b/backend/python/vall-e-x/Makefile @@ -1,3 +1,7 @@ +ifneq (,$(findstring sycl,$(BUILD_TYPE))) +export SKIP_CONDA=1 +endif + .PHONY: ttsvalle ttsvalle: $(MAKE) -C ../common-env/transformers diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh index 26ccdccd0789..a3b65ed9baba 100644 --- a/backend/python/vall-e-x/install.sh +++ b/backend/python/vall-e-x/install.sh @@ -5,10 +5,13 @@ export PATH=$PATH:/opt/conda/bin export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4 -# Activate conda environment -source activate transformers +SKIP_CONDA=${SKIP_CONDA:-0} -echo $CONDA_PREFIX +if [ $SKIP_CONDA -ne 1 ]; then + source activate transformers +else + CONDA_PREFIX=$PWD +fi git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && popd From 96c5fd488041de612c9ea76b220d17d6bcde9d9a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 13:06:14 +0100 Subject: [PATCH 20/34] Detect python --- backend/python/diffusers/run.sh | 14 +++++++++++++- backend/python/transformers/run.sh | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index 7ff71e3988a1..52a79072ef18 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -17,4 +17,16 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -python $DIR/backend_diffusers.py $@ +# Intel image: If there is no "python" command, try "python3" +if ! [ -x "$(command -v python)" ]; then + if [ -x "$(command -v python3)" ]; then + export PYTHON=python3 + else + echo 'Error: python is not installed.' >&2 + exit 1 + fi +else + export PYTHON=python +fi + +$PYTHON $DIR/backend_diffusers.py $@ diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index 409cb7c8a7aa..0d506dd177a8 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -17,4 +17,16 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -python $DIR/transformers_server.py $@ +# Intel image: If there is no "python" command, try "python3" +if ! [ -x "$(command -v python)" ]; then + if [ -x "$(command -v python3)" ]; then + export PYTHON=python3 + else + echo 'Error: python is not installed.' >&2 + exit 1 + fi +else + export PYTHON=python +fi + +$PYTHON $DIR/transformers_server.py $@ From 1bd4e66d0781bbe77e970424c38941eeaf6927cf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 18:50:42 +0100 Subject: [PATCH 21/34] Quiet output --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 903acdb13055..7d59e760e5c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ RUN apt-get update && \ apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean # Install Go -RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz +RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz ENV PATH $PATH:/usr/local/go/bin COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ From 364e36e6cae2e559d30d5e2a3665241eac0c9f60 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 18:52:26 +0100 Subject: [PATCH 22/34] Do not override system python with conda --- Dockerfile | 26 +++++++++---------- .../python/common-env/transformers/install.sh | 3 +-- backend/python/diffusers/install.sh | 3 +-- backend/python/diffusers/run.sh | 3 +-- backend/python/exllama2/install.sh | 2 +- backend/python/mamba/install.sh | 3 ++- backend/python/petals/Makefile | 2 +- backend/python/petals/install.sh | 5 ++++ backend/python/transformers/run.sh | 2 +- backend/python/vall-e-x/install.sh | 2 +- 10 files changed, 27 insertions(+), 24 deletions(-) create mode 100644 backend/python/petals/install.sh diff --git a/Dockerfile b/Dockerfile index 7d59e760e5c8..04d6e21c4b90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -168,43 +168,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/ ## Duplicated from Makefile to avoid having a big layer that's hard to push RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \ + make -C backend/python/autogptq \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/bark \ + make -C backend/python/bark \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \ + make -C backend/python/diffusers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \ + make -C backend/python/vllm \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \ + make -C backend/python/mamba \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \ + make -C backend/python/sentencetransformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \ + make -C backend/python/transformers \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \ + make -C backend/python/vall-e-x \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \ + make -C backend/python/exllama \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \ + make -C backend/python/exllama2 \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/petals \ + make -C backend/python/petals \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \ + make -C backend/python/transformers-musicgen \ ; fi RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \ - PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \ + make -C backend/python/coqui \ ; fi # Make sure the models directory exists diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 0471595c7211..805603746434 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -11,6 +11,7 @@ conda_env_exists(){ if [ $SKIP_CONDA -eq 1 ]; then echo "Skipping conda environment installation" else + export PATH=$PATH:/opt/conda/bin if conda_env_exists "transformers" ; then echo "Creating virtual environment..." conda env create --name transformers --file $1 @@ -28,8 +29,6 @@ if [ -d "/opt/intel" ]; then fi if [ "$PIP_CACHE_PURGE" = true ] ; then - export PATH=$PATH:/opt/conda/bin - if [ $SKIP_CONDA -ne 1 ]; then # Activate conda environment source activate transformers diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 87a8a8afb9c7..4b0d263365ff 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -11,6 +11,7 @@ conda_env_exists(){ if [ $SKIP_CONDA -eq 1 ]; then echo "Skipping conda environment installation" else + export PATH=$PATH:/opt/conda/bin if conda_env_exists "diffusers" ; then echo "Creating virtual environment..." conda env create --name diffusers --file $1 @@ -39,8 +40,6 @@ if [ -d "/opt/intel" ]; then fi if [ "$PIP_CACHE_PURGE" = true ] ; then - export PATH=$PATH:/opt/conda/bin - if [ $SKIP_CONDA -ne 1 ]; then # Activate conda environment source activate diffusers diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index 52a79072ef18..aeb3a3898079 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -3,13 +3,12 @@ ## ## A bash script wrapper that runs the diffusers server with conda -export PATH=$PATH:/opt/conda/bin - if [ -d "/opt/intel" ]; then # Assumes we are using the Intel oneAPI container image # https://github.com/intel/intel-extension-for-pytorch/issues/538 export XPU=1 else + export PATH=$PATH:/opt/conda/bin # Activate conda environment source activate diffusers fi diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index 58246b256e7f..bbffa837b542 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -2,7 +2,6 @@ set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f SKIP_CONDA=${SKIP_CONDA:-0} @@ -10,6 +9,7 @@ SKIP_CONDA=${SKIP_CONDA:-0} if [ $SKIP_CONDA -ne 1 ]; then source activate transformers else + export PATH=$PATH:/opt/conda/bin CONDA_PREFIX=$PWD fi diff --git a/backend/python/mamba/install.sh b/backend/python/mamba/install.sh index e56b83c2d31a..4ef26ece1c30 100755 --- a/backend/python/mamba/install.sh +++ b/backend/python/mamba/install.sh @@ -2,13 +2,14 @@ set -e ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin if [ "$BUILD_TYPE" != "cublas" ]; then echo "[mamba] Attention!!! nvcc is required - skipping installation" exit 0 fi +export PATH=$PATH:/opt/conda/bin + # Activate conda environment source activate transformers diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile index 4bd07b112827..aa7778e15b29 100644 --- a/backend/python/petals/Makefile +++ b/backend/python/petals/Makefile @@ -1,7 +1,7 @@ .PHONY: petals petals: @echo "Creating virtual environment..." - @conda env create --name petals --file petals.yml + bash install.sh "petals.yml" @echo "Virtual environment created." .PHONY: run diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh new file mode 100644 index 000000000000..97bcbb8af209 --- /dev/null +++ b/backend/python/petals/install.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export PATH=$PATH:/opt/conda/bin + +conda env create --name petals --file $1 \ No newline at end of file diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index 0d506dd177a8..cde474dfa936 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -3,13 +3,13 @@ ## ## A bash script wrapper that runs the transformers server with conda -export PATH=$PATH:/opt/conda/bin if [ -d "/opt/intel" ]; then # Assumes we are using the Intel oneAPI container image # https://github.com/intel/intel-extension-for-pytorch/issues/538 export XPU=1 else + export PATH=$PATH:/opt/conda/bin # Activate conda environment source activate transformers fi diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh index a3b65ed9baba..a9c4117e5cc0 100644 --- a/backend/python/vall-e-x/install.sh +++ b/backend/python/vall-e-x/install.sh @@ -2,7 +2,6 @@ ## ## A bash script installs the required dependencies of VALL-E-X and prepares the environment -export PATH=$PATH:/opt/conda/bin export SHA=3faaf8ccadb154d63b38070caf518ce9309ea0f4 SKIP_CONDA=${SKIP_CONDA:-0} @@ -10,6 +9,7 @@ SKIP_CONDA=${SKIP_CONDA:-0} if [ $SKIP_CONDA -ne 1 ]; then source activate transformers else + export PATH=$PATH:/opt/conda/bin CONDA_PREFIX=$PWD fi From 5c9fa5dc0bdd820bbae3319abb3042f5b04cbb45 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 19:53:47 +0100 Subject: [PATCH 23/34] Prefer python3 --- backend/python/diffusers/run.sh | 10 +++++----- backend/python/transformers/run.sh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index aeb3a3898079..d995884a1876 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -16,16 +16,16 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# Intel image: If there is no "python" command, try "python3" -if ! [ -x "$(command -v python)" ]; then - if [ -x "$(command -v python3)" ]; then - export PYTHON=python3 +# Intel image: If there is no "python3" command, try "python" +if ! [ -x "$(command -v python3)" ]; then + if [ -x "$(command -v python)" ]; then + export PYTHON=python else echo 'Error: python is not installed.' >&2 exit 1 fi else - export PYTHON=python + export PYTHON=python3 fi $PYTHON $DIR/backend_diffusers.py $@ diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index cde474dfa936..83ed1a3e9c2a 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -17,16 +17,16 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# Intel image: If there is no "python" command, try "python3" -if ! [ -x "$(command -v python)" ]; then - if [ -x "$(command -v python3)" ]; then - export PYTHON=python3 +# Intel image: If there is no "python3" command, try "python" +if ! [ -x "$(command -v python3)" ]; then + if [ -x "$(command -v python)" ]; then + export PYTHON=python else echo 'Error: python is not installed.' >&2 exit 1 fi else - export PYTHON=python + export PYTHON=python3 fi $PYTHON $DIR/transformers_server.py $@ From 235370888e5062f0c9d04a1fd27f4ab24a853e7c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 21:55:42 +0100 Subject: [PATCH 24/34] Fixups --- backend/python/common-env/transformers/install.sh | 2 +- backend/python/diffusers/install.sh | 3 ++- backend/python/exllama2/install.sh | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 805603746434..4df682abd404 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -29,7 +29,7 @@ if [ -d "/opt/intel" ]; then fi if [ "$PIP_CACHE_PURGE" = true ] ; then - if [ $SKIP_CONDA -ne 1 ]; then + if [ $SKIP_CONDA -eq 0 ]; then # Activate conda environment source activate transformers fi diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 4b0d263365ff..d83ec0be0b3b 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -22,13 +22,14 @@ else fi if [ -d "/opt/intel" ]; then - # Intel GPU: If the directory exists, we assume we are using the intel image + # Intel GPU: If the directory exists, we assume we are using the Intel image # https://github.com/intel/intel-extension-for-pytorch/issues/538 pip install torch==2.1.0a0 \ torchvision==0.16.0a0 \ torchaudio==2.1.0a0 \ intel-extension-for-pytorch==2.1.10+xpu \ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ + pip install google-api-python-client \ grpcio \ grpcio-tools \ diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index bbffa837b542..bcd2331d0b58 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -6,10 +6,10 @@ export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f SKIP_CONDA=${SKIP_CONDA:-0} -if [ $SKIP_CONDA -ne 1 ]; then +if [ $SKIP_CONDA -eq 0 ]; then + export PATH=$PATH:/opt/conda/bin source activate transformers else - export PATH=$PATH:/opt/conda/bin CONDA_PREFIX=$PWD fi From a044a188f3610a0a4ff113107f08a5c11169b870 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 22:14:38 +0100 Subject: [PATCH 25/34] exllama2: do not install without conda (overrides pytorch version) --- backend/python/exllama2/install.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index bcd2331d0b58..2c7427cedaf3 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -10,7 +10,9 @@ if [ $SKIP_CONDA -eq 0 ]; then export PATH=$PATH:/opt/conda/bin source activate transformers else - CONDA_PREFIX=$PWD + # exllama2 is supported only with a conda environment + echo "[exllama2] Attention!!! conda is required - skipping installation" + exit 0 fi echo $CONDA_PREFIX From 293be13015ad23f13290efe9b7e3d1065b9290af Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 5 Mar 2024 22:18:36 +0100 Subject: [PATCH 26/34] exllama/exllama2: do not install if not using cuda --- backend/python/exllama/install.sh | 5 +++++ backend/python/exllama2/install.sh | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/backend/python/exllama/install.sh b/backend/python/exllama/install.sh index 702bb1fbefb4..320e7f4dfac7 100755 --- a/backend/python/exllama/install.sh +++ b/backend/python/exllama/install.sh @@ -3,6 +3,11 @@ set -ex export PATH=$PATH:/opt/conda/bin +if [ "$BUILD_TYPE" != "cublas" ]; then + echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation" + exit 0 +fi + # Check if environment exist conda_env_exists(){ ! conda list --name "${@}" >/dev/null 2>/dev/null diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index 2c7427cedaf3..d8e779dea8b6 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -6,6 +6,11 @@ export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f SKIP_CONDA=${SKIP_CONDA:-0} +if [ "$BUILD_TYPE" != "cublas" ]; then + echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation" + exit 0 +fi + if [ $SKIP_CONDA -eq 0 ]; then export PATH=$PATH:/opt/conda/bin source activate transformers From 72d1cd257b2a805620ba667888fad5b1f84c383d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 6 Mar 2024 17:11:03 +0100 Subject: [PATCH 27/34] Add missing dataset dependency --- backend/python/common-env/transformers/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 4df682abd404..169a223197fd 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers + pip install intel-extension-for-transformers datasets fi if [ "$PIP_CACHE_PURGE" = true ] ; then From 0e32ba01824ae09b1660403fe416318e28afdb33 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 6 Mar 2024 21:58:47 +0100 Subject: [PATCH 28/34] Small fixups, symlink to python, add requirements --- Dockerfile | 4 ++++ backend/python/common-env/transformers/install.sh | 2 +- backend/python/transformers/transformers_server.py | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 04d6e21c4b90..fd3659629395 100644 --- a/Dockerfile +++ b/Dockerfile @@ -81,6 +81,10 @@ RUN pip install --upgrade pip RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN apt-get install -y espeak-ng espeak && apt-get clean +RUN if [ ! -e /usr/bin/python ]; then \ + ln -s /usr/bin/python3 /usr/bin/python \ + ; fi + ################################### ################################### diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index 169a223197fd..dc0e66b93f06 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers datasets + pip install intel-extension-for-transformers datasets sentencepiece tiktoken fi if [ "$PIP_CACHE_PURGE" = true ] ; then diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 70474587ff0c..e3c9ad0d9da9 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -77,7 +77,11 @@ def LoadModel(self, request, context): model_name = request.Model try: if request.Type == "AutoModelForCausalLM": - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) + if XPU: + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, + device_map="xpu", load_in_4bit=True) + else: + self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) else: self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode) From dc8ae9244f42286840c8ae45ea006e12e4acd5f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 6 Mar 2024 22:11:15 +0100 Subject: [PATCH 29/34] Add neural_speed to the deps --- backend/python/common-env/transformers/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/common-env/transformers/install.sh b/backend/python/common-env/transformers/install.sh index dc0e66b93f06..e268fcc88370 100644 --- a/backend/python/common-env/transformers/install.sh +++ b/backend/python/common-env/transformers/install.sh @@ -25,7 +25,7 @@ if [ -d "/opt/intel" ]; then # Intel GPU: If the directory exists, we assume we are using the intel image # (no conda env) # https://github.com/intel/intel-extension-for-pytorch/issues/538 - pip install intel-extension-for-transformers datasets sentencepiece tiktoken + pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed fi if [ "$PIP_CACHE_PURGE" = true ] ; then From 7f7cd68082bc7b5119ec6179912c59444ae3fe6e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 6 Mar 2024 22:13:49 +0100 Subject: [PATCH 30/34] correctly handle model offloading --- backend/python/transformers/transformers_server.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index e3c9ad0d9da9..0d6932b59920 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -90,11 +90,9 @@ def LoadModel(self, request, context): self.XPU = False if XPU: + self.XPU = True try: - print("Loading model", model_name, "to XPU.", file=sys.stderr) - device_map = "xpu" - self.model = self.model.to(device_map) - self.XPU = True + print("Optimizing model", model_name, "to XPU.", file=sys.stderr) self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map) except Exception as err: print("Not using XPU:", err, file=sys.stderr) From f0bcfba1fbd6da5e90d3a4e57f0bdaf737ed21d6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 6 Mar 2024 23:27:00 +0100 Subject: [PATCH 31/34] fix: device_map == xpu --- backend/python/transformers/transformers_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index 0d6932b59920..d83f2ad30976 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -93,7 +93,7 @@ def LoadModel(self, request, context): self.XPU = True try: print("Optimizing model", model_name, "to XPU.", file=sys.stderr) - self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device=device_map) + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device="xpu") except Exception as err: print("Not using XPU:", err, file=sys.stderr) From 2ee4fd2d56c18530e0cd394285569eba682eaa1c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 7 Mar 2024 01:03:17 +0100 Subject: [PATCH 32/34] go back at calling python, fixed at dockerfile level --- backend/python/diffusers/run.sh | 14 +------------- backend/python/transformers/run.sh | 14 +------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/backend/python/diffusers/run.sh b/backend/python/diffusers/run.sh index d995884a1876..69b25d507a62 100755 --- a/backend/python/diffusers/run.sh +++ b/backend/python/diffusers/run.sh @@ -16,16 +16,4 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# Intel image: If there is no "python3" command, try "python" -if ! [ -x "$(command -v python3)" ]; then - if [ -x "$(command -v python)" ]; then - export PYTHON=python - else - echo 'Error: python is not installed.' >&2 - exit 1 - fi -else - export PYTHON=python3 -fi - -$PYTHON $DIR/backend_diffusers.py $@ +python $DIR/backend_diffusers.py $@ diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index 83ed1a3e9c2a..d09c1f5c0b47 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -17,16 +17,4 @@ fi # get the directory where the bash script is located DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# Intel image: If there is no "python3" command, try "python" -if ! [ -x "$(command -v python3)" ]; then - if [ -x "$(command -v python)" ]; then - export PYTHON=python - else - echo 'Error: python is not installed.' >&2 - exit 1 - fi -else - export PYTHON=python3 -fi - -$PYTHON $DIR/transformers_server.py $@ +python $DIR/transformers_server.py $@ From 9ff1cf87935e84d8d874bfb87cd0df64fdff0331 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 7 Mar 2024 01:08:12 +0100 Subject: [PATCH 33/34] Exllama2 restricted to only nvidia gpus --- backend/python/common-env/transformers/Makefile | 3 +++ backend/python/diffusers/Makefile | 3 +++ backend/python/exllama2/Makefile | 4 ---- backend/python/exllama2/install.sh | 12 ++---------- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/backend/python/common-env/transformers/Makefile b/backend/python/common-env/transformers/Makefile index b81b1ad7550e..797af0832ef2 100644 --- a/backend/python/common-env/transformers/Makefile +++ b/backend/python/common-env/transformers/Makefile @@ -8,6 +8,9 @@ ifeq ($(BUILD_TYPE), hipblas) CONDA_ENV_PATH = "transformers-rocm.yml" endif +# Intel GPU are supposed to have dependencies installed in the main python +# environment, so we skip conda installation for SYCL builds. +# https://github.com/intel/intel-extension-for-pytorch/issues/538 ifneq (,$(findstring sycl,$(BUILD_TYPE))) export SKIP_CONDA=1 endif diff --git a/backend/python/diffusers/Makefile b/backend/python/diffusers/Makefile index a3901dafcaa1..40e1d1a7e888 100644 --- a/backend/python/diffusers/Makefile +++ b/backend/python/diffusers/Makefile @@ -4,6 +4,9 @@ ifeq ($(BUILD_TYPE), hipblas) export CONDA_ENV_PATH = "diffusers-rocm.yml" endif +# Intel GPU are supposed to have dependencies installed in the main python +# environment, so we skip conda installation for SYCL builds. +# https://github.com/intel/intel-extension-for-pytorch/issues/538 ifneq (,$(findstring sycl,$(BUILD_TYPE))) export SKIP_CONDA=1 endif diff --git a/backend/python/exllama2/Makefile b/backend/python/exllama2/Makefile index d2a133b60139..2415815155d3 100644 --- a/backend/python/exllama2/Makefile +++ b/backend/python/exllama2/Makefile @@ -1,7 +1,3 @@ -ifneq (,$(findstring sycl,$(BUILD_TYPE))) -export SKIP_CONDA=1 -endif - .PHONY: exllama2 exllama2: $(MAKE) -C ../common-env/transformers diff --git a/backend/python/exllama2/install.sh b/backend/python/exllama2/install.sh index d8e779dea8b6..858685b07eec 100755 --- a/backend/python/exllama2/install.sh +++ b/backend/python/exllama2/install.sh @@ -4,21 +4,13 @@ set -e ## A bash script installs the required dependencies of VALL-E-X and prepares the environment export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f -SKIP_CONDA=${SKIP_CONDA:-0} - if [ "$BUILD_TYPE" != "cublas" ]; then echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation" exit 0 fi -if [ $SKIP_CONDA -eq 0 ]; then - export PATH=$PATH:/opt/conda/bin - source activate transformers -else - # exllama2 is supported only with a conda environment - echo "[exllama2] Attention!!! conda is required - skipping installation" - exit 0 -fi +export PATH=$PATH:/opt/conda/bin +source activate transformers echo $CONDA_PREFIX From 95fb025dcef5db9d250d7ad2ec12e3c130a1b5d3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 7 Mar 2024 09:31:54 +0100 Subject: [PATCH 34/34] Tokenizer to xpu --- backend/python/transformers/transformers_server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py index d83f2ad30976..41112c44f6e5 100755 --- a/backend/python/transformers/transformers_server.py +++ b/backend/python/transformers/transformers_server.py @@ -93,7 +93,7 @@ def LoadModel(self, request, context): self.XPU = True try: print("Optimizing model", model_name, "to XPU.", file=sys.stderr) - self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, woq=True, device="xpu") + self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu") except Exception as err: print("Not using XPU:", err, file=sys.stderr) @@ -160,6 +160,8 @@ def Predict(self, request, context): inputs = self.tokenizer(request.Prompt, return_tensors="pt").input_ids if self.CUDA: inputs = inputs.to("cuda") + if XPU: + inputs = inputs.to("xpu") outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)