feat(intel): add diffusers/transformers support (#1746)
* feat(intel): add diffusers support

* try to consume upstream container image

* Debug

* Manually install deps

* Map transformers/hf cache dir to modelpath if not specified

* fix(compel): update initialization, pass through all gRPC options

* fix: add dependencies, implement transformers for xpu

* base it on the oneAPI image

* Add pillow

* set threads if specified when launching the API

* Skip conda install if intel

* default to non-Intel

* ci: add to pipelines

* prepare compel only if enabled

* Skip conda install if intel

* fix cleanup

* Disable compel by default

* Install torch 2.1.0 with Intel

* Skip conda on some setups

* Detect python

* Quiet output

* Do not override system python with conda

* Prefer python3

* Fixups

* exllama2: do not install without conda (overrides pytorch version)

* exllama/exllama2: do not install if not using cuda

* Add missing dataset dependency

* Small fixups, symlink to python, add requirements

* Add neural_speed to the deps

* correctly handle model offloading

* fix: device_map == xpu

* go back to calling python; fixed at the Dockerfile level

* Exllama2 restricted to NVIDIA GPUs only

* Tokenizer to xpu
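
Taken together, the Intel-specific commits above amount to an environment-driven device switch: the Intel images ship PyTorch and its XPU extension in the system Python (no conda), and the Python backends pick a device at startup. A minimal, hedged sketch of that pattern (variable names are illustrative, not taken from the diff):

    import os

    XPU = os.environ.get("XPU", "0") == "1"

    if XPU:
        # Importing IPEX is what registers the "xpu" device with PyTorch
        import intel_extension_for_pytorch as ipex
        device = "xpu"
    else:
        device = "cuda:0"

    # Models, pipelines, and tokenized inputs are then moved with .to(device)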
mudler authored Mar 7, 2024
1 parent ad6fd7a commit 5d10184
Showing 23 changed files with 250 additions and 81 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/image-pr.yml
@@ -59,6 +59,14 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
@@ -105,4 +113,4 @@ jobs:
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
16 changes: 16 additions & 0 deletions .github/workflows/image.yml
@@ -120,6 +120,22 @@ jobs:
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
34 changes: 20 additions & 14 deletions Dockerfile
@@ -4,6 +4,8 @@ ARG BASE_IMAGE=ubuntu:22.04
# extras or core
FROM ${BASE_IMAGE} as requirements-core

USER root

ARG GO_VERSION=1.21.7
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
@@ -21,7 +23,7 @@ RUN apt-get update && \
apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean

# Install Go
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
@@ -79,6 +81,10 @@ RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get install -y espeak-ng espeak && apt-get clean

RUN if [ ! -e /usr/bin/python ]; then \
ln -s /usr/bin/python3 /usr/bin/python \
; fi

###################################
###################################

@@ -166,43 +172,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/

## Duplicated from Makefile to avoid having a big layer that's hard to push
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
make -C backend/python/autogptq \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
make -C backend/python/bark \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
make -C backend/python/diffusers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \
make -C backend/python/mamba \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
make -C backend/python/coqui \
; fi

# Make sure the models directory exists
7 changes: 7 additions & 0 deletions Makefile
@@ -557,3 +557,10 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
7 changes: 7 additions & 0 deletions backend/python/common-env/transformers/Makefile
@@ -8,6 +8,13 @@ ifeq ($(BUILD_TYPE), hipblas)
CONDA_ENV_PATH = "transformers-rocm.yml"
endif

# Intel GPUs are expected to have their dependencies installed in the main Python
# environment, so we skip the conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: transformers
transformers:
@echo "Installing $(CONDA_ENV_PATH)..."
34 changes: 24 additions & 10 deletions backend/python/common-env/transformers/install.sh
@@ -1,24 +1,38 @@
#!/bin/bash
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# Check if the environment exists (note: the negation means this returns success
# when the environment is *missing*, which is what the callers below expect)
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
}

if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
conda env create --name transformers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
if [ $SKIP_CONDA -eq 1 ]; then
echo "Skipping conda environment installation"
else
export PATH=$PATH:/opt/conda/bin
if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
conda env create --name transformers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
export PATH=$PATH:/opt/conda/bin
if [ -d "/opt/intel" ]; then
# Intel GPU: if this directory exists, assume we are running in the Intel image
# (which has no conda env)
# https://github.com/intel/intel-extension-for-pytorch/issues/538
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
fi

# Activate conda environment
source activate transformers
if [ "$PIP_CACHE_PURGE" = true ] ; then
if [ $SKIP_CONDA -eq 0 ]; then
# Activate conda environment
source activate transformers
fi

pip cache purge
fi
7 changes: 7 additions & 0 deletions backend/python/diffusers/Makefile
@@ -4,6 +4,13 @@ ifeq ($(BUILD_TYPE), hipblas)
export CONDA_ENV_PATH = "diffusers-rocm.yml"
endif

# Intel GPUs are expected to have their dependencies installed in the main Python
# environment, so we skip the conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: diffusers
diffusers:
@echo "Installing $(CONDA_ENV_PATH)..."
25 changes: 19 additions & 6 deletions backend/python/diffusers/backend_diffusers.py
@@ -21,21 +21,26 @@
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image,export_to_video
from compel import Compel
from compel import Compel, ReturnedEmbeddingsType

from transformers import CLIPTextModel
from safetensors.torch import load_file


_ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL=os.environ.get("COMPEL", "1") == "1"
COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES=os.environ.get("FRAMES", "64")

if XPU:
import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0))

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
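
All of these feature flags use the same string comparison, so only the literal value "1" enables a flag — anything else (including "true") leaves it off. A tiny sketch of the same idiom factored into a helper (the helper name is made up, not part of this change):

    import os

    def env_flag(name: str, default: str = "0") -> bool:
        # Mirrors the COMPEL/XPU/CLIPSKIP parsing above: only "1" counts as on
        return os.environ.get(name, default) == "1"

    COMPEL = env_flag("COMPEL")          # now disabled by default
    XPU = env_flag("XPU")
    CLIPSKIP = env_flag("CLIPSKIP", "1")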

@@ -231,8 +236,13 @@ def LoadModel(self, request, context):
if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)

if not self.img2vid:
self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
if COMPEL:
self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
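
The new Compel initialization targets SDXL-style pipelines with two text encoders; requires_pooled=[False, True] requests pooled embeddings only from the second encoder, as SDXL expects — likely why COMPEL now defaults to off, since single-encoder pipelines have no tokenizer_2/text_encoder_2. A hedged usage sketch (the pipeline and prompt are illustrative; compel 2.0.2 as pinned in install.sh):

    from compel import Compel, ReturnedEmbeddingsType

    # pipe is assumed to be an SDXL pipeline exposing two tokenizers/encoders
    compel = Compel(
        tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
        text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True],
    )
    # With requires_pooled set, compel returns both tensors, as used below
    conditioning, pooled = compel.build_conditioning_tensor("a cat++ wearing a hat")
    image = pipe(prompt_embeds=conditioning, pooled_prompt_embeds=pooled).images[0]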


if request.ControlNet:
@@ -247,6 +257,8 @@ def LoadModel(self, request, context):
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume the directory from request.ModelFile,
# but only if request.LoraAdapter is not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter):
@@ -386,8 +398,9 @@ def GenerateImage(self, request, context):

image = {}
if COMPEL:
conditioning = self.compel.build_conditioning_tensor(prompt)
kwargs["prompt_embeds"]= conditioning
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
kwargs["prompt_embeds"] = conditioning
kwargs["pooled_prompt_embeds"] = pooled
# pass the kwargs dictionary to the self.pipe method
image = self.pipe(
guidance_scale=self.cfg_scale,
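
Condensed, the generation path now assembles its keyword arguments conditionally and hands them to the pipeline in one call. A hedged sketch of that flow (the else branch and the .images[0] access follow the usual diffusers API and are assumptions, not shown in this hunk):

    kwargs = {}
    if COMPEL:
        conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
        kwargs["prompt_embeds"] = conditioning
        kwargs["pooled_prompt_embeds"] = pooled
    else:
        kwargs["prompt"] = prompt  # without compel, pass the raw prompt string

    image = self.pipe(guidance_scale=self.cfg_scale, **kwargs).images[0]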
46 changes: 36 additions & 10 deletions backend/python/diffusers/install.sh
@@ -1,24 +1,50 @@
#!/bin/bash
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# Check if the environment exists (note: the negation means this returns success
# when the environment is *missing*, which is what the callers below expect)
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
}

if conda_env_exists "diffusers" ; then
echo "Creating virtual environment..."
conda env create --name diffusers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
if [ $SKIP_CONDA -eq 1 ]; then
echo "Skipping conda environment installation"
else
export PATH=$PATH:/opt/conda/bin
if conda_env_exists "diffusers" ; then
echo "Creating virtual environment..."
conda env create --name diffusers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
export PATH=$PATH:/opt/conda/bin
if [ -d "/opt/intel" ]; then
# Intel GPU: If the directory exists, we assume we are using the Intel image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
pip install torch==2.1.0a0 \
torchvision==0.16.0a0 \
torchaudio==2.1.0a0 \
intel-extension-for-pytorch==2.1.10+xpu \
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/

pip install google-api-python-client \
grpcio \
grpcio-tools \
diffusers==0.24.0 \
"transformers>=4.25.1" \
accelerate \
compel==2.0.2 \
Pillow
fi

# Activate conda environment
source activate diffusers
if [ "$PIP_CACHE_PURGE" = true ] ; then
if [ $SKIP_CONDA -ne 1 ]; then
# Activate conda environment
source activate diffusers
fi

pip cache purge
fi
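
Since the Intel branch above pins matched torch/IPEX builds, a quick sanity check after installation can catch version mismatches early. A hedged sketch (ipex.xpu.get_device_name mirrors the call already used in backend_diffusers.py; the expected versions are the pins from this script):

    import torch
    import intel_extension_for_pytorch as ipex

    print(torch.__version__)            # expected to start with 2.1.0a0
    print(ipex.__version__)             # expected: 2.1.10+xpu
    print(ipex.xpu.get_device_name(0))  # raises if no Intel GPU is visible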
13 changes: 9 additions & 4 deletions backend/python/diffusers/run.sh
@@ -3,10 +3,15 @@
##
## A bash script wrapper that runs the diffusers server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate diffusers
if [ -d "/opt/intel" ]; then
# Assumes we are using the Intel oneAPI container image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
export XPU=1
else
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate diffusers
fi

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
5 changes: 5 additions & 0 deletions backend/python/exllama/install.sh
@@ -3,6 +3,11 @@ set -ex

export PATH=$PATH:/opt/conda/bin

if [ "$BUILD_TYPE" != "cublas" ]; then
echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
exit 0
fi

# Check if the environment exists
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
8 changes: 6 additions & 2 deletions backend/python/exllama2/install.sh
@@ -2,10 +2,14 @@
set -e
##
## A bash script that installs the required dependencies of exllama2 and prepares the environment
export PATH=$PATH:/opt/conda/bin
export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f

# Activate conda environment
if [ "$BUILD_TYPE" != "cublas" ]; then
echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
exit 0
fi

export PATH=$PATH:/opt/conda/bin
source activate transformers

echo $CONDA_PREFIX
Expand Down