From 5169d290bfa170ea8f0c461121c53fb6879fc8b8 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 13:02:16 +0800
Subject: [PATCH 1/9] debug

---
 .github/workflows/docker-cd.yaml    | 20 --------------------
 .github/workflows/python.yaml       | 12 +++++++++---
 pyproject.toml                      |  7 ++++---
 setup.cfg                           |  4 ++--
 xinference/deploy/docker/Dockerfile |  2 +-
 5 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/docker-cd.yaml b/.github/workflows/docker-cd.yaml
index 5048a8910c..419dc7bfbb 100644
--- a/.github/workflows/docker-cd.yaml
+++ b/.github/workflows/docker-cd.yaml
@@ -73,26 +73,6 @@ jobs:
             echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
           fi

-      - name: Log in to Aliyun Docker Hub
-        uses: docker/login-action@v1
-        with:
-          registry: registry.cn-hangzhou.aliyuncs.com
-          username: ${{ secrets.DOCKERHUB_ALIYUN_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_ALIYUN_PASSWORD }}
-
-      - name: Push docker image to Aliyun
-        shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
-        env:
-          DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/xprobe_xinference
-        run: |
-          if [[ -n "$XINFERENCE_GIT_TAG" ]]; then
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}" "$DOCKER_ORG/xinference:latest"
-            docker push "$DOCKER_ORG/xinference:latest"
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
-            docker push "$DOCKER_ORG/xinference:latest-cpu"
-          fi
-
       - name: Clean docker image cache
         shell: bash
         if: ${{ github.repository == 'xorbitsai/inference' }}

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 81a16122c5..b0c2d43984 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -74,13 +74,13 @@ jobs:
       fail-fast: false
       matrix:
         os: [ "ubuntu-latest", "macos-12", "windows-latest" ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
         module: [ "xinference" ]
         exclude:
-          - { os: macos-12, python-version: 3.9 }
           - { os: macos-12, python-version: 3.10 }
-          - { os: windows-latest, python-version: 3.9 }
+          - { os: macos-12, python-version: 3.11 }
           - { os: windows-latest, python-version: 3.10 }
+          - { os: windows-latest, python-version: 3.11 }
         include:
           - { os: self-hosted, module: gpu, python-version: 3.9}
           - { os: macos-latest, module: metal, python-version: "3.10" }
@@ -99,6 +99,12 @@
           python-version: ${{ matrix.python-version }}
           activate-environment: ${{ env.CONDA_ENV }}

+      # Important for python == 3.12
+      - name: Update pip and setuptools
+        if: ${{ matrix.python-version == '3.12' }}
+        run: |
+          python -m pip install -U pip setuptools
+
       - name: Install dependencies
         env:
           MODULE: ${{ matrix.module }}

diff --git a/pyproject.toml b/pyproject.toml
index 72e171146d..21b135b1d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,7 @@
 [build-system]
 requires = [
-    "setuptools<64",
+    "setuptools<64; python_version<'3.12'",
+    "setuptools>=75; python_version>='3.12'"
 ]
 build-backend = "setuptools.build_meta"

@@ -17,6 +18,6 @@ extend-exclude = '''
 asyncio_mode = "auto"

 [tool.cibuildwheel]
-build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]
-skip = "pp* *musllinux* *i686 cp36* cp310-win32 cp311-win32"
+build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*"]
+skip = "pp* *musllinux* *i686 cp36* cp39-win32 cp310-win32 cp311-win32 cp312-win32"
 manylinux-x86_64-image = "manylinux2014"

diff --git a/setup.cfg b/setup.cfg
index 3c08363e59..7b172a4052 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,15 +7,15 @@ maintainer = Qin Xuye
 maintainer_email = qinxuye@xprobe.io
 license = Apache License 2.0
 url = https://github.com/xorbitsai/inference
-python_requires = >=3.8
+python_requires = >=3.9
 classifier =
     Operating System :: OS Independent
     Programming Language :: Python
    Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
     Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
     Programming Language :: Python :: Implementation :: CPython
     Topic :: Software Development :: Libraries

diff --git a/xinference/deploy/docker/Dockerfile b/xinference/deploy/docker/Dockerfile
index 3d6afc44c3..5ee3f11771 100644
--- a/xinference/deploy/docker/Dockerfile
+++ b/xinference/deploy/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM vllm/vllm-openai:v0.6.0
+FROM vllm/vllm-openai:latest
 COPY . /opt/inference
 WORKDIR /opt/inference

From 02e43081c8b89287d617022f68dbb914c2ecdb51 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 15:21:22 +0800
Subject: [PATCH 2/9] fix CI

---
 .github/workflows/python.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index b0c2d43984..c2af8f7a6b 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -118,13 +118,13 @@ jobs:
             sudo rm -rf "$AGENT_TOOLSDIRECTORY"
           fi
           if [ "$MODULE" == "metal" ]; then
-            pip install mlx-lm
+            pip install mlx-lm "FlagEmbedding<1.3.0"
           fi
           pip install "llama-cpp-python==0.2.77" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
           pip install transformers
           pip install attrdict
           pip install "timm>=0.9.16"
-          pip install torch
+          pip install "torch<2.5.1"
           pip install torchvision
           pip install accelerate
           pip install sentencepiece

From 6b66c3a92853c3c1aaa274c0ac0025e808af2cd8 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 15:31:02 +0800
Subject: [PATCH 3/9] fix

---
 .github/workflows/python.yaml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index c2af8f7a6b..293c9209be 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -118,14 +118,13 @@ jobs:
             sudo rm -rf "$AGENT_TOOLSDIRECTORY"
           fi
           if [ "$MODULE" == "metal" ]; then
-            pip install mlx-lm "FlagEmbedding<1.3.0"
+            pip install mlx-lm
           fi
           pip install "llama-cpp-python==0.2.77" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
           pip install transformers
           pip install attrdict
           pip install "timm>=0.9.16"
-          pip install "torch<2.5.1"
-          pip install torchvision
+          pip install torch torchvision
           pip install accelerate
           pip install sentencepiece
           pip install transformers_stream_generator
@@ -134,7 +133,7 @@ jobs:
           pip install modelscope
           pip install diffusers
           pip install protobuf
-          pip install FlagEmbedding
+          pip install "FlagEmbedding<1.3.0"
           pip install "tenacity>=8.2.0,<8.4.0"
           pip install -e ".[dev]"
           pip install "jinja2==3.1.2"

From 42d6b392ec4af1e21caf1a68ecf78d49a388884c Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 16:14:46 +0800
Subject: [PATCH 4/9] fix

---
 .github/workflows/python.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 293c9209be..b71567b268 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -138,7 +138,6 @@ jobs:
           pip install -e ".[dev]"
           pip install "jinja2==3.1.2"
           pip install tensorizer
-          pip install eva-decord
           pip install jj-pytorchvideo
           pip install qwen-vl-utils
           pip install datamodel_code_generator

From 14b5ca2a1a4a805dd5d340bdd11ed88d90615d5e Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 17:01:17 +0800
Subject: [PATCH 5/9] fix

---
 xinference/core/tests/test_continuous_batching.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/xinference/core/tests/test_continuous_batching.py b/xinference/core/tests/test_continuous_batching.py
index c58b91bb55..00e875c743 100644
--- a/xinference/core/tests/test_continuous_batching.py
+++ b/xinference/core/tests/test_continuous_batching.py
@@ -13,7 +13,6 @@
 # limitations under the License.


-import os
 import sys
 import threading
 import time
@@ -112,18 +111,11 @@ def run_internal(self):
         assert result["msg"] == self._expected_res


-@pytest.fixture
-def enable_batch():
-    os.environ["XINFERENCE_TRANSFORMERS_ENABLE_BATCHING"] = "1"
-    yield
-    os.environ["XINFERENCE_TRANSFORMERS_ENABLE_BATCHING"] = "0"
-
-
 @pytest.mark.skipif(
     sys.platform == "win32",
     reason="does not run on windows github CI due to its terrible runtime",
 )
-def test_continuous_batching(enable_batch, setup):
+def test_continuous_batching(setup):
     endpoint, _ = setup
     url = f"{endpoint}/v1/models"
     client = RESTfulClient(endpoint)
@@ -132,7 +124,7 @@ def test_continuous_batching(enable_batch, setup):
     payload = {
         "model_engine": "transformers",
         "model_type": "LLM",
-        "model_name": "qwen1.5-chat",
+        "model_name": "qwen2.5-instruct",
         "quantization": "none",
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
@@ -146,7 +138,7 @@ def test_continuous_batching(enable_batch, setup):
     response = requests.post(url, json=payload)
     response_data = response.json()
     model_uid_res = response_data["model_uid"]
-    assert model_uid_res == "qwen1.5-chat"
+    assert model_uid_res == "qwen2.5-instruct"

     model = client.get_model(model_uid_res)

From 1d439b8e68285a307685e67bb8629a511187a8c4 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Thu, 31 Oct 2024 19:01:29 +0800
Subject: [PATCH 6/9] fix FlagEmbedding version in docker

---
 xinference/deploy/docker/requirements.txt     | 2 +-
 xinference/deploy/docker/requirements_cpu.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xinference/deploy/docker/requirements.txt b/xinference/deploy/docker/requirements.txt
index a3aa0a5e93..c23ecfb18c 100644
--- a/xinference/deploy/docker/requirements.txt
+++ b/xinference/deploy/docker/requirements.txt
@@ -42,7 +42,7 @@ optimum
 attrdict # For deepseek VL
 timm>=0.9.16 # For deepseek VL
 torchvision # For deepseek VL
-FlagEmbedding # For rerank
+FlagEmbedding<1.3.0 # For rerank
 funasr
 omegaconf~=2.3.0 # For ChatTTS
 nemo_text_processing<1.1.0 # 1.1.0 requires pynini==2.1.6.post1

diff --git a/xinference/deploy/docker/requirements_cpu.txt b/xinference/deploy/docker/requirements_cpu.txt
index 9eb9409b4f..4d843a1a42 100644
--- a/xinference/deploy/docker/requirements_cpu.txt
+++ b/xinference/deploy/docker/requirements_cpu.txt
@@ -30,7 +30,7 @@ protobuf
 einops
 tiktoken
 sentence-transformers>=3.1.0
-FlagEmbedding
+FlagEmbedding<1.3.0
 diffusers>=0.30.0
 controlnet_aux
 orjson

From cd744df42b5ae83204fe682d7806a434bcf32b17 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Fri, 1 Nov 2024 11:33:44 +0800
Subject: [PATCH 7/9] revert FlagEmbedding issue

---
 .github/workflows/python.yaml                 | 2 +-
 xinference/deploy/docker/requirements.txt     | 2 +-
 xinference/deploy/docker/requirements_cpu.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index b71567b268..eba28c50b6 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -133,7 +133,7 @@ jobs:
           pip install modelscope
           pip install diffusers
           pip install protobuf
-          pip install "FlagEmbedding<1.3.0"
+          pip install FlagEmbedding
           pip install "tenacity>=8.2.0,<8.4.0"
           pip install -e ".[dev]"
           pip install "jinja2==3.1.2"

diff --git a/xinference/deploy/docker/requirements.txt b/xinference/deploy/docker/requirements.txt
index c23ecfb18c..a3aa0a5e93 100644
--- a/xinference/deploy/docker/requirements.txt
+++ b/xinference/deploy/docker/requirements.txt
@@ -42,7 +42,7 @@ optimum
 attrdict # For deepseek VL
 timm>=0.9.16 # For deepseek VL
 torchvision # For deepseek VL
-FlagEmbedding<1.3.0 # For rerank
+FlagEmbedding # For rerank
 funasr
 omegaconf~=2.3.0 # For ChatTTS
 nemo_text_processing<1.1.0 # 1.1.0 requires pynini==2.1.6.post1

diff --git a/xinference/deploy/docker/requirements_cpu.txt b/xinference/deploy/docker/requirements_cpu.txt
index 4d843a1a42..9eb9409b4f 100644
--- a/xinference/deploy/docker/requirements_cpu.txt
+++ b/xinference/deploy/docker/requirements_cpu.txt
@@ -30,7 +30,7 @@ protobuf
 einops
 tiktoken
 sentence-transformers>=3.1.0
-FlagEmbedding<1.3.0
+FlagEmbedding
 diffusers>=0.30.0
 controlnet_aux
 orjson

From fbd3da2e5e2ea3f864f322f14acf04f5d858a0ae Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Fri, 1 Nov 2024 13:14:12 +0800
Subject: [PATCH 8/9] revert docker

---
 xinference/deploy/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xinference/deploy/docker/Dockerfile b/xinference/deploy/docker/Dockerfile
index 5ee3f11771..3d6afc44c3 100644
--- a/xinference/deploy/docker/Dockerfile
+++ b/xinference/deploy/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM vllm/vllm-openai:latest
+FROM vllm/vllm-openai:v0.6.0
 COPY . /opt/inference
 WORKDIR /opt/inference

From 4e93a96877c7a9812b27f9e832934dc414d0a928 Mon Sep 17 00:00:00 2001
From: ChengjieLi
Date: Fri, 1 Nov 2024 14:29:15 +0800
Subject: [PATCH 9/9] only run cb on GPU

---
 .github/workflows/python.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index eba28c50b6..3c4c9d91cd 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -180,6 +180,9 @@ jobs:
             ${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y "faster_whisper"
             ${{ env.SELF_HOST_PYTHON }} -m pip install -U accelerate
             ${{ env.SELF_HOST_PYTHON }} -m pip install -U verovio
+            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
+              --disable-warnings \
+              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/core/tests/test_continuous_batching.py && \
             ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
               -W ignore::PendingDeprecationWarning \
               --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_stable_diffusion.py && \
@@ -211,6 +214,6 @@ jobs:
               --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/client/tests/test_client.py
             pytest --timeout=1500 \
               -W ignore::PendingDeprecationWarning \
-              --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/image/tests/test_got_ocr2.py --ignore xinference/model/audio/tests xinference
+              --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/core/tests/test_continuous_batching.py --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/image/tests/test_got_ocr2.py --ignore xinference/model/audio/tests xinference
           fi
         working-directory: .