ENH: qwen switch to llama cpp (xorbitsai#778)
codingl2k1 authored and Bojun-Feng committed Dec 27, 2023
1 parent 164a9cc commit 9ae71f6
Showing 12 changed files with 44 additions and 582 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yaml
@@ -102,7 +102,7 @@ jobs:
           MODULE: ${{ matrix.module }}
         if: ${{ matrix.module != 'gpu' }}
         run: |
-          pip install "llama-cpp-python>=0.2.0,<0.2.12"
+          pip install "llama-cpp-python>=0.2.23"
           pip install transformers
           pip install torch
           pip install accelerate
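
Note: the CI floor, previously capped below 0.2.12, now requires 0.2.23, presumably the first release line whose bundled llama.cpp can load qwen-family GGUF models. A minimal smoke test of that capability, assuming a local qwen GGUF file (the path and prompt are illustrative, not part of this commit):

# Sketch: load a qwen GGUF model under llama-cpp-python>=0.2.23.
from llama_cpp import Llama

llm = Llama(
    model_path="./qwen-chat-7b.Q4_K_M.gguf",  # hypothetical local file
    n_ctx=2048,
)
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])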
4 changes: 2 additions & 2 deletions setup.cfg
@@ -73,7 +73,7 @@ dev =
 all =
     chatglm-cpp>=0.3.0
     ctransformers
-    llama-cpp-python>=0.2.0
+    llama-cpp-python>=0.2.23
     transformers>=4.34.1
     torch
     accelerate>=0.20.3
@@ -91,7 +91,7 @@ all =
     auto-gptq ; sys_platform!='darwin'
     optimum
 ggml =
-    llama-cpp-python>=0.2.0
+    llama-cpp-python>=0.2.23
     ctransformers
     chatglm-cpp>=0.3.0
 transformers =
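
The same 0.2.23 floor is pinned in both the "all" and "ggml" extras. A quick runtime sanity check, assuming the packaging library is installed (not part of this commit):

# Fail fast if an older llama-cpp-python is still in the environment.
from importlib.metadata import version
from packaging.version import Version

assert Version(version("llama-cpp-python")) >= Version("0.2.23")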
1 change: 1 addition & 0 deletions xinference/core/tests/test_restful_api.py
@@ -649,6 +649,7 @@ def test_restful_api_for_gorilla_openfunctions_tool_calls(
     "model_format, quantization",
     [
         ("pytorch", None),
+        ("ggufv2", "Q4_K_M"),
     ],
 )
 @pytest.mark.skip(reason="Cost too many resources.")
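
The tool-call test now also exercises the GGUF v2 path (the test itself remains skipped by default). Launching the matching model through the client would look roughly like this; the endpoint is a placeholder and the kwargs are assumed from the parametrization:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # hypothetical local endpoint
model_uid = client.launch_model(
    model_name="qwen-chat",
    model_format="ggufv2",
    quantization="Q4_K_M",
)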
6 changes: 0 additions & 6 deletions xinference/model/llm/__init__.py
@@ -40,7 +40,6 @@ def _install():
     from .ggml.chatglm import ChatglmCppChatModel
     from .ggml.ctransformers import CtransformersModel
     from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
-    from .ggml.qwen import QWenModel
     from .pytorch.baichuan import BaichuanPytorchChatModel
     from .pytorch.chatglm import ChatglmPytorchChatModel
     from .pytorch.core import PytorchChatModel, PytorchModel
@@ -61,11 +60,6 @@ def _install():
             ChatglmCppChatModel,
         ]
     )
-    LLM_CLASSES.extend(
-        [
-            QWenModel,
-        ]
-    )
     LLM_CLASSES.extend(
         [
             CtransformersModel,
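
With QWenModel unregistered, qwen ggml specs fall through to LlamaCppChatModel, whose match() no longer rejects them (see the llamacpp.py hunk below). A rough sketch of the registry's first-match dispatch, not the actual xinference code:

def pick_llm_cls(llm_family, llm_spec, quantization):
    # The first registered class whose match() accepts the spec wins,
    # so removing QWenModel lets LlamaCppChatModel claim qwen models.
    for cls in LLM_CLASSES:
        if cls.match(llm_family, llm_spec, quantization):
            return cls
    return None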
13 changes: 11 additions & 2 deletions xinference/model/llm/ggml/llamacpp.py
@@ -14,7 +14,7 @@
 import datetime
 import logging
 import os
-from typing import Iterator, List, Optional, Union
+from typing import Iterable, Iterator, List, Optional, Union
 
 from ....types import (
     ChatCompletion,
@@ -272,7 +272,6 @@ def match(
             return False
         if (
             "chatglm" in llm_family.model_name
-            or "qwen" in llm_family.model_name
             or llm_family.model_name in CTRANSFORMERS_SUPPORTED_MODEL
         ):
             return False
@@ -306,6 +305,16 @@ def chat(
         full_prompt = self.get_prompt(prompt, chat_history, prompt_style, tools=tools)
 
         generate_config = self._sanitize_generate_config(generate_config)
+        # TODO(codingl2k1): Hack: qwen needs "Observation:" as a stop word
+        # for function calls.
+        if tools and self.model_family.model_name == "qwen-chat":
+            stop = generate_config.get("stop")
+            if isinstance(stop, str):
+                generate_config["stop"] = [stop, "Observation:"]
+            elif isinstance(stop, Iterable):
+                generate_config["stop"] = list(stop) + ["Observation:"]
+            else:
+                generate_config["stop"] = "Observation:"
 
         stream = generate_config.get("stream", False)
         if stream:
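
The "Observation:" stop word follows qwen's ReAct-style function-calling format (Thought / Action / Action Input / Observation): generation must stop before the model hallucinates the tool's observation. The branching above is essentially a normalizer over llama.cpp's str-or-list stop parameter; as a standalone sketch (hypothetical helper, not in this commit):

from typing import Iterable, List, Optional, Union

def add_stop_word(
    stop: Optional[Union[str, Iterable[str]]], word: str = "Observation:"
) -> Union[str, List[str]]:
    # Preserve any caller-supplied stop words and append the ReAct marker.
    if stop is None:
        return word
    if isinstance(stop, str):
        return [stop, word]
    return list(stop) + [word]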
291 changes: 0 additions & 291 deletions xinference/model/llm/ggml/qwen.py

This file was deleted; the dedicated ggml QWenModel implementation it contained is superseded by the llama.cpp code path above.

