peft(models): improve implementation (#60)
If you have a local Dolly-V2 version, please run `openllm prune`
aarnphm authored Jun 24, 2023
1 parent 3d9cc93 commit 98328be
Showing 21 changed files with 383 additions and 192 deletions.
8 changes: 8 additions & 0 deletions changelog.d/60.fix.md
@@ -0,0 +1,8 @@
+Moved the implementation of dolly-v2 and falcon serialization to save `PreTrainedModel` instead of the pipeline.
+
+Saving dolly-v2 now saves the actual model instead of the pipeline abstraction. If you have a Dolly-V2
+model available locally, please run `openllm prune` to make the new implementation available.
+
+Dolly-v2 and falcon now implement some memory optimizations to help with loading on lower-resource systems.
+
+Removed configuration field: `use_pipeline`
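For context, the gist of the serialization change is sketched below. This is an illustrative sketch only, not OpenLLM's actual code: the local save path is hypothetical, and `low_cpu_mem_usage` stands in for the kind of loading-time memory optimization the changelog mentions.

import transformers

model_id = "databricks/dolly-v2-7b"

# Before this change: the pipeline abstraction was what got serialized.
# pipe = transformers.pipeline("text-generation", model=model_id)

# After: the underlying PreTrainedModel and tokenizer are saved directly.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
model.save_pretrained("./dolly-v2-local")      # hypothetical path
tokenizer.save_pretrained("./dolly-v2-local")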
14 changes: 10 additions & 4 deletions examples/bentoml-demo/service.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
 import bentoml
 import openllm
-from bentoml.io import Text


 model = "dolly-v2"
@@ -25,7 +26,12 @@
 svc = bentoml.Service(name="llm-service", runners=[llm_runner])


-@svc.api(input=Text(), output=Text())
+@svc.on_startup
+def download(_: bentoml.Context):
+    llm_runner.llm.ensure_model_id_exists()
+
+
+@svc.api(input=bentoml.io.Text(), output=bentoml.io.Text())
 async def prompt(input_text: str) -> str:
-    answer = await llm_runner.generate(input_text)
-    return answer
+    answer = await llm_runner.generate.async_run(input_text)
+    return answer[0]["generated_text"]
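Note that the endpoint now unwraps the runner output itself: `generate.async_run` returns pipeline-style output, hence `answer[0]["generated_text"]`. Below is a minimal client sketch, assuming a server running locally on BentoML's default port; the URL and the use of `bentoml.client` here are assumptions of this example, not part of the diff.

# Hypothetical client call against the service above.
from bentoml.client import Client

client = Client.from_url("http://localhost:3000")
print(client.prompt("What does OpenLLM do?"))  # invokes the @svc.api endpoint named "prompt"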
7 changes: 2 additions & 5 deletions examples/langchain-chains-demo/bentofile.yaml
@@ -14,9 +14,6 @@

 service: "service:svc"
 include:
-- "*.py"
+  - "*.py"
 python:
-  packages:
-    - openllm
-    - langchain
-    - pydantic
+  requirements_txt: ./requirements.txt
13 changes: 0 additions & 13 deletions examples/langchain-chains-demo/download_model.py

This file was deleted.

4 changes: 4 additions & 0 deletions examples/langchain-chains-demo/requirements.txt
@@ -0,0 +1,4 @@
+openllm
+langchain>=0.0.212
+pydantic
+BeautifulSoup4
32 changes: 23 additions & 9 deletions examples/langchain-chains-demo/service.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any
-from typing import Dict
+from __future__ import annotations
+
+import subprocess
+import sys
+import typing as t

 from langchain.chains import LLMChain
 from langchain.llms import OpenLLM
@@ -28,15 +31,20 @@
 class Query(BaseModel):
     industry: str
     product_name: str
-    keywords: list[str]
-    llm_config: Dict[str, Any]
+    keywords: t.List[str]
+    llm_config: t.Dict[str, t.Any]


-llm = OpenLLM(
-    model_name="dolly-v2",
-    model_id="databricks/dolly-v2-7b",
-    embedded=False,
-)
+def gen_llm(model_name: str, model_id: str | None = None) -> OpenLLM:
+    args = [sys.executable, "-m", "openllm", "download", model_name]
+    if model_id:
+        args += ["--model-id", model_id]
+    subprocess.check_output(args)
+    return OpenLLM(model_name=model_name, model_id=model_id, embedded=False)
+
+
+llm = gen_llm("dolly-v2", model_id="databricks/dolly-v2-7b")

 prompt = PromptTemplate(
     input_variables=["industry", "product_name", "keywords"],
     template="""
@@ -57,6 +65,12 @@ class Query(BaseModel):

 svc = bentoml.Service("fb-ads-copy", runners=[llm.runner])


+@svc.on_startup
+def download(_: bentoml.Context):
+    llm.runner.llm.ensure_model_id_exists()
+
+
 SAMPLE_INPUT = Query(
     industry="SAAS",
     product_name="BentoML",
13 changes: 0 additions & 13 deletions examples/langchain-tools-demo/download_model.py

This file was deleted.

3 changes: 1 addition & 2 deletions examples/langchain-tools-demo/service.py
@@ -28,12 +28,11 @@
-    model_id="databricks/dolly-v2-7b",
-    embedded=False,
-)
+llm = OpenLLM(model_name="dolly-v2", embedded=False)
 tools = load_tools(["serpapi"], llm=llm)
 agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
 svc = bentoml.Service("langchain-openllm", runners=[llm.runner])


-@svc.api(input=Text.from_sample(SAMPLE_INPUT), output=Text())
+@svc.api(input=Text.from_sample(sample=SAMPLE_INPUT), output=Text())
 def chat(input_text: str):
     return agent.run(input_text)
36 changes: 17 additions & 19 deletions src/openllm/_configuration.py
@@ -72,6 +72,7 @@ class GenerationConfig:
 from .exceptions import ForbiddenAttributeError
 from .utils import ENV_VARS_TRUE_VALUES
 from .utils import LazyType
+from .utils import ReprMixin
 from .utils import bentoml_cattr
 from .utils import codegen
 from .utils import dantic
@@ -110,7 +111,6 @@ class GenerationConfig:
     import peft
     from attr import _CountingAttr  # type: ignore
     from attr import _make_init  # type: ignore
-    from attr import _make_repr  # type: ignore
     from attr import _transform_attrs  # type: ignore
     from attr._compat import set_closure_cell

@@ -136,7 +136,6 @@ class GenerationConfig:
     from attr._compat import set_closure_cell
     from attr._make import _CountingAttr
     from attr._make import _make_init
-    from attr._make import _make_repr
     from attr._make import _transform_attrs

 transformers = openllm.utils.LazyLoader("transformers", globals(), "transformers")
@@ -652,7 +651,6 @@ class ModelSettings(t.TypedDict, total=False):
     requirements: t.Optional[ListStr]

     # llm implementation specifics
-    use_pipeline: bool
     bettertransformer: bool
     model_type: t.Literal["causal_lm", "seq2seq_lm"]
     runtime: t.Literal["transformers", "cpp"]
@@ -712,7 +710,6 @@ def default(cls) -> _ModelSettingsAttr:
         name_type="dasherize",
         requires_gpu=False,
         url="",
-        use_pipeline=False,
         model_type="causal_lm",
         trust_remote_code=False,
         requirements=None,
@@ -988,13 +985,6 @@ def __attrs_init__(self, **attrs: t.Any):
     """The default PyPI requirements needed to run this given LLM. By default, we will depend on
     bentoml, torch, transformers."""

-    __openllm_use_pipeline__: bool = Field(False)
-    """Whether this LLM will use HuggingFace Pipeline API. By default, this is set to False.
-    The reason for this to be here is because we want to access this object before loading
-    the _bentomodel. This is because we will actually download the model weights when accessing
-    _bentomodel.
-    """
-
     __openllm_bettertransformer__: bool = Field(False)
     """Whether to use BetterTransformer for this given LLM. This depends per model
     architecture. By default, we will use BetterTransformer for T5 and StableLM models,
@@ -1214,7 +1204,9 @@ def build_class(self) -> type[LLMConfig]:
         for base_cls in self._cls.__mro__[1:-1]:
             if base_cls.__dict__.get("__weakref__", None) is not None:
                 weakref_inherited = True
-            existing_slots.update({name: getattr(base_cls, name, codegen._sentinel) for name in getattr(base_cls, "__slots__", [])})
+            existing_slots.update(
+                {name: getattr(base_cls, name, codegen._sentinel) for name in getattr(base_cls, "__slots__", [])}
+            )

         base_names = set(self._base_names)
         names = self._attr_names
@@ -1295,8 +1287,11 @@ def add_attrs_init(self) -> t.Self:
         )
         return self

-    def add_repr(self, ns: str | None):
-        self._cls_dict["__repr__"] = codegen.add_method_dunders(self._cls, _make_repr(self._attrs, ns, self._cls))
+    def add_repr(self):
+        for key, fn in ReprMixin.__dict__.items():
+            if key not in ("__module__", "__doc__", "__repr_keys__"):
+                self._cls_dict[key] = codegen.add_method_dunders(self._cls, fn)
+        self._cls_dict["__repr_keys__"] = property(lambda _: {i.name for i in self._attrs})
         return self

def __init_subclass__(cls: type[LLMConfig]):
@@ -1384,7 +1379,7 @@ def __init_subclass__(cls: type[LLMConfig]):
             type=GenerationConfig,
         )

-        cls = cls._ConfigBuilder(cls, model_name, these).add_attrs_init().add_repr(None).build_class()
+        cls = cls._ConfigBuilder(cls, model_name, these).add_attrs_init().add_repr().build_class()
         # auto assignment attributes generated from __config__ after create the new slot class.
         _make_assignment_script(cls, bentoml_cattr.structure(cls, _ModelSettingsAttr))(cls)

@@ -1426,14 +1421,19 @@ def __init__(
         if generation_config is None:
             generation_config = {k: v for k, v in attrs.items() if k in _generation_cl_dict}
         else:
-            generation_config = config_merger.merge(generation_config, {k: v for k, v in attrs.items() if k in _generation_cl_dict})
+            generation_config = config_merger.merge(
+                generation_config, {k: v for k, v in attrs.items() if k in _generation_cl_dict}
+            )

         for k in _cached_keys:
             if k in generation_config or attrs.get(k) is None:
                 del attrs[k]
         _cached_keys = tuple(k for k in _cached_keys if k in attrs)

-        self.__openllm_extras__ = config_merger.merge( first_not_none(__openllm_extras__, default={}), {k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__})
+        self.__openllm_extras__ = config_merger.merge(
+            first_not_none(__openllm_extras__, default={}),
+            {k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__},
+        )

         for k in _cached_keys:
             if k in self.__openllm_extras__:
@@ -1464,8 +1464,6 @@ def __getitem__(self, item: t.Literal["service_name"] = ...) -> str: ...
     @overload
     def __getitem__(self, item: t.Literal["requirements"] = ...) -> t.Optional[ListStr]: ...
     @overload
-    def __getitem__(self, item: t.Literal["use_pipeline"] = ...) -> bool: ...
-    @overload
     def __getitem__(self, item: t.Literal["bettertransformer"] = ...) -> bool: ...
     @overload
     def __getitem__(self, item: t.Literal["model_type"] = ...) -> t.Literal['causal_lm', 'seq2seq_lm']: ...
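The `add_repr` change above swaps attrs' private `_make_repr` for methods copied off `openllm.utils.ReprMixin`, with `__repr_keys__` supplied as a property over the class's attrs. A minimal sketch of that mixin pattern as implied by the diff (the real `ReprMixin` may differ):

class ReprMixin:
    @property
    def __repr_keys__(self) -> set[str]:
        # Overridden per class; the builder above supplies {i.name for i in self._attrs}.
        raise NotImplementedError

    def __repr__(self) -> str:
        body = ", ".join(f"{k}={getattr(self, k)!r}" for k in self.__repr_keys__)
        return f"{type(self).__name__}({body})"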