Configurable models for NeurIPS Efficiency Challenge #1861

Merged Oct 3, 2023 (7 commits)
Changes from 3 commits
56 changes: 48 additions & 8 deletions src/helm/benchmark/model_deployment_registry.py
@@ -1,3 +1,4 @@
import os
from typing import Dict, Optional, List
from dataclasses import dataclass

@@ -6,32 +7,52 @@

from helm.common.hierarchical_logger import hlog
from helm.common.object_spec import ObjectSpec
from helm.proxy.models import ALL_MODELS, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, MODEL_NAME_TO_MODEL, TEXT_MODEL_TAG, Model


MODEL_DEPLOYMENTS_FILE = "model_deployments.yaml"


class ClientSpec(ObjectSpec):
pass


class WindowServiceSpec(ObjectSpec):
pass


@dataclass(frozen=True)
class ModelDeployment:
"""A model deployment is an accessible instance of this model (e.g. a hosted endpoint).

A model can have model deployments."""
A model can have multiple model deployments."""

name: str
"""Name of the model deployment."""

model_name: str
"""Name of the model that this model deployment is for."""

client_spec: ClientSpec
"""Specification for instantiating the client for this model deployment."""

max_sequence_length: Optional[int]
"""Maximum equence length for this model deployment."""
model_name: Optional[str] = None
Contributor: Why is this moved down?

Collaborator (Author): The ordering should be that all the required parameters come first, followed by all the optional parameters with default arguments.

Contributor: I'm wondering if we should put an example in the docstring so people have a sense of what the difference between name and model_name is, etc.

Collaborator (Author): Going to defer this until we actually implement the multi-deployments feature, which isn't on the roadmap yet.

"""Name of the model that this model deployment is for.

If unset, defaults to the same value as `name`."""

tokenizer_name: Optional[str]
"""Tokenizer for this model deployment."""
tokenizer_name: Optional[str] = None
"""Tokenizer for this model deployment.

If unset, auto-inferred by the WindowService."""

window_service_spec: Optional[WindowServiceSpec] = None
"""Specification for instantiating the window service for this model deplooyment"""

max_sequence_length: Optional[int] = None
"""Maximum sequence length for this model deployment."""

max_request_length: Optional[int] = None
"""Maximum request length for this model deployment.

If unset, defaults to the same value as max_sequence_length."""
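
As an aside for readers: the required-before-optional ordering described in the review thread above is enforced by Python's dataclass machinery itself. A minimal sketch (illustrative class, not part of this PR):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass(frozen=True)
class Example:
    name: str                         # required fields must come first
    model_name: Optional[str] = None  # fields with defaults must follow

# Swapping the two fields above fails at class-definition time with:
# TypeError: non-default argument 'name' follows default argument
```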


@dataclass(frozen=True)
@@ -49,8 +70,27 @@ def register_model_deployments_from_path(path: str) -> None:
raw = yaml.safe_load(f)
model_deployments: ModelDeployments = cattrs.structure(raw, ModelDeployments)
for model_deployment in model_deployments.model_deployments:
hlog(f"Registered model deployment {model_deployment.name}")
_name_to_model_deployment[model_deployment.name] = model_deployment

# Auto-register a model with this name if none exists
model_name = model_deployment.model_name or model_deployment.name
if model_name not in MODEL_NAME_TO_MODEL:
model = Model(
group="none",
name=model_name,
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
)
MODEL_NAME_TO_MODEL[model_name] = model
ALL_MODELS.append(model)
hlog(f"Registered default metadata for model {model_name}")


def maybe_register_model_deployments_from_base_path(base_path: str) -> None:
path = os.path.join(base_path, MODEL_DEPLOYMENTS_FILE)
if os.path.exists(path):
register_model_deployments_from_path(path)


def get_model_deployment(name: str) -> Optional[ModelDeployment]:
return _name_to_model_deployment.get(name)
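
To see how the pieces above fit together, here is an end-to-end sketch. The YAML contents, model name, and client class are hypothetical; the field names come from the ModelDeployment dataclass and the file name from MODEL_DEPLOYMENTS_FILE:

```python
import os
import tempfile

from helm.benchmark.model_deployment_registry import (
    MODEL_DEPLOYMENTS_FILE,
    get_model_deployment,
    maybe_register_model_deployments_from_base_path,
)

# Hypothetical model_deployments.yaml contents.
YAML_CONTENTS = """\
model_deployments:
  - name: my-org/my-model
    client_spec:
      class_name: mypackage.my_client.MyClient  # hypothetical client class
      args: {}
    tokenizer_name: huggingface/gpt2
    max_sequence_length: 2048
"""

with tempfile.TemporaryDirectory() as base_path:
    with open(os.path.join(base_path, MODEL_DEPLOYMENTS_FILE), "w") as f:
        f.write(YAML_CONTENTS)
    # Registers the deployment and, because my-org/my-model has no metadata yet,
    # also auto-registers default model metadata for it.
    maybe_register_model_deployments_from_base_path(base_path)

deployment = get_model_deployment("my-org/my-model")
assert deployment is not None
assert deployment.max_sequence_length == 2048
```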
10 changes: 10 additions & 0 deletions src/helm/benchmark/model_metadata_registry.py
@@ -1,3 +1,4 @@
import os
from typing import Optional, List
from dataclasses import dataclass, field
from datetime import date
@@ -8,6 +9,9 @@
from helm.proxy.models import ALL_MODELS, MODEL_NAME_TO_MODEL, Model


MODEL_METADATA_FILE = "model_metadata.yaml"


@dataclass(frozen=True)
class ModelMetadata:
name: str
@@ -58,3 +62,9 @@ def register_model_metadata_from_path(path: str) -> None:
)
MODEL_NAME_TO_MODEL[model_metadata.name] = model
ALL_MODELS.append(model)


def maybe_register_model_metadata_from_base_path(base_path: str) -> None:
path = os.path.join(base_path, MODEL_METADATA_FILE)
Contributor: Add docstring?

Collaborator (Author): Added docstring.

if os.path.exists(path):
register_model_metadata_from_path(path)
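
Together with the deployments registry above, both helpers can be pointed at the same directory; a short sketch (directory name hypothetical):

```python
from helm.benchmark.model_deployment_registry import maybe_register_model_deployments_from_base_path
from helm.benchmark.model_metadata_registry import maybe_register_model_metadata_from_base_path

base_path = "prod_env"  # hypothetical local configuration directory
maybe_register_model_metadata_from_base_path(base_path)     # reads model_metadata.yaml if present
maybe_register_model_deployments_from_base_path(base_path)  # reads model_deployments.yaml if present
```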
8 changes: 7 additions & 1 deletion src/helm/benchmark/run_specs.py
@@ -2516,7 +2516,13 @@ def construct_run_specs(spec: ObjectSpec) -> List[RunSpec]:
]

def alter_run_spec(run_spec: RunSpec) -> RunSpec:
model = get_model(run_spec.adapter_spec.model)
try:
model = get_model(run_spec.adapter_spec.model)
except ValueError:
# Models registered from configs cannot have expanders applied to them,
Contributor: ValueError means that the model has not been loaded yet? I was a bit confused by the comment at first, maybe connect the dots a bit more.

Collaborator (Author): It means the model has not been registered yet. I'll add more docs.

# because the models will not have been registered yet at this point.
# TODO: Figure out a cleaner way to deal with this.
return run_spec
# For models that strip newlines, when we're generating, we need to set
# the delimiter to be '###' so we stop properly.
if NO_NEWLINES_TAG in model.tags and run_spec.adapter_spec.method in (
12 changes: 8 additions & 4 deletions src/helm/benchmark/window_services/huggingface_window_service.py
@@ -2,15 +2,18 @@
from helm.proxy.clients.huggingface_tokenizer import HuggingFaceTokenizers
from .local_window_service import LocalWindowService
from .tokenizer_service import TokenizerService
from helm.proxy.clients.huggingface_client import HuggingFaceModelConfig


class HuggingFaceWindowService(LocalWindowService):
def __init__(
self, service: TokenizerService, model_config: HuggingFaceModelConfig, max_sequence_length: Optional[int] = None
self,
service: TokenizerService,
tokenizer_name: str,
max_sequence_length: Optional[int] = None,
max_request_length: Optional[int] = None,
):
super().__init__(service)
self._tokenizer_name = model_config.model_id
self._tokenizer_name = tokenizer_name
tokenizer = HuggingFaceTokenizers.get_tokenizer(self._tokenizer_name)
self._prefix_token = tokenizer.bos_token
self._end_of_text_token = tokenizer.eos_token
@@ -22,6 +25,7 @@ def __init__(
self._max_sequence_length = max_sequence_length
else:
self._max_sequence_length = tokenizer.model_max_length
self._max_request_length = max_request_length

@property
def max_sequence_length(self) -> int:
@@ -31,7 +35,7 @@ def max_sequence_length(self) -> int:
@property
def max_request_length(self) -> int:
"""Return the max request length of this tokenizer."""
return self.max_sequence_length
return self._max_request_length or self._max_sequence_length

@property
def end_of_text_token(self) -> str:
9 changes: 2 additions & 7 deletions src/helm/benchmark/window_services/llama_window_service.py
@@ -1,4 +1,3 @@
from helm.proxy.clients.huggingface_client import HuggingFaceHubModelConfig
from helm.benchmark.window_services.huggingface_window_service import HuggingFaceWindowService
from helm.benchmark.window_services.tokenizer_service import TokenizerService

@@ -7,10 +6,7 @@ class LlamaWindowService(HuggingFaceWindowService):
def __init__(self, service: TokenizerService):
# Tokenizer name hf-internal-testing/llama-tokenizer is taken from:
# https://huggingface.co/docs/transformers/main/en/model_doc/llama#transformers.LlamaTokenizerFast.example
model_config = HuggingFaceHubModelConfig(
namespace="hf-internal-testing", model_name="llama-tokenizer", revision=None
)
super().__init__(service, model_config)
super().__init__(service, "hf-internal-testing/llama-tokenizer")


class Llama2WindowService(HuggingFaceWindowService):
@@ -25,8 +21,7 @@ class Llama2WindowService(HuggingFaceWindowService):
# meta-llama/Llama-2-70b-hf is not a local folder and is not a valid model identifier listed on
# 'https://huggingface.co/models'
def __init__(self, service: TokenizerService):
model_config = HuggingFaceHubModelConfig(namespace="meta-llama", model_name="Llama-2-7b-hf", revision=None)
super().__init__(service, model_config)
super().__init__(service, "meta-llama/Llama-2-7b-hf")

@property
def max_sequence_length(self) -> int:
Expand Down
38 changes: 21 additions & 17 deletions src/helm/benchmark/window_services/window_service_factory.py
@@ -1,5 +1,4 @@
from helm.benchmark.model_deployment_registry import get_model_deployment
from helm.proxy.clients.huggingface_model_registry import HuggingFaceHubModelConfig
from helm.benchmark.model_deployment_registry import WindowServiceSpec, get_model_deployment
from helm.proxy.models import (
get_model,
get_model_names_with_tag,
@@ -20,6 +19,7 @@
from helm.benchmark.window_services.tokenizer_service import TokenizerService
from helm.proxy.clients.huggingface_client import get_huggingface_model_config
from helm.proxy.clients.remote_model_registry import get_remote_model
from helm.common.object_spec import create_object, inject_object_spec_args


class WindowServiceFactory:
@@ -40,25 +40,32 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
# TODO: Migrate all window services to use model deployments
model_deployment = get_model_deployment(model_name)
if model_deployment:
# TODO: Allow tokenizer name auto-inference in some cases.
if not model_deployment.tokenizer_name:
raise Exception("Tokenizer name must be set on model deplyment")
tokenizer_name = model_deployment.tokenizer_name
# Only use HuggingFaceWindowService for now.
# TODO: Allow using other window services.
window_service = HuggingFaceWindowService(
service=service,
model_config=HuggingFaceHubModelConfig.from_string(tokenizer_name),
max_sequence_length=model_deployment.max_sequence_length,
# If the model deployment specifies a WindowServiceSpec, instantiate it.
window_service_spec: WindowServiceSpec
if model_deployment.window_service_spec:
window_service_spec = model_deployment.window_service_spec
else:
window_service_spec = WindowServiceSpec(
class_name="helm.benchmark.window_services.default_window_service.DefaultWindowService", args={}
)
window_service_spec = inject_object_spec_args(
window_service_spec,
{
"service": service,
"tokenizer_name": model_deployment.tokenizer_name,
"max_sequence_length": model_deployment.max_sequence_length,
"max_request_length": model_deployment.max_request_length,
},
)
window_service = create_object(window_service_spec)
elif get_remote_model(model_name):
window_service = get_remote_window_service(service, model_name)
elif organization == "neurips":
from helm.benchmark.window_services.http_model_window_service import HTTPModelWindowServce

window_service = HTTPModelWindowServce(service)
elif huggingface_model_config:
window_service = HuggingFaceWindowService(service=service, model_config=huggingface_model_config)
window_service = HuggingFaceWindowService(service=service, tokenizer_name=huggingface_model_config.model_id)
elif organization == "openai":
from helm.benchmark.window_services.openai_window_service import OpenAIWindowService
from helm.benchmark.window_services.wider_openai_window_service import (
@@ -189,10 +196,7 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
"tiiuae/falcon-40b",
"tiiuae/falcon-40b-instruct",
]:
window_service = HuggingFaceWindowService(
service=service,
model_config=HuggingFaceHubModelConfig(namespace="tiiuae", model_name="falcon-7b", revision=None),
)
window_service = HuggingFaceWindowService(service=service, tokenizer_name="tiiuae/falcon-7b")
elif model_name in [
"stabilityai/stablelm-base-alpha-3b",
"stabilityai/stablelm-base-alpha-7b",
41 changes: 37 additions & 4 deletions src/helm/common/object_spec.py
@@ -1,6 +1,8 @@
import importlib
import dataclasses
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple, Hashable, Type
import inspect
from typing import Any, Callable, Dict, Optional, Tuple, Hashable, Type, TypeVar


@dataclass(frozen=True)
@@ -32,13 +34,44 @@ def get_class_by_name(full_class_name: str) -> Type[Any]:
return getattr(importlib.import_module(module_name), class_name)


def create_object(spec: ObjectSpec, additional_args: Optional[Dict[str, Any]] = None):
ObjectSpecT = TypeVar("ObjectSpecT", bound=ObjectSpec)


def inject_object_spec_args(
spec: ObjectSpecT,
constant_bindings: Optional[Dict[str, Any]] = None,
provider_bindings: Optional[Dict[str, Callable[[], Any]]] = None,
) -> ObjectSpecT:
"""Return a new ObjectSpec that is a copy of the original ObjectSpec with additional arguments.

The original ObjectSpec may be missing arguments for parameters that are required by the
ObjectSpec's class's constructor.
This function returns a new ObjectSpec with these missing parameters filled in.
To do this, for every missing parameter, look up each of the `*_bindings` arguments in order until
we find one with a key matching the missing parameter's name.
If found in constant_bindings, add the corresponding value to args.
If found in provider_bindings, call the corresponding value and add the return value to args.

Contributor: Could you provide an example or two of usage?

Collaborator (Author): Added example.

This is loosely based on instance (constant) bindings and provider bindings in Guice dependency injection."""
cls = get_class_by_name(spec.class_name)
init_signature = inspect.signature(cls.__init__)
args = {}
args.update(spec.args)
for parameter_name in init_signature.parameters.keys():
if parameter_name == "self" or parameter_name in args:
continue
elif constant_bindings and parameter_name in constant_bindings:
args[parameter_name] = constant_bindings[parameter_name]
elif provider_bindings and parameter_name in provider_bindings:
args[parameter_name] = provider_bindings[parameter_name]()
return dataclasses.replace(spec, args=args)
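
The usage example the author mentions adding is not visible in this revision of the diff, so here is an illustrative sketch instead (the client class and bindings are hypothetical):

```python
from helm.common.object_spec import ObjectSpec, create_object, inject_object_spec_args

class MyClient:
    """Toy client used only for this illustration."""

    def __init__(self, model: str, cache_path: str, api_key: str):
        self.model, self.cache_path, self.api_key = model, cache_path, api_key

def provide_api_key() -> str:
    # Called lazily, and only because MyClient.__init__ declares an api_key
    # parameter that is still missing from spec.args.
    return "dummy-key"

spec = ObjectSpec(class_name="__main__.MyClient", args={"model": "my-org/my-model"})
spec = inject_object_spec_args(
    spec,
    constant_bindings={"cache_path": "/tmp/cache"},  # value copied into args as-is
    provider_bindings={"api_key": provide_api_key},  # callable invoked, result added
)
client = create_object(spec)
assert client.api_key == "dummy-key"
```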


def create_object(spec: ObjectSpec):
"""Create the actual object given the `spec`."""
cls = get_class_by_name(spec.class_name)
args = {}
args.update(spec.args)
if additional_args:
args.update(additional_args)
return cls(**args)


39 changes: 26 additions & 13 deletions src/helm/proxy/clients/auto_client.py
@@ -5,9 +5,10 @@
from retrying import RetryError, Attempt

from helm.benchmark.model_deployment_registry import get_model_deployment
from helm.benchmark.tokenizer_config_registry import get_tokenizer_config
from helm.common.cache import CacheConfig, MongoCacheConfig, SqliteCacheConfig
from helm.common.hierarchical_logger import hlog
from helm.common.object_spec import create_object
from helm.common.object_spec import create_object, inject_object_spec_args
from helm.common.request import Request, RequestResult
from helm.common.tokenization_request import (
TokenizationRequest,
@@ -70,18 +71,23 @@ def _get_client(self, model: str) -> Client:
# TODO: Migrate all clients to use model deployments
model_deployment = get_model_deployment(model)
if model_deployment:
api_key = None
if "deployments" not in self.credentials:
raise AuthenticationError("Could not find key 'deployments' in credentials.conf")
deployment_api_keys = self.credentials["deployments"]
if model not in deployment_api_keys:
raise AuthenticationError(
f"Could not find key '{model}' under key 'deployments' in credentials.conf"
)
api_key = deployment_api_keys[model]
client = create_object(
model_deployment.client_spec, additional_args={"cache_config": cache_config, "api_key": api_key}

def provide_api_key():
if "deployments" not in self.credentials:
raise AuthenticationError("Could not find key 'deployments' in credentials.conf")
deployment_api_keys = self.credentials["deployments"]
if model not in deployment_api_keys:
raise AuthenticationError(
f"Could not find key '{model}' under key 'deployments' in credentials.conf"
)
return deployment_api_keys[model]

client_spec = inject_object_spec_args(
Contributor: Can you write some comments on why this injection is needed? My initial impression is that it seems a bit complicated / fancy...

Collaborator (Author): Added a passage. Dependency injection is needed here for these reasons:

1. Different clients have different parameters. Dependency injection provides arguments that match the parameters of the client.
2. Some arguments, such as the tokenizer, are not static data objects that can be put in the user's configuration file. Instead, they have to be constructed dynamically at runtime.
3. The providers must be lazily evaluated, because eager evaluation can result in an exception. For instance, some clients do not require an API key, so eagerly fetching the API key from configuration would raise an exception when the user has not configured one.

model_deployment.client_spec,
constant_bindings={"cache_config": cache_config},
provider_bindings={"api_key": provide_api_key},
)
client = create_object(client_spec)

elif get_huggingface_model_config(model):
from helm.proxy.clients.huggingface_client import HuggingFaceClient
@@ -211,7 +217,14 @@ def _get_tokenizer_client(self, tokenizer: str) -> Client:

if client is None:
cache_config: CacheConfig = self._build_cache_config(organization)
if get_huggingface_model_config(tokenizer):
# TODO: Migrate all clients to use tokenizer configs
tokenizer_config = get_tokenizer_config(tokenizer)
if tokenizer_config:
tokenizer_spec = inject_object_spec_args(
tokenizer_config.tokenizer_spec, constant_bindings={"cache_config": cache_config}
)
client = create_object(tokenizer_spec)
elif get_huggingface_model_config(tokenizer):
from helm.proxy.clients.huggingface_client import HuggingFaceClient

client = HuggingFaceClient(cache_config=cache_config)
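
To make reason 3 from the review discussion above concrete, a self-contained sketch (toy client, not from HELM) showing that a provider binding is only invoked when the constructor actually declares the matching parameter:

```python
from helm.common.object_spec import ObjectSpec, create_object, inject_object_spec_args

class NoAuthClient:
    """Toy client used only for this illustration; note there is no api_key parameter."""

    def __init__(self, cache_path: str):
        self.cache_path = cache_path

def provide_api_key() -> str:
    raise RuntimeError("no API key configured")  # would break if evaluated eagerly

spec = inject_object_spec_args(
    ObjectSpec(class_name="__main__.NoAuthClient", args={}),
    constant_bindings={"cache_path": "/tmp/cache"},
    provider_bindings={"api_key": provide_api_key},  # never called: no matching parameter
)
client = create_object(spec)  # succeeds; the provider was never invoked
```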