Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Anthropic model (Sonnet tested) and an asset file added #323

Merged
merged 5 commits into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json

from llmebench.datasets import ThatiARDataset
from llmebench.models import AnthropicModel
from llmebench.tasks import SubjectivityTask


def metadata():
    """Describe this benchmark asset.

    Returns
    -------
    dict
        Author, model identifier, a short description of the model, and the
        recorded scores (none recorded here).
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="claude-3-5-sonnet-20240620",
        description="Anthropic model - claude-3-5-sonnet. Find more https://www.anthropic.com/news/claude-3-5-sonnet",
    )
    # No scores have been recorded for this asset.
    info["scores"] = {}
    return info


def config():
    """Return the benchmark configuration for this asset.

    Returns
    -------
    dict
        The dataset, task and model classes to use, plus the keyword
        arguments (`model_args`) handed to the model.
    """
    system_msg = "AI assistant specialized in classifying news article sentences into subjective or objective. A subjective sentence expresses personal opinions, feelings, or beliefs, while an objective sentence presents facts, data, or unbiased information."
    return {
        "dataset": ThatiARDataset,
        "task": SubjectivityTask,
        "model": AnthropicModel,
        "model_args": {
            "class_labels": ["SUBJ", "OBJ"],
            "max_tries": 30,
            # AnthropicModel looks up the system prompt under the
            # `system_msg` keyword; under any other key it is ignored and
            # the model's generic default prompt is sent instead.
            "system_msg": system_msg,
        },
    }


def prompt(input_sample):
    """Build the chat messages asking the model to classify one sentence.

    Arguments
    ---------
    input_sample
        The sentence to classify; it is interpolated into the user message.

    Returns
    -------
    list
        A single-element list holding one "user" message.
    """
    instructions = (
        "\n"
        "Classify the following Arabic 'sentence' as subjective or objective. Provide only the label.\n"
        'Provide your response in the following JSON format: {"label": "your label"}.\n'
        "Please provide JSON output only. No additional text.\n"
        "\n"
    )
    user_text = f"{instructions}sentence: {input_sample}\n"

    user_message = {"role": "user", "content": user_text}
    return [user_message]


def post_process(response):
    """Map the model's reply onto the dataset labels.

    Arguments
    ---------
    response : dict
        Parsed API response; the reply text is read from
        `response["content"][0]["text"]`.

    Returns
    -------
    str or None
        "OBJ" or "SUBJ" when a label can be recognised, otherwise None
        (including when the response is malformed or is not valid JSON).
    """
    try:
        raw_text = response["content"][0]["text"]
    except (KeyError, IndexError, TypeError):
        # Missing or empty content: nothing to classify.
        return None
    if not isinstance(raw_text, str):
        return None
    text = raw_text.strip().lower()

    # The reply may wrap the JSON object in code fences or extra prose, so
    # parse only the outermost {...} span when one is present.
    start, end = text.find("{"), text.rfind("}")
    candidate = text[start : end + 1] if 0 <= start < end else text
    try:
        parsed = json.loads(candidate)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None

    if isinstance(parsed, dict):
        label = parsed.get("label")
    elif isinstance(parsed, str):
        label = parsed
    else:
        # Not usable JSON: fall back to matching against the raw reply.
        label = text
    if not isinstance(label, str):
        return None
    label = label.strip()

    if "label: objective" in label:
        return "OBJ"
    if "label: subjective" in label:
        return "SUBJ"

    # Exact matches only (ignoring a trailing period), so free-form prose
    # that merely mentions a label is not misread as an answer.
    known_labels = {
        "objective": "OBJ",
        "obj": "OBJ",
        "subjective": "SUBJ",
        "subj": "SUBJ",
    }
    return known_labels.get(label.rstrip(". "))
130 changes: 130 additions & 0 deletions llmebench/models/Anthropic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import json
import logging
import os

import anthropic

from llmebench.models.model_base import ModelBase


class AnthropicFailure(Exception):
    """Exception class to map various failure types from the Anthropic API

    Arguments
    ---------
    failure_type : str
        Short failure category. "processing" and "connection" have
        human-readable descriptions; any other value is shown as-is.
    failure_message : str
        Details of the failure, appended to the description.
    """

    def __init__(self, failure_type, failure_message):
        # Forward the arguments to Exception so that `args` is populated;
        # without this the exception cannot be copied or pickled, because
        # it would be rebuilt with no arguments.
        super().__init__(failure_type, failure_message)
        self.type_mapping = {
            "processing": "Model Inference failure",
            "connection": "Failed to connect to the API endpoint",
        }
        self.type = failure_type
        self.failure_message = failure_message

    def __str__(self):
        # Unknown failure types fall back to the raw type string.
        description = self.type_mapping.get(self.type, self.type)
        return f"{description}: \n {self.failure_message}"


class AnthropicModel(ModelBase):
    """
    Anthropic Model interface.

    Arguments
    ---------
    api_base : str
        Base URL of the API. Falls back to the `ANTHROPIC_API_URL`
        environment variable. It is stored on the instance only; the client
        is created from the API key alone.
    api_key : str
        API key. Falls back to the `ANTHROPIC_API_KEY` environment variable.
        Required.
    model_name : str
        Name of the model to query. Falls back to the `ANTHROPIC_MODEL`
        environment variable. Required.
    timeout : int
        Number of seconds before the request to the server is timed out.
        Defaults to 20. NOTE(review): stored as `api_timeout` but not
        forwarded to the client here -- confirm whether that is intended.
    temperature : float
        Temperature value to use for the model. Defaults to zero for reproducibility.
    top_p : float
        Top P value to use for the model. Defaults to 0.95
    max_tokens : int
        Value sent as `max_tokens` with every request. Defaults to 2000
    system_msg : str
        Optional system prompt (keyword argument). `system` is accepted as
        an alias. Defaults to "You are an expert AI assistant".
    """

    def __init__(
        self,
        api_base=None,
        api_key=None,
        model_name=None,
        timeout=20,
        temperature=0,
        top_p=0.95,
        max_tokens=2000,
        **kwargs,
    ):
        # API parameters
        self.api_base = api_base or os.getenv("ANTHROPIC_API_URL")
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        self.model_name = model_name or os.getenv("ANTHROPIC_MODEL")

        # Parameters
        self.api_timeout = timeout
        tolerance = 1e-7
        self.temperature = temperature
        if self.temperature < tolerance:
            # The stored attribute is nudged away from exactly 0.
            # NOTE(review): the request parameters below use the caller's
            # `temperature` as given, so this nudge only affects the
            # attribute -- confirm whether it is still needed.
            self.temperature += tolerance
        self.top_p = top_p
        self.max_tokens = max_tokens

        if self.api_key is None:
            raise Exception(
                "API key must be provided as model config or environment variable (`ANTHROPIC_API_KEY`)"
            )
        if self.model_name is None:
            raise Exception(
                "Model name must be provided as model config or environment variable (`ANTHROPIC_MODEL`)"
            )
        self.model = self.model_name

        # Request parameters sent with every call
        self.model_params = {}
        # Accept the system prompt under either `system_msg` or `system`, so
        # a config using the API's own parameter name is not silently
        # ignored in favour of the generic default.
        system_msg = kwargs.get("system_msg") or kwargs.get("system")
        self.model_params["system"] = system_msg or "You are an expert AI assistant"
        self.model_params["temperature"] = temperature
        self.model_params["top_p"] = top_p
        self.model_params["max_tokens"] = max_tokens
        self.client = anthropic.Anthropic(api_key=self.api_key)

        super().__init__(retry_exceptions=(TimeoutError, AnthropicFailure), **kwargs)

    def summarize_response(self, response):
        """Returns the first text reply from the "assistant", if available

        Arguments
        ---------
        response : dict
            Parsed API response as returned by `prompt`.

        Returns
        -------
        The reply text when it can be located, otherwise `response` unchanged.
        """
        if not isinstance(response, dict):
            return response

        # Anthropic message shape: {"content": [{"text": ...}, ...]}
        content = response.get("content")
        if isinstance(content, list):
            for block in content:
                if isinstance(block, dict) and "text" in block:
                    return block["text"]

        # OpenAI-style shape, kept for backward compatibility
        choices = response.get("choices")
        if isinstance(choices, list) and len(choices) > 0:
            first = choices[0]
            if (
                "message" in first
                and "content" in first["message"]
                and first["message"]["role"] == "assistant"
            ):
                return first["message"]["content"]

        return response

    def prompt(self, processed_input):
        """
        AnthropicModel API Implementation

        Arguments
        ---------
        processed_input : list
            Chat messages forwarded as the `messages` argument of the API
            call, e.g. `[{"role": "user", "content": "..."}]`.

        Returns
        -------
        response : dict
            The API response, round-tripped through JSON into plain
            Python containers.
        """
        # NOTE(review): `AnthropicFailure` is registered as a retry
        # exception in `__init__` but is not raised here; errors from the
        # client propagate as-is -- confirm the intended retry behaviour.
        response = self.client.messages.create(
            model=self.model, messages=processed_input, **self.model_params
        )
        return json.loads(response.json())
1 change: 1 addition & 0 deletions llmebench/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .Anthropic import AnthropicModel
from .AzureModel import AzureModel
from .FastChat import FastChatModel
from .HuggingFaceInferenceAPI import HuggingFaceInferenceAPIModel, HuggingFaceTaskTypes
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ install_requires =
datasets==2.14.6
nltk==3.8.1
openai==1.35.10
anthropic==0.31.2
pandas==2.0.2
pooch==1.7.0
python-dotenv==1.0.0
Expand Down
Loading