def metadata():
    """Asset metadata: provenance, target Anthropic model, and (empty) score slots."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "claude-3-5-sonnet-20240620",
        "description": "Anthropic model - claude-3-5-sonnet. Find more https://www.anthropic.com/news/claude-3-5-sonnet",
        "scores": {},
    }


def config():
    """Benchmark configuration: dataset, task, model class and model arguments.

    BUG FIX: the system message was passed under the key ``"system"``, but
    ``AnthropicModel.__init__`` reads it from ``kwargs["system_msg"]`` — so the
    system prompt never reached the model. The key is renamed accordingly.
    """
    system_msg = "AI assistant specialized in classifying news article sentences into subjective or objective. A subjective sentence expresses personal opinions, feelings, or beliefs, while an objective sentence presents facts, data, or unbiased information."
    return {
        "dataset": ThatiARDataset,
        "task": SubjectivityTask,
        "model": AnthropicModel,
        "model_args": {
            "class_labels": ["SUBJ", "OBJ"],
            "max_tries": 30,
            # was "system": <msg> — a key AnthropicModel never reads
            "system_msg": system_msg,
        },
    }


def prompt(input_sample):
    """Wrap *input_sample* (an Arabic sentence) in a zero-shot classification
    prompt and return it as a single-turn message list for the Messages API.
    """
    prompt_text = f"""
    Classify the following Arabic 'sentence' as subjective or objective. Provide only the label.
    Provide your response in the following JSON format: {{"label": "your label"}}.
    Please provide JSON output only. No additional text.

    sentence: {input_sample}
    """
    return [
        {
            "role": "user",
            "content": prompt_text,
        },
    ]
def post_process(response):
    """Map the model's JSON reply to "SUBJ"/"OBJ"; return None if unparseable.

    ROBUSTNESS FIX: the original called ``json.loads`` unguarded (any non-JSON
    reply crashed the run) and returned None for the exact labels the prompt
    asks for ("SUBJ"/"OBJ"). Parsing errors now yield None, and short-form
    labels are recognized. All original matches still map the same way.
    """
    try:
        text = response["content"][0]["text"].lower()
        label = json.loads(text)["label"].strip()
    except (KeyError, IndexError, TypeError, AttributeError, ValueError):
        # missing keys, non-dict payloads, or invalid JSON → no label
        return None

    # Check "subjective" before "objective" substrings to avoid any ambiguity.
    if "subjective" in label or label.startswith("subj"):
        return "SUBJ"
    if "objective" in label or label.startswith("obj"):
        return "OBJ"
    return None


class AnthropicFailure(Exception):
    """Exception class to map various failure types from the Anthropic API.

    (Docstring previously said "AzureModel server" — a copy-paste slip from
    the Azure model wrapper.)
    """

    def __init__(self, failure_type, failure_message):
        # Human-readable descriptions for the known failure categories
        self.type_mapping = {
            "processing": "Model Inference failure",
            "connection": "Failed to connect to the API endpoint",
        }
        self.type = failure_type
        self.failure_message = failure_message

    def __str__(self):
        # Unknown types fall back to the raw type string
        return (
            f"{self.type_mapping.get(self.type, self.type)}: \n {self.failure_message}"
        )
def __init__(
    self,
    api_base=None,
    api_key=None,
    model_name=None,
    timeout=20,
    temperature=0,
    top_p=0.95,
    max_tokens=2000,
    **kwargs,
):
    """Initialize the Anthropic client and the per-request parameters.

    api_base/api_key/model_name fall back to the ANTHROPIC_API_URL /
    ANTHROPIC_API_KEY / ANTHROPIC_MODEL environment variables. Raises if the
    key or model name is missing.

    BUG FIX: the zero-temperature nudge was stored in ``self.temperature``
    but ``model_params["temperature"]`` was set from the raw argument, so the
    nudge never reached the API. The request params now use the adjusted
    attributes.
    """
    # API parameters (explicit args take precedence over the environment)
    self.api_base = api_base or os.getenv("ANTHROPIC_API_URL")
    self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    self.model_name = model_name or os.getenv("ANTHROPIC_MODEL")

    self.api_timeout = timeout
    tolerance = 1e-7
    self.temperature = temperature
    if self.temperature < tolerance:
        # Model inference fails if temperature is exactly 0, so nudge it
        # slightly to work around the issue
        self.temperature += tolerance
    self.top_p = top_p
    self.max_tokens = max_tokens

    if self.api_key is None:
        raise Exception(
            "API key must be provided as model config or environment variable (`ANTHROPIC_API_KEY`)"
        )
    if self.model_name is None:
        raise Exception(
            "Model name must be provided as model config or environment variable (`ANTHROPIC_MODEL`)"
        )
    self.model = self.model_name

    # Request parameters forwarded verbatim to client.messages.create().
    # Accept the system message under "system_msg" (documented) or "system"
    # (used by some assets) for backward compatibility.
    self.model_params = {
        "system": kwargs.get("system_msg")
        or kwargs.get("system")
        or "You are an expert AI assistant",
        "temperature": self.temperature,  # nudged value, not the raw argument
        "top_p": self.top_p,
        "max_tokens": self.max_tokens,
    }
    self.client = anthropic.Anthropic(api_key=self.api_key)

    super(AnthropicModel, self).__init__(
        retry_exceptions=(TimeoutError, AnthropicFailure), **kwargs
    )
def summarize_response(self, response):
    """Return the assistant's text reply if present, else the raw response.

    BUG FIX: the original inspected OpenAI-style ``response["choices"][0]
    ["message"]``, which an Anthropic Messages response never contains, so
    the summary always fell through to the raw response. Anthropic carries
    the reply in ``response["content"][0]["text"]`` with a top-level
    ``"role": "assistant"`` — the same shape the assets' post-processing
    reads.
    """
    if (
        isinstance(response, dict)
        and response.get("role") == "assistant"
        and isinstance(response.get("content"), list)
        and len(response["content"]) > 0
        and isinstance(response["content"][0], dict)
        and "text" in response["content"][0]
    ):
        return response["content"][0]["text"]

    # No recognizable assistant reply — hand back the raw response
    return response


def prompt(self, processed_input):
    """
    AnthropicModel API Implementation

    Arguments
    ---------
    processed_input : list
        Message list (role/content dicts) for the Messages API.

    Returns
    -------
    response : dict
        The Anthropic API response, normalized from the SDK object to a
        plain dict via its JSON serialization.
    """
    api_response = self.client.messages.create(
        model=self.model, messages=processed_input, **self.model_params
    )
    # Round-trip through JSON so downstream code sees plain dicts/lists
    return json.loads(api_response.json())