Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Adult scripts for multiple models and shot configurations #364

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from llmebench.datasets import AdultDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import AdultTask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The ``author``, the targeted ``model``, a free-text
        ``description`` of the setup, and the reported ``scores``
        (metric name mapped to its value as a string).
    """
    info = {}
    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
    info["model"] = "gpt-4-32k (version 0314)"
    info["description"] = "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning."
    info["scores"] = {"Macro-F1": "0.832"}
    return info


def config():
    """Return the run configuration for this asset.

    Returns:
        dict: The dataset, task and model classes to use, the model
        arguments (class labels and ``max_tries``), and general arguments
        that switch on the ``deduplicate`` few-shot option.
    """
    model_args = {
        "class_labels": ["ADULT", "NOT_ADULT"],
        "max_tries": 30,
    }
    general_args = {"fewshot": {"deduplicate": True}}
    return {
        "dataset": AdultDataset,
        "task": AdultTask,
        "model": OpenAIModel,
        "model_args": model_args,
        "general_args": general_args,
    }


def few_shot_prompt(input_sample, base_prompt, examples):
    """Build the Arabic few-shot prompt text for one test tweet.

    The prompt is the base instruction, a header announcing the examples,
    one numbered block per example (tweet followed by its Arabic label),
    and finally the tweet to classify with an empty label line.

    Args:
        input_sample: Text of the tweet to classify.
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of mappings with an ``"input"`` text and a
            ``"label"``; ``"ADULT"`` is rendered as the Arabic positive
            label, anything else as the Arabic negative label.

    Returns:
        str: The assembled prompt.
    """
    parts = [base_prompt + "\n\n", "اليك بعض الامثلة:\n\n"]
    for index, example in enumerate(examples):
        label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين"
        parts.append(
            "مثال "
            + str(index)
            + ":"
            + "\n"
            + "التغريدة: "
            + example["input"]
            # A real newline separates the tweet from its label line; the
            # previous literal used an invalid escape and emitted a backslash.
            + "\nالتصنيف: "
            + label
            + "\n\n"
        )

    # Append the sentence we want the model to predict for but leave the label blank.
    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")

    return "".join(parts)


def prompt(input_sample, examples):
    """Build the chat messages for a few-shot classification request.

    Args:
        input_sample: Text of the tweet to classify.
        examples: Few-shot examples forwarded to ``few_shot_prompt``.

    Returns:
        list: A system message followed by a user message whose content is
        the few-shot prompt built from the Arabic base instruction.
    """
    base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير."
    system_message = {
        "role": "system",
        "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
    }
    user_message = {
        "role": "user",
        "content": few_shot_prompt(input_sample, base_prompt, examples),
    }
    return [system_message, user_message]


def post_process(response):
    """Map a raw model response to ``"ADULT"``, ``"NOT_ADULT"`` or ``None``.

    Args:
        response: Mapping whose text is found at
            ``response["choices"][0]["message"]["content"]``; a falsy value
            yields ``None``.

    Returns:
        str | None: The predicted class label, or ``None`` when the response
        is falsy or no known marker is found in the text.
    """
    if not response:
        return None

    text = response["choices"][0]["message"]["content"]
    # Drop echoed field names before matching; this happens before lowercasing.
    for prefix in ("التصنيف:", "label:"):
        text = text.replace(prefix, "").strip()
    text = text.lower()

    negative_markers = (
        "ليس",
        "ليست",
        "not",
        "no",
        "غير",
        "لا",
        "not_adult",
        "not adult",
    )
    positive_markers = ("للبالغين", "نعم", "adult")

    # Negative markers are tested first because "not_adult" also contains "adult".
    if any(marker in text for marker in negative_markers):
        return "NOT_ADULT"
    if any(marker in text for marker in positive_markers):
        return "ADULT"
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from llmebench.datasets import AdultDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import AdultTask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The ``author``, the targeted ``model``, a free-text
        ``description`` of the setup, and the reported ``scores``
        (metric name mapped to its value as a string).
    """
    author = "Arabic Language Technologies, QCRI, HBKU"
    model = "gpt-4-32k (version 0314)"
    description = "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning."
    scores = {"Macro-F1": "0.832"}
    return {
        "author": author,
        "model": model,
        "description": description,
        "scores": scores,
    }


def config():
    """Return the run configuration for this asset.

    Returns:
        dict: The dataset, task and model classes to use, the model
        arguments (class labels and ``max_tries``), and general arguments
        that switch on the ``deduplicate`` few-shot option.
    """
    settings = {
        "dataset": AdultDataset,
        "task": AdultTask,
        "model": OpenAIModel,
    }
    settings["model_args"] = {
        "class_labels": ["ADULT", "NOT_ADULT"],
        "max_tries": 30,
    }
    settings["general_args"] = {"fewshot": {"deduplicate": True}}
    return settings


def few_shot_prompt(input_sample, base_prompt, examples):
    """Build the English few-shot prompt text for one test tweet.

    The prompt is the base instruction, a header announcing the examples,
    one numbered block per example (tweet followed by its label), and
    finally the tweet to classify with an empty label line.

    Args:
        input_sample: Text of the tweet to classify.
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of mappings with ``"input"`` and ``"label"``
            strings.

    Returns:
        str: The assembled prompt.
    """
    pieces = [base_prompt + "\n\n", "Here are some examples:\n\n"]
    for position, shot in enumerate(examples):
        header = "Example " + str(position) + ":" + "\n"
        body = "tweet: " + shot["input"] + "\nlabel: " + shot["label"] + "\n\n"
        pieces.append(header + body)

    # The final entry is the tweet to classify, with its label left blank.
    pieces.append("tweet: " + input_sample + "\nlabel: \n")
    return "".join(pieces)


def prompt(input_sample, examples):
    """Build the chat messages for a few-shot classification request.

    Args:
        input_sample: Text of the tweet to classify.
        examples: Few-shot examples forwarded to ``few_shot_prompt``.

    Returns:
        list: A system message followed by a user message whose content is
        the few-shot prompt built from the English base instruction.
    """
    base_prompt = 'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.'
    system_message = {
        "role": "system",
        "content": "You are an expert annotator, you can identify and label adult content within a tweet.",
    }
    user_message = {
        "role": "user",
        "content": few_shot_prompt(input_sample, base_prompt, examples),
    }
    return [system_message, user_message]


def post_process(response):
    """Map a raw model response to ``"ADULT"``, ``"NOT_ADULT"`` or ``None``.

    Args:
        response: Mapping whose text is found at
            ``response["choices"][0]["message"]["content"]``. A falsy value
            (``None``, empty mapping) yields ``None`` instead of raising,
            matching the other Adult assets.

    Returns:
        str | None: The predicted class label, or ``None`` when the response
        is falsy or no known marker is found in the text.
    """
    # Guard against a missing/empty response rather than failing on indexing.
    if not response:
        return None

    label = response["choices"][0]["message"]["content"]
    # Drop echoed field names and sentence-boundary tokens before matching.
    label = label.replace("التصنيف:", "").strip()
    label = label.replace("label:", "").strip()
    label = label.replace("<s>", "").replace("</s>", "")
    label = label.lower()

    negative_markers = (
        "ليس",
        "ليست",
        "not",
        "no",
        "غير",
        "لا",
        "not_adult",
        "not adult",
    )
    positive_markers = ("للبالغين", "نعم", "adult")

    # Negative markers are tested first because "not_adult" also contains "adult".
    if any(marker in label for marker in negative_markers):
        return "NOT_ADULT"
    if any(marker in label for marker in positive_markers):
        return "ADULT"
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from llmebench.datasets import AdultDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import AdultTask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The ``author``, the targeted ``model``, a free-text
        ``description`` of the setup, and the reported ``scores``
        (metric name mapped to its value as a string).
    """
    details = {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "gpt-4-32k (version 0314)",
    }
    details["description"] = "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning."
    details["scores"] = {"Macro-F1": "0.832"}
    return details


def config():
    """Return the run configuration for this asset.

    Returns:
        dict: The dataset, task and model classes to use, the model
        arguments (class labels and ``max_tries``), and general arguments
        that switch on the ``deduplicate`` few-shot option.
    """
    class_labels = ["ADULT", "NOT_ADULT"]
    fewshot_options = {"deduplicate": True}
    return {
        "dataset": AdultDataset,
        "task": AdultTask,
        "model": OpenAIModel,
        "model_args": {"class_labels": class_labels, "max_tries": 30},
        "general_args": {"fewshot": fewshot_options},
    }


def few_shot_prompt(input_sample, base_prompt, examples):
    """Build the Arabic few-shot prompt text for one test tweet.

    The prompt is the base instruction, a header announcing the examples,
    one numbered block per example (tweet followed by its label as given),
    and finally the tweet to classify with an empty label line.

    Args:
        input_sample: Text of the tweet to classify.
        base_prompt: Instruction text placed at the top of the prompt.
        examples: Iterable of mappings with ``"input"`` and ``"label"``
            strings.

    Returns:
        str: The assembled prompt.
    """
    parts = [base_prompt + "\n\n", "اليك بعض الامثلة:\n\n"]
    for index, example in enumerate(examples):
        parts.append(
            "مثال "
            + str(index)
            + ":"
            + "\n"
            + "التغريدة: "
            + example["input"]
            # A real newline separates the tweet from its label line; the
            # previous literal used an invalid escape and emitted a backslash.
            + "\nالتصنيف: "
            + example["label"]
            + "\n\n"
        )

    # Append the sentence we want the model to predict for but leave the label blank.
    parts.append("التغريدة: " + input_sample + "\nالتصنيف: \n")

    return "".join(parts)


def prompt(input_sample, examples):
    """Build the chat messages for a few-shot classification request.

    Args:
        input_sample: Text of the tweet to classify.
        examples: Few-shot examples forwarded to ``few_shot_prompt``.

    Returns:
        list: A system message followed by a user message whose content is
        the few-shot prompt built from the Arabic base instruction (which
        asks for the English labels "ADULT" / "NOT_ADULT").
    """
    base_prompt = 'بالنظر إلى التغريدة التالية، صنفها على أنها "ADULT" أو "NOT_ADULT" بناء على محتوى التغريدة'
    system_message = {
        "role": "system",
        "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
    }
    user_message = {
        "role": "user",
        "content": few_shot_prompt(input_sample, base_prompt, examples),
    }
    return [system_message, user_message]


def post_process(response):
    """Map a raw model response to ``"ADULT"``, ``"NOT_ADULT"`` or ``None``.

    Args:
        response: Mapping whose text is found at
            ``response["choices"][0]["message"]["content"]``; a falsy value
            yields ``None``.

    Returns:
        str | None: The predicted class label, or ``None`` when the response
        is falsy or no known marker is found in the text.
    """
    if not response:
        return None

    text = response["choices"][0]["message"]["content"]

    # Drop echoed field names, then sentence-boundary tokens, before matching.
    for prefix in ("التصنيف:", "label:"):
        text = text.replace(prefix, "").strip()
    for token in ("<s>", "</s>"):
        text = text.replace(token, "")
    text = text.lower()

    negative_markers = (
        "ليس",
        "ليست",
        "not",
        "no",
        "غير",
        "لا",
        "not_adult",
        "not adult",
    )
    positive_markers = ("للبالغين", "نعم", "adult")

    # Negative markers are tested first because "not_adult" also contains "adult".
    if any(marker in text for marker in negative_markers):
        return "NOT_ADULT"
    if any(marker in text for marker in positive_markers):
        return "ADULT"
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from llmebench.datasets import AdultDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import AdultTask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The ``author``, the targeted ``model``, a free-text
        ``description`` of the setup, and the reported ``scores``
        (metric name mapped to its value as a string).
    """
    info = {}
    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
    info["model"] = "gpt-4-32k (version 0314)"
    info["description"] = "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'."
    info["scores"] = {"Macro-F1": "0.727"}
    return info


def config():
    """Return the run configuration for this zero-shot asset.

    Returns:
        dict: The dataset, task and model classes to use, plus the model
        arguments (class labels and ``max_tries``).
    """
    model_args = {
        "class_labels": ["ADULT", "NOT_ADULT"],
        "max_tries": 3,
    }
    return {
        "dataset": AdultDataset,
        "task": AdultTask,
        "model": OpenAIModel,
        "model_args": model_args,
    }


def prompt(input_sample):
    """Build the chat messages for a zero-shot classification request.

    Args:
        input_sample: Text of the tweet to classify.

    Returns:
        list: A system message followed by a user message containing the
        Arabic instruction, the tweet, and an empty label field.
    """
    instruction = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n"
    user_text = instruction + "التغريدة: " + input_sample + "\nالتصنيف: "

    system_message = {
        "role": "system",
        "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
    }
    user_message = {"role": "user", "content": user_text}
    return [system_message, user_message]


def post_process(response):
    """Map a raw model response to ``"ADULT"``, ``"NOT_ADULT"`` or ``None``.

    Args:
        response: Mapping whose text is found at
            ``response["choices"][0]["message"]["content"]``. A falsy value
            (``None``, empty mapping) yields ``None`` instead of raising,
            matching the other Adult assets.

    Returns:
        str | None: The predicted class label, or ``None`` when the response
        is falsy or no known marker is found in the text.
    """
    # Guard against a missing/empty response rather than failing on indexing.
    if not response:
        return None

    label = response["choices"][0]["message"]["content"]
    # Drop echoed field names and sentence-boundary tokens before matching.
    label = label.replace("التصنيف:", "").strip()
    label = label.replace("label:", "").strip()
    label = label.replace("<s>", "").replace("</s>", "")
    label = label.lower()

    negative_markers = (
        "ليس",
        "ليست",
        "not",
        "no",
        "غير",
        "لا",
        "not_adult",
        "not adult",
    )
    positive_markers = ("للبالغين", "نعم", "adult")

    # Negative markers are tested first because "not_adult" also contains "adult".
    if any(marker in label for marker in negative_markers):
        return "NOT_ADULT"
    if any(marker in label for marker in positive_markers):
        return "ADULT"
    return None
Loading