diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py
new file mode 100755
index 00000000..6d0fd69d
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Arabic.py
@@ -0,0 +1,94 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
+ "scores": {"Macro-F1": "0.832"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 30,
+ },
+ "general_args": {
+ "fewshot": {
+ "deduplicate": True,
+ },
+ },
+ }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+ out_prompt = base_prompt + "\n\n"
+ out_prompt = out_prompt + "اليك بعض الامثلة:\n\n"
+ for index, example in enumerate(examples):
+ label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين"
+
+ out_prompt = (
+ out_prompt
+ + "مثال "
+ + str(index)
+ + ":"
+ + "\n"
+ + "التغريدة: "
+ + example["input"]
+            + "\nالتصنيف: "
+ + label
+ + "\n\n"
+ )
+
+ # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"
+
+ return out_prompt
+
+
+def prompt(input_sample, examples):
+ base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير."
+ return [
+ {
+ "role": "system",
+ "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
+ },
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, base_prompt, examples),
+ },
+ ]
+
+
+def post_process(response):
+ if not response:
+ return None
+
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py
new file mode 100755
index 00000000..7027e5a6
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_English.py
@@ -0,0 +1,91 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
+ "scores": {"Macro-F1": "0.832"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 30,
+ },
+ "general_args": {
+ "fewshot": {
+ "deduplicate": True,
+ },
+ },
+ }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+ out_prompt = base_prompt + "\n\n"
+ out_prompt = out_prompt + "Here are some examples:\n\n"
+ for index, example in enumerate(examples):
+ out_prompt = (
+ out_prompt
+ + "Example "
+ + str(index)
+ + ":"
+ + "\n"
+ + "tweet: "
+ + example["input"]
+ + "\nlabel: "
+ + example["label"]
+ + "\n\n"
+ )
+
+ # Append the sentence we want the model to predict for but leave the Label blank
+ out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"
+
+ return out_prompt
+
+
+def prompt(input_sample, examples):
+ base_prompt = f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.'
+ return [
+ {
+ "role": "system",
+ "content": "You are an expert annotator, you can identify and label adult content within a tweet.",
+ },
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, base_prompt, examples),
+ },
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py
new file mode 100755
index 00000000..fd0ba527
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_FewShot_Mixed.py
@@ -0,0 +1,95 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples were chosen per test sample based on MaxMarginalRelevance for few shot learning.",
+ "scores": {"Macro-F1": "0.832"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 30,
+ },
+ "general_args": {
+ "fewshot": {
+ "deduplicate": True,
+ },
+ },
+ }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+ out_prompt = base_prompt + "\n\n"
+ out_prompt = out_prompt + "اليك بعض الامثلة:\n\n"
+ for index, example in enumerate(examples):
+ out_prompt = (
+ out_prompt
+ + "مثال "
+ + str(index)
+ + ":"
+ + "\n"
+ + "التغريدة: "
+ + example["input"]
+            + "\nالتصنيف: "
+ + example["label"]
+ + "\n\n"
+ )
+
+ # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "التغريدة: " + input_sample + "\nالتصنيف: \n"
+
+ return out_prompt
+
+
+def prompt(input_sample, examples):
+ base_prompt = f'بالنظر إلى التغريدة التالية، صنفها على أنها "ADULT" أو "NOT_ADULT" بناء على محتوى التغريدة'
+ return [
+ {
+ "role": "system",
+ "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
+ },
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, base_prompt, examples),
+ },
+ ]
+
+
+def post_process(response):
+ if not response:
+ return None
+
+ label = response["choices"][0]["message"]["content"]
+
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py
new file mode 100755
index 00000000..11f70753
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Arabic.py
@@ -0,0 +1,68 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+ "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
+ "scores": {"Macro-F1": "0.727"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ prompt_string = (
+ "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n"
+ + "التغريدة: "
+ + input_sample
+ + "\nالتصنيف: "
+ )
+ return [
+ {
+ "role": "system",
+ "content": "أنت محلل خبير، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
+ },
+ {
+ "role": "user",
+ "content": prompt_string,
+ },
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py
new file mode 100755
index 00000000..9c1aef26
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_English.py
@@ -0,0 +1,68 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+ "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
+ "scores": {"Macro-F1": "0.727"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ prompt_string = (
+ # f'Classify whether the following Arabic sentence is appropriate for children. If not appropriate, just write "ADULT" without explaination, and if appropriate, just write "NOT_ADULT" without explaination \n {input_sample}'
+ f'Does the following tweet contain adult content? label it as "ADULT" if it does, or "NOT_ADULT" otherwise. Return label only.\n\n'
+ f"tweet: {input_sample}\n"
+ f"label: \n"
+ )
+ return [
+ {
+ "role": "system",
+ "content": "You are an expert annotator, you can identify and label adult content within a tweet.",
+ },
+ {
+ "role": "user",
+ "content": prompt_string,
+ },
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py
new file mode 100755
index 00000000..7b8ba099
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_GPT4_ZeroShot_Mixed.py
@@ -0,0 +1,70 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "gpt-4-32k (version 0314)",
+ "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.",
+ "scores": {"Macro-F1": "0.727"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": OpenAIModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ prompt_string = (
+ f'هل تحتوي التغريدة التالية على لغة للبالغين؟ صنفها كـ "ADULT" أو "NOT_ADULT" بناءً على محتوى التغريدة.\n\n'
+ f"التغريدة: {input_sample}\n"
+ f"التصنيف: \n"
+ )
+ return [
+ {
+ "role": "system",
+ "content": "أنت خبير في تحليل وسائل التواصل، يمكنك تحديد وتصنيف المحتوى الخاص بالبالغين في التغريدة.",
+ },
+ {
+ "role": "user",
+ "content": prompt_string,
+ },
+ ]
+
+
+def post_process(response):
+ out = response["choices"][0]["message"]["content"].replace("label: ", "")
+ j = out.find(".")
+ if j > 0:
+ out = out[0:j]
+ label = out.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ or "not_adult" in label
+ or "not adult" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py
new file mode 100755
index 00000000..f59f2c61
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Arabic.py
@@ -0,0 +1,98 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+        "description": "Deployed on Azure.",
+ "scores": {"Macro-F1": "0.3731"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["للبالغين", "ليس للبالغين"],
+ "max_tries": 3,
+ },
+ }
+
+
+def few_shot_prompt(input_sample, examples):
+ base_prompt = (
+ "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. "
+ "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n"
+ )
+ for index, example in enumerate(examples):
+ label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين"
+ base_prompt += (
+ f"مثال {index + 1}:\n"
+ f"التغريدة: '{example['input']}'\n"
+ f"التصنيف: {label}\n\n"
+ )
+ base_prompt += (
+ f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: "
+ )
+ return base_prompt
+
+
+def prompt(input_sample, examples):
+ return [
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, examples),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py
new file mode 100755
index 00000000..320659e5
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_English.py
@@ -0,0 +1,89 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+ "description": "Deployed on Azure.",
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def few_shot_prompt(input_sample, examples):
+ base_prompt = "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. Provide only label.\n\n"
+ for index, example in enumerate(examples):
+ label = "adult" if example["label"] == "ADULT" else "not adult"
+ base_prompt += (
+ f"Example {index + 1}:\n"
+ f"Tweet: '{example['input']}'\n"
+ f"Label: {label}\n\n"
+ )
+ base_prompt += f"Now classify the new tweet:\nTweet: '{input_sample}'\n" f"Label: "
+ return base_prompt
+
+
+def prompt(input_sample, examples):
+ return [
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, examples),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py
new file mode 100755
index 00000000..adece4ee
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_FewShot_Mixed.py
@@ -0,0 +1,92 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+ "description": "Deployed on Azure.",
+ "scores": {"Macro-F1": "0.3731"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def few_shot_prompt(input_sample, examples):
+ base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
+ for index, example in enumerate(examples):
+ label = "adult" if example["label"] == "ADULT" else "not adult"
+ base_prompt += (
+ f"مثال {index + 1}:\n"
+ f"التغريدة: '{example['input']}'\n"
+ f"التصنيف: {label}\n\n"
+ )
+ base_prompt += f"صنف الآن التغريدة الجديدة:\nجملة: '{input_sample}'\n" f"التصنيف: "
+ return base_prompt
+
+
+def prompt(input_sample, examples):
+ return [
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, examples),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py
new file mode 100755
index 00000000..7fad3348
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Arabic.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+ "description": "Deployed on Azure.",
+ "scores": {"Macro-F1": "0.3731"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ return [
+ {
+ "role": "user",
+ "content": (
+ "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n"
+ + "التغريدة: "
+ + input_sample
+ + "\nالتصنيف: "
+ ),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py
new file mode 100755
index 00000000..50fe32b7
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_English.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+ "description": "Deployed on Azure.",
+ "scores": {"Macro-F1": "0.3731"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ return [
+ {
+ "role": "user",
+ "content": (
+ "Classify the following Arabic sentence as adult language (the language used in adult advertisement and porno advertisement) or not adult language without illustration. "
+ "In case of adult language, just write 'adult' without explanation, and in case of not adult language, just write 'not adult' without explanation:\n\n"
+ + "Sentence: "
+ + input_sample
+ + "\nLabel: "
+ ),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+
+ label = label.replace("", "").replace("", "")
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py
new file mode 100755
index 00000000..f974234b
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_JAIS13b_ZeroShot_Mixed.py
@@ -0,0 +1,85 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import FastChatModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "jais-13b-chat",
+ "description": "Deployed on Azure.",
+ "scores": {"Macro-F1": "0.3731"},
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": FastChatModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def prompt(input_sample):
+ return [
+ {
+ "role": "user",
+ "content": (
+ "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
+ + "تغريدة: "
+ + input_sample
+ + "\nالتصنيف: "
+ ),
+ }
+ ]
+
+
+def post_process(response):
+ label = response["choices"][0]["message"]["content"]
+
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "cannot classify" in label
+ or "cannot complete" in label
+ or "لا يمكنني" in label
+ or "cannot fulfill" in label
+ ):
+ return None
+ elif (
+ "غير مناسب للبالغين" in label
+ or "غير مناسبة للبالغين" in label
+ or "مناسب للجمهور العام" in label
+ ):
+ return "NOT_ADULT"
+ elif "غير مناسب للنشر العام" in label:
+ return "ADULT"
+ elif "not_adult" in label or "not adult" in label:
+ return "NOT_ADULT"
+ elif (
+ "التغريدة مناسبة للبالغين" in label
+ or "المحتوى مناسب للبالغين" in label
+ or "للبالغين" in label
+ or "نعم" in label
+ or "adult" in label
+ ):
+ return "ADULT"
+ elif (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" == label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Arabic.py
new file mode 100755
index 00000000..d95e887c
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Arabic.py
@@ -0,0 +1,77 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+ return {
+ "author": "Arabic Language Technologies, QCRI, HBKU",
+ "model": "LLama 3 8b",
+ "description": "Deployed on Azure.",
+ }
+
+
+def config():
+ return {
+ "dataset": AdultDataset,
+ "task": AdultTask,
+ "model": AzureModel,
+ "model_args": {
+ "class_labels": ["ADULT", "NOT_ADULT"],
+ "max_tries": 3,
+ },
+ }
+
+
+def few_shot_prompt(input_sample, examples):
+ base_prompt = (
+ "صنف التغريدة العربية التالية على أنها تحتوي على لغة للبالغين (اللغة المستخدمة في إعلانات البالغين والإعلانات الإباحية) أو لا تحتوي على لغة للبالغين بدون توضيح. "
+ "في حالة لغة البالغين، اكتب فقط 'للبالغين' بدون تفسير، وفي حالة عدم احتوائها على لغة البالغين، اكتب فقط 'ليست للبالغين' بدون تفسير:\n\n"
+ )
+ for index, example in enumerate(examples):
+ label = "للبالغين" if example["label"] == "ADULT" else "ليست للبالغين"
+ base_prompt += (
+ f"مثال {index + 1}:\n"
+ f"التغريدة: '{example['input']}'\n"
+ f"التصنيف: {label}\n\n"
+ )
+ base_prompt += (
+ f"صنف الآن التغريدة الجديدة:\nالتغريدة: '{input_sample}'\n" f"التصنيف: "
+ )
+ return base_prompt
+
+
+def prompt(input_sample, examples):
+ return [
+ {
+ "role": "user",
+ "content": few_shot_prompt(input_sample, examples),
+ }
+ ]
+
+
+def post_process(response):
+ # if not response or 'error' in response or 'output' not in response:
+ # print("Error or missing output in response:", response)
+ # return "NOT_ADULT" # Safely default to NOT_ADULT when unsure
+
+ label = response["output"].strip().lower()
+ label = label.replace("التصنيف:", "").strip()
+ label = label.replace("label:", "").strip()
+
+ label = label.replace("", "").replace("", "")
+ label = label.lower()
+
+ if (
+ "ليس" in label
+ or "ليست" in label
+ or "not" in label
+ or "no" in label
+ or "غير" in label
+ or "لا" in label
+ ):
+ return "NOT_ADULT"
+ elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+ return "ADULT"
+ else:
+ return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_English.py
new file mode 100755
index 00000000..7fa0d4e9
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_English.py
@@ -0,0 +1,90 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    # NOTE(review): "model"/"description" below describe GPT-4, but this file
+    # is Adult_llama3_FewShot_English.py and config() uses AzureModel like the
+    # sibling llama3 assets — looks copied from the GPT-4 asset; the score may
+    # belong to GPT-4 as well. Verify and correct before relying on it.
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "gpt-4-32k (version 0314)",
+        "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.",
+        "scores": {"Macro-F1": "0.832"},
+    }
+
+
+def config():
+    # Standard LLMeBench asset wiring: dataset, task, model backend, and
+    # few-shot retrieval options.
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            # NOTE(review): sibling llama3 assets use max_tries=3 — confirm 30
+            # is intentional here.
+            "max_tries": 30,
+        },
+        "general_args": {
+            "fewshot": {
+                # Drop duplicate examples from the few-shot pool.
+                "deduplicate": True,
+            },
+        },
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    """Append labelled examples and the unlabelled test tweet to base_prompt.
+
+    Gold labels are emitted verbatim ("ADULT"/"NOT_ADULT"). Examples are
+    numbered from 1 for consistency with the sibling assets (the original
+    numbered from 0).
+    """
+    out_prompt = base_prompt + "\n\n"
+    out_prompt = out_prompt + "Here are some examples:\n\n"
+    for index, example in enumerate(examples):
+        out_prompt = (
+            out_prompt
+            + "Example "
+            + str(index + 1)
+            + ":"
+            + "\n"
+            + "tweet: "
+            + example["input"]
+            + "\nlabel: "
+            + example["label"]
+            + "\n\n"
+        )
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"
+
+    return out_prompt
+
+
+def prompt(input_sample, examples):
+    # English instruction; the tweet text itself is Arabic.
+    base_prompt = "Given the following tweet, label it as 'ADULT' or 'NOT_ADULT' based on the content of the tweet. Provide only label."
+    return [
+        {
+            "role": "user",
+            "content": (few_shot_prompt(input_sample, base_prompt, examples)),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model's raw output to "ADULT"/"NOT_ADULT" (None if unmatched).
+
+    Malformed responses default to "NOT_ADULT" rather than crashing the run.
+    """
+    if not response or "error" in response or "output" not in response:
+        print("Error or missing output in response:", response)
+        return "NOT_ADULT"  # Safely default to NOT_ADULT when unsure
+
+    label = response["output"].strip().lower()
+    # Drop any echoed prompt prefix (Arabic or English) before matching.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+    # Strip quotes the model may echo around the label.
+    # NOTE(review): the original lines were no-op ``replace("", "")`` calls,
+    # most likely mangled quote-stripping — confirm against the upstream asset.
+    label = label.replace("'", "").replace('"', "").strip()
+
+    # Negation markers first, so "not adult" is not matched as "adult".
+    if (
+        "ليس" in label
+        or "ليست" in label
+        or "not" in label
+        or "no" in label
+        or "غير" in label
+        or "لا" in label
+    ):
+        return "NOT_ADULT"
+    elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+        return "ADULT"
+    else:
+        return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Mixed.py
new file mode 100755
index 00000000..581a31b0
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_FewShot_Mixed.py
@@ -0,0 +1,70 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "LLama 3 8b",
+        "description": "Deployed on Azure.",
+        # NOTE(review): 0.3731 is repeated across several llama3 assets in this
+        # directory — verify it belongs to this (few-shot, mixed) setup.
+        "scores": {"Macro-F1": "0.3731"},
+    }
+
+
+def config():
+    # Standard LLMeBench asset wiring: dataset, task, and model backend.
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def few_shot_prompt(input_sample, examples):
+    # Mixed prompt: Arabic instructions with English answer labels
+    # ('adult' / 'not adult').
+    base_prompt = "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
+    for index, example in enumerate(examples):
+        # Map gold "ADULT"/"NOT_ADULT" onto the lowercase labels the prompt asks for.
+        label = "adult" if example["label"] == "ADULT" else "not adult"
+        base_prompt += (
+            f"مثال {index + 1}:\n"
+            f"جملة: '{example['input']}'\n"
+            f"التصنيف: {label}\n\n"
+        )
+    # Trailing label prefix leaves the answer slot blank for the model.
+    base_prompt += f"صنف الآن الجملة الجديدة:\nجملة: '{input_sample}'\n" f"التصنيف: "
+    return base_prompt
+
+
+def prompt(input_sample, examples):
+    # Single user turn carrying the whole few-shot prompt (no system message).
+    return [
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, examples),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model's free-form output onto the dataset's binary labels.
+
+    Returns "ADULT", "NOT_ADULT", or None when the output matches neither
+    class (None is treated as a failed prediction downstream).
+    """
+    label = response["output"].strip().lower()
+    # Drop any echoed prompt prefix (Arabic or English) before matching.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+    # Strip quote characters the model may copy from the quoted examples.
+    # NOTE(review): the original lines were no-op ``replace("", "")`` calls,
+    # most likely mangled quote-stripping — confirm against the upstream asset.
+    label = label.replace("'", "").replace('"', "").strip()
+
+    # Negation markers first, so "not adult" is not matched as "adult".
+    if (
+        "ليس" in label
+        or "ليست" in label
+        or "not" in label
+        or "no" in label
+        or "غير" in label
+        or "لا" in label
+    ):
+        return "NOT_ADULT"
+    elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+        return "ADULT"
+    else:
+        return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Arabic.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Arabic.py
new file mode 100755
index 00000000..c312569f
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Arabic.py
@@ -0,0 +1,65 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "LLama 3 8b",
+        "description": "Deployed on Azure.",
+        # NOTE(review): 0.3731 is repeated across several llama3 assets in this
+        # directory — verify it belongs to this (zero-shot, Arabic) setup.
+        "scores": {"Macro-F1": "0.3731"},
+    }
+
+
+def config():
+    # Standard LLMeBench asset wiring: dataset, task, and model backend.
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    # Zero-shot, fully Arabic prompt; the model is asked to answer with
+    # 'للبالغين' or 'ليست للبالغين' only.
+    return [
+        {
+            "role": "user",
+            "content": (
+                "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'للبالغين' أو 'ليست للبالغين' بدون أي تفسير.\n\n"
+                + "تغريدة: "
+                + input_sample
+                + "\nالتصنيف: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model's free-form output onto the dataset's binary labels.
+
+    Returns "ADULT", "NOT_ADULT", or None when the output matches neither
+    class (None is treated as a failed prediction downstream).
+    """
+    label = response["output"].strip().lower()
+    # Drop any echoed prompt prefix (Arabic or English) before matching.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+    # Strip quote characters the model may echo from the prompt.
+    # NOTE(review): the original lines were no-op ``replace("", "")`` calls,
+    # most likely mangled quote-stripping — confirm against the upstream asset.
+    label = label.replace("'", "").replace('"', "").strip()
+
+    # Negation markers first, so "not adult" is not matched as "adult".
+    if (
+        "ليس" in label
+        or "ليست" in label
+        or "not" in label
+        or "no" in label
+        or "غير" in label
+        or "لا" in label
+    ):
+        return "NOT_ADULT"
+    elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+        return "ADULT"
+    else:
+        return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_English.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_English.py
new file mode 100755
index 00000000..81a94f70
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_English.py
@@ -0,0 +1,66 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "LLama 3 8b",
+        "description": "Deployed on Azure.",
+        "scores": {"Macro-F1": "0.66081"},
+    }
+
+
+def config():
+    # Standard LLMeBench asset wiring: dataset, task, and model backend.
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    # Zero-shot English instruction over an Arabic sentence; the model is
+    # asked to answer 'adult' or 'not adult' only.
+    return [
+        {
+            "role": "user",
+            "content": (
+                "Classify the following Arabic sentence as adult language (the language used in adult advertisement and porno advertisement) or not adult language without illustration. "
+                "In case of adult language, just write 'adult' without explanation, and in case of not adult language, just write 'not adult' without explanation:\n\n"
+                + "Sentence: "
+                + input_sample
+                + "\nLabel: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model's free-form output onto the dataset's binary labels.
+
+    Returns "ADULT", "NOT_ADULT", or None when the output matches neither
+    class (None is treated as a failed prediction downstream).
+    """
+    label = response["output"].strip().lower()
+    # Drop any echoed prompt prefix (Arabic or English) before matching.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+    # Strip quote characters the model may echo from the prompt.
+    # NOTE(review): the original lines were no-op ``replace("", "")`` calls,
+    # most likely mangled quote-stripping — confirm against the upstream asset.
+    label = label.replace("'", "").replace('"', "").strip()
+
+    # Negation markers first, so "not adult" is not matched as "adult".
+    if (
+        "ليس" in label
+        or "ليست" in label
+        or "not" in label
+        or "no" in label
+        or "غير" in label
+        or "لا" in label
+    ):
+        return "NOT_ADULT"
+    elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+        return "ADULT"
+    else:
+        return None
diff --git a/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Mixed.py b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Mixed.py
new file mode 100755
index 00000000..c1cc7f5a
--- /dev/null
+++ b/assets/ar/factuality_disinformation_harmful_content/adult_content_detection/Adult_llama3_ZeroShot_Mixed.py
@@ -0,0 +1,61 @@
+from llmebench.datasets import AdultDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import AdultTask
+
+
+def metadata():
+    return {
+        "author": "Arabic Language Technologies, QCRI, HBKU",
+        "model": "LLama 3 8b",
+        "description": "Deployed on Azure.",
+        # NOTE(review): 0.3731 is repeated across several llama3 assets in this
+        # directory — verify it belongs to this (zero-shot, mixed) setup.
+        "scores": {"Macro-F1": "0.3731"},
+    }
+
+
+def config():
+    # Standard LLMeBench asset wiring: dataset, task, and model backend.
+    return {
+        "dataset": AdultDataset,
+        "task": AdultTask,
+        "model": AzureModel,
+        "model_args": {
+            "class_labels": ["ADULT", "NOT_ADULT"],
+            "max_tries": 3,
+        },
+    }
+
+
+def prompt(input_sample):
+    # Zero-shot mixed prompt: Arabic instructions with English answer labels
+    # ('adult' / 'not adult').
+    return [
+        {
+            "role": "user",
+            "content": (
+                "هل تحتوي التغريدة التالية على لغة للبالغين؟ اجب 'adult' أو 'not adult' بدون أي تفسير.\n\n"
+                + "تغريدة: "
+                + input_sample
+                + "\nالتصنيف: "
+            ),
+        }
+    ]
+
+
+def post_process(response):
+    """Map the model's free-form output onto the dataset's binary labels.
+
+    Returns "ADULT", "NOT_ADULT", or None when the output matches neither
+    class (None is treated as a failed prediction downstream).
+    """
+    label = response["output"].strip().lower()
+    # Drop any echoed prompt prefix (Arabic or English) before matching.
+    label = label.replace("التصنيف:", "").strip()
+    label = label.replace("label:", "").strip()
+    # Strip quote characters the model may echo from the prompt.
+    # NOTE(review): the original lines were no-op ``replace("", "")`` calls,
+    # most likely mangled quote-stripping — confirm against the upstream asset.
+    label = label.replace("'", "").replace('"', "").strip()
+
+    # Negation markers first, so "not adult" is not matched as "adult".
+    if (
+        "ليس" in label
+        or "ليست" in label
+        or "not" in label
+        or "no" in label
+        or "غير" in label
+        or "لا" in label
+    ):
+        return "NOT_ADULT"
+    elif "للبالغين" in label or "نعم" in label or "adult" in label or "بالغين" in label:
+        return "ADULT"
+    else:
+        return None