diff --git a/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_BLOOMZ_ENZeroShot.py b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_BLOOMZ_ENZeroShot.py
new file mode 100644
index 00000000..c3add3ff
--- /dev/null
+++ b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_BLOOMZ_ENZeroShot.py
@@ -0,0 +1,52 @@
+import os
+
+from arabic_llm_benchmark.datasets import CheckworthinessDataset
+from arabic_llm_benchmark.models import BLOOMPetalModel
+from arabic_llm_benchmark.tasks import CheckworthinessTask
+
+
+def config():
+    return {
+        "dataset": CheckworthinessDataset,
+        "dataset_args": {},
+        "task": CheckworthinessTask,
+        "task_args": {},
+        "model": BLOOMPetalModel,
+        "model_args": {
+            "api_url": os.environ["API_URL"],
+            "class_labels": ["0", "1"],
+            "max_tries": 3,
+        },
+        "general_args": {
+            "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
+        },
+    }
+
+
+def prompt(input_sample):
+    return {
+        "prompt": "Classify the tweet as checkworthy or not checkworthy. Provide only label.\n\n"
+        + "tweet: "
+        + input_sample
+        + "\nlabel: \n"
+    }
+
+
+def post_process(response):
+    label = response["outputs"].strip().lower()
+    # NOTE(review): the original called label.replace("", "") twice — a no-op; the
+    # token strings were likely lost in transit (e.g. "<s>"/"</s>"); confirm and restore.
+
+    label_fixed = None
+
+    if label == "checkworthy":
+        label_fixed = "1"
+    elif (
+        label == "not_checkworthy."
+        or label == "not_checkworthy"
+        or label == "not checkworthy"
+        or label == "no"
+    ):
+        label_fixed = "0"
+
+    return label_fixed
diff --git a/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENFewShot.py b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENFewShot.py
new file mode 100644
index 00000000..57c61e5a
--- /dev/null
+++ b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENFewShot.py
@@ -0,0 +1,93 @@
+import os
+import re
+
+from arabic_llm_benchmark.datasets import CheckworthinessDataset
+from arabic_llm_benchmark.models import GPTChatCompletionModel
+from arabic_llm_benchmark.tasks import CheckworthinessTask
+
+
+def config():
+    return {
+        "dataset": CheckworthinessDataset,
+        "dataset_args": {},
+        "task": CheckworthinessTask,
+        "task_args": {},
+        "model": GPTChatCompletionModel,
+        "model_args": {
+            "api_type": "azure",
+            "api_version": "2023-03-15-preview",
+            "api_base": os.environ["AZURE_API_URL"],
+            "api_key": os.environ["AZURE_API_KEY"],
+            "engine_name": os.environ["ENGINE_NAME"],
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {
+            "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv",
+            "fewshot": {
+                "train_data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_train.tsv",
+            },
+        },
+    }
+
+
+def few_shot_prompt(input_sample, base_prompt, examples):
+    out_prompt = base_prompt + "\n"
+    out_prompt = out_prompt + "Here are some examples:\n\n"
+    for index, example in enumerate(examples):
+        label = "no" if example["label"] == "0" else "yes"
+
+        out_prompt = (
+            out_prompt
+            + "Example "
+            + str(example["input_id"])
+            + ":"
+            + "\n"
+            + "tweet: "
+            + example["input"]
+            + "\nlabel: "
+            + label
+            + "\n\n"
+        )
+
+    # Append the sentence we want the model to predict for but leave the Label blank
+    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"
+
+    return out_prompt
+
+
+def prompt(input_sample, examples):
+    base_prompt = 'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy. Provide only label.'
+    return [
+        {
+            "role": "system",
+            "content": "As an AI system, your role is to analyze tweets and classify them as 'checkworthy' or 'not_checkworthy' based on their potential importance for journalists and fact-checkers.",
+        },
+        {
+            "role": "user",
+            "content": few_shot_prompt(input_sample, base_prompt, examples),
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = label.replace("label:", "").strip()
+
+    if "label: " in label:
+        arr = label.split("label: ")
+        label = arr[1].strip()
+
+    if label == "checkworthy" or label == "Checkworthy":
+        label_fixed = "1"
+    elif label == "Not_checkworthy." or label == "not_checkworthy":
+        label_fixed = "0"
+    elif "not_checkworthy" in label or "label: not_checkworthy" in label:
+        label_fixed = "0"
+    elif "checkworthy" in label or "label: checkworthy" in label:
+        label_fixed = "1"
+    else:
+        label_fixed = None
+
+    return label_fixed
diff --git a/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENZeroShot.py b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENZeroShot.py
new file mode 100644
index 00000000..75800b2e
--- /dev/null
+++ b/assets/benchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENZeroShot.py
@@ -0,0 +1,69 @@
+import os
+import re
+
+from arabic_llm_benchmark.datasets import CheckworthinessDataset
+from arabic_llm_benchmark.models import GPTChatCompletionModel
+from arabic_llm_benchmark.tasks import CheckworthinessTask
+
+
+def config():
+    return {
+        "dataset": CheckworthinessDataset,
+        "dataset_args": {},
+        "task": CheckworthinessTask,
+        "task_args": {},
+        "model": GPTChatCompletionModel,
+        "model_args": {
+            "api_type": "azure",
+            "api_version": "2023-03-15-preview",
+            "api_base": os.environ["AZURE_API_URL"],
+            "api_key": os.environ["AZURE_API_KEY"],
+            "engine_name": os.environ["ENGINE_NAME"],
+            "class_labels": ["0", "1"],
+            "max_tries": 30,
+        },
+        "general_args": {
+            "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
+        },
+    }
+
+
+def prompt(input_sample):
+    prompt_string = (
+        f'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy\n\n'
+        f"tweet: {input_sample}\n"
+        f"label: \n"
+    )
+    return [
+        {
+            "role": "system",
+            "content": "As an AI system, your role is to analyze tweets and classify them as 'checkworthy' or 'not_checkworthy' based on their potential importance for journalists and fact-checkers.",
+        },
+        {
+            "role": "user",
+            "content": prompt_string,
+        },
+    ]
+
+
+def post_process(response):
+    label = response["choices"][0]["message"]["content"]
+
+    label = label.replace("label:", "").strip()
+
+    if "label: " in label:
+        arr = label.split("label: ")
+        label = arr[1].strip()
+
+    if label == "checkworthy" or label == "Checkworthy":
+        label_fixed = "1"
+    elif label == "Not_checkworthy." or label == "not_checkworthy":
+        label_fixed = "0"
+    elif "not_checkworthy" in label or "label: not_checkworthy" in label:
+        label_fixed = "0"
+    elif "checkworthy" in label or "label: checkworthy" in label:
+        label_fixed = "1"
+    else:
+        label_fixed = None
+
+    return label_fixed