-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new assets lang: English, task: checkworthy, data: CLEF-22 (#169)
* lang: English, task: checkworthy, data: CLEF-22 * Improve BLOOM asset postprocessing * Remove spurious imports --------- Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
- Loading branch information
Showing
3 changed files
with
214 additions
and
0 deletions.
There are no files selected for viewing
52 changes: 52 additions & 0 deletions
52
...nchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_BLOOMZ_ENZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import BLOOMPetalModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using the BLOOM Petals backend, zero-shot."""
    model_args = {
        "api_url": os.environ["API_URL"],  # Petals endpoint, injected via environment
        "class_labels": ["0", "1"],
        "max_tries": 3,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": BLOOMPetalModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def prompt(input_sample):
    """Build the zero-shot checkworthiness prompt for a single tweet.

    Returns a dict with a single "prompt" key, as expected by the BLOOM
    Petals model wrapper.
    """
    return {
        "prompt": "Classify the tweet as checkworthy or not checkworthy. Provide only label.\n\n"
        + "tweet: "
        + input_sample
        # Bug fix: a newline is needed before "label:" so it is not glued onto
        # the end of the tweet text (matches the sibling GPT zero-shot asset).
        + "\nlabel: \n"
    }
|
||
|
||
def post_process(response):
    """Map the BLOOM model's raw text output to a "0"/"1" checkworthiness label.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    output matches neither (callers treat None as an unparsable response).
    """
    # Normalize: strip whitespace, lowercase, and drop BLOOM sentence markers.
    label = response["outputs"].strip().lower()
    label = label.replace("<s>", "").replace("</s>", "")

    if label == "checkworthy":
        return "1"
    # Bug fix: the original compared the already-lowercased label against
    # "Not_checkworthy." (capitalized), a branch that could never match.
    # Compare lowercase forms only; the redundant .lower() on "no" is dropped.
    if label in ("not_checkworthy.", "not_checkworthy", "not checkworthy", "no"):
        return "0"
    return None
93 changes: 93 additions & 0 deletions
93
.../factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENFewShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import re | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import GPTChatCompletionModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using an Azure-hosted GPT chat model, few-shot
    (examples are drawn from the train split)."""
    model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        # Azure credentials and deployment are injected via environment.
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv",
        "fewshot": {
            "train_data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_train.tsv",
        },
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Compose the few-shot user prompt: base instruction, labelled examples,
    then the target tweet with the label left blank for the model to fill.

    Each example is a dict with "input_id", "input" (tweet text), and
    "label" ("0" or "1").
    """
    out_prompt = base_prompt + "\n"
    out_prompt = out_prompt + "Here are some examples:\n\n"
    for example in examples:  # index from enumerate was unused; dropped
        # Bug fix: examples were previously labelled "yes"/"no", contradicting
        # the instruction (checkworthy/not_checkworthy) and this file's
        # post_process, which never accepts yes/no. Use the labels the
        # pipeline actually expects.
        label = "not_checkworthy" if example["label"] == "0" else "checkworthy"

        out_prompt = (
            out_prompt
            + "Example "
            + str(example["input_id"])
            + ":"
            + "\n"
            + "tweet: "
            + example["input"]
            + "\nlabel: "
            + label
            + "\n\n"
        )

    # Append the sentence we want the model to predict for but leave the label blank
    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"

    return out_prompt
|
||
|
||
def prompt(input_sample, examples):
    """Build the chat messages (system role + few-shot user turn) for GPT.

    Delegates the user-turn construction to few_shot_prompt.
    """
    # Plain string literal: the original used an f-string with no placeholders.
    base_prompt = 'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy. Provide only label.'
    return [
        {
            "role": "system",
            "content": "As an AI system, your role is to analyze tweets and classify them as 'checkworthy' or 'not_checkworthy' based on their potential importance for journalists and fact-checkers.",
        },
        {
            "role": "user",
            "content": few_shot_prompt(input_sample, base_prompt, examples),
        },
    ]
|
||
|
||
def post_process(response):
    """Extract a "0"/"1" checkworthiness label from a GPT chat completion.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    reply cannot be mapped to either class.
    """
    label = response["choices"][0]["message"]["content"]

    # If the model echoed a "label: ..." prefix, keep only the part after it.
    # Bug fix: the original stripped "label:" BEFORE this check, which made
    # the split branch unreachable dead code.
    if "label: " in label:
        label = label.split("label: ")[1]
    label = label.replace("label:", "").strip()

    normalized = label.lower()
    # Check the negative class first: "checkworthy" is a substring of
    # "not_checkworthy", so the order of these tests matters.
    if "not_checkworthy" in normalized or "not checkworthy" in normalized:
        return "0"
    if "checkworthy" in normalized:
        return "1"
    return None
69 changes: 69 additions & 0 deletions
69
...factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
import re | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import GPTChatCompletionModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using an Azure-hosted GPT chat model, zero-shot."""
    model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        # Azure credentials and deployment are injected via environment.
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def prompt(input_sample):
    """Return the zero-shot chat messages (system + user) for one tweet."""
    instruction = 'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy'
    user_message = f"{instruction}\n\ntweet: {input_sample}\nlabel: \n"
    system_message = (
        "As an AI system, your role is to analyze tweets and classify them as "
        "'checkworthy' or 'not_checkworthy' based on their potential importance "
        "for journalists and fact-checkers."
    )
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
|
||
|
||
def post_process(response):
    """Extract a "0"/"1" checkworthiness label from a GPT chat completion.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    reply cannot be mapped to either class.
    """
    label = response["choices"][0]["message"]["content"]

    # If the model echoed a "label: ..." prefix, keep only the part after it.
    # Bug fix: the original stripped "label:" BEFORE this check, which made
    # the split branch unreachable dead code.
    if "label: " in label:
        label = label.split("label: ")[1]
    label = label.replace("label:", "").strip()

    normalized = label.lower()
    # Check the negative class first: "checkworthy" is a substring of
    # "not_checkworthy", so the order of these tests matters.
    if "not_checkworthy" in normalized or "not checkworthy" in normalized:
        return "0"
    if "checkworthy" in normalized:
        return "1"
    return None