Skip to content

Commit

Permalink
Add new assets lang: English, task: checkworthy, data: CLEF-22 (#169)
Browse files Browse the repository at this point in the history
* lang: English, task: checkworthy, data: CLEF-22

* Improve BLOOM asset postprocessing

* Remove spurious imports

---------

Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
  • Loading branch information
firojalam and fdalvi authored Aug 22, 2023
1 parent 598999a commit fbd3800
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os

from arabic_llm_benchmark.datasets import CheckworthinessDataset
from arabic_llm_benchmark.models import BLOOMPetalModel
from arabic_llm_benchmark.tasks import CheckworthinessTask


def config():
    """Return the benchmark configuration for the CLEF CT22 English
    checkworthiness task evaluated with BLOOM over the Petals API.

    Requires the ``API_URL`` environment variable to be set.
    """
    model_settings = {
        # Petals endpoint serving BLOOM; raises KeyError if API_URL is unset.
        "api_url": os.environ["API_URL"],
        "class_labels": ["0", "1"],
        "max_tries": 3,
    }
    run_settings = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": BLOOMPetalModel,
        "model_args": model_settings,
        "general_args": run_settings,
    }


def prompt(input_sample):
    """Build the BLOOM completion prompt for one tweet.

    Fix: the original concatenated the tweet text directly into
    "label:" with no separating newline; the sibling GPT assets all
    place "label:" on its own line after the tweet.
    """
    return {
        "prompt": "Classify the tweet as checkworthy or not checkworthy. Provide only label.\n\n"
        + "tweet: "
        + input_sample
        + "\nlabel: \n"
    }


def post_process(response):
    """Map a raw BLOOM completion to a class label.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    output cannot be parsed.

    Fixes over the original:
    - ``label == "Not_checkworthy."`` could never match because ``label``
      was already lowercased; compare against the lowercase form instead.
    - Re-strip after removing the <s>/</s> sentinel tokens, which can leave
      leading/trailing whitespace behind.
    - Drop the redundant ``label.lower()`` (already lowercased).
    """
    label = response["outputs"].strip().lower()
    label = label.replace("<s>", "").replace("</s>", "").strip()

    if label == "checkworthy":
        return "1"
    if label in ("not_checkworthy.", "not_checkworthy", "not checkworthy", "no"):
        return "0"
    # Unrecognized output: signal a parse failure to the caller.
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import os
import re

from arabic_llm_benchmark.datasets import CheckworthinessDataset
from arabic_llm_benchmark.models import GPTChatCompletionModel
from arabic_llm_benchmark.tasks import CheckworthinessTask


def config():
    """Return the few-shot benchmark configuration for the CLEF CT22 English
    checkworthiness task evaluated with an Azure-hosted GPT chat model.

    Requires AZURE_API_URL, AZURE_API_KEY and ENGINE_NAME in the environment.
    """
    azure_model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv",
        # Few-shot examples are drawn from the training split.
        "fewshot": {
            "train_data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_train.tsv",
        },
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": azure_model_args,
        "general_args": general_args,
    }


def few_shot_prompt(input_sample, base_prompt, examples):
    """Render the few-shot prompt: instruction, worked examples, then the
    query tweet with an empty label for the model to fill in.

    Each ``example`` is a dict with "input_id", "input" and "label" keys,
    where label "0" maps to "no" and anything else to "yes".

    Fix: drop the unused ``enumerate`` index from the loop.
    """
    out_prompt = base_prompt + "\n"
    out_prompt = out_prompt + "Here are some examples:\n\n"
    for example in examples:
        # Dataset labels are "0"/"1"; the prompt uses natural-language no/yes.
        label = "no" if example["label"] == "0" else "yes"
        out_prompt = (
            out_prompt
            + "Example "
            + str(example["input_id"])
            + ":"
            + "\n"
            + "tweet: "
            + example["input"]
            + "\nlabel: "
            + label
            + "\n\n"
        )

    # Append the sentence we want the model to predict for but leave the Label blank
    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"

    return out_prompt


def prompt(input_sample, examples):
    """Assemble the chat messages (system role + few-shot user turn) sent to
    the GPT model for one tweet."""
    instruction = (
        'Annotate the "tweet" into "one" of the following categories: '
        "checkworthy or not_checkworthy. Provide only label."
    )
    system_message = {
        "role": "system",
        "content": (
            "As an AI system, your role is to analyze tweets and classify them as "
            "'checkworthy' or 'not_checkworthy' based on their potential importance "
            "for journalists and fact-checkers."
        ),
    }
    user_message = {
        "role": "user",
        "content": few_shot_prompt(input_sample, instruction, examples),
    }
    return [system_message, user_message]


def post_process(response):
    """Map a GPT chat completion to a class label.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    output cannot be parsed.

    Fixes over the original:
    - The ``if "label: " in label`` branch was dead code: the preceding
      ``replace("label:", "")`` removes every occurrence, so the split
      could never fire. Same for the ``"label: not_checkworthy"`` /
      ``"label: checkworthy"`` containment checks.
    - Lowercase once so mixed-case outputs (e.g. "NOT_CHECKWORTHY") are
      handled uniformly instead of only the exact spellings listed.
    """
    label = response["choices"][0]["message"]["content"]

    # Strip any echoed "label:" prefix, then normalize case for matching.
    label = label.replace("label:", "").strip().lower()

    # Check the negative class first: "not_checkworthy" contains "checkworthy",
    # so testing the positive class first would misclassify negatives.
    if "not_checkworthy" in label or "not checkworthy" in label:
        return "0"
    if "checkworthy" in label:
        return "1"
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import re

from arabic_llm_benchmark.datasets import CheckworthinessDataset
from arabic_llm_benchmark.models import GPTChatCompletionModel
from arabic_llm_benchmark.tasks import CheckworthinessTask


def config():
    """Return the zero-shot benchmark configuration for the CLEF CT22 English
    checkworthiness task evaluated with an Azure-hosted GPT chat model.

    Requires AZURE_API_URL, AZURE_API_KEY and ENGINE_NAME in the environment.
    """
    azure_model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": azure_model_args,
        "general_args": {
            "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
        },
    }


def prompt(input_sample):
    """Build the zero-shot chat messages asking GPT to label one tweet as
    checkworthy or not_checkworthy."""
    user_text = (
        'Annotate the "tweet" into "one" of the following categories: '
        "checkworthy or not_checkworthy\n\n"
        f"tweet: {input_sample}\n"
        "label: \n"
    )
    system_text = (
        "As an AI system, your role is to analyze tweets and classify them as "
        "'checkworthy' or 'not_checkworthy' based on their potential importance "
        "for journalists and fact-checkers."
    )
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]


def post_process(response):
    """Map a GPT chat completion to a class label.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    output cannot be parsed.

    Fixes over the original:
    - The ``if "label: " in label`` branch was dead code: the preceding
      ``replace("label:", "")`` removes every occurrence, so the split
      could never fire. Same for the ``"label: not_checkworthy"`` /
      ``"label: checkworthy"`` containment checks.
    - Lowercase once so mixed-case outputs (e.g. "NOT_CHECKWORTHY") are
      handled uniformly instead of only the exact spellings listed.
    """
    label = response["choices"][0]["message"]["content"]

    # Strip any echoed "label:" prefix, then normalize case for matching.
    label = label.replace("label:", "").strip().lower()

    # Check the negative class first: "not_checkworthy" contains "checkworthy",
    # so testing the positive class first would misclassify negatives.
    if "not_checkworthy" in label or "not checkworthy" in label:
        return "0"
    if "checkworthy" in label:
        return "1"
    return None

0 comments on commit fbd3800

Please sign in to comment.