-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new assets lang: English, task: checkworthy, data: CLEF-22 (#169)
* lang: English, task: checkworthy, data: CLEF-22 * Improve BLOOM asset postprocessing * Remove spurious imports --------- Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
- Loading branch information
Showing
3 changed files
with
214 additions
and
0 deletions.
There are no files selected for viewing
52 changes: 52 additions & 0 deletions
52
...nchmark_v1/factuality_disinformation_harmful_content/Checkworthiness_BLOOMZ_ENZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import BLOOMPetalModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using the BLOOM Petals backend, zero-shot."""
    model_args = {
        "api_url": os.environ["API_URL"],  # Petals endpoint, injected via environment
        "class_labels": ["0", "1"],
        "max_tries": 3,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": BLOOMPetalModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def prompt(input_sample):
    """Build the zero-shot checkworthiness prompt for a single tweet.

    Returns a dict with a single "prompt" key, as expected by the BLOOM
    Petals model wrapper.
    """
    return {
        "prompt": "Classify the tweet as checkworthy or not checkworthy. Provide only label.\n\n"
        + "tweet: "
        + input_sample
        # Bug fix: a newline is needed before "label:" so it is not glued onto
        # the end of the tweet text (matches the sibling GPT zero-shot asset).
        + "\nlabel: \n"
    }
|
||
|
||
def post_process(response):
    """Map the BLOOM model's raw text output to a "0"/"1" checkworthiness label.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    output matches neither (callers treat None as an unparsable response).
    """
    # Normalize: strip whitespace, lowercase, and drop BLOOM sentence markers.
    label = response["outputs"].strip().lower()
    label = label.replace("<s>", "").replace("</s>", "")

    if label == "checkworthy":
        return "1"
    # Bug fix: the original compared the already-lowercased label against
    # "Not_checkworthy." (capitalized), a branch that could never match.
    # Compare lowercase forms only; the redundant .lower() on "no" is dropped.
    if label in ("not_checkworthy.", "not_checkworthy", "not checkworthy", "no"):
        return "0"
    return None
93 changes: 93 additions & 0 deletions
93
.../factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENFewShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import re | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import GPTChatCompletionModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using an Azure-hosted GPT chat model, few-shot
    (examples are drawn from the train split)."""
    model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        # Azure credentials and deployment are injected via environment.
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv",
        "fewshot": {
            "train_data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_train.tsv",
        },
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def few_shot_prompt(input_sample, base_prompt, examples):
    """Compose the few-shot user prompt: base instruction, labelled examples,
    then the target tweet with the label left blank for the model to fill.

    Each example is a dict with "input_id", "input" (tweet text), and
    "label" ("0" or "1").
    """
    out_prompt = base_prompt + "\n"
    out_prompt = out_prompt + "Here are some examples:\n\n"
    for example in examples:  # index from enumerate was unused; dropped
        # Bug fix: examples were previously labelled "yes"/"no", contradicting
        # the instruction (checkworthy/not_checkworthy) and this file's
        # post_process, which never accepts yes/no. Use the labels the
        # pipeline actually expects.
        label = "not_checkworthy" if example["label"] == "0" else "checkworthy"

        out_prompt = (
            out_prompt
            + "Example "
            + str(example["input_id"])
            + ":"
            + "\n"
            + "tweet: "
            + example["input"]
            + "\nlabel: "
            + label
            + "\n\n"
        )

    # Append the sentence we want the model to predict for but leave the label blank
    out_prompt = out_prompt + "tweet: " + input_sample + "\nlabel: \n"

    return out_prompt
|
||
|
||
def prompt(input_sample, examples):
    """Build the chat messages (system role + few-shot user turn) for GPT.

    Delegates the user-turn construction to few_shot_prompt.
    """
    # Plain string literal: the original used an f-string with no placeholders.
    base_prompt = 'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy. Provide only label.'
    return [
        {
            "role": "system",
            "content": "As an AI system, your role is to analyze tweets and classify them as 'checkworthy' or 'not_checkworthy' based on their potential importance for journalists and fact-checkers.",
        },
        {
            "role": "user",
            "content": few_shot_prompt(input_sample, base_prompt, examples),
        },
    ]
|
||
|
||
def post_process(response):
    """Extract a "0"/"1" checkworthiness label from a GPT chat completion.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    reply cannot be mapped to either class.
    """
    label = response["choices"][0]["message"]["content"]

    # If the model echoed a "label: ..." prefix, keep only the part after it.
    # Bug fix: the original stripped "label:" BEFORE this check, which made
    # the split branch unreachable dead code.
    if "label: " in label:
        label = label.split("label: ")[1]
    label = label.replace("label:", "").strip()

    normalized = label.lower()
    # Check the negative class first: "checkworthy" is a substring of
    # "not_checkworthy", so the order of these tests matters.
    if "not_checkworthy" in normalized or "not checkworthy" in normalized:
        return "0"
    if "checkworthy" in normalized:
        return "1"
    return None
69 changes: 69 additions & 0 deletions
69
...factuality_disinformation_harmful_content/Checkworthiness_GPTChatCompletion_ENZeroShot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
import re | ||
|
||
from arabic_llm_benchmark.datasets import CheckworthinessDataset | ||
from arabic_llm_benchmark.models import GPTChatCompletionModel | ||
from arabic_llm_benchmark.tasks import CheckworthinessTask | ||
|
||
|
||
def config():
    """Assemble the benchmark configuration for English CLEF CT22 subtask 1A
    (tweet checkworthiness) using an Azure-hosted GPT chat model, zero-shot."""
    model_args = {
        "api_type": "azure",
        "api_version": "2023-03-15-preview",
        # Azure credentials and deployment are injected via environment.
        "api_base": os.environ["AZURE_API_URL"],
        "api_key": os.environ["AZURE_API_KEY"],
        "engine_name": os.environ["ENGINE_NAME"],
        "class_labels": ["0", "1"],
        "max_tries": 30,
    }
    general_args = {
        "data_path": "data/factuality_disinformation_harmful_content/checkworthyness/english/CT22_english_1A_checkworthy_test_gold.tsv"
    }
    return {
        "dataset": CheckworthinessDataset,
        "dataset_args": {},
        "task": CheckworthinessTask,
        "task_args": {},
        "model": GPTChatCompletionModel,
        "model_args": model_args,
        "general_args": general_args,
    }
|
||
|
||
def prompt(input_sample):
    """Return the zero-shot chat messages (system + user) for one tweet."""
    instruction = 'Annotate the "tweet" into "one" of the following categories: checkworthy or not_checkworthy'
    user_message = f"{instruction}\n\ntweet: {input_sample}\nlabel: \n"
    system_message = (
        "As an AI system, your role is to analyze tweets and classify them as "
        "'checkworthy' or 'not_checkworthy' based on their potential importance "
        "for journalists and fact-checkers."
    )
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
|
||
|
||
def post_process(response):
    """Extract a "0"/"1" checkworthiness label from a GPT chat completion.

    Returns "1" for checkworthy, "0" for not checkworthy, or None when the
    reply cannot be mapped to either class.
    """
    label = response["choices"][0]["message"]["content"]

    # If the model echoed a "label: ..." prefix, keep only the part after it.
    # Bug fix: the original stripped "label:" BEFORE this check, which made
    # the split branch unreachable dead code.
    if "label: " in label:
        label = label.split("label: ")[1]
    label = label.replace("label:", "").strip()

    normalized = label.lower()
    # Check the negative class first: "checkworthy" is a substring of
    # "not_checkworthy", so the order of these tests matters.
    if "not_checkworthy" in normalized or "not checkworthy" in normalized:
        return "0"
    if "checkworthy" in normalized:
        return "1"
    return None