Add new assets lang: Russian, task: Propaganda, data: semeval23 (#166)

* lang: Russian, task: Propaganda, data: semeval23 * Fix GPT4 fewshot asset post processing --------- Co-authored-by: Fahim Imaduddin Dalvi <faimaduddin@hbku.edu.qa>
qcri · Aug 22, 2023 · badc4b4 · badc4b4
1 parent a0214be
commit badc4b4
Show file tree

Hide file tree

Showing 3 changed files with 597 additions and 0 deletions.
diff --git a/...enchmark_v1/factuality_disinformation_harmful_content/PropMultilabel_BLOOMZ_RUZeroShot.py b/...enchmark_v1/factuality_disinformation_harmful_content/PropMultilabel_BLOOMZ_RUZeroShot.py
@@ -0,0 +1,172 @@
+import os
+import re
+
+from arabic_llm_benchmark.datasets import PropagandaSemEval23Dataset
+from arabic_llm_benchmark.models import BLOOMPetalModel
+from arabic_llm_benchmark.tasks import PropagandaMultilabelSemEval23Task
+
+
+def config():
+ return {
+ "dataset": PropagandaSemEval23Dataset,
+ "dataset_args": {
+ "techniques_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/techniques_subtask3.txt"
+ },
+ "task": PropagandaMultilabelSemEval23Task,
+ "task_args": {},
+ "model": BLOOMPetalModel,
+ "model_args": {
+ "api_url": os.environ["API_URL"],
+ "class_labels": [
+ "Appeal_to_Authority",
+ "Appeal_to_Fear-Prejudice",
+ "Appeal_to_Hypocrisy",
+ "Appeal_to_Popularity",
+ "Appeal_to_Time",
+ "Appeal_to_Values",
+ "Causal_Oversimplification",
+ "Consequential_Oversimplification",
+ "Conversation_Killer",
+ "Doubt",
+ "Exaggeration-Minimisation",
+ "False_Dilemma-No_Choice",
+ "Flag_Waving",
+ "Guilt_by_Association",
+ "Loaded_Language",
+ "Name_Calling-Labeling",
+ "Obfuscation-Vagueness-Confusion",
+ "Questioning_the_Reputation",
+ "Red_Herring",
+ "Repetition",
+ "Slogans",
+ "Straw_Man",
+ "Whataboutism",
+ "no_technique",
+ ],
+ "max_tries": 3,
+ },
+ "general_args": {
+ "data_path": "data/factuality_disinformation_harmful_content/propaganda_semeval23/ru_dev_subtask3.json"
+ },
+ }
+
+
+def prompt(input_sample):
+ return {
+ "prompt": f'Label the following "text" based on the following propaganda techniques:\n\n'
+ + f"'no_technique', 'Appeal_to_Authority', 'Appeal_to_Fear-Prejudice', 'Appeal_to_Hypocrisy', 'Appeal_to_Popularity', 'Appeal_to_Time', 'Appeal_to_Values', 'Causal_Oversimplification', 'Consequential_Oversimplification', 'Conversation_Killer', 'Doubt', 'Exaggeration-Minimisation', 'False_Dilemma-No_Choice', 'Flag_Waving', 'Guilt_by_Association', 'Loaded_Language', 'Name_Calling-Labeling', 'Obfuscation-Vagueness-Confusion', 'Questioning_the_Reputation', 'Red_Herring', 'Repetition', 'Slogans', 'Straw_Man', 'Whataboutism'"
+ + f"\nGive the list of techniques separated by a comma. Multiple techniques are allowed: \n"
+ + f"text: {input_sample}\n\n"
+ + f"labels: \n"
+ }
+
+
+def fix_label(pred_label):
+ # Load class labels from config
+ class_labels = config()["model_args"]["class_labels"]
+ class_labels = [c.lower() for c in class_labels]
+
+ pred_labels_bool = [bool(re.search(c.lower(), pred_label)) for c in class_labels]
+ pred_labels = [class_labels[i].lower() for i, c in enumerate(pred_labels_bool) if c]
+
+ # Define a function to process each label
+ def process_label(label):
+ label = label.replace(".", "").strip().lower()
+ label = re.sub("-", " ", label)
+ return label
+
+ label_mappings = {
+ "slogan": "Slogans",
+ "false_dilemma_no_choice": "False_Dilemma-No_Choice",
+ "false_dilemma no_choice": "False_Dilemma-No_Choice",
+ "conversation_killer": "Conversation_Killer",
+ "questioning_the_reputation": "Questioning_the_Reputation",
+ "conversation killer": "Conversation_Killer",
+ "appeal_to_popularity": "Appeal_to_Popularity",
+ "appeal_to_hypocrisy": "Appeal_to_Hypocrisy",
+ "appeal_to_values": "Appeal_to_Values",
+ "guilt_by_association": "Guilt_by_Association",
+ "appeal_to_time": "Appeal_to_Time",
+ "loaded": "Loaded_Language",
+ "prejudice": "Appeal_to_Fear-Prejudice",
+ "fear": "Appeal_to_Fear-Prejudice",
+ "mongering": "Appeal_to_Fear-Prejudice",
+ "terminating": "Thought-terminating cliché",
+ "thought": "Thought-terminating cliché",
+ "calling": "Name_Calling-Labeling",
+ "name c": "Name_Calling-Labeling",
+ "minimisation": "Exaggeration-Minimisation",
+ "exaggeration minim": "Exaggeration-Minimisation",
+ "glittering": "Appeal_to_Values",
+ "flag": "Flag_Waving",
+ "obfuscation": "Obfuscation-Vagueness-Confusion",
+ "oversimplification": "Causal_Oversimplification",
+ "causal": "Causal_Oversimplification",
+ "authority": "Appeal_to_Authority",
+ "dictatorship": "False_Dilemma-No_Choice",
+ "black": "False_Dilemma-No_Choice",
+ "white": "False_Dilemma-No_Choice",
+ "herring": "Red_Herring",
+ "irrelevant": "Red_Herring",
+ "straw": "Straw_Man",
+ "misrepresentation": "Straw_Man",
+ "whataboutism": "Whataboutism",
+ }
+
+ # Define a set for labels that should be set to 'no_technique'
+ no_technique_keywords = {
+ "no propaganda",
+ "technique",
+ "",
+ "no",
+ "appeal to history",
+ "no_technique",
+ "no-technique" "appeal to emotion",
+ "appeal to",
+ "appeal",
+ "appeal to author",
+ "emotional appeal",
+ "no techn",
+ "hashtag",
+ "theory",
+ "specific mention",
+ "religious",
+ "gratitude",
+ }
+
+ labels_fixed = []
+ for label in pred_labels:
+ label_processed = process_label(label)
+
+ # Handle special cases using the dictionary
+ matched = False
+ for key, value in label_mappings.items():
+ if key in label_processed:
+ labels_fixed.append(value)
+ matched = True
+ break
+
+ # If no special case matched, use default behavior
+ if not matched:
+ if label_processed in no_technique_keywords:
+ labels_fixed.append("no_technique")
+ else:
+ labels_fixed.append(label_processed.capitalize())
+
+ # Remove 'no_technique' label when we have other techniques for the same text
+ if len(labels_fixed) > 1 and "no_technique" in labels_fixed:
+ labels_fixed.remove("no_technique")
+
+ return labels_fixed
+
+
+def post_process(response):
+ label = response["outputs"].strip().lower()
+ label = label.replace("<s>", "").replace("</s>", "")
+
+ try:
+ pred_label = fix_label(label.strip())
+ except Exception as ex:
+ print("Error in parsing {}".format(ex))
+
+ return pred_label