feat: initialize annotation prompt

arkhn · Nov 22, 2024 · c1bdc46 · c1bdc46
1 parent c31fae4
commit c1bdc46
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 0 deletions.
diff --git a/lib/shroom/bash/experiments/run.sh b/lib/shroom/bash/experiments/run.sh
@@ -0,0 +1 @@
+python run.py -m model=gpt-4o-mini  # selects the "model" config group entry gpt-4o-mini (configs/model/gpt-4o-mini.yaml); -m is presumably Hydra's multirun flag — confirm in run.py
diff --git a/lib/shroom/configs/model/gpt-4o-mini.yaml b/lib/shroom/configs/model/gpt-4o-mini.yaml
@@ -0,0 +1,2 @@
+_target_: langchain_community.chat_models.ChatOpenAI  # NOTE(review): this class is deprecated upstream in favor of langchain_openai.ChatOpenAI — confirm before upgrading langchain
+model_name: gpt-4o-mini  # model identifier; presumably forwarded as a keyword argument to the _target_ class — verify against run.py
diff --git a/lib/shroom/configs/run.yaml b/lib/shroom/configs/run.yaml
@@ -0,0 +1,71 @@
+# @package _global_
+# Primary run config: selects the default model config, names the dataset,
+# and defines the annotation prompt template.
+defaults:
+  # _self_ comes first, so per Hydra's defaults-list ordering the model entry
+  # below wins over this file on conflicting keys.
+  - _self_
+  - model: gpt-4o-mini.yaml
+
+# Dataset identifier; how it is resolved is up to the consuming code.
+dataset: "shroom"
+
+prompts:
+  # Hallucination-span annotation prompt with two few-shot examples.
+  # {model_input}, {model_id} and {model_output_text} are placeholders,
+  # presumably filled in by the caller.
+  # A literal block scalar (|-) keeps every line break, so the bullet lists and
+  # few-shot answers stay one item per line. Each example text is on a single
+  # line because block scalars do not support backslash line continuation.
+  # NOTE(review): the few-shot answers contain literal { } braces; if the caller
+  # renders this with str.format-style templating they must be doubled.
+  instruction: |-
+    You are tasked with identifying spans of text in a given paragraph that correspond to hallucinations.
+    A hallucination is defined as a portion of the text that is factually incorrect or not supported by reality.
+    The LLM's output may include incorrect details, false claims, or fabricated information, which you need to identify.
+
+    Input:
+    - model_input: The input text that was used to generate the model output.
+    - model_id: The name of the model that generated the output.
+    - model_output_text: The text generated by the model.
+
+    Output:
+    For each hallucination, return a span consisting of the start and end positions of the hallucinated text, along with the hallucinated content itself.
+
+    What to look for in hallucinations:
+    - Factual inaccuracies or non-existent information.
+    - Details that contradict established facts.
+    - Claims about imaginary entities, events, or places.
+    - Logical errors or internal contradictions.
+
+    Return format:
+    A list of spans, with each span containing:
+    - start: The starting index of the hallucinated text.
+    - end: The ending index of the hallucinated text.
+    - text: The hallucinated portion of the text.
+
+    For example:
+    Input:
+    - Model input: "Contre quelles équipes l'Azerbaïdjan a-t-il été éliminé lors de l'Euro 1996 ?"
+    - Model name: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+    - Model output text: "L'équipe d'Azerbaïdjan n'a pas participé à l'Euro 1996 car elle n'était pas encore redevenue indépendante et n'avait donc pas pu se qualifier pour la compétition. L'Azerbaïdjan a obtenu son indépendance en 1991, mais il fallut attendre 2009 avant que l'équipe puisse participer à une compétition internationale majeure, les Qualifications pour le Championnat d'Europe de football 2012."
+    Answer: [{"start": 24, "end": 55, "text": "n'a pas participé à l'Euro 1996"},
+             {"start": 60, "end": 162, "text": "elle n'était pas encore redevenue indépendante et n'avait donc pas pu se qualifier pour la compétition"},
+             {"start": 213, "end": 320, "text": "mais il fallut attendre 2009 avant que l'équipe puisse participer à une compétition internationale majeure,"}]
+
+    Input:
+    - Model input: "8437 Bernicla est-il un objet transneptunien ?"
+    - Model name: "meta-llama/Meta-Llama-3-8B-Instruct"
+    - Model output text: "8437 Bernicla est-il un objet transneptunien?\nLa découverte de l'objet 8436 Berniccila en 1988 a suscité un intérêt particulier car il s'agit d'un objet qui se trouve dans la ceinture de Kuiper, zone du système solaire où se trouvent les objets trans-Neptuniens (OTN). Cependant, la classification d'OTNe de Bernicia a été contestée par certains astronomes et la question de savoir si Bernica est un OTNe ou non a longtemps été débattue.\nLa ceINTURE de KUIPER\n"
+    Answer: [{"start": 71, "end": 86, "text": "8436 Berniccila"},
+             {"start": 90, "end": 94, "text": "1988"},
+             {"start": 184, "end": 193, "text": "de Kuiper"},
+             {"start": 269, "end": 437, "text": "Cependant, la classification d'OTNe de Bernicia a été contestée par certains astronomes et la question de savoir si Bernica est un OTNe ou non a longtemps été débattue."},
+             {"start": 443, "end": 449, "text": "INTURE"},
+             {"start": 453, "end": 459, "text": "KUIPER"}]
+
+    Input:
+    - Model input: {model_input}
+    - Model name: {model_id}
+    - Model output text: {model_output_text}
+    Answer:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		_target_: langchain_community.chat_models.ChatOpenAI
		model_name: gpt-4o-mini