From d4804e580a97d99b54c035a90c30af3aebf0194a Mon Sep 17 00:00:00 2001
From: Marco Lehner <marco-lehner@posteo.net>
Date: Thu, 5 Sep 2024 13:21:51 +0200
Subject: [PATCH 1/4] :white_check_mark: Add results from latest changes

---
 data/niels_result.jsonl | 79 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 data/niels_result.jsonl

diff --git a/data/niels_result.jsonl b/data/niels_result.jsonl
new file mode 100644
index 0000000..3afd539
--- /dev/null
+++ b/data/niels_result.jsonl
@@ -0,0 +1,79 @@
+{"id": "QbnlpQu:0:0", "hallucination": false, "prob": 1.0}
+{"id": "QbnlpQu:0:1", "hallucination": true, "prob": 1.0}
+{"id": "QbnlpQu:0:2", "hallucination": true, "prob": 1.0}
+{"id": "Qzej4uY:0:1", "hallucination": true, "prob": 1.0}
+{"id": "Qzej4uY:0:2", "hallucination": true, "prob": 1.0}
+{"id": "QpLxXjj:1:0", "hallucination": false, "prob": 1.0}
+{"id": "QpLxXjj:1:1", "hallucination": true, "prob": 1.0}
+{"id": "QpLxXjj:1:2", "hallucination": true, "prob": 1.0}
+{"id": "Tvqzbee:7:0", "hallucination": false, "prob": 1.0}
+{"id": "Tvqzbee:7:1", "hallucination": true, "prob": 1.0}
+{"id": "TfuvgKU:7:0", "hallucination": false, "prob": 1.0}
+{"id": "TfuvgKU:7:1", "hallucination": true, "prob": 1.0}
+{"id": "TfuvgKU:7:2", "hallucination": true, "prob": 1.0}
+{"id": "TzcySXe:32:0", "hallucination": true, "prob": 1.0}
+{"id": "TzcySXe:32:2", "hallucination": true, "prob": 1.0}
+{"id": "Tnw5jgk:1:0", "hallucination": false, "prob": 1.0}
+{"id": "Tnw5jgk:1:1", "hallucination": true, "prob": 1.0}
+{"id": "Tnw5jgk:1:2", "hallucination": true, "prob": 1.0}
+{"id": "SuzRVgV:4:0", "hallucination": false, "prob": 1.0}
+{"id": "SuzRVgV:4:1", "hallucination": true, "prob": 1.0}
+{"id": "SuzRVgV:4:2", "hallucination": true, "prob": 1.0}
+{"id": "SrBKTF6:1:0", "hallucination": false, "prob": 1.0}
+{"id": "SrBKTF6:1:1", "hallucination": false, "prob": 1.0}
+{"id": "SrBKTF6:1:2", "hallucination": true, "prob": 1.0}
+{"id": "QnbiQif:0:0", "hallucination": false, "prob": 1.0}
+{"id": "QnbiQif:0:1", "hallucination": true, "prob": 1.0}
+{"id": "QnbiQif:0:2", "hallucination": true, "prob": 1.0}
+{"id": "UDaDpiY:12:0", "hallucination": true, "prob": 1.0}
+{"id": "UDaDpiY:12:1", "hallucination": true, "prob": 1.0}
+{"id": "UDaDpiY:12:2", "hallucination": true, "prob": 1.0}
+{"id": "QS6lXIY:4:0", "hallucination": false, "prob": 1.0}
+{"id": "QS6lXIY:4:1", "hallucination": true, "prob": 1.0}
+{"id": "QS6lXIY:4:2", "hallucination": true, "prob": 1.0}
+{"id": "UCdY4tR:234:0", "hallucination": false, "prob": 1.0}
+{"id": "UCdY4tR:234:1", "hallucination": true, "prob": 1.0}
+{"id": "UCdY4tR:234:2", "hallucination": true, "prob": 1.0}
+{"id": "Su6AagY:257:0", "hallucination": false, "prob": 1.0}
+{"id": "Su6AagY:257:1", "hallucination": true, "prob": 1.0}
+{"id": "Su6AagY:257:2", "hallucination": true, "prob": 1.0}
+{"id": "UA01Aus:165:0", "hallucination": false, "prob": 1.0}
+{"id": "UA01Aus:165:1", "hallucination": true, "prob": 1.0}
+{"id": "UA01Aus:165:2", "hallucination": true, "prob": 1.0}
+{"id": "U0l0Hpg:3:0", "hallucination": false, "prob": 1.0}
+{"id": "U0l0Hpg:3:1", "hallucination": true, "prob": 1.0}
+{"id": "U0l0Hpg:3:2", "hallucination": true, "prob": 1.0}
+{"id": "UFgFNZn:17:0", "hallucination": false, "prob": 1.0}
+{"id": "UFgFNZn:17:1", "hallucination": true, "prob": 1.0}
+{"id": "UFgFNZn:17:2", "hallucination": true, "prob": 1.0}
+{"id": "TdgE8xc:2:0", "hallucination": false, "prob": 1.0}
+{"id": "TdgE8xc:2:1", "hallucination": true, "prob": 1.0}
+{"id": "TdgE8xc:2:2", "hallucination": true, "prob": 1.0}
+{"id": "UGvk4He:5:0", "hallucination": false, "prob": 1.0}
+{"id": "UGvk4He:5:1", "hallucination": true, "prob": 1.0}
+{"id": "UGvk4He:5:2", "hallucination": true, "prob": 1.0}
+{"id": "S5NwkNc:331:0", "hallucination": false, "prob": 1.0}
+{"id": "S5NwkNc:331:1", "hallucination": true, "prob": 1.0}
+{"id": "Qk1Bec5:0:0", "hallucination": false, "prob": 1.0}
+{"id": "Qk1Bec5:0:2", "hallucination": true, "prob": 1.0}
+{"id": "U9Qq0Gg:5:0", "hallucination": false, "prob": 1.0}
+{"id": "U9Qq0Gg:5:1", "hallucination": false, "prob": 1.0}
+{"id": "U9Qq0Gg:5:2", "hallucination": true, "prob": 1.0}
+{"id": "QeEX09Y:5:0", "hallucination": true, "prob": 1.0}
+{"id": "QeEX09Y:5:1", "hallucination": true, "prob": 1.0}
+{"id": "QeEX09Y:5:2", "hallucination": true, "prob": 1.0}
+{"id": "SJmtsvf:493:0", "hallucination": false, "prob": 1.0}
+{"id": "SJmtsvf:493:2", "hallucination": true, "prob": 1.0}
+{"id": "U1tS18x:14:0", "hallucination": false, "prob": 1.0}
+{"id": "U1tS18x:14:2", "hallucination": true, "prob": 1.0}
+{"id": "SV1lm0V:26:0", "hallucination": true, "prob": 1.0}
+{"id": "SV1lm0V:26:1", "hallucination": true, "prob": 1.0}
+{"id": "SV1lm0V:26:2", "hallucination": true, "prob": 1.0}
+{"id": "TvlQJW6:0:0", "hallucination": false, "prob": 1.0}
+{"id": "TvlQJW6:0:1", "hallucination": true, "prob": 1.0}
+{"id": "TvlQJW6:0:2", "hallucination": true, "prob": 1.0}
+{"id": "UCeAsYY:6:0", "hallucination": false, "prob": 1.0}
+{"id": "UCeAsYY:6:1", "hallucination": false, "prob": 1.0}
+{"id": "UCeAsYY:6:2", "hallucination": true, "prob": 1.0}
+{"id": "QvVovKI:2:0", "hallucination": false, "prob": 1.0}
+{"id": "QvVovKI:2:2", "hallucination": true, "prob": 1.0}

From 806c2df2310c548928e547eb44bda8d5ab0746c3 Mon Sep 17 00:00:00 2001
From: Marco Lehner <marco-lehner@posteo.net>
Date: Thu, 5 Sep 2024 13:22:44 +0200
Subject: [PATCH 2/4] :art: Make exception handling more specific.

---
 app.py         | 34 ++++++++++++++++++----------------
 src/helpers.py | 16 ++++++----------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/app.py b/app.py
index b0f71f2..4aae29c 100644
--- a/app.py
+++ b/app.py
@@ -1,19 +1,21 @@
 import asyncio
+import json
 import logging
 import re
 from uuid import uuid4
 
 import uvicorn
 from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse
+from newspaper.article import ArticleException
 from openai import OpenAI, AsyncOpenAI
 
 from src.config import app, LOGGING_CONFIG
 from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem
 from src.datastructures import OpenAiModel
-from src.helpers import cosine_similarity, split_sentences, extract_urlnews
-from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings
-from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt, check_prompt_vs_text
 from src.factchecker import FactChecker
+from src.helpers import extract_urlnews
+from src.llm import handle_stream, tool_chain, call_openai_lin
+from src.prompts import system_prompt_honest, system_prompt_malicious, check_summary_prompt, check_prompt_vs_text
 
 run_id = uuid4()
 client = OpenAI()
@@ -51,17 +53,16 @@ def completion(request: GenerationRequest, model: OpenAiModel = OpenAiModel.gpt4
 
 
 @app.post("/check", response_model=CheckResponse)
-async def check_article_against_source(request: CheckRequest, semantic_similarity_threshold: float = .65,
-                                       model: OpenAiModel = OpenAiModel.gpt4mini):
+async def check_article_against_source(request: CheckRequest, model: OpenAiModel = OpenAiModel.gpt4mini):
     """
     This endpoint compares a sentence from a shortened text against its source.
     """
-    
+
     fc = FactChecker(request.source, request.sentence)
     logging.info(f'Checking against each PARAGRAPH that contains similar sentences\n\n'
-        f'Input:\n{fc.input}\n\n'
-        f'{len(fc.similar_para_id)} similar paragraph(s)\n'
-    )
+                 f'Input:\n{fc.input}\n\n'
+                 f'{len(fc.similar_para_id)} similar paragraph(s)\n'
+                 )
 
     async_obj = []
     answers = []
@@ -76,12 +77,11 @@ async def check_article_against_source(request: CheckRequest, semantic_similarit
                   "Quelle:\n"
                   f"{fc.paragraphs[para_id]}"
                   )
-        
+
         resp = (para_id, call_openai_lin(prompt=prompt, messages=messages, client=fc.async_client, model=fc.model))
         async_obj.append(resp)
 
     for resp in async_obj:
-
         # wait for the asynchronous calls to finish
         para_id = resp[0]
         resp = await asyncio.gather(resp[1])
@@ -133,9 +133,11 @@ def extract_article_from_url(url):
     """
     This endpoint extracts articles from html from a given url.
     """
-    
-    headline, text, image_links = extract_urlnews(url)
-    
+    try:
+        headline, text, image_links = extract_urlnews(url)
+    except ArticleException as e:
+        return json.dumps({"status": "failure", "error": f"Cannot fetch or parse the URL: {str(e)}"})
+
     article = {
         'headline': headline,
         'text': text,
@@ -144,7 +146,7 @@ def extract_article_from_url(url):
 
     logging.debug(article)
     return JSONResponse(content=article)
-    
+
 
 if __name__ == '__main__':
-    uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG)
\ No newline at end of file
+    uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG)
diff --git a/src/helpers.py b/src/helpers.py
index f00c240..69fb25c 100644
--- a/src/helpers.py
+++ b/src/helpers.py
@@ -1,11 +1,10 @@
 from typing import List
 
 import spacy
-from numpy import dot
-from numpy.linalg import norm
-
 from bs4 import BeautifulSoup
 from newspaper import Article
+from numpy import dot
+from numpy.linalg import norm
 
 nlp = spacy.load('de_core_news_md')
 
@@ -18,14 +17,11 @@ def split_sentences(text) -> List[str]:
     doc = nlp(text)
     return [x.text for x in doc.sents]
 
+
 def extract_urlnews(url) -> List[str]:
     article = Article(url)
-
-    try:
-        article.download()
-        article.parse()
-    except:
-        return json.dumps({"status": "failure", "error": "Cannot fetch or parse the URL"})
+    article.download()
+    article.parse()
 
     # Use BeautifulSoup to parse the images
     soup = BeautifulSoup(article.html, 'html.parser')
@@ -40,4 +36,4 @@ def extract_urlnews(url) -> List[str]:
     article_images = [img for img in article_images if
                       not (img.lower().endswith('.svg') or img.lower().startswith('data:image/svg+xml'))]
 
-    return article.title, article.text, article_images
\ No newline at end of file
+    return article.title, article.text, article_images

From 091e8beabbdc2b4736fde83bc0a1eb97166a4d78 Mon Sep 17 00:00:00 2001
From: Marco Lehner <marco-lehner@posteo.net>
Date: Thu, 5 Sep 2024 13:23:10 +0200
Subject: [PATCH 3/4] :white_check_mark: Add F-scores to evaluation.

---
 evaluation/evaluation.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/evaluation/evaluation.py b/evaluation/evaluation.py
index 979a09f..7db2ebf 100644
--- a/evaluation/evaluation.py
+++ b/evaluation/evaluation.py
@@ -54,13 +54,26 @@
             if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] > 0:
                 correct += 1
                 hal_detected += 1
+
             elif hypotheses["hallucination"] is False and items[hypotheses["id"]]["hallucination_level"] == 0:
                 correct += 1
 
             if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] == 1:
                 low_hal_detected += 1
 
-    print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.")
-    print(f"Accuracy {file}: {correct / counter}")
-    print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}")
-    print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}")
+    recall = hal_detected / hallucination # how many of the hallucinations were found?
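+    precision = correct / counter # how many predictions are correct? NOTE(review): `correct` also counts true negatives, so this looks like accuracy rather than precision (TP / (TP + FP)) - confirm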
+
+    print("\n")
+    print(f"============================={file}=================================")
+    print("\n")
+
+    #print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.")
+    #print(f"Accuracy {file}: {correct / counter}")
+    #print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}")
+    #print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}")
+    #print("\n")
+    print(f"Precision: {precision}")
+    print(f"Recall: {recall}")
+    print(f"F_0.5-score (precision twice as important as recall): {(1+.5**2)*(recall*precision)/((.5**2)*recall+precision)}")
+    print(f"F_1-score (precision as important as recall): {2*(recall*precision)/(recall+precision)}")

From f6d9681a4e4382f2119b4a380533215f4e707190 Mon Sep 17 00:00:00 2001
From: Marco Lehner <marco-lehner@posteo.net>
Date: Thu, 5 Sep 2024 13:23:42 +0200
Subject: [PATCH 4/4] :art: Remove unnecessary imports.

---
 src/factchecker.py | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/src/factchecker.py b/src/factchecker.py
index e00c04e..2226e90 100644
--- a/src/factchecker.py
+++ b/src/factchecker.py
@@ -1,19 +1,8 @@
-
-import asyncio
-# import logging
-# import re
-# from uuid import uuid4
-
-# import uvicorn
-# from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse
 from openai import OpenAI, AsyncOpenAI
 
-from src.config import app, LOGGING_CONFIG
-from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem
 from src.datastructures import OpenAiModel
-from src.helpers import cosine_similarity, split_sentences, extract_urlnews
-from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings
-from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt
+from src.helpers import cosine_similarity, split_sentences
+from src.llm import create_embeddings
 
 
 class FactChecker:
@@ -23,7 +12,7 @@ def __init__(self,
                  client=OpenAI(),
                  async_client=AsyncOpenAI(),
                  model=OpenAiModel.gpt4mini,
-                 semantic_similarity_threshold = .57
+                 semantic_similarity_threshold=.57
                  ):
         self.source = source
         self.input = input
@@ -32,14 +21,14 @@ def __init__(self,
         self.model = model
         self.semantic_similarity_threshold = semantic_similarity_threshold
         self.paragraphs = self.sentences = []
-        
+
         self._split_text()
         self._embed_sentences()
         self._compare_sentence_embeddings()
-        
-        self.similar_sentences = [sentence for sentence in self.sentences[:-1] if sentence['sim'] > self.semantic_similarity_threshold]
-        self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences]))
 
+        self.similar_sentences = [sentence for sentence in self.sentences[:-1] if
+                                  sentence['sim'] > self.semantic_similarity_threshold]
+        self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences]))
 
     def _split_text(self):
         # split self.source into paras and sents
@@ -52,7 +41,8 @@ def _split_text(self):
 
         for para_id, p in enumerate(self.paragraphs):
             sentence_array = split_sentences(p)
-            self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence in enumerate(sentence_array)]
+            self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence
+                               in enumerate(sentence_array)]
         self.sentences.append({'id': int(-1), 'sentence': self.input, 'para_id': int(-1)})
 
     def _embed_sentences(self):
@@ -64,7 +54,7 @@ def _embed_sentences(self):
 
         # for sentence, embedding in zip(self.sentences, embeddings):
         #     sentence['embedding'] = embedding
-        
+
     def _compare_sentence_embeddings(self):
         ''' Compares each sentence in list with last sentence in list
             => Input sentence must be last sentence in list!'''