From d4804e580a97d99b54c035a90c30af3aebf0194a Mon Sep 17 00:00:00 2001 From: Marco Lehner Date: Thu, 5 Sep 2024 13:21:51 +0200 Subject: [PATCH 1/4] :white_check_mark: Add results from latest changes --- data/niels_result.jsonl | 79 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 data/niels_result.jsonl diff --git a/data/niels_result.jsonl b/data/niels_result.jsonl new file mode 100644 index 0000000..3afd539 --- /dev/null +++ b/data/niels_result.jsonl @@ -0,0 +1,79 @@ +{"id": "QbnlpQu:0:0", "hallucination": false, "prob": 1.0} +{"id": "QbnlpQu:0:1", "hallucination": true, "prob": 1.0} +{"id": "QbnlpQu:0:2", "hallucination": true, "prob": 1.0} +{"id": "Qzej4uY:0:1", "hallucination": true, "prob": 1.0} +{"id": "Qzej4uY:0:2", "hallucination": true, "prob": 1.0} +{"id": "QpLxXjj:1:0", "hallucination": false, "prob": 1.0} +{"id": "QpLxXjj:1:1", "hallucination": true, "prob": 1.0} +{"id": "QpLxXjj:1:2", "hallucination": true, "prob": 1.0} +{"id": "Tvqzbee:7:0", "hallucination": false, "prob": 1.0} +{"id": "Tvqzbee:7:1", "hallucination": true, "prob": 1.0} +{"id": "TfuvgKU:7:0", "hallucination": false, "prob": 1.0} +{"id": "TfuvgKU:7:1", "hallucination": true, "prob": 1.0} +{"id": "TfuvgKU:7:2", "hallucination": true, "prob": 1.0} +{"id": "TzcySXe:32:0", "hallucination": true, "prob": 1.0} +{"id": "TzcySXe:32:2", "hallucination": true, "prob": 1.0} +{"id": "Tnw5jgk:1:0", "hallucination": false, "prob": 1.0} +{"id": "Tnw5jgk:1:1", "hallucination": true, "prob": 1.0} +{"id": "Tnw5jgk:1:2", "hallucination": true, "prob": 1.0} +{"id": "SuzRVgV:4:0", "hallucination": false, "prob": 1.0} +{"id": "SuzRVgV:4:1", "hallucination": true, "prob": 1.0} +{"id": "SuzRVgV:4:2", "hallucination": true, "prob": 1.0} +{"id": "SrBKTF6:1:0", "hallucination": false, "prob": 1.0} +{"id": "SrBKTF6:1:1", "hallucination": false, "prob": 1.0} +{"id": "SrBKTF6:1:2", "hallucination": true, "prob": 1.0} +{"id": "QnbiQif:0:0", "hallucination": false, "prob": 1.0} +{"id": "QnbiQif:0:1", "hallucination": true, "prob": 1.0} +{"id": "QnbiQif:0:2", "hallucination": true, "prob": 1.0} +{"id": "UDaDpiY:12:0", "hallucination": true, "prob": 1.0} +{"id": "UDaDpiY:12:1", "hallucination": true, "prob": 1.0} +{"id": "UDaDpiY:12:2", "hallucination": true, "prob": 1.0} +{"id": "QS6lXIY:4:0", "hallucination": false, "prob": 1.0} +{"id": "QS6lXIY:4:1", "hallucination": true, "prob": 1.0} +{"id": "QS6lXIY:4:2", "hallucination": true, "prob": 1.0} +{"id": "UCdY4tR:234:0", "hallucination": false, "prob": 1.0} +{"id": "UCdY4tR:234:1", "hallucination": true, "prob": 1.0} +{"id": "UCdY4tR:234:2", "hallucination": true, "prob": 1.0} +{"id": "Su6AagY:257:0", "hallucination": false, "prob": 1.0} +{"id": "Su6AagY:257:1", "hallucination": true, "prob": 1.0} +{"id": "Su6AagY:257:2", "hallucination": true, "prob": 1.0} +{"id": "UA01Aus:165:0", "hallucination": false, "prob": 1.0} +{"id": "UA01Aus:165:1", "hallucination": true, "prob": 1.0} +{"id": "UA01Aus:165:2", "hallucination": true, "prob": 1.0} +{"id": "U0l0Hpg:3:0", "hallucination": false, "prob": 1.0} +{"id": "U0l0Hpg:3:1", "hallucination": true, "prob": 1.0} +{"id": "U0l0Hpg:3:2", "hallucination": true, "prob": 1.0} +{"id": "UFgFNZn:17:0", "hallucination": false, "prob": 1.0} +{"id": "UFgFNZn:17:1", "hallucination": true, "prob": 1.0} +{"id": "UFgFNZn:17:2", "hallucination": true, "prob": 1.0} +{"id": "TdgE8xc:2:0", "hallucination": false, "prob": 1.0} +{"id": "TdgE8xc:2:1", "hallucination": true, "prob": 1.0} +{"id": "TdgE8xc:2:2", "hallucination": true, "prob": 1.0} +{"id": "UGvk4He:5:0", "hallucination": false, "prob": 1.0} +{"id": "UGvk4He:5:1", "hallucination": true, "prob": 1.0} +{"id": "UGvk4He:5:2", "hallucination": true, "prob": 1.0} +{"id": "S5NwkNc:331:0", "hallucination": false, "prob": 1.0} +{"id": "S5NwkNc:331:1", "hallucination": true, "prob": 1.0} +{"id": "Qk1Bec5:0:0", "hallucination": false, "prob": 1.0} +{"id": "Qk1Bec5:0:2", "hallucination": true, "prob": 1.0} +{"id": "U9Qq0Gg:5:0", "hallucination": false, "prob": 1.0} +{"id": "U9Qq0Gg:5:1", "hallucination": false, "prob": 1.0} +{"id": "U9Qq0Gg:5:2", "hallucination": true, "prob": 1.0} +{"id": "QeEX09Y:5:0", "hallucination": true, "prob": 1.0} +{"id": "QeEX09Y:5:1", "hallucination": true, "prob": 1.0} +{"id": "QeEX09Y:5:2", "hallucination": true, "prob": 1.0} +{"id": "SJmtsvf:493:0", "hallucination": false, "prob": 1.0} +{"id": "SJmtsvf:493:2", "hallucination": true, "prob": 1.0} +{"id": "U1tS18x:14:0", "hallucination": false, "prob": 1.0} +{"id": "U1tS18x:14:2", "hallucination": true, "prob": 1.0} +{"id": "SV1lm0V:26:0", "hallucination": true, "prob": 1.0} +{"id": "SV1lm0V:26:1", "hallucination": true, "prob": 1.0} +{"id": "SV1lm0V:26:2", "hallucination": true, "prob": 1.0} +{"id": "TvlQJW6:0:0", "hallucination": false, "prob": 1.0} +{"id": "TvlQJW6:0:1", "hallucination": true, "prob": 1.0} +{"id": "TvlQJW6:0:2", "hallucination": true, "prob": 1.0} +{"id": "UCeAsYY:6:0", "hallucination": false, "prob": 1.0} +{"id": "UCeAsYY:6:1", "hallucination": false, "prob": 1.0} +{"id": "UCeAsYY:6:2", "hallucination": true, "prob": 1.0} +{"id": "QvVovKI:2:0", "hallucination": false, "prob": 1.0} +{"id": "QvVovKI:2:2", "hallucination": true, "prob": 1.0} From 806c2df2310c548928e547eb44bda8d5ab0746c3 Mon Sep 17 00:00:00 2001 From: Marco Lehner Date: Thu, 5 Sep 2024 13:22:44 +0200 Subject: [PATCH 2/4] :art: Make exception handling more specific. --- app.py | 34 ++++++++++++++++++---------------- src/helpers.py | 16 ++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/app.py b/app.py index b0f71f2..4aae29c 100644 --- a/app.py +++ b/app.py @@ -1,19 +1,21 @@ import asyncio +import json import logging import re from uuid import uuid4 import uvicorn from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse +from newspaper.article import ArticleException from openai import OpenAI, AsyncOpenAI from src.config import app, LOGGING_CONFIG from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem from src.datastructures import OpenAiModel -from src.helpers import cosine_similarity, split_sentences, extract_urlnews -from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings -from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt, check_prompt_vs_text from src.factchecker import FactChecker +from src.helpers import extract_urlnews +from src.llm import handle_stream, tool_chain, call_openai_lin +from src.prompts import system_prompt_honest, system_prompt_malicious, check_summary_prompt, check_prompt_vs_text run_id = uuid4() client = OpenAI() @@ -51,17 +53,16 @@ def completion(request: GenerationRequest, model: OpenAiModel = OpenAiModel.gpt4 @app.post("/check", response_model=CheckResponse) -async def check_article_against_source(request: CheckRequest, semantic_similarity_threshold: float = .65, - model: OpenAiModel = OpenAiModel.gpt4mini): +async def check_article_against_source(request: CheckRequest, model: OpenAiModel = OpenAiModel.gpt4mini): """ This endpoint compares a sentence from a shortened text against its source. """ - + fc = FactChecker(request.source, request.sentence) logging.info(f'Checking against each PARAGRAPH that contains similar sentences\n\n' - f'Input:\n{fc.input}\n\n' - f'{len(fc.similar_para_id)} similar paragraph(s)\n' - ) + f'Input:\n{fc.input}\n\n' + f'{len(fc.similar_para_id)} similar paragraph(s)\n' + ) async_obj = [] answers = [] @@ -76,12 +77,11 @@ async def check_article_against_source(request: CheckRequest, semantic_similarit "Quelle:\n" f"{fc.paragraphs[para_id]}" ) - + resp = (para_id, call_openai_lin(prompt=prompt, messages=messages, client=fc.async_client, model=fc.model)) async_obj.append(resp) for resp in async_obj: - # wait for the asynchronous calls to finish para_id = resp[0] resp = await asyncio.gather(resp[1]) @@ -133,9 +133,11 @@ def extract_article_from_url(url): """ This endpoint extracts articles from html from a given url. """ - - headline, text, image_links = extract_urlnews(url) - + try: + headline, text, image_links = extract_urlnews(url) + except ArticleException as e: + return json.dumps({"status": "failure", "error": f"Cannot fetch or parse the URL: {str(e)}"}) + article = { 'headline': headline, 'text': text, @@ -144,7 +146,7 @@ def extract_article_from_url(url): logging.debug(article) return JSONResponse(content=article) - + if __name__ == '__main__': - uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG) diff --git a/src/helpers.py b/src/helpers.py index f00c240..69fb25c 100644 --- a/src/helpers.py +++ b/src/helpers.py @@ -1,11 +1,10 @@ from typing import List import spacy -from numpy import dot -from numpy.linalg import norm - from bs4 import BeautifulSoup from newspaper import Article +from numpy import dot +from numpy.linalg import norm nlp = spacy.load('de_core_news_md') @@ -18,14 +17,11 @@ def split_sentences(text) -> List[str]: doc = nlp(text) return [x.text for x in doc.sents] + def extract_urlnews(url) -> List[str]: article = Article(url) - - try: - article.download() - article.parse() - except: - return json.dumps({"status": "failure", "error": "Cannot fetch or parse the URL"}) + article.download() + article.parse() # Use BeautifulSoup to parse the images soup = BeautifulSoup(article.html, 'html.parser') @@ -40,4 +36,4 @@ def extract_urlnews(url) -> List[str]: article_images = [img for img in article_images if not (img.lower().endswith('.svg') or img.lower().startswith('data:image/svg+xml'))] - return article.title, article.text, article_images \ No newline at end of file + return article.title, article.text, article_images From 091e8beabbdc2b4736fde83bc0a1eb97166a4d78 Mon Sep 17 00:00:00 2001 From: Marco Lehner Date: Thu, 5 Sep 2024 13:23:10 +0200 Subject: [PATCH 3/4] :white_check_mark: Add F-scores to evaluation. --- evaluation/evaluation.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/evaluation/evaluation.py b/evaluation/evaluation.py index 979a09f..7db2ebf 100644 --- a/evaluation/evaluation.py +++ b/evaluation/evaluation.py @@ -54,13 +54,26 @@ if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] > 0: correct += 1 hal_detected += 1 + elif hypotheses["hallucination"] is False and items[hypotheses["id"]]["hallucination_level"] == 0: correct += 1 if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] == 1: low_hal_detected += 1 - print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.") - print(f"Accuracy {file}: {correct / counter}") - print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}") - print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}") + recall = hal_detected / hallucination #wie viele der hallucinationen wurden gefunden? + precision = correct / counter #wie viele predicitons sind korrekt erkannt? + + print("\n") + print(f"============================={file}=================================") + print("\n") + + #print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.") + #print(f"Accuracy {file}: {correct / counter}") + #print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}") + #print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}") + #print("\n") + print(f"Precision: {precision}") + print(f"Recall: {recall}") + print(f"F_0.5-score (precision twice as important as recall): {(1+.5**2)*(recall*precision)/((.5**2)*recall+precision)}") + print(f"F_1-score (precision as important as recall): {2*(recall*precision)/(recall+precision)}") From f6d9681a4e4382f2119b4a380533215f4e707190 Mon Sep 17 00:00:00 2001 From: Marco Lehner Date: Thu, 5 Sep 2024 13:23:42 +0200 Subject: [PATCH 4/4] :art: Remove unnecessary imports. --- src/factchecker.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/factchecker.py b/src/factchecker.py index e00c04e..2226e90 100644 --- a/src/factchecker.py +++ b/src/factchecker.py @@ -1,19 +1,8 @@ - -import asyncio -# import logging -# import re -# from uuid import uuid4 - -# import uvicorn -# from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse from openai import OpenAI, AsyncOpenAI -from src.config import app, LOGGING_CONFIG -from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem from src.datastructures import OpenAiModel -from src.helpers import cosine_similarity, split_sentences, extract_urlnews -from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings -from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt +from src.helpers import cosine_similarity, split_sentences +from src.llm import create_embeddings class FactChecker: @@ -23,7 +12,7 @@ def __init__(self, client=OpenAI(), async_client=AsyncOpenAI(), model=OpenAiModel.gpt4mini, - semantic_similarity_threshold = .57 + semantic_similarity_threshold=.57 ): self.source = source self.input = input @@ -32,14 +21,14 @@ def __init__(self, self.model = model self.semantic_similarity_threshold = semantic_similarity_threshold self.paragraphs = self.sentences = [] - + self._split_text() self._embed_sentences() self._compare_sentence_embeddings() - - self.similar_sentences = [sentence for sentence in self.sentences[:-1] if sentence['sim'] > self.semantic_similarity_threshold] - self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences])) + self.similar_sentences = [sentence for sentence in self.sentences[:-1] if + sentence['sim'] > self.semantic_similarity_threshold] + self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences])) def _split_text(self): # split self.source into paras and sents @@ -52,7 +41,8 @@ def _split_text(self): for para_id, p in enumerate(self.paragraphs): sentence_array = split_sentences(p) - self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence in enumerate(sentence_array)] + self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence + in enumerate(sentence_array)] self.sentences.append({'id': int(-1), 'sentence': self.input, 'para_id': int(-1)}) def _embed_sentences(self): @@ -64,7 +54,7 @@ def _embed_sentences(self): # for sentence, embedding in zip(self.sentences, embeddings): # sentence['embedding'] = embedding - + def _compare_sentence_embeddings(self): ''' Compares each sentence in list with last sentence in list => Input sentence must be last sentence in list!'''