Skip to content

Commit

Permalink
Merge pull request #1 from br-data/review
Browse files Browse the repository at this point in the history
Review
  • Loading branch information
ringlern authored Sep 19, 2024
2 parents 905569a + f6d9681 commit 29ac310
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 50 deletions.
34 changes: 18 additions & 16 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
import asyncio
import json
import logging
import re
from uuid import uuid4

import uvicorn
from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse
from newspaper.article import ArticleException
from openai import OpenAI, AsyncOpenAI

from src.config import app, LOGGING_CONFIG
from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem
from src.datastructures import OpenAiModel
from src.helpers import cosine_similarity, split_sentences, extract_urlnews
from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings
from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt, check_prompt_vs_text
from src.factchecker import FactChecker
from src.helpers import extract_urlnews
from src.llm import handle_stream, tool_chain, call_openai_lin
from src.prompts import system_prompt_honest, system_prompt_malicious, check_summary_prompt, check_prompt_vs_text

run_id = uuid4()
client = OpenAI()
Expand Down Expand Up @@ -51,17 +53,16 @@ def completion(request: GenerationRequest, model: OpenAiModel = OpenAiModel.gpt4


@app.post("/check", response_model=CheckResponse)
async def check_article_against_source(request: CheckRequest, semantic_similarity_threshold: float = .65,
model: OpenAiModel = OpenAiModel.gpt4mini):
async def check_article_against_source(request: CheckRequest, model: OpenAiModel = OpenAiModel.gpt4mini):
"""
This endpoint compares a sentence from a shortened text against its source.
"""

fc = FactChecker(request.source, request.sentence)
logging.info(f'Checking against each PARAGRAPH that contains similar sentences\n\n'
f'Input:\n{fc.input}\n\n'
f'{len(fc.similar_para_id)} similar paragraph(s)\n'
)
f'Input:\n{fc.input}\n\n'
f'{len(fc.similar_para_id)} similar paragraph(s)\n'
)

async_obj = []
answers = []
Expand All @@ -76,12 +77,11 @@ async def check_article_against_source(request: CheckRequest, semantic_similarit
"Quelle:\n"
f"{fc.paragraphs[para_id]}"
)

resp = (para_id, call_openai_lin(prompt=prompt, messages=messages, client=fc.async_client, model=fc.model))
async_obj.append(resp)

for resp in async_obj:

# wait for the asynchronous calls to finish
para_id = resp[0]
resp = await asyncio.gather(resp[1])
Expand Down Expand Up @@ -133,9 +133,11 @@ def extract_article_from_url(url):
"""
This endpoint extracts articles from html from a given url.
"""

headline, text, image_links = extract_urlnews(url)

try:
headline, text, image_links = extract_urlnews(url)
except ArticleException as e:
return json.dumps({"status": "failure", "error": f"Cannot fetch or parse the URL: {str(e)}"})

article = {
'headline': headline,
'text': text,
Expand All @@ -144,7 +146,7 @@ def extract_article_from_url(url):

logging.debug(article)
return JSONResponse(content=article)


if __name__ == '__main__':
uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG)
uvicorn.run(app, host="0.0.0.0", port=3000, log_config=LOGGING_CONFIG)
79 changes: 79 additions & 0 deletions data/niels_result.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{"id": "QbnlpQu:0:0", "hallucination": false, "prob": 1.0}
{"id": "QbnlpQu:0:1", "hallucination": true, "prob": 1.0}
{"id": "QbnlpQu:0:2", "hallucination": true, "prob": 1.0}
{"id": "Qzej4uY:0:1", "hallucination": true, "prob": 1.0}
{"id": "Qzej4uY:0:2", "hallucination": true, "prob": 1.0}
{"id": "QpLxXjj:1:0", "hallucination": false, "prob": 1.0}
{"id": "QpLxXjj:1:1", "hallucination": true, "prob": 1.0}
{"id": "QpLxXjj:1:2", "hallucination": true, "prob": 1.0}
{"id": "Tvqzbee:7:0", "hallucination": false, "prob": 1.0}
{"id": "Tvqzbee:7:1", "hallucination": true, "prob": 1.0}
{"id": "TfuvgKU:7:0", "hallucination": false, "prob": 1.0}
{"id": "TfuvgKU:7:1", "hallucination": true, "prob": 1.0}
{"id": "TfuvgKU:7:2", "hallucination": true, "prob": 1.0}
{"id": "TzcySXe:32:0", "hallucination": true, "prob": 1.0}
{"id": "TzcySXe:32:2", "hallucination": true, "prob": 1.0}
{"id": "Tnw5jgk:1:0", "hallucination": false, "prob": 1.0}
{"id": "Tnw5jgk:1:1", "hallucination": true, "prob": 1.0}
{"id": "Tnw5jgk:1:2", "hallucination": true, "prob": 1.0}
{"id": "SuzRVgV:4:0", "hallucination": false, "prob": 1.0}
{"id": "SuzRVgV:4:1", "hallucination": true, "prob": 1.0}
{"id": "SuzRVgV:4:2", "hallucination": true, "prob": 1.0}
{"id": "SrBKTF6:1:0", "hallucination": false, "prob": 1.0}
{"id": "SrBKTF6:1:1", "hallucination": false, "prob": 1.0}
{"id": "SrBKTF6:1:2", "hallucination": true, "prob": 1.0}
{"id": "QnbiQif:0:0", "hallucination": false, "prob": 1.0}
{"id": "QnbiQif:0:1", "hallucination": true, "prob": 1.0}
{"id": "QnbiQif:0:2", "hallucination": true, "prob": 1.0}
{"id": "UDaDpiY:12:0", "hallucination": true, "prob": 1.0}
{"id": "UDaDpiY:12:1", "hallucination": true, "prob": 1.0}
{"id": "UDaDpiY:12:2", "hallucination": true, "prob": 1.0}
{"id": "QS6lXIY:4:0", "hallucination": false, "prob": 1.0}
{"id": "QS6lXIY:4:1", "hallucination": true, "prob": 1.0}
{"id": "QS6lXIY:4:2", "hallucination": true, "prob": 1.0}
{"id": "UCdY4tR:234:0", "hallucination": false, "prob": 1.0}
{"id": "UCdY4tR:234:1", "hallucination": true, "prob": 1.0}
{"id": "UCdY4tR:234:2", "hallucination": true, "prob": 1.0}
{"id": "Su6AagY:257:0", "hallucination": false, "prob": 1.0}
{"id": "Su6AagY:257:1", "hallucination": true, "prob": 1.0}
{"id": "Su6AagY:257:2", "hallucination": true, "prob": 1.0}
{"id": "UA01Aus:165:0", "hallucination": false, "prob": 1.0}
{"id": "UA01Aus:165:1", "hallucination": true, "prob": 1.0}
{"id": "UA01Aus:165:2", "hallucination": true, "prob": 1.0}
{"id": "U0l0Hpg:3:0", "hallucination": false, "prob": 1.0}
{"id": "U0l0Hpg:3:1", "hallucination": true, "prob": 1.0}
{"id": "U0l0Hpg:3:2", "hallucination": true, "prob": 1.0}
{"id": "UFgFNZn:17:0", "hallucination": false, "prob": 1.0}
{"id": "UFgFNZn:17:1", "hallucination": true, "prob": 1.0}
{"id": "UFgFNZn:17:2", "hallucination": true, "prob": 1.0}
{"id": "TdgE8xc:2:0", "hallucination": false, "prob": 1.0}
{"id": "TdgE8xc:2:1", "hallucination": true, "prob": 1.0}
{"id": "TdgE8xc:2:2", "hallucination": true, "prob": 1.0}
{"id": "UGvk4He:5:0", "hallucination": false, "prob": 1.0}
{"id": "UGvk4He:5:1", "hallucination": true, "prob": 1.0}
{"id": "UGvk4He:5:2", "hallucination": true, "prob": 1.0}
{"id": "S5NwkNc:331:0", "hallucination": false, "prob": 1.0}
{"id": "S5NwkNc:331:1", "hallucination": true, "prob": 1.0}
{"id": "Qk1Bec5:0:0", "hallucination": false, "prob": 1.0}
{"id": "Qk1Bec5:0:2", "hallucination": true, "prob": 1.0}
{"id": "U9Qq0Gg:5:0", "hallucination": false, "prob": 1.0}
{"id": "U9Qq0Gg:5:1", "hallucination": false, "prob": 1.0}
{"id": "U9Qq0Gg:5:2", "hallucination": true, "prob": 1.0}
{"id": "QeEX09Y:5:0", "hallucination": true, "prob": 1.0}
{"id": "QeEX09Y:5:1", "hallucination": true, "prob": 1.0}
{"id": "QeEX09Y:5:2", "hallucination": true, "prob": 1.0}
{"id": "SJmtsvf:493:0", "hallucination": false, "prob": 1.0}
{"id": "SJmtsvf:493:2", "hallucination": true, "prob": 1.0}
{"id": "U1tS18x:14:0", "hallucination": false, "prob": 1.0}
{"id": "U1tS18x:14:2", "hallucination": true, "prob": 1.0}
{"id": "SV1lm0V:26:0", "hallucination": true, "prob": 1.0}
{"id": "SV1lm0V:26:1", "hallucination": true, "prob": 1.0}
{"id": "SV1lm0V:26:2", "hallucination": true, "prob": 1.0}
{"id": "TvlQJW6:0:0", "hallucination": false, "prob": 1.0}
{"id": "TvlQJW6:0:1", "hallucination": true, "prob": 1.0}
{"id": "TvlQJW6:0:2", "hallucination": true, "prob": 1.0}
{"id": "UCeAsYY:6:0", "hallucination": false, "prob": 1.0}
{"id": "UCeAsYY:6:1", "hallucination": false, "prob": 1.0}
{"id": "UCeAsYY:6:2", "hallucination": true, "prob": 1.0}
{"id": "QvVovKI:2:0", "hallucination": false, "prob": 1.0}
{"id": "QvVovKI:2:2", "hallucination": true, "prob": 1.0}
21 changes: 17 additions & 4 deletions evaluation/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,26 @@
if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] > 0:
correct += 1
hal_detected += 1

elif hypotheses["hallucination"] is False and items[hypotheses["id"]]["hallucination_level"] == 0:
correct += 1

if hypotheses["hallucination"] is True and items[hypotheses["id"]]["hallucination_level"] == 1:
low_hal_detected += 1

print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.")
print(f"Accuracy {file}: {correct / counter}")
print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}")
print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}")
# Summary metrics for the current result file.
# Recall: how many of the hallucinations were found?
recall = hal_detected / hallucination
# How many predictions were classified correctly?
# NOTE(review): correct / counter also counts items correctly classified as
# non-hallucinated, so this is the accuracy (see the disabled "Accuracy"
# print below) rather than precision = detected hallucinations / predicted
# hallucinations. True precision needs a count of positive predictions,
# which this section does not have — confirm and rename or fix.
precision = correct / counter

# F_beta = (1 + beta^2) * P * R / (beta^2 * P + R); beta < 1 favours precision.
# The previous F_0.5 expression had P and R swapped in the denominator, which
# made it numerically equal to F_2 (recall weighted higher than precision).
# Both scores fall back to 0.0 when precision and recall are both 0, instead
# of raising ZeroDivisionError.
beta_sq = .5 ** 2
f_half_denom = beta_sq * precision + recall
f_half = (1 + beta_sq) * (recall * precision) / f_half_denom if f_half_denom else 0.0
f_one_denom = recall + precision
f_one = 2 * (recall * precision) / f_one_denom if f_one_denom else 0.0

print("\n")
print(f"============================={file}=================================")
print("\n")

# Previous report format, currently disabled.
#print(f"Analysed {counter} files with {low_hallu} files of hallucination level 1.")
#print(f"Accuracy {file}: {correct / counter}")
#print(f"Detected Hallucinations {file}: {hal_detected/ hallucination}")
#print(f"Level 1 Hallucinations detected {file}: {low_hal_detected/ low_hallu}")
#print("\n")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F_0.5-score (precision twice as important as recall): {f_half}")
print(f"F_1-score (precision as important as recall): {f_one}")
30 changes: 10 additions & 20 deletions src/factchecker.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@

import asyncio
# import logging
# import re
# from uuid import uuid4

# import uvicorn
# from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse
from openai import OpenAI, AsyncOpenAI

from src.config import app, LOGGING_CONFIG
from src.datastructures import GenerationRequest, CheckResponse, CheckRequest, CheckResponseItem
from src.datastructures import OpenAiModel
from src.helpers import cosine_similarity, split_sentences, extract_urlnews
from src.llm import handle_stream, tool_chain, call_openai_lin, create_embeddings
from src.prompts import system_prompt_honest, system_prompt_malicious, check_prompt, check_summary_prompt
from src.helpers import cosine_similarity, split_sentences
from src.llm import create_embeddings


class FactChecker:
Expand All @@ -23,7 +12,7 @@ def __init__(self,
client=OpenAI(),
async_client=AsyncOpenAI(),
model=OpenAiModel.gpt4mini,
semantic_similarity_threshold = .57
semantic_similarity_threshold=.57
):
self.source = source
self.input = input
Expand All @@ -32,14 +21,14 @@ def __init__(self,
self.model = model
self.semantic_similarity_threshold = semantic_similarity_threshold
self.paragraphs = self.sentences = []

self._split_text()
self._embed_sentences()
self._compare_sentence_embeddings()

self.similar_sentences = [sentence for sentence in self.sentences[:-1] if sentence['sim'] > self.semantic_similarity_threshold]
self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences]))

self.similar_sentences = [sentence for sentence in self.sentences[:-1] if
sentence['sim'] > self.semantic_similarity_threshold]
self.similar_para_id = list(set([sentence['para_id'] for sentence in self.similar_sentences]))

def _split_text(self):
# split self.source into paras and sents
Expand All @@ -52,7 +41,8 @@ def _split_text(self):

for para_id, p in enumerate(self.paragraphs):
sentence_array = split_sentences(p)
self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence in enumerate(sentence_array)]
self.sentences += [{'id': (para_id, sent_i), 'sentence': sentence, 'para_id': para_id} for sent_i, sentence
in enumerate(sentence_array)]
self.sentences.append({'id': int(-1), 'sentence': self.input, 'para_id': int(-1)})

def _embed_sentences(self):
Expand All @@ -64,7 +54,7 @@ def _embed_sentences(self):

# for sentence, embedding in zip(self.sentences, embeddings):
# sentence['embedding'] = embedding

def _compare_sentence_embeddings(self):
''' Compares each sentence in list with last sentence in list
=> Input sentence must be last sentence in list!'''
Expand Down
16 changes: 6 additions & 10 deletions src/helpers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import List

import spacy
from numpy import dot
from numpy.linalg import norm

from bs4 import BeautifulSoup
from newspaper import Article
from numpy import dot
from numpy.linalg import norm

nlp = spacy.load('de_core_news_md')

Expand All @@ -18,14 +17,11 @@ def split_sentences(text) -> List[str]:
doc = nlp(text)
return [x.text for x in doc.sents]


def extract_urlnews(url) -> List[str]:
article = Article(url)

try:
article.download()
article.parse()
except:
return json.dumps({"status": "failure", "error": "Cannot fetch or parse the URL"})
article.download()
article.parse()

# Use BeautifulSoup to parse the images
soup = BeautifulSoup(article.html, 'html.parser')
Expand All @@ -40,4 +36,4 @@ def extract_urlnews(url) -> List[str]:
article_images = [img for img in article_images if
not (img.lower().endswith('.svg') or img.lower().startswith('data:image/svg+xml'))]

return article.title, article.text, article_images
return article.title, article.text, article_images

0 comments on commit 29ac310

Please sign in to comment.