1 change: 1 addition & 0 deletions README.md
@@ -167,6 +167,7 @@ GROQ_API_KEY= <groq_api_key>
PINECONE_API_KEY = <your_pinecone_API_KEY>
PORT = 8000
SEARCH_KEY = <your_Google_custom_search_engine_API_key>
HF_TOKEN = <your_huggingface_access_token>
```

*Run backend:*
4 changes: 4 additions & 0 deletions backend/app/llm_config.py
@@ -0,0 +1,4 @@
import os

# Default to a stable model
LLM_MODEL = os.getenv("LLM_MODEL", "llama-3.3-70b-versatile")
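A quick sketch of how this central default is meant to be consumed; the override value below is hypothetical and not set anywhere in this PR:

```python
# Sketch only: a deployment can switch models without code changes by setting
# LLM_MODEL in the environment before the config module is imported.
import os

os.environ["LLM_MODEL"] = "llama-3.1-8b-instant"  # hypothetical override value

from app.llm_config import LLM_MODEL  # falls back to "llama-3.3-70b-versatile" when unset

print(LLM_MODEL)  # -> "llama-3.1-8b-instant"
```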
3 changes: 2 additions & 1 deletion backend/app/modules/bias_detection/check_bias.py
@@ -27,6 +27,7 @@
from dotenv import load_dotenv
import json
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -61,7 +62,7 @@ def check_bias(text):
"content": (f"Give bias score to the following article \n\n{text}"),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=512,
)
2 changes: 1 addition & 1 deletion backend/app/modules/chat/get_rag_data.py
@@ -31,7 +31,7 @@

load_dotenv()

pc = Pinecone(os.getenv("PINECONE_API_KEY"))
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("perspective")


3 changes: 2 additions & 1 deletion backend/app/modules/chat/llm_processing.py
@@ -27,6 +27,7 @@
from groq import Groq
from dotenv import load_dotenv
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -55,7 +56,7 @@ def ask_llm(question, docs):
"""

response = client.chat.completions.create(
model="gemma2-9b-it",
model=LLM_MODEL,
messages=[
{"role": "system", "content": "Use only the context to answer."},
{"role": "user", "content": prompt},
166 changes: 166 additions & 0 deletions backend/app/modules/fact_check_tool.py
@@ -0,0 +1,166 @@
"""
Fact-checking tool node implementations that replace the Google search.

"""

import os
import json
import asyncio
from groq import Groq
from langchain_community.tools import DuckDuckGoSearchRun
from app.logging.logging_config import setup_logger
from dotenv import load_dotenv
from app.llm_config import LLM_MODEL

load_dotenv()

client = Groq(api_key=os.getenv("GROQ_API_KEY"))
search_tool = DuckDuckGoSearchRun()

logger = setup_logger(__name__)

async def extract_claims_node(state):
logger.info("--- Fact Check Step 1: Extracting Claims ---")
try:
text = state.get("cleaned_text", "")

response = await asyncio.to_thread(
client.chat.completions.create,
messages=[
{
"role": "system",
"content": "Extract specific, verifiable factual claims from the text. Ignore opinions. Return a simple list of strings, one per line."
},
{"role": "user", "content": text[:4000]}
],
model=LLM_MODEL,
temperature=0.0
)

raw_content = response.choices[0].message.content

claims = [line.strip("- *") for line in raw_content.split("\n") if len(line.strip()) > 10]

logger.info(f"Extracted {len(claims)} claims.")
return {"claims": claims}

except Exception as e:
logger.error(f"Error extraction claims: {e}")
return {"claims": []}

async def plan_searches_node(state):
logger.info("--- Fact Check Step 2: Planning Searches ---")
claims = state.get("claims", [])

if not claims:
return {"search_queries": []}

claims_text = "\n".join([f"{i}. {c}" for i, c in enumerate(claims)])

prompt = f"""
You are a search query generator.
For each claim, generate a search query to verify it.

Output MUST be valid JSON in this format:
{{
"searches": [
{{"query": "search query 1", "claim_id": 0}},
{{"query": "search query 2", "claim_id": 1}}
]
}}

Claims:
{claims_text}
"""

try:
response = await asyncio.to_thread(
client.chat.completions.create,
messages=[{"role": "user", "content": prompt}],
model=LLM_MODEL,
temperature=0.0,
response_format={"type": "json_object"}
)

plan_json = json.loads(response.choices[0].message.content)
queries = plan_json.get("searches", [])

return {"search_queries": queries}

except Exception as e:
logger.error(f"Failed to plan searches: {e}")
return {"search_queries": []}

async def execute_searches_node(state):
logger.info("--- Fact Check Step 3: Executing Parallel Searches ---")
queries = state.get("search_queries", [])

if not queries:
return {"search_results": []}

async def run_one_search(q):
try:
query_str = q.get("query")
c_id = q.get("claim_id")

res = await asyncio.to_thread(search_tool.invoke, query_str)
logger.info(f"Search Result for Claim {c_id}: {res[:200]}...")
return {"claim_id": c_id, "result": res}
except Exception as e:
return {"claim_id": q.get("claim_id"), "result": "Search failed"}

results = await asyncio.gather(*[run_one_search(q) for q in queries])

logger.info(f"Completed {len(results)} searches.")
return {"search_results": results}

async def verify_facts_node(state):
logger.info("--- Fact Check Step 4: Verifying Facts ---")
claims = state.get("claims", [])
results = state.get("search_results", [])

if not claims:
return {"facts": [], "fact_check_done": True}

context = "Verify these claims based on the search results:\n"
for item in results:
c_id = item["claim_id"]
if c_id < len(claims):
context += f"\nClaim: {claims[c_id]}\nEvidence: {item['result']}\n"
Comment on lines +126 to +129
⚠️ Potential issue | 🟡 Minor

Potential KeyError or type mismatch for claim_id.

The code assumes item["claim_id"] exists and is an integer comparable to len(claims). If the LLM returns malformed JSON or the search fails, claim_id might be missing, None, or a non-integer, causing runtime errors.

🛡️ Add defensive handling
     for item in results:
-        c_id = item["claim_id"]
-        if c_id < len(claims):
+        c_id = item.get("claim_id")
+        if c_id is not None and isinstance(c_id, int) and 0 <= c_id < len(claims):
             context += f"\nClaim: {claims[c_id]}\nEvidence: {item['result']}\n"
🤖 Prompt for AI Agents
In @backend/app/modules/fact_check_tool.py around lines 126-129, the loop over
results assumes item["claim_id"] is present and an int; add defensive checks in
the for item in results loop to (1) verify "claim_id" in item, (2) coerce or
validate it as an integer (e.g., try int(item["claim_id"]) and handle
ValueError/TypeError), (3) ensure the resulting index is within range 0 <= c_id
< len(claims), and (4) skip or log malformed entries instead of using them when
building context; update references to item["claim_id"], c_id, claims, and
context accordingly so missing/invalid claim_id values cannot raise
KeyError/TypeError or index errors.
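A sketch of the coercion variant described in the prompt above (illustrative only, not a committed suggestion):

```python
# Illustrative sketch: coerce claim_id to int and skip malformed entries instead
# of letting a missing or non-integer value raise at lookup time.
for item in results:
    raw_id = item.get("claim_id")
    try:
        c_id = int(raw_id)
    except (TypeError, ValueError):
        logger.warning(f"Skipping search result with invalid claim_id: {raw_id!r}")
        continue
    if 0 <= c_id < len(claims):
        context += f"\nClaim: {claims[c_id]}\nEvidence: {item.get('result', '')}\n"
```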


try:
response = await asyncio.to_thread(
client.chat.completions.create,
messages=[
{
"role": "system",
"content": "You are a strict fact checker. Return a JSON list of objects with keys: 'claim', 'status' (True/False/Unverified), and 'reason'."
},
{"role": "user", "content": context}
],
model=LLM_MODEL,
temperature=0.0,
response_format={"type": "json_object"}
)

final_verdict_str = response.choices[0].message.content

data = json.loads(final_verdict_str)

facts_list = []
if isinstance(data, dict):
# Look for common keys if wrapped
if "facts" in data:
facts_list = data["facts"]
elif "verified_claims" in data:
facts_list = data["verified_claims"]
else:
facts_list = [data]
elif isinstance(data, list):
facts_list = data

return {"facts": facts_list, "fact_check_done": True}

except Exception as e:
logger.error(f"Verification failed: {e}")
return {"facts": [], "fact_check_done": True}
5 changes: 3 additions & 2 deletions backend/app/modules/facts_check/llm_processing.py
@@ -29,6 +29,7 @@
import json
import re
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -63,7 +64,7 @@ def run_claim_extractor_sdk(state):
),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=512,
)
@@ -128,7 +129,7 @@ def run_fact_verifier_sdk(search_results):
),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=256,
)
38 changes: 18 additions & 20 deletions backend/app/modules/langgraph_builder.py
@@ -6,12 +6,12 @@
and retry logic.

Workflow:
1. Sentiment analysis on the cleaned text.
2. Fact-checking detected claims.
3. Generating a counter-perspective.
4. Judging the quality of the generated perspective.
5. Storing results and sending them downstream.
6. Error handling at any step if failures occur.
1. Parallel analysis: sentiment analysis and the fact-checking tool pipeline
(extract_claims -> plan_searches -> execute_searches -> verify_facts)
2. Generating a counter-perspective.
3. Judging the quality of the generated perspective.
4. Storing results and sending them downstream.
5. Error handling at any step if failures occur.

Core Features:
- Uses a TypedDict (`MyState`) to define the shape of the pipeline's
@@ -31,18 +31,18 @@
"""


from typing import List, Any
from langgraph.graph import StateGraph
from typing_extensions import TypedDict

from app.modules.langgraph_nodes import (
sentiment,
fact_check,
generate_perspective,
judge,
store_and_send,
error_handler,
)

from typing_extensions import TypedDict


class MyState(TypedDict):
cleaned_text: str
@@ -52,29 +52,26 @@ class MyState(TypedDict):
score: int
retries: int
status: str
claims: List[str]
search_queries: List[Any]
search_results: List[Any]


def build_langgraph():
graph = StateGraph(MyState)

graph.add_node("sentiment_analysis", sentiment.run_sentiment_sdk)
graph.add_node("fact_checking", fact_check.run_fact_check)
# parallel_analysis runs sentiment analysis and the fact-check tool pipeline concurrently
graph.add_node("parallel_analysis", sentiment.run_parallel_analysis)

graph.add_node("generate_perspective", generate_perspective.generate_perspective)
graph.add_node("judge_perspective", judge.judge_perspective)
graph.add_node("store_and_send", store_and_send.store_and_send)
graph.add_node("error_handler", error_handler.error_handler)

graph.set_entry_point(
"sentiment_analysis",
)
graph.set_entry_point("parallel_analysis")

graph.add_conditional_edges(
"sentiment_analysis",
lambda x: ("error_handler" if x.get("status") == "error" else "fact_checking"),
)

graph.add_conditional_edges(
"fact_checking",
"parallel_analysis",
lambda x: (
"error_handler" if x.get("status") == "error" else "generate_perspective"
),
@@ -101,6 +98,7 @@ def build_langgraph():
else "store_and_send"
),
)

graph.add_conditional_edges(
"store_and_send",
lambda x: ("error_handler" if x.get("status") == "error" else "__end__"),
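The new entry point is sentiment.run_parallel_analysis, which this diff references but does not show. Purely as an assumption about how the two branches might be fanned out, such a node could look roughly like this:

```python
# Assumed sketch only -- the real run_parallel_analysis lives in
# app/modules/langgraph_nodes/sentiment.py and is not part of this hunk.
import asyncio
from app.modules.langgraph_nodes.sentiment import run_sentiment_sdk  # existing node (assumed synchronous)
from app.modules.fact_check_tool import (
    extract_claims_node, plan_searches_node, execute_searches_node, verify_facts_node,
)

async def run_parallel_analysis(state):
    async def fact_check_branch():
        update = {}
        for node in (extract_claims_node, plan_searches_node,
                     execute_searches_node, verify_facts_node):
            update.update(await node({**state, **update}))
        return update

    # Run sentiment analysis and the fact-check branch concurrently, then merge
    # their partial state updates into one dict for the next graph node.
    sentiment_update, fact_update = await asyncio.gather(
        asyncio.to_thread(run_sentiment_sdk, state),
        fact_check_branch(),
    )
    return {**sentiment_update, **fact_update}
```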
30 changes: 16 additions & 14 deletions backend/app/modules/langgraph_nodes/generate_perspective.py
@@ -21,22 +21,23 @@


from app.utils.prompt_templates import generation_prompt
from app.llm_config import LLM_MODEL
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from app.logging.logging_config import setup_logger

from typing import List
logger = setup_logger(__name__)


prompt = generation_prompt


class PerspectiveOutput(BaseModel):
reasoning: str = Field(..., description="Chain-of-thought reasoning steps")
reasoning: List[str] = Field(description="Chain-of-thought reasoning steps", alias="reasoning_steps")
perspective: str = Field(..., description="Generated opposite perspective")


my_llm = "llama-3.3-70b-versatile"
my_llm = LLM_MODEL

llm = ChatGroq(model=my_llm, temperature=0.7)

@@ -56,17 +57,18 @@ def generate_perspective(state):

if not text:
raise ValueError("Missing or empty 'cleaned_text' in state")
elif not facts:
raise ValueError("Missing or empty 'facts' in state")

facts_str = "\n".join(
[
f"Claim: {f['original_claim']}\n"
"Verdict: {f['verdict']}\nExplanation: "
"{f['explanation']}"
for f in state["facts"]
]
)
if not facts:
logger.warning("No facts found in state. Generating perspective based on text only.")
facts_str = "No specific claims verified."
else:
facts_str = "\n".join(
[
f"Claim: {f.get('claim', f.get('original_claim', 'Unknown Claim'))}\n"
f"Verdict: {f.get('status', f.get('verdict', 'Unknown Verdict'))}\n"
f"Explanation: {f.get('reason', f.get('explanation', 'No explanation'))}"
for f in facts
]
)

result = chain.invoke(
{
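One note on the PerspectiveOutput change: with pydantic v2, giving the field an alias means the structured output can arrive as reasoning_steps while the rest of the code keeps reading .reasoning. A minimal check of that behaviour (a sketch, assuming pydantic v2 defaults):

```python
# Sketch only: demonstrating the alias behaviour of the updated model.
from typing import List
from pydantic import BaseModel, Field

class PerspectiveOutput(BaseModel):
    reasoning: List[str] = Field(description="Chain-of-thought reasoning steps",
                                 alias="reasoning_steps")
    perspective: str = Field(..., description="Generated opposite perspective")

out = PerspectiveOutput(reasoning_steps=["step 1", "step 2"], perspective="A counter-view")
print(out.reasoning)  # ['step 1', 'step 2']

# Note: without model_config = ConfigDict(populate_by_name=True), constructing the
# model with reasoning=[...] would fail validation, since only the alias name is
# accepted on input by default.
```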