1 change: 1 addition & 0 deletions README.md
@@ -167,6 +167,7 @@ GROQ_API_KEY= <groq_api_key>
PINECONE_API_KEY = <your_pinecone_API_KEY>
PORT = 8000
SEARCH_KEY = <your_Google_custom_search_engine_API_key>
HF_TOKEN = <your_huggingface_access_token>
```

*Run backend:*
4 changes: 4 additions & 0 deletions backend/app/llm_config.py
@@ -0,0 +1,4 @@
import os

# Default to a stable model
LLM_MODEL = os.getenv("LLM_MODEL", "llama-3.3-70b-versatile")
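A quick sketch of how this central default is meant to be consumed; the override value below is hypothetical and not set anywhere in this PR:

```python
# Sketch only: a deployment can switch models without code changes by setting
# LLM_MODEL in the environment before the config module is imported.
import os

os.environ["LLM_MODEL"] = "llama-3.1-8b-instant"  # hypothetical override value

from app.llm_config import LLM_MODEL  # falls back to "llama-3.3-70b-versatile" when unset

print(LLM_MODEL)  # -> "llama-3.1-8b-instant"
```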
3 changes: 2 additions & 1 deletion backend/app/modules/bias_detection/check_bias.py
@@ -27,6 +27,7 @@
from dotenv import load_dotenv
import json
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -61,7 +62,7 @@ def check_bias(text):
"content": (f"Give bias score to the following article \n\n{text}"),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=512,
)
2 changes: 1 addition & 1 deletion backend/app/modules/chat/get_rag_data.py
@@ -31,7 +31,7 @@

load_dotenv()

pc = Pinecone(os.getenv("PINECONE_API_KEY"))
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("perspective")


3 changes: 2 additions & 1 deletion backend/app/modules/chat/llm_processing.py
@@ -27,6 +27,7 @@
from groq import Groq
from dotenv import load_dotenv
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -55,7 +56,7 @@ def ask_llm(question, docs):
"""

response = client.chat.completions.create(
model="gemma2-9b-it",
model=LLM_MODEL,
messages=[
{"role": "system", "content": "Use only the context to answer."},
{"role": "user", "content": prompt},
166 changes: 166 additions & 0 deletions backend/app/modules/fact_check_tool.py
@@ -0,0 +1,166 @@
"""
Fact-checking tool node implementations that replace the Google search.

"""

import os
import json
import asyncio
from groq import Groq
from langchain_community.tools import DuckDuckGoSearchRun
from app.logging.logging_config import setup_logger
from dotenv import load_dotenv
from app.llm_config import LLM_MODEL

load_dotenv()

client = Groq(api_key=os.getenv("GROQ_API_KEY"))
search_tool = DuckDuckGoSearchRun()

logger = setup_logger(__name__)

async def extract_claims_node(state):
logger.info("--- Fact Check Step 1: Extracting Claims ---")
try:
text = state.get("cleaned_text", "")

response = await asyncio.to_thread(
client.chat.completions.create,
messages=[
{
"role": "system",
"content": "Extract specific, verifiable factual claims from the text. Ignore opinions. Return a simple list of strings, one per line."
},
{"role": "user", "content": text[:4000]}
],
model=LLM_MODEL,
temperature=0.0
)

raw_content = response.choices[0].message.content

claims = [line.strip("- *") for line in raw_content.split("\n") if len(line.strip()) > 10]

logger.info(f"Extracted {len(claims)} claims.")
return {"claims": claims}

except Exception as e:
logger.error(f"Error extraction claims: {e}")
return {"claims": []}

async def plan_searches_node(state):
logger.info("--- Fact Check Step 2: Planning Searches ---")
claims = state.get("claims", [])

if not claims:
return {"search_queries": []}

claims_text = "\n".join([f"{i}. {c}" for i, c in enumerate(claims)])

prompt = f"""
You are a search query generator.
For each claim, generate a search query to verify it.

Output MUST be valid JSON in this format:
{{
"searches": [
{{"query": "search query 1", "claim_id": 0}},
{{"query": "search query 2", "claim_id": 1}}
]
}}

Claims:
{claims_text}
"""

try:
response = await asyncio.to_thread(
client.chat.completions.create,
messages=[{"role": "user", "content": prompt}],
model=LLM_MODEL,
temperature=0.0,
response_format={"type": "json_object"}
)

plan_json = json.loads(response.choices[0].message.content)
queries = plan_json.get("searches", [])

return {"search_queries": queries}

except Exception as e:
logger.error(f"Failed to plan searches: {e}")
return {"search_queries": []}

async def execute_searches_node(state):
logger.info("--- Fact Check Step 3: Executing Parallel Searches ---")
queries = state.get("search_queries", [])

if not queries:
return {"search_results": []}

async def run_one_search(q):
try:
query_str = q.get("query")
c_id = q.get("claim_id")

res = await asyncio.to_thread(search_tool.invoke, query_str)
logger.info(f"Search Result for Claim {c_id}: {res[:200]}...")
return {"claim_id": c_id, "result": res}
except Exception as e:
return {"claim_id": q.get("claim_id"), "result": "Search failed"}

results = await asyncio.gather(*[run_one_search(q) for q in queries])

logger.info(f"Completed {len(results)} searches.")
return {"search_results": results}

async def verify_facts_node(state):
logger.info("--- Fact Check Step 4: Verifying Facts ---")
claims = state.get("claims", [])
results = state.get("search_results", [])

if not claims:
return {"facts": [], "fact_check_done": True}

context = "Verify these claims based on the search results:\n"
for item in results:
c_id = item["claim_id"]
if c_id < len(claims):
context += f"\nClaim: {claims[c_id]}\nEvidence: {item['result']}\n"
Comment on lines +126 to +129
⚠️ Potential issue | 🟡 Minor

Potential KeyError or type mismatch for claim_id.

The code assumes item["claim_id"] exists and is an integer comparable to len(claims). If the LLM returns malformed JSON or the search fails, claim_id might be missing, None, or a non-integer, causing runtime errors.

🛡️ Add defensive handling
     for item in results:
-        c_id = item["claim_id"]
-        if c_id < len(claims):
+        c_id = item.get("claim_id")
+        if c_id is not None and isinstance(c_id, int) and 0 <= c_id < len(claims):
             context += f"\nClaim: {claims[c_id]}\nEvidence: {item['result']}\n"
🤖 Prompt for AI Agents
In @backend/app/modules/fact_check_tool.py around lines 126-129, the loop over
results assumes item["claim_id"] is present and an int; add defensive checks in
the for item in results loop to (1) verify "claim_id" in item, (2) coerce or
validate it as an integer (e.g., try int(item["claim_id"]) and handle
ValueError/TypeError), (3) ensure the resulting index is within range 0 <= c_id
< len(claims), and (4) skip or log malformed entries instead of using them when
building context; update references to item["claim_id"], c_id, claims, and
context accordingly so missing/invalid claim_id values cannot raise
KeyError/TypeError or index errors.
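A sketch of the coercion variant described in the prompt above (illustrative only, not a committed suggestion):

```python
# Illustrative sketch: coerce claim_id to int and skip malformed entries instead
# of letting a missing or non-integer value raise at lookup time.
for item in results:
    raw_id = item.get("claim_id")
    try:
        c_id = int(raw_id)
    except (TypeError, ValueError):
        logger.warning(f"Skipping search result with invalid claim_id: {raw_id!r}")
        continue
    if 0 <= c_id < len(claims):
        context += f"\nClaim: {claims[c_id]}\nEvidence: {item.get('result', '')}\n"
```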


try:
response = await asyncio.to_thread(
client.chat.completions.create,
messages=[
{
"role": "system",
"content": "You are a strict fact checker. Return a JSON list of objects with keys: 'claim', 'status' (True/False/Unverified), and 'reason'."
},
{"role": "user", "content": context}
],
model=LLM_MODEL,
temperature=0.0,
response_format={"type": "json_object"}
)

final_verdict_str = response.choices[0].message.content

data = json.loads(final_verdict_str)

facts_list = []
if isinstance(data, dict):
# Look for common keys if wrapped
if "facts" in data:
facts_list = data["facts"]
elif "verified_claims" in data:
facts_list = data["verified_claims"]
else:
facts_list = [data]
elif isinstance(data, list):
facts_list = data

return {"facts": facts_list, "fact_check_done": True}

except Exception as e:
logger.error(f"Verification failed: {e}")
return {"facts": [], "fact_check_done": True}
5 changes: 3 additions & 2 deletions backend/app/modules/facts_check/llm_processing.py
@@ -29,6 +29,7 @@
import json
import re
from app.logging.logging_config import setup_logger
from app.llm_config import LLM_MODEL

logger = setup_logger(__name__)

@@ -63,7 +64,7 @@ def run_claim_extractor_sdk(state):
),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=512,
)
@@ -128,7 +129,7 @@ def run_fact_verifier_sdk(search_results):
),
},
],
model="gemma2-9b-it",
model=LLM_MODEL,
temperature=0.3,
max_tokens=256,
)
38 changes: 18 additions & 20 deletions backend/app/modules/langgraph_builder.py
@@ -6,12 +6,12 @@
and retry logic.

Workflow:
1. Sentiment analysis on the cleaned text.
2. Fact-checking detected claims.
3. Generating a counter-perspective.
4. Judging the quality of the generated perspective.
5. Storing results and sending them downstream.
6. Error handling at any step if failures occur.
1. Parallel analysis: sentiment analysis and the fact-checking tool pipeline
(extract_claims -> plan_searches -> execute_searches -> verify_facts)
2. Generating a counter-perspective.
3. Judging the quality of the generated perspective.
4. Storing results and sending them downstream.
5. Error handling at any step if failures occur.

Core Features:
- Uses a TypedDict (`MyState`) to define the shape of the pipeline's
@@ -31,18 +31,18 @@
"""


from typing import List, Any
from langgraph.graph import StateGraph
from typing_extensions import TypedDict

from app.modules.langgraph_nodes import (
sentiment,
fact_check,
generate_perspective,
judge,
store_and_send,
error_handler,
)

from typing_extensions import TypedDict


class MyState(TypedDict):
cleaned_text: str
@@ -52,29 +52,26 @@ class MyState(TypedDict):
score: int
retries: int
status: str
claims: List[str]
search_queries: List[Any]
search_results: List[Any]


def build_langgraph():
graph = StateGraph(MyState)

graph.add_node("sentiment_analysis", sentiment.run_sentiment_sdk)
graph.add_node("fact_checking", fact_check.run_fact_check)
# parallel_analysis runs sentiment analysis and the fact-check tool pipeline concurrently
graph.add_node("parallel_analysis", sentiment.run_parallel_analysis)

graph.add_node("generate_perspective", generate_perspective.generate_perspective)
graph.add_node("judge_perspective", judge.judge_perspective)
graph.add_node("store_and_send", store_and_send.store_and_send)
graph.add_node("error_handler", error_handler.error_handler)

graph.set_entry_point(
"sentiment_analysis",
)
graph.set_entry_point("parallel_analysis")

graph.add_conditional_edges(
"sentiment_analysis",
lambda x: ("error_handler" if x.get("status") == "error" else "fact_checking"),
)

graph.add_conditional_edges(
"fact_checking",
"parallel_analysis",
lambda x: (
"error_handler" if x.get("status") == "error" else "generate_perspective"
),
@@ -101,6 +98,7 @@ def build_langgraph():
else "store_and_send"
),
)

graph.add_conditional_edges(
"store_and_send",
lambda x: ("error_handler" if x.get("status") == "error" else "__end__"),
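The new entry point is sentiment.run_parallel_analysis, which this diff references but does not show. Purely as an assumption about how the two branches might be fanned out, such a node could look roughly like this:

```python
# Assumed sketch only -- the real run_parallel_analysis lives in
# app/modules/langgraph_nodes/sentiment.py and is not part of this hunk.
import asyncio
from app.modules.langgraph_nodes.sentiment import run_sentiment_sdk  # existing node (assumed synchronous)
from app.modules.fact_check_tool import (
    extract_claims_node, plan_searches_node, execute_searches_node, verify_facts_node,
)

async def run_parallel_analysis(state):
    async def fact_check_branch():
        update = {}
        for node in (extract_claims_node, plan_searches_node,
                     execute_searches_node, verify_facts_node):
            update.update(await node({**state, **update}))
        return update

    # Run sentiment analysis and the fact-check branch concurrently, then merge
    # their partial state updates into one dict for the next graph node.
    sentiment_update, fact_update = await asyncio.gather(
        asyncio.to_thread(run_sentiment_sdk, state),
        fact_check_branch(),
    )
    return {**sentiment_update, **fact_update}
```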
30 changes: 16 additions & 14 deletions backend/app/modules/langgraph_nodes/generate_perspective.py
@@ -21,22 +21,23 @@


from app.utils.prompt_templates import generation_prompt
from app.llm_config import LLM_MODEL
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from app.logging.logging_config import setup_logger

from typing import List
logger = setup_logger(__name__)


prompt = generation_prompt


class PerspectiveOutput(BaseModel):
reasoning: str = Field(..., description="Chain-of-thought reasoning steps")
reasoning: List[str] = Field(description="Chain-of-thought reasoning steps", alias="reasoning_steps")
perspective: str = Field(..., description="Generated opposite perspective")


my_llm = "llama-3.3-70b-versatile"
my_llm = LLM_MODEL

llm = ChatGroq(model=my_llm, temperature=0.7)

@@ -56,17 +57,18 @@ def generate_perspective(state):

if not text:
raise ValueError("Missing or empty 'cleaned_text' in state")
elif not facts:
raise ValueError("Missing or empty 'facts' in state")

facts_str = "\n".join(
[
f"Claim: {f['original_claim']}\n"
"Verdict: {f['verdict']}\nExplanation: "
"{f['explanation']}"
for f in state["facts"]
]
)
if not facts:
logger.warning("No facts found in state. Generating perspective based on text only.")
facts_str = "No specific claims verified."
else:
facts_str = "\n".join(
[
f"Claim: {f.get('claim', f.get('original_claim', 'Unknown Claim'))}\n"
f"Verdict: {f.get('status', f.get('verdict', 'Unknown Verdict'))}\n"
f"Explanation: {f.get('reason', f.get('explanation', 'No explanation'))}"
for f in facts
]
)

result = chain.invoke(
{
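One note on the PerspectiveOutput change: with pydantic v2, giving the field an alias means the structured output can arrive as reasoning_steps while the rest of the code keeps reading .reasoning. A minimal check of that behaviour (a sketch, assuming pydantic v2 defaults):

```python
# Sketch only: demonstrating the alias behaviour of the updated model.
from typing import List
from pydantic import BaseModel, Field

class PerspectiveOutput(BaseModel):
    reasoning: List[str] = Field(description="Chain-of-thought reasoning steps",
                                 alias="reasoning_steps")
    perspective: str = Field(..., description="Generated opposite perspective")

out = PerspectiveOutput(reasoning_steps=["step 1", "step 2"], perspective="A counter-view")
print(out.reasoning)  # ['step 1', 'step 2']

# Note: without model_config = ConfigDict(populate_by_name=True), constructing the
# model with reasoning=[...] would fail validation, since only the alias name is
# accepted on input by default.
```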