5 changes: 4 additions & 1 deletion frontend/app/page.tsx
@@ -115,7 +115,10 @@ export default function Home() {
Get Started
<ArrowRight className="w-4 h-4 md:w-5 md:h-5 ml-2 md:ml-3 transition-transform duration-300 group-hover:translate-x-1" />
</Button>

<p className="mt-3 text-xs text-slate-500 dark:text-slate-400 animate-fade-in delay-600">
No sign in required. It’s completely free.
</p>

{/* Floating stats */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 md:gap-8 mt-12 md:mt-20 animate-fade-in delay-700">
{stats.map((stat, index) => (
94 changes: 94 additions & 0 deletions new-backend/app/modules/langgraph_builder.py
@@ -0,0 +1,94 @@
from langgraph.graph import StateGraph, END
from app.modules.langgraph_nodes import (
    sentiment,
    fact_check,
    generate_perspective,
    judge,
    store_and_send,
    error_handler,
)


def build_langgraph():
    # NOTE: recent langgraph releases expect a state schema (e.g. a TypedDict)
    # here; a plain dict is used as a placeholder.
    graph = StateGraph(dict)

    graph.add_node("sentiment_analysis", sentiment.run_sentiment)
    graph.add_node("fact_checking", fact_check.run_fact_check)
    graph.add_node("generate_perspective", generate_perspective.generate_perspective)
    graph.add_node("judge_perspective", judge.judge_perspective)
    graph.add_node("store_and_send", store_and_send.store_and_send)
    # Pass the handler function, not the error_handler module itself.
    graph.add_node("error_handler", error_handler.error_handler)

    graph.set_entry_point("sentiment_analysis")

    # StateGraph exposes add_conditional_edges (there is no set_conditional_edges).
    graph.add_conditional_edges(
        "sentiment_analysis",
        lambda x: "error_handler" if x.get("status") == "error" else "fact_checking",
    )

    graph.add_conditional_edges(
        "fact_checking",
        lambda x: (
            "error_handler"
            if x.get("status") == "error"
            else "generate_perspective"
        ),
    )

    graph.add_conditional_edges(
        "generate_perspective",
        lambda x: (
            "error_handler"
            if x.get("status") == "error"
            else "judge_perspective"
        ),
    )

    # Below a score of 70: retry generation up to 3 times, then store anyway.
    graph.add_conditional_edges(
        "judge_perspective",
        lambda state: (
            "error_handler"
            if state.get("status") == "error"
            else (
                "store_and_send"
                if state.get("retries", 0) >= 3
                else "generate_perspective"
            )
            if state.get("score", 0) < 70
            else "store_and_send"
        ),
    )

    graph.add_conditional_edges(
        "store_and_send",
        lambda x: "error_handler" if x.get("status") == "error" else END,
    )

    # Give the error handler a terminal edge so the graph can also end there.
    graph.add_edge("error_handler", END)

    graph.set_finish_point("store_and_send")

    return graph.compile()
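
For reference, a minimal sketch of the state shape the nodes above pass around — hypothetical, since this PR builds the graph over a plain dict placeholder; the field names simply mirror the keys the nodes read and write:

from typing import TypedDict


class PipelineState(TypedDict, total=False):
    # Keys produced and consumed by the nodes wired up in build_langgraph()
    cleaned_text: str
    keywords: list[str]
    sentiment: str
    sentiment_score: float
    facts: list[dict]
    perspective: str
    score: int
    retries: int
    status: str        # "success", "error", or "stopped_due_to_error"
    error_from: str
    message: str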
11 changes: 11 additions & 0 deletions new-backend/app/modules/langgraph_nodes/error_handler.py
@@ -0,0 +1,11 @@


def error_handler(input):
    print("Error detected!")
    print(f"From: {input.get('error_from')}")
    print(f"Message: {input.get('message')}")
Comment on lines +3 to +6
Contributor

🛠️ Refactor suggestion

Avoid shadowing built-ins and switch to proper logging

Using the parameter name input shadows Python’s built-in input() function, which can be confusing.
Additionally, print() statements are not suitable for production logging; prefer the project’s configured logger.

-def error_handler(input):
-    print("Error detected!")
-    print(f"From: {input.get('error_from')}")
-    print(f"Message: {input.get('message')}")
+import logging
+logger = logging.getLogger(__name__)
+
+def error_handler(error_payload: dict):
+    logger.error("Error detected!")
+    logger.error("From: %s", error_payload.get("error_from"))
+    logger.error("Message: %s", error_payload.get("message"))


return {"status": "stopped_due_to_error",
"from": [input.get("error_from")],
"error": [input.get("message")]
}
30 changes: 30 additions & 0 deletions new-backend/app/modules/langgraph_nodes/fact_check.py
@@ -0,0 +1,30 @@
# web search + fact check

def search_web():
    return []
Comment on lines +3 to +4
Contributor

⚠️ Potential issue

Runtime-blocking: search_web signature mismatch

search_web is declared with no parameters, yet an argument is supplied at the call-site (line 11). Python will raise
TypeError: search_web() takes 0 positional arguments but 1 was given the first time the node runs.

Diff to align the signature with its usage and add a minimal contract:

-def search_web():
-    return []
+def search_web(query: str) -> list[dict]:
+    """
+    Placeholder web-search. Keeps the pipeline alive until a real
+    implementation is plugged in.
+    """
+    # TODO: integrate actual search provider
+    return []



def run_fact_check(state):
    try:
        text = state.get("cleaned_text")
        keywords = state["keywords"]

        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")
        elif not keywords:
            raise ValueError("Missing or empty 'keywords' in state")
Comment on lines +10 to +15
Contributor

🛠️ Refactor suggestion

state["keywords"] may raise KeyError and the elif after a raise is redundant

-        keywords = state["keywords"]
+        keywords = state.get("keywords", [])

After the first raise, the elif branch is unreachable; switch to a plain if for clarity.

-        if not text:
-            raise ValueError(...)
-        elif not keywords:
+        if not text:
+            raise ValueError(...)
+        if not keywords:
             raise ValueError(...)


        results = search_web(text + " " + " ".join(keywords))
        sources = [{"snippet": r.text, "url": r.link} for r in results]
    except Exception as e:
        print(f"some error occurred in fact_checking: {e}")
        return {
            "status": "error",
            "error_from": "fact_checking",
            "message": f"{e}",
        }
    return {
        **state,
        "facts": sources,
        "status": "success"
    }
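
For a concrete direction, here is a hedged sketch of a real search_web against a generic JSON search API — the endpoint, parameters, and response fields are placeholders, not a real provider. It returns objects with .text and .link attributes so the r.text / r.link access in run_fact_check above keeps working:

import os
from dataclasses import dataclass

import requests


@dataclass
class SearchResult:
    text: str
    link: str


def search_web(query: str) -> list[SearchResult]:
    # Placeholder endpoint and response shape; swap in a real provider.
    resp = requests.get(
        "https://search.example.com/api",
        params={"q": query, "key": os.environ.get("SEARCH_API_KEY", "")},
        timeout=10,
    )
    resp.raise_for_status()
    return [
        SearchResult(text=item.get("snippet", ""), link=item.get("url", ""))
        for item in resp.json().get("results", [])
    ]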
47 changes: 47 additions & 0 deletions new-backend/app/modules/langgraph_nodes/generate_perspective.py
@@ -0,0 +1,47 @@
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["text", "facts"],
    template="""Given the following article:
{text}

And the following verified facts:
{facts}

Generate a reasoned opposing perspective using chain-of-thought logic.
"""
)

my_llm = "groq llm"

chain = LLMChain(prompt=prompt, llm=my_llm)

Comment on lines +16 to +19
Contributor

⚠️ Potential issue

my_llm is an invalid placeholder – LLMChain expects a BaseLLM instance

Passing a plain string will raise during graph compilation. Either inject a real LLM or keep the node disabled behind a feature flag.

Example fix with LangChain’s OpenAI wrapper:

-from langchain.chains import LLMChain
+from langchain.chains import LLMChain
+from langchain_openai import ChatOpenAI   # or any provider

-my_llm = "groq llm"
+my_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
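
Since the author mentions Groq APIs elsewhere in this PR, an alternative sketch using the langchain-groq integration — assuming that package is installed and GROQ_API_KEY is set; the model name is illustrative, and prompt is the template already defined in this file:

from langchain_groq import ChatGroq

# Illustrative model choice; any Groq-hosted chat model would do.
my_llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.7)
chain = LLMChain(prompt=prompt, llm=my_llm)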


def generate_perspective(state):
    try:
        retries = state.get("retries", 0)
        state["retries"] = retries + 1

        text = state["cleaned_text"]
        facts = state.get("facts")

        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")
        elif not facts:
            raise ValueError("Missing or empty 'facts' in state")

        facts = "\n".join([f["snippet"] for f in state["facts"]])
        result = chain.run({"text": text, "facts": facts})
    except Exception as e:
        print(f"some error occurred in generate_perspective: {e}")
        return {
            "status": "error",
            "error_from": "generate_perspective",
            "message": f"{e}",
        }
    return {
        **state,
        "perspective": result,
        "status": "success"
    }
23 changes: 23 additions & 0 deletions new-backend/app/modules/langgraph_nodes/judge.py
@@ -0,0 +1,23 @@
def judge_perspective(state):
    # Dummy scoring
    try:
        perspective = state.get("perspective")

        if not perspective:
            raise ValueError("Missing or empty 'perspective' in state")

        score = 85 if "reasoned" in perspective else 40
    except Exception as e:
        print(f"some error occurred in judge_perspective: {e}")
        return {
            "status": "error",
            "error_from": "judge_perspective",
            "message": f"{e}",
        }
    return {
        **state,
        "score": score,
        "status": "success"
    }

# TODO: LLM-based score assignment
The LLM should consider factors like logic, factual alignment, coherence, and tone, and return a numeric score (0–100) that can be parsed and added to the state.

Collaborator Author

Exactly.
We will have prompt templates instructing the LLM to evaluate on the basis of logic, factual alignment, and coherence, and to return a score.
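
A rough sketch of that idea — the rubric wording and the parse helper below are illustrative, not part of this PR:

import re

from langchain.prompts import PromptTemplate

judge_prompt = PromptTemplate(
    input_variables=["perspective"],
    template="""Evaluate the following perspective for logic, factual alignment,
coherence, and tone. Reply with a single integer score from 0 to 100.

Perspective:
{perspective}

Score:"""
)


def parse_score(raw: str, default: int = 0) -> int:
    """Extract the first integer from the LLM reply, clamped to 0-100."""
    match = re.search(r"\d{1,3}", raw)
    return max(0, min(100, int(match.group()))) if match else default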

27 changes: 27 additions & 0 deletions new-backend/app/modules/langgraph_nodes/sentiment.py
@@ -0,0 +1,27 @@
from transformers import pipeline

sentiment_pipeline = pipeline("sentiment-analysis")

Instead of using the default model for sentiment analysis, consider exploring more robust models for improved results.

Collaborator Author

This is just a base setup; I am going to build all the modules in detail.
I'm going to integrate robust LLMs through Groq LLM APIs for each LLM-related task.
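
For example, a minimal sketch of pinning an explicit checkpoint instead of the pipeline default — the model name here is just one commonly used option, not a recommendation from this PR:

from transformers import pipeline

# Any stronger sentiment checkpoint would do; this one is illustrative.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)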

Comment on lines +1 to +3
Contributor

🛠️ Refactor suggestion

Eager pipeline initialisation will slow cold-starts and consume ~1–2 GB RAM

Loading the HF model at import time means every worker instantiates it even if the node is never executed. Move the initialisation behind a lazy singleton, or load it inside run_sentiment the first time it is called, to cut cold-start latency and memory.

-from transformers import pipeline
-
-sentiment_pipeline = pipeline("sentiment-analysis")
+from transformers import pipeline
+_sentiment_pipeline = None
+
+
+def _get_pipeline():
+    global _sentiment_pipeline
+    if _sentiment_pipeline is None:          # lazy, thread-safe under GIL
+        _sentiment_pipeline = pipeline("sentiment-analysis")
+    return _sentiment_pipeline



def run_sentiment(state):
    try:
        text = state.get("cleaned_text")
        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")

        result = sentiment_pipeline(text)[0]

        return {
            **state,
            "sentiment": result["label"],
            "sentiment_score": result["score"],
            "status": "success"
        }

    except Exception as e:
        print(f"Error in sentiment_analysis: {e}")
        return {
            "status": "error",
            "error_from": "sentiment_analysis",
            "message": str(e),
        }
21 changes: 21 additions & 0 deletions new-backend/app/modules/langgraph_nodes/store_and_send.py
@@ -0,0 +1,21 @@
from utils.vector_store import save_to_vector_db
Contributor

💡 Verification agent

🧩 Analysis chain

Check import path correctness

utils.vector_store is referenced as a top-level package, but all LangGraph code lives under new-backend/app. If vector_store.py sits in the same package hierarchy (e.g. app.utils.vector_store), the import will fail at runtime.

Run the quick search below to confirm the real module path and adjust accordingly:


🏁 Script executed:

#!/bin/bash
fd vector_store.py



Update the import to include the app package

The module vector_store.py lives at new-backend/app/utils/vector_store.py, so the import in
new-backend/app/modules/langgraph_nodes/store_and_send.py should reference app.utils, not utils.

• File: new-backend/app/modules/langgraph_nodes/store_and_send.py

- from utils.vector_store import save_to_vector_db
+ from app.utils.vector_store import save_to_vector_db



def store_and_send(state):
    # to store data in vector db
    try:
        save_to_vector_db({
            **state
        })
    except Exception as e:
        print(f"some error occurred in store_and_send: {e}")
        return {
            "status": "error",
            "error_from": "store_and_send",
            "message": f"{e}",
        }
    # sending to frontend
    return {
        **state,
        "status": "success"
    }
10 changes: 10 additions & 0 deletions new-backend/app/modules/pipeline.py
@@ -1,8 +1,12 @@
from app.modules.scraper.extractor import Article_extractor
from app.modules.scraper.cleaner import clean_extracted_text
from app.modules.scraper.keywords import extract_keywords
from app.modules.langgraph_builder import build_langgraph
import json

# Compile once when module loads
_LANGGRAPH_WORKFLOW = build_langgraph()


def run_scraper_pipeline(url: str) -> dict:
    extractor = Article_extractor(url)
@@ -21,3 +25,9 @@ def run_scraper_pipeline(url: str) -> dict:
    print(json.dumps(result, indent=2, ensure_ascii=False))

    return result


def run_langgraph_workflow(state: dict):
    """Execute the pre-compiled LangGraph workflow."""
    result = _LANGGRAPH_WORKFLOW.invoke(state)
    return result
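
A quick usage sketch — the input below is illustrative, with keys mirroring what the LangGraph nodes read:

if __name__ == "__main__":
    final_state = run_langgraph_workflow({
        "cleaned_text": "Example article body...",
        "keywords": ["example", "article"],
    })
    print(final_state.get("status"))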
43 changes: 40 additions & 3 deletions new-backend/app/modules/scraper/cleaner.py
@@ -9,20 +9,57 @@ def clean_extracted_text(text: str):
     if not text:
         return ""
 
-    # 1. Remove multiple line breaks to single line break
+    # 1. Removing multiple line breaks to single line break
     text = re.sub(r'\n{2,}', '\n\n', text)
 
-    # 2. Remove common boilerplate patterns
+    # 2. Removing common boilerplate patterns
     # (example: "Read more at...", "Subscribe", etc.)
     boilerplate_phrases = [
         r"read more at.*",
         r"subscribe to.*",
         r"click here to.*",
         r"follow us on.*",
         r"advertisement",
-        r"© \d{4}.*",  # copyright lines
+        r"sponsored content",
+        r"promoted by.*",
+        r"recommended for you",
+        r"© \d{4}.*",  # copyright lines
+        r"all rights reserved",
+        r"terms of service",
+        r"privacy policy",
+        r"cookie policy",
+        r"about us",
+        r"contact us",
+        r"share this article",
+        r"sign up for our newsletter",
+        r"report this ad",
+        r"this story was originally published.*",
+        r"originally appeared on.*",
+        r"download our app.*",
+        r"view comments",
+        r"comment below",
+        r"leave a comment",
+        r"next article",
+        r"previous article",
+        r"related articles",
+        r"top stories",
+        r"breaking news",
+        r"editor's picks",
+        r"latest news",
+        r"trending now",
+        r"this content is provided by.*",
+        r"image source:.*",
+        r"photo by.*",
+        r"disclaimer:.*",
+        r"support independent journalism.*",
+        r"if you enjoyed this article.*",
+        r"don’t miss out on.*",
+        r"watch the video",
+        r"listen to the podcast",
+        r"stay connected with.*",
+        r"visit our homepage.*",
+        r"post a job on.*",
+        r"powered by .*",
     ]
     for pattern in boilerplate_phrases:
         text = re.sub(pattern, '', text, flags=re.IGNORECASE)
1 change: 1 addition & 0 deletions new-backend/app/modules/scraper/extractor.py
@@ -37,6 +37,7 @@ def extract_with_trafilatura(self):
             return {}
         result = trafilatura.extract(
             downloaded,
+            no_fallback=True,
             include_comments=False,
             include_tables=False,
             favor_recall=True,
4 changes: 3 additions & 1 deletion new-backend/app/routes/routes.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter
 from pydantic import BaseModel
 from app.modules.pipeline import run_scraper_pipeline
+from app.modules.pipeline import run_langgraph_workflow
 import json
 
 router = APIRouter()
@@ -19,4 +20,5 @@ async def home():
 async def run_pipelines(request: URlRequest):
     article_text = run_scraper_pipeline(request.url)
     print(json.dumps(article_text, indent=2))
-    return article_text
+    data = run_langgraph_workflow(article_text)
+    return data
Comment on lines +23 to +24
Contributor

⚠️ Potential issue

State payload is incompatible with the sentiment node – will raise KeyError.

run_scraper_pipeline() returns {"cleaned_text": ..., "keywords": ...}, whereas run_sentiment() expects state["text"]. Calling the workflow with the current dict will crash on the first node.

-    data = run_langgraph_workflow(article_text)
-    return data
+    # Map scraper output to the keys expected by the LangGraph workflow
+    langgraph_state = {
+        "text": article_text["cleaned_text"],
+        "keywords": article_text["keywords"],
+    }
+    data = run_langgraph_workflow(langgraph_state)
+    return data

3 changes: 3 additions & 0 deletions new-backend/pyproject.toml
@@ -7,11 +7,14 @@ requires-python = ">=3.13"
 dependencies = [
     "bs4>=0.0.2",
     "fastapi>=0.115.12",
+    "langchain>=0.3.25",
+    "langgraph>=0.4.8",
     "logging>=0.4.9.6",
     "newspaper3k>=0.2.8",
     "rake-nltk>=1.0.6",
     "readability-lxml>=0.8.4.1",
     "requests>=2.32.3",
     "trafilatura>=2.0.0",
+    "transformers>=4.52.4",
     "uvicorn>=0.34.3",
 ]