5 changes: 4 additions & 1 deletion frontend/app/page.tsx
@@ -115,7 +115,10 @@ export default function Home() {
Get Started
<ArrowRight className="w-4 h-4 md:w-5 md:h-5 ml-2 md:ml-3 transition-transform duration-300 group-hover:translate-x-1" />
</Button>

<p className="mt-3 text-xs text-slate-500 dark:text-slate-400 animate-fade-in delay-600">
No sign in required. It’s completely free.
</p>

{/* Floating stats */}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 md:gap-8 mt-12 md:mt-20 animate-fade-in delay-700">
{stats.map((stat, index) => (
94 changes: 94 additions & 0 deletions new-backend/app/modules/langgraph_builder.py
@@ -0,0 +1,94 @@
from langgraph.graph import StateGraph, END
from app.modules.langgraph_nodes import (
    sentiment,
    fact_check,
    generate_perspective,
    judge,
    store_and_send,
    error_handler,
)


def build_langgraph():
    # NOTE: recent langgraph releases expect a state schema (e.g. a TypedDict)
    # here; a plain dict is used as a placeholder.
    graph = StateGraph(dict)

    graph.add_node("sentiment_analysis", sentiment.run_sentiment)
    graph.add_node("fact_checking", fact_check.run_fact_check)
    graph.add_node("generate_perspective", generate_perspective.generate_perspective)
    graph.add_node("judge_perspective", judge.judge_perspective)
    graph.add_node("store_and_send", store_and_send.store_and_send)
    # Pass the handler function, not the error_handler module itself.
    graph.add_node("error_handler", error_handler.error_handler)

    graph.set_entry_point("sentiment_analysis")

    # StateGraph exposes add_conditional_edges (there is no set_conditional_edges).
    graph.add_conditional_edges(
        "sentiment_analysis",
        lambda x: "error_handler" if x.get("status") == "error" else "fact_checking",
    )

    graph.add_conditional_edges(
        "fact_checking",
        lambda x: (
            "error_handler"
            if x.get("status") == "error"
            else "generate_perspective"
        ),
    )

    graph.add_conditional_edges(
        "generate_perspective",
        lambda x: (
            "error_handler"
            if x.get("status") == "error"
            else "judge_perspective"
        ),
    )

    # Below a score of 70: retry generation up to 3 times, then store anyway.
    graph.add_conditional_edges(
        "judge_perspective",
        lambda state: (
            "error_handler"
            if state.get("status") == "error"
            else (
                "store_and_send"
                if state.get("retries", 0) >= 3
                else "generate_perspective"
            )
            if state.get("score", 0) < 70
            else "store_and_send"
        ),
    )

    graph.add_conditional_edges(
        "store_and_send",
        lambda x: "error_handler" if x.get("status") == "error" else END,
    )

    # Give the error handler a terminal edge so the graph can also end there.
    graph.add_edge("error_handler", END)

    graph.set_finish_point("store_and_send")

    return graph.compile()
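
For reference, a minimal sketch of the state shape the nodes above pass around — hypothetical, since this PR builds the graph over a plain dict placeholder; the field names simply mirror the keys the nodes read and write:

from typing import TypedDict


class PipelineState(TypedDict, total=False):
    # Keys produced and consumed by the nodes wired up in build_langgraph()
    cleaned_text: str
    keywords: list[str]
    sentiment: str
    sentiment_score: float
    facts: list[dict]
    perspective: str
    score: int
    retries: int
    status: str        # "success", "error", or "stopped_due_to_error"
    error_from: str
    message: str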
11 changes: 11 additions & 0 deletions new-backend/app/modules/langgraph_nodes/error_handler.py
@@ -0,0 +1,11 @@


def error_handler(input):
    print("Error detected!")
    print(f"From: {input.get('error_from')}")
    print(f"Message: {input.get('message')}")
Comment on lines +3 to +6
Contributor

🛠️ Refactor suggestion

Avoid shadowing built-ins and switch to proper logging

Using the parameter name input shadows Python’s built-in input() function, which can be confusing.
Additionally, print() statements are not suitable for production logging; prefer the project’s configured logger.

-def error_handler(input):
-    print("Error detected!")
-    print(f"From: {input.get('error_from')}")
-    print(f"Message: {input.get('message')}")
+import logging
+logger = logging.getLogger(__name__)
+
+def error_handler(error_payload: dict):
+    logger.error("Error detected!")
+    logger.error("From: %s", error_payload.get("error_from"))
+    logger.error("Message: %s", error_payload.get("message"))


return {"status": "stopped_due_to_error",
"from": [input.get("error_from")],
"error": [input.get("message")]
}
30 changes: 30 additions & 0 deletions new-backend/app/modules/langgraph_nodes/fact_check.py
@@ -0,0 +1,30 @@
# web search + fact check

def search_web():
    return []
Comment on lines +3 to +4
Contributor

⚠️ Potential issue

Runtime-blocking: search_web signature mismatch

search_web is declared with no parameters, yet an argument is supplied at the call-site (line 11). Python will raise
TypeError: search_web() takes 0 positional arguments but 1 was given the first time the node runs.

Diff to align the signature with its usage and add a minimal contract:

-def search_web():
-    return []
+def search_web(query: str) -> list[dict]:
+    """
+    Placeholder web-search. Keeps the pipeline alive until a real
+    implementation is plugged in.
+    """
+    # TODO: integrate actual search provider
+    return []



def run_fact_check(state):
    try:
        text = state.get("cleaned_text")
        keywords = state["keywords"]

        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")
        elif not keywords:
            raise ValueError("Missing or empty 'keywords' in state")
Comment on lines +10 to +15
Contributor

🛠️ Refactor suggestion

state["keywords"] may raise KeyError and the elif after a raise is redundant

-        keywords = state["keywords"]
+        keywords = state.get("keywords", [])

After the first raise, the elif branch is unreachable; switch to a plain if for clarity.

-        if not text:
-            raise ValueError(...)
-        elif not keywords:
+        if not text:
+            raise ValueError(...)
+        if not keywords:
             raise ValueError(...)


        results = search_web(text + " " + " ".join(keywords))
        sources = [{"snippet": r.text, "url": r.link} for r in results]
    except Exception as e:
        print(f"some error occurred in fact_checking: {e}")
        return {
            "status": "error",
            "error_from": "fact_checking",
            "message": f"{e}",
        }
    return {
        **state,
        "facts": sources,
        "status": "success"
    }
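
For a concrete direction, here is a hedged sketch of a real search_web against a generic JSON search API — the endpoint, parameters, and response fields are placeholders, not a real provider. It returns objects with .text and .link attributes so the r.text / r.link access in run_fact_check above keeps working:

import os
from dataclasses import dataclass

import requests


@dataclass
class SearchResult:
    text: str
    link: str


def search_web(query: str) -> list[SearchResult]:
    # Placeholder endpoint and response shape; swap in a real provider.
    resp = requests.get(
        "https://search.example.com/api",
        params={"q": query, "key": os.environ.get("SEARCH_API_KEY", "")},
        timeout=10,
    )
    resp.raise_for_status()
    return [
        SearchResult(text=item.get("snippet", ""), link=item.get("url", ""))
        for item in resp.json().get("results", [])
    ]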
47 changes: 47 additions & 0 deletions new-backend/app/modules/langgraph_nodes/generate_perspective.py
@@ -0,0 +1,47 @@
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["text", "facts"],
    template="""Given the following article:
{text}

And the following verified facts:
{facts}

Generate a reasoned opposing perspective using chain-of-thought logic.
"""
)

my_llm = "groq llm"

chain = LLMChain(prompt=prompt, llm=my_llm)

Comment on lines +16 to +19
Contributor

⚠️ Potential issue

my_llm is an invalid placeholder – LLMChain expects a BaseLLM instance

Passing a plain string will raise during graph compilation. Either inject a real LLM or keep the node disabled behind a feature flag.

Example fix with LangChain’s OpenAI wrapper:

-from langchain.chains import LLMChain
+from langchain.chains import LLMChain
+from langchain_openai import ChatOpenAI   # or any provider

-my_llm = "groq llm"
+my_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
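
Since the author mentions Groq APIs elsewhere in this PR, an alternative sketch using the langchain-groq integration — assuming that package is installed and GROQ_API_KEY is set; the model name is illustrative, and prompt is the template already defined in this file:

from langchain_groq import ChatGroq

# Illustrative model choice; any Groq-hosted chat model would do.
my_llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.7)
chain = LLMChain(prompt=prompt, llm=my_llm)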


def generate_perspective(state):
    try:
        retries = state.get("retries", 0)
        state["retries"] = retries + 1

        text = state["cleaned_text"]
        facts = state.get("facts")

        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")
        elif not facts:
            raise ValueError("Missing or empty 'facts' in state")

        facts = "\n".join([f["snippet"] for f in state["facts"]])
        result = chain.run({"text": text, "facts": facts})
    except Exception as e:
        print(f"some error occurred in generate_perspective: {e}")
        return {
            "status": "error",
            "error_from": "generate_perspective",
            "message": f"{e}",
        }
    return {
        **state,
        "perspective": result,
        "status": "success"
    }
23 changes: 23 additions & 0 deletions new-backend/app/modules/langgraph_nodes/judge.py
@@ -0,0 +1,23 @@
def judge_perspective(state):
    # Dummy scoring
    try:
        perspective = state.get("perspective")

        if not perspective:
            raise ValueError("Missing or empty 'perspective' in state")

        score = 85 if "reasoned" in perspective else 40
    except Exception as e:
        print(f"some error occurred in judge_perspective: {e}")
        return {
            "status": "error",
            "error_from": "judge_perspective",
            "message": f"{e}",
        }
    return {
        **state,
        "score": score,
        "status": "success"
    }

# TODO: LLM-based score assignment
The LLM should consider factors like logic, factual alignment, coherence, and tone, and return a numeric score (0–100) that can be parsed and added to the state.

Collaborator Author

Exactly.
We will have prompt templates instructing the LLM to evaluate on the basis of logic, factual alignment, and coherence, and to return a score.
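
A rough sketch of that idea — the rubric wording and the parse helper below are illustrative, not part of this PR:

import re

from langchain.prompts import PromptTemplate

judge_prompt = PromptTemplate(
    input_variables=["perspective"],
    template="""Evaluate the following perspective for logic, factual alignment,
coherence, and tone. Reply with a single integer score from 0 to 100.

Perspective:
{perspective}

Score:"""
)


def parse_score(raw: str, default: int = 0) -> int:
    """Extract the first integer from the LLM reply, clamped to 0-100."""
    match = re.search(r"\d{1,3}", raw)
    return max(0, min(100, int(match.group()))) if match else default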

27 changes: 27 additions & 0 deletions new-backend/app/modules/langgraph_nodes/sentiment.py
@@ -0,0 +1,27 @@
from transformers import pipeline

sentiment_pipeline = pipeline("sentiment-analysis")

Instead of using the default model for sentiment analysis, consider exploring more robust models for improved results.

Collaborator Author

This is just a base setup; I am going to build all the modules in detail.
I'm going to integrate robust LLMs through Groq LLM APIs for each LLM-related task.
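
For example, a minimal sketch of pinning an explicit checkpoint instead of the pipeline default — the model name here is just one commonly used option, not a recommendation from this PR:

from transformers import pipeline

# Any stronger sentiment checkpoint would do; this one is illustrative.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)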

Comment on lines +1 to +3
Contributor

🛠️ Refactor suggestion

Eager pipeline initialisation will slow cold-starts and consume ~1–2 GB RAM

Loading the HF model at import time means every worker instantiates it even if the node is never executed. Move the initialisation behind a lazy singleton, or load it inside run_sentiment the first time it is called, to cut cold-start latency and memory.

-from transformers import pipeline
-
-sentiment_pipeline = pipeline("sentiment-analysis")
+from transformers import pipeline
+_sentiment_pipeline = None
+
+
+def _get_pipeline():
+    global _sentiment_pipeline
+    if _sentiment_pipeline is None:          # lazy, thread-safe under GIL
+        _sentiment_pipeline = pipeline("sentiment-analysis")
+    return _sentiment_pipeline



def run_sentiment(state):
    try:
        text = state.get("cleaned_text")
        if not text:
            raise ValueError("Missing or empty 'cleaned_text' in state")

        result = sentiment_pipeline(text)[0]

        return {
            **state,
            "sentiment": result["label"],
            "sentiment_score": result["score"],
            "status": "success"
        }

    except Exception as e:
        print(f"Error in sentiment_analysis: {e}")
        return {
            "status": "error",
            "error_from": "sentiment_analysis",
            "message": str(e),
        }
21 changes: 21 additions & 0 deletions new-backend/app/modules/langgraph_nodes/store_and_send.py
@@ -0,0 +1,21 @@
from utils.vector_store import save_to_vector_db
Contributor

💡 Verification agent

🧩 Analysis chain

Check import path correctness

utils.vector_store is referenced as a top-level package, but all LangGraph code lives under new-backend/app. If vector_store.py sits in the same package hierarchy (e.g. app.utils.vector_store), the import will fail at runtime.

Run the quick search below to confirm the real module path and adjust accordingly:


🏁 Script executed:

#!/bin/bash
fd vector_store.py



Update the import to include the app package

The module vector_store.py lives at new-backend/app/utils/vector_store.py, so the import in
new-backend/app/modules/langgraph_nodes/store_and_send.py should reference app.utils, not utils.

• File: new-backend/app/modules/langgraph_nodes/store_and_send.py

- from utils.vector_store import save_to_vector_db
+ from app.utils.vector_store import save_to_vector_db



def store_and_send(state):
    # to store data in vector db
    try:
        save_to_vector_db({
            **state
        })
    except Exception as e:
        print(f"some error occurred in store_and_send: {e}")
        return {
            "status": "error",
            "error_from": "store_and_send",
            "message": f"{e}",
        }
    # sending to frontend
    return {
        **state,
        "status": "success"
    }
10 changes: 10 additions & 0 deletions new-backend/app/modules/pipeline.py
@@ -1,8 +1,12 @@
from app.modules.scraper.extractor import Article_extractor
from app.modules.scraper.cleaner import clean_extracted_text
from app.modules.scraper.keywords import extract_keywords
from app.modules.langgraph_builder import build_langgraph
import json

# Compile once when module loads
_LANGGRAPH_WORKFLOW = build_langgraph()


def run_scraper_pipeline(url: str) -> dict:
    extractor = Article_extractor(url)
@@ -21,3 +25,9 @@ def run_scraper_pipeline(url: str) -> dict:
    print(json.dumps(result, indent=2, ensure_ascii=False))

    return result


def run_langgraph_workflow(state: dict):
    """Execute the pre-compiled LangGraph workflow."""
    result = _LANGGRAPH_WORKFLOW.invoke(state)
    return result
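
A quick usage sketch — the input below is illustrative, with keys mirroring what the LangGraph nodes read:

if __name__ == "__main__":
    final_state = run_langgraph_workflow({
        "cleaned_text": "Example article body...",
        "keywords": ["example", "article"],
    })
    print(final_state.get("status"))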
43 changes: 40 additions & 3 deletions new-backend/app/modules/scraper/cleaner.py
@@ -9,20 +9,57 @@ def clean_extracted_text(text: str):
     if not text:
         return ""
 
-    # 1. Remove multiple line breaks to single line break
+    # 1. Removing multiple line breaks to single line break
     text = re.sub(r'\n{2,}', '\n\n', text)
 
-    # 2. Remove common boilerplate patterns
+    # 2. Removing common boilerplate patterns
     # (example: "Read more at...", "Subscribe", etc.)
     boilerplate_phrases = [
         r"read more at.*",
         r"subscribe to.*",
         r"click here to.*",
         r"follow us on.*",
         r"advertisement",
-        r"© \d{4}.*",  # copyright lines
+        r"sponsored content",
+        r"promoted by.*",
+        r"recommended for you",
+        r"© \d{4}.*",  # copyright lines
+        r"all rights reserved",
+        r"terms of service",
+        r"privacy policy",
+        r"cookie policy",
+        r"about us",
+        r"contact us",
+        r"share this article",
+        r"sign up for our newsletter",
+        r"report this ad",
+        r"this story was originally published.*",
+        r"originally appeared on.*",
+        r"download our app.*",
+        r"view comments",
+        r"comment below",
+        r"leave a comment",
+        r"next article",
+        r"previous article",
+        r"related articles",
+        r"top stories",
+        r"breaking news",
+        r"editor's picks",
+        r"latest news",
+        r"trending now",
+        r"this content is provided by.*",
+        r"image source:.*",
+        r"photo by.*",
+        r"disclaimer:.*",
+        r"support independent journalism.*",
+        r"if you enjoyed this article.*",
+        r"don’t miss out on.*",
+        r"watch the video",
+        r"listen to the podcast",
+        r"stay connected with.*",
+        r"visit our homepage.*",
+        r"post a job on.*",
+        r"powered by .*",
     ]
     for pattern in boilerplate_phrases:
         text = re.sub(pattern, '', text, flags=re.IGNORECASE)
1 change: 1 addition & 0 deletions new-backend/app/modules/scraper/extractor.py
@@ -37,6 +37,7 @@ def extract_with_trafilatura(self):
             return {}
         result = trafilatura.extract(
             downloaded,
+            no_fallback=True,
             include_comments=False,
             include_tables=False,
             favor_recall=True,
4 changes: 3 additions & 1 deletion new-backend/app/routes/routes.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter
 from pydantic import BaseModel
 from app.modules.pipeline import run_scraper_pipeline
+from app.modules.pipeline import run_langgraph_workflow
 import json
 
 router = APIRouter()
@@ -19,4 +20,5 @@ async def home():
 async def run_pipelines(request: URlRequest):
     article_text = run_scraper_pipeline(request.url)
     print(json.dumps(article_text, indent=2))
-    return article_text
+    data = run_langgraph_workflow(article_text)
+    return data
Comment on lines +23 to +24
Contributor

⚠️ Potential issue

State payload is incompatible with the sentiment node – will raise KeyError.

run_scraper_pipeline() returns {"cleaned_text": ..., "keywords": ...}, whereas run_sentiment() expects state["text"]. Calling the workflow with the current dict will crash on the first node.

-    data = run_langgraph_workflow(article_text)
-    return data
+    # Map scraper output to the keys expected by the LangGraph workflow
+    langgraph_state = {
+        "text": article_text["cleaned_text"],
+        "keywords": article_text["keywords"],
+    }
+    data = run_langgraph_workflow(langgraph_state)
+    return data

3 changes: 3 additions & 0 deletions new-backend/pyproject.toml
@@ -7,11 +7,14 @@ requires-python = ">=3.13"
 dependencies = [
     "bs4>=0.0.2",
     "fastapi>=0.115.12",
+    "langchain>=0.3.25",
+    "langgraph>=0.4.8",
     "logging>=0.4.9.6",
     "newspaper3k>=0.2.8",
     "rake-nltk>=1.0.6",
     "readability-lxml>=0.8.4.1",
     "requests>=2.32.3",
     "trafilatura>=2.0.0",
+    "transformers>=4.52.4",
     "uvicorn>=0.34.3",
 ]