Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python-envs.defaultEnvManager": "ms-python.python:system",
"python-envs.pythonProjects": []
}
Empty file added backend/__init__.py
Empty file.
28 changes: 28 additions & 0 deletions backend/app/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
.env
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules

# next.js
/.next/
/out/

# production
/build

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
38 changes: 38 additions & 0 deletions backend/app/db/mongo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
from typing import Optional

from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase


_client: Optional[AsyncIOMotorClient] = None
_db: Optional[AsyncIOMotorDatabase] = None


def get_mongo_uri() -> str:
    """Return the MongoDB connection string.

    Reads the ``MONGODB_URI`` environment variable (e.g. an Atlas URI) and
    falls back to a local instance on the default port when it is unset.
    """
    fallback_uri = "mongodb://localhost:27017"
    return os.environ.get("MONGODB_URI", fallback_uri)


def init_mongo(app=None) -> None:
    """Create the module-wide Motor client and database handle once.

    Calling this again while a client already exists is a no-op. The
    database name comes from ``MONGODB_DB`` and defaults to ``perspective``.

    Args:
        app: Accepted but not used by this function.
    """
    global _client, _db
    if _client is not None:
        # Already initialised; keep the existing client and handle.
        return
    _client = AsyncIOMotorClient(get_mongo_uri())
    database_name = os.getenv("MONGODB_DB", "perspective")
    _db = _client[database_name]


def close_mongo() -> None:
    """Close the shared Mongo client, if any, and forget both cached handles.

    Clearing the globals means a later ``init_mongo``/``get_db`` call builds
    a fresh client instead of handing back a closed one.
    """
    global _client, _db
    if _client is None:
        return
    _client.close()
    _client, _db = None, None


def get_db() -> AsyncIOMotorDatabase:
    """Return the shared database handle, initialising the client on first use."""
    if _db is not None:
        return _db
    init_mongo()
    # init_mongo() rebinds the module-level ``_db``; read it again here.
    return _db
33 changes: 33 additions & 0 deletions backend/app/db/user_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Optional
from datetime import datetime

from app.models.user import User
from app.db.mongo import get_db


async def get_user_by_email(email: str) -> Optional[User]:
    """Look up a user document by email in the ``users`` collection.

    Returns:
        A ``User`` built from the stored document, or ``None`` when no
        document matches.
    """
    record = await get_db().users.find_one({"email": email})
    if not record:
        return None
    # Expose Mongo's ``_id`` as the model's string ``id`` field.
    if "_id" in record:
        mongo_id = record.pop("_id")
        record["id"] = str(mongo_id)
    return User(**record)


async def create_user(user: User) -> User:
    """Insert a new user document and return it with its Mongo-assigned id.

    Args:
        user: The user to persist; ``user.model_dump()`` is what gets stored.

    Returns:
        A ``User`` whose ``id`` is the string form of the inserted ``_id``.

    Raises:
        ValueError: If a document with the same email already exists.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    db = get_db()
    if await db.users.find_one({"email": user.email}):
        raise ValueError("User with this email already exists")

    payload = user.model_dump()
    created_at = payload.get("created_at")
    if isinstance(created_at, str):
        # Store created_at as a datetime rather than an ISO string.
        try:
            payload["created_at"] = datetime.fromisoformat(created_at)
        except ValueError:
            # Unparseable timestamp: fall back to "now", timezone-aware so it
            # matches the User model's own default.
            payload["created_at"] = datetime.now(timezone.utc)

    result = await db.users.insert_one(payload)
    # PyMongo's insert_one adds an ``_id`` key to the dict it is given; drop
    # it so only the string ``id`` is handed to the model.
    payload.pop("_id", None)
    payload["id"] = str(result.inserted_id)
    return User(**payload)
1 change: 1 addition & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""User models for authentication."""
25 changes: 25 additions & 0 deletions backend/app/models/user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pydantic import BaseModel, EmailStr, Field
from typing import Optional
from datetime import datetime, timezone
from uuid import uuid4


class UserCreate(BaseModel):
    """Request payload carrying the fields needed to register a user."""

    # Display name supplied by the client.
    name: str
    # Validated as an email address by pydantic's EmailStr.
    email: EmailStr
    # Password exactly as submitted by the client.
    password: str


def _new_user_id() -> str:
    """Return a fresh random UUID4 rendered as a string."""
    return str(uuid4())


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


class User(BaseModel):
    """Internal user record, including the password hash."""

    # Defaults to a random UUID4 string when not supplied.
    id: str = Field(default_factory=_new_user_id)
    name: str
    email: EmailStr
    hashed_password: str
    # Defaults to the creation moment in UTC when not supplied.
    created_at: datetime = Field(default_factory=_utc_now)


class UserPublic(BaseModel):
    """User fields without ``hashed_password``."""

    id: str
    name: str
    email: EmailStr
    created_at: datetime
62 changes: 59 additions & 3 deletions backend/app/modules/vector_store/embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,59 @@
"""


from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
import os
import logging

embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Process-wide embedder cache; populated on the first _get_embedder() call.
_embedder = None
_model_name = os.getenv("EMBED_MODEL_NAME", "all-MiniLM-L6-v2")


class _FallbackEmbedder:
    """Deterministic hash-based embedder used when the real model cannot load.

    The vectors are NOT semantically meaningful; they only keep the rest of
    the pipeline working with fixed-size inputs.
    """

    def __init__(self, dim: int = 384):
        # Length of every vector produced by encode().
        self.dim = dim

    def _vector(self, text: str) -> List[float]:
        """Map ``text`` to ``dim`` floats in [-1, 1] derived from its SHA-256."""
        import hashlib

        digest = hashlib.sha256(text.encode("utf-8")).digest()
        # Cycle through the 32 digest bytes until ``dim`` values exist; each
        # byte (0..255) is scaled linearly onto [-1, 1].
        return [
            (digest[i % len(digest)] / 127.5) - 1.0 for i in range(self.dim)
        ]

    def encode(self, texts):
        """Embed a list of strings (list of vectors) or one string (one vector)."""
        if isinstance(texts, str):
            # A bare string is a single text, not an iterable of characters.
            return self._vector(texts)
        return [self._vector(text) for text in texts]


def _get_embedder():
    """Lazily load and cache the embedder.

    Tries to import and construct the SentenceTransformer named by
    ``EMBED_MODEL_NAME``. If that fails for any reason (missing package,
    network/DNS error while fetching the model, ...), a warning is logged and
    a deterministic ``_FallbackEmbedder`` is cached instead.

    Returns:
        An object exposing ``encode(texts)``.
    """
    global _embedder
    if _embedder is not None:
        return _embedder

    try:
        from sentence_transformers import SentenceTransformer

        _embedder = SentenceTransformer(_model_name)
    except Exception as exc:  # pragma: no cover - defensive fallback
        logging.warning(
            "Failed to load SentenceTransformer '%s' (%s). Falling back to deterministic embedder.",
            _model_name,
            exc,
        )
        _embedder = _FallbackEmbedder()
    return _embedder


def embed_chunks(chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
Expand All @@ -40,7 +89,14 @@ def embed_chunks(chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
)

texts = [chunk["text"] for chunk in chunks]
embeddings = embedder.encode(texts).tolist()
embedder = _get_embedder()
embeddings = embedder.encode(texts)
# some embedders return numpy arrays
try:
embeddings = embeddings.tolist()
except Exception:
# assume it's already a list of lists
pass

vectors = []
for chunk, embedding in zip(chunks, embeddings):
Expand Down
60 changes: 60 additions & 0 deletions backend/app/routes/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from fastapi import APIRouter, HTTPException, status
from pydantic import BaseModel, EmailStr
from app.models.user import User, UserCreate, UserPublic
from app.db.user_store import get_user_by_email, create_user
from app.utils.auth import hash_password, verify_password, create_access_token


router = APIRouter()


class SignupRequest(UserCreate):
    """Body of ``POST /signup``; same fields as ``UserCreate``."""


class LoginRequest(BaseModel):
    """Body of ``POST /login``: the credentials to check."""

    # Validated as an email address by pydantic's EmailStr.
    email: EmailStr
    # Password exactly as submitted by the client.
    password: str


def _validate_password_strength(password: str):
    """Raise HTTP 400 when ``password`` has fewer than eight characters."""
    if len(password) >= 8:
        return
    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail="Password must be at least 8 characters long",
    )


@router.post("/signup")
async def signup(body: SignupRequest):
    """Register a new user and return an access token plus the public profile.

    Raises:
        HTTPException: 400 if the password is too short or the email is
            already registered.
    """
    _validate_password_strength(body.password)
    if await get_user_by_email(body.email):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Email already registered")

    user = User(name=body.name, email=body.email, hashed_password=hash_password(body.password))
    try:
        user = await create_user(user)
    except ValueError as exc:
        # create_user re-checks the email and raises ValueError on a
        # duplicate; a concurrent signup can slip in between the check above
        # and the insert. Report it as the same 400 rather than a 500.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Email already registered",
        ) from exc

    token = create_access_token(user.email)
    return {
        "access_token": token,
        "token_type": "bearer",
        # Round-trip through UserPublic so hashed_password is not returned.
        "user": UserPublic(**user.model_dump()).model_dump(),
    }


# Cached hash of a constant password, verified against when the email is
# unknown. Built on first use to avoid import-time hashing work.
_dummy_password_hash = None


def _get_dummy_password_hash() -> str:
    """Return the cached dummy hash, computing it once on first call."""
    global _dummy_password_hash
    if _dummy_password_hash is None:
        _dummy_password_hash = hash_password("__dummy_constant_password__")
    return _dummy_password_hash


@router.post("/login")
async def login(body: LoginRequest):
    """Check credentials and return an access token plus the public profile.

    Timing-attack mitigation: exactly one password verification runs whether
    or not the email exists, so response time does not reveal which emails
    are registered.

    Raises:
        HTTPException: 401 with a generic message when the email is unknown
            or the password does not match.
    """
    user = await get_user_by_email(body.email)
    hashed = user.hashed_password if user else _get_dummy_password_hash()
    password_ok = verify_password(body.password, hashed)
    if not user or not password_ok:
        # Return generic error regardless of which check failed
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid email or password")
    token = create_access_token(user.email)
    return {
        "access_token": token,
        "token_type": "bearer",
        # Round-trip through UserPublic so hashed_password is not returned.
        "user": UserPublic(**user.model_dump()).model_dump(),
    }
9 changes: 5 additions & 4 deletions backend/app/routes/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
"""


from fastapi import APIRouter
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from app.modules.pipeline import run_scraper_pipeline
from app.modules.pipeline import run_langgraph_workflow
from app.modules.bias_detection.check_bias import check_bias
from app.modules.chat.get_rag_data import search_pinecone
from app.modules.chat.llm_processing import ask_llm
from app.logging.logging_config import setup_logger
from app.utils.auth import get_current_user
import asyncio
import json

Expand All @@ -60,23 +61,23 @@ async def home():


@router.post("/bias")
async def bias_detection(request: URlRequest):
async def bias_detection(request: URlRequest, user=Depends(get_current_user)):
content = await asyncio.to_thread(run_scraper_pipeline, (request.url))
bias_score = await asyncio.to_thread(check_bias, (content))
logger.info(f"Bias detection result: {bias_score}")
return bias_score


@router.post("/process")
async def run_pipelines(request: URlRequest):
async def run_pipelines(request: URlRequest, user=Depends(get_current_user)):
article_text = await asyncio.to_thread(run_scraper_pipeline, (request.url))
logger.debug(f"Scraper output: {json.dumps(article_text, indent=2, ensure_ascii=False)}")
data = await asyncio.to_thread(run_langgraph_workflow, (article_text))
return data


@router.post("/chat")
async def answer_query(request: ChatQuery):
async def answer_query(request: ChatQuery, user=Depends(get_current_user)):
query = request.message
results = search_pinecone(query)
answer = ask_llm(query, results)
Expand Down
56 changes: 56 additions & 0 deletions backend/app/utils/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from datetime import datetime, timedelta, timezone
from typing import Optional

import jwt
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from passlib.context import CryptContext

# Key used to sign and verify access tokens (see create_access_token /
# decode_token).
# NOTE(review): when JWT_SECRET is unset this falls back to the literal
# "change-me", which anyone reading the source knows and could use to sign
# tokens. Set JWT_SECRET explicitly in every deployed environment.
JWT_SECRET = os.getenv("JWT_SECRET", "change-me")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Critical: Weak default JWT secret exposes token forgery risk.

The default "change-me" secret is insecure and must not reach production. If JWT_SECRET is not set, tokens can be forged by attackers.

Apply this diff to fail fast when the secret is not configured:

-JWT_SECRET = os.getenv("JWT_SECRET", "change-me")
+JWT_SECRET = os.getenv("JWT_SECRET")
+if not JWT_SECRET:
+    raise ValueError("JWT_SECRET environment variable must be set")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
JWT_SECRET = os.getenv("JWT_SECRET", "change-me")
JWT_SECRET = os.getenv("JWT_SECRET")
if not JWT_SECRET:
raise ValueError("JWT_SECRET environment variable must be set")
🤖 Prompt for AI Agents
In backend/app/utils/auth.py around line 10 the default JWT_SECRET is set to the
insecure string "change-me"; replace this behavior so the app fails fast when
JWT_SECRET is not provided: remove the insecure default, read JWT_SECRET from
environment, and if it's missing or empty raise an exception (or call sys.exit
with a clear error log) during startup so the process will not start with an
insecure secret; ensure any tests or startup scripts set JWT_SECRET accordingly
and include a descriptive error message mentioning the missing JWT_SECRET.

# JWT signing algorithm passed to jwt.encode / jwt.decode.
ALGORITHM = "HS256"
# Token lifetime in minutes; overridable via the environment, default 120.
# int() raises ValueError at import time if the variable is not an integer.
ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "120"))

# Shared passlib context used by hash_password / verify_password.
pwd_context = CryptContext(schemes=["bcrypt_sha256"], deprecated="auto")
# Security dependency that get_current_user reads the bearer token from.
bearer_scheme = HTTPBearer(auto_error=True)


def hash_password(password: str) -> str:
    """Hash ``password`` with the module's passlib context.

    The context is configured for ``bcrypt_sha256``, which handles long
    passwords safely.
    """
    hashed = pwd_context.hash(password)
    return hashed


def verify_password(password: str, hashed: str) -> bool:
    """Return whether ``password`` matches the stored hash ``hashed``."""
    is_match = pwd_context.verify(password, hashed)
    return is_match


def create_access_token(subject: str) -> str:
    """Build a signed JWT for ``subject``.

    The token carries ``sub``, ``iat`` (now, UTC) and ``exp`` (now plus
    ``ACCESS_TOKEN_EXPIRE_MINUTES``), both as integer Unix timestamps.
    """
    issued_at = datetime.now(timezone.utc)
    expires_at = issued_at + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = dict(
        sub=subject,
        iat=int(issued_at.timestamp()),
        exp=int(expires_at.timestamp()),
    )
    return jwt.encode(claims, JWT_SECRET, algorithm=ALGORITHM)


def decode_token(token: str) -> dict:
    """Verify ``token`` and return its decoded claims.

    Args:
        token: Encoded JWT as received from the client.

    Returns:
        The decoded payload dictionary.

    Raises:
        HTTPException: 401 "Token expired" when the signature has expired,
            401 "Invalid token" for any other PyJWT error.
    """
    try:
        return jwt.decode(token, JWT_SECRET, algorithms=[ALGORITHM])
    except jwt.ExpiredSignatureError as exc:
        # Handled before the PyJWTError clause below so expiry gets its own
        # message; chained so the original cause stays in the traceback.
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expired") from exc
    except jwt.PyJWTError as exc:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token") from exc


async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)) -> dict:
    """Resolve the bearer token to the authenticated user's email.

    Returns:
        ``{"email": <subject claim>}`` for a valid token whose user exists.

    Raises:
        HTTPException: 401 when the token fails to decode, has no ``sub``
            claim, or refers to an email with no stored user.
    """
    claims = decode_token(credentials.credentials)
    from app.db.user_store import get_user_by_email  # local import to avoid circulars

    email = claims.get("sub")
    if not email:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token payload")
    if not await get_user_by_email(email):
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="User no longer exists")
    return {"email": email}
Loading