42 changes: 26 additions & 16 deletions .github/workflows/deploy-backend-to-hf.yml
@@ -3,57 +3,67 @@ name: 🚀 Deploy Backend to HF Space
on:
  push:
    branches:
      - main # or your primary branch
      - main
    paths:
      - "backend/**" # only trigger when anything under backend/ changes
      - "backend/**"

jobs:
  deploy:
    runs-on: ubuntu-latest
    # set your HF username here (or replace with a secret if you prefer)
    env:
      HF_USER: Thunder1245
      HF_REPO: perspective-backend

    steps:
      - name: 👉 Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: 🔒 Install HF CLI
        run: pip install huggingface_hub

      - name: 🔑 HF login
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: huggingface-cli login --token "$HF_TOKEN"
      - name: 🔍 Ensure HF_TOKEN is set
        run: |
          if [ -z "${{ secrets.HF_TOKEN }}" ]; then
            echo "ERROR: HF_TOKEN secret is not set. Add it in repository secrets: Settings → Secrets & variables → Actions."
            exit 1
          fi

      - name: 📂 Prepare Space repo
      - name: 📂 Prepare Space repo (clone)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          rm -rf space-backend
          rm -rf space-backend || true
          # clone using token in URL (this authenticates the clone)
          git clone https://Thunder1245:${HF_TOKEN}@huggingface.co/spaces/Thunder1245/perspective-backend.git space-backend

Comment on lines +31 to 38

🛠️ Refactor suggestion

Use env vars in clone URL and avoid duplicating username; quote the URL.

The clone command hardcodes the username/repo and embeds the token in the command. Prefer env vars to prevent drift and quote the URL to avoid parsing issues.

Apply this diff:

-      - name: 📂 Prepare Space repo (clone)
+      - name: 📂 Prepare Space repo (clone)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          rm -rf space-backend || true
-          # clone using token in URL (this authenticates the clone)
-          git clone https://Thunder1245:${HF_TOKEN}@huggingface.co/spaces/Thunder1245/perspective-backend.git space-backend
+          rm -rf space-backend || true
+          # clone using token in URL (this authenticates the clone)
+          git clone "https://${HF_USER}:${HF_TOKEN}@huggingface.co/spaces/${HF_USER}/${HF_REPO}.git" space-backend

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In .github/workflows/deploy-backend-to-hf.yml around lines 31 to 38, the git
clone URL hardcodes the username and embeds the token directly, which risks
drift and parsing errors. Refactor the clone command to use environment
variables for both username and token, and quote the entire URL string to
prevent shell parsing issues. This ensures credentials are managed securely and
the command is more maintainable.

      - name: 📦 Install rsync
        run: |
          sudo apt-get update
          sudo apt-get install -y rsync

      - name: 📤 Sync backend code
      - name: 📤 Sync backend code to Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          set -e

          cd space-backend

          # Only remove tracked files (preserve .git and config)
          # Remove tracked files while preserving .git and config (ignore failure)
          git rm -r . || true
          cd ..

          # Copy new backend files in
          # Copy backend files into the cloned space directory
          cp -R backend/. space-backend/

Comment on lines +52 to 58

🛠️ Refactor suggestion

Ensure repo is fully synchronized (handle untracked files); simplify with rsync.

git rm -r . won’t remove untracked files and the cp step doesn’t delete removed files. Use rsync --delete to mirror backend/ into the Space repo (excluding .git).

-          # Remove tracked files while preserving .git and config (ignore failure)
-          git rm -r . || true
-          cd ..
-
-          # Copy backend files into the cloned space directory
-          cp -R backend/. space-backend/
+          # Mirror backend files into the cloned space directory (preserve .git)
+          rsync -av --delete --exclude '.git' backend/ space-backend/
🤖 Prompt for AI Agents
In .github/workflows/deploy-backend-to-hf.yml around lines 52 to 58, the current
approach using 'git rm -r .' and 'cp -R backend/. space-backend/' does not
handle untracked files and does not fully synchronize the backend directory with
the space-backend directory. Replace these commands with a single 'rsync'
command using the '--delete' option to mirror the backend directory into
space-backend, excluding the .git directory, ensuring all files are synchronized
and removed files are deleted.

          # Push new code to HF Space
          # Commit & push
          cd space-backend
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add --all
          git commit -m "Autodeploy backend: ${{ github.sha }}" || echo "No changes to commit"
          git commit -m "Auto-deploy backend: ${{ github.sha }}" || echo "No changes to commit"
          git push origin main

      - name: ✅ Done
        run: |
          echo "Backend deployed to Hugging Face Space: https://huggingface.co/spaces/${HF_USER}/${HF_REPO}"
47 changes: 35 additions & 12 deletions README.md
@@ -2,15 +2,32 @@
![Perspective banner](frontend/public/perspective_banner.jpg)

### Table of Contents
- [System Overview](#system-overview)
- [Architecture Components](#architecture-components)
- [Technical Stack](#technical-stack)
- [Core Features](#core-features)
- [Data Flow & Security](#data-flow--security)
- [Setup & Deployment](#setup--deployment)
- [Detailed Architecture Diagram](#detailed-architecture-diagram)
- [Expected Outcomes](#expected-outcomes)
- [Required Skills](#required-skills)
- [Perspective-AI](#perspective-ai)
    - [Table of Contents](#table-of-contents)
  - [System Overview](#system-overview)
    - [High-Level Concept](#high-level-concept)
  - [Architecture Components](#architecture-components)
    - [1. Frontend Layer](#1-frontend-layer)
    - [3. Core Backend](#3-core-backend)
    - [4. AI \& NLP Integration](#4-ai--nlp-integration)
    - [5. Data Storage](#5-data-storage)
  - [Technical Stack](#technical-stack)
    - [Frontend Technologies](#frontend-technologies)
    - [Backend Technologies](#backend-technologies)
    - [I Integration](#i-integration)
  - [Core Features](#core-features)
    - [1. Counter-Perspective Generation](#1-counter-perspective-generation)
    - [2. Reasoned Thinking](#2-reasoned-thinking)
    - [3. Updated Facts](#3-updated-facts)
    - [4. Seamless Integration](#4-seamless-integration)
    - [5. Real-Time Analysis](#5-real-time-analysis)
  - [Data Flow \& Security](#data-flow--security)
  - [Setup \& Deployment](#setup--deployment)
    - [Frontend Setup](#frontend-setup)
    - [Backend Setup](#backend-setup)
  - [Architecture Diagram](#architecture-diagram)
  - [Expected Outcomes](#expected-outcomes)
  - [Required Skills](#required-skills)
Comment on lines +5 to +30

💡 Verification agent

🧩 Analysis chain

Fix TOC indentation and remove the stray "I Integration" item to satisfy markdownlint and avoid broken anchors

Current list indentation is inconsistent (MD005/MD007), and the "I Integration" entry looks like a typo/duplicate of "AI & NLP Integration". Apply the following TOC cleanup:

- - [Perspective-AI](#perspective-ai)
-     - [Table of Contents](#table-of-contents)
-   - [System Overview](#system-overview)
-     - [High-Level Concept](#high-level-concept)
-   - [Architecture Components](#architecture-components)
-     - [1. Frontend Layer](#1-frontend-layer)
-     - [3. Core Backend](#3-core-backend)
-     - [4. AI \& NLP Integration](#4-ai--nlp-integration)
-     - [5. Data Storage](#5-data-storage)
-   - [Technical Stack](#technical-stack)
-     - [Frontend Technologies](#frontend-technologies)
-     - [Backend Technologies](#backend-technologies)
-     - [I Integration](#i-integration)
-   - [Core Features](#core-features)
-     - [1. Counter-Perspective Generation](#1-counter-perspective-generation)
-     - [2. Reasoned Thinking](#2-reasoned-thinking)
-     - [3. Updated Facts](#3-updated-facts)
-     - [4. Seamless Integration](#4-seamless-integration)
-     - [5. Real-Time Analysis](#5-real-time-analysis)
-   - [Data Flow \& Security](#data-flow--security)
-   - [Setup \& Deployment](#setup--deployment)
-     - [Frontend Setup](#frontend-setup)
-     - [Backend Setup](#backend-setup)
-   - [Architecture Diagram](#architecture-diagram)
-   - [Expected Outcomes](#expected-outcomes)
-   - [Required Skills](#required-skills)
+ - [Perspective-AI](#perspective-ai)
+   - [Table of Contents](#table-of-contents)
+   - [System Overview](#system-overview)
+     - [High-Level Concept](#high-level-concept)
+   - [Architecture Components](#architecture-components)
+     - [1. Frontend Layer](#1-frontend-layer)
+     - [3. Core Backend](#3-core-backend)
+     - [4. AI & NLP Integration](#4-ai--nlp-integration)
+     - [5. Data Storage](#5-data-storage)
+   - [Technical Stack](#technical-stack)
+     - [Frontend Technologies](#frontend-technologies)
+     - [Backend Technologies](#backend-technologies)
+   - [Core Features](#core-features)
+     - [1. Counter-Perspective Generation](#1-counter-perspective-generation)
+     - [2. Reasoned Thinking](#2-reasoned-thinking)
+     - [3. Updated Facts](#3-updated-facts)
+     - [4. Seamless Integration](#4-seamless-integration)
+     - [5. Real-Time Analysis](#5-real-time-analysis)
+   - [Data Flow & Security](#data-flow--security)
+   - [Setup & Deployment](#setup--deployment)
+     - [Frontend Setup](#frontend-setup)
+     - [Backend Setup](#backend-setup)
+   - [Architecture Diagram](#architecture-diagram)
+   - [Expected Outcomes](#expected-outcomes)
+   - [Required Skills](#required-skills)

Notes:

  • If you intend to keep numbered subsection labels (“1.”, “3.”, “4.”, “5.”), consider renumbering the corresponding headings to avoid the missing “2.” gap across the document. Otherwise, drop numbers from both headings and TOC for simplicity.

Please confirm whether “I Integration” was intended to be “AI Integration” (it appears redundant with “AI & NLP Integration”). If you want it retained, update both the TOC entry and the corresponding section heading for consistency.


🏁 Script executed:

#!/bin/bash
# Check for stray "I Integration" entry
grep -n "I Integration" README.md

# Print first 30 lines of README.md to inspect TOC indentation
sed -n '1,30p' README.md

Length of output: 1443


Remove stray “I Integration” entry and normalize TOC indentation

  • Delete the duplicate TOC item and its section:
    • Line 17: remove - [I Integration](#i-integration)
    • Line 75: remove the ### I Integration heading
  • Normalize the list indentation in the Table of Contents:
    • Top-level items start at col 1 (`- […]`)
    • Second-level items are indented two spaces (`  - […]`)
    • Third-level items are indented four spaces (`    - […]`)
  • (Optional) Renumber or drop the “1. …”, “3. …”, “4. …”, “5. …” labels under Architecture Components to avoid the missing “2.” gap.

Suggested diff:

--- a/README.md
+++ b/README.md
@@ -17,1 +17,0 @@
-    - [I Integration](#i-integration)
@@ -75,1 +74,0 @@
-### I Integration

🧰 Tools
🪛 markdownlint-cli2 (0.17.2)

6-6: Unordered list indentation. Expected: 2; Actual: 4 (MD007, ul-indent)
7-7: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
9-9: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
14-14: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
18-18: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
24-24: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
25-25: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
28-28: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
29-29: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
30-30: Inconsistent indentation for list items at the same level. Expected: 4; Actual: 2 (MD005, list-indent)
🤖 Prompt for AI Agents
In README.md from lines 5 to 30, remove the stray "I Integration" entry at line
17 and also delete the corresponding "### I Integration" heading at line 75.
Normalize the Table of Contents indentation so that top-level items start at
column 1 with "- ", second-level items are indented by two spaces with "  - ",
and third-level items by four spaces with "    - ". Optionally, renumber or
remove the numbered labels under Architecture Components to fix the missing "2."
gap for consistent numbering.


---

@@ -137,20 +154,25 @@ npm run dev
- add .env file in `/new-backend`directory.
- add following environment variable in your .env file.
```
HF_TOKEN = <Your_hugging_face_access_token>
GROQ_API_KEY= <groq_api_key>
PINECONE_API_KEY = <your_pinecone_API_KEY>
PORT = 8000
SEARCH_KEY = <your_Google_custom_search_engine_API_key>
```
Comment on lines +157 to 161

💡 Verification agent

🧩 Analysis chain

Standardize .env variable formatting and naming for clarity

Make the keys consistent (no spaces around =, consistent placeholders). This also prevents copy-paste pitfalls.

-  GROQ_API_KEY= <groq_api_key>
-PINECONE_API_KEY = <your_pinecone_API_KEY>
-PORT = 8000
-SEARCH_KEY = <your_Google_custom_search_engine_API_key>
+GROQ_API_KEY=<GROQ_API_KEY>
+PINECONE_API_KEY=<PINECONE_API_KEY>
+PORT=8000
+SEARCH_KEY=<GOOGLE_CSE_API_KEY>
  • If HuggingFace is no longer required for this PR’s RAG/chat flow, remove the earlier “Get HuggingFace Access Token” step or clarify when it’s needed.
  • If additional vars are required for Pinecone (e.g., PINECONE_INDEX, PINECONE_ENVIRONMENT) or GROQ model names, add them here for completeness.

🏁 Script executed:

#!/bin/bash
# Search for Pinecone usage in codebase
grep -R "pinecone" -n .
# Search for HuggingFace related usage in codebase and README
grep -R "HuggingFace" -n .
grep -R "get_huggingface_access_token" -n .

Length of output: 3242


🏁 Script executed:

#!/bin/bash
# Check which environment variables are used for Pinecone initialization
grep -R "os.getenv" -n backend/app/db/vector_store.py

# Preview the beginning of the file to see how Pinecone is configured
sed -n '1,200p' backend/app/db/vector_store.py

Length of output: 1289


🏁 Script executed:

#!/bin/bash
# Search for GROQ usage in the codebase
grep -R "GROQ" -n .

Length of output: 469


#!/bin/bash
# List all environment variables used via os.getenv across the codebase
grep -R "os.getenv" -n .

# Preview the usage of SEARCH_KEY or equivalent in get_rag_data.py
sed -n '1,200p' backend/app/modules/chat/get_rag_data.py

🏁 Script executed:

#!/bin/bash
# List environment variables used in get_rag_data.py
grep -R "os.getenv" -n backend/app/modules/chat/get_rag_data.py

# Show the beginning of the file for context
sed -n '1,200p' backend/app/modules/chat/get_rag_data.py

Length of output: 834


🏁 Script executed:

#!/bin/bash
# Check if SEARCH_KEY or related env vars are used in the codebase
grep -R "SEARCH_KEY" -n .
grep -R "GOOGLE_CSE" -n .

Length of output: 213


Standardize .env entries in README.md for consistency and accuracy

Please update the README’s environment-variable block (lines 157–161) to remove extra spaces around “=”, align placeholder names with the actual keys used in code, and drop the unused HuggingFace step. No additional Pinecone or GROQ variables are required beyond PINECONE_API_KEY and GROQ_API_KEY, and the Google CSE key is read from SEARCH_KEY in web_search.py.

File: README.md, lines 157–161

-  GROQ_API_KEY= <groq_api_key>
-PINECONE_API_KEY = <your_pinecone_API_KEY>
-PORT = 8000
-SEARCH_KEY = <your_Google_custom_search_engine_API_key>
+GROQ_API_KEY=<GROQ_API_KEY>
+PINECONE_API_KEY=<PINECONE_API_KEY>
+PORT=8000
+SEARCH_KEY=<GOOGLE_CSE_API_KEY>

• Remove the Get HuggingFace Access Token step (no HuggingFace calls remain in this PR).
• Confirm that only PINECONE_API_KEY, GROQ_API_KEY, and SEARCH_KEY are required by the code.

📝 Committable suggestion


Suggested change
GROQ_API_KEY= <groq_api_key>
PINECONE_API_KEY = <your_pinecone_API_KEY>
PORT = 8000
SEARCH_KEY = <your_Google_custom_search_engine_API_key>
```
GROQ_API_KEY=<GROQ_API_KEY>
PINECONE_API_KEY=<PINECONE_API_KEY>
PORT=8000
SEARCH_KEY=<GOOGLE_CSE_API_KEY>
🤖 Prompt for AI Agents
In README.md lines 157 to 161, remove the extra spaces around the equal signs in
the environment variable assignments to standardize formatting, ensure the
placeholder names exactly match the keys used in the code (PINECONE_API_KEY,
GROQ_API_KEY, SEARCH_KEY), and delete the entire HuggingFace access token step
since it is no longer used. Confirm that only these three environment variables
are listed and no additional Pinecone or GROQ variables are included.


*Run backend:*
```bash
cd new-backend
cd backend
uv sync # Creating virtual environment at: .venv
uv run main.py #Runs the backend server
```

---


## Architecture Diagram


```mermaid
graph TB
%% Define Subgraphs with Colors and Text Styles
@@ -168,6 +190,7 @@ graph TB
Analyzer[Content Analyzer]
CNEngine[Counter-Narrative Engine]
Context[Context Manager]

end

subgraph AI & NLP Layer
@@ -212,7 +235,7 @@ graph TB

## Required Skills

- **Frontend Development**: Experience with Next.js and modern UI frameworks.
- **Frontend Development**: Experience with Next.js and modern UI frameworks.
- **Backend Development**: Proficiency in Python and FastAPI.
- **AI & NLP**: Familiarity with LangChain, Langgraph, and prompt engineering techniques.
- **Database Management**: Knowledge of vector databases system.
Empty file.
57 changes: 57 additions & 0 deletions backend/app/modules/bias_detection/check_bias.py
@@ -0,0 +1,57 @@
import os
from groq import Groq
from dotenv import load_dotenv
import json

load_dotenv()

client = Groq(api_key=os.getenv("GROQ_API_KEY"))

🛠️ Refactor suggestion

Fail fast if GROQ_API_KEY is missing and use a named parameter.

Avoid constructing the client with a missing/None key. Use a named argument and validate the env var.

-load_dotenv()
-
-client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+load_dotenv()
+api_key = os.getenv("GROQ_API_KEY")
+if not api_key:
+    raise RuntimeError("GROQ_API_KEY is not set")
+client = Groq(api_key=api_key)
📝 Committable suggestion


Suggested change
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY is not set")
client = Groq(api_key=api_key)
🤖 Prompt for AI Agents
In backend/app/modules/bias_detection/check_bias.py at line 8, the Groq client
is created using an environment variable without validation and without using a
named parameter. First, check if the GROQ_API_KEY environment variable is set
and raise an error or exit immediately if it is missing. Then, instantiate the
Groq client using the api_key named parameter explicitly with the validated key.



def check_bias(text):
    try:
        print(text)
        print(json.dumps(text))

Comment on lines +13 to +15

⚠️ Potential issue

Remove PII logging of full article text.

Printing raw article content (and its JSON) to stdout is a PII/data-leak risk and noisy in production logs. Gate behind a debug logger or remove.

-        print(text)
-        print(json.dumps(text))
+        # Consider using a structured logger at DEBUG level if needed:
+        # logger.debug("check_bias called with text length=%d", len(text or ""))
📝 Committable suggestion


Suggested change
        print(text)
        print(json.dumps(text))
        # Consider using a structured logger at DEBUG level if needed:
        # logger.debug("check_bias called with text length=%d", len(text or ""))
🤖 Prompt for AI Agents
In backend/app/modules/bias_detection/check_bias.py around lines 13 to 15, the
code prints the full article text and its JSON representation directly, which
risks exposing PII and cluttering production logs. Remove these print statements
or replace them with debug-level logging that can be enabled or disabled via
configuration to avoid leaking sensitive data in production environments.

        if not text:
            raise ValueError("Missing or empty 'cleaned_text'")

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an assistant that checks "
                        "if given article is biased and give"
                        "score to each based on biasness where 0 is lowest bias and 100 is highest bias"
                        "Only return a number between 0 to 100 base on bias."
                        "only return Number No Text"
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        "Give bias score to the following article "
                        f"\n\n{text}"
                    ),
                },
            ],
            model="gemma2-9b-it",
            temperature=0.3,
            max_tokens=512,
        )

        bias_score = chat_completion.choices[0].message.content.strip()

        return {
            "bias_score": bias_score,
            "status": "success",
        }
Comment on lines +44 to +49

🛠️ Refactor suggestion

Parse and validate numeric output; return a number type.

Currently the function returns a string and trusts the model to output only a number. Parse robustly, clamp to [0,100], and return an int/float to simplify frontend usage.

-        bias_score = chat_completion.choices[0].message.content.strip()
-
-        return {
-            "bias_score": bias_score,
-            "status": "success",
-        }
+        raw = chat_completion.choices[0].message.content.strip()
+        # Extract first integer/float from the response
+        import re
+        m = re.search(r"(\d{1,3})(?:\.\d+)?", raw)
+        if not m:
+            raise ValueError(f"Model did not return a numeric score: {raw!r}")
+        score = int(m.group(1))
+        # Enforce bounds
+        score = max(0, min(100, score))
+        return {
+            "bias_score": score,
+            "status": "success",
+        }

Note: Add import re at the top of the file.

📝 Committable suggestion


Suggested change
        bias_score = chat_completion.choices[0].message.content.strip()
        return {
            "bias_score": bias_score,
            "status": "success",
        }
        raw = chat_completion.choices[0].message.content.strip()
        # Extract first integer/float from the response
        import re
        m = re.search(r"(\d{1,3})(?:\.\d+)?", raw)
        if not m:
            raise ValueError(f"Model did not return a numeric score: {raw!r}")
        score = int(m.group(1))
        # Enforce bounds
        score = max(0, min(100, score))
        return {
            "bias_score": score,
            "status": "success",
        }
🤖 Prompt for AI Agents
In backend/app/modules/bias_detection/check_bias.py around lines 44 to 49, the
function currently returns the bias_score as a string directly from the model
output. To fix this, import the 're' module at the top of the file, then parse
the bias_score string to extract a numeric value using a regular expression,
convert it to a float or int, clamp it within the range 0 to 100, and return
this numeric value instead of the raw string. This ensures robust validation and
simplifies frontend handling.


    except Exception as e:
        print(f"Error in bias_detection: {e}")
        return {
            "status": "error",
            "error_from": "bias_detection",
            "message": str(e),
        }
Empty file.
10 changes: 10 additions & 0 deletions backend/app/modules/chat/embed_query.py
@@ -0,0 +1,10 @@
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")


def embed_query(query: str):

    embeddings = embedder.encode(query).tolist()

    return embeddings
Comment on lines +1 to +10

🛠️ Refactor suggestion

Reuse the existing embedder; add types and input validation.

Avoid loading the same model twice. Import the singleton embedder and ensure we return a List[float].

-from sentence_transformers import SentenceTransformer
-
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-
-def embed_query(query: str):
-
-    embeddings = embedder.encode(query).tolist()
-
-    return embeddings
+from typing import List
+from app.modules.vector_store.embed import embedder
+
+def embed_query(query: str) -> List[float]:
+    if not query or not query.strip():
+        raise ValueError("query must be a non-empty string")
+    embedding = embedder.encode(query).tolist()
+    # Optionally: normalize if index uses cosine similarity without normalized vectors
+    return embedding
📝 Committable suggestion


Suggested change
from sentence_transformers import SentenceTransformer
embedder = SentenceTransformer("all-MiniLM-L6-v2")
def embed_query(query: str):
    embeddings = embedder.encode(query).tolist()
    return embeddings
from typing import List
from app.modules.vector_store.embed import embedder
def embed_query(query: str) -> List[float]:
    if not query or not query.strip():
        raise ValueError("query must be a non-empty string")
    embedding = embedder.encode(query).tolist()
    # Optionally: normalize if index uses cosine similarity without normalized vectors
    return embedding
🤖 Prompt for AI Agents
In backend/app/modules/chat/embed_query.py lines 1 to 10, avoid loading the
SentenceTransformer model again by importing the existing singleton embedder
instead of creating a new one. Add type annotations to the embed_query function
to specify it returns a List[float]. Also, add input validation to ensure the
query parameter is a non-empty string before encoding.

31 changes: 31 additions & 0 deletions backend/app/modules/chat/get_rag_data.py
@@ -0,0 +1,31 @@
from pinecone import Pinecone
from dotenv import load_dotenv
from app.modules.chat.embed_query import embed_query
import os

load_dotenv()

pc = Pinecone(os.getenv("PINECONE_API_KEY"))
index = pc.Index("perspective")

Comment on lines +8 to +10

🛠️ Refactor suggestion

Harden Pinecone client/index initialization and make names configurable.

Validate the API key, use named arg, and allow index/namespace via env for portability.

-load_dotenv()
-
-pc = Pinecone(os.getenv("PINECONE_API_KEY"))
-index = pc.Index("perspective")
+load_dotenv()
+api_key = os.getenv("PINECONE_API_KEY")
+if not api_key:
+    raise RuntimeError("PINECONE_API_KEY is not set")
+pc = Pinecone(api_key=api_key)
+index_name = os.getenv("PINECONE_INDEX_NAME", "perspective")
+namespace = os.getenv("PINECONE_NAMESPACE", "default")
+index = pc.Index(index_name)
🤖 Prompt for AI Agents
In backend/app/modules/chat/get_rag_data.py around lines 8 to 10, the Pinecone
client and index initialization lacks validation and configurability. Fix this
by first validating that the PINECONE_API_KEY environment variable is set and
raise an error if missing. Use named arguments when initializing the Pinecone
client. Also, make the index name and namespace configurable by reading them
from environment variables with sensible defaults to improve portability.


def search_pinecone(query: str, top_k: int = 5):

    embeddings = embed_query(query)

    results = index.query(
        vector=embeddings,
        top_k=top_k,
        include_metadata=True,
        namespace="default"

    )

    matches = []
    for match in results["matches"]:
        matches.append({
            "id": match["id"],
            "score": match["score"],
            "metadata": match["metadata"]
        })
    return matches
Comment on lines +12 to +31

🛠️ Refactor suggestion

⚠️ Potential issue

Fix result parsing for Pinecone v3 responses; add validation and error handling.

index.query(...) often returns an object with a matches attribute (not subscriptable). Also validate input and handle empty/no-match cases.

-def search_pinecone(query: str, top_k: int = 5):
-
-    embeddings = embed_query(query)
-
-    results = index.query(
-        vector=embeddings,
-        top_k=top_k,
-        include_metadata=True,
-        namespace="default"
-
-    )
-
-    matches = []
-    for match in results["matches"]:
-        matches.append({
-            "id": match["id"],
-            "score": match["score"],
-            "metadata": match["metadata"]
-        })
-    return matches
+def search_pinecone(query: str, top_k: int = 5):
+    if not query or not query.strip():
+        return []
+    embedding = embed_query(query)
+    try:
+        res = index.query(
+            vector=embedding,
+            top_k=top_k,
+            include_metadata=True,
+            namespace=namespace,
+        )
+    except Exception as e:
+        # Consider logging and surfacing a structured error upstream
+        # logger.exception("Pinecone query failed")
+        return []
+
+    # Support both dict-like and object-like responses
+    raw_matches = []
+    if hasattr(res, "matches"):
+        raw_matches = res.matches or []
+    elif isinstance(res, dict):
+        raw_matches = res.get("matches", []) or []
+
+    normalized = []
+    for m in raw_matches:
+        # Support both dict items and attribute access
+        mid = m.get("id") if isinstance(m, dict) else getattr(m, "id", None)
+        mscore = m.get("score") if isinstance(m, dict) else getattr(m, "score", None)
+        mmeta = m.get("metadata") if isinstance(m, dict) else getattr(m, "metadata", None)
+        if mid is None:
+            continue
+        normalized.append({"id": mid, "score": mscore, "metadata": mmeta})
+    return normalized

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In backend/app/modules/chat/get_rag_data.py around lines 12 to 31, the code
incorrectly accesses Pinecone query results as a dictionary, but Pinecone v3
returns an object with a matches attribute. Update the code to access
results.matches instead of results["matches"]. Add input validation for the
query parameter to ensure it is a non-empty string. Include error handling for
cases where results.matches is empty or None, returning an empty list or
appropriate response to avoid runtime errors.
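
For reference, a quick usage sketch of the hardened helper described above (the query string and metadata field shown here are made up for illustration):

```python
# Hypothetical usage of a search_pinecone helper that returns normalized
# {"id", "score", "metadata"} dicts, as sketched in the suggestion above.
matches = search_pinecone("renewable energy subsidies", top_k=3)
for m in matches:
    meta = m.get("metadata") or {}
    print(m["id"], round(m["score"] or 0.0, 3), meta.get("explanation", ""))
```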

35 changes: 35 additions & 0 deletions backend/app/modules/chat/llm_processing.py
@@ -0,0 +1,35 @@
import os
from groq import Groq
from dotenv import load_dotenv

load_dotenv()

client = Groq(api_key=os.getenv("GROQ_API_KEY"))

Comment on lines +7 to +8

🛠️ Refactor suggestion

Fail fast when the API key is missing

If GROQ_API_KEY is undefined, Groq() still instantiates and later requests fail with 401.
Add an explicit check and raise a clear error during startup.

🤖 Prompt for AI Agents
In backend/app/modules/chat/llm_processing.py around lines 7 to 8, the code
instantiates the Groq client without verifying if the GROQ_API_KEY environment
variable is set, which can lead to 401 errors later. Add an explicit check right
before creating the Groq client to verify if the API key is present; if it is
missing, raise a clear and descriptive error to fail fast during startup.
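
A minimal sketch of that fail-fast check, mirroring the pattern already suggested for check_bias.py (the error message wording is illustrative):

```python
import os

from dotenv import load_dotenv
from groq import Groq

load_dotenv()

# Fail fast at import time: a missing key should surface as a clear
# configuration error, not as a 401 on the first chat completion call.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY is not set; add it to backend/.env")
client = Groq(api_key=api_key)
```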


def build_context(docs):

    return "\n".join(f"{m['metadata'].get('explanation') or m['metadata'].get('reasoning', '')}" for m in docs)


def ask_llm(question, docs):
    context = build_context(docs)
    print(context)
    prompt = f"""You are an assistant that answers based on context.
Comment on lines +17 to +18

⚠️ Potential issue

Remove plaintext context logging in production

print(context) risks leaking user content and rapidly floods logs on long prompts.
Guard behind a debug flag or drop entirely.

🤖 Prompt for AI Agents
In backend/app/modules/chat/llm_processing.py around lines 17 to 18, the print
statement outputs the context in plaintext, which risks leaking sensitive user
data and flooding logs. Remove the print(context) statement or wrap it in a
conditional debug flag so it only logs when debugging is enabled, ensuring no
sensitive information is logged in production.
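
One possible shape for the gated logging (the DEBUG_LOG_CONTEXT flag and logger setup are illustrative, not part of this PR):

```python
import logging
import os

logger = logging.getLogger(__name__)

# Illustrative switch: verbose context logging only when explicitly enabled.
DEBUG_LOG_CONTEXT = os.getenv("DEBUG_LOG_CONTEXT", "0") == "1"


def log_context(context: str) -> None:
    # Never print raw context in production; log its size instead,
    # and emit the full text only under an explicit debug flag.
    if DEBUG_LOG_CONTEXT:
        logger.debug("LLM context (%d chars): %s", len(context), context)
    else:
        logger.info("LLM context prepared (%d chars)", len(context))
```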


Context:
{context}

Question:
{question}
"""

    response = client.chat.completions.create(
        model="gemma2-9b-it",
        messages=[
            {"role": "system", "content": "Use only the context to answer."},
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content
1 change: 1 addition & 0 deletions backend/app/modules/vector_store/embed.py
@@ -28,3 +28,4 @@ def embed_chunks(chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
            "metadata": chunk["metadata"]
        })
    return vectors

31 changes: 29 additions & 2 deletions backend/app/routes/routes.py
@@ -2,6 +2,10 @@
from pydantic import BaseModel
from app.modules.pipeline import run_scraper_pipeline
from app.modules.pipeline import run_langgraph_workflow
from app.modules.bias_detection.check_bias import check_bias
from app.modules.chat.get_rag_data import search_pinecone
from app.modules.chat.llm_processing import ask_llm
import asyncio
import json

router = APIRouter()
@@ -11,14 +15,37 @@ class URlRequest(BaseModel):
    url: str


class ChatQuery(BaseModel):
    message: str


@router.get("/")
async def home():
    return {"message": "Perspective API is live!"}


@router.post("/bias")
async def bias_detection(request: URlRequest):
    content = await asyncio.to_thread(run_scraper_pipeline, (request.url))
    bias_score = await asyncio.to_thread(check_bias, (content))
    print(bias_score)
    return bias_score


@router.post("/process")
async def run_pipelines(request: URlRequest):
    article_text = run_scraper_pipeline(request.url)
    article_text = await asyncio.to_thread(run_scraper_pipeline, (request.url))
    print(json.dumps(article_text, indent=2))
    data = run_langgraph_workflow(article_text)
    data = await asyncio.to_thread(run_langgraph_workflow, (article_text))
    return data
Comment on lines +37 to 40

🛠️ Refactor suggestion

Same tuple issue in /process endpoint

run_scraper_pipeline and run_langgraph_workflow receive tuples instead of their expected args. Fix as above.

🤖 Prompt for AI Agents
In backend/app/routes/routes.py around lines 37 to 40, the functions
run_scraper_pipeline and run_langgraph_workflow are incorrectly called with
single-element tuples due to extra parentheses around their arguments. Remove
the parentheses around the arguments so that the functions receive the expected
single argument instead of a tuple. For example, change calls from
asyncio.to_thread(run_scraper_pipeline, (request.url)) to
asyncio.to_thread(run_scraper_pipeline, request.url).
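
A sketch of the cleaned-up handler. Note that in Python `(request.url)` is already just `request.url` (a one-element tuple needs a trailing comma), so the change is about readability and avoiding the `(x,)` pitfall rather than fixing a crash:

```python
@router.post("/process")
async def run_pipelines(request: URlRequest):
    # Pass arguments directly; the extra parentheses add nothing and
    # invite the accidental one-element tuple (request.url,).
    article_text = await asyncio.to_thread(run_scraper_pipeline, request.url)
    data = await asyncio.to_thread(run_langgraph_workflow, article_text)
    return data
```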



@router.post("/chat")
async def answer_query(request: ChatQuery):

query = request.message
results = search_pinecone(query)
answer = ask_llm(query, results)
print(answer)

return {"answer": answer}