From 3238449ae9e3c6927efafd45bfdbf5e7bdf06dd7 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:21:15 +0530 Subject: [PATCH 01/10] [refactor]: restructure agents system architecture --- backend/app/agents/__init__.py | 4 +-- backend/app/agents/{shared => }/base_agent.py | 0 .../{shared => }/classification_router.py | 0 backend/app/agents/devrel/__init__.py | 1 + backend/app/agents/devrel/agent.py | 18 +++++----- .../{nodes => }/generate_response_node.py | 24 +++---------- ...ther_context_node.py => gather_context.py} | 3 +- .../{handle_faq_node.py => handlers/faq.py} | 2 +- .../onboarding.py} | 2 +- .../technical_support.py} | 2 +- .../nodes/{ => handlers}/user_support.py | 0 .../web_search.py} | 35 ++++++++++++++++--- .../app/agents/devrel/nodes/human_in_loop.py | 0 ...summarization_node.py => summarization.py} | 15 ++++---- backend/app/agents/shared/__init__.py | 0 .../app/agents/shared/response_coordinator.py | 0 backend/app/agents/{shared => }/state.py | 0 17 files changed, 58 insertions(+), 48 deletions(-) rename backend/app/agents/{shared => }/base_agent.py (100%) rename backend/app/agents/{shared => }/classification_router.py (100%) rename backend/app/agents/devrel/{nodes => }/generate_response_node.py (75%) rename backend/app/agents/devrel/nodes/{gather_context_node.py => gather_context.py} (90%) rename backend/app/agents/devrel/nodes/{handle_faq_node.py => handlers/faq.py} (94%) rename backend/app/agents/devrel/nodes/{handle_onboarding_node.py => handlers/onboarding.py} (91%) rename backend/app/agents/devrel/nodes/{handle_technical_support_node.py => handlers/technical_support.py} (91%) rename backend/app/agents/devrel/nodes/{ => handlers}/user_support.py (100%) rename backend/app/agents/devrel/nodes/{handle_web_search_node.py => handlers/web_search.py} (54%) delete mode 100644 backend/app/agents/devrel/nodes/human_in_loop.py rename backend/app/agents/devrel/nodes/{summarization_node.py => summarization.py} (93%) delete mode 100644 
backend/app/agents/shared/__init__.py delete mode 100644 backend/app/agents/shared/response_coordinator.py rename backend/app/agents/{shared => }/state.py (100%) diff --git a/backend/app/agents/__init__.py b/backend/app/agents/__init__.py index f846d24e..a48cce4b 100644 --- a/backend/app/agents/__init__.py +++ b/backend/app/agents/__init__.py @@ -1,6 +1,6 @@ from .devrel.agent import DevRelAgent -from .shared.base_agent import BaseAgent, AgentState -from .shared.classification_router import ClassificationRouter +from .base_agent import BaseAgent, AgentState +from .classification_router import ClassificationRouter __all__ = [ "DevRelAgent", diff --git a/backend/app/agents/shared/base_agent.py b/backend/app/agents/base_agent.py similarity index 100% rename from backend/app/agents/shared/base_agent.py rename to backend/app/agents/base_agent.py diff --git a/backend/app/agents/shared/classification_router.py b/backend/app/agents/classification_router.py similarity index 100% rename from backend/app/agents/shared/classification_router.py rename to backend/app/agents/classification_router.py diff --git a/backend/app/agents/devrel/__init__.py b/backend/app/agents/devrel/__init__.py index e69de29b..8b137891 100644 --- a/backend/app/agents/devrel/__init__.py +++ b/backend/app/agents/devrel/__init__.py @@ -0,0 +1 @@ + diff --git a/backend/app/agents/devrel/agent.py b/backend/app/agents/devrel/agent.py index c94fe211..d3733db3 100644 --- a/backend/app/agents/devrel/agent.py +++ b/backend/app/agents/devrel/agent.py @@ -4,18 +4,18 @@ from langgraph.graph import StateGraph, END from langchain_google_genai import ChatGoogleGenerativeAI from langgraph.checkpoint.memory import InMemorySaver -from ..shared.base_agent import BaseAgent, AgentState -from ..shared.classification_router import MessageCategory +from ..base_agent import BaseAgent, AgentState +from ..classification_router import MessageCategory from .tools.search_tool import TavilySearchTool from .tools.faq_tool import 
FAQTool from app.core.config import settings -from .nodes.gather_context_node import gather_context_node -from .nodes.handle_faq_node import handle_faq_node -from .nodes.handle_web_search_node import handle_web_search_node -from .nodes.handle_technical_support_node import handle_technical_support_node -from .nodes.handle_onboarding_node import handle_onboarding_node -from .nodes.generate_response_node import generate_response_node -from .nodes.summarization_node import check_summarization_needed, summarize_conversation_node, store_summary_to_database +from .nodes.gather_context import gather_context_node +from .nodes.handlers.faq import handle_faq_node +from .nodes.handlers.web_search import handle_web_search_node +from .nodes.handlers.technical_support import handle_technical_support_node +from .nodes.handlers.onboarding import handle_onboarding_node +from .generate_response_node import generate_response_node +from .nodes.summarization import check_summarization_needed, summarize_conversation_node, store_summary_to_database logger = logging.getLogger(__name__) diff --git a/backend/app/agents/devrel/nodes/generate_response_node.py b/backend/app/agents/devrel/generate_response_node.py similarity index 75% rename from backend/app/agents/devrel/nodes/generate_response_node.py rename to backend/app/agents/devrel/generate_response_node.py index 503ebb95..a83bcb5f 100644 --- a/backend/app/agents/devrel/nodes/generate_response_node.py +++ b/backend/app/agents/devrel/generate_response_node.py @@ -1,28 +1,12 @@ import logging from typing import Dict, Any -from app.agents.shared.state import AgentState +from app.agents.state import AgentState from langchain_core.messages import HumanMessage -from ..prompts.base_prompt import GENERAL_LLM_RESPONSE_PROMPT +from .prompts.base_prompt import GENERAL_LLM_RESPONSE_PROMPT +from .nodes.handlers.web_search import create_search_response logger = logging.getLogger(__name__) -async def _create_search_response(task_result: Dict[str, Any]) 
-> str: - """Create a response string from search results.""" - query = task_result.get("query") - results = task_result.get("results", []) - if not results: - return f"I couldn't find any information for '{query}'. You might want to try rephrasing your search." - - response_parts = [f"Here's what I found for '{query}':"] - for i, result in enumerate(results[:3]): - title = result.get('title', 'N/A') - snippet = result.get('snippet', 'N/A') - url = result.get('url', '#') - result_line = f"{i+1}. {title}: {snippet}" - response_parts.append(result_line) - response_parts.append(f" (Source: {url})") - response_parts.append("You can ask me to search again with a different query if these aren't helpful.") - return "\n".join(response_parts) async def _create_llm_response(state: AgentState, task_result: Dict[str, Any], llm) -> str: """Generate a response using the LLM based on the current state and task result.""" @@ -89,7 +73,7 @@ async def generate_response_node(state: AgentState, llm) -> dict: if task_result.get("type") == "faq": final_response = task_result.get("response", "I don't have a specific answer for that question.") elif task_result.get("type") == "web_search": - final_response = await _create_search_response(task_result) + final_response = create_search_response(task_result) else: final_response = await _create_llm_response(state, task_result, llm) diff --git a/backend/app/agents/devrel/nodes/gather_context_node.py b/backend/app/agents/devrel/nodes/gather_context.py similarity index 90% rename from backend/app/agents/devrel/nodes/gather_context_node.py rename to backend/app/agents/devrel/nodes/gather_context.py index f7fbc8e3..0a33de68 100644 --- a/backend/app/agents/devrel/nodes/gather_context_node.py +++ b/backend/app/agents/devrel/nodes/gather_context.py @@ -1,7 +1,6 @@ import logging from datetime import datetime -from app.agents.shared.state import AgentState -from app.agents.shared.classification_router import MessageCategory +from app.agents.state 
import AgentState logger = logging.getLogger(__name__) diff --git a/backend/app/agents/devrel/nodes/handle_faq_node.py b/backend/app/agents/devrel/nodes/handlers/faq.py similarity index 94% rename from backend/app/agents/devrel/nodes/handle_faq_node.py rename to backend/app/agents/devrel/nodes/handlers/faq.py index e6b2aaec..8855c323 100644 --- a/backend/app/agents/devrel/nodes/handle_faq_node.py +++ b/backend/app/agents/devrel/nodes/handlers/faq.py @@ -1,5 +1,5 @@ import logging -from app.agents.shared.state import AgentState +from app.agents.state import AgentState logger = logging.getLogger(__name__) diff --git a/backend/app/agents/devrel/nodes/handle_onboarding_node.py b/backend/app/agents/devrel/nodes/handlers/onboarding.py similarity index 91% rename from backend/app/agents/devrel/nodes/handle_onboarding_node.py rename to backend/app/agents/devrel/nodes/handlers/onboarding.py index 3f63d65b..86bba563 100644 --- a/backend/app/agents/devrel/nodes/handle_onboarding_node.py +++ b/backend/app/agents/devrel/nodes/handlers/onboarding.py @@ -1,5 +1,5 @@ import logging -from app.agents.shared.state import AgentState +from app.agents.state import AgentState logger = logging.getLogger(__name__) diff --git a/backend/app/agents/devrel/nodes/handle_technical_support_node.py b/backend/app/agents/devrel/nodes/handlers/technical_support.py similarity index 91% rename from backend/app/agents/devrel/nodes/handle_technical_support_node.py rename to backend/app/agents/devrel/nodes/handlers/technical_support.py index edb672c8..2d4414e8 100644 --- a/backend/app/agents/devrel/nodes/handle_technical_support_node.py +++ b/backend/app/agents/devrel/nodes/handlers/technical_support.py @@ -1,5 +1,5 @@ import logging -from app.agents.shared.state import AgentState +from app.agents.state import AgentState logger = logging.getLogger(__name__) diff --git a/backend/app/agents/devrel/nodes/user_support.py b/backend/app/agents/devrel/nodes/handlers/user_support.py similarity index 100% rename 
from backend/app/agents/devrel/nodes/user_support.py rename to backend/app/agents/devrel/nodes/handlers/user_support.py diff --git a/backend/app/agents/devrel/nodes/handle_web_search_node.py b/backend/app/agents/devrel/nodes/handlers/web_search.py similarity index 54% rename from backend/app/agents/devrel/nodes/handle_web_search_node.py rename to backend/app/agents/devrel/nodes/handlers/web_search.py index 8fbd8c1f..db7cd9cc 100644 --- a/backend/app/agents/devrel/nodes/handle_web_search_node.py +++ b/backend/app/agents/devrel/nodes/handlers/web_search.py @@ -1,12 +1,15 @@ import logging -from app.agents.shared.state import AgentState +from typing import Dict, Any +from app.agents.state import AgentState from langchain_core.messages import HumanMessage -from ..prompts.search_prompt import EXTRACT_SEARCH_QUERY_PROMPT +from ...prompts.search_prompt import EXTRACT_SEARCH_QUERY_PROMPT logger = logging.getLogger(__name__) async def _extract_search_query(message: str, llm) -> str: - """Extract a concise search query from the user's message.""" + """ + Extract a concise search query from the user's message by invoking the LLM. + """ logger.info(f"Extracting search query from: {message[:100]}") try: prompt = EXTRACT_SEARCH_QUERY_PROMPT.format(message=message) @@ -19,7 +22,9 @@ async def _extract_search_query(message: str, llm) -> str: return search_query async def handle_web_search_node(state: AgentState, search_tool, llm) -> dict: - """Handle web search requests""" + """ + Handle web search requests + """ logger.info(f"Handling web search for session {state.session_id}") latest_message = "" @@ -41,3 +46,25 @@ async def handle_web_search_node(state: AgentState, search_tool, llm) -> dict: "tools_used": ["tavily_search"], "current_task": "web_search_handled" } + +def create_search_response(task_result: Dict[str, Any]) -> str: + """ + Create a user-friendly response string from search results. 
+ """ + query = task_result.get("query") + results = task_result.get("results", []) + + if not results: + return f"I couldn't find any information for '{query}'. You might want to try rephrasing your search." + + response_parts = [f"Here's what I found for '{query}':"] + for i, result in enumerate(results[:5]): + title = result.get('title', 'N/A') + snippet = result.get('snippet', 'N/A') + url = result.get('url', '#') + result_line = f"{i+1}. {title}: {snippet}" + response_parts.append(result_line) + response_parts.append(f" (Source: {url})") + + response_parts.append("You can ask me to search again with a different query if these aren't helpful.") + return "\n".join(response_parts) diff --git a/backend/app/agents/devrel/nodes/human_in_loop.py b/backend/app/agents/devrel/nodes/human_in_loop.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/agents/devrel/nodes/summarization_node.py b/backend/app/agents/devrel/nodes/summarization.py similarity index 93% rename from backend/app/agents/devrel/nodes/summarization_node.py rename to backend/app/agents/devrel/nodes/summarization.py index 0219fc50..10281a2f 100644 --- a/backend/app/agents/devrel/nodes/summarization_node.py +++ b/backend/app/agents/devrel/nodes/summarization.py @@ -1,7 +1,7 @@ import logging from datetime import datetime, timedelta from typing import Dict, Any -from app.agents.shared.state import AgentState +from app.agents.state import AgentState from langchain_core.messages import HumanMessage from app.agents.devrel.prompts.summarization_prompt import CONVERSATION_SUMMARY_PROMPT @@ -12,7 +12,9 @@ THREAD_TIMEOUT_HOURS = 1 async def check_summarization_needed(state: AgentState) -> Dict[str, Any]: - """Check if summarization is needed and update interaction count""" + """ + Check if summarization is needed and update interaction count + """ current_count = getattr(state, 'interaction_count', 0) new_count = current_count + 1 @@ -46,14 +48,15 @@ async def 
check_summarization_needed(state: AgentState) -> Dict[str, Any]: return updates async def summarize_conversation_node(state: AgentState, llm) -> Dict[str, Any]: - """Summarize the conversation and update the state""" + """ + Summarize the conversation and update the state + """ logger.info(f"Summarizing conversation for session {state.session_id}") try: current_count = state.interaction_count logger.info(f"Summarizing at interaction count: {current_count}") - # Get the recent messages all_messages = state.messages if not all_messages: @@ -66,7 +69,6 @@ async def summarize_conversation_node(state: AgentState, llm) -> Dict[str, Any]: for msg in all_messages ]) - # Create prompt existing_summary = state.conversation_summary if not existing_summary or existing_summary == "This is the beginning of our conversation.": existing_summary = "No previous summary - this is the start of our conversation tracking." @@ -85,11 +87,9 @@ async def summarize_conversation_node(state: AgentState, llm) -> Dict[str, Any]: logger.info(f"Generating summary with {len(all_messages)} messages, " f"conversation text length: {len(conversation_text)}") - # Generate summary response = await llm.ainvoke([HumanMessage(content=prompt)]) new_summary = response.content.strip() - # Extract key topics from summary new_topics = await _extract_key_topics(new_summary, llm) logger.info(f"Conversation summarized successfully for session {state.session_id}") @@ -121,7 +121,6 @@ async def _extract_key_topics(summary: str, llm) -> list[str]: response = await llm.ainvoke([HumanMessage(content=topic_prompt)]) topics_text = response.content.strip() - # Parse topics from response topics = [topic.strip() for topic in topics_text.split(',') if topic.strip()] return topics[:5] # Limiting to 5 topics diff --git a/backend/app/agents/shared/__init__.py b/backend/app/agents/shared/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/agents/shared/response_coordinator.py 
b/backend/app/agents/shared/response_coordinator.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/agents/shared/state.py b/backend/app/agents/state.py similarity index 100% rename from backend/app/agents/shared/state.py rename to backend/app/agents/state.py From a6ee0959fd05e63b61260b9781750d911cb63f26 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:21:57 +0530 Subject: [PATCH 02/10] =?UTF-8?q?[refactor]:=20restructure=20database=20la?= =?UTF-8?q?yer=20(db=20=E2=86=92=20database)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/database/__init__.py | 0 backend/app/database/supabase/__init__.py | 0 .../supabase/client.py} | 0 .../database/supabase/scripts/create_db.sql | 107 ++++++ .../database/supabase/scripts/populate_db.sql | 127 +++++++ backend/app/database/weaviate/__init__.py | 0 .../weaviate/client.py} | 0 .../weaviate/operations.py} | 4 +- .../app/database/weaviate/scripts/__init__.py | 0 .../weaviate/scripts/create_schemas.py | 57 ++++ .../database/weaviate/scripts/populate_db.py | 306 +++++++++++++++++ backend/app/db/supabase/auth.py | 49 --- backend/app/db/supabase/users_service.py | 203 ------------ backend/app/db/weaviate/user_profiling.py | 310 ------------------ 14 files changed, 599 insertions(+), 564 deletions(-) create mode 100644 backend/app/database/__init__.py create mode 100644 backend/app/database/supabase/__init__.py rename backend/app/{db/supabase/supabase_client.py => database/supabase/client.py} (100%) create mode 100644 backend/app/database/supabase/scripts/create_db.sql create mode 100644 backend/app/database/supabase/scripts/populate_db.sql create mode 100644 backend/app/database/weaviate/__init__.py rename backend/app/{db/weaviate/weaviate_client.py => database/weaviate/client.py} (100%) rename backend/app/{db/weaviate/weaviate_operations.py => database/weaviate/operations.py} (97%) create mode 100644 
backend/app/database/weaviate/scripts/__init__.py create mode 100644 backend/app/database/weaviate/scripts/create_schemas.py create mode 100644 backend/app/database/weaviate/scripts/populate_db.py delete mode 100644 backend/app/db/supabase/auth.py delete mode 100644 backend/app/db/supabase/users_service.py delete mode 100644 backend/app/db/weaviate/user_profiling.py diff --git a/backend/app/database/__init__.py b/backend/app/database/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/database/supabase/__init__.py b/backend/app/database/supabase/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/db/supabase/supabase_client.py b/backend/app/database/supabase/client.py similarity index 100% rename from backend/app/db/supabase/supabase_client.py rename to backend/app/database/supabase/client.py diff --git a/backend/app/database/supabase/scripts/create_db.sql b/backend/app/database/supabase/scripts/create_db.sql new file mode 100644 index 00000000..8ccd2df3 --- /dev/null +++ b/backend/app/database/supabase/scripts/create_db.sql @@ -0,0 +1,107 @@ +-- Drop existing tables if they exist +DROP TABLE IF EXISTS conversation_context; +DROP TABLE IF EXISTS interactions; +DROP TABLE IF EXISTS repositories; +DROP TABLE IF EXISTS users; + +-- Table: users +CREATE TABLE users ( + id UUID PRIMARY KEY NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + + -- The email is optional to allow social-only sign-ups, but must be unique if provided. + email TEXT UNIQUE, + + -- Social IDs + discord_id TEXT UNIQUE, + discord_username TEXT, + github_id TEXT UNIQUE, + github_username TEXT, + slack_id TEXT UNIQUE, + slack_username TEXT, + + display_name TEXT NOT NULL, + avatar_url TEXT, + bio TEXT, + location TEXT, + + -- Verification fields to manage the GitHub linking flow. 
+ is_verified BOOLEAN NOT NULL DEFAULT false, + verification_token TEXT UNIQUE, + verification_token_expires_at TIMESTAMPTZ, + verified_at TIMESTAMPTZ, + + skills JSONB, + github_stats JSONB, + + last_active_discord TIMESTAMPTZ, + last_active_github TIMESTAMPTZ, + last_active_slack TIMESTAMPTZ, + + total_interactions_count INTEGER NOT NULL DEFAULT 0, + preferred_languages TEXT[] +); + +-- Create index for efficient cleanup queries +CREATE INDEX IF NOT EXISTS idx_users_verification_token_expires_at +ON users(verification_token_expires_at) +WHERE verification_token_expires_at IS NOT NULL; + +-- Create index for efficient verification queries +CREATE INDEX IF NOT EXISTS idx_users_discord_verification +ON users(discord_id, verification_token) +WHERE verification_token IS NOT NULL; + +-- Table: repositories +CREATE TABLE repositories ( + id UUID PRIMARY KEY NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + github_id BIGINT UNIQUE NOT NULL, + full_name TEXT NOT NULL, + name TEXT NOT NULL, + owner TEXT NOT NULL, + description TEXT, + stars_count INTEGER NOT NULL DEFAULT 0, + forks_count INTEGER NOT NULL DEFAULT 0, + open_issues_count INTEGER NOT NULL DEFAULT 0, + languages_used TEXT[], + topics TEXT[], + is_indexed BOOLEAN NOT NULL DEFAULT false, + indexed_at TIMESTAMPTZ, + indexing_status TEXT, + last_commit_hash TEXT +); + +-- Table: interactions +CREATE TABLE interactions ( + id UUID PRIMARY KEY NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + repository_id UUID REFERENCES repositories(id) ON DELETE SET NULL, + platform TEXT NOT NULL, + platform_specific_id TEXT NOT NULL, + channel_id TEXT, + thread_id TEXT, + content TEXT, + interaction_type TEXT, + sentiment_score FLOAT, + intent_classification TEXT, + topics_discussed TEXT[], + metadata JSONB +); + +-- Table: conversation_context +CREATE TABLE conversation_context ( + id UUID
PRIMARY KEY NOT NULL, + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + platform TEXT NOT NULL, + memory_thread_id TEXT NOT NULL UNIQUE, + conversation_summary TEXT, + key_topics TEXT[], + total_interactions INTEGER, + session_start_time TIMESTAMPTZ, + session_end_time TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/backend/app/database/supabase/scripts/populate_db.sql b/backend/app/database/supabase/scripts/populate_db.sql new file mode 100644 index 00000000..a3d3ee5d --- /dev/null +++ b/backend/app/database/supabase/scripts/populate_db.sql @@ -0,0 +1,127 @@ +-- Users +insert into + users ( + id, created_at, updated_at, email, discord_id, discord_username, + github_id, github_username, slack_id, slack_username, display_name, + avatar_url, bio, location, is_verified, verification_token, + verification_token_expires_at, verified_at, skills, github_stats, + last_active_discord, last_active_github, last_active_slack, + total_interactions_count, preferred_languages + ) +values + ( + '6afc59e3-18b7-4182-b42c-8210d1152b07', '2025-05-05 03:56:41', '2025-01-22 14:50:25', + 'blakeerik@yahoo.com', '3eb13b9046684257', 'donaldgarcia', '16419f828b9d4434', 'fjohnson', + '9a1de644815e46d1', 'hoffmanjennifer', 'Jennifer Cole', 'https://dummyimage.com/696x569', + 'Bill here grow gas enough analysis. Movie win her need stop peace technology.', 'East Steven', + true, null, null, '2025-05-14 15:04:01', + '{"skills": ["Python", "C++", "Java"]}'::jsonb, '{"commits": 300}'::jsonb, + '2025-04-19 03:34:26', '2025-02-12 15:28:51', '2025-05-13 22:32:01', 28, + array['JavaScript', 'C++'] + ), + ( + '6f990423-0d57-4c64-b191-17e53f39c799', '2025-01-11 20:41:23', '2025-02-14 11:26:28', + 'jeffrey28@yahoo.com', '50c187fcce174b4e', 'nadams', 'e059a0ee9132463e', 'jason76', + '757750a9a49140b2', 'josephwright', 'Deborah Richards', 'https://www.lorempixel.com/186/96', + 'Civil quite others his other life edge network. 
Quite boy those.', 'Kathrynside', + true, null, null, '2025-01-01 02:39:54', + '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, '{"commits": 139}'::jsonb, + '2025-04-27 07:17:02', '2025-03-04 22:40:36', '2025-04-05 21:04:03', 75, + array['Go', 'Python'] + ), + ( + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', '2025-03-01 17:07:10', '2025-02-16 11:55:43', + 'samuel87@gmail.com', '913e4de2e0c54cb8', 'millertodd', '885f6e66c2b642c5', 'davidalvarez', + '8715a10343da4043', 'ibrandt', 'Melissa Marquez', 'https://www.lorempixel.com/507/460', + 'Open discover detail. Remain arrive attack all. Audience draw protect Democrat car very.', 'Stevenland', + false, 'db20a56e-dc81-4fe7-8eda-8bbb71710434', '2025-06-21 12:00:00', null, + '{"skills": ["Python", "JavaScript", "C++"]}'::jsonb, '{"commits": 567}'::jsonb, + '2025-01-20 00:17:15', '2025-01-10 19:45:31', '2025-05-07 15:12:55', 77, + array['Python', 'Rust'] + ); + +-- Repositories +insert into + repositories ( + id, created_at, updated_at, github_id, full_name, name, owner, description, + stars_count, forks_count, open_issues_count, languages_used, topics, is_indexed, + indexed_at, indexing_status, last_commit_hash + ) +values + ( + 'f6b0bff9-074d-4062-86f5-0a853e521334', '2025-05-16 10:34:41', '2025-02-16 08:54:52', 3728882, + 'jamessellers/repo_0', 'repo_0', 'jamessellers', 'Him task improve fish list tree high.', + 3032, 363, 26, array['C++', 'Python'], array['Java', 'C++'], true, '2025-05-09 21:00:50', + 'completed', 'e270dbf424cff6864cc592f6611d8df90c895ec5' + ), + ( + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', '2025-01-08 04:31:26', '2025-01-25 12:21:00', 3741438, + 'gallowayjoseph/repo_1', 'repo_1', 'gallowayjoseph', 'Whole forward beyond suddenly between treat address.', + 3786, 388, 34, array['C++', 'Go'], array['C++', 'Rust'], true, '2025-01-28 23:48:46', + 'completed', 'c9f97db5d2fc4b809df59bc23dd7345dbe6d14d5' + ), + ( + '08946f22-0d74-4499-b40d-0f60218d5152', '2025-04-02 03:59:05', '2025-02-21 11:05:44', 6292423, + 
'fjohnson/repo_2', 'repo_2', 'fjohnson', 'Perhaps however bag forget purpose move.', + 3286, 274, 8, array['JavaScript', 'HTML'], array['Rust', 'C++'], false, '2025-03-03 11:44:52', + 'pending', '5e3af4aafc18e025cea707fa7707a1d945e0ffef' + ); + +-- Interactions +insert into + interactions ( + id, created_at, user_id, repository_id, platform, platform_specific_id, channel_id, + thread_id, content, interaction_type, sentiment_score, intent_classification, + topics_discussed, metadata + ) +values + ( + '7c59fe66-53b6-44b5-8ae1-ddc29b071097', '2025-03-10 12:14:30', '6afc59e3-18b7-4182-b42c-8210d1152b07', + 'f6b0bff9-074d-4062-86f5-0a853e521334', 'github', 'aa143cd82ff34de4', + 'f982f4e08603456a', '86abd4e7f4124360', + 'Skill medical after them analysis hit health. Ground attack drop. Billion old series card good full poor store.', + 'comment', -0.07, 'help_request', array['C++', 'TypeScript'], '{"info": "capital"}'::jsonb + ), + ( + 'f0c80815-fde1-4644-94ca-cd8915f11e46', '2025-03-19 16:14:11', '6f990423-0d57-4c64-b191-17e53f39c799', + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', 'github', '62fb26d7f4db4a07', + '7f072cb92fd340c0', 'ec9f9c545e0a42ab', + 'Song risk bad own state. Family bill foreign fast knowledge response coach. Goal amount thank good your ever.', + 'pr', 0.6, 'help_request', array['JavaScript', 'TypeScript'], '{"info": "already"}'::jsonb + ), + ( + 'ef139daa-fa4c-445a-8bf7-fdd725bdb82c', '2025-05-06 06:40:36', '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + '08946f22-0d74-4499-b40d-0f60218d5152', 'slack', '9136f1f8f31046dc', + 'add702c92747493c', '5f3c44dc5ef747b8', + 'Off morning huge power. Whether ago control military trial. 
Energy employee land you.', + 'message', -0.16, 'feature_request', array['Go', 'JavaScript'], '{"info": "security"}'::jsonb + ); + +-- Conversation Context +insert into + conversation_context ( + id, user_id, platform, memory_thread_id, conversation_summary, key_topics, + total_interactions, session_start_time, session_end_time, created_at + ) +values + ( + 'c1b2c3d4-e5f6-a7b8-c9d0-e1f2a3b4c5d6', '6afc59e3-18b7-4182-b42c-8210d1152b07', + 'discord', '112233445566778899', + 'The user asked about getting started with the API and had questions about authentication. They were provided with a link to the documentation.', + array['onboarding', 'api_keys', 'authentication'], 8, '2025-06-20 10:00:00', + '2025-06-20 10:25:00', '2025-06-20 10:25:00' + ), + ( + 'd2c3d4e5-f6a7-b8c9-d0e1-f2a3b4c5d6e7', '6f990423-0d57-4c64-b191-17e53f39c799', + 'slack', '998877665544332211', + 'User reported a potential bug related to the repository indexing service. They provided logs and a repository URL. The issue was acknowledged and a ticket was created.', + array['bug_report', 'indexing', 'repositories'], 12, '2025-06-21 09:00:00', + '2025-06-21 09:45:00', '2025-06-21 09:45:00' + ), + ( + 'e3d4e5f6-a7b8-c9d0-e1f2-a3b4c5d6e7f8', '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + 'discord', '123451234512345123', + 'A general discussion about the future of Rust and its use in web development. 
The user shared an article and asked for opinions.', + array['Rust', 'web_development', 'discussion'], 5, '2025-06-19 14:30:00', + '2025-06-19 15:00:00', '2025-06-19 15:00:00' + ); diff --git a/backend/app/database/weaviate/__init__.py b/backend/app/database/weaviate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/db/weaviate/weaviate_client.py b/backend/app/database/weaviate/client.py similarity index 100% rename from backend/app/db/weaviate/weaviate_client.py rename to backend/app/database/weaviate/client.py diff --git a/backend/app/db/weaviate/weaviate_operations.py b/backend/app/database/weaviate/operations.py similarity index 97% rename from backend/app/db/weaviate/weaviate_operations.py rename to backend/app/database/weaviate/operations.py index a86adcf7..600b52c0 100644 --- a/backend/app/db/weaviate/weaviate_operations.py +++ b/backend/app/database/weaviate/operations.py @@ -2,8 +2,8 @@ import json from typing import Optional, Dict, Any from datetime import datetime, timezone -from app.model.weaviate.models import WeaviateUserProfile -from app.db.weaviate.weaviate_client import get_weaviate_client +from app.models.database.weaviate import WeaviateUserProfile +from app.database.weaviate.client import get_weaviate_client import weaviate.exceptions as weaviate_exceptions import weaviate.classes as wvc diff --git a/backend/app/database/weaviate/scripts/__init__.py b/backend/app/database/weaviate/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/database/weaviate/scripts/create_schemas.py b/backend/app/database/weaviate/scripts/create_schemas.py new file mode 100644 index 00000000..351c47a8 --- /dev/null +++ b/backend/app/database/weaviate/scripts/create_schemas.py @@ -0,0 +1,57 @@ +import asyncio +from app.database.weaviate.client import get_client +import weaviate.classes.config as wc + +async def create_schema(client, name, properties): + await client.collections.create( + name=name, + 
properties=properties, + ) + print(f"Created: {name}") + +async def create_user_profile_schema(client): + """ + Create schema for WeaviateUserProfile model. + Main vectorization will be on profile_text_for_embedding field. + """ + properties = [ + wc.Property(name="user_id", data_type=wc.DataType.TEXT), + wc.Property(name="github_username", data_type=wc.DataType.TEXT), + wc.Property(name="display_name", data_type=wc.DataType.TEXT), + wc.Property(name="bio", data_type=wc.DataType.TEXT), + wc.Property(name="location", data_type=wc.DataType.TEXT), + wc.Property(name="repositories", data_type=wc.DataType.TEXT), # JSON string + wc.Property(name="pull_requests", data_type=wc.DataType.TEXT), # JSON string + wc.Property(name="languages", data_type=wc.DataType.TEXT_ARRAY), + wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY), + wc.Property(name="followers_count", data_type=wc.DataType.INT), + wc.Property(name="following_count", data_type=wc.DataType.INT), + wc.Property(name="total_stars_received", data_type=wc.DataType.INT), + wc.Property(name="total_forks", data_type=wc.DataType.INT), + wc.Property(name="profile_text_for_embedding", data_type=wc.DataType.TEXT), + wc.Property(name="last_updated", data_type=wc.DataType.DATE), + ] + await create_schema(client, "weaviate_user_profile", properties) + +async def create_all_schemas(): + """ + Create only the user profile schema as per the model structure. 
+ """ + client = get_client() + try: + await client.connect() + await create_user_profile_schema(client) + print("✅ User profile schema created successfully.") + except Exception as e: + print(f"❌ Error creating schema: {str(e)}") + raise + finally: + await client.close() + +def main(): + """Entry point for running the schema creation.""" + asyncio.run(create_all_schemas()) + + +if __name__ == "__main__": + main() diff --git a/backend/app/database/weaviate/scripts/populate_db.py b/backend/app/database/weaviate/scripts/populate_db.py new file mode 100644 index 00000000..12bb9ebc --- /dev/null +++ b/backend/app/database/weaviate/scripts/populate_db.py @@ -0,0 +1,306 @@ +import json +import asyncio +from datetime import datetime +from app.database.weaviate.client import get_weaviate_client + +async def populate_weaviate_user_profile(client): + """ + Populate WeaviateUserProfile collection with sample data matching the model structure. + """ + current_time = datetime.now().astimezone() + + user_profiles = [ + { + "user_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef", + "github_username": "jane-dev", + "display_name": "Jane Developer", + "bio": ("Creator of innovative open-source tools. 
Full-stack developer " + "with a passion for Rust and WebAssembly."), + "location": "Berlin, Germany", + "repositories": json.dumps([ + { + "name": "rust-web-framework", + "description": "A high-performance web framework for Rust.", + "url": "https://github.com/jane-dev/rust-web-framework", + "languages": ["Rust", "TOML"], + "stars": 2500, + "forks": 400 + }, + { + "name": "data-viz-lib", + "description": "A declarative data visualization library for JavaScript.", + "url": "https://github.com/jane-dev/data-viz-lib", + "languages": ["JavaScript", "TypeScript"], + "stars": 1200, + "forks": 150 + } + ]), + "pull_requests": json.dumps([ + { + "title": "Add async support for database connections", + "body": ("This PR adds comprehensive async support for database " + "connections, improving performance by 40%..."), + "state": "closed", + "repository": "microsoft/vscode", + "created_at": "2024-01-15T10:30:00Z", + "closed_at": "2024-01-20T14:20:00Z", + "merged_at": "2024-01-20T14:20:00Z", + "labels": ["enhancement", "database", "performance"], + "url": "https://github.com/microsoft/vscode/pull/12345" + } + ]), + "languages": ["Rust", "JavaScript", "TypeScript", "TOML"], + "topics": ["rust", "webdev", "performance", "framework", + "data-visualization", "d3", "charts"], + "followers_count": 1800, + "following_count": 250, + "total_stars_received": 3700, + "total_forks": 550, + "profile_text_for_embedding": ( + "Jane Developer, Creator of innovative open-source tools. " + "Full-stack developer with a passion for Rust and WebAssembly. " + "Repositories: rust-web-framework, A high-performance web framework for Rust. " + "data-viz-lib, A declarative data visualization library for JavaScript. " + "Languages: Rust, JavaScript, TypeScript. " + "Topics: rust, webdev, performance, data-visualization." 
+ ), + "last_updated": current_time.isoformat() + }, + { + "user_id": "b2c3d4e5-f6g7-8901-2345-678901bcdefg", + "github_username": "python-ninja", + "display_name": "Alex Chen", + "bio": "Python enthusiast and machine learning researcher. Building the future of AI.", + "location": "San Francisco, CA", + "repositories": json.dumps([ + { + "name": "ml-toolkit", + "description": "A comprehensive machine learning toolkit for Python.", + "url": "https://github.com/python-ninja/ml-toolkit", + "languages": ["Python", "Jupyter Notebook"], + "stars": 3200, + "forks": 580 + }, + { + "name": "data-pipeline", + "description": "Scalable data processing pipeline for big data applications.", + "url": "https://github.com/python-ninja/data-pipeline", + "languages": ["Python", "SQL"], + "stars": 1800, + "forks": 320 + } + ]), + "pull_requests": json.dumps([ + { + "title": "Implement advanced ML algorithms", + "body": ("Adding support for advanced machine learning algorithms " + "including neural networks..."), + "state": "open", + "repository": "tensorflow/tensorflow", + "created_at": "2024-02-01T09:15:00Z", + "closed_at": None, + "merged_at": None, + "labels": ["enhancement", "ml", "algorithms"], + "url": "https://github.com/tensorflow/tensorflow/pull/67890" + } + ]), + "languages": ["Python", "SQL", "Jupyter Notebook"], + "topics": ["machine-learning", "ai", "data-science", "python", "big-data"], + "followers_count": 2400, + "following_count": 180, + "total_stars_received": 5000, + "total_forks": 900, + "profile_text_for_embedding": ( + "Alex Chen, Python enthusiast and machine learning researcher. " + "Building the future of AI. " + "Repositories: ml-toolkit, A comprehensive machine learning toolkit for Python. " + "data-pipeline, Scalable data processing pipeline for big data applications. " + "Languages: Python, SQL. " + "Topics: machine-learning, ai, data-science, python." 
+ ), + "last_updated": current_time.isoformat() + }, + { + "user_id": "c3d4e5f6-g7h8-9012-3456-789012cdefgh", + "github_username": "go-developer", + "display_name": "Sam Rodriguez", + "bio": "Cloud infrastructure engineer specializing in Go and Kubernetes.", + "location": "Austin, TX", + "repositories": json.dumps([ + { + "name": "k8s-operator", + "description": "Custom Kubernetes operator for managing microservices.", + "url": "https://github.com/go-developer/k8s-operator", + "languages": ["Go", "Dockerfile"], + "stars": 1500, + "forks": 280 + } + ]), + "pull_requests": json.dumps([ + { + "title": "Add support for custom resources", + "body": ("Implementing support for custom Kubernetes resources " + "in the operator..."), + "state": "merged", + "repository": "kubernetes/kubernetes", + "created_at": "2024-01-10T14:30:00Z", + "closed_at": "2024-01-15T16:45:00Z", + "merged_at": "2024-01-15T16:45:00Z", + "labels": ["enhancement", "k8s", "operator"], + "url": "https://github.com/kubernetes/kubernetes/pull/54321" + } + ]), + "languages": ["Go", "Dockerfile"], + "topics": ["kubernetes", "microservices", "cloud", "devops", "api"], + "followers_count": 890, + "following_count": 120, + "total_stars_received": 1500, + "total_forks": 280, + "profile_text_for_embedding": ( + "Sam Rodriguez, Cloud infrastructure engineer specializing in Go and Kubernetes. " + "Repositories: k8s-operator, Custom Kubernetes operator for managing microservices. " + "Languages: Go, Dockerfile. " + "Topics: kubernetes, microservices, cloud, devops." 
+ ), + "last_updated": current_time.isoformat() + }, + { + "user_id": "d4e5f6g7-h8i9-0123-4567-890123defghi", + "github_username": "frontend-wizard", + "display_name": "Emily Johnson", + "bio": "Frontend developer creating beautiful and accessible web experiences.", + "location": "New York, NY", + "repositories": json.dumps([ + { + "name": "react-components", + "description": "Reusable React component library with TypeScript.", + "url": "https://github.com/frontend-wizard/react-components", + "languages": ["TypeScript", "CSS", "JavaScript"], + "stars": 2100, + "forks": 420 + }, + { + "name": "css-animations", + "description": "Collection of smooth CSS animations and transitions.", + "url": "https://github.com/frontend-wizard/css-animations", + "languages": ["CSS", "HTML"], + "stars": 850, + "forks": 180 + } + ]), + "pull_requests": json.dumps([ + { + "title": "Improve accessibility features", + "body": ("Adding comprehensive accessibility features to the " + "React component library..."), + "state": "open", + "repository": "facebook/react", + "created_at": "2024-02-05T11:20:00Z", + "closed_at": None, + "merged_at": None, + "labels": ["accessibility", "enhancement", "a11y"], + "url": "https://github.com/facebook/react/pull/98765" + } + ]), + "languages": ["TypeScript", "JavaScript", "CSS", "HTML"], + "topics": ["react", "frontend", "typescript", "css", "ui-ux", "accessibility"], + "followers_count": 1320, + "following_count": 200, + "total_stars_received": 2950, + "total_forks": 600, + "profile_text_for_embedding": ( + "Emily Johnson, Frontend developer creating beautiful and accessible web experiences. " + "Repositories: react-components, Reusable React component library with TypeScript. " + "css-animations, Collection of smooth CSS animations and transitions. " + "Languages: TypeScript, JavaScript, CSS. " + "Topics: react, frontend, typescript, css, ui-ux." 
+ ), + "last_updated": current_time.isoformat() + }, + { + "user_id": "e5f6g7h8-i9j0-1234-5678-901234efghij", + "github_username": "rust-enthusiast", + "display_name": "David Kim", + "bio": "Systems programmer passionate about performance and memory safety.", + "location": "Seattle, WA", + "repositories": json.dumps([ + { + "name": "memory-allocator", + "description": "Custom memory allocator written in Rust for high-performance applications.", + "url": "https://github.com/rust-enthusiast/memory-allocator", + "languages": ["Rust"], + "stars": 1750, + "forks": 240 + }, + { + "name": "concurrent-data-structures", + "description": "Lock-free data structures for concurrent programming in Rust.", + "url": "https://github.com/rust-enthusiast/concurrent-data-structures", + "languages": ["Rust"], + "stars": 1200, + "forks": 180 + } + ]), + "pull_requests": json.dumps([ + { + "title": "Optimize memory allocation patterns", + "body": ("Implementing advanced memory allocation optimization techniques " + "for better performance..."), + "state": "merged", + "repository": "rust-lang/rust", + "created_at": "2024-01-25T08:45:00Z", + "closed_at": "2024-02-01T10:30:00Z", + "merged_at": "2024-02-01T10:30:00Z", + "labels": ["performance", "memory", "optimization"], + "url": "https://github.com/rust-lang/rust/pull/13579" + } + ]), + "languages": ["Rust", "C++", "Assembly"], + "topics": ["rust", "systems-programming", "performance", "memory-safety", "concurrency"], + "followers_count": 980, + "following_count": 85, + "total_stars_received": 2950, + "total_forks": 420, + "profile_text_for_embedding": ( + "David Kim, Systems programmer passionate about performance and memory safety. " + "Repositories: memory-allocator, Custom memory allocator written in Rust for " + "high-performance applications. concurrent-data-structures, Lock-free data structures " + "for concurrent programming in Rust. Languages: Rust, C++, Assembly. 
" + "Topics: rust, systems-programming, performance, memory-safety." + ), + "last_updated": current_time.isoformat() + } + ] + + try: + collection = client.collections.get("weaviate_user_profile") + async with collection.batch.dynamic() as batch: + for profile in user_profiles: + batch.add_object( + properties=profile + ) + print("✅ Populated weaviate_user_profile with sample user data.") + except Exception as e: + print(f"❌ Error populating weaviate_user_profile: {e}") + raise + +async def populate_all_collections(): + """ + Populate only the user profile collection as per the updated model structure. + """ + try: + async with get_weaviate_client() as client: + print("Populating Weaviate user profile collection with sample data...") + await populate_weaviate_user_profile(client) + print("✅ User profile collection populated successfully.") + except Exception as e: + print(f"❌ Error during population: {e}") + raise + +def main(): + """Entry point for running the population script.""" + asyncio.run(populate_all_collections()) + + +if __name__ == "__main__": + main() diff --git a/backend/app/db/supabase/auth.py b/backend/app/db/supabase/auth.py deleted file mode 100644 index 6e31f807..00000000 --- a/backend/app/db/supabase/auth.py +++ /dev/null @@ -1,49 +0,0 @@ -from typing import Optional -from app.db.supabase.supabase_client import get_supabase_client -import logging - -logger = logging.getLogger(__name__) - -async def login_with_oauth(provider: str, redirect_to: Optional[str] = None, state: Optional[str] = None): - """ - Generates an asynchronous OAuth sign-in URL. 
- """ - supabase = get_supabase_client() - try: - options = {} - if redirect_to: - options['redirect_to'] = redirect_to - if state: - options['queryParams'] = {'state': state} - - result = await supabase.auth.sign_in_with_oauth({ - "provider": provider, - "options": options - }) - return {"url": result.url} - except Exception as e: - logger.error(f"OAuth login failed for provider {provider}: {e}", exc_info=True) - raise - -async def login_with_github(redirect_to: Optional[str] = None, state: Optional[str] = None): - """Generates a GitHub OAuth login URL.""" - return await login_with_oauth("github", redirect_to=redirect_to, state=state) - -async def login_with_discord(redirect_to: Optional[str] = None): - """Generates a Discord OAuth login URL.""" - return await login_with_oauth("discord", redirect_to=redirect_to) - -async def login_with_slack(redirect_to: Optional[str] = None): - """Generates a Slack OAuth login URL.""" - return await login_with_oauth("slack", redirect_to=redirect_to) - -async def logout(access_token: str): - """Logs out a user by revoking their session.""" - supabase = get_supabase_client() - try: - await supabase.auth.set_session(access_token, refresh_token="") - await supabase.auth.sign_out() - return {"message": "User logged out successfully"} - except Exception as e: - logger.error(f"Logout failed: {e}", exc_info=True) - raise diff --git a/backend/app/db/supabase/users_service.py b/backend/app/db/supabase/users_service.py deleted file mode 100644 index 7ef98a56..00000000 --- a/backend/app/db/supabase/users_service.py +++ /dev/null @@ -1,203 +0,0 @@ -import uuid -from datetime import datetime, timedelta -from typing import Optional, Dict, Tuple -from app.db.supabase.supabase_client import get_supabase_client -from app.model.supabase.models import User -import logging - -logger = logging.getLogger(__name__) - -# session_id -> (discord_id, expiry_time) -_verification_sessions: Dict[str, Tuple[str, datetime]] = {} - -SESSION_EXPIRY_MINUTES = 5 - 
-async def get_or_create_user_by_discord( - discord_id: str, display_name: str, discord_username: str, avatar_url: Optional[str] -) -> User: - """ - Get or create a user by Discord ID. - """ - supabase = get_supabase_client() - existing_user_res = await supabase.table("users").select("*").eq("discord_id", discord_id).limit(1).execute() - - if existing_user_res.data: - logger.info(f"Found existing user for Discord ID: {discord_id}") - return User(**existing_user_res.data[0]) - logger.info(f"No user found for Discord ID: {discord_id}. Creating new user.") - new_user_data = { - "id": str(uuid.uuid4()), - "discord_id": discord_id, - "display_name": display_name, - "discord_username": discord_username, - "avatar_url": avatar_url, - "preferred_languages": [], - "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() - } - insert_res = await supabase.table("users").insert(new_user_data).execute() - if not insert_res.data: - raise Exception("Failed to create new user in database.") - return User(**insert_res.data[0]) - -def _cleanup_expired_sessions(): - """ - Remove expired verification sessions. - """ - current_time = datetime.now() - expired_sessions = [ - session_id for session_id, (discord_id, expiry_time) in _verification_sessions.items() - if current_time > expiry_time - ] - - for session_id in expired_sessions: - discord_id, _ = _verification_sessions[session_id] - del _verification_sessions[session_id] - logger.info(f"Cleaned up expired verification session {session_id} for Discord user {discord_id}") - - if expired_sessions: - logger.info(f"Cleaned up {len(expired_sessions)} expired verification sessions") - -async def create_verification_session(discord_id: str) -> Optional[str]: - """ - Create a verification session with expiry and return session ID. 
- """ - supabase = get_supabase_client() - - _cleanup_expired_sessions() - - token = str(uuid.uuid4()) - session_id = str(uuid.uuid4()) - expiry_time = datetime.now() + timedelta(minutes=SESSION_EXPIRY_MINUTES) - - try: - update_res = await supabase.table("users").update({ - "verification_token": token, - "verification_token_expires_at": expiry_time.isoformat(), - "updated_at": datetime.now().isoformat() - }).eq("discord_id", discord_id).execute() - - if update_res.data: - _verification_sessions[session_id] = (discord_id, expiry_time) - logger.info( - f"Created verification session {session_id} for Discord user {discord_id}, expires at {expiry_time}") - return session_id - logger.error(f"Failed to set verification token for Discord ID: {discord_id}. User not found.") - return None - except Exception as e: - logger.error(f"Error creating verification session for Discord ID {discord_id}: {str(e)}") - return None - -async def find_user_by_session_and_verify( - session_id: str, github_id: str, github_username: str, email: Optional[str] -) -> Optional[User]: - """ - Find and verify user using session ID with expiry validation. 
- """ - supabase = get_supabase_client() - - _cleanup_expired_sessions() - - try: - session_data = _verification_sessions.get(session_id) - if not session_data: - logger.warning(f"No verification session found for session ID: {session_id}") - return None - - discord_id, expiry_time = session_data - - current_time = datetime.now().isoformat() - user_res = await supabase.table("users").select("*").eq( - "discord_id", discord_id - ).neq( - "verification_token", None - ).gt( - "verification_token_expires_at", current_time - ).limit(1).execute() - - if not user_res.data: - logger.warning(f"No valid pending verification found for Discord ID: {discord_id} (token may have expired)") - del _verification_sessions[session_id] - return None - - # Delete the session after successful validation - del _verification_sessions[session_id] - - user_to_verify = user_res.data[0] - - existing_github_user = await supabase.table("users").select("*").eq( - "github_id", github_id - ).neq("id", user_to_verify['id']).limit(1).execute() - if existing_github_user.data: - logger.warning(f"GitHub account {github_username} is already linked to another user") - await supabase.table("users").update({ - "verification_token": None, - "verification_token_expires_at": None, - "updated_at": datetime.now().isoformat() - }).eq("id", user_to_verify['id']).execute() - raise Exception(f"GitHub account {github_username} is already linked to another Discord user") - - update_data = { - "github_id": github_id, - "github_username": github_username, - "email": user_to_verify.get('email') or email, - "is_verified": True, - "verified_at": datetime.now().isoformat(), - "verification_token": None, - "verification_token_expires_at": None, - "updated_at": datetime.now().isoformat() - } - - await supabase.table("users").update(update_data).eq("id", user_to_verify['id']).execute() - - updated_user_res = await supabase.table("users").select("*").eq("id", user_to_verify['id']).limit(1).execute() - - if not 
updated_user_res.data: - raise Exception(f"Failed to fetch updated user with ID: {user_to_verify['id']}") - - logger.info(f"Successfully verified user {user_to_verify['id']} and linked GitHub account {github_username}.") - return User(**updated_user_res.data[0]) - except Exception as e: - logger.error(f"Database error in find_user_by_session_and_verify: {e}", exc_info=True) - raise - -async def cleanup_expired_tokens(): - """ - Clean up expired verification tokens from database. - """ - supabase = get_supabase_client() - current_time = datetime.now().isoformat() - - try: - cleanup_res = await supabase.table("users").update({ - "verification_token": None, - "verification_token_expires_at": None, - "updated_at": current_time - }).lt("verification_token_expires_at", current_time).neq("verification_token", None).execute() - - if cleanup_res.data: - logger.info(f"Cleaned up {len(cleanup_res.data)} expired verification tokens from database") - except Exception as e: - logger.error(f"Error cleaning up expired tokens: {e}") - -async def get_verification_session_info(session_id: str) -> Optional[Dict[str, str]]: - """ - Get information about a verification session. 
- """ - _cleanup_expired_sessions() - - session_data = _verification_sessions.get(session_id) - if not session_data: - return None - - discord_id, expiry_time = session_data - - if datetime.now() > expiry_time: - del _verification_sessions[session_id] - return None - - return { - "discord_id": discord_id, - "expiry_time": expiry_time.isoformat(), - "time_remaining": str(expiry_time - datetime.now()) - } diff --git a/backend/app/db/weaviate/user_profiling.py b/backend/app/db/weaviate/user_profiling.py deleted file mode 100644 index 8d77255d..00000000 --- a/backend/app/db/weaviate/user_profiling.py +++ /dev/null @@ -1,310 +0,0 @@ -import logging -import asyncio -import aiohttp -from typing import List, Optional, Dict -from datetime import datetime -from collections import Counter -from app.model.weaviate.models import WeaviateUserProfile, WeaviateRepository, WeaviatePullRequest -from app.db.weaviate.weaviate_operations import store_user_profile -from app.core.config import settings - -logger = logging.getLogger(__name__) - - -class GitHubUserProfiler: - """ - Class to handle GitHub user profiling and Weaviate storage. - Uses organization's GitHub token to fetch public user data via GitHub REST API. 
- """ - - def __init__(self): - if not settings.github_token: - raise ValueError("GitHub token not configured in environment variables") - - self.headers = { - "Authorization": f"token {settings.github_token}", - "Accept": "application/vnd.github.v3+json", - "User-Agent": "DevRel-AI-Bot/1.0" - } - self.base_url = "https://api.github.com" - self.session = None - - async def __aenter__(self): - """Create async HTTP session""" - timeout = aiohttp.ClientTimeout(total=60, connect=10, sock_read=30) - connector = aiohttp.TCPConnector( - limit=50, # Total connection pool size - limit_per_host=10, # Per-host connection limit - ttl_dns_cache=300, # DNS cache TTL - use_dns_cache=True, - ) - - self.session = aiohttp.ClientSession( - headers=self.headers, - timeout=timeout, - connector=connector - ) - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Close async HTTP session""" - if self.session: - await self.session.close() - - async def _make_request(self, url: str, params: Dict = None) -> Optional[Dict]: - """Make a GET request to GitHub API""" - try: - async with self.session.get(url, params=params) as response: - if response.status == 200: - return await response.json() - elif response.status == 404: - logger.warning(f"GitHub API 404: {url}") - return None - elif response.status == 403: - logger.error(f"GitHub API rate limit exceeded: {url}") - return None - else: - logger.error(f"GitHub API error {response.status}: {url}") - return None - except asyncio.TimeoutError: - logger.error(f"Timeout accessing GitHub API: {url}") - return None - except Exception as e: - logger.error(f"Error making request to {url}: {str(e)}") - return None - - async def get_user_data(self, github_username: str) -> Optional[Dict]: - """Fetch user data""" - url = f"{self.base_url}/users/{github_username}" - user_data = await self._make_request(url) - - if user_data: - logger.info(f"Successfully fetched user data for {github_username}") - else: - logger.error(f"Failed to 
fetch user data for {github_username}") - - return user_data - - async def get_user_repositories(self, github_username: str, max_repos: int = 50) -> List[Dict]: - """Fetch user repositories""" - try: - params = { - "type": "owner", - "sort": "updated", - "direction": "desc", - "per_page": max_repos - } - - url = f"{self.base_url}/users/{github_username}/repos" - repos = await self._make_request(url, params) - - if repos and isinstance(repos, list): - logger.info(f"Successfully fetched {len(repos)} repositories for {github_username}") - return repos - else: - logger.info(f"No repositories found for {github_username}") - return [] - - except Exception as e: - logger.error(f"Error fetching repositories for {github_username}: {str(e)}") - return [] - - async def get_repository_languages(self, languages_url: str) -> List[str]: - """Fetch repository languages""" - try: - languages_data = await self._make_request(languages_url) - if languages_data and isinstance(languages_data, dict): - return list(languages_data.keys()) - return [] - except Exception as e: - logger.warning(f"Error fetching languages from {languages_url}: {str(e)}") - return [] - - async def get_user_pull_requests(self, github_username: str, max_prs: int = 100) -> List[WeaviatePullRequest]: - """Fetch pull requests""" - try: - params = { - "q": f"author:{github_username} is:pr", - "sort": "created", - "order": "desc", - "per_page": max_prs - } - - url = f"{self.base_url}/search/issues" - search_result = await self._make_request(url, params) - - if not search_result or "items" not in search_result: - logger.info(f"No pull requests found for {github_username}") - return [] - - items = search_result["items"] - pull_requests = [] - - for pr_data in items: - try: - repo_name = "unknown" - if pr_data.get("html_url"): - url_parts = pr_data["html_url"].split('/') - if len(url_parts) >= 5: - repo_name = f"{url_parts[3]}/{url_parts[4]}" - - merged_at = None - if pr_data.get("pull_request") and 
pr_data["pull_request"].get("merged_at"): - merged_at = pr_data["pull_request"]["merged_at"] - - pr_obj = WeaviatePullRequest( - title=pr_data["title"], - body=pr_data.get("body", "")[:500] if pr_data.get("body") else "", - state=pr_data["state"], - repository=repo_name, - created_at=pr_data.get("created_at"), - closed_at=pr_data.get("closed_at"), - merged_at=merged_at, - labels=[label["name"] for label in pr_data.get("labels", [])], - url=pr_data["html_url"] - ) - pull_requests.append(pr_obj) - - except Exception as e: - logger.warning(f"Error processing pull request: {str(e)}") - continue - - logger.info(f"Successfully fetched {len(pull_requests)} pull requests for {github_username}") - return pull_requests - - except Exception as e: - logger.error(f"Error fetching pull requests for {github_username}: {str(e)}") - return [] - - async def _process_repository(self, repo_data: Dict) -> Optional[WeaviateRepository]: - """Process a single repository""" - try: - languages = [] - if repo_data.get("languages_url"): - languages = await self.get_repository_languages(repo_data["languages_url"]) - - return WeaviateRepository( - name=repo_data["name"], - description=repo_data.get("description"), - url=repo_data["html_url"], - languages=languages, - stars=repo_data.get("stargazers_count", 0), - forks=repo_data.get("forks_count", 0) - ) - except Exception as e: - logger.warning(f"Error processing repository {repo_data.get('name', 'unknown')}: {str(e)}") - return None - - def analyze_language_frequency(self, repositories: List[WeaviateRepository]) -> List[str]: - """ - Analyze language frequency across repositories to identify top 5 languages. 
- """ - language_counter = Counter() - for repo in repositories: - language_counter.update(repo.languages) - - top_languages = language_counter.most_common(5) - logger.info(f"Top 5 languages by frequency: {top_languages}") - return [lang for lang, _ in top_languages] - - async def build_user_profile(self, user_id: str, github_username: str) -> Optional[WeaviateUserProfile]: - """ - Build a complete user profile for Weaviate indexing - """ - logger.info(f"Building user profile for GitHub user: {github_username}") - - # Run user data, repositories, and pull requests fetch concurrently - user_task = self.get_user_data(github_username) - repos_task = self.get_user_repositories(github_username) - prs_task = self.get_user_pull_requests(github_username) - - try: - user_data, repos_data, pull_requests = await asyncio.gather( - user_task, repos_task, prs_task, return_exceptions=True - ) - except Exception as e: - logger.error(f"Error in concurrent data fetching: {str(e)}") - return None - - if isinstance(user_data, Exception) or not user_data: - logger.error(f"Could not fetch user data for {github_username}") - return None - - if isinstance(repos_data, Exception): - logger.warning(f"Error fetching repositories: {repos_data}") - repos_data = [] - - if isinstance(pull_requests, Exception): - logger.warning(f"Error fetching pull requests: {pull_requests}") - pull_requests = [] - - logger.info(f"Found {len(repos_data)} repositories and {len(pull_requests)} pull requests for {github_username}") - - repository_tasks = [self._process_repository(repo) for repo in repos_data] - - repositories = [] - if repository_tasks: - try: - repo_results = await asyncio.gather(*repository_tasks, return_exceptions=True) - repositories = [r for r in repo_results if r is not None and not isinstance(r, Exception)] - except Exception as e: - logger.warning(f"Error processing repositories: {str(e)}") - - all_languages = set() - all_topics = set() - total_stars = 0 - total_forks = 0 - - for repo_obj in 
repositories: - all_languages.update(repo_obj.languages) - total_stars += repo_obj.stars - total_forks += repo_obj.forks - - for repo_data in repos_data: - topics = repo_data.get("topics", []) - if topics: - all_topics.update(topics) - - top_languages = self.analyze_language_frequency(repositories) - - profile = WeaviateUserProfile( - user_id=user_id, - github_username=github_username, - display_name=user_data.get("name"), - bio=user_data.get("bio"), - location=user_data.get("location"), - repositories=repositories, - pull_requests=pull_requests, - languages=top_languages, - topics=list(all_topics), - followers_count=user_data.get("followers", 0), - following_count=user_data.get("following", 0), - total_stars_received=total_stars, - total_forks=total_forks, - profile_text_for_embedding="", # TODO: Invoke agent/llm to generate this - last_updated=datetime.now() - ) - - logger.info( - f"Successfully built profile for {github_username}: " - f"{len(repositories)} repos, {len(top_languages)} top languages, " - f"{len(pull_requests)} pull requests analyzed" - ) - return profile - - -async def profile_user_from_github(user_id: str, github_username: str) -> bool: - """Profile a user and store in Weaviate with proper resource management.""" - - async with GitHubUserProfiler() as profiler: - try: - profile = await profiler.build_user_profile(user_id, github_username) - if profile: - success = await store_user_profile(profile) - if success: - logger.info(f"Successfully stored profile for user {github_username}") - return success - return False - except Exception as e: - logger.error(f"Failed to profile user {github_username}: {str(e)}") - return False From 3efb58dd2289609f00e1209dbd327b250da97538 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:22:31 +0530 Subject: [PATCH 03/10] [refactor]: restructure models layer --- backend/app/model/__init__.py | 0 backend/app/model/supabase/__init__.py | 0 backend/app/models.py | 4 ---- backend/app/models/__init__.py 
| 1 + .../{model/supabase/models.py => models/database/supabase.py} | 0 .../{model/weaviate/models.py => models/database/weaviate.py} | 0 6 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 backend/app/model/__init__.py delete mode 100644 backend/app/model/supabase/__init__.py delete mode 100644 backend/app/models.py create mode 100644 backend/app/models/__init__.py rename backend/app/{model/supabase/models.py => models/database/supabase.py} (100%) rename backend/app/{model/weaviate/models.py => models/database/weaviate.py} (100%) diff --git a/backend/app/model/__init__.py b/backend/app/model/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/model/supabase/__init__.py b/backend/app/model/supabase/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/models.py b/backend/app/models.py deleted file mode 100644 index 0c5196ff..00000000 --- a/backend/app/models.py +++ /dev/null @@ -1,4 +0,0 @@ -from pydantic import BaseModel - -class RepoRequest(BaseModel): - repo_url: str diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/backend/app/models/__init__.py @@ -0,0 +1 @@ + diff --git a/backend/app/model/supabase/models.py b/backend/app/models/database/supabase.py similarity index 100% rename from backend/app/model/supabase/models.py rename to backend/app/models/database/supabase.py diff --git a/backend/app/model/weaviate/models.py b/backend/app/models/database/weaviate.py similarity index 100% rename from backend/app/model/weaviate/models.py rename to backend/app/models/database/weaviate.py From 3f842a1029981fe6b358b8f5b4ac158b77d4194e Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:22:48 +0530 Subject: [PATCH 04/10] [refactor]: reorganize configuration management --- backend/app/core/config/__init__.py | 3 +++ backend/app/core/{config.py => config/settings.py} | 0 2 files 
changed, 3 insertions(+) create mode 100644 backend/app/core/config/__init__.py rename backend/app/core/{config.py => config/settings.py} (100%) diff --git a/backend/app/core/config/__init__.py b/backend/app/core/config/__init__.py new file mode 100644 index 00000000..84a6cc56 --- /dev/null +++ b/backend/app/core/config/__init__.py @@ -0,0 +1,3 @@ +from .settings import settings + +__all__ = ["settings"] diff --git a/backend/app/core/config.py b/backend/app/core/config/settings.py similarity index 100% rename from backend/app/core/config.py rename to backend/app/core/config/settings.py From d634726c1b3fe8629fe1a3d8338976ab9f9e18e2 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:23:07 +0530 Subject: [PATCH 05/10] [feat]: create services layer architecture --- backend/app/services/auth/__init__.py | 0 backend/app/services/auth/management.py | 93 +++++++ backend/app/services/auth/supabase.py | 49 ++++ backend/app/services/auth/verification.py | 176 ++++++++++++ backend/app/services/user/__init__.py | 0 backend/app/services/user/profiling.py | 310 ++++++++++++++++++++++ 6 files changed, 628 insertions(+) create mode 100644 backend/app/services/auth/__init__.py create mode 100644 backend/app/services/auth/management.py create mode 100644 backend/app/services/auth/supabase.py create mode 100644 backend/app/services/auth/verification.py create mode 100644 backend/app/services/user/__init__.py create mode 100644 backend/app/services/user/profiling.py diff --git a/backend/app/services/auth/__init__.py b/backend/app/services/auth/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/services/auth/management.py b/backend/app/services/auth/management.py new file mode 100644 index 00000000..4d0f35d8 --- /dev/null +++ b/backend/app/services/auth/management.py @@ -0,0 +1,93 @@ +import uuid +from datetime import datetime +from typing import Optional +from app.database.supabase.client import get_supabase_client +from 
# --- services/auth/management.py (reconstructed from patch hunk; completes the
# import statement split across the hunk boundary) ---
import uuid
import logging
from datetime import datetime, timedelta
from typing import Optional, Dict, Tuple

from app.database.supabase.client import get_supabase_client
from app.models.database.supabase import User

logger = logging.getLogger(__name__)


async def get_or_create_user_by_discord(
    discord_id: str, display_name: str, discord_username: str, avatar_url: Optional[str]
) -> User:
    """
    Get or create a user by Discord ID.

    Looks up an existing row by ``discord_id``; if none exists, inserts a new
    user with a fresh UUID and empty language preferences.

    Raises:
        Exception: if the insert of the new user returns no data.
    """
    supabase = get_supabase_client()
    existing_user_res = await supabase.table("users").select("*").eq("discord_id", discord_id).limit(1).execute()

    if existing_user_res.data:
        logger.info(f"Found existing user for Discord ID: {discord_id}")
        return User(**existing_user_res.data[0])

    # Create new user if not found
    logger.info(f"No user found for Discord ID: {discord_id}. Creating new user.")
    new_user_data = {
        "id": str(uuid.uuid4()),
        "discord_id": discord_id,
        "display_name": display_name,
        "discord_username": discord_username,
        "avatar_url": avatar_url,
        "preferred_languages": [],
        "created_at": datetime.now().isoformat(),
        "updated_at": datetime.now().isoformat()
    }

    insert_res = await supabase.table("users").insert(new_user_data).execute()
    if not insert_res.data:
        raise Exception("Failed to create new user in database.")

    logger.info(f"Successfully created new user with ID: {insert_res.data[0]['id']}")
    return User(**insert_res.data[0])


async def _get_user_by_field(field: str, value: str) -> Optional[User]:
    """
    Fetch a single user where *field* equals *value*.

    Shared implementation for the public lookup helpers below (they were
    copy-paste duplicates differing only in the column name). Returns None
    on a miss or on any database error, which is logged.
    """
    supabase = get_supabase_client()
    try:
        user_res = await supabase.table("users").select("*").eq(field, value).limit(1).execute()
        if user_res.data:
            return User(**user_res.data[0])
        return None
    except Exception as e:
        logger.error(f"Error getting user by {field} {value}: {e}")
        return None


async def get_user_by_id(user_id: str) -> Optional[User]:
    """Get user by their ID."""
    return await _get_user_by_field("id", user_id)


async def get_user_by_github_id(github_id: str) -> Optional[User]:
    """Get user by their GitHub ID."""
    return await _get_user_by_field("github_id", github_id)


async def update_user_profile(user_id: str, **updates) -> Optional[User]:
    """
    Update user profile data.

    Always stamps ``updated_at``. Returns the updated row as a User, or None
    when the update matched nothing or a database error occurred (logged).
    """
    supabase = get_supabase_client()

    try:
        # Add updated_at timestamp
        updates["updated_at"] = datetime.now().isoformat()

        update_res = await supabase.table("users").update(updates).eq("id", user_id).execute()

        if update_res.data:
            logger.info(f"Successfully updated user {user_id}")
            return User(**update_res.data[0])
        return None
    except Exception as e:
        logger.error(f"Error updating user {user_id}: {e}")
        return None


# --- services/auth/supabase.py (reconstructed) ---

async def login_with_oauth(provider: str, redirect_to: Optional[str] = None, state: Optional[str] = None):
    """
    Generates an asynchronous OAuth sign-in URL for *provider*.

    ``state`` is forwarded as a query parameter so callers can round-trip a
    verification session ID through the OAuth flow.
    """
    supabase = get_supabase_client()
    try:
        options = {}
        if redirect_to:
            options['redirect_to'] = redirect_to
        if state:
            options['queryParams'] = {'state': state}

        result = await supabase.auth.sign_in_with_oauth({
            "provider": provider,
            "options": options
        })
        return {"url": result.url}
    except Exception as e:
        logger.error(f"OAuth login failed for provider {provider}: {e}", exc_info=True)
        raise


async def login_with_github(redirect_to: Optional[str] = None, state: Optional[str] = None):
    """Generates a GitHub OAuth login URL."""
    return await login_with_oauth("github", redirect_to=redirect_to, state=state)


async def login_with_discord(redirect_to: Optional[str] = None, state: Optional[str] = None):
    """Generates a Discord OAuth login URL.

    Generalized: now accepts ``state`` (default None preserves the previous
    call signature) so Discord flows can also carry a session ID.
    """
    return await login_with_oauth("discord", redirect_to=redirect_to, state=state)


async def login_with_slack(redirect_to: Optional[str] = None, state: Optional[str] = None):
    """Generates a Slack OAuth login URL (``state`` optional, as above)."""
    return await login_with_oauth("slack", redirect_to=redirect_to, state=state)


async def logout(access_token: str):
    """Logs out a user by revoking their session.

    NOTE(review): set_session() is called with an empty refresh token; this
    assumes the supabase client accepts that for a subsequent sign_out —
    confirm against the supabase-py auth API.
    """
    supabase = get_supabase_client()
    try:
        await supabase.auth.set_session(access_token, refresh_token="")
        await supabase.auth.sign_out()
        return {"message": "User logged out successfully"}
    except Exception as e:
        logger.error(f"Logout failed: {e}", exc_info=True)
        raise


# --- services/auth/verification.py (module prelude, reconstructed) ---

# In-memory session store: session_id -> (discord_id, expiry_time).
# NOTE(review): process-local — entries do not survive restarts and are not
# shared across workers; confirm single-process deployment.
_verification_sessions: Dict[str, Tuple[str, datetime]] = {}

SESSION_EXPIRY_MINUTES = 5
def _cleanup_expired_sessions():
    """
    Remove expired verification sessions.
    """
    current_time = datetime.now()
    # Collect the expired IDs first, then delete — avoids mutating the dict
    # while iterating over it.
    expired_sessions = [
        session_id for session_id, (discord_id, expiry_time) in _verification_sessions.items()
        if current_time > expiry_time
    ]

    for session_id in expired_sessions:
        discord_id, _ = _verification_sessions[session_id]
        del _verification_sessions[session_id]
        logger.info(f"Cleaned up expired verification session {session_id} for Discord user {discord_id}")

    if expired_sessions:
        logger.info(f"Cleaned up {len(expired_sessions)} expired verification sessions")

async def create_verification_session(discord_id: str) -> Optional[str]:
    """
    Create a verification session with expiry and return session ID.

    Writes a fresh verification token + expiry onto the user's row and
    records the session in the in-memory map. Returns None when the Discord
    user has no row or the database update fails.
    """
    supabase = get_supabase_client()

    _cleanup_expired_sessions()

    token = str(uuid.uuid4())
    session_id = str(uuid.uuid4())
    expiry_time = datetime.now() + timedelta(minutes=SESSION_EXPIRY_MINUTES)

    try:
        update_res = await supabase.table("users").update({
            "verification_token": token,
            "verification_token_expires_at": expiry_time.isoformat(),
            "updated_at": datetime.now().isoformat()
        }).eq("discord_id", discord_id).execute()

        if update_res.data:
            # Only register the session once the DB token write succeeded.
            _verification_sessions[session_id] = (discord_id, expiry_time)
            logger.info(
                f"Created verification session {session_id} for Discord user {discord_id}, expires at {expiry_time}")
            return session_id
        logger.error(f"Failed to set verification token for Discord ID: {discord_id}. User not found.")
        return None
    except Exception as e:
        logger.error(f"Error creating verification session for Discord ID {discord_id}: {str(e)}")
        return None

async def find_user_by_session_and_verify(
    session_id: str, github_id: str, github_username: str, email: Optional[str]
) -> Optional[User]:
    """
    Find and verify user using session ID with expiry validation.
    Links GitHub account to Discord user.

    Raises:
        Exception: when the GitHub account is already linked to a different
            user, or the post-update re-fetch fails (re-raised after logging).
    """
    supabase = get_supabase_client()

    _cleanup_expired_sessions()

    try:
        session_data = _verification_sessions.get(session_id)
        if not session_data:
            logger.warning(f"No verification session found for session ID: {session_id}")
            return None

        discord_id, expiry_time = session_data

        current_time = datetime.now().isoformat()
        # NOTE(review): `.neq("verification_token", None)` is meant to keep
        # only rows with a non-null token; PostgREST's canonical non-null
        # filter is `.not_.is_("verification_token", "null")` — confirm this
        # form actually excludes NULL tokens against supabase-py.
        user_res = await supabase.table("users").select("*").eq(
            "discord_id", discord_id
        ).neq(
            "verification_token", None
        ).gt(
            "verification_token_expires_at", current_time
        ).limit(1).execute()

        if not user_res.data:
            logger.warning(f"No valid pending verification found for Discord ID: {discord_id} (token may have expired)")
            # Session is single-use: discard it even on failure.
            del _verification_sessions[session_id]
            return None

        # Delete the session after successful validation
        del _verification_sessions[session_id]

        user_to_verify = user_res.data[0]

        # Refuse to link a GitHub account already attached to a *different* user.
        existing_github_user = await supabase.table("users").select("*").eq(
            "github_id", github_id
        ).neq("id", user_to_verify['id']).limit(1).execute()
        if existing_github_user.data:
            logger.warning(f"GitHub account {github_username} is already linked to another user")
            # Clear the pending token so the user can restart the flow cleanly.
            await supabase.table("users").update({
                "verification_token": None,
                "verification_token_expires_at": None,
                "updated_at": datetime.now().isoformat()
            }).eq("id", user_to_verify['id']).execute()
            raise Exception(f"GitHub account {github_username} is already linked to another Discord user")

        update_data = {
            "github_id": github_id,
            "github_username": github_username,
            # Keep an existing email if present; fall back to the OAuth one.
            "email": user_to_verify.get('email') or email,
            "is_verified": True,
            "verified_at": datetime.now().isoformat(),
            "verification_token": None,
            "verification_token_expires_at": None,
            "updated_at": datetime.now().isoformat()
        }

        await supabase.table("users").update(update_data).eq("id", user_to_verify['id']).execute()

        # Re-fetch to return the authoritative post-update row.
        updated_user_res = await supabase.table("users").select("*").eq("id", user_to_verify['id']).limit(1).execute()

        if not updated_user_res.data:
            raise Exception(f"Failed to fetch updated user with ID: {user_to_verify['id']}")

        logger.info(f"Successfully verified user {user_to_verify['id']} and linked GitHub account {github_username}.")
        return User(**updated_user_res.data[0])
    except Exception as e:
        logger.error(f"Database error in find_user_by_session_and_verify: {e}", exc_info=True)
        raise

async def cleanup_expired_tokens():
    """
    Clean up expired verification tokens from database.

    Best-effort maintenance: errors are logged, never raised.
    """
    supabase = get_supabase_client()
    current_time = datetime.now().isoformat()

    try:
        cleanup_res = await supabase.table("users").update({
            "verification_token": None,
            "verification_token_expires_at": None,
            "updated_at": current_time
        }).lt("verification_token_expires_at", current_time).neq("verification_token", None).execute()

        if cleanup_res.data:
            logger.info(f"Cleaned up {len(cleanup_res.data)} expired verification tokens from database")
    except Exception as e:
        logger.error(f"Error cleaning up expired tokens: {e}")

async def get_verification_session_info(session_id: str) -> Optional[Dict[str, str]]:
    """
    Get information about a verification session.

    Returns None for unknown or expired sessions; expired sessions are
    removed as a side effect.
    """
    _cleanup_expired_sessions()

    session_data = _verification_sessions.get(session_id)
    if not session_data:
        return None

    discord_id, expiry_time = session_data

    # Defensive re-check; _cleanup_expired_sessions() above already removes
    # expired entries, so this branch is normally unreachable.
    if datetime.now() > expiry_time:
        del _verification_sessions[session_id]
        return None

    return {
        "discord_id": discord_id,
        "expiry_time": expiry_time.isoformat(),
        "time_remaining": str(expiry_time - datetime.now())
    }

# --- services/user/profiling.py (module prelude; class body continues in the
# next patch lines) ---
import logging
import asyncio
import aiohttp
from typing import List, Optional, Dict
from datetime import datetime
from collections import Counter
from app.models.database.weaviate import WeaviateUserProfile, WeaviateRepository, WeaviatePullRequest
from app.database.weaviate.operations import store_user_profile
from app.core.config import settings

logger = logging.getLogger(__name__)


class GitHubUserProfiler:
    """
    Class to handle GitHub user profiling and Weaviate storage.
    Uses organization's GitHub token to fetch public user data via GitHub REST API.
    """

    def __init__(self):
        # Fail fast when the org token is absent — every request needs it.
        if not settings.github_token:
            raise ValueError("GitHub token not configured in environment variables")

        self.headers = {
            "Authorization": f"token {settings.github_token}",
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "DevRel-AI-Bot/1.0"
        }
        self.base_url = "https://api.github.com"
        # Populated by __aenter__; all requests must run inside `async with`.
        self.session = None

    async def __aenter__(self):
        """Create async HTTP session"""
        timeout = aiohttp.ClientTimeout(total=60, connect=10, sock_read=30)
        connector = aiohttp.TCPConnector(
            limit=50,  # Total connection pool size
            limit_per_host=10,  # Per-host connection limit
            ttl_dns_cache=300,  # DNS cache TTL
            use_dns_cache=True,
        )

        self.session = aiohttp.ClientSession(
            headers=self.headers,
            timeout=timeout,
            connector=connector
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close async HTTP session"""
        if self.session:
            await self.session.close()

    async def _make_request(self, url: str, params: Optional[Dict] = None) -> Optional[Dict]:
        """Make a GET request to GitHub API

        Returns the decoded JSON body on 200, None on any error (404, 403,
        other statuses, timeouts) — callers must treat None as "no data".
        NOTE(review): 403 is logged as a rate limit, but GitHub also returns
        403 for permission problems — confirm the distinction matters here.
        """
        try:
            async with self.session.get(url, params=params) as response:
                if response.status == 200:
                    return await response.json()
                elif response.status == 404:
                    logger.warning(f"GitHub API 404: {url}")
                    return None
                elif response.status == 403:
                    logger.error(f"GitHub API rate limit exceeded: {url}")
                    return None
                else:
                    logger.error(f"GitHub API error {response.status}: {url}")
                    return None
        except asyncio.TimeoutError:
            logger.error(f"Timeout accessing GitHub API: {url}")
            return None
        except Exception as e:
            logger.error(f"Error making request to {url}: {str(e)}")
            return None

    async def get_user_data(self, github_username: str) -> Optional[Dict]:
        """Fetch user data"""
        url = f"{self.base_url}/users/{github_username}"
        user_data = await self._make_request(url)

        if user_data:
            logger.info(f"Successfully fetched user data for {github_username}")
        else:
            logger.error(f"Failed to fetch user data for {github_username}")

        return user_data

    async def get_user_repositories(self, github_username: str, max_repos: int = 50) -> List[Dict]:
        """Fetch user repositories

        Single page only, most recently updated first.
        NOTE(review): GitHub caps per_page at 100, so max_repos above 100
        would be silently truncated — confirm callers stay below that.
        """
        try:
            params = {
                "type": "owner",
                "sort": "updated",
                "direction": "desc",
                "per_page": max_repos
            }

            url = f"{self.base_url}/users/{github_username}/repos"
            repos = await self._make_request(url, params)

            if repos and isinstance(repos, list):
                logger.info(f"Successfully fetched {len(repos)} repositories for {github_username}")
                return repos
            else:
                logger.info(f"No repositories found for {github_username}")
                return []

        except Exception as e:
            logger.error(f"Error fetching repositories for {github_username}: {str(e)}")
            return []

    async def get_repository_languages(self, languages_url: str) -> List[str]:
        """Fetch repository languages

        Returns the language names only; byte counts from the API are dropped.
        """
        try:
            languages_data = await self._make_request(languages_url)
            if languages_data and isinstance(languages_data, dict):
                return list(languages_data.keys())
            return []
        except Exception as e:
            logger.warning(f"Error fetching languages from {languages_url}: {str(e)}")
            return []

    async def get_user_pull_requests(self, github_username: str, max_prs: int = 100) -> List[WeaviatePullRequest]:
        """Fetch pull requests

        Uses the search API (`author:<user> is:pr`), newest first. Individual
        malformed results are skipped with a warning rather than failing the
        whole fetch.
        """
        try:
            params = {
                "q": f"author:{github_username} is:pr",
                "sort": "created",
                "order": "desc",
                "per_page": max_prs
            }

            url = f"{self.base_url}/search/issues"
            search_result = await self._make_request(url, params)

            if not search_result or "items" not in search_result:
                logger.info(f"No pull requests found for {github_username}")
                return []

            items = search_result["items"]
            pull_requests = []

            for pr_data in items:
                try:
                    # Derive "owner/repo" from the PR's html_url
                    # (https://github.com/<owner>/<repo>/pull/<n>).
                    repo_name = "unknown"
                    if pr_data.get("html_url"):
                        url_parts = pr_data["html_url"].split('/')
                        if len(url_parts) >= 5:
                            repo_name = f"{url_parts[3]}/{url_parts[4]}"

                    # merged_at lives under the nested "pull_request" key in
                    # search results; absent means not merged.
                    merged_at = None
                    if pr_data.get("pull_request") and pr_data["pull_request"].get("merged_at"):
                        merged_at = pr_data["pull_request"]["merged_at"]

                    pr_obj = WeaviatePullRequest(
                        title=pr_data["title"],
                        # Truncate bodies to 500 chars to bound stored size.
                        body=pr_data.get("body", "")[:500] if pr_data.get("body") else "",
                        state=pr_data["state"],
                        repository=repo_name,
                        created_at=pr_data.get("created_at"),
                        closed_at=pr_data.get("closed_at"),
                        merged_at=merged_at,
                        labels=[label["name"] for label in pr_data.get("labels", [])],
                        url=pr_data["html_url"]
                    )
                    pull_requests.append(pr_obj)

                except Exception as e:
                    logger.warning(f"Error processing pull request: {str(e)}")
                    continue

            logger.info(f"Successfully fetched {len(pull_requests)} pull requests for {github_username}")
            return pull_requests

        except Exception as e:
            logger.error(f"Error fetching pull requests for {github_username}: {str(e)}")
            return []

    async def _process_repository(self, repo_data: Dict) -> Optional[WeaviateRepository]:
        """Process a single repository

        Fetches its language list (one extra API call) and maps the raw API
        dict to a WeaviateRepository; returns None on failure.
        """
        try:
            languages = []
            if repo_data.get("languages_url"):
                languages = await self.get_repository_languages(repo_data["languages_url"])

            return WeaviateRepository(
                name=repo_data["name"],
                description=repo_data.get("description"),
                url=repo_data["html_url"],
                languages=languages,
                stars=repo_data.get("stargazers_count", 0),
                forks=repo_data.get("forks_count", 0)
            )
        except Exception as e:
            logger.warning(f"Error processing repository {repo_data.get('name', 'unknown')}: {str(e)}")
            return None

    def analyze_language_frequency(self, repositories: List[WeaviateRepository]) -> List[str]:
        """
        Analyze language frequency across repositories to identify top 5 languages.

        Frequency = number of repositories containing the language (each repo
        counts a language once), not bytes of code.
        """
        language_counter = Counter()
        for repo in repositories:
            language_counter.update(repo.languages)

        top_languages = language_counter.most_common(5)
        logger.info(f"Top 5 languages by frequency: {top_languages}")
        return [lang for lang, _ in top_languages]

    async def build_user_profile(self, user_id: str, github_username: str) -> Optional[WeaviateUserProfile]:
        """
        Build a complete user profile for Weaviate indexing

        Returns None only when the basic user record cannot be fetched;
        repository/PR failures degrade to empty lists instead.
        """
        logger.info(f"Building user profile for GitHub user: {github_username}")

        # Run user data, repositories, and pull requests fetch concurrently
        user_task = self.get_user_data(github_username)
        repos_task = self.get_user_repositories(github_username)
        prs_task = self.get_user_pull_requests(github_username)

        try:
            user_data, repos_data, pull_requests = await asyncio.gather(
                user_task, repos_task, prs_task, return_exceptions=True
            )
        except Exception as e:
            logger.error(f"Error in concurrent data fetching: {str(e)}")
            return None

        # With return_exceptions=True, failures arrive as exception objects;
        # user data is mandatory, the other two degrade gracefully.
        if isinstance(user_data, Exception) or not user_data:
            logger.error(f"Could not fetch user data for {github_username}")
            return None

        if isinstance(repos_data, Exception):
            logger.warning(f"Error fetching repositories: {repos_data}")
            repos_data = []

        if isinstance(pull_requests, Exception):
            logger.warning(f"Error fetching pull requests: {pull_requests}")
            pull_requests = []

        logger.info(f"Found {len(repos_data)} repositories and {len(pull_requests)} pull requests for {github_username}")

        repository_tasks = [self._process_repository(repo) for repo in repos_data]

        repositories = []
        if repository_tasks:
            try:
                repo_results = await asyncio.gather(*repository_tasks, return_exceptions=True)
                repositories = [r for r in repo_results if r is not None and not isinstance(r, Exception)]
            except Exception as e:
                logger.warning(f"Error processing repositories: {str(e)}")

        # Aggregate stats across the successfully processed repositories.
        all_languages = set()
        all_topics = set()
        total_stars = 0
        total_forks = 0

        for repo_obj in repositories:
            all_languages.update(repo_obj.languages)
            total_stars += repo_obj.stars
            total_forks += repo_obj.forks

        # Topics come from the raw API payload (not on WeaviateRepository).
        for repo_data in repos_data:
            topics = repo_data.get("topics", [])
            if topics:
                all_topics.update(topics)

        top_languages = self.analyze_language_frequency(repositories)

        profile = WeaviateUserProfile(
            user_id=user_id,
            github_username=github_username,
            display_name=user_data.get("name"),
            bio=user_data.get("bio"),
            location=user_data.get("location"),
            repositories=repositories,
            pull_requests=pull_requests,
            languages=top_languages,
            topics=list(all_topics),
            followers_count=user_data.get("followers", 0),
            following_count=user_data.get("following", 0),
            total_stars_received=total_stars,
            total_forks=total_forks,
            profile_text_for_embedding="",  # TODO: Invoke agent/llm to generate this
            last_updated=datetime.now()
        )

        logger.info(
            f"Successfully built profile for {github_username}: "
            f"{len(repositories)} repos, {len(top_languages)} top languages, "
            f"{len(pull_requests)} pull requests analyzed"
        )
        return profile


async def profile_user_from_github(user_id: str, github_username: str) -> bool:
    """Profile a user and store in Weaviate with proper resource management.

    Returns True only when both the profile build and the Weaviate store
    succeed. NOTE(review): a missing GitHub token raises ValueError from the
    constructor *outside* the try block and therefore propagates — confirm
    that is intended rather than returning False.
    """
    async with GitHubUserProfiler() as profiler:
        try:
            profile = await profiler.build_user_profile(user_id, github_username)
            if profile:
                success = await store_user_profile(profile)
                if success:
                    logger.info(f"Successfully stored profile for user {github_username}")
                return success
            return False
        except Exception as e:
            logger.error(f"Failed to profile user {github_username}: {str(e)}")
            return False
Content-Transfer-Encoding: 8bit --- backend/{bots => integrations}/__init__.py | 0 backend/{bots => integrations}/discord/__init__.py | 0 .../discord_bot.py => integrations/discord/bot.py} | 2 +- .../discord/cogs.py} | 13 +++++-------- .../discord/views.py} | 0 .../github_bot => integrations/github}/__init__.py | 0 .../slack_bot => integrations/slack}/__init__.py | 0 7 files changed, 6 insertions(+), 9 deletions(-) rename backend/{bots => integrations}/__init__.py (100%) rename backend/{bots => integrations}/discord/__init__.py (100%) rename backend/{bots/discord/discord_bot.py => integrations/discord/bot.py} (98%) rename backend/{bots/discord/discord_cogs.py => integrations/discord/cogs.py} (97%) rename backend/{bots/discord/discord_views.py => integrations/discord/views.py} (100%) rename backend/{bots/github_bot => integrations/github}/__init__.py (100%) rename backend/{bots/slack_bot => integrations/slack}/__init__.py (100%) diff --git a/backend/bots/__init__.py b/backend/integrations/__init__.py similarity index 100% rename from backend/bots/__init__.py rename to backend/integrations/__init__.py diff --git a/backend/bots/discord/__init__.py b/backend/integrations/discord/__init__.py similarity index 100% rename from backend/bots/discord/__init__.py rename to backend/integrations/discord/__init__.py diff --git a/backend/bots/discord/discord_bot.py b/backend/integrations/discord/bot.py similarity index 98% rename from backend/bots/discord/discord_bot.py rename to backend/integrations/discord/bot.py index 59ebab0c..d17d5346 100644 --- a/backend/bots/discord/discord_bot.py +++ b/backend/integrations/discord/bot.py @@ -3,7 +3,7 @@ import logging from typing import Dict, Any, Optional from app.core.orchestration.queue_manager import AsyncQueueManager, QueuePriority -from app.agents.shared.classification_router import ClassificationRouter +from app.agents.classification_router import ClassificationRouter logger = logging.getLogger(__name__) diff --git 
a/backend/bots/discord/discord_cogs.py b/backend/integrations/discord/cogs.py similarity index 97% rename from backend/bots/discord/discord_cogs.py rename to backend/integrations/discord/cogs.py index 0c78549b..1f892953 100644 --- a/backend/bots/discord/discord_cogs.py +++ b/backend/integrations/discord/cogs.py @@ -2,14 +2,11 @@ from discord.ext import commands, tasks import logging from app.core.orchestration.queue_manager import AsyncQueueManager, QueuePriority -from app.db.supabase.auth import login_with_github -from app.db.supabase.users_service import ( - get_or_create_user_by_discord, - create_verification_session, - cleanup_expired_tokens -) -from bots.discord.discord_bot import DiscordBot -from bots.discord.discord_views import OAuthView +from app.services.auth.supabase import login_with_github +from app.services.auth.management import get_or_create_user_by_discord +from app.services.auth.verification import create_verification_session, cleanup_expired_tokens +from integrations.discord.bot import DiscordBot +from integrations.discord.views import OAuthView from app.core.config import settings logger = logging.getLogger(__name__) diff --git a/backend/bots/discord/discord_views.py b/backend/integrations/discord/views.py similarity index 100% rename from backend/bots/discord/discord_views.py rename to backend/integrations/discord/views.py diff --git a/backend/bots/github_bot/__init__.py b/backend/integrations/github/__init__.py similarity index 100% rename from backend/bots/github_bot/__init__.py rename to backend/integrations/github/__init__.py diff --git a/backend/bots/slack_bot/__init__.py b/backend/integrations/slack/__init__.py similarity index 100% rename from backend/bots/slack_bot/__init__.py rename to backend/integrations/slack/__init__.py From 388d86057eeb8c82083020a14d6ad984a5ca1f37 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 12:23:45 +0530 Subject: [PATCH 07/10] [feat]: modernize API layer structure --- 
backend/app/api/__init__.py | 11 +++++ backend/app/api/router.py | 19 ++++++++ backend/app/api/v1/__init__.py | 1 + backend/app/api/v1/auth.py | 6 +-- backend/app/api/v1/health.py | 85 ++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 backend/app/api/router.py create mode 100644 backend/app/api/v1/__init__.py create mode 100644 backend/app/api/v1/health.py diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index e69de29b..75059e88 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -0,0 +1,11 @@ +""" +API package for the Devr.AI backend. + +This package contains all API-related components: +- router: Main API router with all endpoints +- v1: Version 1 API endpoints +""" + +from .router import api_router + +__all__ = ["api_router"] diff --git a/backend/app/api/router.py b/backend/app/api/router.py new file mode 100644 index 00000000..93664dc8 --- /dev/null +++ b/backend/app/api/router.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter +from .v1.auth import router as auth_router +from .v1.health import router as health_router + +api_router = APIRouter() + +api_router.include_router( + auth_router, + prefix="/v1/auth", + tags=["Authentication"] +) + +api_router.include_router( + health_router, + prefix="/v1", + tags=["Health"] +) + +__all__ = ["api_router"] diff --git a/backend/app/api/v1/__init__.py b/backend/app/api/v1/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/backend/app/api/v1/__init__.py @@ -0,0 +1 @@ + diff --git a/backend/app/api/v1/auth.py b/backend/app/api/v1/auth.py index 2a20e1c7..e70c5dc3 100644 --- a/backend/app/api/v1/auth.py +++ b/backend/app/api/v1/auth.py @@ -1,8 +1,8 @@ from fastapi import APIRouter, Request, HTTPException, Query from fastapi.responses import HTMLResponse -from app.db.supabase.supabase_client import get_supabase_client -from app.db.supabase.users_service import find_user_by_session_and_verify, 
# --- app/api/v1/health.py ---
import logging
from fastapi import APIRouter, HTTPException
from app.database.weaviate.client import get_weaviate_client

router = APIRouter()
logger = logging.getLogger(__name__)


def _discord_status(bot) -> str:
    """Map the Discord bot object to its reported status string."""
    return "running" if bot and not bot.is_closed() else "stopped"


@router.get("/health")
async def health_check():
    """
    General health check endpoint to verify services are running.

    Returns:
        dict: Status of the application and its services

    Raises:
        HTTPException: 503 with an "unhealthy" payload when any probe fails.
    """
    try:
        async with get_weaviate_client() as client:
            weaviate_ready = await client.is_ready()

        # Imported locally to avoid a circular import with the app entrypoint.
        from main import app_instance

        weaviate_status = "ready" if weaviate_ready else "not_ready"
        return {
            "status": "healthy",
            "services": {
                "weaviate": weaviate_status,
                "discord_bot": _discord_status(app_instance.discord_bot),
            },
        }
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(
            status_code=503,
            detail={
                "status": "unhealthy",
                "error": str(e),
            },
        )


@router.get("/health/weaviate")
async def weaviate_health():
    """Check specifically Weaviate service health."""
    try:
        async with get_weaviate_client() as client:
            is_ready = await client.is_ready()
    except Exception as e:
        logger.error(f"Weaviate health check failed: {e}")
        raise HTTPException(
            status_code=503,
            detail={
                "service": "weaviate",
                "status": "unhealthy",
                "error": str(e),
            },
        )

    return {
        "service": "weaviate",
        "status": "ready" if is_ready else "not_ready",
    }


@router.get("/health/discord")
async def discord_health():
    """Check specifically Discord bot health."""
    try:
        # Imported locally to avoid a circular import with the app entrypoint.
        from main import app_instance

        return {
            "service": "discord_bot",
            "status": _discord_status(app_instance.discord_bot),
        }
    except Exception as e:
        logger.error(f"Discord bot health check failed: {e}")
        raise HTTPException(
            status_code=503,
            detail={
                "service": "discord_bot",
                "status": "unhealthy",
                "error": str(e),
            },
        )
app.utils.github_api import get_repo_stats - -router = APIRouter() - -@router.post("/repo-stats") -async def repo_stats_endpoint(repo: RepoRequest): - try: - return await get_repo_stats(repo.repo_url) - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/app/scripts/supabase/create_db.sql b/backend/app/scripts/supabase/create_db.sql deleted file mode 100644 index 8ccd2df3..00000000 --- a/backend/app/scripts/supabase/create_db.sql +++ /dev/null @@ -1,107 +0,0 @@ --- Drop existing tables if they exist -DROP TABLE IF EXISTS conversation_context; -DROP TABLE IF EXISTS interactions; -DROP TABLE IF EXISTS repositories; -DROP TABLE IF EXISTS users; - --- Table: users -CREATE TABLE users ( - id UUID PRIMARY KEY NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), - - -- The email is optional to allow social-only sign-ups, but must be unique if provided. - email TEXT UNIQUE, - - -- Social IDs - discord_id TEXT UNIQUE, - discord_username TEXT, - github_id TEXT UNIQUE, - github_username TEXT, - slack_id TEXT UNIQUE, - slack_username TEXT, - - display_name TEXT NOT NULL, - avatar_url TEXT, - bio TEXT, - location TEXT, - - -- Verification fields to manage the GitHub linking flow. 
- is_verified BOOLEAN NOT NULL DEFAULT false, - verification_token TEXT UNIQUE, - verification_token_expires_at TIMESTAMPTZ, - verified_at TIMESTAMPTZ, - - skills JSONB, - github_stats JSONB, - - last_active_discord TIMESTAMPTZ, - last_active_github TIMESTAMPTZ, - last_active_slack TIMESTAMPTZ, - - total_interactions_count INTEGER NOT NULL DEFAULT 0, - preferred_languages TEXT[] -); - --- Create index for efficient cleanup queries -CREATE INDEX IF NOT EXISTS idx_users_verification_token_expires_at -ON users(verification_token_expires_at) -WHERE verification_token_expires_at IS NOT NULL; - --- Create index for efficient verification queries -CREATE INDEX IF NOT EXISTS idx_users_discord_verification -ON users(discord_id, verification_token) -WHERE verification_token IS NOT NULL; - --- Table: repositories -CREATE TABLE repositories ( - id UUID PRIMARY KEY NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), - github_id BIGINT UNIQUE NOT NULL, - full_name TEXT NOT NULL, - name TEXT NOT NULL, - owner TEXT NOT NULL, - description TEXT, - stars_count INTEGER NOT NULL DEFAULT 0, - forks_count INTEGER NOT NULL DEFAULT 0, - open_issues_count INTEGER NOT NULL DEFAULT 0, - languages_used TEXT[], - topics TEXT[], - is_indexed BOOLEAN NOT NULL DEFAULT false, - indexed_at TIMESTAMPTZ, - indexing_status TEXT, - last_commit_hash TEXT -); - --- Table: interactions -CREATE TABLE interactions ( - id UUID PRIMARY KEY NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now(), - user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, - repository_id UUID REFERENCES repositories(id) ON DELETE SET NULL, - platform TEXT NOT NULL, - platform_specific_id TEXT NOT NULL, - channel_id TEXT, - thread_id TEXT, - content TEXT, - interaction_type TEXT, - sentiment_score FLOAT, - intent_classification TEXT, - topics_discussed TEXT[], - metadata JSONB -); - --- Table: conversation_contexts -CREATE TABLE conversation_context ( - id UUID 
PRIMARY KEY NOT NULL, - user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, - platform TEXT NOT NULL, - memory_thread_id TEXT NOT NULL UNIQUE, - conversation_summary TEXT, - key_topics TEXT[], - total_interactions INTEGER, - session_start_time TIMESTAMPTZ, - session_end_time TIMESTAMPTZ, - created_at TIMESTAMPTZ NOT NULL DEFAULT now() -); diff --git a/backend/app/scripts/supabase/populate_db.sql b/backend/app/scripts/supabase/populate_db.sql deleted file mode 100644 index a3d3ee5d..00000000 --- a/backend/app/scripts/supabase/populate_db.sql +++ /dev/null @@ -1,127 +0,0 @@ --- Users -insert into - users ( - id, created_at, updated_at, email, discord_id, discord_username, - github_id, github_username, slack_id, slack_username, display_name, - avatar_url, bio, location, is_verified, verification_token, - verification_token_expires_at, verified_at, skills, github_stats, - last_active_discord, last_active_github, last_active_slack, - total_interactions_count, preferred_languages - ) -values - ( - '6afc59e3-18b7-4182-b42c-8210d1152b07', '2025-05-05 03:56:41', '2025-01-22 14:50:25', - 'blakeerik@yahoo.com', '3eb13b9046684257', 'donaldgarcia', '16419f828b9d4434', 'fjohnson', - '9a1de644815e46d1', 'hoffmanjennifer', 'Jennifer Cole', 'https://dummyimage.com/696x569', - 'Bill here grow gas enough analysis. Movie win her need stop peace technology.', 'East Steven', - true, null, null, '2025-05-14 15:04:01', - '{"skills": ["Python", "C++", "Java"]}'::jsonb, '{"commits": 300}'::jsonb, - '2025-04-19 03:34:26', '2025-02-12 15:28:51', '2025-05-13 22:32:01', 28, - array['JavaScript', 'C++'] - ), - ( - '6f990423-0d57-4c64-b191-17e53f39c799', '2025-01-11 20:41:23', '2025-02-14 11:26:28', - 'jeffrey28@yahoo.com', '50c187fcce174b4e', 'nadams', 'e059a0ee9132463e', 'jason76', - '757750a9a49140b2', 'josephwright', 'Deborah Richards', 'https://www.lorempixel.com/186/96', - 'Civil quite others his other life edge network. 
Quite boy those.', 'Kathrynside', - true, null, null, '2025-01-01 02:39:54', - '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, '{"commits": 139}'::jsonb, - '2025-04-27 07:17:02', '2025-03-04 22:40:36', '2025-04-05 21:04:03', 75, - array['Go', 'Python'] - ), - ( - '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', '2025-03-01 17:07:10', '2025-02-16 11:55:43', - 'samuel87@gmail.com', '913e4de2e0c54cb8', 'millertodd', '885f6e66c2b642c5', 'davidalvarez', - '8715a10343da4043', 'ibrandt', 'Melissa Marquez', 'https://www.lorempixel.com/507/460', - 'Open discover detail. Remain arrive attack all. Audience draw protect Democrat car very.', 'Stevenland', - false, 'db20a56e-dc81-4fe7-8eda-8bbb71710434', '2025-06-21 12:00:00', null, - '{"skills": ["Python", "JavaScript", "C++"]}'::jsonb, '{"commits": 567}'::jsonb, - '2025-01-20 00:17:15', '2025-01-10 19:45:31', '2025-05-07 15:12:55', 77, - array['Python', 'Rust'] - ); - --- Repositories -insert into - repositories ( - id, created_at, updated_at, github_id, full_name, name, owner, description, - stars_count, forks_count, open_issues_count, languages_used, topics, is_indexed, - indexed_at, indexing_status, last_commit_hash - ) -values - ( - 'f6b0bff9-074d-4062-86f5-0a853e521334', '2025-05-16 10:34:41', '2025-02-16 08:54:52', 3728882, - 'jamessellers/repo_0', 'repo_0', 'jamessellers', 'Him task improve fish list tree high.', - 3032, 363, 26, array['C++', 'Python'], array['Java', 'C++'], true, '2025-05-09 21:00:50', - 'completed', 'e270dbf424cff6864cc592f6611d8df90c895ec5' - ), - ( - '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', '2025-01-08 04:31:26', '2025-01-25 12:21:00', 3741438, - 'gallowayjoseph/repo_1', 'repo_1', 'gallowayjoseph', 'Whole forward beyond suddenly between treat address.', - 3786, 388, 34, array['C++', 'Go'], array['C++', 'Rust'], true, '2025-01-28 23:48:46', - 'completed', 'c9f97db5d2fc4b809df59bc23dd7345dbe6d14d5' - ), - ( - '08946f22-0d74-4499-b40d-0f60218d5152', '2025-04-02 03:59:05', '2025-02-21 11:05:44', 6292423, - 
'fjohnson/repo_2', 'repo_2', 'fjohnson', 'Perhaps however bag forget purpose move.', - 3286, 274, 8, array['JavaScript', 'HTML'], array['Rust', 'C++'], false, '2025-03-03 11:44:52', - 'pending', '5e3af4aafc18e025cea707fa7707a1d945e0ffef' - ); - --- Interactions -insert into - interactions ( - id, created_at, user_id, repository_id, platform, platform_specific_id, channel_id, - thread_id, content, interaction_type, sentiment_score, intent_classification, - topics_discussed, metadata - ) -values - ( - '7c59fe66-53b6-44b5-8ae1-ddc29b071097', '2025-03-10 12:14:30', '6afc59e3-18b7-4182-b42c-8210d1152b07', - 'f6b0bff9-074d-4062-86f5-0a853e521334', 'github', 'aa143cd82ff34de4', - 'f982f4e08603456a', '86abd4e7f4124360', - 'Skill medical after them analysis hit health. Ground attack drop. Billion old series card good full poor store.', - 'comment', -0.07, 'help_request', array['C++', 'TypeScript'], '{"info": "capital"}'::jsonb - ), - ( - 'f0c80815-fde1-4644-94ca-cd8915f11e46', '2025-03-19 16:14:11', '6f990423-0d57-4c64-b191-17e53f39c799', - '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', 'github', '62fb26d7f4db4a07', - '7f072cb92fd340c0', 'ec9f9c545e0a42ab', - 'Song risk bad own state. Family bill foreign fast knowledge response coach. Goal amount thank good your ever.', - 'pr', 0.6, 'help_request', array['JavaScript', 'TypeScript'], '{"info": "already"}'::jsonb - ), - ( - 'ef139daa-fa4c-445a-8bf7-fdd725bdb82c', '2025-05-06 06:40:36', '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', - '08946f22-0d74-4499-b40d-0f60218d5152', 'slack', '9136f1f8f31046dc', - 'add702c92747493c', '5f3c44dc5ef747b8', - 'Off morning huge power. Whether ago control military trial. 
Energy employee land you.', - 'message', -0.16, 'feature_request', array['Go', 'JavaScript'], '{"info": "security"}'::jsonb - ); - --- Conversation Context -insert into - conversation_context ( - id, user_id, platform, memory_thread_id, conversation_summary, key_topics, - total_interactions, session_start_time, session_end_time, created_at - ) -values - ( - 'c1b2c3d4-e5f6-a7b8-c9d0-e1f2a3b4c5d6', '6afc59e3-18b7-4182-b42c-8210d1152b07', - 'discord', '112233445566778899', - 'The user asked about getting started with the API and had questions about authentication. They were provided with a link to the documentation.', - array['onboarding', 'api_keys', 'authentication'], 8, '2025-06-20 10:00:00', - '2025-06-20 10:25:00', '2025-06-20 10:25:00' - ), - ( - 'd2c3d4e5-f6a7-b8c9-d0e1-f2a3b4c5d6e7', '6f990423-0d57-4c64-b191-17e53f39c799', - 'slack', '998877665544332211', - 'User reported a potential bug related to the repository indexing service. They provided logs and a repository URL. The issue was acknowledged and a ticket was created.', - array['bug_report', 'indexing', 'repositories'], 12, '2025-06-21 09:00:00', - '2025-06-21 09:45:00', '2025-06-21 09:45:00' - ), - ( - 'e3d4e5f6-a7b8-c9d0-e1f2-a3b4c5d6e7f8', '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', - 'discord', '123451234512345123', - 'A general discussion about the future of Rust and its use in web development. 
The user shared an article and asked for opinions.', - array['Rust', 'web_development', 'discussion'], 5, '2025-06-19 14:30:00', - '2025-06-19 15:00:00', '2025-06-19 15:00:00' - ); diff --git a/backend/app/scripts/weaviate/__init__.py b/backend/app/scripts/weaviate/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/app/scripts/weaviate/create_schemas.py b/backend/app/scripts/weaviate/create_schemas.py deleted file mode 100644 index 6d6fedcd..00000000 --- a/backend/app/scripts/weaviate/create_schemas.py +++ /dev/null @@ -1,57 +0,0 @@ -import asyncio -from app.db.weaviate.weaviate_client import get_client -import weaviate.classes.config as wc - -async def create_schema(client, name, properties): - await client.collections.create( - name=name, - properties=properties, - ) - print(f"Created: {name}") - -async def create_user_profile_schema(client): - """ - Create schema for WeaviateUserProfile model. - Main vectorization will be on profile_text_for_embedding field. 
- """ - properties = [ - wc.Property(name="user_id", data_type=wc.DataType.TEXT), - wc.Property(name="github_username", data_type=wc.DataType.TEXT), - wc.Property(name="display_name", data_type=wc.DataType.TEXT), - wc.Property(name="bio", data_type=wc.DataType.TEXT), - wc.Property(name="location", data_type=wc.DataType.TEXT), - wc.Property(name="repositories", data_type=wc.DataType.TEXT), # JSON string - wc.Property(name="pull_requests", data_type=wc.DataType.TEXT), # JSON string - wc.Property(name="languages", data_type=wc.DataType.TEXT_ARRAY), - wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY), - wc.Property(name="followers_count", data_type=wc.DataType.INT), - wc.Property(name="following_count", data_type=wc.DataType.INT), - wc.Property(name="total_stars_received", data_type=wc.DataType.INT), - wc.Property(name="total_forks", data_type=wc.DataType.INT), - wc.Property(name="profile_text_for_embedding", data_type=wc.DataType.TEXT), - wc.Property(name="last_updated", data_type=wc.DataType.DATE), - ] - await create_schema(client, "weaviate_user_profile", properties) - -async def create_all_schemas(): - """ - Create only the user profile schema as per the model structure. 
- """ - client = get_client() - try: - await client.connect() - await create_user_profile_schema(client) - print("✅ User profile schema created successfully.") - except Exception as e: - print(f"❌ Error creating schema: {str(e)}") - raise - finally: - await client.close() - -def main(): - """Entry point for running the schema creation.""" - asyncio.run(create_all_schemas()) - - -if __name__ == "__main__": - main() diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py deleted file mode 100644 index 0137570c..00000000 --- a/backend/app/scripts/weaviate/populate_db.py +++ /dev/null @@ -1,306 +0,0 @@ -import json -import asyncio -from datetime import datetime -from app.db.weaviate.weaviate_client import get_weaviate_client - -async def populate_weaviate_user_profile(client): - """ - Populate WeaviateUserProfile collection with sample data matching the model structure. - """ - current_time = datetime.now().astimezone() - - user_profiles = [ - { - "user_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef", - "github_username": "jane-dev", - "display_name": "Jane Developer", - "bio": ("Creator of innovative open-source tools. 
Full-stack developer " - "with a passion for Rust and WebAssembly."), - "location": "Berlin, Germany", - "repositories": json.dumps([ - { - "name": "rust-web-framework", - "description": "A high-performance web framework for Rust.", - "url": "https://github.com/jane-dev/rust-web-framework", - "languages": ["Rust", "TOML"], - "stars": 2500, - "forks": 400 - }, - { - "name": "data-viz-lib", - "description": "A declarative data visualization library for JavaScript.", - "url": "https://github.com/jane-dev/data-viz-lib", - "languages": ["JavaScript", "TypeScript"], - "stars": 1200, - "forks": 150 - } - ]), - "pull_requests": json.dumps([ - { - "title": "Add async support for database connections", - "body": ("This PR adds comprehensive async support for database " - "connections, improving performance by 40%..."), - "state": "closed", - "repository": "microsoft/vscode", - "created_at": "2024-01-15T10:30:00Z", - "closed_at": "2024-01-20T14:20:00Z", - "merged_at": "2024-01-20T14:20:00Z", - "labels": ["enhancement", "database", "performance"], - "url": "https://github.com/microsoft/vscode/pull/12345" - } - ]), - "languages": ["Rust", "JavaScript", "TypeScript", "TOML"], - "topics": ["rust", "webdev", "performance", "framework", - "data-visualization", "d3", "charts"], - "followers_count": 1800, - "following_count": 250, - "total_stars_received": 3700, - "total_forks": 550, - "profile_text_for_embedding": ( - "Jane Developer, Creator of innovative open-source tools. " - "Full-stack developer with a passion for Rust and WebAssembly. " - "Repositories: rust-web-framework, A high-performance web framework for Rust. " - "data-viz-lib, A declarative data visualization library for JavaScript. " - "Languages: Rust, JavaScript, TypeScript. " - "Topics: rust, webdev, performance, data-visualization." 
- ), - "last_updated": current_time.isoformat() - }, - { - "user_id": "b2c3d4e5-f6g7-8901-2345-678901bcdefg", - "github_username": "python-ninja", - "display_name": "Alex Chen", - "bio": "Python enthusiast and machine learning researcher. Building the future of AI.", - "location": "San Francisco, CA", - "repositories": json.dumps([ - { - "name": "ml-toolkit", - "description": "A comprehensive machine learning toolkit for Python.", - "url": "https://github.com/python-ninja/ml-toolkit", - "languages": ["Python", "Jupyter Notebook"], - "stars": 3200, - "forks": 580 - }, - { - "name": "data-pipeline", - "description": "Scalable data processing pipeline for big data applications.", - "url": "https://github.com/python-ninja/data-pipeline", - "languages": ["Python", "SQL"], - "stars": 1800, - "forks": 320 - } - ]), - "pull_requests": json.dumps([ - { - "title": "Implement advanced ML algorithms", - "body": ("Adding support for advanced machine learning algorithms " - "including neural networks..."), - "state": "open", - "repository": "tensorflow/tensorflow", - "created_at": "2024-02-01T09:15:00Z", - "closed_at": None, - "merged_at": None, - "labels": ["enhancement", "ml", "algorithms"], - "url": "https://github.com/tensorflow/tensorflow/pull/67890" - } - ]), - "languages": ["Python", "SQL", "Jupyter Notebook"], - "topics": ["machine-learning", "ai", "data-science", "python", "big-data"], - "followers_count": 2400, - "following_count": 180, - "total_stars_received": 5000, - "total_forks": 900, - "profile_text_for_embedding": ( - "Alex Chen, Python enthusiast and machine learning researcher. " - "Building the future of AI. " - "Repositories: ml-toolkit, A comprehensive machine learning toolkit for Python. " - "data-pipeline, Scalable data processing pipeline for big data applications. " - "Languages: Python, SQL. " - "Topics: machine-learning, ai, data-science, python." 
- ), - "last_updated": current_time.isoformat() - }, - { - "user_id": "c3d4e5f6-g7h8-9012-3456-789012cdefgh", - "github_username": "go-developer", - "display_name": "Sam Rodriguez", - "bio": "Cloud infrastructure engineer specializing in Go and Kubernetes.", - "location": "Austin, TX", - "repositories": json.dumps([ - { - "name": "k8s-operator", - "description": "Custom Kubernetes operator for managing microservices.", - "url": "https://github.com/go-developer/k8s-operator", - "languages": ["Go", "Dockerfile"], - "stars": 1500, - "forks": 280 - } - ]), - "pull_requests": json.dumps([ - { - "title": "Add support for custom resources", - "body": ("Implementing support for custom Kubernetes resources " - "in the operator..."), - "state": "merged", - "repository": "kubernetes/kubernetes", - "created_at": "2024-01-10T14:30:00Z", - "closed_at": "2024-01-15T16:45:00Z", - "merged_at": "2024-01-15T16:45:00Z", - "labels": ["enhancement", "k8s", "operator"], - "url": "https://github.com/kubernetes/kubernetes/pull/54321" - } - ]), - "languages": ["Go", "Dockerfile"], - "topics": ["kubernetes", "microservices", "cloud", "devops", "api"], - "followers_count": 890, - "following_count": 120, - "total_stars_received": 1500, - "total_forks": 280, - "profile_text_for_embedding": ( - "Sam Rodriguez, Cloud infrastructure engineer specializing in Go and Kubernetes. " - "Repositories: k8s-operator, Custom Kubernetes operator for managing microservices. " - "Languages: Go, Dockerfile. " - "Topics: kubernetes, microservices, cloud, devops." 
- ), - "last_updated": current_time.isoformat() - }, - { - "user_id": "d4e5f6g7-h8i9-0123-4567-890123defghi", - "github_username": "frontend-wizard", - "display_name": "Emily Johnson", - "bio": "Frontend developer creating beautiful and accessible web experiences.", - "location": "New York, NY", - "repositories": json.dumps([ - { - "name": "react-components", - "description": "Reusable React component library with TypeScript.", - "url": "https://github.com/frontend-wizard/react-components", - "languages": ["TypeScript", "CSS", "JavaScript"], - "stars": 2100, - "forks": 420 - }, - { - "name": "css-animations", - "description": "Collection of smooth CSS animations and transitions.", - "url": "https://github.com/frontend-wizard/css-animations", - "languages": ["CSS", "HTML"], - "stars": 850, - "forks": 180 - } - ]), - "pull_requests": json.dumps([ - { - "title": "Improve accessibility features", - "body": ("Adding comprehensive accessibility features to the " - "React component library..."), - "state": "open", - "repository": "facebook/react", - "created_at": "2024-02-05T11:20:00Z", - "closed_at": None, - "merged_at": None, - "labels": ["accessibility", "enhancement", "a11y"], - "url": "https://github.com/facebook/react/pull/98765" - } - ]), - "languages": ["TypeScript", "JavaScript", "CSS", "HTML"], - "topics": ["react", "frontend", "typescript", "css", "ui-ux", "accessibility"], - "followers_count": 1320, - "following_count": 200, - "total_stars_received": 2950, - "total_forks": 600, - "profile_text_for_embedding": ( - "Emily Johnson, Frontend developer creating beautiful and accessible web experiences. " - "Repositories: react-components, Reusable React component library with TypeScript. " - "css-animations, Collection of smooth CSS animations and transitions. " - "Languages: TypeScript, JavaScript, CSS. " - "Topics: react, frontend, typescript, css, ui-ux." 
- ), - "last_updated": current_time.isoformat() - }, - { - "user_id": "e5f6g7h8-i9j0-1234-5678-901234efghij", - "github_username": "rust-enthusiast", - "display_name": "David Kim", - "bio": "Systems programmer passionate about performance and memory safety.", - "location": "Seattle, WA", - "repositories": json.dumps([ - { - "name": "memory-allocator", - "description": "Custom memory allocator written in Rust for high-performance applications.", - "url": "https://github.com/rust-enthusiast/memory-allocator", - "languages": ["Rust"], - "stars": 1750, - "forks": 240 - }, - { - "name": "concurrent-data-structures", - "description": "Lock-free data structures for concurrent programming in Rust.", - "url": "https://github.com/rust-enthusiast/concurrent-data-structures", - "languages": ["Rust"], - "stars": 1200, - "forks": 180 - } - ]), - "pull_requests": json.dumps([ - { - "title": "Optimize memory allocation patterns", - "body": ("Implementing advanced memory allocation optimization techniques " - "for better performance..."), - "state": "merged", - "repository": "rust-lang/rust", - "created_at": "2024-01-25T08:45:00Z", - "closed_at": "2024-02-01T10:30:00Z", - "merged_at": "2024-02-01T10:30:00Z", - "labels": ["performance", "memory", "optimization"], - "url": "https://github.com/rust-lang/rust/pull/13579" - } - ]), - "languages": ["Rust", "C++", "Assembly"], - "topics": ["rust", "systems-programming", "performance", "memory-safety", "concurrency"], - "followers_count": 980, - "following_count": 85, - "total_stars_received": 2950, - "total_forks": 420, - "profile_text_for_embedding": ( - "David Kim, Systems programmer passionate about performance and memory safety. " - "Repositories: memory-allocator, Custom memory allocator written in Rust for " - "high-performance applications. concurrent-data-structures, Lock-free data structures " - "for concurrent programming in Rust. Languages: Rust, C++, Assembly. 
" - "Topics: rust, systems-programming, performance, memory-safety." - ), - "last_updated": current_time.isoformat() - } - ] - - try: - collection = client.collections.get("weaviate_user_profile") - async with collection.batch.dynamic() as batch: - for profile in user_profiles: - batch.add_object( - properties=profile - ) - print("✅ Populated weaviate_user_profile with sample user data.") - except Exception as e: - print(f"❌ Error populating weaviate_user_profile: {e}") - raise - -async def populate_all_collections(): - """ - Populate only the user profile collection as per the updated model structure. - """ - try: - async with get_weaviate_client() as client: - print("Populating Weaviate user profile collection with sample data...") - await populate_weaviate_user_profile(client) - print("✅ User profile collection populated successfully.") - except Exception as e: - print(f"❌ Error during population: {e}") - raise - -def main(): - """Entry point for running the population script.""" - asyncio.run(populate_all_collections()) - - -if __name__ == "__main__": - main() diff --git a/backend/app/services/vector_db/sql.txt b/backend/app/services/vector_db/sql.txt deleted file mode 100644 index 766b07cb..00000000 --- a/backend/app/services/vector_db/sql.txt +++ /dev/null @@ -1,133 +0,0 @@ --- Ensure vector extension is enabled -CREATE EXTENSION IF NOT EXISTS vector; - --- Ensure `authenticator` has proper schema permissions -REVOKE ALL ON SCHEMA public FROM PUBLIC; -GRANT ALL ON SCHEMA public TO postgres; -GRANT USAGE, CREATE ON SCHEMA public TO authenticator; -ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO authenticator; - --- Drop existing functions -DROP FUNCTION IF EXISTS get_embedding; -DROP FUNCTION IF EXISTS delete_embedding; -DROP FUNCTION IF EXISTS add_embedding; -DROP FUNCTION IF EXISTS update_embedding; -DROP FUNCTION IF EXISTS add_mutiple_embedding; -DROP FUNCTION IF EXISTS search_embeddings; -DROP FUNCTION IF EXISTS create_embeddings_table; - 
--- Step 3: Ensure the vector extension exists before using it -CREATE OR REPLACE FUNCTION create_embeddings_table() RETURNS VOID AS $$ -BEGIN - -- Create table only if it doesn't exist - DROP TABLE IF EXISTS embeddings CASCADE; - - CREATE TABLE embeddings ( - id TEXT, - collection TEXT NOT NULL, - content TEXT NOT NULL, - metadata JSONB, - embedding VECTOR(100), - CONSTRAINT embeddings_pkey PRIMARY KEY (collection, id) - ); - - -- Create vector index (ensure this is compatible with your vector extension) - CREATE INDEX IF NOT EXISTS embeddings_embedding_idx - ON embeddings USING ivfflat (embedding vector_cosine_ops) - WITH (lists = 100); - - -- Index for efficient collection queries - CREATE INDEX IF NOT EXISTS embeddings_collection_idx - ON embeddings (collection); -END; -$$ LANGUAGE plpgsql SECURITY DEFINER; - --- Function to add an embedding (handle potential constraint issues) --- Explicit vector(100) in all functions -CREATE OR REPLACE FUNCTION add_embedding( - p_id TEXT, - p_collection TEXT, - p_content TEXT, - p_metadata JSONB, - p_embedding VECTOR(100) -- Fixed here -) RETURNS VOID AS $$ -BEGIN - INSERT INTO embeddings (id, collection, content, metadata, embedding) - VALUES (p_id, p_collection, p_content, p_metadata, p_embedding) - ON CONFLICT (collection, id) DO UPDATE - SET content = EXCLUDED.content, metadata = EXCLUDED.metadata, embedding = EXCLUDED.embedding; -END; -$$ LANGUAGE plpgsql; - - --- Function to bulk insert embeddings -CREATE OR REPLACE FUNCTION add_multiple_embeddings(data JSONB) RETURNS VOID AS $$ -BEGIN - INSERT INTO embeddings (id, collection, content, metadata, embedding) - SELECT - item->>'p_id', - item->>'p_collection', - item->>'p_content', - item->'p_metadata', - (item->>'p_embedding')::vector(100) -- Correct casting to vector - FROM jsonb_array_elements(data) - ON CONFLICT (collection, id) DO UPDATE - SET content = EXCLUDED.content, metadata = EXCLUDED.metadata, embedding = EXCLUDED.embedding; -END; -$$ LANGUAGE plpgsql; - --- 
Function to search for embeddings -CREATE OR REPLACE FUNCTION search_embeddings( - p_query_embedding VECTOR(100), - p_collection TEXT, - p_limit INT, - p_threshold FLOAT -) -RETURNS TABLE(id TEXT, collection TEXT, content TEXT, metadata JSONB, embedding VECTOR(100)) AS $$ -SELECT * FROM embeddings -WHERE collection = p_collection -ORDER BY embedding <-> p_query_embedding -LIMIT p_limit; -$$ LANGUAGE sql; - --- Function to retrieve an embedding by ID -CREATE OR REPLACE FUNCTION get_embedding(p_id TEXT, p_collection TEXT) -RETURNS TABLE(id TEXT, collection TEXT, content TEXT, metadata JSONB, embedding VECTOR(100)) AS $$ -SELECT * FROM embeddings WHERE id = p_id AND collection = p_collection; -$$ LANGUAGE sql; - --- Function to delete an embedding -CREATE OR REPLACE FUNCTION delete_embedding(p_id TEXT, p_collection TEXT) -RETURNS VOID AS $$ -DELETE FROM embeddings WHERE id = p_id AND collection = p_collection; -$$ LANGUAGE sql; - --- Function to update an embedding -CREATE OR REPLACE FUNCTION update_embedding( - p_id TEXT, p_collection TEXT, p_content TEXT, p_metadata JSONB, p_embedding VECTOR(100) -) RETURNS VOID AS $$ -UPDATE embeddings -SET content = p_content, metadata = p_metadata, embedding = p_embedding -WHERE id = p_id AND collection = p_collection; -$$ LANGUAGE sql; - --- Function to list all collections -CREATE OR REPLACE FUNCTION list_collections() RETURNS TABLE(collection TEXT) AS $$ -SELECT DISTINCT collection FROM embeddings; -$$ LANGUAGE sql; - --- Function to check database connection -CREATE OR REPLACE FUNCTION check_embeddings_connection() RETURNS BOOLEAN AS $$ -SELECT EXISTS(SELECT 1 FROM embeddings LIMIT 1); -$$ LANGUAGE sql; - --- Ensure `authenticator` can execute functions -GRANT EXECUTE ON FUNCTION create_embeddings_table() TO authenticator; -GRANT EXECUTE ON FUNCTION add_embedding(TEXT, TEXT, TEXT, JSONB, VECTOR(100)) TO authenticator; -GRANT EXECUTE ON FUNCTION add_multiple_embeddings(JSONB) TO authenticator; -GRANT EXECUTE ON FUNCTION 
search_embeddings(VECTOR(100), TEXT, INT, FLOAT) TO authenticator; -GRANT EXECUTE ON FUNCTION get_embedding(TEXT, TEXT) TO authenticator; -GRANT EXECUTE ON FUNCTION delete_embedding(TEXT, TEXT) TO authenticator; -GRANT EXECUTE ON FUNCTION update_embedding(TEXT, TEXT, TEXT, JSONB, VECTOR(100)) TO authenticator; -GRANT EXECUTE ON FUNCTION list_collections() TO authenticator; -GRANT EXECUTE ON FUNCTION check_embeddings_connection() TO authenticator; \ No newline at end of file diff --git a/backend/app/utils/github_api.py b/backend/app/utils/github_api.py deleted file mode 100644 index 23ac0f80..00000000 --- a/backend/app/utils/github_api.py +++ /dev/null @@ -1,144 +0,0 @@ -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -import requests -import os -from urllib.parse import urlparse -from dotenv import load_dotenv - -load_dotenv() # Load environment variables - -app = FastAPI() - -class RepoRequest(BaseModel): - repo_url: str - -def parse_github_url(url: str) -> tuple: - """Extract owner/repo from GitHub URL""" - parsed = urlparse(url) - path = parsed.path.strip('/').split('/') - if len(path) < 2: - raise ValueError("Invalid GitHub URL") - return path[0], path[1] - -def github_api_request(endpoint: str) -> dict: - """Make authenticated GitHub API request""" - headers = { - "Authorization": f"token {os.getenv('GITHUB_TOKEN')}", - "Accept": "application/vnd.github.v3+json" - } - response = requests.get(f"https://api.github.com{endpoint}", headers=headers) - response.raise_for_status() - return response.json() - - -@app.post("/repo-stats") -async def get_repo_stats(repo_url: str): - try: - owner, repo_name = parse_github_url(repo_url) - # Rest of your function... 
- # Get basic repo info - repo_info = github_api_request(f"/repos/{owner}/{repo_name}") - - # Get contributors - contributors = github_api_request(f"/repos/{owner}/{repo_name}/contributors") - - # Get pull requests - prs = github_api_request(f"/repos/{owner}/{repo_name}/pulls?state=all") - - # Get issues - issues = github_api_request(f"/repos/{owner}/{repo_name}/issues?state=all") - - community_profile = github_api_request(f"/repos/{owner}/{repo_name}/community/profile") - - # Recent commits (last 5) - commits = github_api_request(f"/repos/{owner}/{repo_name}/commits?per_page=5") - - code_frequency = github_api_request(f"/repos/{owner}/{repo_name}/stats/code_frequency") - - pull_requests_by_state = { - "open": sum(1 for pr in prs if pr["state"] == "open"), - "closed": sum(1 for pr in prs if pr["state"] == "closed"), - "draft": sum(1 for pr in prs if pr.get("draft", False)), - "merged": sum(1 for pr in prs if pr.get("merged_at")) - } - pr_details = [{ - "title": pr["title"], - "number": pr["number"], - "state": pr["state"], - "url": pr["html_url"], - "author": { - "login": pr["user"]["login"], - "avatar_url": pr["user"]["avatar_url"], - "profile_url": pr["user"]["html_url"] - }, - } for pr in prs] - - - return { - "name": repo_info["full_name"], - "stars": repo_info["stargazers_count"], - "forks": repo_info["forks_count"], - "watchers": repo_info["subscribers_count"], - "created_at": repo_info["created_at"], - "updated_at": repo_info["updated_at"], - # Licensing and topics - # "license": repo_info.get("license", {}).get("spdx_id", "No License"), - - "topics": repo_info.get("topics", []), - - "contributors": [{ - "login": c["login"], - "contributions": c["contributions"], - "avatar_url": c["avatar_url"] - } for c in contributors], - "recent_commits": [{ - "sha": commit["sha"][:7], - "author": commit["commit"]["author"]["name"], - "message": commit["commit"]["message"], - "date": commit["commit"]["author"]["date"] - } for commit in commits], - - - "community": { - 
"health_percentage": community_profile["health_percentage"], - "code_of_conduct": community_profile.get("files", {}).get("code_of_conduct") is not None, - "license": community_profile.get("files", {}).get("license") is not None, - "readme": community_profile.get("files", {}).get("readme") is not None - }, - # Issues - "issues": { - "total": len(issues), - "open": sum(1 for issue in issues if issue["state"] == "open"), - "closed": sum(1 for issue in issues if issue["state"] == "closed"), - "labels": list({label["name"] for issue in issues for label in issue["labels"]}) - }, - - # Code statistics - "code_activity": { - "weekly_commits": len(code_frequency) if isinstance(code_frequency, list) else 0, - "total_additions": sum(week[1] for week in code_frequency) if isinstance(code_frequency, list) else 0, - "total_deletions": sum(abs(week[2]) for week in code_frequency) if isinstance(code_frequency, list) else 0 - }, - - # Pull Requests - "pull_requests": { - **pull_requests_by_state, - "total": len(prs), - "details": pr_details - "total": len(prs), - "merged": sum(1 for pr in prs if pr["merged_at"]), - "draft": sum(1 for pr in prs if pr["draft"]), - "by_state": { - "open": sum(1 for pr in prs if pr["state"] == "open"), - "closed": sum(1 for pr in prs if pr["state"] == "closed") - } - - }, - } - - except requests.HTTPError as e: - raise HTTPException(status_code=e.response.status_code, - detail="GitHub API error") - except ValueError: - raise HTTPException(status_code=400, - detail="Invalid GitHub URL format") diff --git a/backend/app/utils/helpers.py b/backend/app/utils/helpers.py deleted file mode 100644 index e69de29b..00000000 From fcc46c69304c7727b05ba4819cb6fbacd23750a5 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 20:39:11 +0530 Subject: [PATCH 09/10] [fix]: update integration points after restructuring --- .../core/orchestration/agent_coordinator.py | 4 +-- backend/main.py | 32 ++++--------------- backend/routes.py | 3 +- 
tests/test_supabase.py | 4 +-- 4 files changed, 12 insertions(+), 31 deletions(-) diff --git a/backend/app/core/orchestration/agent_coordinator.py b/backend/app/core/orchestration/agent_coordinator.py index 25ed96ad..b80ab979 100644 --- a/backend/app/core/orchestration/agent_coordinator.py +++ b/backend/app/core/orchestration/agent_coordinator.py @@ -5,9 +5,9 @@ from app.agents.devrel.agent import DevRelAgent # TODO: Implement GitHub agent # from app.agents.github.agent import GitHubAgent -from app.agents.shared.state import AgentState +from app.agents.state import AgentState from app.core.orchestration.queue_manager import AsyncQueueManager -from app.agents.devrel.nodes.summarization_node import store_summary_to_database +from app.agents.devrel.nodes.summarization import store_summary_to_database from langsmith import traceable logger = logging.getLogger(__name__) diff --git a/backend/main.py b/backend/main.py index 77a69440..0ac3c2fc 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,13 +6,13 @@ import uvicorn from fastapi import FastAPI, Response -from app.api.v1.auth import router as auth_router +from app.api.router import api_router from app.core.config import settings from app.core.orchestration.agent_coordinator import AgentCoordinator from app.core.orchestration.queue_manager import AsyncQueueManager -from app.db.weaviate.weaviate_client import get_weaviate_client -from bots.discord.discord_bot import DiscordBot -from bots.discord.discord_cogs import DevRelCommands +from app.database.weaviate.client import get_weaviate_client +from integrations.discord.bot import DiscordBot +from integrations.discord.cogs import DevRelCommands logging.basicConfig( level=logging.INFO, @@ -103,28 +103,8 @@ async def favicon(): """Return empty favicon to prevent 404 logs""" return Response(status_code=204) -@api.get("/health") -async def health_check(): - """Health check endpoint to verify services are running""" - try: - async with get_weaviate_client() as client: - 
weaviate_ready = await client.is_ready() - - return { - "status": "healthy", - "services": { - "weaviate": "ready" if weaviate_ready else "not_ready", - "discord_bot": "running" if app_instance.discord_bot and not app_instance.discord_bot.is_closed() else "stopped" - } - } - except Exception as e: - logger.error(f"Health check failed: {e}") - return { - "status": "unhealthy", - "error": str(e) - } - -api.include_router(auth_router, prefix="/v1/auth", tags=["Authentication"]) + +api.include_router(api_router) if __name__ == "__main__": diff --git a/backend/routes.py b/backend/routes.py index 1bf8bed0..7dbd6463 100644 --- a/backend/routes.py +++ b/backend/routes.py @@ -13,6 +13,7 @@ class RepoRequest(BaseModel): repo_url: str + logging.basicConfig(level=logging.INFO) handler_registry = HandlerRegistry() event_bus = EventBus(handler_registry) @@ -71,7 +72,7 @@ async def github_webhook(request: Request): event_type = EventType.PR_MERGED else: logging.info("Pull request closed without merge; no event dispatched.") - + # Handle pull request comment events elif event_header in ["pull_request_review_comment", "pull_request_comment"]: action = payload.get("action") diff --git a/tests/test_supabase.py b/tests/test_supabase.py index 2cf7f1b3..55b98671 100644 --- a/tests/test_supabase.py +++ b/tests/test_supabase.py @@ -1,6 +1,6 @@ -from ..backend.app.model.supabase.models import User, Interaction, CodeChunk, Repository +from backend.app.models.database.supabase import User, Interaction, CodeChunk, Repository from uuid import uuid4 -from ..backend.app.db.supabase.supabase_client import get_supabase_client +from backend.app.database.supabase.client import get_supabase_client from datetime import datetime # Your User model import client = get_supabase_client() From d5e7f84ea418d630e1ba4fa3e8a18708dfc0cb30 Mon Sep 17 00:00:00 2001 From: kartikbhtt7 Date: Sat, 28 Jun 2025 20:40:27 +0530 Subject: [PATCH 10/10] coderabbit fixes --- .../agents/devrel/nodes/handlers/__init__.py | 0 
.../devrel/nodes/handlers/web_search.py | 3 ++- backend/app/api/v1/health.py | 21 ++++++++++--------- backend/app/core/dependencies.py | 12 +++++++++++ backend/main.py | 1 + 5 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 backend/app/agents/devrel/nodes/handlers/__init__.py create mode 100644 backend/app/core/dependencies.py diff --git a/backend/app/agents/devrel/nodes/handlers/__init__.py b/backend/app/agents/devrel/nodes/handlers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/agents/devrel/nodes/handlers/web_search.py b/backend/app/agents/devrel/nodes/handlers/web_search.py index db7cd9cc..96fac811 100644 --- a/backend/app/agents/devrel/nodes/handlers/web_search.py +++ b/backend/app/agents/devrel/nodes/handlers/web_search.py @@ -2,7 +2,8 @@ from typing import Dict, Any from app.agents.state import AgentState from langchain_core.messages import HumanMessage -from ...prompts.search_prompt import EXTRACT_SEARCH_QUERY_PROMPT +from app.agents.devrel.prompts.search_prompt import EXTRACT_SEARCH_QUERY_PROMPT + logger = logging.getLogger(__name__) diff --git a/backend/app/api/v1/health.py b/backend/app/api/v1/health.py index 26bbabd3..a60ef0a5 100644 --- a/backend/app/api/v1/health.py +++ b/backend/app/api/v1/health.py @@ -1,13 +1,18 @@ import logging -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, Depends from app.database.weaviate.client import get_weaviate_client +from app.core.dependencies import get_app_instance +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from main import DevRAIApplication router = APIRouter() logger = logging.getLogger(__name__) @router.get("/health") -async def health_check(): +async def health_check(app_instance: "DevRAIApplication" = Depends(get_app_instance)): """ General health check endpoint to verify services are running. 
@@ -18,8 +23,6 @@ async def health_check(): async with get_weaviate_client() as client: weaviate_ready = await client.is_ready() - from main import app_instance - return { "status": "healthy", "services": { @@ -35,7 +38,7 @@ async def health_check(): "status": "unhealthy", "error": str(e) } - ) + ) from e @router.get("/health/weaviate") @@ -58,15 +61,13 @@ async def weaviate_health(): "status": "unhealthy", "error": str(e) } - ) + ) from e @router.get("/health/discord") -async def discord_health(): +async def discord_health(app_instance: "DevRAIApplication" = Depends(get_app_instance)): """Check specifically Discord bot health.""" try: - from main import app_instance - bot_status = "running" if app_instance.discord_bot and not app_instance.discord_bot.is_closed() else "stopped" return { @@ -82,4 +83,4 @@ async def discord_health(): "status": "unhealthy", "error": str(e) } - ) + ) from e diff --git a/backend/app/core/dependencies.py b/backend/app/core/dependencies.py new file mode 100644 index 00000000..10175385 --- /dev/null +++ b/backend/app/core/dependencies.py @@ -0,0 +1,12 @@ +from fastapi import Request +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from main import DevRAIApplication + +async def get_app_instance(request: Request) -> "DevRAIApplication": + """ + Dependency to get the application instance from FastAPI's state. + This avoids circular imports by using dependency injection. + """ + return request.app.state.app_instance diff --git a/backend/main.py b/backend/main.py index 0ac3c2fc..912f67b9 100644 --- a/backend/main.py +++ b/backend/main.py @@ -91,6 +91,7 @@ async def lifespan(app: FastAPI): """ Lifespan manager for the FastAPI application. Handles startup and shutdown events. """ + app.state.app_instance = app_instance await app_instance.start_background_tasks() yield await app_instance.stop_background_tasks()