# ===== backend/app/db/supabase/auth.py (new file) =====
# OAuth login and logout helpers built on the shared Supabase client.
import os

from app.db.supabase.supabase_client import supabase_client


def login_with_oauth(provider: str):
    """Start an OAuth sign-in flow with Supabase for ``provider``.

    The post-login redirect target is read from the ``SUPABASE_REDIRECT_URL``
    environment variable and falls back to ``http://localhost:3000/home``.

    Args:
        provider: OAuth provider name forwarded to Supabase
            (the wrappers below pass ``"github"`` and ``"discord"``).

    Returns:
        A dict ``{"url": ...}`` carrying the ``url`` attribute of the
        Supabase sign-in result.

    Raises:
        Exception: If the Supabase call fails. The original error is
            chained as ``__cause__`` so its traceback is not lost.
    """
    redirect_to = os.getenv("SUPABASE_REDIRECT_URL", "http://localhost:3000/home")
    try:
        result = supabase_client.auth.sign_in_with_oauth({
            "provider": provider,
            "options": {
                "redirect_to": redirect_to,
            },
        })
        return {"url": result.url}
    except Exception as e:
        raise Exception(f"OAuth login failed for {provider}: {str(e)}") from e


def login_with_github():
    """Start the OAuth sign-in flow using the ``"github"`` provider."""
    return login_with_oauth("github")


def login_with_discord():
    """Start the OAuth sign-in flow using the ``"discord"`` provider."""
    return login_with_oauth("discord")


def logout(access_token: str):
    """Sign the user identified by ``access_token`` out of Supabase.

    The token is installed as the session of the shared client (with an
    empty refresh token) and ``sign_out`` is then called on that client.

    Args:
        access_token: Access token of the session to terminate.

    Returns:
        ``{"message": "User logged out successfully"}`` on success.

    Raises:
        Exception: If setting the session or signing out fails. The
            original error is chained as ``__cause__``.
    """
    try:
        # NOTE(review): this mutates the session of the module-level client
        # that the rest of the app imports; confirm that concurrent requests
        # cannot observe another user's session between these two calls.
        supabase_client.auth.set_session(access_token, refresh_token="")
        supabase_client.auth.sign_out()
        return {"message": "User logged out successfully"}
    except Exception as e:
        raise Exception(f"Logout failed: {str(e)}") from e


# ===== backend/app/db/supabase/supabase_client.py (new file) =====
# Builds the process-wide Supabase client from environment configuration.
import os

from dotenv import load_dotenv
from supabase import create_client

# Load variables from a .env file (if any) before reading the configuration.
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Reject unset *and* empty values: an empty string is not a usable URL/key
# and would otherwise only fail later inside create_client.
if not SUPABASE_URL or not SUPABASE_KEY:
    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set in environment variables.")

supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)


def get_supabase_client():
    """Return the module-level Supabase client created at import time."""
    return supabase_client


# ===== backend/app/db/weaviate/weaviate_client.py (new file) =====
import weaviate

# Connect to local Weaviate instance.
# The connection is opened when this module is imported.
client = weaviate.connect_to_local()


def get_client():
    """Return the module-level Weaviate client created at import time."""
    return client


# ===== backend/app/model/__init__.py (new empty file) =====

# ===== backend/app/model/supabase/__init__.py (new empty file) =====

# ===== backend/app/model/supabase/models.py (new file) =====
# Pydantic models mirroring the tables created in
# backend/app/scripts/supabase/populate_db.sql.
from datetime import datetime
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel, Field


class User(BaseModel):
    """
    Represents a user profile with various platform integrations and metadata.

    Attributes:
        id (UUID): Unique identifier for the user.
        created_at (datetime): Timestamp when the user was created.
        updated_at (datetime): Timestamp when the user was last updated.
        discord_id (Optional[str]): Discord user ID, if linked.
        discord_username (Optional[str]): Discord username, if linked.
        github_id (Optional[str]): GitHub user ID, if linked.
        github_username (Optional[str]): GitHub username, if linked.
        slack_id (Optional[str]): Slack user ID, if linked.
        slack_username (Optional[str]): Slack username, if linked.
        display_name (str): Display name of the user.
        email (str): Email address of the user.
        avatar_url (Optional[str]): URL to the user's avatar image.
        bio (Optional[str]): Short biography or description of the user.
        location (Optional[str]): User's location.
        is_verified (bool): Indicates if the user is verified.
        verification_token (Optional[str]): Token used for verifying the user.
        verified_at (Optional[datetime]): Timestamp when the user was verified.
        skills (Optional[List[str]]): List of user's skills.
        github_stats (Optional[dict]): GitHub statistics for the user.
        last_active_discord (Optional[datetime]): Last active time on Discord.
        last_active_github (Optional[datetime]): Last active time on GitHub.
        last_active_slack (Optional[datetime]): Last active time on Slack.
        total_interactions_count (int): Total number of user interactions.
        preferred_languages (List[str]): List of user's preferred programming languages.
        weaviate_user_id (Optional[str]): Associated Weaviate user ID, if any.
    """
    id: UUID
    created_at: datetime
    updated_at: datetime
    discord_id: Optional[str] = None
    discord_username: Optional[str] = None
    github_id: Optional[str] = None
    github_username: Optional[str] = None
    slack_id: Optional[str] = None
    slack_username: Optional[str] = None
    display_name: str
    email: str
    avatar_url: Optional[str] = None
    bio: Optional[str] = None
    location: Optional[str] = None
    is_verified: bool = False
    verification_token: Optional[str] = None
    verified_at: Optional[datetime] = None
    # NOTE(review): users.skills is JSONB in populate_db.sql and the seed rows
    # store an object ({"skills": [...]}), which does not look like it would
    # validate as List[str] -- confirm the intended shape.
    skills: Optional[List[str]] = None
    github_stats: Optional[dict] = None
    last_active_discord: Optional[datetime] = None
    last_active_github: Optional[datetime] = None
    last_active_slack: Optional[datetime] = None
    total_interactions_count: int = 0
    # NOTE(review): the preferred_languages column is nullable in the schema;
    # the empty-list default only applies when the key is absent -- confirm
    # how an explicit NULL should be handled.
    preferred_languages: List[str] = Field(default_factory=list)
    weaviate_user_id: Optional[str] = None


class Repository(BaseModel):
    """
    Represents a GitHub repository with metadata and indexing status.

    Attributes:
        id (UUID): Unique identifier for the repository.
        created_at (datetime): Timestamp when the repository record was created.
        updated_at (datetime): Timestamp when the repository record was last updated.
        github_id (Optional[int]): GitHub's unique identifier for the repository.
        full_name (str): Full name of the repository (e.g., "owner/name").
        name (str): Name of the repository.
        owner (str): Owner of the repository.
        description (Optional[str]): Description of the repository.
        stars_count (int): Number of stars the repository has received.
        forks_count (int): Number of times the repository has been forked.
        open_issues_count (int): Number of open issues in the repository.
        language (Optional[str]): Primary programming language used in the repository.
        topics (List[str]): List of topics/tags associated with the repository.
        is_indexed (bool): Indicates if the repository has been indexed.
        indexed_at (Optional[datetime]): Timestamp when the repository was indexed.
        indexing_status (Optional[str]): Current status of the indexing process.
        total_chunks_count (int): Total number of chunks generated during indexing.
        last_commit_hash (Optional[str]): Hash of the last commit indexed.
        indexing_progress (Optional[dict]): Progress details of the indexing process.
        weaviate_repo_id (Optional[str]): Identifier for the repository in Weaviate.
    """
    id: UUID
    created_at: datetime
    updated_at: datetime
    github_id: Optional[int] = None
    full_name: str
    name: str
    owner: str
    description: Optional[str] = None
    stars_count: int = 0
    forks_count: int = 0
    open_issues_count: int = 0
    language: Optional[str] = None
    topics: List[str] = Field(default_factory=list)
    is_indexed: bool = False
    indexed_at: Optional[datetime] = None
    indexing_status: Optional[str] = None
    total_chunks_count: int = 0
    last_commit_hash: Optional[str] = None
    indexing_progress: Optional[dict] = None
    weaviate_repo_id: Optional[str] = None


class CodeChunk(BaseModel):
    """
    Represents a chunk of code extracted from a file within a repository.

    Attributes:
        id (UUID): Unique identifier for the code chunk.
        repository_id (UUID): Identifier of the repository this chunk belongs to.
        created_at (datetime): Timestamp when the chunk was created.
        file_path (str): Path to the file containing the code chunk.
        file_name (str): Name of the file containing the code chunk.
        file_extension (Optional[str]): Extension of the file (e.g., '.py', '.js').
        chunk_index (int): Index of the chunk within the file.
        content (str): The actual code content of the chunk.
        chunk_type (Optional[str]): Type of the chunk (e.g., 'function', 'class', 'block').
        language (Optional[str]): Programming language of the code chunk.
        lines_start (Optional[int]): Starting line number of the chunk in the file.
        lines_end (Optional[int]): Ending line number of the chunk in the file.
        code_metadata (Optional[dict]): Additional metadata related to the code chunk.
        weaviate_chunk_id (Optional[str]): Identifier for the chunk in Weaviate vector database.
    """
    id: UUID
    repository_id: UUID
    created_at: datetime
    file_path: str
    file_name: str
    file_extension: Optional[str] = None
    chunk_index: int
    # NOTE(review): code_chunks.content has no NOT NULL constraint in
    # populate_db.sql while this field is required -- confirm which is intended.
    content: str
    chunk_type: Optional[str] = None
    language: Optional[str] = None
    lines_start: Optional[int] = None
    lines_end: Optional[int] = None
    code_metadata: Optional[dict] = None
    weaviate_chunk_id: Optional[str] = None


class Interaction(BaseModel):
    """
    Represents an interaction within a repository platform, such as a message, comment, or post.

    Attributes:
        id (UUID): Unique identifier for the interaction.
        created_at (datetime): Timestamp when the interaction was created.
        updated_at (datetime): Timestamp when the interaction was last updated.
        user_id (UUID): Unique identifier of the user who performed the interaction.
        repository_id (Optional[UUID]): Unique identifier of the repository associated with
            the interaction; None when no repository is linked (the schema column is
            nullable and is set to NULL when the repository is deleted).
        platform (str): Name of the platform where the interaction occurred (e.g., GitHub, Slack).
        platform_specific_id (str): Platform-specific identifier for the interaction.
        channel_id (Optional[str]): Identifier for the channel where the interaction took place, if applicable.
        thread_id (Optional[str]): Identifier for the thread within the channel, if applicable.
        content (str): The textual content of the interaction.
        interaction_type (str): Type of interaction (e.g., message, comment, issue).
        sentiment_score (Optional[float]): Sentiment analysis score of the interaction content.
        intent_classification (Optional[str]): Classification of the user's intent in the interaction.
        topics_discussed (List[str]): List of topics discussed in the interaction.
        metadata (Optional[dict]): Additional metadata related to the interaction.
        weaviate_interaction_id (Optional[str]): Identifier for the interaction in the Weaviate vector database.
    """
    id: UUID
    created_at: datetime
    updated_at: datetime
    user_id: UUID
    # interactions.repository_id is declared "ON DELETE SET NULL" without
    # NOT NULL in populate_db.sql, so rows can legitimately carry NULL here.
    repository_id: Optional[UUID] = None
    platform: str
    platform_specific_id: str
    channel_id: Optional[str] = None
    thread_id: Optional[str] = None
    # NOTE(review): interactions.content and interactions.interaction_type
    # have no NOT NULL constraint in populate_db.sql while both fields are
    # required here -- confirm which is intended.
    content: str
    interaction_type: str
    sentiment_score: Optional[float] = None
    intent_classification: Optional[str] = None
    topics_discussed: List[str] = Field(default_factory=list)
    metadata: Optional[dict] = None
    weaviate_interaction_id: Optional[str] = None


# ===== backend/app/model/weaviate/models.py (new file) =====
# Pydantic models for the objects stored in Weaviate. Snake_case fields that
# map to camelCase property names declare the camelCase name as their alias.
from typing import List

from pydantic import BaseModel, Field


class WeaviateUserProfile(BaseModel):
    """
    Represents a vectorized user profile for semantic search in Weaviate.
    """
    supabase_user_id: str = Field(..., alias="supabaseUserId")
    profile_summary: str = Field(..., alias="profileSummary")
    primary_languages: List[str] = Field(..., alias="primaryLanguages")
    expertise_areas: List[str] = Field(..., alias="expertiseAreas")
    embedding: List[float] = Field(..., description="384-dimensional vector")


class WeaviateCodeChunk(BaseModel):
    """
    Vectorized representation of code chunks stored in Weaviate.
    """
    supabase_chunk_id: str = Field(..., alias="supabaseChunkId")
    code_content: str = Field(..., alias="codeContent")
    language: str
    function_names: List[str] = Field(..., alias="functionNames")
    embedding: List[float] = Field(..., description="384-dimensional vector")


class WeaviateInteraction(BaseModel):
    """
    Vectorized interaction representation stored in Weaviate.
    """
    supabase_interaction_id: str = Field(..., alias="supabaseInteractionId")
    conversation_summary: str = Field(..., alias="conversationSummary")
    platform: str
    topics: List[str]
    embedding: List[float] = Field(..., description="384-dimensional vector")


# ===== backend/app/scripts/supabase/populate_db.sql (new file) =====
# -- Table: users
# -- Stores user profile and authentication information, including social platform identities and activity metadata.
create table users (
    -- Row identity and bookkeeping timestamps.
    id                        uuid         primary key not null,
    created_at                timestamptz  not null default now(),
    updated_at                timestamptz  not null default now(),

    -- Linked platform accounts; each platform id may appear on at most one row.
    discord_id                text         unique,
    discord_username          text,
    github_id                 text         unique,
    github_username           text,
    slack_id                  text         unique,
    slack_username            text,

    -- Profile details.
    display_name              text         not null,
    email                     text         not null,
    avatar_url                text,
    bio                       text,
    location                  text,

    -- Verification state.
    is_verified               boolean      not null default false,
    verification_token        text,
    verified_at               timestamptz,

    -- Free-form JSON documents.
    skills                    jsonb,       -- array or object of user skills
    github_stats              jsonb,       -- GitHub statistics (e.g., contributions)

    -- Most recent activity seen on each platform.
    last_active_discord       timestamptz,
    last_active_github        timestamptz,
    last_active_slack         timestamptz,

    total_interactions_count  integer      not null default 0,
    preferred_languages       text[],      -- list of programming languages

    weaviate_user_id          text         unique  -- external vector DB reference
);

-- Table: repositories
-- Stores metadata for code repositories, including indexing and statistics.
CREATE TABLE repositories (
    id UUID PRIMARY KEY NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    github_id BIGINT UNIQUE,                -- Unique GitHub repository identifier
    full_name TEXT NOT NULL,                -- Format: owner/repo
    name TEXT NOT NULL,                     -- Repository name
    owner TEXT NOT NULL,                    -- Repository owner
    description TEXT,

    stars_count INTEGER NOT NULL DEFAULT 0,
    forks_count INTEGER NOT NULL DEFAULT 0,
    open_issues_count INTEGER NOT NULL DEFAULT 0,

    language TEXT,                          -- Primary language
    topics TEXT[],                          -- List of repository topics

    is_indexed BOOLEAN NOT NULL DEFAULT false,
    indexed_at TIMESTAMPTZ,                 -- When repository was indexed

    indexing_status TEXT,                   -- Status: pending, processing, completed, failed
    total_chunks_count INTEGER NOT NULL DEFAULT 0,

    last_commit_hash TEXT,                  -- Last commit hash
    indexing_progress JSONB,                -- Progress details

    weaviate_repo_id TEXT UNIQUE            -- External vector DB reference
);

-- Table: code_chunks
-- Stores segmented code blocks from repositories for analysis and retrieval.
-- Rows are removed together with their repository (ON DELETE CASCADE).
CREATE TABLE code_chunks (
    id UUID PRIMARY KEY NOT NULL,
    repository_id UUID NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,

    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    file_path TEXT NOT NULL,                -- Full path to the file
    file_name TEXT NOT NULL,                -- File name
    file_extension TEXT,                    -- File extension (e.g., .py, .js)

    chunk_index INTEGER NOT NULL,           -- Order of chunk in file
    content TEXT,                           -- Code content

    chunk_type TEXT,                        -- Type: function, class, module, comment, import
    language TEXT,                          -- Programming language

    lines_start INTEGER,                    -- Start line number
    lines_end INTEGER,                      -- End line number

    code_metadata JSONB,                    -- Additional analysis data

    weaviate_chunk_id TEXT UNIQUE           -- External vector DB reference
);

-- PostgreSQL does not create an index on the referencing side of a foreign
-- key. Without one, every repository delete (which cascades into this table)
-- and every per-repository chunk lookup has to scan code_chunks.
CREATE INDEX IF NOT EXISTS idx_code_chunks_repository_id
    ON code_chunks (repository_id);

-- Table: interactions
-- Stores user interactions across platforms, including messages, issues, and comments.
+CREATE TABLE interactions ( + id UUID PRIMARY KEY NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + repository_id UUID REFERENCES repositories(id) ON DELETE SET NULL, + + platform TEXT NOT NULL, -- Platform: discord, github, slack + platform_specific_id TEXT NOT NULL, -- Platform-specific identifier + channel_id TEXT, -- Channel or repository reference + thread_id TEXT, -- Thread or conversation reference + + content TEXT, -- Content of the interaction + interaction_type TEXT, -- Type: message, issue, pr, comment, reaction + + sentiment_score FLOAT, -- Sentiment score (-1 to 1) + intent_classification TEXT, -- Classified intent (e.g., help_request) + + topics_discussed TEXT[], -- Topics extracted from content + metadata JSONB, -- Additional platform-specific data + + weaviate_interaction_id TEXT UNIQUE -- External vector DB reference +); + +insert into + users ( + id, + created_at, + updated_at, + discord_id, + discord_username, + github_id, + github_username, + slack_id, + slack_username, + display_name, + email, + avatar_url, + bio, + location, + is_verified, + verification_token, + verified_at, + skills, + github_stats, + last_active_discord, + last_active_github, + last_active_slack, + total_interactions_count, + preferred_languages, + weaviate_user_id + ) +values + ( + '6afc59e3-18b7-4182-b42c-8210d1152b07', + '2025-05-05 03:56:41', + '2025-01-22 14:50:25', + '3eb13b90-4668-4257-bdd6-40fb06671ad1', + 'donaldgarcia', + '16419f82-8b9d-4434-a465-e150bd9c66b3', + 'fjohnson', + '9a1de644-815e-46d1-bb8f-aa1837f8a88b', + 'hoffmanjennifer', + 'Jennifer Cole', + 'blakeerik@yahoo.com', + 'https://dummyimage.com/696x569', + 'Bill here grow gas enough analysis. 
Movie win her need stop peace technology.', + 'East Steven', + true, + 'a3d70628-ece6-4fa2-bd51-66e6451b4cf3', + '2025-05-14 15:04:01', + '{"skills": ["Python", "C++", "Java"]}'::jsonb, + '{"commits": 300}'::jsonb, + '2025-04-19 03:34:26', + '2025-02-12 15:28:51', + '2025-05-13 22:32:01', + 28, + array['JavaScript', 'C++'], + 'c6a7ee39-c4b0-42cc-97c5-24a55304317f' + ), + ( + '6f990423-0d57-4c64-b191-17e53f39c799', + '2025-01-11 20:41:23', + '2025-02-14 11:26:28', + '50c187fc-ce17-4b4e-8837-b8a3d261a7ab', + 'nadams', + 'e059a0ee-9132-463e-b162-87e4e9c349e0', + 'jason76', + '757750a9-a491-40b2-aa1f-ca65e27a984d', + 'josephwright', + 'Deborah Richards', + 'jeffrey28@yahoo.com', + 'https://www.lorempixel.com/186/96', + 'Civil quite others his other life edge network. Quite boy those.', + 'Kathrynside', + true, + '87c5421e-ec24-43c5-8754-108ff4188f3f', + '2025-01-01 02:39:54', + '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, + '{"commits": 139}'::jsonb, + '2025-04-27 07:17:02', + '2025-03-04 22:40:36', + '2025-04-05 21:04:03', + 75, + array['Go', 'Python'], + '5cec4eb5-edd9-4831-9ca3-5cfb04fc6d82' + ), + ( + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + '2025-03-01 17:07:10', + '2025-02-16 11:55:43', + '913e4de2-e0c5-4cb8-bda9-c2a90ed42f1a', + 'millertodd', + '885f6e66-c2b6-42c5-ba5d-310011b7e948', + 'davidalvarez', + '8715a103-43da-4043-aa45-c2ab8cbfedb0', + 'ibrandt', + 'Melissa Marquez', + 'samuel87@gmail.com', + 'https://www.lorempixel.com/507/460', + 'Open discover detail. Remain arrive attack all. 
Audience draw protect Democrat car very.', + 'Stevenland', + true, + 'db20a56e-dc81-4fe7-8eda-8bbb71710434', + '2025-04-17 20:42:06', + '{"skills": ["Python", "JavaScript", "C++"]}'::jsonb, + '{"commits": 567}'::jsonb, + '2025-01-20 00:17:15', + '2025-01-10 19:45:31', + '2025-05-07 15:12:55', + 77, + array['Python', 'Rust'], + '03c72ba8-d605-4770-8a63-f881ffd0f9d5' + ), + ( + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '2025-01-19 02:38:09', + '2025-05-27 07:52:11', + '680ac07a-2a93-4d62-bc83-5dc0d9441fa5', + 'zolson', + '610461e3-2a25-4888-8f02-bad0e7067ef4', + 'gallowayjoseph', + '490617f2-747b-4dba-88fe-3ccdc8b8d9c6', + 'uhorton', + 'Kristi Higgins MD', + 'tanyariley@hotmail.com', + 'https://www.lorempixel.com/124/642', + 'Live try most arm meet surface attention attack.', + 'Stewartland', + true, + 'ff9ab5c2-9f04-4aed-b552-332702627f73', + '2025-04-20 07:44:25', + '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, + '{"commits": 479}'::jsonb, + '2025-01-20 09:56:47', + '2025-01-15 05:25:10', + '2025-04-15 09:33:28', + 28, + array['Go', 'Rust'], + '21e8ac68-43e4-4caf-8181-a8cc369147eb' + ), + ( + '5a64824b-afcd-4586-9a25-16af29e673a3', + '2025-03-09 18:29:56', + '2025-01-14 08:35:26', + '48f4ef12-5e99-43d2-be89-6c64e117dac3', + 'samueldaniels', + 'fcbb4e59-fbdd-4f7c-9c96-e9ec4d71c366', + 'contrerasangela', + 'f05db76e-1a84-451a-a9d3-d7c7ee87905e', + 'josephpreston', + 'Brittney Campbell', + 'james48@king-odonnell.com', + 'https://placekitten.com/189/867', + 'Four capital woman. +Necessary into act away third tough. 
Along hard need involve among half value.', + 'East Michelle', + false, + '2d534dd0-cf8e-4c5a-8cc5-6569f9e8a369', + '2025-03-22 01:25:52', + '{"skills": ["TypeScript", "Python", "JavaScript"]}'::jsonb, + '{"commits": 764}'::jsonb, + '2025-01-05 19:30:46', + '2025-02-04 19:52:52', + '2025-05-24 00:38:21', + 54, + array['Java', 'TypeScript'], + 'ee49f329-c84a-4b28-950a-1b46ecab3301' + ), + ( + '26283e71-c735-4a11-9831-afd279af4a4f', + '2025-03-21 22:43:20', + '2025-05-11 01:59:36', + '3f87e362-cf8d-446a-bc2c-bb0ddd334cc7', + 'erik16', + '787f2425-dbcc-4477-89e9-db0adf465290', + 'jamessellers', + 'cb9bc326-d20e-4c17-8e20-fd1a598336e3', + 'jeffreykeller', + 'Donald Jones', + 'darlene68@yahoo.com', + 'https://placekitten.com/56/236', + 'While enter board its rock finish paper memory. Tonight couple and job mind southern.', + 'South Elizabeth', + true, + 'e71e43a6-bf85-4f0e-ad64-b56c610faa3f', + '2025-02-03 19:01:34', + '{"skills": ["JavaScript", "Java", "Python"]}'::jsonb, + '{"commits": 144}'::jsonb, + '2025-04-30 12:30:26', + '2025-04-21 12:13:21', + '2025-02-28 10:31:41', + 48, + array['Python', 'Java'], + '001a9a8b-d56f-4350-8c45-9ce267f48ad5' + ), + ( + '010d518f-362b-435b-a4de-148914bcbdb9', + '2025-03-01 00:09:35', + '2025-02-25 17:06:53', + '9479e1e6-c927-4d9b-ae0d-264835ce8841', + 'steven73', + 'e6b3c944-cb32-4e35-b922-bac282dc4c8e', + 'donnacampbell', + '55cee5db-9e87-404c-a208-6977a9f25336', + 'cardenaskaren', + 'Courtney Gonzalez', + 'sanchezthomas@gibson.org', + 'https://placeimg.com/398/786/any', + 'Imagine my indeed deal information toward. 
Watch affect thing offer local wall fear hope.', + 'West Nicholasborough', + false, + 'c2dff335-5666-4f9f-93ac-2ab974672cd9', + '2025-03-15 14:51:22', + '{"skills": ["Rust", "Java", "Python"]}'::jsonb, + '{"commits": 797}'::jsonb, + '2025-02-24 00:46:11', + '2025-05-27 00:37:11', + '2025-03-23 20:38:04', + 58, + array['Rust', 'Python'], + '78660765-14f7-4e8d-95bc-b8d04094dded' + ), + ( + 'c28e727c-f6f1-4fb1-98c4-d7230cd1c855', + '2025-01-04 18:19:57', + '2025-05-26 10:42:21', + 'f3b1025b-fff9-4585-8d55-7b618a175dfe', + 'jeffrey10', + '3f4df561-f319-4125-87f1-94f9c1156d6d', + 'maryhowell', + 'ab61a7b1-793b-4c32-a050-04943d114802', + 'elliottjeffery', + 'Pamela Jackson', + 'tamirodriguez@hickman.biz', + 'https://www.lorempixel.com/812/406', + 'Right with modern executive beyond. Fast guess few remain call. Window network recently.', + 'Christopherbury', + false, + '6a8a616f-c3b2-40d0-8edd-dfcd1e52d770', + '2025-04-28 17:38:42', + '{"skills": ["Python", "Rust", "Java"]}'::jsonb, + '{"commits": 899}'::jsonb, + '2025-04-26 18:09:08', + '2025-05-01 02:02:18', + '2025-02-13 19:46:41', + 80, + array['Rust', 'Java'], + '7354ea6f-6160-4459-85c7-504bc693da11' + ), + ( + '7771182e-ed52-4f4e-a376-0750b9854324', + '2025-03-27 23:13:45', + '2025-02-27 17:45:57', + '6dc7cac7-fd72-4050-96a9-954fdc33e1f9', + 'ashley09', + 'f295456e-1967-4f06-bd76-7e35f5c9b047', + 'colleenbaker', + '2c7f0b79-3d67-4de9-a834-e4c014c8b3b4', + 'moorericky', + 'Julie Johnson', + 'jacqueline71@hotmail.com', + 'https://dummyimage.com/478x541', + 'Name positive training step. Arrive society organization station. 
Keep light fight I evening.', + 'Rickymouth', + true, + '292bd156-db94-4570-9ac7-0ec0ab8ddeb4', + '2025-03-06 00:59:47', + '{"skills": ["C++", "Python", "TypeScript"]}'::jsonb, + '{"commits": 727}'::jsonb, + '2025-03-21 22:24:18', + '2025-05-15 16:45:13', + '2025-04-07 05:03:10', + 29, + array['TypeScript', 'Java'], + '668409e3-f1f8-443e-a99f-131849c8a43f' + ), + ( + 'e4ca4ab9-de13-42ea-a394-db08a247abb7', + '2025-04-17 19:38:09', + '2025-01-08 03:01:15', + '409d3602-5084-4242-968b-1625746f7891', + 'darrell68', + 'a85c6e4a-004b-4fab-bcf5-6188d32e6dcd', + 'mgutierrez', + 'c1a6423b-9f64-4eed-9c9d-927d84b871bb', + 'wgarrett', + 'Derek Anderson', + 'fgilmore@gmail.com', + 'https://dummyimage.com/59x490', + 'Add impact different success box water positive. Marriage respond meeting event.', + 'Grimesmouth', + true, + '551ac8ea-585a-4afa-bbfd-cc1289e06ab3', + '2025-04-18 04:07:17', + '{"skills": ["TypeScript", "JavaScript", "Python"]}'::jsonb, + '{"commits": 439}'::jsonb, + '2025-04-16 12:34:03', + '2025-03-15 05:55:34', + '2025-03-30 10:03:34', + 35, + array['Go', 'C++'], + '304b8590-de9e-4757-9260-001eeecf67d2' + ); + +insert into + repositories ( + id, + created_at, + updated_at, + github_id, + full_name, + name, + owner, + description, + stars_count, + forks_count, + open_issues_count, + language, + topics, + is_indexed, + indexed_at, + indexing_status, + total_chunks_count, + last_commit_hash, + indexing_progress, + weaviate_repo_id + ) +values + ( + 'f6b0bff9-074d-4062-86f5-0a853e521334', + '2025-05-16 10:34:41', + '2025-02-16 08:54:52', + 3728882, + 'jamessellers/repo_0', + 'repo_0', + 'jamessellers', + 'Him task improve fish list tree high.', + 3032, + 363, + 26, + 'C++', + array['Java', 'C++'], + true, + '2025-05-09 21:00:50', + 'processing', + 18, + 'e270dbf424cff6864cc592f6611d8df90c895ec5', + '{"progress": 93}'::jsonb, + '7ecddbaf-26f0-4fcf-bb16-e5dba6eab79e' + ), + ( + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', + '2025-01-08 04:31:26', + '2025-01-25 12:21:00', + 
3741438, + 'gallowayjoseph/repo_1', + 'repo_1', + 'gallowayjoseph', + 'Whole forward beyond suddenly between treat address.', + 3786, + 388, + 34, + 'C++', + array['C++', 'Rust'], + true, + '2025-01-28 23:48:46', + 'completed', + 2, + 'c9f97db5d2fc4b809df59bc23dd7345dbe6d14d5', + '{"progress": 29}'::jsonb, + '1327f1bc-2784-478f-b84f-16b3a79fbfaf' + ), + ( + '08946f22-0d74-4499-b40d-0f60218d5152', + '2025-04-02 03:59:05', + '2025-02-21 11:05:44', + 6292423, + 'fjohnson/repo_2', + 'repo_2', + 'fjohnson', + 'Perhaps however bag forget purpose move.', + 3286, + 274, + 8, + 'JavaScript', + array['Rust', 'C++'], + false, + '2025-03-03 11:44:52', + 'processing', + 16, + '5e3af4aafc18e025cea707fa7707a1d945e0ffef', + '{"progress": 50}'::jsonb, + 'df547e50-7cea-4045-8268-283ee32f2e63' + ), + ( + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-04-16 01:19:02', + '2025-01-07 23:18:06', + 3396987, + 'maryhowell/repo_3', + 'repo_3', + 'maryhowell', + 'Attention piece TV young section its better plant.', + 2169, + 142, + 31, + 'C++', + array['Rust', 'TypeScript'], + false, + '2025-01-20 12:23:51', + 'failed', + 19, + '22a9658e1dcda6fa5df48102f5882b204e39bc17', + '{"progress": 51}'::jsonb, + '0769165f-e746-4cb9-8ca9-cf07b1aa0f6a' + ), + ( + '7a9eab06-0656-43dd-8b8d-b17b9e5f396c', + '2025-01-09 22:20:00', + '2025-03-04 23:41:43', + 4679591, + 'jamessellers/repo_4', + 'repo_4', + 'jamessellers', + 'Fall hear certainly most.', + 1133, + 521, + 63, + 'Python', + array['TypeScript', 'Python'], + true, + '2025-04-20 21:24:57', + 'processing', + 6, + 'effd50723baca7c5da884a171f8b5bbed8320a23', + '{"progress": 87}'::jsonb, + '14374509-2cd1-486a-ab84-0c672e183554' + ), + ( + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + '2025-04-29 08:26:15', + '2025-03-16 06:47:08', + 2065818, + 'donnacampbell/repo_5', + 'repo_5', + 'donnacampbell', + 'Raise marriage on discussion point least project together.', + 3152, + 390, + 76, + 'Go', + array['Rust', 'Java'], + true, + '2025-04-01 08:14:14', + 
'pending', + 18, + 'a76a3a0bef5688fbee63da697c29fa6d719b37d9', + '{"progress": 96}'::jsonb, + '12e89d10-2871-4733-8bed-db12ad77e82f' + ), + ( + '07921dba-1ae8-422b-9f29-9c908080aa1b', + '2025-03-27 18:36:35', + '2025-03-09 02:27:09', + 6707197, + 'contrerasangela/repo_6', + 'repo_6', + 'contrerasangela', + 'Federal while real lead few yourself table blood.', + 913, + 300, + 55, + 'JavaScript', + array['Go', 'Python'], + false, + '2025-03-16 01:17:11', + 'processing', + 17, + '3a65c2c24c52e4b1907c677fc07132e89ec719bc', + '{"progress": 13}'::jsonb, + 'b303f438-fe21-40d0-8bbe-4aff9326dffd' + ), + ( + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + '2025-02-27 07:52:30', + '2025-01-05 06:14:45', + 9517169, + 'contrerasangela/repo_7', + 'repo_7', + 'contrerasangela', + 'Ever not rate seat any paper.', + 4988, + 203, + 19, + 'Java', + array['TypeScript', 'JavaScript'], + true, + '2025-04-07 10:55:00', + 'completed', + 16, + '4b0c490400e9caf8027725c024538d6df508bd11', + '{"progress": 2}'::jsonb, + 'cc8218da-c696-45e6-8944-051be726be23' + ), + ( + '4882ce56-489d-4abc-bc29-bf3ad5c48930', + '2025-02-14 16:25:45', + '2025-04-28 21:07:40', + 7089806, + 'jason76/repo_8', + 'repo_8', + 'jason76', + 'Despite couple economy sense should race.', + 2519, + 245, + 7, + 'JavaScript', + array['Rust', 'Python'], + true, + '2025-01-27 13:26:14', + 'failed', + 3, + '6ab21b990f1416846b362fdb26b90d80cbf249a9', + '{"progress": 97}'::jsonb, + '0a17991e-a576-4411-a0a1-1839e7457704' + ), + ( + '46d4cf41-4cd4-4043-a835-625a0bf349f2', + '2025-03-01 23:43:53', + '2025-03-27 14:23:05', + 3109911, + 'colleenbaker/repo_9', + 'repo_9', + 'colleenbaker', + 'Often run bed.', + 1051, + 675, + 60, + 'Rust', + array['JavaScript', 'Java'], + false, + '2025-02-27 07:18:51', + 'processing', + 18, + '1f93145c08645e58c60f86341a4f5e572e111863', + '{"progress": 96}'::jsonb, + '21d53971-3367-49b5-acf6-bf756a5e6920' + ); + +insert into + code_chunks ( + id, + repository_id, + created_at, + file_path, + file_name, + 
file_extension, + chunk_index, + content, + chunk_type, + language, + lines_start, + lines_end, + code_metadata, + weaviate_chunk_id + ) +values + ( + '095a5ff0-545a-48ff-83ad-2ea3566f5674', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-04-15 17:49:20', + '/src/file_0.py', + 'file_0.py', + '.py', + 0, + 'Maybe evening clearly trial want whose far. Sound life away senior difficult put. Whose source hand so add Mr.', + 'comment', + 'C++', + 92, + 106, + '{"length": 14}'::jsonb, + 'f23e323d-0b9b-4934-a3c8-6d301dde7969' + ), + ( + 'b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed', + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + '2025-01-08 05:25:15', + '/src/file_1.py', + 'file_1.py', + '.py', + 1, + 'Break doctor Mr home he we recent. Industry score choice increase between majority impact. +Real describe know. Talk between rate name within.', + 'function', + 'Go', + 57, + 76, + '{"length": 19}'::jsonb, + '00b9d4a3-9892-40ac-a689-33a9c9e48e8c' + ), + ( + '1f787967-316c-4232-b251-64bcf8e3251b', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-02-23 20:11:39', + '/src/file_2.py', + 'file_2.py', + '.py', + 2, + 'Music sometimes body term. Address so draw food. +Appear score moment second live. Message board mean war analysis situation.', + 'module', + 'C++', + 29, + 36, + '{"length": 7}'::jsonb, + '1963c26d-6e21-4b09-9afd-4015816bcb9f' + ), + ( + '233530b2-d89f-416d-a73c-40b4ebb33c50', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + '2025-05-17 06:31:44', + '/src/file_3.py', + 'file_3.py', + '.py', + 3, + 'Result Democrat later direction fund law indeed. Fine fine effort well. +Before be it season. Speech news only no form business. Them wait institution trouble anything explain.', + 'import', + 'C++', + 76, + 88, + '{"length": 12}'::jsonb, + '8e867f3c-a487-4eab-accb-461a9d132363' + ), + ( + 'b3103899-d683-422a-9072-2ad26050d8f5', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-01-06 02:21:06', + '/src/file_4.py', + 'file_4.py', + '.py', + 4, + 'Ahead event several TV go. 
Thank not husband center. Begin most heavy. Game have return since nothing be apply.', + 'function', + 'C++', + 1, + 8, + '{"length": 7}'::jsonb, + '0e0630cd-996d-4c50-bc04-a168652ffb49' + ), + ( + '28ea68b7-1f26-472c-b568-319e1d41732b', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-01-02 11:49:27', + '/src/file_5.py', + 'file_5.py', + '.py', + 5, + 'War should share face build. Section compare herself region matter street south. +Technology amount affect TV television office. Identify policy face if whom commercial way.', + 'module', + 'C++', + 9, + 15, + '{"length": 6}'::jsonb, + '2c6a6e9a-3280-47a1-8187-222b257d5e52' + ), + ( + '1cb8ccc0-db27-49c5-8dff-8d535d5a37d3', + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', + '2025-04-27 23:22:57', + '/src/file_6.py', + 'file_6.py', + '.py', + 6, + 'Concern significant management senior. Large under north play person ten physical character. +Kind field ever argue medical financial later. Hard expert popular within.', + 'module', + 'C++', + 66, + 78, + '{"length": 12}'::jsonb, + 'fb7d9f1c-57eb-49b1-965e-59dde62d2d06' + ), + ( + '9edaae8a-3d6c-47c1-8777-ff0b0002b85a', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + '2025-05-19 16:57:06', + '/src/file_7.py', + 'file_7.py', + '.py', + 7, + 'Position always remain yard model particular hair. Hold simple quickly appear piece.', + 'import', + 'Java', + 28, + 37, + '{"length": 9}'::jsonb, + '86c1b6cb-e996-40f7-af77-520eff4625af' + ), + ( + 'd1927881-d0e7-4df3-a97a-18521db08ff4', + '46d4cf41-4cd4-4043-a835-625a0bf349f2', + '2025-01-19 03:31:20', + '/src/file_8.py', + 'file_8.py', + '.py', + 8, + 'Gun guy Congress degree way main difficult. Choice fast small medical. Strong this also from short capital heavy. +Story side speak close. 
Analysis hair rest wide particular sell.', + 'comment', + 'Rust', + 61, + 73, + '{"length": 12}'::jsonb, + 'ef2ddcc4-8df6-41da-9f07-c1a5dfc620ce' + ), + ( + 'fdda052a-ca4f-40b5-ae99-a711e2161d85', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + '2025-01-20 22:06:10', + '/src/file_9.py', + 'file_9.py', + '.py', + 9, + 'Expect several evening town. Store begin treat stage. Us increase how hear history bank. +Five between research. Social case expert stop receive catch.', + 'function', + 'JavaScript', + 25, + 33, + '{"length": 8}'::jsonb, + '9d642932-0066-453d-ade2-99a14a90cd0c' + ); + +insert into + interactions ( + id, + created_at, + updated_at, + user_id, + repository_id, + platform, + platform_specific_id, + channel_id, + thread_id, + content, + interaction_type, + sentiment_score, + intent_classification, + topics_discussed, + metadata, + weaviate_interaction_id + ) +values + ( + '7c59fe66-53b6-44b5-8ae1-ddc29b071097', + '2025-03-10 12:14:30', + '2025-02-16 17:06:38', + '010d518f-362b-435b-a4de-148914bcbdb9', + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + 'github', + 'aa143cd8-2ff3-4de4-aaa4-2c9f92170475', + 'f982f4e0-8603-456a-95ea-cbcfab1021ce', + '86abd4e7-f412-4360-9153-6e995c508720', + 'Skill medical after them analysis hit health. Ground attack drop. Billion old series card good full poor store.', + 'comment', + -0.07, + 'help_request', + array['C++', 'TypeScript'], + '{"info": "capital"}'::jsonb, + 'e3b56360-6fdc-4bad-9e36-8127cca1b45c' + ), + ( + 'f0c80815-fde1-4644-94ca-cd8915f11e46', + '2025-03-19 16:14:11', + '2025-05-25 08:03:53', + '6f990423-0d57-4c64-b191-17e53f39c799', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'github', + '62fb26d7-f4db-4a07-a506-f6707092947d', + '7f072cb9-2fd3-40c0-b945-f2fd56cb1ab0', + 'ec9f9c54-5e0a-42ab-bf5d-b163b12b6680', + 'Song risk bad own state. Family bill foreign fast knowledge response coach. 
Goal amount thank good your ever.', + 'pr', + 0.6, + 'help_request', + array['JavaScript', 'TypeScript'], + '{"info": "already"}'::jsonb, + 'c74cc890-3c6a-4174-9136-34a520509c62' + ), + ( + 'ef139daa-fa4c-445a-8bf7-fdd725bdb82c', + '2025-05-06 06:40:36', + '2025-03-13 03:12:51', + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '4882ce56-489d-4abc-bc29-bf3ad5c48930', + 'github', + '9136f1f8-f310-46dc-a202-bee65cb5e69c', + 'add702c9-2747-493c-9ae7-7eab084a6780', + '5f3c44dc-5ef7-47b8-b2e6-195f732e2016', + 'Off morning huge power. Whether ago control military trial. Energy employee land you.', + 'issue', + -0.16, + 'feature_request', + array['Go', 'JavaScript'], + '{"info": "security"}'::jsonb, + '2c913a7c-a340-4f08-b341-91b8ed6522b4' + ), + ( + 'd5c02f3d-6d9a-49b9-8d71-33ba08c610a2', + '2025-03-12 04:40:32', + '2025-04-09 18:58:25', + '6f990423-0d57-4c64-b191-17e53f39c799', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + 'slack', + '3373730e-fc31-4597-9f11-9c0f3967e60a', + 'ca55e38b-3c9a-4e10-a38d-6f44cac4d0eb', + '8dd4595b-5c63-46f3-86ba-8cd37e7838c9', + 'Level work candidate this assume huge. Moment shoulder statement available win politics last. General there sister policy consider whom item.', + 'message', + -0.9, + 'help_request', + array['Python', 'JavaScript'], + '{"info": "prove"}'::jsonb, + '200b2903-4642-4c45-8d03-f17af4d375c1' + ), + ( + '5696eff8-bba4-41a4-953f-f70eece14b2d', + '2025-05-02 08:48:55', + '2025-02-28 19:15:53', + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + 'github', + '80a8a23d-17ea-4c83-8892-042f9d4b2bf9', + 'f10d27c8-9780-4215-ab2b-b8e9a417c093', + 'a8a2b7ad-2bd3-4dcd-a779-468594a53fde', + 'Wish candidate have no five letter. Last cell anything war ten. 
Eat tend civil force officer fine.', + 'comment', + -0.57, + 'general_discussion', + array['Python', 'JavaScript'], + '{"info": "ready"}'::jsonb, + '896490ab-4926-4e5f-b878-6140ac2a4f71' + ), + ( + '2ea1d897-a515-40cd-a92a-01eada9542d8', + '2025-01-02 14:06:34', + '2025-01-21 20:58:21', + '010d518f-362b-435b-a4de-148914bcbdb9', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'github', + '43b47ee5-e1e8-4e7e-a249-8f666e51484d', + '673ba8bd-c38c-4dec-9da3-9a73ba3df7ff', + '0d18ab95-668c-4477-8b95-017c5dae1201', + 'Foreign party class wrong. Order medical meeting majority none. Staff happy purpose woman on someone rise.', + 'pr', + 0.85, + 'general_discussion', + array['Java', 'Go'], + '{"info": "market"}'::jsonb, + 'ba370623-bc5f-44dd-992f-3c1edf70fb2a' + ), + ( + 'dc1ad7fb-edca-4c34-b07d-7b51f7a92974', + '2025-01-13 02:15:06', + '2025-05-18 01:09:03', + '7771182e-ed52-4f4e-a376-0750b9854324', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + 'discord', + '8f964685-3514-4890-84c8-6c4623595fa4', + '7a768555-a987-4218-bf84-faef5336723b', + 'f4e95734-5052-4700-a077-96a38685abaa', + 'Treatment garden great sign return poor really. Machine whatever everything fear walk word side relate.', + 'issue', + -0.41, + 'help_request', + array['Rust', 'C++'], + '{"info": "defense"}'::jsonb, + 'eedaa802-4568-4426-89e0-3e22d3f4a49b' + ), + ( + '30634a69-7c7b-4f11-8ec5-b83299015938', + '2025-04-20 03:11:37', + '2025-05-16 04:23:36', + '7771182e-ed52-4f4e-a376-0750b9854324', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'slack', + '9d9d028e-1bf6-45f6-a324-52114588fc1b', + '3e11bafe-3d41-46fe-963a-8617bdab07e7', + '87f255d6-e7ba-46ac-ab7a-3d1c0cfef683', + 'Appear including response beyond side. 
Who within citizen.', + 'pr', + -0.89, + 'general_discussion', + array['Rust', 'TypeScript'], + '{"info": "cultural"}'::jsonb, + 'df4e713e-f64e-4dfc-bfbe-ac7aefc59738' + ), + ( + '87916dfb-a7ce-4315-80f3-a72be814f08c', + '2025-02-26 05:00:49', + '2025-01-08 06:36:18', + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'slack', + '9a4ffc0c-9165-42ed-8c63-6e95025f5543', + '55c551fc-fba5-4cc8-adaf-37661b780ede', + 'a42d0cd7-fd35-4f6a-b450-388748d90846', + 'According himself land environment form. Reveal activity president realize artist brother fill if. Type thousand show real police wait happen.', + 'message', + 0.7, + 'help_request', + array['Rust', 'Python'], + '{"info": "store"}'::jsonb, + '87365a84-725e-434d-8687-9aa914f573d0' + ), + ( + 'c29c38dc-10be-4da2-81b7-6f82b746a359', + '2025-05-17 15:47:16', + '2025-03-12 06:02:22', + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + 'discord', + '1cb9b73a-906d-4c8c-aad0-8c8913fe8a29', + 'e723ada3-8c32-4db2-942a-895e0fcf601f', + '89628f6e-929c-43b3-b3c0-8bf18167999f', + 'Foreign minute break day. Major together knowledge argue car indeed nor next. +How staff second. 
Authority interest red must art thus worry line.', + 'reaction', + -0.51, + 'help_request', + array['Rust', 'Python'], + '{"info": "cell"}'::jsonb, + '1e5ebe54-5907-4299-9c3b-bdd8a74e02a9' + ); diff --git a/backend/app/scripts/weaviate/__init__.py b/backend/app/scripts/weaviate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/scripts/weaviate/create_schemas.py b/backend/app/scripts/weaviate/create_schemas.py new file mode 100644 index 00000000..1aca808f --- /dev/null +++ b/backend/app/scripts/weaviate/create_schemas.py @@ -0,0 +1,48 @@ +from app.db.weaviate.weaviate_client import get_client +import weaviate.classes.config as wc +def create_schema(client, name, properties): + client.collections.create( + name=name, + properties=properties, + vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), + generative_config=wc.Configure.Generative.openai() + ) + print(f"Created: {name}") +def create_user_profile_schema(client): + properties = [ + wc.Property(name="supabaseUserId", data_type=wc.DataType.TEXT), + wc.Property(name="profileSummary", data_type=wc.DataType.TEXT), + wc.Property(name="primaryLanguages", data_type=wc.DataType.TEXT_ARRAY), + wc.Property(name="expertiseAreas", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_user_profile", properties) + + +def create_code_chunk_schema(client): + properties = [ + wc.Property(name="supabaseChunkId", data_type=wc.DataType.TEXT), + wc.Property(name="codeContent", data_type=wc.DataType.TEXT), + wc.Property(name="language", data_type=wc.DataType.TEXT), + wc.Property(name="functionNames", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_code_chunk", properties) + +def create_interaction_schema(client): + properties = [ + wc.Property(name="supabaseInteractionId", data_type=wc.DataType.TEXT), + wc.Property(name="conversationSummary", data_type=wc.DataType.TEXT), + wc.Property(name="platform", data_type=wc.DataType.TEXT), + 
wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_interaction", properties) + +def create_all_schemas(): + client = get_client() + existing_collections = client.collections.list_all() + if "weaviate_code_chunk" not in existing_collections: + create_code_chunk_schema(client) + if "weaviate_interaction" not in existing_collections: + create_interaction_schema(client) + if "weaviate_user_profile" not in existing_collections: + create_user_profile_schema(client) + print("✅ All schemas ensured.") diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py new file mode 100644 index 00000000..f1051730 --- /dev/null +++ b/backend/app/scripts/weaviate/populate_db.py @@ -0,0 +1,268 @@ +from app.db.weaviate.weaviate_client import get_client + + +def populate_Weaviate_code_chunk(client): + code_chunks = [ + { + "supabaseChunkId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "codeContent": ( + "Maybe evening clearly trial want whose far. Sound life away senior difficult put. " + "Whose source hand so add Mr." + ), + "language": "C++", + "functionNames": ["comment"] + }, + { + "supabaseChunkId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "codeContent": ( + "Break doctor Mr home he we recent. Industry score choice increase between majority impact.\n" + "Real describe know. Talk between rate name within." + ), + "language": "Go", + "functionNames": ["function"] + }, + { + "supabaseChunkId": "1f787967-316c-4232-b251-64bcf8e3251b", + "codeContent": ( + "Music sometimes body term. Address so draw food.\n" + "Appear score moment second live. Message board mean war analysis situation." + ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "codeContent": ( + "Result Democrat later direction fund law indeed. Fine fine effort well.\n" + "Before be it season. Speech news only form business. 
Them wait institution trouble anything explain." + ), + "language": "C++", + "functionNames": ["import"] + }, + { + "supabaseChunkId": "b3103899-d683-422a-9072-2ad26050d8f5", + "codeContent": ( + "Ahead event several TV go. Thank not husband center. Begin most heavy. " + "Game have return since nothing be apply." + ), + "language": "C++", + "functionNames": ["function"] + }, + { + "supabaseChunkId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "codeContent": ( + "War should share face build. Section compare herself region matter street south.\n" + "Technology amount affect TV television office. Identify policy face if whom commercial way." + ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "codeContent": ( + "Concern significant management senior. Large under north play person ten physical character.\n" + "Kind field ever argue medical financial later. Hard expert popular within." + ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "codeContent": ( + "Position always remain yard model particular hair. Hold simple quickly appear piece." + ), + "language": "Java", + "functionNames": ["import"] + }, + { + "supabaseChunkId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "codeContent": ( + "Gun guy Congress degree way main difficult. Choice fast small medical. Strong this also from short.\n" + "Story side speak close. Analysis hair rest wide particular sell." + ), + "language": "Rust", + "functionNames": ["comment"] + }, + { + "supabaseChunkId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "codeContent": ( + "Expect several evening town. Store begin treat stage. Us increase how hear history bank.\n" + "Five between research. Social case expert stop receive catch." 
+ ), + "language": "JavaScript", + "functionNames": ["function"] + } + ] + try: + with client.batch.dynamic() as batch: + for chunk in code_chunks: + batch.add_object( + collection="weaviate_code_chunk", + properties=chunk + ) + print("Populated: weaviate_code_chunk with sample data.") + except Exception as e: + print(f"Error populating weaviate_code_chunk: {e}") +def populate_Weaviate_interaction(client): + interactions = [ + { + "supabaseInteractionId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "conversationSummary": "User asked about C++ code chunk.", + "platform": "web", + "topics": ["C++", "Code Chunk"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "conversationSummary": "User inquired about Go function.", + "platform": "mobile", + "topics": ["Go", "Function"], + "embedding": [0.4, 0.5, 0.6] # Example embedding vector + }, + { + "supabaseInteractionId": "1f787967-316c-4232-b251-64bcf8e3251b", + "conversationSummary": "User asked for a summary of the C++ module.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.7, 0.8, 0.9] # Example embedding vector + }, + { + "supabaseInteractionId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "conversationSummary": "User inquired about the import statement in C++.", + "platform": "web", + "topics": ["C++", "Import"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "b3103899-d683-422a-9072-2ad26050d8f5", + "conversationSummary": "User asked if this function in C++ is recursive.", + "platform": "web", + "topics": ["C++", "Function", "Recursion"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "conversationSummary": "User inquired about what this module handles in the codebase.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.1, 0.2, 0.3] # Example embedding 
vector + }, + { + "supabaseInteractionId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "conversationSummary": "User asked about the logic in this C++ module.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "conversationSummary": "User inquired about the import statement in Java.", + "platform": "web", + "topics": ["Java", "Import"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "conversationSummary": "User asked if this is a comment or code in Rust.", + "platform": "web", + "topics": ["Rust", "Comment", "Code"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + }, + { + "supabaseInteractionId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "conversationSummary": "User inquired about the output of this JavaScript function.", + "platform": "web", + "topics": ["JavaScript", "Function", "Output"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector + } + ] + try: + with client.batch.dynamic() as batch: + for interaction in interactions: + batch.add_object( + collection="weaviate_interaction", + properties=interaction + ) + print("Populated: weaviate_interaction with sample data.") + except Exception as e: + print(f"Error populating weaviate_interaction: {e}") +def populate_Weaviate_user_profile(client): + user_profiles = [ + { + "supabaseUserId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "profileSummary": "Experienced C++ developer with a focus on performance optimization.", + "primaryLanguages": ["C++", "Python"], + "expertiseAreas": ["Performance Tuning", "Concurrency"] + }, + { + "supabaseUserId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "profileSummary": "Go developer with a passion for building scalable systems.", + "primaryLanguages": ["Go", "JavaScript"], + "expertiseAreas": ["Microservices", "Cloud Computing"] + }, + { + 
"supabaseUserId": "1f787967-316c-4232-b251-64bcf8e3251b", + "profileSummary": "C++ module developer with experience in embedded systems.", + "primaryLanguages": ["C++", "Rust"], + "expertiseAreas": ["Embedded Systems", "Real-time Processing"] + }, + { + "supabaseUserId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "profileSummary": "C++ developer with a knack for clean imports and modular code.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Code Organization", "Modularity"] + }, + { + "supabaseUserId": "b3103899-d683-422a-9072-2ad26050d8f5", + "profileSummary": "C++ enthusiast focusing on algorithmic challenges.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Algorithms", "Problem Solving"] + }, + { + "supabaseUserId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "profileSummary": "C++ developer with experience in system architecture.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["System Design", "Architecture"] + }, + { + "supabaseUserId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "profileSummary": "C++ developer passionate about medical technology.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Medical Tech", "Data Analysis"] + }, + { + "supabaseUserId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "profileSummary": "Java developer with a focus on enterprise solutions.", + "primaryLanguages": ["Java"], + "expertiseAreas": ["Enterprise Software", "APIs"] + }, + { + "supabaseUserId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "profileSummary": "Rustacean interested in safe and fast code.", + "primaryLanguages": ["Rust"], + "expertiseAreas": ["Memory Safety", "Performance"] + }, + { + "supabaseUserId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "profileSummary": "JavaScript developer with a love for UI/UX.", + "primaryLanguages": ["JavaScript"], + "expertiseAreas": ["Frontend", "User Experience"] + } + ] + try: + with client.batch.dynamic() as batch: + for profile in user_profiles: + batch.add_object( + collection="weaviate_user_profile", + 
properties=profile + ) + print("Populated: weaviate_user_profile with sample data.") + except Exception as e: + print(f"Error populating weaviate_user_profile: {e}") +def populate_all_collections(): + client = get_client() + print("Populating Weaviate collections with sample data...") + populate_Weaviate_code_chunk(client) + populate_Weaviate_interaction(client) + populate_Weaviate_user_profile(client) + print("✅ All collections populated with sample data.") diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml new file mode 100644 index 00000000..fa53ce8b --- /dev/null +++ b/backend/docker-compose.yml @@ -0,0 +1,28 @@ +--- +services: + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: cr.weaviate.io/semitechnologies/weaviate:1.31.0 + ports: + - 8080:8080 + - 50051:50051 + volumes: + - weaviate_data:/var/lib/weaviate + restart: on-failure:0 + environment: + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + ENABLE_API_BASED_MODULES: 'true' + ENABLE_MODULES: 'text2vec-ollama,generative-ollama' + CLUSTER_HOSTNAME: 'node1' +volumes: + weaviate_data: +... 
+ diff --git a/backend/main.py b/backend/main.py index 9608cdfa..277bc793 100644 --- a/backend/main.py +++ b/backend/main.py @@ -7,6 +7,7 @@ from app.core.orchestration.queue_manager import AsyncQueueManager from app.core.orchestration.agent_coordinator import AgentCoordinator from bots.discord.discord_bot import DiscordBot +from app.db.weaviate.weaviate_client import get_client # Configure logging logging.basicConfig( @@ -19,6 +20,12 @@ class DevRAIApplication: """Main application class""" def __init__(self): + try: + self.weaviate_client = get_client() + logger.info(f"Weaviate client initialized: {self.weaviate_client.is_ready()}") + except Exception as e: + logger.error(f"Error initializing Weaviate client: {str(e)}") + self.weaviate_client = None self.queue_manager = AsyncQueueManager() self.agent_coordinator = AgentCoordinator(self.queue_manager) self.discord_bot = DiscordBot(self.queue_manager) @@ -52,18 +59,24 @@ async def stop(self): logger.info("Stopping Devr.AI Application...") self.running = False + # Close Weaviate client + try: + if hasattr(self, 'weaviate_client') and self.weaviate_client is not None: + self.weaviate_client.close() + logger.info("Weaviate client closed") + except Exception as e: + logger.error(f"Error closing Weaviate client: {str(e)}") - # Stop Discord bot + # Stop Discord bot try: if not self.discord_bot.is_closed(): await self.discord_bot.close() except Exception as e: logger.error(f"Error closing Discord bot: {str(e)}") - - # Stop queue manager + # Stop queue manager await self.queue_manager.stop() - logger.info("Devr.AI Application stopped") + logger.info("Devr.AI Application stopped") # Global application instance @@ -71,7 +84,6 @@ async def stop(self): async def main(): """Main entry point""" - # Setup signal handlers for graceful shutdown loop = asyncio.get_running_loop() diff --git a/tests/test_supabase.py b/tests/test_supabase.py new file mode 100644 index 00000000..2cf7f1b3 --- /dev/null +++ b/tests/test_supabase.py @@ -0,0 
+1,287 @@ +from ..backend.app.model.supabase.models import User, Interaction, CodeChunk, Repository +from uuid import uuid4 +from ..backend.app.db.supabase.supabase_client import get_supabase_client +from datetime import datetime # Your User model import + +client = get_supabase_client() + +def insert_user_into_supabase(user: User): + # Convert Pydantic User model to dict to send to Supabase + user_dict = user.dict() + + # Supabase expects datetime fields as ISO 8601 strings + # Convert datetime fields to ISO strings + for key in ['created_at', 'updated_at', 'verified_at', 'last_active_discord', 'last_active_github', 'last_active_slack']: + if user_dict.get(key): + user_dict[key] = user_dict[key].isoformat() + + response = client.table("users").insert(user_dict).execute() # type: ignore + + if response.status_code != 201: + raise Exception(f"Failed to insert user: {response}") + return response.data[0] + +def test_create_and_save_user(): + user = User( + id=str(uuid4()), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + discord_id="1234567890", + discord_username="discordUser#1234", + github_id="987654321", + github_username="githubUser", + slack_id="U12345678", + slack_username="slackUser", + display_name="John Doe", + email="john.doe@example.com", + avatar_url="https://example.com/avatar.jpg", + bio="Software developer and open source enthusiast.", + location="San Francisco, CA", + is_verified=True, + verification_token="verif_token_abc123", + verified_at=datetime.utcnow(), + skills=["Python", "Go", "Docker"], + github_stats={"repos": 42, "followers": 100}, + last_active_discord=datetime.utcnow(), + last_active_github=datetime.utcnow(), + last_active_slack=datetime.utcnow(), + total_interactions_count=256, + preferred_languages=["Python", "JavaScript", "Rust"], + weaviate_user_id="weaviate-uuid-1234" + ) + + saved_user = insert_user_into_supabase(user) + print("User saved:", saved_user) + +def get_user_by_id(user_id: str): + response = 
client.table("users").select("*").eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch user: {response}") + if not response.data: + raise ValueError(f"No user found with ID: {user_id}") + return response.data[0] + +def update_user(user_id: str, updates: dict): + response = client.table("users").update(updates).eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update user: {response}") + return response.data[0] + +def delete_user(user_id: str): + response = client.table("users").delete().eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete user: {response}") + return response.data[0] + +# Test the user creation and saving functionality +def test_user(): + user = User( + id=str(uuid4()), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + discord_id="1234567890", + discord_username="discordUser#1234", + github_id="987654321", + github_username="githubUser", + slack_id="U12345678", + slack_username="slackUser", + display_name="John Doe", + email="john.doe@example.com", + avatar_url="https://example.com/avatar.jpg", + bio="Software developer and open source enthusiast.", + location="San Francisco, CA", + is_verified=True, + verification_token="verif_token_abc123", + verified_at=datetime.utcnow(), + skills=["Python", "Go", "Docker"], + github_stats={"repos": 42, "followers": 100}, + last_active_discord=datetime.utcnow(), + last_active_github=datetime.utcnow(), + last_active_slack=datetime.utcnow(), + total_interactions_count=256, + preferred_languages=["Python", "JavaScript", "Rust"], + weaviate_user_id="weaviate-uuid-1234" + ) + inserted_user = insert_user_into_supabase(user) + print(f"Inserted User: {inserted_user}") + get_user = get_user_by_id(inserted_user['id']) + print(f"Fetched User: {get_user}") + updated_user = update_user(inserted_user['id'], {"display_name": "John Updated"}) + print(f"Updated User: {updated_user}") 
+ deleted_user = delete_user(inserted_user['id']) + print(f"Deleted User: {deleted_user}") + + +def insert_interaction(interaction: Interaction): + interaction_dict = interaction.dict() + for key in ['created_at', 'updated_at']: + if interaction_dict.get(key): + interaction_dict[key] = interaction_dict[key].isoformat() + + response = client.table("interactions").insert(interaction_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert interaction: {response}") + return response.data[0] + +def read_interaction_by_id(interaction_id: str): + response = client.table("interactions").select("*").eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch interaction: {response}") + if not response.data: + raise ValueError(f"No interaction found with ID: {interaction_id}") + return response.data[0] + +def update_interaction(interaction_id: str, updates: dict): + response = client.table("interactions").update(updates).eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update interaction: {response}") + return response.data[0] +def delete_interaction(interaction_id: str): + response = client.table("interactions").delete().eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete interaction: {response}") + return response.data[0] + +def test_interaction(): + interaction = Interaction( + id=str(uuid4()), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + user_id=str(uuid4()), + repository_id=str(uuid4()), + interaction_type="comment", + content="Hello, this is a test interaction.", + metadata={"source": "test_script"}, + platform="github", + platform_specific_id="gh-interaction-5678", + weaviate_interaction_id="weaviate-interaction-1234" + ) + inserted_interaction = insert_interaction(interaction) + print(f"Inserted Interaction: {inserted_interaction}") + get_interaction = 
read_interaction_by_id(inserted_interaction['id']) + print(f"Fetched Interaction: {get_interaction}") + updated_interaction = update_interaction(inserted_interaction['id'], {"content": "Updated interaction content."}) + print(f"Updated Interaction: {updated_interaction}") + deleted_interaction = delete_interaction(inserted_interaction['id']) + print(f"Deleted Interaction: {deleted_interaction}") + +def insert_code_chunk(code_chunk: CodeChunk): + code_chunk_dict = code_chunk.dict() + for key in ['created_at']: + if code_chunk_dict.get(key): + code_chunk_dict[key] = code_chunk_dict[key].isoformat() + + response = client.table("code_chunks").insert(code_chunk_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert code chunk: {response}") + return response.data[0] +def read_code_chunk_by_id(code_chunk_id: str): + response = client.table("code_chunks").select("*").eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch code chunk: {response}") + if not response.data: + raise ValueError(f"No code chunk found with ID: {code_chunk_id}") + return response.data[0] +def update_code_chunk(code_chunk_id: str, updates: dict): + response = client.table("code_chunks").update(updates).eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update code chunk: {response}") + return response.data[0] +def delete_code_chunk(code_chunk_id: str): + response = client.table("code_chunks").delete().eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete code chunk: {response}") + return response.data[0] +def test_code_chunk(): + code_chunk = CodeChunk( + id=str(uuid4()), + repository_id=str(uuid4()), + created_at=datetime.utcnow(), + file_path="/path/to/file.py", + file_name="file.py", + file_extension=".py", + chunk_index=1, + content="def hello_world():\n print('Hello, world!')", + chunk_type="function", + 
language="Python", + lines_start=1, + lines_end=3, + code_metadata={"complexity": "low"}, + weaviate_chunk_id="weaviate-chunk-1234" + ) + inserted_code_chunk = insert_code_chunk(code_chunk) + print(f"Inserted Code Chunk: {inserted_code_chunk}") + get_code_chunk = read_code_chunk_by_id(inserted_code_chunk['id']) + print(f"Fetched Code Chunk: {get_code_chunk}") + updated_code_chunk = update_code_chunk(inserted_code_chunk['id'], { + "content": "def hello_world():\n print('Updated content!')"}) + print(f"Updated Code Chunk: {updated_code_chunk}") + deleted_code_chunk = delete_code_chunk(inserted_code_chunk['id']) + print(f"Deleted Code Chunk: {deleted_code_chunk}") +def insert_repository(repository: Repository): + repository_dict = repository.dict() + for key in ['created_at', 'updated_at', 'indexed_at']: + if repository_dict.get(key): + repository_dict[key] = repository_dict[key].isoformat() + + response = client.table("repositories").insert(repository_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert repository: {response}") + return response.data[0] +def read_repository_by_id(repository_id: str): + response = client.table("repositories").select("*").eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch repository: {response}") + if not response.data: + raise ValueError(f"No repository found with ID: {repository_id}") + return response.data[0] +def update_repository(repository_id: str, updates: dict): + response = client.table("repositories").update(updates).eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update repository: {response}") + return response.data[0] +def delete_repository(repository_id: str): + response = client.table("repositories").delete().eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete repository: {response}") + return response.data[0] +def 
test_repository(): + repository = Repository( + id=str(uuid4()), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + github_id=123456789, + full_name="example/repo", + name="repo", + owner="example", + description="An example repository for testing.", + stars_count=100, + forks_count=10, + open_issues_count=5, + language="Python", + topics=["example", "test"], + is_indexed=True, + indexed_at=datetime.utcnow(), + indexing_status="completed", + total_chunks_count=50, + last_commit_hash="abc123def456", + indexing_progress={"current": 50, "total": 100}, + weaviate_repo_id="weaviate-repo-1234" + ) + inserted_repository = insert_repository(repository) + print(f"Inserted Repository: {inserted_repository}") + get_repository = read_repository_by_id(inserted_repository['id']) + print(f"Fetched Repository: {get_repository}") + updated_repository = update_repository(inserted_repository['id'], {"description": "Updated description."}) + print(f"Updated Repository: {updated_repository}") + deleted_repository = delete_repository(inserted_repository['id']) + print(f"Deleted Repository: {deleted_repository}") + +def all_tests(): + test_user() + test_interaction() + test_code_chunk() + test_repository() diff --git a/tests/test_weaviate.py b/tests/test_weaviate.py new file mode 100644 index 00000000..ff8fd863 --- /dev/null +++ b/tests/test_weaviate.py @@ -0,0 +1,252 @@ +from app.db.weaviate.weaviate_client import get_client +from datetime import datetime +from uuid import uuid4 +from app.model.weaviate.models import ( + WeaviateUserProfile, + WeaviateCodeChunk, + WeaviateInteraction +) + + +def test_weaviate_client(): + client = get_client() + assert client is not None, "Weaviate client should not be None" + try: + ready = client.is_ready() + assert ready, "Weaviate client is not ready" + except Exception as e: + raise AssertionError(f"Weaviate client connection failed: {e}") + +def insert_user_profile(): + user_profile = WeaviateUserProfile( + 
def _find_user_profile_object(collection, user_id: str):
    """Return the first BM25 match for ``user_id`` on ``supabaseUserId``, or ``None``."""
    # NOTE(review): BM25 is a keyword search, not an exact match; confirm that
    # ``supabaseUserId`` is indexed as text, or switch to an equality filter.
    response = collection.query.bm25(
        query=user_id,
        query_properties=["supabaseUserId"],
        limit=1,
    )
    return response.objects[0] if response.objects else None


def get_user_profile_by_id(user_id: str):
    """Look up a user profile by Supabase user id.

    Args:
        user_id: Supabase user id to search for.

    Returns:
        A ``WeaviateUserProfile`` built from the stored properties, or
        ``None`` when nothing matched or the lookup raised.
    """
    client = get_client()
    try:
        questions = client.collections.get("weaviate_user_profile")
        match = _find_user_profile_object(questions, user_id)
        if match is not None:
            return WeaviateUserProfile(**match.properties)
    except Exception as e:
        print(f"Error retrieving user profile: {e}")
    return None


def update_user_profile(user_id: str):
    """Set ``profileSummary`` to a fixed test value on the matching profile.

    Args:
        user_id: Supabase user id to search for.

    Returns:
        The stored properties with the new summary applied, or ``None`` when
        nothing matched or the update raised.
    """
    try:
        questions = get_client().collections.get("weaviate_user_profile")
        match = _find_user_profile_object(questions, user_id)
        if match is None:
            print("User profile not found.")
            return None
        changes = {"profileSummary": "Updated profile summary"}
        # Updates go through ``data.update`` and are addressed by object UUID.
        questions.data.update(uuid=match.uuid, properties=changes)
        print("User profile updated successfully.")
        return {**match.properties, **changes}
    except Exception as e:
        print(f"Error updating user profile: {e}")
        return None


def delete_user_profile(user_id: str):
    """Delete the profile stored for a Supabase user id.

    The object is looked up first so the delete uses the object's own UUID
    and does not assume it equals the Supabase id.

    Args:
        user_id: Supabase user id to search for.

    Returns:
        ``True`` when an object was deleted, ``False`` when nothing matched
        or the delete raised.
    """
    try:
        questions = get_client().collections.get("weaviate_user_profile")
        match = _find_user_profile_object(questions, user_id)
        if match is not None and questions.data.delete_by_id(match.uuid):
            print("User profile deleted successfully.")
            return True
        print("User profile not found.")
        return False
    except Exception as e:
        print(f"Error deleting user profile: {e}")
        return False
def test_user_profile():
    """Run insert, get, update and delete for one sample user profile.

    Only the insert result is asserted; the remaining helpers report their
    outcome by printing.
    """
    inserted_user = insert_user_profile()
    assert inserted_user is not None, "User profile insertion failed"
    user_id = inserted_user.supabase_user_id
    get_user_profile_by_id(user_id)
    update_user_profile(user_id)
    delete_user_profile(user_id)


def insert_code_chunk():
    """Insert a sample code chunk into the ``weaviate_code_chunk`` collection.

    The Supabase chunk id is used as the object UUID, so the get, update and
    delete helpers can address the object by that same id.

    Returns:
        The inserted ``WeaviateCodeChunk``, or ``None`` when the insert raised.
    """
    client = get_client()
    code_chunk = WeaviateCodeChunk(
        supabase_chunk_id=str(uuid4()),
        code_content="def hello_world():\n    print('Hello, world!')",
        language="Python",
        function_names=["hello_world"],
        embedding=[0.1] * 384  # Example embedding
    )
    try:
        # The client from ``weaviate.connect_to_local()`` is used through its
        # collections API; it has no ``data_object`` interface.
        collection = client.collections.get("weaviate_code_chunk")
        collection.data.insert(
            properties=code_chunk.dict(by_alias=True),
            uuid=code_chunk.supabase_chunk_id,
        )
        print("Code chunk inserted successfully.")
        return code_chunk
    except Exception as e:
        print(f"Error inserting code chunk: {e}")
        return None


def get_code_chunk_by_id(code_chunk_id: str):
    """Fetch a code chunk by object UUID.

    Args:
        code_chunk_id: UUID the object was stored under.

    Returns:
        A ``WeaviateCodeChunk`` built from the stored properties, or ``None``
        when the object does not exist or the lookup raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_code_chunk")
        stored = collection.query.fetch_object_by_id(code_chunk_id)
        if stored is not None:
            return WeaviateCodeChunk(**stored.properties)
    except Exception as e:
        print(f"Error retrieving code chunk: {e}")
    return None


def update_code_chunk(code_chunk_id: str):
    """Set ``codeContent`` to a fixed test value on an existing code chunk.

    Args:
        code_chunk_id: UUID the object was stored under.

    Returns:
        A ``WeaviateCodeChunk`` reflecting the update, or ``None`` when the
        object does not exist or the update raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_code_chunk")
        stored = collection.query.fetch_object_by_id(code_chunk_id)
        if stored is None:
            print("Code chunk not found.")
            return None
        changes = {"codeContent": "Updated code content"}
        collection.data.update(uuid=code_chunk_id, properties=changes)
        print("Code chunk updated successfully.")
        return WeaviateCodeChunk(**{**stored.properties, **changes})
    except Exception as e:
        print(f"Error updating code chunk: {e}")
        return None


def delete_code_chunk(code_chunk_id: str):
    """Delete a code chunk by object UUID.

    Args:
        code_chunk_id: UUID the object was stored under.

    Returns:
        ``True`` when the delete call reports success, ``False`` otherwise or
        when it raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_code_chunk")
        if collection.data.delete_by_id(code_chunk_id):
            print("Code chunk deleted successfully.")
            return True
        print("Code chunk not found.")
        return False
    except Exception as e:
        print(f"Error deleting code chunk: {e}")
        return False
def test_code_chunk():
    """Run insert, get, update and delete for one sample code chunk.

    Only the insert result is asserted; the remaining helpers report their
    outcome by printing.
    """
    inserted_chunk = insert_code_chunk()
    assert inserted_chunk is not None, "Code chunk insertion failed"
    chunk_id = inserted_chunk.supabase_chunk_id
    get_code_chunk_by_id(chunk_id)
    update_code_chunk(chunk_id)
    delete_code_chunk(chunk_id)


def insert_interaction():
    """Insert a sample interaction into the ``weaviate_interaction`` collection.

    The Supabase interaction id is used as the object UUID, so the get,
    update and delete helpers can address the object by that same id.

    Returns:
        The inserted ``WeaviateInteraction``, or ``None`` when the insert
        raised.
    """
    client = get_client()
    interaction = WeaviateInteraction(
        supabase_interaction_id=str(uuid4()),
        conversation_summary="Test interaction summary",
        platform="Web",
        topics=["AI", "Machine Learning"],
        embedding=[0.1] * 384  # Example embedding
    )
    try:
        # The client from ``weaviate.connect_to_local()`` is used through its
        # collections API; it has no ``data_object`` interface.
        collection = client.collections.get("weaviate_interaction")
        collection.data.insert(
            properties=interaction.dict(by_alias=True),
            uuid=interaction.supabase_interaction_id,
        )
        print("Interaction inserted successfully.")
        return interaction
    except Exception as e:
        print(f"Error inserting interaction: {e}")
        return None


def get_interaction_by_id(interaction_id: str):
    """Fetch an interaction by object UUID.

    Args:
        interaction_id: UUID the object was stored under.

    Returns:
        A ``WeaviateInteraction`` built from the stored properties, or
        ``None`` when the object does not exist or the lookup raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_interaction")
        stored = collection.query.fetch_object_by_id(interaction_id)
        if stored is not None:
            return WeaviateInteraction(**stored.properties)
    except Exception as e:
        print(f"Error retrieving interaction: {e}")
    return None


def update_interaction(interaction_id: str):
    """Set ``conversationSummary`` to a fixed test value on an interaction.

    Args:
        interaction_id: UUID the object was stored under.

    Returns:
        A ``WeaviateInteraction`` reflecting the update, or ``None`` when the
        object does not exist or the update raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_interaction")
        stored = collection.query.fetch_object_by_id(interaction_id)
        if stored is None:
            print("Interaction not found.")
            return None
        changes = {"conversationSummary": "Updated interaction summary"}
        collection.data.update(uuid=interaction_id, properties=changes)
        print("Interaction updated successfully.")
        return WeaviateInteraction(**{**stored.properties, **changes})
    except Exception as e:
        print(f"Error updating interaction: {e}")
        return None


def delete_interaction(interaction_id: str):
    """Delete an interaction by object UUID.

    Args:
        interaction_id: UUID the object was stored under.

    Returns:
        ``True`` when the delete call reports success, ``False`` otherwise or
        when it raised.
    """
    client = get_client()
    try:
        collection = client.collections.get("weaviate_interaction")
        if collection.data.delete_by_id(interaction_id):
            print("Interaction deleted successfully.")
            return True
        print("Interaction not found.")
        return False
    except Exception as e:
        print(f"Error deleting interaction: {e}")
        return False
def test_interaction():
    """Run insert, get, update and delete for one sample interaction.

    Only the insert result is asserted; the other helpers are called for
    their printed output.
    """
    created = insert_interaction()
    assert created is not None, "Interaction insertion failed"
    get_interaction_by_id(created.supabase_interaction_id)
    update_interaction(created.supabase_interaction_id)
    delete_interaction(created.supabase_interaction_id)


def all_tests():
    """Run the client, user-profile, code-chunk and interaction routines in order."""
    for routine in (
        test_weaviate_client,
        test_user_profile,
        test_code_chunk,
        test_interaction,
    ):
        routine()