chore: add static analysis
mamadoudicko committed Jul 2, 2023
1 parent 5721917 commit 44c6a2a
Showing 28 changed files with 305 additions and 160 deletions.
4 changes: 0 additions & 4 deletions .flake8

This file was deleted.

8 changes: 8 additions & 0 deletions .vscode/extensions.json
@@ -0,0 +1,8 @@
{
"recommendations": [
"charliermarsh.ruff",
"dbaeumer.vscode-eslint",
"ms-python.vscode-pylance",
"ms-pyright.pyright"
]
}
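
Opening the workspace now prompts contributors to install the recommended tooling: ruff for Python linting and import sorting, ESLint for the TypeScript frontend, and Pylance/Pyright for type checking.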
13 changes: 9 additions & 4 deletions .vscode/settings.json
@@ -6,13 +6,13 @@
"source.unusedImports": true
},
"python.linting.enabled": true,
"python.linting.flake8Enabled": true,
"editor.formatOnSave": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": true
"source.fixAll": true,
"source.organizeImports.ruff": true
}
},
"[typescriptreact]": {
@@ -30,5 +30,10 @@
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
},
"editor.formatOnSaveMode": "file"
}
"editor.formatOnSaveMode": "file",
"ruff.args": ["--config=/backend/ruff.toml"],
"python.linting.pycodestyleCategorySeverity.W": "Error",
"python.defaultInterpreterPath": "python3",
"python.linting.flake8CategorySeverity.W": "Error",
"python.analysis.typeCheckingMode": "strict"
}
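
The most consequential new setting is "python.analysis.typeCheckingMode": "strict". A toy illustration (not from the repository) of what strict mode adds: Pyright now reports unannotated parameters as errors, which basic mode ignores.

# Toy example: under Pyright's strict mode, reportMissingParameterType is an
# error, so unannotated function signatures no longer slip through.
def add(a, b):  # strict mode: error -- missing type annotations for "a" and "b"
    return a + b

def add_typed(a: int, b: int) -> int:  # accepted under strict mode
    return a + b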
3 changes: 2 additions & 1 deletion backend/auth/api_key_handler.py
@@ -1,9 +1,10 @@
from datetime import datetime

from fastapi import HTTPException
from models.settings import common_dependencies
from pydantic import DateError

from models.settings import common_dependencies


async def verify_api_key(
api_key: str,
1 change: 0 additions & 1 deletion backend/crawl/crawler.py
@@ -4,7 +4,6 @@
import unicodedata

import requests
from langchain.document_loaders import GitLoader
from pydantic import BaseModel


8 changes: 8 additions & 0 deletions backend/lint.sh
@@ -0,0 +1,8 @@
# Check if "python3" command is available
if command -v python3 &>/dev/null; then
# Run with Python 3
python3 -m ruff check .
else
# Run with Python 2 (assuming it's available)
python -m ruff check .
fi
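
Assuming ruff is installed in the active environment (e.g. pip install ruff), the script runs from backend/ with bash lint.sh; ruff check . exits non-zero when violations are found, so the same command can serve as a CI gate.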
11 changes: 7 additions & 4 deletions backend/llm/brainpicking.py
@@ -10,18 +10,21 @@
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import LLM
from pydantic import BaseModel # For data validation and settings management
from supabase import (
Client, # For interacting with Supabase database
create_client,
)

from llm.prompt.CONDENSE_PROMPT import CONDENSE_QUESTION_PROMPT
from logger import get_logger
from models.settings import BrainSettings # Importing settings related to the 'brain'
from pydantic import BaseModel # For data validation and settings management
from repository.chat.get_chat_history import get_chat_history
from repository.chat.update_chat_history import update_chat_history
from repository.chat.update_message_by_id import update_message_by_id
from supabase import Client # For interacting with Supabase database
from supabase import create_client
from vectorstore.supabase import (
CustomSupabaseVectorStore,
) # Custom class for handling vector storage with Supabase
)

logger = get_logger(__name__)

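
Most of the churn above is ruff's import-sorting rules regrouping imports: standard library first, then third-party packages, then first-party modules such as llm, logger, and repository, each group separated by a blank line. A minimal sketch of the enforced layout (module selection illustrative):

# Standard library imports come first.
import os

# Third-party packages form the second group.
from langchain.chat_models import ChatOpenAI
from pydantic import BaseModel

# First-party application modules (llm, logger, models, ...) come last.
from logger import get_logger
from models.settings import BrainSettings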
5 changes: 3 additions & 2 deletions backend/models/brains.py
@@ -2,9 +2,10 @@
from typing import Any, List, Optional
from uuid import UUID

from pydantic import BaseModel

from models.settings import CommonsDep, common_dependencies
from models.users import User
from pydantic import BaseModel


class Brain(BaseModel):
@@ -43,7 +44,7 @@ def remaining_brain_size(self):
@classmethod
def create(cls, *args, **kwargs):
commons = common_dependencies()
return cls(commons=commons, *args, **kwargs)
return cls(commons=commons, *args, **kwargs) # noqa: B026

def get_user_brains(self, user_id):
response = (
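
The new # noqa: B026 suppresses flake8-bugbear's warning about star-arg unpacking after a keyword argument. The pattern is legal but misleading: the unpacked values still bind to the leading positional parameters even though they appear after the keyword in the call. A toy illustration (hypothetical make function, not from the repo):

# Hypothetical function illustrating flake8-bugbear rule B026.
def make(a, b):
    return a, b

make(b=2, *(1,))      # returns (1, 2): the tuple fills "a" despite appearing last -> B026
# make(b=2, *(1, 2))  # would raise TypeError: got multiple values for argument 'b'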
51 changes: 29 additions & 22 deletions backend/models/files.py
@@ -5,10 +5,11 @@

from fastapi import UploadFile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel

from logger import get_logger
from models.brains import Brain
from models.settings import CommonsDep, common_dependencies
from pydantic import BaseModel
from utils.file import compute_sha1_from_file

logger = get_logger(__name__)
@@ -30,14 +31,16 @@ class File(BaseModel):

def __init__(self, **kwargs):
super().__init__(**kwargs)

if self.file:
self.file_name = self.file.filename
self.file_size = self.file.file._file.tell()
self.file_size = self.file.file._file.tell()
self.file_extension = os.path.splitext(self.file.filename)[-1].lower()

async def compute_file_sha1(self):
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
with tempfile.NamedTemporaryFile(
delete=False, suffix=self.file.filename
) as tmp_file:
await self.file.seek(0)
self.content = await self.file.read()
tmp_file.write(self.content)
@@ -48,18 +51,20 @@ async def compute_file_sha1(self):

def compute_documents(self, loader_class):
logger.info(f"Computing documents from file {self.file_name}")

documents = []
with tempfile.NamedTemporaryFile(delete=False, suffix=self.file.filename) as tmp_file:
with tempfile.NamedTemporaryFile(
delete=False, suffix=self.file.filename
) as tmp_file:
tmp_file.write(self.content)
tmp_file.flush()
loader = loader_class(tmp_file.name)
documents = loader.load()

print("documents", documents)

os.remove(tmp_file.name)

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
)
@@ -70,20 +75,21 @@ def compute_documents(self, loader_class):

def set_file_vectors_ids(self):
"""
Set the vectors_ids property with the ids of the vectors
Set the vectors_ids property with the ids of the vectors
that are associated with the file in the vectors table
"""

commons = common_dependencies()
commons = common_dependencies()
response = (
commons["supabase"].table("vectors")
commons["supabase"]
.table("vectors")
.select("id")
.filter("metadata->>file_sha1", "eq", self.file_sha1)
.execute()
)
self.vectors_ids = response.data
return

def file_already_exists(self):
"""
Check if file already exists in vectors table
Expand All @@ -97,15 +103,16 @@ def file_already_exists(self):
# if the file does not exist in vectors then no need to go check in brains_vectors
if len(self.vectors_ids) == 0:
return False

return True

def file_already_exists_in_brain(self, brain_id):
commons = common_dependencies()
commons = common_dependencies()
self.set_file_vectors_ids()
# Check if file exists in that brain
response = (
commons["supabase"].table("brains_vectors")
commons["supabase"]
.table("brains_vectors")
.select("brain_id, vector_id")
.filter("brain_id", "eq", brain_id)
.filter("file_sha1", "eq", self.file_sha1)
Expand All @@ -114,15 +121,15 @@ def file_already_exists_in_brain(self, brain_id):
print("response.data", response.data)
if len(response.data) == 0:
return False

return True

def file_is_empty(self):
return self.file.file._file.tell() < 1
return self.file.file._file.tell() < 1

def link_file_to_brain(self, brain: Brain):
self.set_file_vectors_ids()

for vector_id in self.vectors_ids:
brain.create_brain_vector(vector_id['id'], self.file_sha1)
brain.create_brain_vector(vector_id["id"], self.file_sha1)
print(f"Successfully linked file {self.file_sha1} to brain {brain.id}")
42 changes: 31 additions & 11 deletions backend/models/settings.py
@@ -3,7 +3,7 @@
from fastapi import Depends
from langchain.embeddings.openai import OpenAIEmbeddings
from pydantic import BaseSettings
from supabase import Client, create_client
from supabase.client import Client, create_client
from vectorstore.supabase import SupabaseVectorStore


@@ -21,9 +21,29 @@ class LLMSettings(BaseSettings):
model_n_batch: int = 8


def common_dependencies() -> dict:
settings = BrainSettings()
embeddings = OpenAIEmbeddings(openai_api_key=settings.openai_api_key)
class CommonDependencies:
def __init__(
self,
supabase: Client,
embeddings: OpenAIEmbeddings,
documents_vector_store: SupabaseVectorStore,
summaries_vector_store: SupabaseVectorStore,
):
self.supabase = supabase
self.embeddings = embeddings
self.documents_vector_store = documents_vector_store
self.summaries_vector_store = summaries_vector_store


def common_dependencies() -> CommonDependencies:
settings = BrainSettings(
# type: ignore automatically loads .env file
)

embeddings = OpenAIEmbeddings(
openai_api_key=settings.openai_api_key,
# type: ignore other parameters are optional
)
supabase_client: Client = create_client(
settings.supabase_url, settings.supabase_service_key
)
@@ -34,12 +54,12 @@ def common_dependencies() -> dict:
supabase_client, embeddings, table_name="summaries"
)

return {
"supabase": supabase_client,
"embeddings": embeddings,
"documents_vector_store": documents_vector_store,
"summaries_vector_store": summaries_vector_store,
}
return CommonDependencies(
supabase_client,
embeddings,
documents_vector_store,
summaries_vector_store,
)


CommonsDep = Annotated[dict, Depends(common_dependencies)]
CommonsDep = Annotated[CommonDependencies, Depends(common_dependencies)]
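
Swapping the returned dict for a CommonDependencies class is what gives the CommonsDep annotation teeth under Pyright: commons.supabase is now typed as Client, where the old commons["supabase"] lookup was opaque. A hedged sketch of consuming it from a FastAPI route (hypothetical endpoint, not from the repo):

from fastapi import FastAPI
from models.settings import CommonsDep

app = FastAPI()

@app.get("/brains")
async def list_brains(commons: CommonsDep):  # hypothetical route
    # Attribute access is type-checked; the old dict lookup was not.
    response = commons.supabase.table("brains").select("*").execute()
    return response.data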
30 changes: 22 additions & 8 deletions backend/models/users.py
@@ -15,17 +15,31 @@ class User(BaseModel):

# [TODO] Rename the user table and its references to 'user_usage'
def create_user(self, date):

commons = common_dependencies()
logger.info(f"New user entry in db document for user {self.email}")

return (commons['supabase'].table("users").insert(
{"user_id": self.id, "email": self.email, "date": date, "requests_count": 1}).execute())
return (
commons.supabase.table("users")
.insert(
{
"user_id": self.id,
"email": self.email,
"date": date,
"requests_count": 1,
}
)
.execute()
)

def get_user_request_stats(self):
commons = common_dependencies()
requests_stats = commons['supabase'].from_('users').select(
'*').filter("user_id", "eq", self.id).execute()
requests_stats = (
commons["supabase"]
.from_("users")
.select("*")
.filter("user_id", "eq", self.id)
.execute()
)
return requests_stats.data

def fetch_user_requests_count(self, date):
@@ -46,7 +60,7 @@ def increment_user_request_count(self, date):
commons = common_dependencies()
requests_count = self.fetch_user_requests_count(date) + 1
logger.info(f"User {self.email} request count updated to {requests_count}")
commons['supabase'].table("users").update(
{"requests_count": requests_count}).match({"user_id": self.id, "date": date}).execute()
commons["supabase"].table("users").update(
{"requests_count": requests_count}
).match({"user_id": self.id, "date": date}).execute()
self.requests_count = requests_count
