Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: create "files" package #1626

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions backend/celery_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@
from models.files import File
from models.notifications import NotificationsStatusEnum
from models.settings import get_supabase_client
from parsers.github import process_github
from repository.brain.update_brain_last_update_time import (
update_brain_last_update_time,
)
from packages.files.parsers.github import process_github
from packages.files.processors import filter_file
from repository.brain.update_brain_last_update_time import update_brain_last_update_time
from repository.notification.update_notification import update_notification_by_id
from repository.onboarding.remove_onboarding_more_than_x_days import (
remove_onboarding_more_than_x_days,
)
from utils.processors import filter_file

CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "")
CELEBRY_BROKER_QUEUE_NAME = os.getenv("CELEBRY_BROKER_QUEUE_NAME", "quivr")
Expand Down
2 changes: 1 addition & 1 deletion backend/models/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from models.brains import Brain
from models.databases.supabase.supabase import SupabaseDB
from models.settings import get_supabase_db
from packages.files.file import compute_sha1_from_file
from pydantic import BaseModel
from utils.file import compute_sha1_from_file

logger = get_logger(__name__)

Expand Down
File renamed without changes.
File renamed without changes.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
import openai
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

from models import File, get_documents_vector_store
from utils.file import compute_sha1_from_content
from packages.files.file import compute_sha1_from_content


async def process_audio(
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from models import Brain, File
from utils.file import compute_sha1_from_content
from packages.files.file import compute_sha1_from_content
from utils.vectors import Neurons


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from models.brains import Brain
from models.files import File
from parsers.audio import process_audio
from parsers.code_python import process_python
from parsers.csv import process_csv
from parsers.docx import process_docx
from parsers.epub import process_epub
from parsers.html import process_html
from parsers.markdown import process_markdown
from parsers.notebook import process_ipnyb
from parsers.odt import process_odt
from parsers.pdf import process_pdf
from parsers.powerpoint import process_powerpoint
from parsers.telegram import process_telegram
from parsers.txt import process_txt
from parsers.xlsx import process_xlsx
from repository.brain.get_brain_by_id import get_brain_by_id

from .parsers.audio import process_audio
from .parsers.code_python import process_python
from .parsers.csv import process_csv
from .parsers.docx import process_docx
from .parsers.epub import process_epub
from .parsers.html import process_html
from .parsers.markdown import process_markdown
from .parsers.notebook import process_ipnyb
from .parsers.odt import process_odt
from .parsers.pdf import process_pdf
from .parsers.powerpoint import process_powerpoint
from .parsers.telegram import process_telegram
from .parsers.txt import process_txt
from .parsers.xlsx import process_xlsx

file_processors = {
".txt": process_txt,
".csv": process_csv,
Expand Down Expand Up @@ -46,8 +46,9 @@ def create_response(message, type):
return {"message": message, "type": type}


# TODO: Move filter_file to a file service so the File class can be imported from models/files.py without causing a circular import.
async def filter_file(
file: File,
file,
enable_summarization: bool,
brain_id,
openai_api_key,
Expand Down
2 changes: 1 addition & 1 deletion backend/routes/crawl_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from models.databases.supabase.knowledge import CreateKnowledgeProperties
from models.databases.supabase.notifications import CreateNotificationProperties
from models.notifications import NotificationsStatusEnum
from packages.files.file import convert_bytes
from repository.knowledge.add_knowledge import add_knowledge
from repository.notification.add_notification import add_notification
from utils.file import convert_bytes

logger = get_logger(__name__)
crawl_router = APIRouter()
Expand Down
2 changes: 1 addition & 1 deletion backend/routes/upload_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from models.databases.supabase.knowledge import CreateKnowledgeProperties
from models.databases.supabase.notifications import CreateNotificationProperties
from models.notifications import NotificationsStatusEnum
from packages.files.file import convert_bytes, get_file_size
from repository.brain import get_brain_details
from repository.files.upload_file import upload_file_storage
from repository.knowledge.add_knowledge import add_knowledge
Expand All @@ -19,7 +20,6 @@
RoleEnum,
validate_brain_authorization,
)
from utils.file import convert_bytes, get_file_size

logger = get_logger(__name__)
upload_router = APIRouter()
Expand Down
Loading