Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Filecatcher #1

Open
wants to merge 1 commit into
base: rag-chainlit
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions app/chainlit_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import sys, os, yaml, torch
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))

from src.components import RagPipeline, load_config, AudioTranscriber
from src.components import RagPipeline, load_config
from loguru import logger
from io import BytesIO

APP_DIR = Path(__file__).parent.absolute() # Path.cwd().parent.absolute()
UPLOAD_DIR = APP_DIR / "upload_dir"
Expand Down
3 changes: 2 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import asyncio
from loguru import logger
from pathlib import Path
from src.components import RagPipeline, load_config, evaluate, Indexer
from src.components import RagPipeline, load_config, evaluate
from filecatcher.components import Indexer

# config_path = Path(__file__) / '.hydra_config'
config = load_config()
Expand Down
4 changes: 2 additions & 2 deletions manage_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ async def main():
print(config)

if args.folder:
from src.components import Indexer
from filecatcher.components import Indexer

collection = config.vectordb["collection_name"]
logger.warning(f"Data will be upserted to the collection {collection}")

indexer = Indexer(config, logger)
indexer = Indexer(config = config, logger = logger)

start = time.time()
await indexer.add_files2vdb(path=args.folder)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ langchain-community = "^0.3.7"
langchain-openai = "^0.2.8"
ragatouille = "^0.0.8.post4"
whisperx = {git = "https://github.com/federicotorrielli/BetterWhisperX", branch="main"}
filecatcher = {git = "https://github.com/OpenLLM-France/fileCatcher.git"}
pyannote-audio = "3.1.1"
ctranslate2 = "4.4.0"
qdrant-client = "^1.12.1"
Expand Down
299 changes: 0 additions & 299 deletions src/components/chunker.py

This file was deleted.

14 changes: 2 additions & 12 deletions src/components/config.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
import os
from dotenv import load_dotenv, find_dotenv
from omegaconf import OmegaConf
from pathlib import Path
from hydra import initialize, compose

def load_config(config_path="../../.hydra_config", overrides=None)-> OmegaConf:
load_dotenv()
print(overrides)

def load_config(config_path="../../.hydra_config", overrides=None)-> OmegaConf:
with initialize(config_path=config_path, job_name="config_loader"):
config = compose(config_name="config", overrides=overrides)
config.paths.root_dir = Path(config.paths.root_dir).absolute()
return config

# # Example usage
# if __name__ == "__main__":
# config = load_config()
# print(config)
return config
Loading