Skip to content

Commit

Permalink
Feature/load obsidian (#14)
Browse files Browse the repository at this point in the history
* feat: tts using ElevenLabs (not streaming) (#10)

* feat: load all files in SOURCE_DOCUMENTS (#11)

* feat: update obsidian loader

* feat: update README
# Conflicts:
#	apps/api/README.md
#	apps/api/ingest.py
  • Loading branch information
siisee11 committed Jun 7, 2023
1 parent 37dec6c commit 0c64626
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
3 changes: 2 additions & 1 deletion apps/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ poetry install
```

### Load Documents into memory
First, locate your .pdf, .csv, .txt files in SOURCE_DOCUMENTS
First, place your .pdf, .csv, .txt, or .obs files in SOURCE_DOCUMENTS
(To load an Obsidian vault, write the vault's root path on the first line of a .obs file.)

Second, run ingest script.
```
Expand Down
21 changes: 17 additions & 4 deletions apps/api/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,41 @@
from typing import List

from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader
from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader, ObsidianLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

from constant import SOURCE_DIRECTORY

def load_single_document(file_path: str) -> List[Document]:
    """Load every document that a single source file gives access to.

    The loader is chosen from the file extension:

    * ``.txt`` -> ``TextLoader`` (read as UTF-8)
    * ``.pdf`` -> ``PyPDFLoader``
    * ``.csv`` -> ``CSVLoader``
    * ``.obs`` -> ``ObsidianLoader``; the first line of the ``.obs`` file is
      taken as the path handed to the loader.

    Args:
        file_path: Path of the file to load.

    Returns:
        The list of documents returned by the selected loader's ``load()``.

    Raises:
        ValueError: If the extension is not one of the supported ones, or if
            a ``.obs`` file has an empty first line.
    """
    if file_path.endswith(".txt"):
        loader = TextLoader(file_path, encoding="utf8")
    elif file_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif file_path.endswith(".csv"):
        loader = CSVLoader(file_path)
    elif file_path.endswith(".obs"):
        # A .obs file is only a pointer: its first line names the directory
        # to load. Use a context manager so the handle is always closed.
        with open(file_path, "r", encoding="utf8") as pointer_file:
            obsidian_path = pointer_file.readline().strip()
        if not obsidian_path:
            # Fail loudly instead of handing an empty path to the loader.
            raise ValueError(f"No Obsidian path found in {file_path}")
        print(obsidian_path)
        loader = ObsidianLoader(obsidian_path)
    else:
        # Without this branch `loader` would be unbound further down.
        raise ValueError(f"Unsupported file type: {file_path}")

    docs = loader.load()
    print(f"Loaded {len(docs)} documents from {file_path}")
    return docs


def load_documents(source_dir: str) -> List[Document]:
    """Load all supported documents found directly inside ``source_dir``.

    Only regular files whose names end in ``.txt``, ``.pdf``, ``.csv`` or
    ``.obs`` are considered; everything else (including sub-directories) is
    ignored. Each matching file is passed to ``load_single_document`` and the
    resulting lists are concatenated.

    Args:
        source_dir: Directory to scan (not searched recursively).

    Returns:
        A flat list with the documents of every supported file, in sorted
        file-name order.
    """
    supported_suffixes = (".txt", ".pdf", ".csv", ".obs")
    docs: List[Document] = []
    # os.listdir returns entries in arbitrary order; sort so that repeated
    # ingestion runs see the files in the same order.
    for file_name in sorted(os.listdir(source_dir)):
        if not file_name.endswith(supported_suffixes):
            continue
        absolute_path = os.path.join(source_dir, file_name)
        # A directory may carry a supported suffix in its name; the loaders
        # only make sense for regular files.
        if not os.path.isfile(absolute_path):
            continue
        docs.extend(load_single_document(absolute_path))
    return docs


def ingest_docs():
Expand Down

0 comments on commit 0c64626

Please sign in to comment.