From 0c64626e7e38e84b18c8af03479e901f70d70bb3 Mon Sep 17 00:00:00 2001
From: Jaeyoun Nam <siisee111@gmail.com>
Date: Sun, 4 Jun 2023 01:32:54 +0900
Subject: [PATCH] Feature/load obsidian (#14)

* feat: tts using ElevenLabs (not streaming) (#10)

* feat: load all files in SOURCE_DOCUMENTS (#11)

* feat: update obsidian loader

* feat: update README
# Conflicts:
#	apps/api/README.md
#	apps/api/ingest.py
---
 apps/api/README.md |  3 ++-
 apps/api/ingest.py | 21 +++++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/apps/api/README.md b/apps/api/README.md
index 322d886..1035c41 100644
--- a/apps/api/README.md
+++ b/apps/api/README.md
@@ -18,7 +18,8 @@ poetry install
 ``` 
 
 ### Load Documents into memory
-First, locate your .pdf, .csv, .txt files in SOURCE_DOCUMENTS
+First, place your .pdf, .csv, .txt, and .obs files in SOURCE_DOCUMENTS
+(To load an Obsidian vault, write the vault's root path on the first line of a .obs file)
 
 Second, run ingest script. 
 ```
diff --git a/apps/api/ingest.py b/apps/api/ingest.py
index bf200bc..5e00550 100644
--- a/apps/api/ingest.py
+++ b/apps/api/ingest.py
@@ -4,14 +4,14 @@
 from typing import List
 
 from langchain.docstore.document import Document
-from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader
+from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader, ObsidianLoader
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores.faiss import FAISS
 
 from constant import SOURCE_DIRECTORY
 
-def load_single_document(file_path: str) -> Document:
+def load_single_document(file_path: str) -> List[Document]:
     # Loads a single document from a file path
     if file_path.endswith(".txt"):
         loader = TextLoader(file_path, encoding="utf8")
@@ -19,13 +19,26 @@ def load_single_document(file_path: str) -> Document:
         loader = PyPDFLoader(file_path)
     elif file_path.endswith(".csv"):
         loader = CSVLoader(file_path)
-    return loader.load()[0]
+    elif file_path.endswith(".obs"):
+        f = open(file_path, 'r')
+        obsidian_path = f.readline()
+        print(obsidian_path)
+        loader = ObsidianLoader(obsidian_path.strip())
+    docs = loader.load()
+    print(f"Loaded {len(docs)} documents from {file_path}")
+    return docs
 
 
 def load_documents(source_dir: str) -> List[Document]:
     # Loads all documents from source documents directory
     all_files = os.listdir(source_dir)
-    return [load_single_document(f"{source_dir}/{file_path}") for file_path in all_files if file_path[-4:] in ['.txt', '.pdf', '.csv'] ]
+    docs = []
+    for file_path in all_files  :
+        if file_path[-4:] in ['.txt', '.pdf', '.csv', '.obs']:
+            absolute_path = (f"{source_dir}/{file_path}") 
+            docs += load_single_document(absolute_path)
+
+    return docs
 
 
 def ingest_docs():