From ae40f93937eb53e9f16041c86bccfe0ce60e1d67 Mon Sep 17 00:00:00 2001
From: Arjun Bingly <arjunbin@gmail.com>
Date: Sun, 24 Mar 2024 17:52:21 -0400
Subject: [PATCH 1/3] MyPy ignore missing imports

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index f7c2d4f..3185030 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -116,3 +116,6 @@ docstring-code-format = true
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
+
+[tool.mypy]
+ignore_missing_imports = true

From 546d163e13417ffc8dd30dd280bb7fa4287d59f3 Mon Sep 17 00:00:00 2001
From: Arjun Bingly <arjunbin@gmail.com>
Date: Tue, 26 Mar 2024 19:01:33 -0400
Subject: [PATCH 2/3] Fix top_k bug in multivec retriever

---
 src/grag/components/multivec_retriever.py | 38 +++++++++++------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index 05478df..b7c8c2f 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -44,13 +44,13 @@ class Retriever:
     """
 
     def __init__(
-        self,
-        vectordb: Optional[VectorDB] = None,
-        store_path: str = multivec_retriever_conf["store_path"],
-        id_key: str = multivec_retriever_conf["id_key"],
-        namespace: str = multivec_retriever_conf["namespace"],
-        top_k=1,
-        client_kwargs: Optional[Dict[str, Any]] = None,
+            self,
+            vectordb: Optional[VectorDB] = None,
+            store_path: str = multivec_retriever_conf["store_path"],
+            id_key: str = multivec_retriever_conf["id_key"],
+            namespace: str = multivec_retriever_conf["namespace"],
+            top_k=multivec_retriever_conf["top_k"],
+            client_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """Initialize the Retriever.
 
@@ -236,12 +236,12 @@ def get_docs_from_chunks(self, chunks: List[Document], one_to_one=False):
                 return [d for d in docs if d is not None]
 
     def ingest(
-        self,
-        dir_path: Union[str, Path],
-        glob_pattern: str = "**/*.pdf",
-        dry_run: bool = False,
-        verbose: bool = True,
-        parser_kwargs: dict = None,
+            self,
+            dir_path: Union[str, Path],
+            glob_pattern: str = "**/*.pdf",
+            dry_run: bool = False,
+            verbose: bool = True,
+            parser_kwargs: dict = None,
     ):
         """Ingests the files in directory.
 
@@ -278,12 +278,12 @@ def ingest(
                     print(f"DRY RUN: found - {filepath.relative_to(dir_path)}")
 
     async def aingest(
-        self,
-        dir_path: Union[str, Path],
-        glob_pattern: str = "**/*.pdf",
-        dry_run: bool = False,
-        verbose: bool = True,
-        parser_kwargs: dict = None,
+            self,
+            dir_path: Union[str, Path],
+            glob_pattern: str = "**/*.pdf",
+            dry_run: bool = False,
+            verbose: bool = True,
+            parser_kwargs: dict = None,
     ):
         """Asynchronously ingests the files in directory.
 

From f433944cdcb509348cdb8834d2edd80810f18a4b Mon Sep 17 00:00:00 2001
From: sanchitvj <sanchitvj1026@gmail.com>
Date: Tue, 26 Mar 2024 20:52:04 -0400
Subject: [PATCH 3/3] Coverage checking: cast top_k to int, pass retriever in RAG tests

---
 projects/Basic-RAG/BasicRAG_stuff.py      |  1 +
 src/config.ini                            |  2 +-
 src/grag/components/multivec_retriever.py | 38 +++++++++++------------
 src/tests/rag/basic_rag_test.py           | 16 ++++++----
 4 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/projects/Basic-RAG/BasicRAG_stuff.py b/projects/Basic-RAG/BasicRAG_stuff.py
index da95ec6..554c305 100644
--- a/projects/Basic-RAG/BasicRAG_stuff.py
+++ b/projects/Basic-RAG/BasicRAG_stuff.py
@@ -6,6 +6,7 @@
 
 client = DeepLakeClient(collection_name="test")
 retriever = Retriever(vectordb=client)
+
 rag = BasicRAG(doc_chain="stuff", retriever=retriever)
 
 if __name__ == "__main__":
diff --git a/src/config.ini b/src/config.ini
index e23c1b5..d55e002 100644
--- a/src/config.ini
+++ b/src/config.ini
@@ -1,5 +1,5 @@
 [llm]
-model_name : Llama-2-7b-chat
+model_name : Llama-2-13b-chat
 # meta-llama/Llama-2-70b-chat-hf Mixtral-8x7B-Instruct-v0.1
 quantization : Q5_K_M
 pipeline : llama_cpp
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index b7c8c2f..5a396fa 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -44,13 +44,13 @@ class Retriever:
     """
 
     def __init__(
-            self,
-            vectordb: Optional[VectorDB] = None,
-            store_path: str = multivec_retriever_conf["store_path"],
-            id_key: str = multivec_retriever_conf["id_key"],
-            namespace: str = multivec_retriever_conf["namespace"],
-            top_k=multivec_retriever_conf["top_k"],
-            client_kwargs: Optional[Dict[str, Any]] = None,
+        self,
+        vectordb: Optional[VectorDB] = None,
+        store_path: str = multivec_retriever_conf["store_path"],
+        id_key: str = multivec_retriever_conf["id_key"],
+        namespace: str = multivec_retriever_conf["namespace"],
+        top_k=int(multivec_retriever_conf["top_k"]),
+        client_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """Initialize the Retriever.
 
@@ -236,12 +236,12 @@ def get_docs_from_chunks(self, chunks: List[Document], one_to_one=False):
                 return [d for d in docs if d is not None]
 
     def ingest(
-            self,
-            dir_path: Union[str, Path],
-            glob_pattern: str = "**/*.pdf",
-            dry_run: bool = False,
-            verbose: bool = True,
-            parser_kwargs: dict = None,
+        self,
+        dir_path: Union[str, Path],
+        glob_pattern: str = "**/*.pdf",
+        dry_run: bool = False,
+        verbose: bool = True,
+        parser_kwargs: dict = None,
     ):
         """Ingests the files in directory.
 
@@ -278,12 +278,12 @@ def ingest(
                     print(f"DRY RUN: found - {filepath.relative_to(dir_path)}")
 
     async def aingest(
-            self,
-            dir_path: Union[str, Path],
-            glob_pattern: str = "**/*.pdf",
-            dry_run: bool = False,
-            verbose: bool = True,
-            parser_kwargs: dict = None,
+        self,
+        dir_path: Union[str, Path],
+        glob_pattern: str = "**/*.pdf",
+        dry_run: bool = False,
+        verbose: bool = True,
+        parser_kwargs: dict = None,
     ):
         """Asynchronously ingests the files in directory.
 
diff --git a/src/tests/rag/basic_rag_test.py b/src/tests/rag/basic_rag_test.py
index 2249028..b8c2ceb 100644
--- a/src/tests/rag/basic_rag_test.py
+++ b/src/tests/rag/basic_rag_test.py
@@ -1,11 +1,16 @@
-from typing import Text, List
+from typing import List, Text
 
+from grag.components.multivec_retriever import Retriever
+from grag.components.vectordb.deeplake_client import DeepLakeClient
 from grag.rag.basic_rag import BasicRAG
 
+client = DeepLakeClient(collection_name="test")
+retriever = Retriever(vectordb=client)
+
 
 def test_rag_stuff():
-    rag = BasicRAG(doc_chain="stuff")
-    response, sources = rag("What is simulated annealing?")
+    rag = BasicRAG(doc_chain="stuff", retriever=retriever)
+    response, sources = rag("What is Flash Attention?")
     assert isinstance(response, Text)
     assert isinstance(sources, List)
     assert all(isinstance(s, str) for s in sources)
@@ -13,9 +18,8 @@ def test_rag_stuff():
 
 
 def test_rag_refine():
-    rag = BasicRAG(doc_chain="refine")
-    response, sources = rag("What is simulated annealing?")
-    # assert isinstance(response, Text)
+    rag = BasicRAG(doc_chain="refine", retriever=retriever)
+    response, sources = rag("What is Flash Attention?")
     assert isinstance(response, List)
     assert all(isinstance(s, str) for s in response)
     assert isinstance(sources, List)