diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..850ef84
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,36 @@
+# Build the Sphinx documentation on every push / pull request and publish the
+# HTML to the gh-pages branch when the build runs for a push to main.
+name: documentation
+
+on: [push, pull_request, workflow_dispatch]
+
+# The deploy step pushes to the gh-pages branch with GITHUB_TOKEN, which
+# needs write access to repository contents.
+permissions:
+  contents: write
+
+jobs:
+  docs:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: Install dependencies
+        run: |
+          pip install sphinx sphinx_rtd_theme myst_parser
+      - name: Sphinx build
+        # NOTE(review): the built docs committed in this repo live under
+        # src/docs/_build - confirm that `doc` is the right source directory.
+        run: |
+          sphinx-build doc _build
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        # Publish only for pushes to main; PR and manual runs just build.
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        with:
+          publish_branch: gh-pages
+          # The action needs one of github_token, deploy_key or personal_token
+          # to authenticate its push to the publish branch.
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: _build/
+          force_orphan: true
\ No newline at end of file
diff --git a/src/docs/_build/doctrees/environment.pickle b/src/docs/_build/doctrees/environment.pickle
index 975ad23..4a888b4 100644
Binary files a/src/docs/_build/doctrees/environment.pickle and b/src/docs/_build/doctrees/environment.pickle differ
diff --git a/src/docs/_build/doctrees/get_started.doctree b/src/docs/_build/doctrees/get_started.doctree
index ad0888a..97c993a 100644
Binary files a/src/docs/_build/doctrees/get_started.doctree and b/src/docs/_build/doctrees/get_started.doctree differ
diff --git a/src/docs/_build/doctrees/get_started.introduction.doctree b/src/docs/_build/doctrees/get_started.introduction.doctree
index f980c11..0597075 100644
Binary files a/src/docs/_build/doctrees/get_started.introduction.doctree and b/src/docs/_build/doctrees/get_started.introduction.doctree differ
diff --git a/src/docs/_build/doctrees/get_started.llms.doctree b/src/docs/_build/doctrees/get_started.llms.doctree
index 887c658..9a20c82 100644
Binary files a/src/docs/_build/doctrees/get_started.llms.doctree and b/src/docs/_build/doctrees/get_started.llms.doctree differ
diff --git a/src/docs/_build/doctrees/get_started.parse_pdf.doctree b/src/docs/_build/doctrees/get_started.parse_pdf.doctree
new file mode 100644
index 0000000..4f8da94
Binary files /dev/null and b/src/docs/_build/doctrees/get_started.parse_pdf.doctree differ
diff --git a/src/docs/_build/doctrees/get_started.vectordb.doctree b/src/docs/_build/doctrees/get_started.vectordb.doctree
index 7ab4197..7bb2883 100644
Binary files a/src/docs/_build/doctrees/get_started.vectordb.doctree and b/src/docs/_build/doctrees/get_started.vectordb.doctree differ
diff --git a/src/docs/_build/doctrees/grag.components.doctree b/src/docs/_build/doctrees/grag.components.doctree
index 96d86fd..2bccbfc 100644
Binary files a/src/docs/_build/doctrees/grag.components.doctree and b/src/docs/_build/doctrees/grag.components.doctree differ
diff --git a/src/docs/_build/doctrees/grag.components.vectordb.doctree b/src/docs/_build/doctrees/grag.components.vectordb.doctree
index 95d2186..f0de757 100644
Binary files a/src/docs/_build/doctrees/grag.components.vectordb.doctree and b/src/docs/_build/doctrees/grag.components.vectordb.doctree differ
diff --git a/src/docs/_build/doctrees/grag.rag.doctree b/src/docs/_build/doctrees/grag.rag.doctree
index 71390fd..bb7cf58 100644
Binary files a/src/docs/_build/doctrees/grag.rag.doctree and b/src/docs/_build/doctrees/grag.rag.doctree differ
diff --git a/src/docs/_build/html/.buildinfo b/src/docs/_build/html/.buildinfo
index ff14325..071ad00 100644
--- a/src/docs/_build/html/.buildinfo
+++ b/src/docs/_build/html/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 33176d1a0fbc2e489b6d5201070d328e
+config: 1ced34aae86d195057701cf655c56180
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/src/docs/_build/html/_sources/get_started.introduction.rst.txt b/src/docs/_build/html/_sources/get_started.introduction.rst.txt
index c72307f..d3c4197 100644
--- a/src/docs/_build/html/_sources/get_started.introduction.rst.txt
+++ b/src/docs/_build/html/_sources/get_started.introduction.rst.txt
@@ -3,9 +3,22 @@ GRAG Overview
GRAG provides an implementation of Retrieval-Augmented Generation that is completely open-sourced.
Since it does not use any external services or APIs, this enables a cost-saving solution as well a solution to data privacy concerns.
-For more information, refer to :ref:`Test `.
+For more information, refer to `our readme `_.
-Retrieval-Augmented Generation
-##############################
+Retrieval-Augmented Generation (RAG)
+####################################
-Re
\ No newline at end of file
+Retrieval-Augmented Generation (RAG) is a technique in machine learning that helps to enhance large-language models (LLM) by incorporating external data.
+
+In RAG, a model first retrieves relevant documents or data from a large corpus and then uses this information to guide the generation of new text. This approach allows the model to produce more informed, accurate, and contextually appropriate responses.
+
+By leveraging both the retrieval of existing knowledge and the generative capabilities of neural networks, RAG models can improve over traditional generation methods, particularly in tasks requiring deep domain-specific knowledge or factual accuracy.
+
+.. figure:: ../../_static/basic_RAG_pipeline.png
+ :width: 800
+ :alt: Basic-RAG Pipeline
+ :align: center
+
+ Illustration of a basic RAG pipeline
+
+Traditionally, RAG uses a vector database/vector store for both retrieval and generation processes.
diff --git a/src/docs/_build/html/_sources/get_started.llms.rst.txt b/src/docs/_build/html/_sources/get_started.llms.rst.txt
index c284755..b79074e 100644
--- a/src/docs/_build/html/_sources/get_started.llms.rst.txt
+++ b/src/docs/_build/html/_sources/get_started.llms.rst.txt
@@ -1,4 +1,4 @@
- `LLMs
+LLMs
=====
GRAG offers two ways to run LLMs locally:
@@ -17,10 +17,10 @@ provide an auth token*
To run LLMs using LlamaCPP
#############################
LlamaCPP requires models in the form of `.gguf` file. You can either download these model files online,
-or
+or **quantize** the model yourself following the instructions below.
-How to quantize models.
-************************
+How to quantize models
+***********************
To quantize the model, run:
``python -m grag.quantize.quantize``
@@ -34,4 +34,4 @@ After running the above command, user will be prompted with the following:
* If the user has the model downloaded locally, then user will be instructed to copy the model and input the name of the model directory.
-3.Finally, the user will be prompted to enter **quantization** settings (recommended Q5_K_M or Q4_K_M, etc.). For more details, check `llama.cpp/examples/quantize/quantize.cpp `_.
+3. Finally, the user will be prompted to enter **quantization** settings (recommended Q5_K_M or Q4_K_M, etc.). For more details, check `llama.cpp/examples/quantize/quantize.cpp `_.
diff --git a/src/docs/_build/html/_sources/get_started.parse_pdf.rst.txt b/src/docs/_build/html/_sources/get_started.parse_pdf.rst.txt
new file mode 100644
index 0000000..4ace62b
--- /dev/null
+++ b/src/docs/_build/html/_sources/get_started.parse_pdf.rst.txt
@@ -0,0 +1,61 @@
+Parse PDF
+=========
+
+The parsing and partitioning were primarily done using the unstructured.io library, which is designed for this purpose. However, for PDFs with complex layouts, such as nested tables or tax forms, the pdfplumber and pytesseract libraries were employed to improve the parsing accuracy.
+
+The ``ParsePDF`` class has several attributes that control the behavior of the parsing and partitioning process.
+
+Attributes
+##########
+
+- single_text_out (bool): If True, all text elements are combined into a single output document. The default value is True.
+
+- strategy (str): The strategy for PDF partitioning. The default is "hi_res" for better accuracy.
+
+- extract_image_block_types (list): A list of elements to be extracted as image blocks. By default, it includes "Image" and "Table".
+
+- infer_table_structure (bool): Whether to extract tables during partitioning. The default value is True.
+
+- extract_images (bool): Whether to extract images. The default value is True.
+
+- image_output_dir (str): The directory to save extracted images, if any.
+
+- add_captions_to_text (bool): Whether to include figure captions in the text output. The default value is True.
+
+- add_captions_to_blocks (bool): Whether to add captions to table and image blocks. The default value is True.
+
+- add_caption_first (bool): Whether to place captions before their corresponding image or table in the output. The default value is True.
+
+- table_as_html (bool): Whether to represent tables as HTML.
+
+Parsing Complex PDF Layouts
+###########################
+
+While unstructured.io performed well in parsing PDFs with straightforward layouts, PDFs with complex layouts, such as nested tables or tax forms, were not parsed accurately. To address this issue, the pdfplumber and pytesseract libraries were employed.
+
+Table Parsing Methodology
+=========================
+
+For each page in the PDF file, the find_tables method is called with specific table settings to find the tables on that page. The table settings used are:
+
+- ``"vertical_strategy": "text"``: This setting tells the function to detect tables based on the text content.
+
+- ``"horizontal_strategy": "lines"``: This setting tells the function to detect tables based on the horizontal lines.
+
+- ``"min_words_vertical": 3``: This setting specifies the minimum number of words required to consider a row as part of a table.
+
+**For each table found on the page, the following steps are performed:**
+
+1. The table area is cropped from the page using the crop method and the bbox (bounding box) of the table.
+
+2. The text content of the cropped table area is extracted using the ``extract_text`` method with ``layout=True``.
+
+3. A dictionary is created with the ``table_number`` and ``extracted_text`` of the table, and it is appended to the ``extracted_tables_in_page`` list.
+
+After processing all the tables on the page, a dictionary is created with the ``page_number`` and the list of ``extracted_tables_in_page``, and it is appended to the ``extracted_tables`` list. Finally, the ``extracted_tables`` list is returned, which contains all the extracted tables from the PDF file, organized by page and table number.
+
+Limitations
+===========
+
+While the table parsing methodology using `pdfplumber` could process most tables, it could not parse every table layout accurately. The table settings need to be adjusted for different types of table layouts. Additionally, pdfplumber could not extract figure captions, whereas `unstructured.io` could.
+Future work may involve developing a more robust and flexible table parsing algorithm that can handle a wider range of table layouts and integrate seamlessly with the ParsePDF class to leverage the strengths of both unstructured.io and pdfplumber libraries.
diff --git a/src/docs/_build/html/_sources/get_started.rst.txt b/src/docs/_build/html/_sources/get_started.rst.txt
index ca80073..19a2d99 100644
--- a/src/docs/_build/html/_sources/get_started.rst.txt
+++ b/src/docs/_build/html/_sources/get_started.rst.txt
@@ -5,6 +5,7 @@ Get Started
get_started.introduction
get_started.installation
+ get_started.parse_pdf
get_started.llms
get_started.vectordb
diff --git a/src/docs/_build/html/_sources/get_started.vectordb.rst.txt b/src/docs/_build/html/_sources/get_started.vectordb.rst.txt
index f6f749a..02f83c9 100644
--- a/src/docs/_build/html/_sources/get_started.vectordb.rst.txt
+++ b/src/docs/_build/html/_sources/get_started.vectordb.rst.txt
@@ -1,5 +1,3 @@
-.. _Vector Stores:
-
Vector Stores
===============
@@ -28,7 +26,14 @@ Since Chroma is a server-client based vector database, make sure to run the serv
* If Chroma is not run locally, change ``host`` and ``port`` under ``chroma`` in `src/config.ini`, or provide the arguments
explicitly.
-For non-supported vectorstores, (...)
+Once you have Chroma running, just use the Chroma Client class.
+
+DeepLake
+*********
+Since DeepLake is not a server-based vector store, it is much easier to get started.
+
+Just make sure you have DeepLake installed and use the DeepLake Client class.
+
Embeddings
###########
@@ -52,4 +57,3 @@ For more details on data ingestion, refer to our `cookbook Get Started
@@ -91,13 +93,20 @@ Get Started
+
+Retrieval-Augmented Generation (RAG)
+Retrieval-Augmented Generation (RAG) is a technique in machine learning that helps to enhance large-language models (LLM) by incorporating external data.
+In RAG, a model first retrieves relevant documents or data from a large corpus and then uses this information to guide the generation of new text. This approach allows the model to produce more informed, accurate, and contextually appropriate responses.
+By leveraging both the retrieval of existing knowledge and the generative capabilities of neural networks, RAG models can improve over traditional generation methods, particularly in tasks requiring deep domain-specific knowledge or factual accuracy.
+
+
+
+Illustration of a basic RAG pipeline
+
+
+Traditionally, it uses a vector database/vector store for both retrieval and generation processes.
diff --git a/src/docs/_build/html/get_started.llms.html b/src/docs/_build/html/get_started.llms.html
index 0fb9a66..1d94f64 100644
--- a/src/docs/_build/html/get_started.llms.html
+++ b/src/docs/_build/html/get_started.llms.html
@@ -4,7 +4,7 @@
- To run LLMs using HuggingFace — GRAG 0.0.1 documentation
+ LLMs — GRAG 0.0.1 documentation
@@ -26,7 +26,7 @@
-
+
@@ -53,9 +53,15 @@
Get Started
GRAG Overview
Installation
-To run LLMs using HuggingFace
-To run LLMs using LlamaCPP
-How to quantize models.
+Parse PDF
+Table Parsing Methodology
+Limitations
+LLMs
Vector Stores
@@ -80,7 +86,7 @@
Get Started
- To run LLMs using HuggingFace
+ LLMs
Edit on GitHub
@@ -90,17 +96,15 @@
-
-
-
+
+LLMs
GRAG offers two ways to run LLMs locally:
LlamaCPP
HuggingFace
-To run LLMs using HuggingFace
+To run LLMs using HuggingFace
This is the easiest way to get started, but does not offer as much
flexibility.
If using a config file (config.ini ), just change the model_name to
@@ -108,11 +112,11 @@
To run LLMs using HuggingFace
@@ -134,7 +141,7 @@
How to quantize models.
diff --git a/src/docs/_build/html/get_started.parse_pdf.html b/src/docs/_build/html/get_started.parse_pdf.html
new file mode 100644
index 0000000..dfda89a
--- /dev/null
+++ b/src/docs/_build/html/get_started.parse_pdf.html
@@ -0,0 +1,174 @@
+
+
+
+
+
+
+ Parse PDF — GRAG 0.0.1 documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ GRAG
+
+
+
+
+
+
+
+
+
+Parse PDF
+The parsing and partitioning were primarily done using the unstructured.io library, which is designed for this purpose. However, for PDFs with complex layouts, such as nested tables or tax forms, the pdfplumber and pytesseract libraries were employed to improve the parsing accuracy.
+The class has several attributes that control the behavior of the parsing and partitioning process.
+
+Attributes
+
+single_text_out (bool): If True, all text elements are combined into a single output document. The default value is True.
+strategy (str): The strategy for PDF partitioning. The default is “hi_res” for better accuracy
+extract_image_block_types (list): A list of elements to be extracted as image blocks. By default, it includes “Image” and “Table”.
+infer_table_structure (bool): Whether to extract tables during partitioning. The default value is True.
+extract_images (bool): Whether to extract images. The default value is True.
+image_output_dir (str): The directory to save extracted images, if any.
+add_captions_to_text (bool): Whether to include figure captions in the text output. The default value is True.
+add_captions_to_blocks (bool): Whether to add captions to table and image blocks. The default value is True.
+add_caption_first (bool): Whether to place captions before their corresponding image or table in the output. The default value is True.
+table_as_html (bool): Whether to represent tables as HTML.
+
+
+
+Parsing Complex PDF Layouts
+While unstructured.io performed well in parsing PDFs with straightforward layouts, PDFs with complex layouts, such as nested tables or tax forms, were not parsed accurately. To address this issue, the pdfplumber and pytesseract libraries were employed.
+
+
+
+Table Parsing Methodology
+For each page in the PDF file, the find_tables method is called with specific table settings to find the tables on that page. The table settings used are:
+
+"vertical_strategy": "text"
: This setting tells the function to detect tables based on the text content.
+"horizontal_strategy": "lines"
: This setting tells the function to detect tables based on the horizontal lines.
+"min_words_vertical": 3
: This setting specifies the minimum number of words required to consider a row as part of a table.
+
+For each table found on the page, the following steps are performed:
+
+The table area is cropped from the page using the crop method and the bbox (bounding box) of the table.
+The text content of the cropped table area is extracted using the extract_text method with layout=True .
+
+3. A dictionary is created with the table_number and extracted_text of the table, and it is appended to the extracted_tables_in_page list.
+After processing all the tables on the page, a dictionary is created with the page_number and the list of extracted_tables_in_page , and it is appended to the extracted_tables list.
+Finally, the extracted_tables list is returned, which contains all the extracted tables from the PDF file, organized by page and table number.
+
+
+Limitations
+While the table parsing methodology using pdfplumber could process most tables, it could not parse every table layout accurately. The table settings need to be adjusted for different types of table layouts. Additionally, pdfplumber could not extract figure captions, whereas unstructured.io could.
+Future work may involve developing a more robust and flexible table parsing algorithm that can handle a wider range of table layouts and integrate seamlessly with the ParsePDF class to leverage the strengths of both unstructured.io and pdfplumber libraries.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/docs/_build/html/get_started.vectordb.html b/src/docs/_build/html/get_started.vectordb.html
index 8c37e60..a73a2dc 100644
--- a/src/docs/_build/html/get_started.vectordb.html
+++ b/src/docs/_build/html/get_started.vectordb.html
@@ -26,7 +26,7 @@
-
+
@@ -53,11 +53,14 @@
Get Started
GRAG Overview
Installation
-To run LLMs using HuggingFace
-To run LLMs using LlamaCPP
+Parse PDF
+Table Parsing Methodology
+Limitations
+LLMs
Vector Stores
Supported Vector Stores
Embeddings
@@ -96,7 +99,7 @@
-Vector Stores
+Vector Stores
Vector store or vector database is a type of database that stores data in high-dimensional vectors.
This is a crucial component of RAG, storing embeddings for both retrieval and generation processes.
@@ -120,7 +123,12 @@ Chroma
+
+DeepLake
+Since DeepLake is not a server based vector store, it is much easier to get started.
+Just make sure you have DeepLake installed and use the DeepLake Client class.
diff --git a/src/docs/_build/html/grag.components.html b/src/docs/_build/html/grag.components.html
index 6b46a25..20feb4f 100644
--- a/src/docs/_build/html/grag.components.html
+++ b/src/docs/_build/html/grag.components.html
@@ -192,9 +192,9 @@ VectorDB
diff --git a/src/docs/_build/html/objects.inv b/src/docs/_build/html/objects.inv
index 6025fb3..b8bdc45 100644
Binary files a/src/docs/_build/html/objects.inv and b/src/docs/_build/html/objects.inv differ
diff --git a/src/docs/_build/html/searchindex.js b/src/docs/_build/html/searchindex.js
index 79265bf..fcecd1f 100644
--- a/src/docs/_build/html/searchindex.js
+++ b/src/docs/_build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"Base": [[18, "module-grag.components.vectordb.base"]], "Basic RAG": [[21, "module-grag.rag.basic_rag"]], "Basic-RAG Cookbooks": [[5, "basic-rag-cookbooks"]], "Chroma": [[15, "chroma"]], "Chroma Client": [[18, "module-grag.components.vectordb.chroma_client"]], "Components": [[17, "components"]], "Computation times": [[6, "computation-times"], [9, "computation-times"], [24, "computation-times"]], "Contents:": [[22, null]], "Cookbooks": [[10, "cookbooks"]], "Custom Few-Shot Prompts": [[1, "custom-few-shot-prompts"]], "Custom Prompts": [[0, "custom-prompts"]], "Data Ingestion": [[15, "data-ingestion"]], "Deeplake Client": [[18, "module-grag.components.vectordb.deeplake_client"]], "Document Ingestion": [[2, "document-ingestion"]], "Embedding": [[17, "module-grag.components.embedding"]], "Embeddings": [[15, "embeddings"]], "GRAG": [[16, "grag"]], "GRAG Overview": [[13, "grag-overview"]], "Get Started": [[11, "get-started"]], "How to quantize models.": [[14, "how-to-quantize-models"]], "Indices and tables": [[22, "indices-and-tables"]], "Installation": [[12, "installation"]], "LLM": [[17, "module-grag.components.llm"]], "Module Contents": [[18, "module-grag.components.vectordb"]], "Module contents": [[16, "module-grag"], [17, "module-grag.components"], [19, "module-grag.prompts"], [20, "module-grag.quantize"], [21, "module-grag.rag"]], "Parse PDF": [[17, "module-grag.components.parse_pdf"]], "Prompt": [[17, "module-grag.components.prompt"]], "Prompts": [[19, "prompts"]], "Quantize": [[20, "quantize"], [20, "id1"]], "RAG": [[21, "rag"]], "Refine Chain": [[3, "refine-chain"]], "Retrieval-Augmented Generation": [[13, "retrieval-augmented-generation"]], "Retriever": [[17, "module-grag.components.multivec_retriever"]], "Retriever GUI": [[8, "retriever-gui"]], "Retriever-GUI Cookbooks": [[7, "retriever-gui-cookbooks"]], "Stuff Chain": [[4, "stuff-chain"]], "Submodules": [[18, "submodules"], [20, "submodules"], [21, "submodules"]], "Supported 
Vector Stores": [[15, "supported-vector-stores"]], "Text Splitter": [[17, "module-grag.components.text_splitter"]], "To run LLMs using HuggingFace": [[14, "to-run-llms-using-huggingface"]], "To run LLMs using LlamaCPP": [[14, "to-run-llms-using-llamacpp"]], "Utils": [[17, "module-grag.components.utils"], [20, "module-grag.quantize.utils"]], "Vector Stores": [[15, "vector-stores"]], "VectorDB": [[17, "vectordb"], [18, "vectordb"]], "Welcome to GRAG\u2019s documentation!": [[22, "welcome-to-grag-s-documentation"]], "grag": [[23, "grag"]]}, "docnames": ["auto_examples/Basic-RAG/BasicRAG_CustomPrompt", "auto_examples/Basic-RAG/BasicRAG_FewShotPrompt", "auto_examples/Basic-RAG/BasicRAG_ingest", "auto_examples/Basic-RAG/BasicRAG_refine", "auto_examples/Basic-RAG/BasicRAG_stuff", "auto_examples/Basic-RAG/index", "auto_examples/Basic-RAG/sg_execution_times", "auto_examples/Retriver-GUI/index", "auto_examples/Retriver-GUI/retriever_app", "auto_examples/Retriver-GUI/sg_execution_times", "auto_examples_index", "get_started", "get_started.installation", "get_started.introduction", "get_started.llms", "get_started.vectordb", "grag", "grag.components", "grag.components.vectordb", "grag.prompts", "grag.quantize", "grag.rag", "index", "modules", "sg_execution_times"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2}, "filenames": ["auto_examples/Basic-RAG/BasicRAG_CustomPrompt.rst", "auto_examples/Basic-RAG/BasicRAG_FewShotPrompt.rst", "auto_examples/Basic-RAG/BasicRAG_ingest.rst", "auto_examples/Basic-RAG/BasicRAG_refine.rst", "auto_examples/Basic-RAG/BasicRAG_stuff.rst", "auto_examples/Basic-RAG/index.rst", "auto_examples/Basic-RAG/sg_execution_times.rst", "auto_examples/Retriver-GUI/index.rst", 
"auto_examples/Retriver-GUI/retriever_app.rst", "auto_examples/Retriver-GUI/sg_execution_times.rst", "auto_examples_index.rst", "get_started.rst", "get_started.installation.rst", "get_started.introduction.rst", "get_started.llms.rst", "get_started.vectordb.rst", "grag.rst", "grag.components.rst", "grag.components.vectordb.rst", "grag.prompts.rst", "grag.quantize.rst", "grag.rag.rst", "index.rst", "modules.rst", "sg_execution_times.rst"], "indexentries": {"aadd_docs() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.base.vectordb method)": [[18, "grag.components.vectordb.base.VectorDB.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.aadd_docs", false]], "add_caption_first (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.add_caption_first", false]], "add_captions_to_blocks (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.add_captions_to_blocks", false]], "add_captions_to_text (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.add_captions_to_text", false]], "add_docs() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.add_docs", false]], "add_docs() (grag.components.vectordb.base.vectordb method)": [[18, "grag.components.vectordb.base.VectorDB.add_docs", false]], "add_docs() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.add_docs", false]], "add_docs() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[18, 
"grag.components.vectordb.deeplake_client.DeepLakeClient.add_docs", false]], "aget_chunk() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.base.vectordb method)": [[18, "grag.components.vectordb.base.VectorDB.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.aget_chunk", false]], "aget_doc() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.aget_doc", false]], "aingest() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.aingest", false]], "basicrag (class in grag.rag.basic_rag)": [[21, "grag.rag.basic_rag.BasicRAG", false]], "building_llamacpp() (in module grag.quantize.utils)": [[20, "grag.quantize.utils.building_llamacpp", false]], "chromaclient (class in grag.components.vectordb.chroma_client)": [[18, "grag.components.vectordb.chroma_client.ChromaClient", false]], "chunk_overlap (grag.components.text_splitter.textsplitter attribute)": [[17, "grag.components.text_splitter.TextSplitter.chunk_overlap", false]], "chunk_size (grag.components.text_splitter.textsplitter attribute)": [[17, "grag.components.text_splitter.TextSplitter.chunk_size", false]], "classify() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.classify", false]], "client (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.client", false]], "client (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.client", 
false]], "collection (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.collection", false]], "collection (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.collection", false]], "collection_name (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.collection_name", false]], "custom_prompt (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.custom_prompt", false]], "deeplakeclient (class in grag.components.vectordb.deeplake_client)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient", false]], "delete() (grag.components.vectordb.base.vectordb method)": [[18, "grag.components.vectordb.base.VectorDB.delete", false]], "delete() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.delete", false]], "delete() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.delete", false]], "device_map (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.device_map", false]], "doc_chain (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.doc_chain", false]], "doc_chain (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.doc_chain", false], [17, "id6", false]], "doc_chain (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.doc_chain", false]], "doc_chain (grag.rag.basic_rag.basicrag property)": [[21, "id0", false]], "embedding (class in grag.components.embedding)": [[17, "grag.components.embedding.Embedding", false]], "embedding_function (grag.components.embedding.embedding attribute)": [[17, "grag.components.embedding.Embedding.embedding_function", false]], 
"embedding_function (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.embedding_function", false]], "embedding_function (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_function", false]], "embedding_model (grag.components.embedding.embedding attribute)": [[17, "grag.components.embedding.Embedding.embedding_model", false]], "embedding_model (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.embedding_model", false]], "embedding_model (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_model", false]], "embedding_type (grag.components.embedding.embedding attribute)": [[17, "grag.components.embedding.Embedding.embedding_type", false]], "embedding_type (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.embedding_type", false]], "embedding_type (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_type", false]], "example_template (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.example_template", false], [17, "id2", false]], "examples (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.examples", false], [17, "id3", false]], "extract_image_block_types (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.extract_image_block_types", false]], "extract_images (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.extract_images", false]], "fetch_model_repo() (in module grag.quantize.utils)": [[20, 
"grag.quantize.utils.fetch_model_repo", false]], "fewshotprompt (class in grag.components.prompt)": [[17, "grag.components.prompt.FewShotPrompt", false]], "filepath (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.filepath", false]], "filepath (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.filepath", false], [17, "id7", false]], "find_config_path() (in module grag.components.utils)": [[17, "grag.components.utils.find_config_path", false]], "format() (grag.components.prompt.prompt method)": [[17, "grag.components.prompt.Prompt.format", false]], "gen_doc_ids() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.gen_doc_ids", false]], "get_chunk() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.get_chunk", false]], "get_chunk() (grag.components.vectordb.base.vectordb method)": [[18, "grag.components.vectordb.base.VectorDB.get_chunk", false]], "get_chunk() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.get_chunk", false]], "get_chunk() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.get_chunk", false]], "get_config() (in module grag.components.utils)": [[17, "grag.components.utils.get_config", false]], "get_doc() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.get_doc", false]], "get_docs_from_chunks() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.get_docs_from_chunks", false]], "get_llamacpp_repo() (in module grag.quantize.utils)": [[20, "grag.quantize.utils.get_llamacpp_repo", false]], "grag": [[16, "module-grag", false]], "grag.components": [[17, "module-grag.components", false]], 
"grag.components.embedding": [[17, "module-grag.components.embedding", false]], "grag.components.llm": [[17, "module-grag.components.llm", false]], "grag.components.multivec_retriever": [[17, "module-grag.components.multivec_retriever", false]], "grag.components.parse_pdf": [[17, "module-grag.components.parse_pdf", false]], "grag.components.prompt": [[17, "module-grag.components.prompt", false]], "grag.components.text_splitter": [[17, "module-grag.components.text_splitter", false]], "grag.components.utils": [[17, "module-grag.components.utils", false]], "grag.components.vectordb": [[18, "module-grag.components.vectordb", false]], "grag.components.vectordb.base": [[18, "module-grag.components.vectordb.base", false]], "grag.components.vectordb.chroma_client": [[18, "module-grag.components.vectordb.chroma_client", false]], "grag.components.vectordb.deeplake_client": [[18, "module-grag.components.vectordb.deeplake_client", false]], "grag.prompts": [[19, "module-grag.prompts", false]], "grag.quantize": [[20, "module-grag.quantize", false]], "grag.quantize.quantize": [[20, "module-grag.quantize.quantize", false]], "grag.quantize.utils": [[20, "module-grag.quantize.utils", false]], "grag.rag": [[21, "module-grag.rag", false]], "grag.rag.basic_rag": [[21, "module-grag.rag.basic_rag", false]], "hf_pipeline() (grag.components.llm.llm method)": [[17, "grag.components.llm.LLM.hf_pipeline", false]], "host (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.host", false]], "id_gen() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.id_gen", false]], "id_key (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.id_key", false]], "image_output_dir (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.image_output_dir", false]], "infer_table_structure 
(grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.infer_table_structure", false]], "ingest() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.ingest", false]], "input_keys (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.input_keys", false], [17, "id1", false]], "input_keys (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.input_keys", false], [17, "id8", false]], "langchain_client (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.langchain_client", false]], "langchain_client (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.langchain_client", false]], "language (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.language", false]], "language (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.language", false], [17, "id9", false]], "llama_cpp() (grag.components.llm.llm method)": [[17, "grag.components.llm.LLM.llama_cpp", false]], "llm (class in grag.components.llm)": [[17, "grag.components.llm.LLM", false]], "llm_kwargs (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.llm_kwargs", false]], "llm_type (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.llm_type", false]], "llm_type (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.llm_type", false], [17, "id10", false]], "load() (grag.components.prompt.prompt class method)": [[17, "grag.components.prompt.Prompt.load", false]], "load_file() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.load_file", false]], "load_model() (grag.components.llm.llm method)": [[17, 
"grag.components.llm.LLM.load_model", false]], "max_new_tokens (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.max_new_tokens", false]], "model_computed_fields (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.model_computed_fields", false]], "model_computed_fields (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.model_computed_fields", false]], "model_config (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.model_config", false]], "model_config (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.model_config", false]], "model_fields (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.model_fields", false]], "model_fields (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.model_fields", false]], "model_name (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.model_name", false]], "model_name (grag.components.llm.llm property)": [[17, "id0", false]], "model_name (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.model_name", false]], "model_name (grag.rag.basic_rag.basicrag property)": [[21, "id1", false]], "model_path (grag.components.llm.llm property)": [[17, "grag.components.llm.LLM.model_path", false]], "module": [[16, "module-grag", false], [17, "module-grag.components", false], [17, "module-grag.components.embedding", false], [17, "module-grag.components.llm", false], [17, "module-grag.components.multivec_retriever", false], [17, "module-grag.components.parse_pdf", false], [17, "module-grag.components.prompt", false], [17, "module-grag.components.text_splitter", false], [17, "module-grag.components.utils", false], [18, "module-grag.components.vectordb", false], [18, "module-grag.components.vectordb.base", false], [18, "module-grag.components.vectordb.chroma_client", false], 
[18, "module-grag.components.vectordb.deeplake_client", false], [19, "module-grag.prompts", false], [20, "module-grag.quantize", false], [20, "module-grag.quantize.quantize", false], [20, "module-grag.quantize.utils", false], [21, "module-grag.rag", false], [21, "module-grag.rag.basic_rag", false]], "n_batch (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.n_batch", false]], "n_ctx (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.n_ctx", false]], "n_gpu_layers (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.n_gpu_layers", false]], "name (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.name", false]], "name (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.name", false], [17, "id11", false]], "namespace (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.namespace", false]], "output_keys (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.output_keys", false]], "output_parser() (grag.rag.basic_rag.basicrag static method)": [[21, "grag.rag.basic_rag.BasicRAG.output_parser", false]], "parsepdf (class in grag.components.parse_pdf)": [[17, "grag.components.parse_pdf.ParsePDF", false]], "partition() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.partition", false]], "port (grag.components.vectordb.chroma_client.chromaclient attribute)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.port", false]], "prefix (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.prefix", false], [17, "id4", false]], "process_images() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.process_images", false]], "process_tables() (grag.components.parse_pdf.parsepdf method)": [[17, 
"grag.components.parse_pdf.ParsePDF.process_tables", false]], "process_text() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.process_text", false]], "prompt (class in grag.components.prompt)": [[17, "grag.components.prompt.Prompt", false]], "prompt (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.prompt", false]], "prompt_matcher() (grag.rag.basic_rag.basicrag method)": [[21, "grag.rag.basic_rag.BasicRAG.prompt_matcher", false]], "quantize_model() (in module grag.quantize.utils)": [[20, "grag.quantize.utils.quantize_model", false]], "refine_call() (grag.rag.basic_rag.basicrag method)": [[21, "grag.rag.basic_rag.BasicRAG.refine_call", false]], "retriever (class in grag.components.multivec_retriever)": [[17, "grag.components.multivec_retriever.Retriever", false]], "retriever (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.retriever", false]], "retriever_kwargs (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.retriever_kwargs", false]], "save() (grag.components.prompt.prompt method)": [[17, "grag.components.prompt.Prompt.save", false]], "single_text_out (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.single_text_out", false]], "source (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.source", false]], "source (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.source", false], [17, "id12", false]], "split_docs() (grag.components.multivec_retriever.retriever method)": [[17, "grag.components.multivec_retriever.Retriever.split_docs", false]], "splitter (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.splitter", false]], "store (grag.components.multivec_retriever.retriever attribute)": [[17, 
"grag.components.multivec_retriever.Retriever.store", false]], "store_path (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.store_path", false]], "store_path (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[18, "grag.components.vectordb.deeplake_client.DeepLakeClient.store_path", false]], "strategy (grag.components.parse_pdf.parsepdf attribute)": [[17, "grag.components.parse_pdf.ParsePDF.strategy", false]], "stuff_call() (grag.rag.basic_rag.basicrag method)": [[21, "grag.rag.basic_rag.BasicRAG.stuff_call", false]], "stuff_docs() (grag.rag.basic_rag.basicrag static method)": [[21, "grag.rag.basic_rag.BasicRAG.stuff_docs", false]], "stuff_docs() (in module grag.components.utils)": [[17, "grag.components.utils.stuff_docs", false]], "suffix (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.suffix", false], [17, "id5", false]], "task (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.task", false]], "task (grag.components.prompt.fewshotprompt attribute)": [[17, "grag.components.prompt.FewShotPrompt.task", false]], "task (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.task", false], [17, "id13", false]], "task (grag.rag.basic_rag.basicrag attribute)": [[21, "grag.rag.basic_rag.BasicRAG.task", false]], "task (grag.rag.basic_rag.basicrag property)": [[21, "id2", false]], "temperature (grag.components.llm.llm attribute)": [[17, "grag.components.llm.LLM.temperature", false]], "template (grag.components.prompt.prompt attribute)": [[17, "grag.components.prompt.Prompt.template", false]], "test_connection() (grag.components.vectordb.chroma_client.chromaclient method)": [[18, "grag.components.vectordb.chroma_client.ChromaClient.test_connection", false]], "text_concat() (grag.components.parse_pdf.parsepdf method)": [[17, "grag.components.parse_pdf.ParsePDF.text_concat", false]], "text_splitter 
(grag.components.text_splitter.textsplitter attribute)": [[17, "grag.components.text_splitter.TextSplitter.text_splitter", false]], "textsplitter (class in grag.components.text_splitter)": [[17, "grag.components.text_splitter.TextSplitter", false]], "top_k (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.top_k", false]], "validate_doc_chain() (grag.components.prompt.prompt class method)": [[17, "grag.components.prompt.Prompt.validate_doc_chain", false]], "validate_examples() (grag.components.prompt.fewshotprompt class method)": [[17, "grag.components.prompt.FewShotPrompt.validate_examples", false]], "validate_input_keys() (grag.components.prompt.prompt class method)": [[17, "grag.components.prompt.Prompt.validate_input_keys", false]], "validate_output_keys() (grag.components.prompt.fewshotprompt class method)": [[17, "grag.components.prompt.FewShotPrompt.validate_output_keys", false]], "validate_task() (grag.components.prompt.prompt class method)": [[17, "grag.components.prompt.Prompt.validate_task", false]], "vectordb (class in grag.components.vectordb.base)": [[18, "grag.components.vectordb.base.VectorDB", false]], "vectordb (grag.components.multivec_retriever.retriever attribute)": [[17, "grag.components.multivec_retriever.Retriever.vectordb", false]]}, "objects": {"": [[16, 0, 0, "-", "grag"]], "grag": [[17, 0, 0, "-", "components"], [19, 0, 0, "-", "prompts"], [20, 0, 0, "-", "quantize"], [21, 0, 0, "-", "rag"]], "grag.components": [[17, 0, 0, "-", "embedding"], [17, 0, 0, "-", "llm"], [17, 0, 0, "-", "multivec_retriever"], [17, 0, 0, "-", "parse_pdf"], [17, 0, 0, "-", "prompt"], [17, 0, 0, "-", "text_splitter"], [17, 0, 0, "-", "utils"], [18, 0, 0, "-", "vectordb"]], "grag.components.embedding": [[17, 1, 1, "", "Embedding"]], "grag.components.embedding.Embedding": [[17, 2, 1, "", "embedding_function"], [17, 2, 1, "", "embedding_model"], [17, 2, 1, "", "embedding_type"]], "grag.components.llm": 
[[17, 1, 1, "", "LLM"]], "grag.components.llm.LLM": [[17, 2, 1, "", "device_map"], [17, 3, 1, "", "hf_pipeline"], [17, 3, 1, "", "llama_cpp"], [17, 3, 1, "", "load_model"], [17, 2, 1, "", "max_new_tokens"], [17, 4, 1, "id0", "model_name"], [17, 4, 1, "", "model_path"], [17, 2, 1, "", "n_batch"], [17, 2, 1, "", "n_ctx"], [17, 2, 1, "", "n_gpu_layers"], [17, 2, 1, "", "task"], [17, 2, 1, "", "temperature"]], "grag.components.multivec_retriever": [[17, 1, 1, "", "Retriever"]], "grag.components.multivec_retriever.Retriever": [[17, 3, 1, "", "aadd_docs"], [17, 3, 1, "", "add_docs"], [17, 3, 1, "", "aget_chunk"], [17, 3, 1, "", "aget_doc"], [17, 3, 1, "", "aingest"], [17, 3, 1, "", "gen_doc_ids"], [17, 3, 1, "", "get_chunk"], [17, 3, 1, "", "get_doc"], [17, 3, 1, "", "get_docs_from_chunks"], [17, 3, 1, "", "id_gen"], [17, 2, 1, "", "id_key"], [17, 3, 1, "", "ingest"], [17, 2, 1, "", "namespace"], [17, 2, 1, "", "retriever"], [17, 3, 1, "", "split_docs"], [17, 2, 1, "", "splitter"], [17, 2, 1, "", "store"], [17, 2, 1, "", "store_path"], [17, 2, 1, "", "top_k"], [17, 2, 1, "", "vectordb"]], "grag.components.parse_pdf": [[17, 1, 1, "", "ParsePDF"]], "grag.components.parse_pdf.ParsePDF": [[17, 2, 1, "", "add_caption_first"], [17, 2, 1, "", "add_captions_to_blocks"], [17, 2, 1, "", "add_captions_to_text"], [17, 3, 1, "", "classify"], [17, 2, 1, "", "extract_image_block_types"], [17, 2, 1, "", "extract_images"], [17, 2, 1, "", "image_output_dir"], [17, 2, 1, "", "infer_table_structure"], [17, 3, 1, "", "load_file"], [17, 3, 1, "", "partition"], [17, 3, 1, "", "process_images"], [17, 3, 1, "", "process_tables"], [17, 3, 1, "", "process_text"], [17, 2, 1, "", "single_text_out"], [17, 2, 1, "", "strategy"], [17, 3, 1, "", "text_concat"]], "grag.components.prompt": [[17, 1, 1, "", "FewShotPrompt"], [17, 1, 1, "", "Prompt"]], "grag.components.prompt.FewShotPrompt": [[17, 2, 1, "", "doc_chain"], [17, 2, 1, "id2", "example_template"], [17, 2, 1, "id3", "examples"], [17, 2, 1, "", 
"filepath"], [17, 2, 1, "id1", "input_keys"], [17, 2, 1, "", "language"], [17, 2, 1, "", "llm_type"], [17, 2, 1, "", "model_computed_fields"], [17, 2, 1, "", "model_config"], [17, 2, 1, "", "model_fields"], [17, 2, 1, "", "name"], [17, 2, 1, "", "output_keys"], [17, 2, 1, "id4", "prefix"], [17, 2, 1, "", "source"], [17, 2, 1, "id5", "suffix"], [17, 2, 1, "", "task"], [17, 3, 1, "", "validate_examples"], [17, 3, 1, "", "validate_output_keys"]], "grag.components.prompt.Prompt": [[17, 2, 1, "id6", "doc_chain"], [17, 2, 1, "id7", "filepath"], [17, 3, 1, "", "format"], [17, 2, 1, "id8", "input_keys"], [17, 2, 1, "id9", "language"], [17, 2, 1, "id10", "llm_type"], [17, 3, 1, "", "load"], [17, 2, 1, "", "model_computed_fields"], [17, 2, 1, "", "model_config"], [17, 2, 1, "", "model_fields"], [17, 2, 1, "id11", "name"], [17, 2, 1, "", "prompt"], [17, 3, 1, "", "save"], [17, 2, 1, "id12", "source"], [17, 2, 1, "id13", "task"], [17, 2, 1, "", "template"], [17, 3, 1, "", "validate_doc_chain"], [17, 3, 1, "", "validate_input_keys"], [17, 3, 1, "", "validate_task"]], "grag.components.text_splitter": [[17, 1, 1, "", "TextSplitter"]], "grag.components.text_splitter.TextSplitter": [[17, 2, 1, "", "chunk_overlap"], [17, 2, 1, "", "chunk_size"], [17, 2, 1, "", "text_splitter"]], "grag.components.utils": [[17, 5, 1, "", "find_config_path"], [17, 5, 1, "", "get_config"], [17, 5, 1, "", "stuff_docs"]], "grag.components.vectordb": [[18, 0, 0, "-", "base"], [18, 0, 0, "-", "chroma_client"], [18, 0, 0, "-", "deeplake_client"]], "grag.components.vectordb.base": [[18, 1, 1, "", "VectorDB"]], "grag.components.vectordb.base.VectorDB": [[18, 3, 1, "", "aadd_docs"], [18, 3, 1, "", "add_docs"], [18, 3, 1, "", "aget_chunk"], [18, 3, 1, "", "delete"], [18, 3, 1, "", "get_chunk"]], "grag.components.vectordb.chroma_client": [[18, 1, 1, "", "ChromaClient"]], "grag.components.vectordb.chroma_client.ChromaClient": [[18, 3, 1, "", "aadd_docs"], [18, 3, 1, "", "add_docs"], [18, 3, 1, "", "aget_chunk"], 
[18, 2, 1, "", "client"], [18, 2, 1, "", "collection"], [18, 2, 1, "", "collection_name"], [18, 3, 1, "", "delete"], [18, 2, 1, "", "embedding_function"], [18, 2, 1, "", "embedding_model"], [18, 2, 1, "", "embedding_type"], [18, 3, 1, "", "get_chunk"], [18, 2, 1, "", "host"], [18, 2, 1, "", "langchain_client"], [18, 2, 1, "", "port"], [18, 3, 1, "", "test_connection"]], "grag.components.vectordb.deeplake_client": [[18, 1, 1, "", "DeepLakeClient"]], "grag.components.vectordb.deeplake_client.DeepLakeClient": [[18, 3, 1, "", "aadd_docs"], [18, 3, 1, "", "add_docs"], [18, 3, 1, "", "aget_chunk"], [18, 2, 1, "", "client"], [18, 2, 1, "", "collection"], [18, 3, 1, "", "delete"], [18, 2, 1, "", "embedding_function"], [18, 2, 1, "", "embedding_model"], [18, 2, 1, "", "embedding_type"], [18, 3, 1, "", "get_chunk"], [18, 2, 1, "", "langchain_client"], [18, 2, 1, "", "store_path"]], "grag.quantize": [[20, 0, 0, "-", "quantize"], [20, 0, 0, "-", "utils"]], "grag.quantize.utils": [[20, 5, 1, "", "building_llamacpp"], [20, 5, 1, "", "fetch_model_repo"], [20, 5, 1, "", "get_llamacpp_repo"], [20, 5, 1, "", "quantize_model"]], "grag.rag": [[21, 0, 0, "-", "basic_rag"]], "grag.rag.basic_rag": [[21, 1, 1, "", "BasicRAG"]], "grag.rag.basic_rag.BasicRAG": [[21, 2, 1, "", "custom_prompt"], [21, 4, 1, "id0", "doc_chain"], [21, 2, 1, "", "llm_kwargs"], [21, 4, 1, "id1", "model_name"], [21, 3, 1, "", "output_parser"], [21, 3, 1, "", "prompt_matcher"], [21, 3, 1, "", "refine_call"], [21, 2, 1, "", "retriever_kwargs"], [21, 3, 1, "", "stuff_call"], [21, 3, 1, "", "stuff_docs"], [21, 4, 1, "id2", "task"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": 
"py:function"}, "terms": {"": 17, "0": [6, 8, 9, 17, 24], "00": [6, 9, 24], "000": [6, 9], "001": 24, "01": 24, "03": 24, "036": 24, "06": 24, "1": [8, 9, 17], "1024": 17, "12": 24, "13b": 17, "2": [2, 15, 17], "237": 24, "275": 24, "3": [8, 14, 17], "3f": 8, "400": 17, "484": 24, "5": [6, 24], "5000": 17, "6000": 17, "8000": [15, 18], "8c9040b0b5cd4d7cbc2e737da1b24ebf": 17, "935": 24, "A": [8, 17, 18, 20], "By": 15, "For": [3, 4, 12, 13, 14, 15], "If": [14, 15, 17], "It": [17, 22], "OR": 15, "The": [8, 17, 18, 20, 22], "To": [11, 15, 22], "_": [], "__fields__": 17, "__file__": [2, 15], "__init__": 8, "__main__": [3, 4, 8], "__name__": [3, 4, 8], "_build": [], "_queri": 8, "_static": [], "_top_k": 8, "aadd_doc": [16, 17, 18], "abc": 18, "about": [8, 17], "abov": [14, 17], "abstract": 18, "accuraci": 17, "activ": 8, "add": [17, 18], "add_caption_first": [16, 17], "add_captions_to_block": [16, 17], "add_captions_to_text": [16, 17], "add_doc": [16, 17, 18], "addit": 17, "address": [8, 18], "after": [14, 17], "aget_chunk": [16, 17, 18], "aget_doc": [16, 17], "aingest": [2, 16, 17], "alik": 8, "aliv": 18, "all": [5, 7, 17, 18, 19, 24], "also": [17, 22], "alt": [], "altern": 2, "alwai": 17, "an": [7, 13, 14, 22], "ani": [13, 15, 17, 22], "annot": 17, "answer": [0, 1], "anyth": 8, "api": [13, 18], "app": 8, "appli": 20, "applic": 8, "ar": [12, 14, 15], "arg": 8, "argument": [15, 17, 21], "artifici": 8, "arxiv": 18, "async": [2, 17, 18], "asynchron": [2, 17, 18], "asyncio": 2, "attempt": 20, "attribut": 8, "augment": [11, 22], "auth": 14, "auto": 17, "auto_exampl": [6, 9], "avail": 17, "awar": 17, "backend": 8, "bar": 18, "base": [0, 1, 8, 15, 16, 17, 21], "base_dir": 17, "basemodel": 17, "basi": 21, "basic": [0, 1, 6, 10, 16, 22, 23, 24], "basic_rag": [0, 1, 3, 4, 21], "basicrag": [0, 1, 3, 4, 16, 21, 23], "basicrag_customprompt": [0, 6, 24], "basicrag_fewshotprompt": [1, 6, 24], "basicrag_ingest": [2, 6, 24], "basicrag_refin": [3, 6, 24], "basicrag_stuff": [4, 6, 24], 
"batch": 17, "befor": 17, "being": 17, "below": [1, 5, 7], "better": 17, "block": 17, "blog": 8, "bool": [17, 18], "both": 15, "build": 20, "building_llamacpp": [16, 20, 23], "bulb": 8, "button": 8, "call": 17, "call_func": 21, "came": 1, "can": [12, 14, 15], "cannot": 17, "capstone_5": [17, 18], "caption": 17, "chain": [5, 6, 17, 21], "chang": [14, 15], "chat": 17, "check": [8, 14], "check_connect": 8, "chroma": [2, 8, 16, 17, 22], "chroma_cli": [2, 18], "chromacli": [2, 17, 18], "chromadb": 18, "chunk": [8, 17, 18], "chunk_overlap": [16, 17], "chunk_siz": [16, 17], "ci_test": 2, "class": [8, 17, 18, 21], "classifi": [16, 17], "classmethod": 17, "classvar": 17, "client": [2, 3, 4, 8, 15, 16, 17], "client_kwarg": 17, "clientserv": 15, "clone": [12, 14, 20], "cmake": 20, "co": 20, "code": [0, 1, 2, 3, 4, 5, 7, 8], "collect": [8, 17, 18], "collection_nam": [2, 3, 4, 8, 17, 18], "combin": 17, "command": 14, "complet": 13, "completedprocess": 20, "compon": [0, 1, 2, 3, 4, 8, 15, 16, 18, 22, 23], "comput": 17, "computedfieldinfo": 17, "concat": 17, "concaten": [17, 21], "concern": 13, "config": [12, 14, 15, 17], "configdict": 17, "configpars": 17, "configur": 17, "conform": 17, "connect": [8, 17, 18], "contain": [15, 17, 19, 20], "content": 23, "context": [0, 1, 17], "convolut": 1, "cookbook": [0, 1, 2, 3, 4, 8, 15, 22, 24], "copi": [14, 20], "core": 18, "correspond": [17, 18], "cosin": [17, 18], "cost": 13, "could": 8, "count": 8, "cpp": [14, 17, 20], "crucial": 15, "current": [15, 17], "current_path": 17, "custom": [5, 6, 12, 17], "custom_few_shot_prompt": 1, "custom_prompt": [0, 1, 16, 17, 21], "data": [2, 11, 13, 17, 18], "databas": [2, 15, 17, 18], "decor": 21, "deeplak": [15, 16, 17, 22], "deeplake_cli": [2, 3, 4, 18], "deeplakecli": [2, 3, 4, 15, 17, 18], "def": 8, "default": [15, 17, 18, 20, 21], "defin": 17, "delet": [17, 18], "demonstr": [0, 1, 2, 3, 4, 5, 7, 8], "deriv": 18, "detail": [8, 14, 15, 17], "develop": 12, "devic": 17, "device_map": [16, 17], 
"dict": [17, 21], "dictionari": 17, "differ": 19, "dimension": 15, "dir_path": [2, 15, 17], "directori": [14, 17, 20], "displai": 8, "doc": [8, 17, 18, 21], "doc_chain": [0, 1, 3, 4, 16, 17, 21], "doc_id": 17, "document": [5, 6, 17, 18, 21], "doe": [13, 14, 17], "download": [0, 1, 2, 3, 4, 5, 7, 8, 14, 20], "dry_run": 17, "dure": 17, "e": 12, "each": [17, 18], "easi": 22, "easiest": 14, "either": [14, 15], "element": 17, "elif": 2, "els": [8, 18], "embed": [8, 11, 16, 18, 23], "embedding_funct": [16, 17, 18], "embedding_model": [8, 15, 16, 17, 18], "embedding_modelnam": 18, "embedding_typ": [8, 15, 16, 17, 18], "en": 17, "enabl": 13, "end": [0, 1, 2, 3, 4, 8, 22], "ensur": 17, "enter": [8, 14], "enumer": 8, "etc": [14, 17], "exampl": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 16, 17, 24], "example_templ": [1, 16, 17], "exclud": 17, "execut": [6, 9, 17, 24], "expand": 8, "explain": [], "explicitli": 15, "extern": 13, "extract": 17, "extract_imag": [16, 17], "extract_image_block_typ": [16, 17], "f": 8, "f16": 17, "face": 17, "fals": [8, 17, 18], "fetch_model_repo": [16, 20, 23], "few": [5, 6, 17], "fewshotprompt": [1, 16, 17, 21, 23], "field": 17, "fieldinfo": 17, "figur": 17, "file": [6, 9, 12, 14, 15, 17, 20, 24], "filenotfounderror": 17, "filepath": [16, 17], "final": 14, "find": [8, 14, 17], "find_config_path": [16, 17, 23], "flexibl": 14, "float": [17, 18], "folder": 15, "follow": [0, 1, 14], "form": [8, 14], "form_submit_button": 8, "format": [16, 17, 21], "found": 17, "from": [0, 1, 2, 3, 4, 6, 8, 9, 12, 14, 17, 18, 20, 24], "full": [0, 1, 2, 3, 4, 8], "function": [8, 17, 18, 20], "further": 12, "galleri": [0, 1, 2, 3, 4, 5, 7, 8, 24], "gate": 14, "gen_doc_id": [16, 17], "gener": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 22], "get": [14, 17, 22], "get_chunk": [8, 16, 17, 18], "get_config": [16, 17, 23], "get_doc": [16, 17], "get_docs_from_chunk": [16, 17], "get_llamacpp_repo": [16, 20, 23], "get_search_result": 8, "getcwd": 8, "gguf": 14, "git": [12, 20], "given": [0, 1, 
17, 20], "glob": 17, "glob_pattern": 17, "go": [0, 1, 2, 3, 4, 8], "gpu": 17, "grag": [0, 1, 2, 3, 4, 8, 11, 14, 17, 18, 20, 21], "green": 8, "gui": [9, 10, 22], "gui_jupyt": 7, "gui_python": 7, "ha": [14, 17], "handl": 17, "has_result": 8, "have": 12, "hexadecim": 17, "hf_pipelin": [16, 17], "hi_r": 17, "high": 15, "hkunlp": 18, "hold": 8, "home": [8, 17, 18], "host": [8, 15, 17, 18], "how": [0, 1, 2, 3, 4, 11], "html": [], "http": 8, "httpclient": 18, "hug": 17, "huggingfac": [11, 15, 17, 20, 22], "i": [1, 5, 7, 8, 13, 14, 15, 17, 18, 20, 22], "id": [14, 17, 20], "id_gen": [16, 17], "id_kei": [16, 17], "identifi": 17, "illustr": [3, 4], "imag": 17, "image_output_dir": [16, 17], "implement": [13, 22], "import": [0, 1, 2, 3, 4, 8], "includ": 17, "index": 22, "infer_table_structur": [16, 17], "inform": [3, 4, 13], "ingest": [5, 6, 11, 16, 17], "ini": [12, 14, 15, 17], "initi": [8, 17], "input": [3, 4, 14, 17], "input_kei": [0, 1, 16, 17], "insert": 8, "instal": [11, 22], "instanc": [8, 17, 20], "instruct": 14, "instructor": [15, 17, 18], "int": [17, 18], "integ": 18, "integrag": 22, "intellig": 8, "interact": [8, 20], "interfac": 8, "introduc": 1, "io": 8, "ip": 18, "ipynb": [0, 1, 2, 3, 4, 8], "is_loc": 17, "its": 17, "join": [17, 21], "jpg": [], "json": 17, "jupit": 1, "jupyt": [0, 1, 2, 3, 4, 5, 7, 8], "just": 14, "k": 8, "kei": 17, "keyword": 21, "kwarg": [17, 21], "langchain": [3, 4, 17, 18], "langchain_cli": [17, 18], "langchain_commun": 18, "langchain_cor": [17, 21], "languag": [16, 17], "larg": 17, "largest": 1, "layer": 17, "lecun": 1, "len": 8, "level": 20, "like": [1, 8, 17, 22], "link": 17, "list": [8, 17, 18, 21], "llama": [14, 17, 20], "llama2": 17, "llama_cpp": [16, 17], "llamacpp": [11, 22], "llm": [11, 16, 19, 21, 22, 23], "llm_kwarg": [16, 21], "llm_type": [16, 17], "load": [16, 17], "load_env": 17, "load_fil": [16, 17], "load_model": [16, 17], "load_prompt": 17, "local": [14, 15, 17, 22], "localfilestor": 17, "localhost": 18, "locat": [17, 20], 
"look": 8, "lost": 8, "m": 14, "made": 12, "main": 8, "make": [14, 15, 20, 22], "manag": [8, 17], "map": 17, "markdown": 8, "match": 21, "max_new_token": [16, 17], "maximum": 17, "mb": [6, 9, 24], "mem": [6, 9, 24], "metadata": [8, 17], "metadata_toggl": 8, "method": 17, "min_valu": 8, "model": [8, 11, 15, 17, 18, 20, 21], "model_computed_field": [16, 17], "model_config": [16, 17], "model_dir_path": 20, "model_field": [16, 17], "model_nam": [14, 16, 17, 21], "model_path": [16, 17], "model_typ": 17, "modul": [22, 23], "more": [3, 4, 13, 14, 15, 17], "moreov": [], "most": [17, 18], "move": [15, 17], "much": 14, "multi": 17, "multi_vector": 17, "multivec_retriev": [2, 3, 4, 8, 17], "multivectorretriev": 17, "n": [8, 17], "n_batch": [16, 17], "n_ctx": [16, 17], "n_gpu_lay": [16, 17], "name": [1, 8, 14, 16, 17, 18, 21], "namespac": [16, 17], "network": 1, "neural": 1, "new": 17, "new_pap": 2, "nn": [17, 21], "non": 15, "none": [17, 18, 20, 21], "nonetyp": 17, "nosourc": 17, "note": 14, "notebook": [0, 1, 2, 3, 4, 5, 7, 8], "number": [8, 17, 18], "number_input": 8, "o": 8, "object": [17, 21], "offer": [14, 22], "one_to_on": 17, "onlin": 14, "open": 13, "oper": 20, "option": [8, 17, 20], "orchestr": 8, "otherwis": 8, "our": 15, "output": [17, 18, 21], "output_dir": 20, "output_kei": [1, 16, 17], "output_pars": [16, 21], "overlap": 17, "overview": [11, 22], "overwrit": 17, "packag": 22, "page": [8, 17, 21, 22], "page_cont": 8, "pagehom": 8, "pair": 17, "paramet": [8, 17, 18, 20, 21], "parent": [2, 8, 15, 17], "pars": [16, 23], "parse_pdf": 17, "parsepdf": [16, 17, 23], "parser": 17, "parser_kwarg": 17, "partit": [16, 17], "pass": 17, "path": [2, 8, 14, 15, 17, 18, 20], "pathlib": [2, 8], "pattern": 17, "pdf": [2, 15, 16, 23], "phase": 12, "pip": 12, "pipelin": [3, 4, 17], "place": 17, "planet": 1, "point": 17, "port": [8, 15, 17, 18], "prefix": [1, 16, 17], "print": 18, "priorit": 17, "privaci": 13, "process": [15, 17], "process_imag": [16, 17], "process_t": [16, 17], 
"process_text": [16, 17], "produc": 17, "progress": [17, 18], "project": 20, "prompt": [5, 6, 14, 16, 21, 22, 23], "prompt_match": [16, 21], "prompttempl": 17, "properti": [17, 21], "provid": [13, 14, 15, 17, 18, 19, 21, 22], "publish": 12, "pull": 20, "py": [0, 1, 2, 3, 4, 6, 8, 9, 24], "pydant": 17, "pypi": 12, "python": [0, 1, 2, 3, 4, 5, 7, 8, 14, 22], "q4_k_m": 14, "q5_k_m": [14, 17], "qa": [17, 21], "quantiz": [11, 16, 17, 22, 23], "quantize_model": [16, 20, 23], "queri": [3, 4, 8, 17, 18], "question": [0, 1], "rag": [0, 1, 3, 4, 6, 10, 15, 16, 22, 23, 24], "rag_jupyt": 5, "rag_python": 5, "rais": 17, "random": 18, "re": 13, "reach": 17, "read": 17, "read_onli": 18, "recommend": 14, "recurs": 17, "red": 8, "ref": [], "refer": [3, 4, 13, 15, 17], "refin": [5, 6, 17, 21], "refine_cal": [16, 21], "refine_chain_langchain_illustr": [], "relvant": 21, "render": 8, "render_search_form": 8, "render_search_result": 8, "render_sidebar": 8, "render_stat": 8, "replac": 17, "repo": 14, "repo_id": 20, "repositori": [12, 14, 20], "repr": 17, "repres": 8, "requir": [14, 17], "respect": 17, "respons": 8, "result": [8, 20], "retriev": [2, 3, 4, 9, 10, 11, 15, 16, 21, 22, 23], "retriever_app": [8, 9], "retriever_kwarg": [16, 21], "retriv": [7, 9], "return": [8, 17, 18, 20, 21], "root": [17, 20], "root_path": 20, "run": [2, 11, 15, 22], "run_chroma": 15, "same": 17, "sampl": 17, "save": [13, 16, 17, 20], "score": [8, 17, 18], "script": [15, 17], "search": [8, 17, 22], "search_form": 8, "self": [8, 17, 18], "semant": 8, "sentenc": [17, 18], "seper": [17, 21], "server": 15, "servic": 13, "session_st": 8, "set": [8, 14, 17], "sh": 15, "shot": [5, 6, 17], "should": [14, 17, 18, 20], "show": [8, 17, 18], "sidebar": 8, "similar": [8, 17, 18], "similiar": 8, "simpl": [7, 8, 22], "sinc": [12, 13, 15], "singl": [17, 18], "single_text_out": [16, 17], "size": 17, "slow": 2, "smaller": 17, "so": 17, "soifjpaosiujposoifj": [], "solut": [13, 22], "sourc": [0, 1, 2, 3, 4, 5, 7, 8, 13, 15, 16, 
17, 18, 20, 21], "specifi": [17, 20], "sphinx": [0, 1, 2, 3, 4, 5, 7, 8], "sphx_glr_auto_examples_basicrag_customprompt": 24, "sphx_glr_auto_examples_basicrag_fewshotprompt": 24, "sphx_glr_auto_examples_basicrag_ingest": 24, "sphx_glr_auto_examples_basicrag_refin": 24, "sphx_glr_auto_examples_basicrag_stuff": 24, "spinner": 8, "split": 17, "split_doc": [16, 17], "splitter": [16, 23], "src": [12, 15], "st": 8, "start": [14, 17, 22], "state": 8, "static": 21, "statist": 8, "statu": 18, "std_out": 17, "step": 8, "storag": 17, "store": [11, 16, 17, 18, 22], "store_path": [16, 17, 18], "str": [8, 17, 18, 20, 21], "strategi": [16, 17], "streamlit": [7, 8], "stremlit": 8, "string": [17, 18, 21], "stuff": [0, 1, 5, 6, 17, 21], "stuff_cal": [16, 21], "stuff_chain_langchain_illustr": [], "stuff_doc": [16, 17, 21, 23], "submit": 8, "submodul": [16, 17, 23], "subprocess": 20, "suffix": [1, 16, 17], "support": [11, 17, 18, 22], "sure": [14, 15], "sy": 8, "sync": 2, "synchron": 2, "system": 8, "tab": 8, "tab1": 8, "tab2": 8, "tabl": 17, "table_as_html": 17, "take": 17, "task": [16, 17, 21], "temperatur": [16, 17], "templat": [0, 16, 17], "test": [2, 3, 4, 13, 18], "test_connect": [8, 17, 18], "text": [16, 23], "text_concat": [16, 17], "text_input": 8, "text_splitt": [16, 17], "textsplitt": [16, 17, 23], "thank": 22, "them": 17, "thi": [0, 1, 2, 3, 4, 8, 13, 14, 15, 17, 18, 21], "time": 17, "titl": 8, "toggl": 8, "token": [14, 17], "top": [8, 17, 18], "top_k": [8, 16, 17, 18], "total": [6, 9, 24], "transform": [17, 18], "travers": 17, "tree": 17, "true": [2, 3, 4, 8, 17, 18], "tupl": 18, "two": 14, "type": [8, 15, 17, 18, 21], "typic": 17, "ubuntu": [17, 18], "ui": 8, "under": 15, "union": 17, "uniqu": 17, "until": 17, "up": [1, 8, 17], "updat": 20, "us": [0, 1, 2, 3, 4, 5, 7, 8, 11, 13, 15, 17, 18, 20, 21, 22], "user": 14, "util": [16, 23], "uuid": 17, "uuid5": 17, "v": 17, "v1": 17, "valid": 17, "validate_doc_chain": [16, 17], "validate_exampl": [16, 17], "validate_input_kei": 
[16, 17], "validate_output_kei": [16, 17], "validate_task": [16, 17], "valu": [8, 17], "valueerror": 17, "variou": 22, "vector": [2, 11, 17, 18, 22], "vectordb": [2, 3, 4, 8, 15, 16, 23], "vectorstor": [15, 18], "verbos": [17, 18], "volume_2k": [17, 18], "wai": [14, 22], "want": 14, "we": 12, "web": 8, "well": 13, "what": [1, 8], "where": [14, 20], "whether": 17, "which": 17, "while": [3, 4], "who": 1, "width": [], "with_scor": [8, 17, 18], "wrapper": 18, "write": 8, "xl": [15, 18], "yann": 1, "yet": 12, "you": 14, "your": 8, "your_collection_nam": 2, "zip": [5, 7]}, "titles": ["Custom Prompts", "Custom Few-Shot Prompts", "Document Ingestion", "Refine Chain", "Stuff Chain", "Basic-RAG Cookbooks", "Computation times", "Retriever-GUI Cookbooks", "Retriever GUI", "Computation times", "Cookbooks", "Get Started", "Installation", "GRAG Overview", "To run LLMs using HuggingFace", "Vector Stores", "GRAG", "Components", "VectorDB", "Prompts", "Quantize", "RAG", "Welcome to GRAG\u2019s documentation!", "grag", "Computation times"], "titleterms": {"": 22, "To": 14, "augment": 13, "base": 18, "basic": [5, 21], "chain": [3, 4], "chroma": [15, 18], "client": 18, "compon": 17, "comput": [6, 9, 24], "content": [16, 17, 18, 19, 20, 21, 22], "cookbook": [5, 7, 10], "custom": [0, 1], "data": 15, "deeplak": 18, "document": [2, 22], "embed": [15, 17], "few": 1, "gener": 13, "get": 11, "grag": [13, 16, 22, 23], "gui": [7, 8], "how": 14, "huggingfac": 14, "indic": 22, "ingest": [2, 15], "instal": 12, "llamacpp": 14, "llm": [14, 17], "model": 14, "modul": [16, 17, 18, 19, 20, 21], "overview": 13, "pars": 17, "pdf": 17, "prompt": [0, 1, 17, 19], "quantiz": [14, 20], "rag": [5, 21], "refin": 3, "retriev": [7, 8, 13, 17], "run": 14, "shot": 1, "splitter": 17, "start": 11, "store": 15, "stuff": 4, "submodul": [18, 20, 21], "support": 15, "tabl": 22, "text": 17, "time": [6, 9, 24], "us": 14, "util": [17, 20], "vector": 15, "vectordb": [17, 18], "welcom": 22}})
\ No newline at end of file
+Search.setIndex({"alltitles": {"Attributes": [[15, "attributes"]], "Base": [[19, "module-grag.components.vectordb.base"]], "Basic RAG": [[22, "module-grag.rag.basic_rag"]], "Basic-RAG Cookbooks": [[5, "basic-rag-cookbooks"]], "Chroma": [[16, "chroma"]], "Chroma Client": [[19, "module-grag.components.vectordb.chroma_client"]], "Components": [[18, "components"]], "Computation times": [[6, "computation-times"], [9, "computation-times"], [25, "computation-times"]], "Contents:": [[23, null]], "Cookbooks": [[10, "cookbooks"]], "Custom Few-Shot Prompts": [[1, "custom-few-shot-prompts"]], "Custom Prompts": [[0, "custom-prompts"]], "Data Ingestion": [[16, "data-ingestion"]], "DeepLake": [[16, "deeplake"]], "Deeplake Client": [[19, "module-grag.components.vectordb.deeplake_client"]], "Document Ingestion": [[2, "document-ingestion"]], "Embedding": [[18, "module-grag.components.embedding"]], "Embeddings": [[16, "embeddings"]], "GRAG": [[17, "grag"]], "GRAG Overview": [[13, "grag-overview"]], "Get Started": [[11, "get-started"]], "How to quantize models": [[14, "how-to-quantize-models"]], "Indices and tables": [[23, "indices-and-tables"]], "Installation": [[12, "installation"]], "LLM": [[18, "module-grag.components.llm"]], "LLMs": [[14, "llms"]], "Limitations": [[15, "limitations"]], "Module Contents": [[19, "module-grag.components.vectordb"]], "Module contents": [[17, "module-grag"], [18, "module-grag.components"], [20, "module-grag.prompts"], [21, "module-grag.quantize"], [22, "module-grag.rag"]], "Parse PDF": [[15, "parse-pdf"], [18, "module-grag.components.parse_pdf"]], "Parsing Complex PDF Layouts": [[15, "parsing-complex-pdf-layouts"]], "Prompt": [[18, "module-grag.components.prompt"]], "Prompts": [[20, "prompts"]], "Quantize": [[21, "quantize"], [21, "id1"]], "RAG": [[22, "rag"]], "Refine Chain": [[3, "refine-chain"]], "Retrieval-Augmented Generation (RAG)": [[13, "retrieval-augmented-generation-rag"]], "Retriever": [[18, "module-grag.components.multivec_retriever"]], 
"Retriever GUI": [[8, "retriever-gui"]], "Retriever-GUI Cookbooks": [[7, "retriever-gui-cookbooks"]], "Stuff Chain": [[4, "stuff-chain"]], "Submodules": [[19, "submodules"], [21, "submodules"], [22, "submodules"]], "Supported Vector Stores": [[16, "supported-vector-stores"]], "Table Parsing Methodology": [[15, "table-parsing-methodology"]], "Text Splitter": [[18, "module-grag.components.text_splitter"]], "To run LLMs using HuggingFace": [[14, "to-run-llms-using-huggingface"]], "To run LLMs using LlamaCPP": [[14, "to-run-llms-using-llamacpp"]], "Utils": [[18, "module-grag.components.utils"], [21, "module-grag.quantize.utils"]], "Vector Stores": [[16, "vector-stores"]], "VectorDB": [[18, "vectordb"], [19, "vectordb"]], "Welcome to GRAG\u2019s documentation!": [[23, "welcome-to-grag-s-documentation"]], "grag": [[24, "grag"]]}, "docnames": ["auto_examples/Basic-RAG/BasicRAG_CustomPrompt", "auto_examples/Basic-RAG/BasicRAG_FewShotPrompt", "auto_examples/Basic-RAG/BasicRAG_ingest", "auto_examples/Basic-RAG/BasicRAG_refine", "auto_examples/Basic-RAG/BasicRAG_stuff", "auto_examples/Basic-RAG/index", "auto_examples/Basic-RAG/sg_execution_times", "auto_examples/Retriver-GUI/index", "auto_examples/Retriver-GUI/retriever_app", "auto_examples/Retriver-GUI/sg_execution_times", "auto_examples_index", "get_started", "get_started.installation", "get_started.introduction", "get_started.llms", "get_started.parse_pdf", "get_started.vectordb", "grag", "grag.components", "grag.components.vectordb", "grag.prompts", "grag.quantize", "grag.rag", "index", "modules", "sg_execution_times"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2}, "filenames": ["auto_examples/Basic-RAG/BasicRAG_CustomPrompt.rst", 
"auto_examples/Basic-RAG/BasicRAG_FewShotPrompt.rst", "auto_examples/Basic-RAG/BasicRAG_ingest.rst", "auto_examples/Basic-RAG/BasicRAG_refine.rst", "auto_examples/Basic-RAG/BasicRAG_stuff.rst", "auto_examples/Basic-RAG/index.rst", "auto_examples/Basic-RAG/sg_execution_times.rst", "auto_examples/Retriver-GUI/index.rst", "auto_examples/Retriver-GUI/retriever_app.rst", "auto_examples/Retriver-GUI/sg_execution_times.rst", "auto_examples_index.rst", "get_started.rst", "get_started.installation.rst", "get_started.introduction.rst", "get_started.llms.rst", "get_started.parse_pdf.rst", "get_started.vectordb.rst", "grag.rst", "grag.components.rst", "grag.components.vectordb.rst", "grag.prompts.rst", "grag.quantize.rst", "grag.rag.rst", "index.rst", "modules.rst", "sg_execution_times.rst"], "indexentries": {"aadd_docs() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.base.vectordb method)": [[19, "grag.components.vectordb.base.VectorDB.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.aadd_docs", false]], "aadd_docs() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.aadd_docs", false]], "add_caption_first (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.add_caption_first", false]], "add_captions_to_blocks (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.add_captions_to_blocks", false]], "add_captions_to_text (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.add_captions_to_text", false]], "add_docs() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.add_docs", false]], "add_docs() 
(grag.components.vectordb.base.vectordb method)": [[19, "grag.components.vectordb.base.VectorDB.add_docs", false]], "add_docs() (grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.add_docs", false]], "add_docs() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.add_docs", false]], "aget_chunk() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.base.vectordb method)": [[19, "grag.components.vectordb.base.VectorDB.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.aget_chunk", false]], "aget_chunk() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.aget_chunk", false]], "aget_doc() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.aget_doc", false]], "aingest() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.aingest", false]], "basicrag (class in grag.rag.basic_rag)": [[22, "grag.rag.basic_rag.BasicRAG", false]], "building_llamacpp() (in module grag.quantize.utils)": [[21, "grag.quantize.utils.building_llamacpp", false]], "chromaclient (class in grag.components.vectordb.chroma_client)": [[19, "grag.components.vectordb.chroma_client.ChromaClient", false]], "chunk_overlap (grag.components.text_splitter.textsplitter attribute)": [[18, "grag.components.text_splitter.TextSplitter.chunk_overlap", false]], "chunk_size (grag.components.text_splitter.textsplitter attribute)": [[18, "grag.components.text_splitter.TextSplitter.chunk_size", false]], "classify() (grag.components.parse_pdf.parsepdf method)": [[18, 
"grag.components.parse_pdf.ParsePDF.classify", false]], "client (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.client", false]], "client (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.client", false]], "collection (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.collection", false]], "collection (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.collection", false]], "collection_name (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.collection_name", false]], "custom_prompt (grag.rag.basic_rag.basicrag attribute)": [[22, "grag.rag.basic_rag.BasicRAG.custom_prompt", false]], "deeplakeclient (class in grag.components.vectordb.deeplake_client)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient", false]], "delete() (grag.components.vectordb.base.vectordb method)": [[19, "grag.components.vectordb.base.VectorDB.delete", false]], "delete() (grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.delete", false]], "delete() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.delete", false]], "device_map (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.device_map", false]], "doc_chain (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.doc_chain", false]], "doc_chain (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.doc_chain", false], [18, "id6", false]], "doc_chain (grag.rag.basic_rag.basicrag attribute)": [[22, 
"grag.rag.basic_rag.BasicRAG.doc_chain", false]], "doc_chain (grag.rag.basic_rag.basicrag property)": [[22, "id0", false]], "embedding (class in grag.components.embedding)": [[18, "grag.components.embedding.Embedding", false]], "embedding_function (grag.components.embedding.embedding attribute)": [[18, "grag.components.embedding.Embedding.embedding_function", false]], "embedding_function (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.embedding_function", false]], "embedding_function (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_function", false]], "embedding_model (grag.components.embedding.embedding attribute)": [[18, "grag.components.embedding.Embedding.embedding_model", false]], "embedding_model (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.embedding_model", false]], "embedding_model (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_model", false]], "embedding_type (grag.components.embedding.embedding attribute)": [[18, "grag.components.embedding.Embedding.embedding_type", false]], "embedding_type (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.embedding_type", false]], "embedding_type (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.embedding_type", false]], "example_template (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.example_template", false], [18, "id2", false]], "examples (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.examples", false], [18, "id3", false]], 
"extract_image_block_types (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.extract_image_block_types", false]], "extract_images (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.extract_images", false]], "fetch_model_repo() (in module grag.quantize.utils)": [[21, "grag.quantize.utils.fetch_model_repo", false]], "fewshotprompt (class in grag.components.prompt)": [[18, "grag.components.prompt.FewShotPrompt", false]], "filepath (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.filepath", false]], "filepath (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.filepath", false], [18, "id7", false]], "find_config_path() (in module grag.components.utils)": [[18, "grag.components.utils.find_config_path", false]], "format() (grag.components.prompt.prompt method)": [[18, "grag.components.prompt.Prompt.format", false]], "gen_doc_ids() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.gen_doc_ids", false]], "get_chunk() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.get_chunk", false]], "get_chunk() (grag.components.vectordb.base.vectordb method)": [[19, "grag.components.vectordb.base.VectorDB.get_chunk", false]], "get_chunk() (grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.get_chunk", false]], "get_chunk() (grag.components.vectordb.deeplake_client.deeplakeclient method)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.get_chunk", false]], "get_config() (in module grag.components.utils)": [[18, "grag.components.utils.get_config", false]], "get_doc() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.get_doc", false]], "get_docs_from_chunks() 
(grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.get_docs_from_chunks", false]], "get_llamacpp_repo() (in module grag.quantize.utils)": [[21, "grag.quantize.utils.get_llamacpp_repo", false]], "grag": [[17, "module-grag", false]], "grag.components": [[18, "module-grag.components", false]], "grag.components.embedding": [[18, "module-grag.components.embedding", false]], "grag.components.llm": [[18, "module-grag.components.llm", false]], "grag.components.multivec_retriever": [[18, "module-grag.components.multivec_retriever", false]], "grag.components.parse_pdf": [[18, "module-grag.components.parse_pdf", false]], "grag.components.prompt": [[18, "module-grag.components.prompt", false]], "grag.components.text_splitter": [[18, "module-grag.components.text_splitter", false]], "grag.components.utils": [[18, "module-grag.components.utils", false]], "grag.components.vectordb": [[19, "module-grag.components.vectordb", false]], "grag.components.vectordb.base": [[19, "module-grag.components.vectordb.base", false]], "grag.components.vectordb.chroma_client": [[19, "module-grag.components.vectordb.chroma_client", false]], "grag.components.vectordb.deeplake_client": [[19, "module-grag.components.vectordb.deeplake_client", false]], "grag.prompts": [[20, "module-grag.prompts", false]], "grag.quantize": [[21, "module-grag.quantize", false]], "grag.quantize.quantize": [[21, "module-grag.quantize.quantize", false]], "grag.quantize.utils": [[21, "module-grag.quantize.utils", false]], "grag.rag": [[22, "module-grag.rag", false]], "grag.rag.basic_rag": [[22, "module-grag.rag.basic_rag", false]], "hf_pipeline() (grag.components.llm.llm method)": [[18, "grag.components.llm.LLM.hf_pipeline", false]], "host (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.host", false]], "id_gen() (grag.components.multivec_retriever.retriever method)": [[18, 
"grag.components.multivec_retriever.Retriever.id_gen", false]], "id_key (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.id_key", false]], "image_output_dir (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.image_output_dir", false]], "infer_table_structure (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.infer_table_structure", false]], "ingest() (grag.components.multivec_retriever.retriever method)": [[18, "grag.components.multivec_retriever.Retriever.ingest", false]], "input_keys (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.input_keys", false], [18, "id1", false]], "input_keys (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.input_keys", false], [18, "id8", false]], "langchain_client (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.langchain_client", false]], "langchain_client (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.langchain_client", false]], "language (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.language", false]], "language (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.language", false], [18, "id9", false]], "llama_cpp() (grag.components.llm.llm method)": [[18, "grag.components.llm.LLM.llama_cpp", false]], "llm (class in grag.components.llm)": [[18, "grag.components.llm.LLM", false]], "llm_kwargs (grag.rag.basic_rag.basicrag attribute)": [[22, "grag.rag.basic_rag.BasicRAG.llm_kwargs", false]], "llm_type (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.llm_type", false]], "llm_type (grag.components.prompt.prompt attribute)": [[18, 
"grag.components.prompt.Prompt.llm_type", false], [18, "id10", false]], "load() (grag.components.prompt.prompt class method)": [[18, "grag.components.prompt.Prompt.load", false]], "load_file() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.load_file", false]], "load_model() (grag.components.llm.llm method)": [[18, "grag.components.llm.LLM.load_model", false]], "max_new_tokens (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.max_new_tokens", false]], "model_computed_fields (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.model_computed_fields", false]], "model_computed_fields (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.model_computed_fields", false]], "model_config (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.model_config", false]], "model_config (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.model_config", false]], "model_fields (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.model_fields", false]], "model_fields (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.model_fields", false]], "model_name (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.model_name", false]], "model_name (grag.components.llm.llm property)": [[18, "id0", false]], "model_name (grag.rag.basic_rag.basicrag attribute)": [[22, "grag.rag.basic_rag.BasicRAG.model_name", false]], "model_name (grag.rag.basic_rag.basicrag property)": [[22, "id1", false]], "model_path (grag.components.llm.llm property)": [[18, "grag.components.llm.LLM.model_path", false]], "module": [[17, "module-grag", false], [18, "module-grag.components", false], [18, "module-grag.components.embedding", false], [18, "module-grag.components.llm", false], [18, "module-grag.components.multivec_retriever", false], 
[18, "module-grag.components.parse_pdf", false], [18, "module-grag.components.prompt", false], [18, "module-grag.components.text_splitter", false], [18, "module-grag.components.utils", false], [19, "module-grag.components.vectordb", false], [19, "module-grag.components.vectordb.base", false], [19, "module-grag.components.vectordb.chroma_client", false], [19, "module-grag.components.vectordb.deeplake_client", false], [20, "module-grag.prompts", false], [21, "module-grag.quantize", false], [21, "module-grag.quantize.quantize", false], [21, "module-grag.quantize.utils", false], [22, "module-grag.rag", false], [22, "module-grag.rag.basic_rag", false]], "n_batch (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.n_batch", false]], "n_ctx (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.n_ctx", false]], "n_gpu_layers (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.n_gpu_layers", false]], "name (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.name", false]], "name (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.name", false], [18, "id11", false]], "namespace (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.namespace", false]], "output_keys (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.output_keys", false]], "output_parser() (grag.rag.basic_rag.basicrag static method)": [[22, "grag.rag.basic_rag.BasicRAG.output_parser", false]], "parsepdf (class in grag.components.parse_pdf)": [[18, "grag.components.parse_pdf.ParsePDF", false]], "partition() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.partition", false]], "port (grag.components.vectordb.chroma_client.chromaclient attribute)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.port", false]], "prefix 
(grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.prefix", false], [18, "id4", false]], "process_images() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.process_images", false]], "process_tables() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.process_tables", false]], "process_text() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.process_text", false]], "prompt (class in grag.components.prompt)": [[18, "grag.components.prompt.Prompt", false]], "prompt (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.prompt", false]], "prompt_matcher() (grag.rag.basic_rag.basicrag method)": [[22, "grag.rag.basic_rag.BasicRAG.prompt_matcher", false]], "quantize_model() (in module grag.quantize.utils)": [[21, "grag.quantize.utils.quantize_model", false]], "refine_call() (grag.rag.basic_rag.basicrag method)": [[22, "grag.rag.basic_rag.BasicRAG.refine_call", false]], "retriever (class in grag.components.multivec_retriever)": [[18, "grag.components.multivec_retriever.Retriever", false]], "retriever (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.retriever", false]], "retriever_kwargs (grag.rag.basic_rag.basicrag attribute)": [[22, "grag.rag.basic_rag.BasicRAG.retriever_kwargs", false]], "save() (grag.components.prompt.prompt method)": [[18, "grag.components.prompt.Prompt.save", false]], "single_text_out (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.single_text_out", false]], "source (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.source", false]], "source (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.source", false], [18, "id12", false]], "split_docs() (grag.components.multivec_retriever.retriever 
method)": [[18, "grag.components.multivec_retriever.Retriever.split_docs", false]], "splitter (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.splitter", false]], "store (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.store", false]], "store_path (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.store_path", false]], "store_path (grag.components.vectordb.deeplake_client.deeplakeclient attribute)": [[19, "grag.components.vectordb.deeplake_client.DeepLakeClient.store_path", false]], "strategy (grag.components.parse_pdf.parsepdf attribute)": [[18, "grag.components.parse_pdf.ParsePDF.strategy", false]], "stuff_call() (grag.rag.basic_rag.basicrag method)": [[22, "grag.rag.basic_rag.BasicRAG.stuff_call", false]], "stuff_docs() (grag.rag.basic_rag.basicrag static method)": [[22, "grag.rag.basic_rag.BasicRAG.stuff_docs", false]], "stuff_docs() (in module grag.components.utils)": [[18, "grag.components.utils.stuff_docs", false]], "suffix (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.suffix", false], [18, "id5", false]], "task (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.task", false]], "task (grag.components.prompt.fewshotprompt attribute)": [[18, "grag.components.prompt.FewShotPrompt.task", false]], "task (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.task", false], [18, "id13", false]], "task (grag.rag.basic_rag.basicrag attribute)": [[22, "grag.rag.basic_rag.BasicRAG.task", false]], "task (grag.rag.basic_rag.basicrag property)": [[22, "id2", false]], "temperature (grag.components.llm.llm attribute)": [[18, "grag.components.llm.LLM.temperature", false]], "template (grag.components.prompt.prompt attribute)": [[18, "grag.components.prompt.Prompt.template", false]], "test_connection() 
(grag.components.vectordb.chroma_client.chromaclient method)": [[19, "grag.components.vectordb.chroma_client.ChromaClient.test_connection", false]], "text_concat() (grag.components.parse_pdf.parsepdf method)": [[18, "grag.components.parse_pdf.ParsePDF.text_concat", false]], "text_splitter (grag.components.text_splitter.textsplitter attribute)": [[18, "grag.components.text_splitter.TextSplitter.text_splitter", false]], "textsplitter (class in grag.components.text_splitter)": [[18, "grag.components.text_splitter.TextSplitter", false]], "top_k (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.top_k", false]], "validate_doc_chain() (grag.components.prompt.prompt class method)": [[18, "grag.components.prompt.Prompt.validate_doc_chain", false]], "validate_examples() (grag.components.prompt.fewshotprompt class method)": [[18, "grag.components.prompt.FewShotPrompt.validate_examples", false]], "validate_input_keys() (grag.components.prompt.prompt class method)": [[18, "grag.components.prompt.Prompt.validate_input_keys", false]], "validate_output_keys() (grag.components.prompt.fewshotprompt class method)": [[18, "grag.components.prompt.FewShotPrompt.validate_output_keys", false]], "validate_task() (grag.components.prompt.prompt class method)": [[18, "grag.components.prompt.Prompt.validate_task", false]], "vectordb (class in grag.components.vectordb.base)": [[19, "grag.components.vectordb.base.VectorDB", false]], "vectordb (grag.components.multivec_retriever.retriever attribute)": [[18, "grag.components.multivec_retriever.Retriever.vectordb", false]]}, "objects": {"": [[17, 0, 0, "-", "grag"]], "grag": [[18, 0, 0, "-", "components"], [20, 0, 0, "-", "prompts"], [21, 0, 0, "-", "quantize"], [22, 0, 0, "-", "rag"]], "grag.components": [[18, 0, 0, "-", "embedding"], [18, 0, 0, "-", "llm"], [18, 0, 0, "-", "multivec_retriever"], [18, 0, 0, "-", "parse_pdf"], [18, 0, 0, "-", "prompt"], [18, 0, 0, "-", "text_splitter"], 
[18, 0, 0, "-", "utils"], [19, 0, 0, "-", "vectordb"]], "grag.components.embedding": [[18, 1, 1, "", "Embedding"]], "grag.components.embedding.Embedding": [[18, 2, 1, "", "embedding_function"], [18, 2, 1, "", "embedding_model"], [18, 2, 1, "", "embedding_type"]], "grag.components.llm": [[18, 1, 1, "", "LLM"]], "grag.components.llm.LLM": [[18, 2, 1, "", "device_map"], [18, 3, 1, "", "hf_pipeline"], [18, 3, 1, "", "llama_cpp"], [18, 3, 1, "", "load_model"], [18, 2, 1, "", "max_new_tokens"], [18, 4, 1, "id0", "model_name"], [18, 4, 1, "", "model_path"], [18, 2, 1, "", "n_batch"], [18, 2, 1, "", "n_ctx"], [18, 2, 1, "", "n_gpu_layers"], [18, 2, 1, "", "task"], [18, 2, 1, "", "temperature"]], "grag.components.multivec_retriever": [[18, 1, 1, "", "Retriever"]], "grag.components.multivec_retriever.Retriever": [[18, 3, 1, "", "aadd_docs"], [18, 3, 1, "", "add_docs"], [18, 3, 1, "", "aget_chunk"], [18, 3, 1, "", "aget_doc"], [18, 3, 1, "", "aingest"], [18, 3, 1, "", "gen_doc_ids"], [18, 3, 1, "", "get_chunk"], [18, 3, 1, "", "get_doc"], [18, 3, 1, "", "get_docs_from_chunks"], [18, 3, 1, "", "id_gen"], [18, 2, 1, "", "id_key"], [18, 3, 1, "", "ingest"], [18, 2, 1, "", "namespace"], [18, 2, 1, "", "retriever"], [18, 3, 1, "", "split_docs"], [18, 2, 1, "", "splitter"], [18, 2, 1, "", "store"], [18, 2, 1, "", "store_path"], [18, 2, 1, "", "top_k"], [18, 2, 1, "", "vectordb"]], "grag.components.parse_pdf": [[18, 1, 1, "", "ParsePDF"]], "grag.components.parse_pdf.ParsePDF": [[18, 2, 1, "", "add_caption_first"], [18, 2, 1, "", "add_captions_to_blocks"], [18, 2, 1, "", "add_captions_to_text"], [18, 3, 1, "", "classify"], [18, 2, 1, "", "extract_image_block_types"], [18, 2, 1, "", "extract_images"], [18, 2, 1, "", "image_output_dir"], [18, 2, 1, "", "infer_table_structure"], [18, 3, 1, "", "load_file"], [18, 3, 1, "", "partition"], [18, 3, 1, "", "process_images"], [18, 3, 1, "", "process_tables"], [18, 3, 1, "", "process_text"], [18, 2, 1, "", "single_text_out"], [18, 2, 1, "", 
"strategy"], [18, 3, 1, "", "text_concat"]], "grag.components.prompt": [[18, 1, 1, "", "FewShotPrompt"], [18, 1, 1, "", "Prompt"]], "grag.components.prompt.FewShotPrompt": [[18, 2, 1, "", "doc_chain"], [18, 2, 1, "id2", "example_template"], [18, 2, 1, "id3", "examples"], [18, 2, 1, "", "filepath"], [18, 2, 1, "id1", "input_keys"], [18, 2, 1, "", "language"], [18, 2, 1, "", "llm_type"], [18, 2, 1, "", "model_computed_fields"], [18, 2, 1, "", "model_config"], [18, 2, 1, "", "model_fields"], [18, 2, 1, "", "name"], [18, 2, 1, "", "output_keys"], [18, 2, 1, "id4", "prefix"], [18, 2, 1, "", "source"], [18, 2, 1, "id5", "suffix"], [18, 2, 1, "", "task"], [18, 3, 1, "", "validate_examples"], [18, 3, 1, "", "validate_output_keys"]], "grag.components.prompt.Prompt": [[18, 2, 1, "id6", "doc_chain"], [18, 2, 1, "id7", "filepath"], [18, 3, 1, "", "format"], [18, 2, 1, "id8", "input_keys"], [18, 2, 1, "id9", "language"], [18, 2, 1, "id10", "llm_type"], [18, 3, 1, "", "load"], [18, 2, 1, "", "model_computed_fields"], [18, 2, 1, "", "model_config"], [18, 2, 1, "", "model_fields"], [18, 2, 1, "id11", "name"], [18, 2, 1, "", "prompt"], [18, 3, 1, "", "save"], [18, 2, 1, "id12", "source"], [18, 2, 1, "id13", "task"], [18, 2, 1, "", "template"], [18, 3, 1, "", "validate_doc_chain"], [18, 3, 1, "", "validate_input_keys"], [18, 3, 1, "", "validate_task"]], "grag.components.text_splitter": [[18, 1, 1, "", "TextSplitter"]], "grag.components.text_splitter.TextSplitter": [[18, 2, 1, "", "chunk_overlap"], [18, 2, 1, "", "chunk_size"], [18, 2, 1, "", "text_splitter"]], "grag.components.utils": [[18, 5, 1, "", "find_config_path"], [18, 5, 1, "", "get_config"], [18, 5, 1, "", "stuff_docs"]], "grag.components.vectordb": [[19, 0, 0, "-", "base"], [19, 0, 0, "-", "chroma_client"], [19, 0, 0, "-", "deeplake_client"]], "grag.components.vectordb.base": [[19, 1, 1, "", "VectorDB"]], "grag.components.vectordb.base.VectorDB": [[19, 3, 1, "", "aadd_docs"], [19, 3, 1, "", "add_docs"], [19, 3, 1, "", 
"aget_chunk"], [19, 3, 1, "", "delete"], [19, 3, 1, "", "get_chunk"]], "grag.components.vectordb.chroma_client": [[19, 1, 1, "", "ChromaClient"]], "grag.components.vectordb.chroma_client.ChromaClient": [[19, 3, 1, "", "aadd_docs"], [19, 3, 1, "", "add_docs"], [19, 3, 1, "", "aget_chunk"], [19, 2, 1, "", "client"], [19, 2, 1, "", "collection"], [19, 2, 1, "", "collection_name"], [19, 3, 1, "", "delete"], [19, 2, 1, "", "embedding_function"], [19, 2, 1, "", "embedding_model"], [19, 2, 1, "", "embedding_type"], [19, 3, 1, "", "get_chunk"], [19, 2, 1, "", "host"], [19, 2, 1, "", "langchain_client"], [19, 2, 1, "", "port"], [19, 3, 1, "", "test_connection"]], "grag.components.vectordb.deeplake_client": [[19, 1, 1, "", "DeepLakeClient"]], "grag.components.vectordb.deeplake_client.DeepLakeClient": [[19, 3, 1, "", "aadd_docs"], [19, 3, 1, "", "add_docs"], [19, 3, 1, "", "aget_chunk"], [19, 2, 1, "", "client"], [19, 2, 1, "", "collection"], [19, 3, 1, "", "delete"], [19, 2, 1, "", "embedding_function"], [19, 2, 1, "", "embedding_model"], [19, 2, 1, "", "embedding_type"], [19, 3, 1, "", "get_chunk"], [19, 2, 1, "", "langchain_client"], [19, 2, 1, "", "store_path"]], "grag.quantize": [[21, 0, 0, "-", "quantize"], [21, 0, 0, "-", "utils"]], "grag.quantize.utils": [[21, 5, 1, "", "building_llamacpp"], [21, 5, 1, "", "fetch_model_repo"], [21, 5, 1, "", "get_llamacpp_repo"], [21, 5, 1, "", "quantize_model"]], "grag.rag": [[22, 0, 0, "-", "basic_rag"]], "grag.rag.basic_rag": [[22, 1, 1, "", "BasicRAG"]], "grag.rag.basic_rag.BasicRAG": [[22, 2, 1, "", "custom_prompt"], [22, 4, 1, "id0", "doc_chain"], [22, 2, 1, "", "llm_kwargs"], [22, 4, 1, "id1", "model_name"], [22, 3, 1, "", "output_parser"], [22, 3, 1, "", "prompt_matcher"], [22, 3, 1, "", "refine_call"], [22, 2, 1, "", "retriever_kwargs"], [22, 3, 1, "", "stuff_call"], [22, 3, 1, "", "stuff_docs"], [22, 4, 1, "id2", "task"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", 
"attribute", "Python attribute"], "3": ["py", "method", "Python method"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:method", "4": "py:property", "5": "py:function"}, "terms": {"": 18, "0": [6, 8, 9, 18, 25], "00": [6, 9, 25], "000": [6, 9], "001": 25, "01": 25, "03": 25, "036": 25, "06": 25, "1": [8, 9, 18], "1024": 18, "12": 25, "13b": 18, "2": [2, 16, 18], "237": 25, "275": 25, "3": [8, 15, 18], "3f": 8, "400": 18, "484": 25, "5": [6, 25], "5000": 18, "6000": 18, "8000": [16, 19], "8c9040b0b5cd4d7cbc2e737da1b24ebf": 18, "935": 25, "A": [8, 15, 18, 19, 21], "By": [13, 15, 16], "For": [3, 4, 12, 13, 14, 15, 16], "If": [14, 15, 16, 18], "In": 13, "It": [18, 23], "OR": 16, "The": [8, 15, 18, 19, 21, 23], "To": [11, 15, 16], "_": [], "__fields__": 18, "__file__": [2, 16], "__init__": 8, "__main__": [3, 4, 8], "__name__": [3, 4, 8], "_build": [], "_queri": 8, "_static": [], "_top_k": 8, "aadd_doc": [17, 18, 19], "abc": 19, "about": [8, 18], "abov": [14, 18], "abstract": 19, "accur": [13, 15], "accuraci": [13, 15, 18], "activ": 8, "add": [15, 18, 19], "add_caption_first": [15, 17, 18], "add_captions_to_block": [15, 17, 18], "add_captions_to_text": [15, 17, 18], "add_doc": [17, 18, 19], "addit": 18, "addition": 15, "address": [8, 15, 19], "adjust": 15, "after": [14, 15, 18], "aget_chunk": [17, 18, 19], "aget_doc": [17, 18], "aingest": [2, 17, 18], "algorithm": 15, "alik": 8, "aliv": 19, "all": [5, 7, 15, 18, 19, 20, 25], "allow": 13, "also": [18, 23], "alt": [], "altern": 2, "alwai": 18, "an": [7, 13, 14, 23], "ani": [13, 15, 16, 18, 23], "annot": 18, "answer": [0, 1], "anyth": 8, "api": [13, 19], "app": 8, "append": 15, "appli": 21, "applic": 8, "approach": 13, "appropri": 13, "ar": [12, 14, 15, 16], "area": 15, "arg": 8, "argument": [16, 18, 22], "artifici": 8, "arxiv": 19, "async": [2, 18, 19], "asynchron": [2, 18, 19], "asyncio": 2, "attempt": 
21, "attribut": [8, 11], "augment": [11, 23], "auth": 14, "auto": 18, "auto_exampl": [6, 9], "avail": 18, "awar": 18, "backend": 8, "bar": 19, "base": [0, 1, 8, 15, 16, 17, 18, 22], "base_dir": 18, "basemodel": 18, "basi": 22, "basic": [0, 1, 6, 10, 13, 17, 23, 24, 25], "basic_rag": [0, 1, 3, 4, 22], "basicrag": [0, 1, 3, 4, 17, 22, 24], "basicrag_customprompt": [0, 6, 25], "basicrag_fewshotprompt": [1, 6, 25], "basicrag_ingest": [2, 6, 25], "basicrag_refin": [3, 6, 25], "basicrag_stuff": [4, 6, 25], "batch": 18, "bbox": 15, "befor": [15, 18], "behavior": 15, "being": 18, "below": [1, 5, 7, 14], "better": [15, 18], "block": [15, 18], "blog": 8, "bool": [15, 18, 19], "both": [13, 15, 16], "bound": 15, "box": 15, "build": 21, "building_llamacpp": [17, 21, 24], "bulb": 8, "button": 8, "call": [15, 18], "call_func": 22, "came": 1, "can": [12, 13, 14, 15, 16], "cannot": 18, "capabl": 13, "capstone_5": [18, 19], "caption": [15, 18], "chain": [5, 6, 18, 22], "chang": [14, 16], "chat": 18, "check": [8, 14], "check_connect": 8, "chroma": [2, 8, 17, 18, 23], "chroma_cli": [2, 19], "chromacli": [2, 18, 19], "chromadb": 19, "chunk": [8, 18, 19], "chunk_overlap": [17, 18], "chunk_siz": [17, 18], "ci_test": 2, "class": [8, 15, 16, 18, 19, 22], "classifi": [17, 18], "classmethod": 18, "classvar": 18, "client": [2, 3, 4, 8, 16, 17, 18], "client_kwarg": 18, "clientserv": 16, "clone": [12, 14, 21], "cmake": 21, "co": 21, "code": [0, 1, 2, 3, 4, 5, 7, 8], "collect": [8, 18, 19], "collection_nam": [2, 3, 4, 8, 18, 19], "combin": [15, 18], "command": 14, "complet": 13, "completedprocess": 21, "complex": 11, "compon": [0, 1, 2, 3, 4, 8, 16, 17, 19, 23, 24], "comput": 18, "computedfieldinfo": 18, "concat": 18, "concaten": [18, 22], "concern": 13, "config": [12, 14, 16, 18], "configdict": 18, "configpars": 18, "configur": 18, "conform": 18, "connect": [8, 18, 19], "consid": 15, "contain": [15, 16, 18, 20, 21], "content": [15, 24], "context": [0, 1, 18], "contextu": 13, "control": 15, 
"convolut": 1, "cookbook": [0, 1, 2, 3, 4, 8, 16, 23, 25], "copi": [14, 21], "core": 19, "corpu": 13, "correspond": [15, 18, 19], "cosin": [18, 19], "cost": 13, "could": [8, 15], "count": 8, "cpp": [14, 18, 21], "creat": 15, "crop": 15, "crucial": 16, "current": [16, 18], "current_path": 18, "custom": [5, 6, 12, 18], "custom_few_shot_prompt": 1, "custom_prompt": [0, 1, 17, 18, 22], "data": [2, 11, 13, 18, 19], "databas": [2, 13, 16, 18, 19], "decor": 22, "deep": 13, "deeplak": [17, 18, 23], "deeplake_cli": [2, 3, 4, 19], "deeplakecli": [2, 3, 4, 16, 18, 19], "def": 8, "default": [15, 16, 18, 19, 21, 22], "defin": 18, "delet": [18, 19], "demonstr": [0, 1, 2, 3, 4, 5, 7, 8], "deriv": 19, "design": 15, "detail": [8, 14, 16, 18], "detect": 15, "develop": [12, 15], "devic": 18, "device_map": [17, 18], "dict": [18, 22], "dictionari": [15, 18], "differ": [15, 20], "dimension": 16, "dir_path": [2, 16, 18], "directori": [14, 15, 18, 21], "displai": 8, "doc": [8, 18, 19, 22], "doc_chain": [0, 1, 3, 4, 17, 18, 22], "doc_id": 18, "document": [5, 6, 13, 15, 18, 19, 22], "doe": [13, 14, 18], "domain": 13, "done": 15, "download": [0, 1, 2, 3, 4, 5, 7, 8, 14, 21], "dry_run": 18, "dure": [15, 18], "e": 12, "each": [15, 18, 19], "easi": 23, "easier": 16, "easiest": 14, "either": [14, 16], "element": [15, 18], "elif": 2, "els": [8, 19], "embed": [8, 11, 17, 19, 24], "embedding_funct": [17, 18, 19], "embedding_model": [8, 16, 17, 18, 19], "embedding_modelnam": 19, "embedding_typ": [8, 16, 17, 18, 19], "emploi": 15, "en": 18, "enabl": 13, "end": [0, 1, 2, 3, 4, 8, 23], "enhanc": 13, "ensur": 18, "enter": [8, 14], "enumer": 8, "etc": [14, 18], "everi": 15, "exampl": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 17, 18, 25], "example_templ": [1, 17, 18], "exclud": 18, "execut": [6, 9, 18, 25], "exist": 13, "expand": 8, "explain": [], "explicitli": 16, "extern": 13, "extract": [15, 18], "extract_imag": [15, 17, 18], "extract_image_block_typ": [15, 17, 18], "extract_text": 15, "extracted_t": 15, 
"extracted_tables_in_pag": 15, "extracted_text": 15, "f": 8, "f16": 18, "face": 18, "factual": 13, "fals": [8, 18, 19], "fetch_model_repo": [17, 21, 24], "few": [5, 6, 18], "fewshotprompt": [1, 17, 18, 22, 24], "field": 18, "fieldinfo": 18, "figur": [15, 18], "file": [6, 9, 12, 14, 15, 16, 18, 21, 25], "filenotfounderror": 18, "filepath": [17, 18], "final": [14, 15], "find": [8, 14, 15, 18], "find_config_path": [17, 18, 24], "find_tabl": 15, "first": 13, "flexibl": [14, 15], "float": [18, 19], "folder": 16, "follow": [0, 1, 14, 15], "form": [8, 14, 15], "form_submit_button": 8, "format": [17, 18, 22], "found": [15, 18], "from": [0, 1, 2, 3, 4, 6, 8, 9, 12, 13, 14, 15, 18, 19, 21, 25], "full": [0, 1, 2, 3, 4, 8], "function": [8, 15, 18, 19, 21], "further": 12, "futur": 15, "galleri": [0, 1, 2, 3, 4, 5, 7, 8, 25], "gate": 14, "gen_doc_id": [17, 18], "gener": [0, 1, 2, 3, 4, 5, 7, 8, 11, 16, 18, 23], "get": [14, 16, 18, 23], "get_chunk": [8, 17, 18, 19], "get_config": [17, 18, 24], "get_doc": [17, 18], "get_docs_from_chunk": [17, 18], "get_llamacpp_repo": [17, 21, 24], "get_search_result": 8, "getcwd": 8, "gguf": 14, "git": [12, 21], "given": [0, 1, 18, 21], "glob": 18, "glob_pattern": 18, "go": [0, 1, 2, 3, 4, 8], "gpu": 18, "grag": [0, 1, 2, 3, 4, 8, 11, 14, 18, 19, 21, 22], "green": 8, "gui": [9, 10, 23], "gui_jupyt": 7, "gui_python": 7, "guid": 13, "ha": [14, 15, 18], "handl": [15, 18], "has_result": 8, "have": [12, 16], "help": 13, "hexadecim": 18, "hf_pipelin": [17, 18], "hi_r": [15, 18], "high": 16, "hkunlp": 19, "hold": 8, "home": [8, 18, 19], "horizont": 15, "horizontal_strategi": 15, "host": [8, 16, 18, 19], "how": [0, 1, 2, 3, 4], "howev": 15, "html": 15, "http": 8, "httpclient": 19, "hug": 18, "huggingfac": [11, 16, 18, 21], "i": [1, 5, 7, 8, 13, 14, 15, 16, 18, 19, 21, 23], "id": [14, 18, 21], "id_gen": [17, 18], "id_kei": [17, 18], "identifi": 18, "illustr": [3, 4, 13], "imag": [15, 18], "image_output_dir": [15, 17, 18], "implement": [13, 23], "import": 
[0, 1, 2, 3, 4, 8], "improv": [13, 15], "includ": [15, 18], "incorpor": 13, "index": 23, "infer_table_structur": [15, 17, 18], "inform": [3, 4, 13], "ingest": [5, 6, 11, 17, 18], "ini": [12, 14, 16, 18], "initi": [8, 18], "input": [3, 4, 14, 18], "input_kei": [0, 1, 17, 18], "insert": 8, "instal": [11, 16, 23], "instanc": [8, 18, 21], "instruct": 14, "instructor": [16, 18, 19], "int": [18, 19], "integ": 19, "integr": 15, "integrag": 23, "intellig": 8, "interact": [8, 21], "interfac": 8, "introduc": 1, "involv": 15, "io": [8, 15], "ip": 19, "ipynb": [0, 1, 2, 3, 4, 8], "is_loc": 18, "issu": 15, "its": 18, "join": [18, 22], "jpg": [], "json": 18, "jupit": 1, "jupyt": [0, 1, 2, 3, 4, 5, 7, 8], "just": [14, 16], "k": 8, "kei": 18, "keyword": 22, "knowledg": 13, "kwarg": [18, 22], "langchain": [3, 4, 18, 19], "langchain_cli": [18, 19], "langchain_commun": 19, "langchain_cor": [18, 22], "languag": [13, 17, 18], "larg": [13, 18], "largest": 1, "layer": 18, "layout": 11, "learn": 13, "lecun": 1, "len": 8, "level": 21, "leverag": [13, 15], "librari": 15, "like": [1, 8, 18, 23], "limit": [11, 23], "line": 15, "link": 18, "list": [8, 15, 18, 19, 22], "llama": [14, 18, 21], "llama2": 18, "llama_cpp": [17, 18], "llamacpp": [11, 23], "llm": [11, 13, 17, 20, 22, 23, 24], "llm_kwarg": [17, 22], "llm_type": [17, 18], "load": [17, 18], "load_env": 18, "load_fil": [17, 18], "load_model": [17, 18], "load_prompt": 18, "local": [14, 16, 18, 23], "localfilestor": 18, "localhost": 19, "locat": [18, 21], "look": 8, "lost": 8, "m": 14, "machin": 13, "made": 12, "mai": 15, "main": 8, "make": [14, 16, 21, 23], "manag": [8, 18], "map": 18, "markdown": 8, "match": 22, "max_new_token": [17, 18], "maximum": 18, "mb": [6, 9, 25], "mem": [6, 9, 25], "metadata": [8, 18], "metadata_toggl": 8, "method": [13, 15, 18], "methodologi": [11, 23], "min_valu": 8, "min_words_vert": 15, "minimum": 15, "model": [8, 13, 16, 18, 19, 21, 22], "model_computed_field": [17, 18], "model_config": [17, 18], 
"model_dir_path": 21, "model_field": [17, 18], "model_nam": [14, 17, 18, 22], "model_path": [17, 18], "model_typ": 18, "modul": [23, 24], "more": [3, 4, 13, 14, 15, 16, 18], "moreov": [], "most": [15, 18, 19], "move": [16, 18], "much": [14, 16], "multi": 18, "multi_vector": 18, "multivec_retriev": [2, 3, 4, 8, 18], "multivectorretriev": 18, "n": [8, 18], "n_batch": [17, 18], "n_ctx": [17, 18], "n_gpu_lay": [17, 18], "name": [1, 8, 14, 17, 18, 19, 22], "namespac": [17, 18], "need": 15, "nest": 15, "network": [1, 13], "neural": [1, 13], "new": [13, 18], "new_pap": 2, "nn": [18, 22], "non": [], "none": [18, 19, 21, 22], "nonetyp": 18, "nosourc": 18, "note": 14, "notebook": [0, 1, 2, 3, 4, 5, 7, 8], "number": [8, 15, 18, 19], "number_input": 8, "o": 8, "object": [18, 22], "offer": [14, 23], "onc": 16, "one_to_on": 18, "onlin": 14, "open": 13, "oper": 21, "option": [8, 18, 21], "orchestr": 8, "organ": 15, "otherwis": 8, "our": [13, 16], "output": [15, 18, 19, 22], "output_dir": 21, "output_kei": [1, 17, 18], "output_pars": [17, 22], "over": 13, "overlap": 18, "overview": [11, 23], "overwrit": 18, "packag": 23, "page": [8, 15, 18, 22, 23], "page_cont": 8, "page_numb": 15, "pagehom": 8, "pair": 18, "paramet": [8, 18, 19, 21, 22], "parent": [2, 8, 16, 18], "pars": [11, 17, 23, 24], "parse_pdf": 18, "parsepdf": [15, 17, 18, 24], "parser": 18, "parser_kwarg": 18, "part": 15, "particularli": 13, "partit": [15, 17, 18], "pass": 18, "path": [2, 8, 14, 16, 18, 19, 21], "pathlib": [2, 8], "pattern": 18, "pdf": [2, 11, 16, 17, 23, 24], "pdfplumber": 15, "perform": 15, "phase": 12, "pip": 12, "pipelin": [3, 4, 13, 18], "place": [15, 18], "planet": 1, "point": 18, "port": [8, 16, 18, 19], "prefix": [1, 17, 18], "primarili": 15, "print": 19, "priorit": 18, "privaci": 13, "process": [13, 15, 16, 18], "process_imag": [17, 18], "process_t": [17, 18], "process_text": [17, 18], "produc": [13, 18], "progress": [18, 19], "project": 21, "prompt": [5, 6, 14, 17, 22, 23, 24], "prompt_match": 
[17, 22], "prompttempl": 18, "properti": [18, 22], "provid": [13, 14, 16, 18, 19, 20, 22, 23], "publish": 12, "pull": 21, "purpos": 15, "py": [0, 1, 2, 3, 4, 6, 8, 9, 25], "pydant": 18, "pypi": 12, "pytesseract": 15, "python": [0, 1, 2, 3, 4, 5, 7, 8, 14, 23], "q4_k_m": 14, "q5_k_m": [14, 18], "qa": [18, 22], "quantiz": [17, 18, 23, 24], "quantize_model": [17, 21, 24], "queri": [3, 4, 8, 18, 19], "question": [0, 1], "rag": [0, 1, 3, 4, 6, 10, 11, 16, 17, 23, 24, 25], "rag_jupyt": 5, "rag_python": 5, "rais": 18, "random": 19, "rang": 15, "re": [], "reach": 18, "read": 18, "read_onli": 19, "readm": 13, "recommend": 14, "recurs": 18, "red": 8, "ref": [], "refer": [3, 4, 13, 16, 18], "refin": [5, 6, 18, 22], "refine_cal": [17, 22], "refine_chain_langchain_illustr": [], "relev": 13, "relvant": 22, "render": 8, "render_search_form": 8, "render_search_result": 8, "render_sidebar": 8, "render_stat": 8, "replac": 18, "repo": 14, "repo_id": 21, "repositori": [12, 14, 21], "repr": 18, "repres": [8, 15], "requir": [13, 14, 15, 18], "respect": 18, "respons": [8, 13], "result": [8, 21], "retriev": [2, 3, 4, 9, 10, 11, 16, 17, 22, 23, 24], "retriever_app": [8, 9], "retriever_kwarg": [17, 22], "retriv": [7, 9], "return": [8, 15, 18, 19, 21, 22], "robust": 15, "root": [18, 21], "root_path": 21, "row": 15, "run": [2, 11, 16, 23], "run_chroma": 16, "same": 18, "sampl": 18, "save": [13, 15, 17, 18, 21], "score": [8, 18, 19], "script": [16, 18], "seamlessli": 15, "search": [8, 18, 23], "search_form": 8, "self": [8, 18, 19], "semant": 8, "sentenc": [18, 19], "seper": [18, 22], "server": 16, "servic": 13, "session_st": 8, "set": [8, 14, 15, 18], "sever": 15, "sh": 16, "shot": [5, 6, 18], "should": [14, 18, 19, 21], "show": [8, 18, 19], "sidebar": 8, "similar": [8, 18, 19], "similiar": 8, "simpl": [7, 8, 23], "sinc": [12, 13, 16], "singl": [15, 18, 19], "single_text_out": [15, 17, 18], "size": 18, "slow": 2, "smaller": 18, "so": 18, "soifjpaosiujposoifj": [], "solut": [13, 23], "sourc": 
[0, 1, 2, 3, 4, 5, 7, 8, 13, 16, 17, 18, 19, 21, 22], "specif": [13, 15], "specifi": [15, 18, 21], "sphinx": [0, 1, 2, 3, 4, 5, 7, 8], "sphx_glr_auto_examples_basicrag_customprompt": 25, "sphx_glr_auto_examples_basicrag_fewshotprompt": 25, "sphx_glr_auto_examples_basicrag_ingest": 25, "sphx_glr_auto_examples_basicrag_refin": 25, "sphx_glr_auto_examples_basicrag_stuff": 25, "spinner": 8, "split": 18, "split_doc": [17, 18], "splitter": [17, 24], "src": [12, 16], "st": 8, "start": [14, 16, 18, 23], "state": 8, "static": 22, "statist": 8, "statu": 19, "std_out": 18, "step": [8, 15], "storag": 18, "store": [11, 13, 17, 18, 19, 23], "store_path": [17, 18, 19], "str": [8, 15, 18, 19, 21, 22], "straightforward": 15, "strategi": [15, 17, 18], "streamlit": [7, 8], "stremlit": 8, "strength": 15, "string": [18, 19, 22], "stuff": [0, 1, 5, 6, 18, 22], "stuff_cal": [17, 22], "stuff_chain_langchain_illustr": [], "stuff_doc": [17, 18, 22, 24], "submit": 8, "submodul": [17, 18, 24], "subprocess": 21, "suffix": [1, 17, 18], "support": [11, 18, 19, 23], "sure": [14, 16], "sy": 8, "sync": 2, "synchron": 2, "system": 8, "tab": 8, "tab1": 8, "tab2": 8, "tabl": [11, 18], "table_as_html": [15, 18], "table_numb": 15, "take": 18, "task": [13, 17, 18, 22], "tax": 15, "techniqu": 13, "tell": 15, "temperatur": [17, 18], "templat": [0, 17, 18], "test": [2, 3, 4, 19], "test_connect": [8, 18, 19], "text": [13, 15, 17, 24], "text_concat": [17, 18], "text_input": 8, "text_splitt": [17, 18], "textsplitt": [17, 18, 24], "thank": 23, "them": 18, "thi": [0, 1, 2, 3, 4, 8, 13, 14, 15, 16, 18, 19, 22], "time": 18, "titl": 8, "toggl": 8, "token": [14, 18], "top": [8, 18, 19], "top_k": [8, 17, 18, 19], "total": [6, 9, 25], "tradit": 13, "tradition": 13, "transform": [18, 19], "travers": 18, "tree": 18, "true": [2, 3, 4, 8, 15, 18, 19], "tupl": 19, "two": 14, "type": [8, 15, 16, 18, 19, 22], "typic": 18, "ubuntu": [18, 19], "ui": 8, "under": 16, "union": 18, "uniqu": 18, "unstructur": 15, "until": 18, "up": 
[1, 8, 18], "updat": 21, "us": [0, 1, 2, 3, 4, 5, 7, 8, 11, 13, 15, 16, 18, 19, 21, 22], "user": 14, "util": [17, 24], "uuid": 18, "uuid5": 18, "v": 18, "v1": 18, "valid": 18, "validate_doc_chain": [17, 18], "validate_exampl": [17, 18], "validate_input_kei": [17, 18], "validate_output_kei": [17, 18], "validate_task": [17, 18], "valu": [8, 15, 18], "valueerror": 18, "variou": 23, "vector": [2, 11, 13, 18, 19, 23], "vectordb": [2, 3, 4, 8, 16, 17, 24], "vectorstor": [16, 19], "verbos": [18, 19], "vertical_strategi": 15, "volume_2k": [18, 19], "wai": [14, 23], "want": 14, "we": 12, "web": 8, "well": [13, 15], "were": 15, "what": [1, 8], "where": [14, 21], "wherea": 15, "whether": [15, 18], "which": [15, 18], "while": [3, 4, 15], "who": 1, "wider": 15, "width": [], "with_scor": [8, 18, 19], "word": 15, "work": 15, "wrapper": 19, "write": 8, "xl": [16, 19], "yann": 1, "yet": 12, "you": [14, 16], "your": 8, "your_collection_nam": 2, "yourself": 14, "zip": [5, 7]}, "titles": ["Custom Prompts", "Custom Few-Shot Prompts", "Document Ingestion", "Refine Chain", "Stuff Chain", "Basic-RAG Cookbooks", "Computation times", "Retriever-GUI Cookbooks", "Retriever GUI", "Computation times", "Cookbooks", "Get Started", "Installation", "GRAG Overview", "LLMs", "Parse PDF", "Vector Stores", "GRAG", "Components", "VectorDB", "Prompts", "Quantize", "RAG", "Welcome to GRAG\u2019s documentation!", "grag", "Computation times"], "titleterms": {"": 23, "To": 14, "attribut": 15, "augment": 13, "base": 19, "basic": [5, 22], "chain": [3, 4], "chroma": [16, 19], "client": 19, "complex": 15, "compon": 18, "comput": [6, 9, 25], "content": [17, 18, 19, 20, 21, 22, 23], "cookbook": [5, 7, 10], "custom": [0, 1], "data": 16, "deeplak": [16, 19], "document": [2, 23], "embed": [16, 18], "few": 1, "gener": 13, "get": 11, "grag": [13, 17, 23, 24], "gui": [7, 8], "how": 14, "huggingfac": 14, "indic": 23, "ingest": [2, 16], "instal": 12, "layout": 15, "limit": 15, "llamacpp": 14, "llm": [14, 18], 
"methodologi": 15, "model": 14, "modul": [17, 18, 19, 20, 21, 22], "overview": 13, "pars": [15, 18], "pdf": [15, 18], "prompt": [0, 1, 18, 20], "quantiz": [14, 21], "rag": [5, 13, 22], "refin": 3, "retriev": [7, 8, 13, 18], "run": 14, "shot": 1, "splitter": 18, "start": 11, "store": 16, "stuff": 4, "submodul": [19, 21, 22], "support": 16, "tabl": [15, 23], "text": 18, "time": [6, 9, 25], "us": 14, "util": [18, 21], "vector": 16, "vectordb": [18, 19], "welcom": 23}})
\ No newline at end of file
diff --git a/src/docs/_static/basic_RAG_pipeline.png b/src/docs/_static/basic_RAG_pipeline.png
new file mode 100644
index 0000000..9a9564c
Binary files /dev/null and b/src/docs/_static/basic_RAG_pipeline.png differ
diff --git a/src/docs/auto_examples/Basic-RAG/BasicRAG_CustomPrompt_codeobj.pickle b/src/docs/auto_examples/Basic-RAG/BasicRAG_CustomPrompt_codeobj.pickle
index be58574..14f2765 100644
Binary files a/src/docs/auto_examples/Basic-RAG/BasicRAG_CustomPrompt_codeobj.pickle and b/src/docs/auto_examples/Basic-RAG/BasicRAG_CustomPrompt_codeobj.pickle differ
diff --git a/src/docs/auto_examples/Basic-RAG/BasicRAG_ingest_codeobj.pickle b/src/docs/auto_examples/Basic-RAG/BasicRAG_ingest_codeobj.pickle
index 9c6b89b..e2ccc64 100644
Binary files a/src/docs/auto_examples/Basic-RAG/BasicRAG_ingest_codeobj.pickle and b/src/docs/auto_examples/Basic-RAG/BasicRAG_ingest_codeobj.pickle differ
diff --git a/src/docs/auto_examples/Basic-RAG/BasicRAG_refine_codeobj.pickle b/src/docs/auto_examples/Basic-RAG/BasicRAG_refine_codeobj.pickle
index 4f5082a..9c625ec 100644
Binary files a/src/docs/auto_examples/Basic-RAG/BasicRAG_refine_codeobj.pickle and b/src/docs/auto_examples/Basic-RAG/BasicRAG_refine_codeobj.pickle differ
diff --git a/src/docs/auto_examples/Basic-RAG/BasicRAG_stuff_codeobj.pickle b/src/docs/auto_examples/Basic-RAG/BasicRAG_stuff_codeobj.pickle
index 4f5082a..9c625ec 100644
Binary files a/src/docs/auto_examples/Basic-RAG/BasicRAG_stuff_codeobj.pickle and b/src/docs/auto_examples/Basic-RAG/BasicRAG_stuff_codeobj.pickle differ
diff --git a/src/docs/auto_examples/Retriver-GUI/retriever_app_codeobj.pickle b/src/docs/auto_examples/Retriver-GUI/retriever_app_codeobj.pickle
index ead1f48..5a82ffb 100644
Binary files a/src/docs/auto_examples/Retriver-GUI/retriever_app_codeobj.pickle and b/src/docs/auto_examples/Retriver-GUI/retriever_app_codeobj.pickle differ
diff --git a/src/docs/get_started.introduction.rst b/src/docs/get_started.introduction.rst
index 74c2090..d3c4197 100644
--- a/src/docs/get_started.introduction.rst
+++ b/src/docs/get_started.introduction.rst
@@ -5,7 +5,20 @@ GRAG provides an implementation of Retrieval-Augmented Generation that is comple
Since it does not use any external services or APIs, this enables a cost-saving solution as well a solution to data privacy concerns.
For more information, refer to `our readme `_.
-Retrieval-Augmented Generation
-##############################
+Retrieval-Augmented Generation (RAG)
+####################################
-Retrieval-Augmented Generation
\ No newline at end of file
+Retrieval-Augmented Generation (RAG) is a machine-learning technique that enhances large language models (LLMs) by incorporating external data.
+
+In RAG, a model first retrieves relevant documents or data from a large corpus and then uses this information to guide the generation of new text. This approach allows the model to produce more informed, accurate, and contextually appropriate responses.
+
+By leveraging both the retrieval of existing knowledge and the generative capabilities of neural networks, RAG models can improve over traditional generation methods, particularly in tasks requiring deep domain-specific knowledge or factual accuracy.
+
+.. figure:: _static/basic_RAG_pipeline.png
+ :width: 800
+ :alt: Basic-RAG Pipeline
+ :align: center
+
+ Illustration of a basic RAG pipeline
+
+Traditionally, RAG uses a vector database (vector store) to store and retrieve the documents that are supplied to the model during generation.
diff --git a/src/docs/get_started.llms.rst b/src/docs/get_started.llms.rst
index c284755..b79074e 100644
--- a/src/docs/get_started.llms.rst
+++ b/src/docs/get_started.llms.rst
@@ -1,4 +1,4 @@
- `LLMs
+LLMs
=====
GRAG offers two ways to run LLMs locally:
@@ -17,10 +17,10 @@ provide an auth token*
To run LLMs using LlamaCPP
#############################
LlamaCPP requires models in the form of `.gguf` file. You can either download these model files online,
-or
+or **quantize** the model yourself following the instructions below.
-How to quantize models.
-************************
+How to quantize models
+***********************
To quantize the model, run:
``python -m grag.quantize.quantize``
@@ -34,4 +34,4 @@ After running the above command, user will be prompted with the following:
* If the user has the model downloaded locally, then user will be instructed to copy the model and input the name of the model directory.
-3.Finally, the user will be prompted to enter **quantization** settings (recommended Q5_K_M or Q4_K_M, etc.). For more details, check `llama.cpp/examples/quantize/quantize.cpp `_.
+3. Finally, the user will be prompted to enter **quantization** settings (recommended Q5_K_M or Q4_K_M, etc.). For more details, check `llama.cpp/examples/quantize/quantize.cpp `_.
diff --git a/src/docs/get_started.parse_pdf.rst b/src/docs/get_started.parse_pdf.rst
index 94fd2cf..4ace62b 100644
--- a/src/docs/get_started.parse_pdf.rst
+++ b/src/docs/get_started.parse_pdf.rst
@@ -1,42 +1,61 @@
- `Parse Pdf
-=====
+Parse PDF
+=========
The parsing and partitioning were primarily done using the unstructured.io library, which is designed for this purpose. However, for PDFs with complex layouts, such as nested tables or tax forms, the pdfplumber and pytesseract libraries were employed to improve the parsing accuracy.
The class has several attributes that control the behavior of the parsing and partitioning process.
Attributes
-#############################
-* single_text_out (bool): If True, all text elements are combined into a single output document. The default value is True.
-* strategy (str): The strategy for PDF partitioning. The default is "hi_res" for better accuracy
-* extract_image_block_types (list): A list of elements to be extracted as image blocks. By default, it includes "Image" and "Table".The default value is True.
-* infer_table_structure (bool): Whether to extract tables during partitioning. The default value is True.
-* extract_images (bool): Whether to extract images. The default value is True.
-* image_output_dir (str): The directory to save extracted images, if any.
-* add_captions_to_text (bool): Whether to include figure captions in the text output. The default value is True.
-* add_captions_to_blocks (bool): Whether to add captions to table and image blocks. The default value is True.
-* add_caption_first (bool): Whether to place captions before their corresponding image or table in the output. The default value is True.
-* table_as_html (bool): Whether to represent tables as HTML.
+##########
+
+- single_text_out (bool): If True, all text elements are combined into a single output document. The default value is True.
+
+- strategy (str): The strategy for PDF partitioning. The default is "hi_res" for better accuracy.
+
+- extract_image_block_types (list): A list of elements to be extracted as image blocks. By default, it includes "Image" and "Table".
+
+- infer_table_structure (bool): Whether to extract tables during partitioning. The default value is True.
+
+- extract_images (bool): Whether to extract images. The default value is True.
+
+- image_output_dir (str): The directory to save extracted images, if any.
+
+- add_captions_to_text (bool): Whether to include figure captions in the text output. The default value is True.
+
+- add_captions_to_blocks (bool): Whether to add captions to table and image blocks. The default value is True.
+
+- add_caption_first (bool): Whether to place captions before their corresponding image or table in the output. The default value is True.
+
+- table_as_html (bool): Whether to represent tables as HTML.
Parsing Complex PDF Layouts
+###########################
+
While unstructured.io performed well in parsing PDFs with straightforward layouts, PDFs with complex layouts, such as nested tables or tax forms, were not parsed accurately. To address this issue, the pdfplumber and pytesseract libraries were employed.
Table Parsing Methodology
-#############################
+=========================
+
For each page in the PDF file, the find_tables method is called with specific table settings to find the tables on that page. The table settings used are:
-* "vertical_strategy": "text": This setting tells the function to detect tables based on the text content.
-* "horizontal_strategy": "lines": This setting tells the function to detect tables based on the horizontal lines.
-* "min_words_vertical": 3: This setting specifies the minimum number of words required to consider a row as part of a table.
-For each table found on the page, the following steps are performed:
-======
+- ``"vertical_strategy": "text"``: This setting tells the function to detect tables based on the text content.
+
+- ``"horizontal_strategy": "lines"``: This setting tells the function to detect tables based on the horizontal lines.
+
+- ``"min_words_vertical": 3``: This setting specifies the minimum number of words required to consider a row as part of a table.
+
+**For each table found on the page, the following steps are performed:**
+
1. The table area is cropped from the page using the crop method and the bbox (bounding box) of the table.
-2. The text content of the cropped table area is extracted using the extract_text method with layout=True.
-3. A dictionary is created with the table_number and extracted_text of the table, and it is appended to the extracted_tables_in_page list.
-After processing all the tables on the page, a dictionary is created with the page_number and the list of extracted_tables_in_page, and it is appended to the extracted_tables list.
+
+2. The text content of the cropped table area is extracted using the ``extract_text`` method with ``layout=True``.
+
+3. A dictionary is created with the ``table_number`` and ``extracted_text`` of the table, and it is appended to the ``extracted_tables_in_page`` list.
+After processing all the tables on the page, a dictionary is created with the ``page_number`` and the list of ``extracted_tables_in_page``, and it is appended to the ``extracted_tables`` list.
Finally, the extracted_tables list is returned, which contains all the extracted tables from the PDF file, organized by page and table number.
Limitations
-#############################
-While the table parsing methodology using pdfplumber could process most tables, it could not parse every table layout accurately. The table settings need to be adjusted for different types of table layouts. Additionally, pdfplumber could not extract figure captions, whereas unstructured.io could.
+===========
+
+While the table parsing methodology using ``pdfplumber`` could process most tables, it could not parse every table layout accurately. The table settings need to be adjusted for different types of table layouts. Additionally, ``pdfplumber`` could not extract figure captions, whereas ``unstructured.io`` could.
Future work may involve developing a more robust and flexible table parsing algorithm that can handle a wider range of table layouts and integrate seamlessly with the ParsePDF class to leverage the strengths of both unstructured.io and pdfplumber libraries.
diff --git a/src/docs/get_started.rst b/src/docs/get_started.rst
index ca80073..19a2d99 100644
--- a/src/docs/get_started.rst
+++ b/src/docs/get_started.rst
@@ -5,6 +5,7 @@ Get Started
get_started.introduction
get_started.installation
+ get_started.parse_pdf
get_started.llms
get_started.vectordb
diff --git a/src/docs/get_started.vectordb.rst b/src/docs/get_started.vectordb.rst
index 93e7518..02f83c9 100644
--- a/src/docs/get_started.vectordb.rst
+++ b/src/docs/get_started.vectordb.rst
@@ -1,5 +1,3 @@
-.. _Vector Stores:
-
Vector Stores
===============
diff --git a/src/grag/components/embedding.py b/src/grag/components/embedding.py
index 33f5068..a29e2e2 100644
--- a/src/grag/components/embedding.py
+++ b/src/grag/components/embedding.py
@@ -1,8 +1,8 @@
"""Class for embedding.
-This module provides.
+This module provides:
-- **Embedding**
+- Embedding
"""
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
diff --git a/src/grag/components/multivec_retriever.py b/src/grag/components/multivec_retriever.py
index dd9d240..0f59f49 100644
--- a/src/grag/components/multivec_retriever.py
+++ b/src/grag/components/multivec_retriever.py
@@ -1,6 +1,7 @@
"""Class for retriever.
This module provides:
+
- Retriever
"""
diff --git a/src/grag/components/parse_pdf.py b/src/grag/components/parse_pdf.py
index dc30f8a..4344566 100644
--- a/src/grag/components/parse_pdf.py
+++ b/src/grag/components/parse_pdf.py
@@ -1,6 +1,7 @@
"""Classes for parsing files.
This module provides:
+
- ParsePDF
"""
diff --git a/src/grag/components/prompt.py b/src/grag/components/prompt.py
index 3bd0f2b..c6a7fe3 100644
--- a/src/grag/components/prompt.py
+++ b/src/grag/components/prompt.py
@@ -1,7 +1,9 @@
"""Classes for prompts.
This module provides:
+
- Prompt - for generic prompts
+
- FewShotPrompt - for few-shot prompts
"""
diff --git a/src/grag/components/text_splitter.py b/src/grag/components/text_splitter.py
index cc177d9..48d2d04 100644
--- a/src/grag/components/text_splitter.py
+++ b/src/grag/components/text_splitter.py
@@ -1,6 +1,7 @@
"""Class for splitting/chunking text.
This module provides:
+
- TextSplitter
"""
diff --git a/src/grag/components/utils.py b/src/grag/components/utils.py
index 958dc35..44c971d 100644
--- a/src/grag/components/utils.py
+++ b/src/grag/components/utils.py
@@ -1,9 +1,13 @@
"""Utils functions.
This module provides:
+
- stuff_docs: concats langchain documents into string
+
- load_prompt: loads json prompt to langchain prompt
+
- find_config_path: finds the path of the 'config.ini' file by traversing up the directory tree from the current path.
+
- get_config: retrieves and parses the configuration settings from the 'config.ini' file.
"""
diff --git a/src/grag/components/vectordb/base.py b/src/grag/components/vectordb/base.py
index 1258b6f..420a1b7 100644
--- a/src/grag/components/vectordb/base.py
+++ b/src/grag/components/vectordb/base.py
@@ -1,6 +1,7 @@
"""Abstract base class for vector database clients.
This module provides:
+
- VectorDB
"""
diff --git a/src/grag/components/vectordb/chroma_client.py b/src/grag/components/vectordb/chroma_client.py
index 09969ca..247bac9 100644
--- a/src/grag/components/vectordb/chroma_client.py
+++ b/src/grag/components/vectordb/chroma_client.py
@@ -1,6 +1,7 @@
"""Class for Chroma vector database.
This module provides:
+
- ChromaClient
"""
diff --git a/src/grag/components/vectordb/deeplake_client.py b/src/grag/components/vectordb/deeplake_client.py
index da49aa9..e0d2df2 100644
--- a/src/grag/components/vectordb/deeplake_client.py
+++ b/src/grag/components/vectordb/deeplake_client.py
@@ -1,6 +1,7 @@
"""Class for DeepLake vector database.
This module provides:
+
- DeepLakeClient
"""
diff --git a/src/grag/rag/basic_rag.py b/src/grag/rag/basic_rag.py
index 1b45d9a..f55b5e2 100644
--- a/src/grag/rag/basic_rag.py
+++ b/src/grag/rag/basic_rag.py
@@ -1,6 +1,7 @@
"""Class for Basic RAG.
This module provides:
+
- BasicRAG
"""