Skip to content

Commit

Permalink
feat: integrate nano-graphrag (#433)
Browse files Browse the repository at this point in the history
* add nano graph-rag

* ignore entities for relevant context reference

* refactor and add local model as default nano-graphrag

* feat: add kotaemon llm & embedding integration with nanographrag

* fix: add env var for nano GraphRAG

---------

Co-authored-by: Tadashi <tadashi@cinnamon.is>
  • Loading branch information
cin-klein and taprosoft authored Oct 30, 2024
1 parent 19b386b commit 66e5656
Show file tree
Hide file tree
Showing 7 changed files with 465 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ COHERE_API_KEY=<COHERE_API_KEY>
# settings for local models
LOCAL_MODEL=llama3.1:8b
LOCAL_MODEL_EMBEDDINGS=nomic-embed-text
LOCAL_EMBEDDING_MODEL_DIM=768
LOCAL_EMBEDDING_MODEL_MAX_TOKENS=8192

# settings for GraphRAG
GRAPHRAG_API_KEY=<YOUR_OPENAI_KEY>
Expand Down
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,22 @@ documents and developers who want to build their own RAG pipeline.
### Setup GraphRAG

> [!NOTE]
> Currently GraphRAG feature only works with OpenAI or Ollama API.
> The official MS GraphRAG indexing only works with the OpenAI or Ollama API.
> We recommend that most users use the NanoGraphRAG implementation for straightforward integration with Kotaemon.

<details>

<summary>Setup Nano GRAPHRAG</summary>

- Install nano-GraphRAG: `pip install nano-graphrag`
- Launch Kotaemon with the `USE_NANO_GRAPHRAG=true` environment variable set.
- Set your default LLM and embedding models in the Resources settings; NanoGraphRAG will pick them up automatically.

</details>

<details>

<summary>Setup MS GRAPHRAG</summary>

- **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:

Expand All @@ -181,6 +196,8 @@ documents and developers who want to build their own RAG pipeline.
- **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. You can do this directly in your environment or by adding it to a `.env` file.
- **Using Local Models and Custom Settings**: If you want to use GraphRAG with local models (like `Ollama`) or customize the default LLM and other configurations, set the `USE_CUSTOMIZED_GRAPHRAG_SETTING` environment variable to true. Then, adjust your settings in the `settings.yaml.example` file.

</details>

### Setup Local Models (for local/private RAG)

See [Local model setup](docs/local_model.md).
Expand Down
38 changes: 30 additions & 8 deletions flowsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,32 +284,54 @@
},
}


# Opt-in switch for the nano-graphrag backend; read through `config` and
# treated as False when the setting is absent.
USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
# Dotted path of the GraphRAG index class chosen by the switch above.
GRAPHRAG_INDEX_TYPE = (
    "ktem.index.file.graph.NanoGraphRAGIndex"
    if USE_NANO_GRAPHRAG
    else "ktem.index.file.graph.GraphRAGIndex"
)
KH_INDEX_TYPES = [
"ktem.index.file.FileIndex",
"ktem.index.file.graph.GraphRAGIndex",
GRAPHRAG_INDEX_TYPE,
]
KH_INDICES = [

GRAPHRAG_INDEX = (
{
"name": "File",
"name": "GraphRAG",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
"index_type": "ktem.index.file.FileIndex",
},
"index_type": "ktem.index.file.graph.GraphRAGIndex",
}
if not USE_NANO_GRAPHRAG
else {
"name": "NanoGraphRAG",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
"index_type": "ktem.index.file.graph.NanoGraphRAGIndex",
}
)

KH_INDICES = [
{
"name": "GraphRAG",
"name": "File",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
},
"index_type": "ktem.index.file.graph.GraphRAGIndex",
"index_type": "ktem.index.file.FileIndex",
},
GRAPHRAG_INDEX,
]
3 changes: 2 additions & 1 deletion libs/ktem/ktem/index/file/graph/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .graph_index import GraphRAGIndex
from .nano_graph_index import NanoGraphRAGIndex

__all__ = ["GraphRAGIndex"]
__all__ = ["GraphRAGIndex", "NanoGraphRAGIndex"]
26 changes: 26 additions & 0 deletions libs/ktem/ktem/index/file/graph/nano_graph_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from typing import Any

from ..base import BaseFileIndexRetriever
from .graph_index import GraphRAGIndex
from .nano_pipelines import NanoGraphRAGIndexingPipeline, NanoGraphRAGRetrieverPipeline


class NanoGraphRAGIndex(GraphRAGIndex):
    """GraphRAG index subclass wired to the nano-graphrag pipelines.

    Overrides the hooks of ``GraphRAGIndex`` that choose the indexing and
    retriever pipeline classes, and builds retrievers from the nano variant.
    """

    def _setup_indexing_cls(self):
        """Select the nano-graphrag pipeline class for indexing."""
        self._indexing_pipeline_cls = NanoGraphRAGIndexingPipeline

    def _setup_retriever_cls(self):
        """Register the nano-graphrag retriever as the only retriever class."""
        self._retriever_pipeline_cls = [NanoGraphRAGRetrieverPipeline]

    def get_retriever_pipelines(
        self, settings: dict, user_id: int, selected: Any = None
    ) -> list["BaseFileIndexRetriever"]:
        """Build the retriever pipelines for the given selection.

        Args:
            settings: Not read by this implementation.
            user_id: Not read by this implementation.
            selected: Three-item value; only its middle item is used, and it
                is forwarded to the retriever as ``file_ids``.
                NOTE(review): the ``None`` default cannot be unpacked, so
                callers must always supply a three-item value.

        Returns:
            A one-element list holding a ``NanoGraphRAGRetrieverPipeline``
            built with the file ids and the ``"Index"`` resource.
        """
        # Only the middle element of the selection triple is needed here.
        _, chosen_file_ids, _ = selected
        return [
            NanoGraphRAGRetrieverPipeline(
                file_ids=chosen_file_ids,
                Index=self._resources["Index"],
            )
        ]
Loading

0 comments on commit 66e5656

Please sign in to comment.