2 changes: 1 addition & 1 deletion .github/workflows/gh-pages.yml
@@ -6,7 +6,7 @@ permissions:
  contents: write

env:
  PYTHON_VERSION: "3.11"
  PYTHON_VERSION: "3.12"

jobs:
build:
78 changes: 78 additions & 0 deletions .github/workflows/python-checks.yml
@@ -0,0 +1,78 @@
name: Python Build and Type Check
on:
  push:
    branches:
      - "**/main" # match branches like feature/main
      - "main" # match the main branch
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - ready_for_review
    branches:
      - "**/main"
      - "main"
    paths-ignore:
      - "**/*.md"
      - ".semversioner/**"

permissions:
  contents: read
  pull-requests: read

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # Only run for the latest commit
  cancel-in-progress: true

jobs:
  python-ci:
    # skip draft PRs
    if: github.event.pull_request.draft == false
    strategy:
      matrix:
        python-version: ["3.11", "3.12"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
      DEBUG: 1

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: |
            python:
              - 'graphrag/**/*'
              - 'uv.lock'
              - 'pyproject.toml'
              - '**/*.py'
              - '**/*.toml'
              - '**/*.ipynb'
              - '.github/workflows/python*.yml'
              - 'tests/**/*'

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Install dependencies
        shell: bash
        run: |
          uv sync --all-packages

      - name: Check
        run: |
          uv run poe check

      - name: Build
        run: |
          uv build --all-packages
3 changes: 1 addition & 2 deletions .github/workflows/python-integration-tests.yml
@@ -32,7 +32,7 @@ jobs:
    if: github.event.pull_request.draft == false
    strategy:
      matrix:
        python-version: ["3.10"]
        python-version: ["3.12"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # continue running all jobs even if one fails
    env:
@@ -68,7 +68,6 @@ jobs:
        shell: bash
        run: |
          uv sync --all-packages
          uv pip install gensim

      - name: Build
        run: |
3 changes: 1 addition & 2 deletions .github/workflows/python-notebook-tests.yml
@@ -32,7 +32,7 @@ jobs:
    if: github.event.pull_request.draft == false
    strategy:
      matrix:
        python-version: ["3.10"]
        python-version: ["3.12"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
@@ -68,7 +68,6 @@ jobs:
        shell: bash
        run: |
          uv sync --all-packages
          uv pip install gensim

      - name: Notebook Test
        run: |
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
@@ -6,7 +6,7 @@ on:
    branches: [main]

env:
  PYTHON_VERSION: "3.10"
  PYTHON_VERSION: "3.12"

jobs:
  publish:
3 changes: 1 addition & 2 deletions .github/workflows/python-smoke-tests.yml
@@ -32,7 +32,7 @@ jobs:
    if: github.event.pull_request.draft == false
    strategy:
      matrix:
        python-version: ["3.10"]
        python-version: ["3.12"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
@@ -73,7 +73,6 @@ jobs:
        shell: bash
        run: |
          uv sync --all-packages
          uv pip install gensim

      - name: Build
        run: |
@@ -1,4 +1,4 @@
name: Python CI
name: Python Unit Tests
on:
  push:
    branches:
@@ -32,7 +32,7 @@ jobs:
    if: github.event.pull_request.draft == false
    strategy:
      matrix:
        python-version: ["3.10", "3.11"] # add 3.12 once gensim supports it. TODO: watch this issue - https://github.com/piskvorky/gensim/issues/3510
        python-version: ["3.12"]
        os: [ubuntu-latest, windows-latest]
      fail-fast: false # Continue running all jobs even if one fails
    env:
@@ -68,15 +68,6 @@ jobs:
        shell: bash
        run: |
          uv sync --all-packages
          uv pip install gensim

      - name: Check
        run: |
          uv run poe check

      - name: Build
        run: |
          uv build --all-packages

      - name: Unit Test
        run: |
16 changes: 8 additions & 8 deletions docs/config/models.md
@@ -6,9 +6,9 @@ This page contains information on selecting a model to use and options to supply

GraphRAG was built and tested using OpenAI models, so this is the default model set we support. This is not intended to be a limiter or statement of quality or fitness for your use case, only that it's the set we are most familiar with for prompting, tuning, and debugging.

Starting with version 2.6.0, GraphRAG supports using [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models, though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).
GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models, though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode).

Example using LiteLLm as the language model tool for GraphRAG:
Example using LiteLLM as the language model manager for GraphRAG:

```yaml
models:
@@ -37,7 +37,7 @@ See [Detailed Configuration](yaml.md) for more details on configuration. [View L

## Model Selection Considerations

GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4, gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are now supported with GraphRAG 2.6.0 and onwards through the use of LiteLLM, but the suite of gpt-4 series of models from OpenAI remains the most tested and supported suite of models for GraphRAG.
GraphRAG has been most thoroughly tested with the gpt-4 series of models from OpenAI, including gpt-4, gpt-4-turbo, gpt-4o, and gpt-4o-mini. Our [arXiv paper](https://arxiv.org/abs/2404.16130), for example, performed quality evaluation using gpt-4-turbo. As stated above, non-OpenAI models are supported through the use of LiteLLM, but the suite of gpt-4 series of models from OpenAI remains the most tested and supported suite of models for GraphRAG – in other words, these are the models we know best and can help resolve issues with.

Versions of GraphRAG before 2.2.0 made extensive use of `max_tokens` and `logit_bias` to control generated response length or content. The introduction of the o-series of models added new, non-compatible parameters because these models include a reasoning component that has different consumption patterns and response generation attributes than non-reasoning models. GraphRAG 2.2.0 now supports these models, but there are important differences that need to be understood before you switch.

@@ -85,30 +85,30 @@ global_search:

Another option would be to avoid using a language model at all for the graph extraction, instead using the `fast` [indexing method](../index/methods.md) that uses NLP for portions of the indexing phase in lieu of LLM APIs.
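As a rough sketch of selecting that method (assuming the `--method` flag described on the linked page; check the CLI help for the exact invocation):

```sh
# run the NLP-based fast method instead of LLM-based graph extraction
graphrag index --root ./ragtest --method fast
```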

## Using Non-OpenAI Models
## Using Custom Models

As shown above, non-OpenAI models may be used via LiteLLM starting with GraphRAG version 2.6.0 but cases may still exist in which some users wish to use models not supported by LiteLLM. There are two approaches one can use to connect to unsupported models:
LiteLLM supports hundreds of models, but cases may still exist in which some users wish to use models not supported by LiteLLM. There are two approaches one can use to connect to unsupported models:

### Proxy APIs

Many users have used platforms such as [ollama](https://ollama.com/) and [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy) to proxy the underlying model HTTP calls to a different model provider. This seems to work reasonably well, but we frequently see issues with malformed responses (especially JSON), so if you do this please understand that your model needs to reliably return the specific response formats that GraphRAG expects. If you're having trouble with a model, you may need to try prompting to coax the format, or intercepting the response within your proxy to try and handle malformed responses.
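As an illustrative sketch only (the endpoint, model id, and key below are placeholders, and field names should be checked against the [Detailed Configuration](yaml.md) page), a proxied setup might look like:

```yaml
models:
  default_chat_model:
    model: local-llama                # placeholder: whatever model id your proxy exposes
    api_base: http://localhost:4000   # e.g. a LiteLLM Proxy Server or ollama-compatible endpoint
    api_key: placeholder-key          # many local proxies accept any non-empty key
```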

### Model Protocol

As of GraphRAG 2.0.0, we support model injection through the use of a standard chat and embedding Protocol and an accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
We support model injection through the use of a standard chat and embedding Protocol and accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.

- Our Protocol is [defined here](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/protocol/base.py)
- We have a simple mock implementation in our tests that you can [reference here](https://github.com/microsoft/graphrag/blob/main/tests/mock_provider.py)

Once you have a model implementation, you need to register it with our ChatModelFactory or EmbeddingModelFactory:

```python
class MyCustomModel:
class MyCustomChatModel:
    ...
    # implementation

# elsewhere...
ChatModelFactory.register("my-custom-chat-model", lambda **kwargs: MyCustomModel(**kwargs))
ChatModelFactory.register("my-custom-chat-model", MyCustomChatModel)
```

Then in your config you can reference the type name you used:
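A minimal sketch of that reference (only `type` is shown; any other settings your implementation expects would sit alongside it):

```yaml
models:
  default_chat_model:
    type: my-custom-chat-model  # the name registered with ChatModelFactory above
```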
6 changes: 3 additions & 3 deletions docs/config/yaml.md
@@ -21,7 +21,7 @@ default_chat_model:

### models

This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them differentially in the workflow steps.
This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them independently in the workflow steps.

For example:
```yml
@@ -173,7 +173,7 @@ Where to put all vectors for the system. Configured for lancedb by default. This
- `audience` **str** (only for AI Search) - Audience for managed identity token if managed identity authentication is used.
- `index_prefix` **str** - (optional) A prefix for the indexes you will create for embeddings. This stores all indexes (tables) for a given dataset ingest.
- `database_name` **str** - (cosmosdb only) Name of the database.
- `embeddings_schema` **list[dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `embeddings_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `<supported_embedding>`:
- `index_name` **str**: (optional) - Name for the specific embedding index table.
- `id_field` **str**: (optional) - Field name to be used as id. Default=`id`
@@ -332,7 +332,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c
#### Fields

- `embeddings` **bool** - Export embeddings snapshots to parquet.
- `graphml` **bool** - Export graph snapshots to GraphML.
- `graphml` **bool** - Export graph snapshot to GraphML.

## Query

2 changes: 1 addition & 1 deletion docs/developing.md
@@ -13,7 +13,7 @@

```sh
# install python dependencies
uv sync
uv sync --all-packages
```

## Execute the Indexing Engine
5 changes: 2 additions & 3 deletions docs/examples_notebooks/api_overview.ipynb
@@ -28,11 +28,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{
5 changes: 2 additions & 3 deletions docs/examples_notebooks/input_documents.ipynb
@@ -30,11 +30,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{
13 changes: 3 additions & 10 deletions docs/get_started.md
@@ -6,12 +6,7 @@

[Python 3.10-3.12](https://www.python.org/downloads/)

To get started with the GraphRAG system, you have a few options:

👉 [Install from pypi](https://pypi.org/project/graphrag/). <br/>
👉 [Use it from source](developing.md)<br/>

The following is a simple end-to-end example for using the GraphRAG system, using the install from pypi option.
The following is a simple end-to-end example for using GraphRAG on the command line after installing from [pypi](https://pypi.org/project/graphrag/).

It shows how to use the system to index some text, and then use the indexed data to answer questions about the documents.

@@ -22,7 +17,6 @@ pip install graphrag
```

# Running the Indexer

We need to set up a data project and some initial configuration. First let's get a sample dataset ready:

```sh
@@ -79,16 +73,15 @@ You will also need to login with [az login](https://learn.microsoft.com/en-us/cl

## Running the Indexing pipeline

Finally we'll run the pipeline!
Now we're ready to run the pipeline!

```sh
graphrag index --root ./christmas
```

![pipeline executing from the CLI](img/pipeline-running.png)

This process will take some time to run. This depends on the size of your input data, what model you're using, and the text chunk size being used (these can be configured in your `settings.yaml` file).
Once the pipeline is complete, you should see a new folder called `./christmas/output` with a series of parquet files.
This process will usually take a few minutes to run. Once the pipeline is complete, you should see a new folder called `./christmas/output` with a series of parquet files.

# Using the Query Engine

22 changes: 12 additions & 10 deletions docs/index/architecture.md
@@ -6,23 +6,25 @@

In order to support the GraphRAG system, the outputs of the indexing engine (in the Default Configuration Mode) are aligned to a knowledge model we call the _GraphRAG Knowledge Model_.
This model is designed to be an abstraction over the underlying data storage technology, and to provide a common interface for the GraphRAG system to interact with.
In normal use-cases the outputs of the GraphRAG Indexer would be loaded into a database system, and the GraphRAG's Query Engine would interact with the database using the knowledge model data-store types.

### Workflows

Because of the complexity of our data indexing tasks, we needed to be able to express our data pipeline as a series of multiple, interdependent workflows.
Below is the core GraphRAG indexing pipeline. Individual workflows are described in detail in the [dataflow](./default_dataflow.md) page.

```mermaid
---
title: Sample Workflow DAG
title: Basic GraphRAG
---
stateDiagram-v2
[*] --> Prepare
Prepare --> Chunk
Chunk --> ExtractGraph
Chunk --> EmbedDocuments
ExtractGraph --> GenerateReports
[*] --> LoadDocuments
LoadDocuments --> ChunkDocuments
ChunkDocuments --> ExtractGraph
ChunkDocuments --> ExtractClaims
ChunkDocuments --> EmbedChunks
ExtractGraph --> DetectCommunities
ExtractGraph --> EmbedEntities
DetectCommunities --> GenerateReports
GenerateReports --> EmbedReports
```

### LLM Caching
@@ -34,11 +36,11 @@ This allows our indexer to be more resilient to network issues, to act idempoten

### Providers & Factories

Several subsystems within GraphRAG use a factory pattern to register and retrieve provider implementations. This allows deep customization to support models, storage, and so on that you may use but isn't built directly into GraphRAG.
Several subsystems within GraphRAG use a factory pattern to register and retrieve provider implementations. This allows deep customization to support your own implementations of models, storage, and so on that we haven't built into the core library.

The following subsystems use a factory pattern that allows you to register your own implementations:

- [language model](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/factory.py) - implement your own `chat` and `embed` methods to use a model provider of choice beyond the built-in OpenAI/Azure support
- [language model](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/factory.py) - implement your own `chat` and `embed` methods to use a model provider of choice beyond the built-in LiteLLM wrapper
- [input reader](https://github.com/microsoft/graphrag/blob/main/graphrag/index/input/factory.py) - implement your own input document reader to support file types other than text, CSV, and JSON
- [cache](https://github.com/microsoft/graphrag/blob/main/graphrag/cache/factory.py) - create your own cache storage location in addition to the file, blob, and CosmosDB ones we provide
- [logger](https://github.com/microsoft/graphrag/blob/main/graphrag/logger/factory.py) - create your own log writing location in addition to the built-in file and blob storage
2 changes: 0 additions & 2 deletions docs/index/byog.md
@@ -16,8 +16,6 @@ The approach described here will be to run a custom GraphRAG workflow pipeline t

See the full entities [table schema](./outputs.md#entities). For graph summarization purposes, you only need id, title, description, and the list of text_unit_ids.

The additional properties are used for optional graph visualization purposes.

### Relationships

See the full relationships [table schema](./outputs.md#relationships). For graph summarization purposes, you only need id, source, target, description, weight, and the list of text_unit_ids.