Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion breaking-changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ This is a summary of changes:
- Collapsed the `vector_store` dict into a single root-level object. This is because we no longer support multi-search, and this dict required a lot of downstream complexity for that single use case.
- Removed the `outputs` block that was also only used for multi-search.
- Most workflows had an undocumented `strategy` config dict that allowed fine tuning of internal settings. These fine tunings are never used and had associated complexity, so we removed it.
- Vector store configuration now allows custom schema per embedded field. This overrides the need for the `container_name` prefix, which caused confusion anyway. Now, the default container name will simply be the embedded field name - if you need something custom, add the `embeddings_schema` block and populate as needed.
- Vector store configuration now allows custom schema per embedded field. This overrides the need for the `container_name` prefix, which caused confusion anyway. Now, the default container name will simply be the embedded field name - if you need something custom, add the `index_schema` block and populate as needed.
- We previously supported the ability to embed any text field in the data model. However, we only ever use text_unit_text, entity_description, and community_full_content, so all others have been removed.
- Removed the `umap` and `embed_graph` blocks which were only used to add x/y fields to the entities. This fixed a long-standing dependency issue with graspologic. If you need x/y positions, see the [visualization guide](https://microsoft.github.io/graphrag/visualization_guide/) for using gephi.
- Removed file filtering from input document loading. This was essentially unused.
Expand Down
6 changes: 2 additions & 4 deletions docs/config/yaml.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,8 @@ Where to put all vectors for the system. Configured for lancedb by default. This
- `url` **str** (only for AI Search) - AI Search endpoint
- `api_key` **str** (optional - only for AI Search) - The AI Search api key to use.
- `audience` **str** (only for AI Search) - Audience for managed identity token if managed identity authentication is used.
- `index_prefix` **str** - (optional) A prefix for the indexes you will create for embeddings. This stores all indexes (tables) for a given dataset ingest.
- `database_name` **str** - (cosmosdb only) Name of the database.
- `embeddings_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `index_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings.
- `<supported_embedding>`:
- `index_name` **str**: (optional) - Name for the specific embedding index table.
- `id_field` **str**: (optional) - Field name to be used as id. Default=`id`
Expand All @@ -193,8 +192,7 @@ For example:
vector_store:
type: lancedb
db_uri: output/lancedb
index_prefix: "christmas-carol"
embeddings_schema:
index_schema:
text_unit_text:
index_name: "text-unit-embeddings"
id_field: "id_custom"
Expand Down
234 changes: 109 additions & 125 deletions docs/examples_notebooks/custom_vector_store.ipynb

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions docs/examples_notebooks/drift_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.drift_search_config import DRIFTSearchConfig\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_entities,\n",
Expand All @@ -36,7 +35,7 @@
")\n",
"from graphrag.query.structured_search.drift_search.search import DRIFTSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"from graphrag.vector_stores.lancedb import LanceDBVectorStore\n",
"from graphrag_vectors.lancedb import LanceDBVectorStore\n",
"\n",
"INPUT_DIR = \"./inputs/operation dulce\"\n",
"LANCEDB_URI = f\"{INPUT_DIR}/lancedb\"\n",
Expand All @@ -61,16 +60,16 @@
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
" vector_store_schema_config=VectorStoreSchemaConfig(index_name=\"entity_description\"),\n",
" db_uri=LANCEDB_URI,\n",
" index_name=\"entity_description\",\n",
")\n",
"description_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"description_embedding_store.connect()\n",
"\n",
"full_content_embedding_store = LanceDBVectorStore(\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"community_full_content\"\n",
" )\n",
" db_uri=LANCEDB_URI,\n",
" index_name=\"community_full_content\",\n",
")\n",
"full_content_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"full_content_embedding_store.connect()\n",
"\n",
"print(f\"Entity count: {len(entity_df)}\")\n",
"entity_df.head()\n",
Expand Down
7 changes: 2 additions & 5 deletions docs/examples_notebooks/local_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"import os\n",
"\n",
"import pandas as pd\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_covariates,\n",
Expand All @@ -33,7 +32,7 @@
" LocalSearchMixedContext,\n",
")\n",
"from graphrag.query.structured_search.local_search.search import LocalSearch\n",
"from graphrag.vector_stores.lancedb import LanceDBVectorStore"
"from graphrag_vectors import IndexSchema, LanceDBVectorStore"
]
},
{
Expand Down Expand Up @@ -101,9 +100,7 @@
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"default-entity-description\"\n",
" )\n",
" index_schema=IndexSchema(index_name=\"default-entity-description\")\n",
")\n",
"description_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
Expand Down
109 changes: 109 additions & 0 deletions packages/graphrag-vectors/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# GraphRAG Vectors

Vector store implementations for GraphRAG.

## Basic Usage

### Using the utility function (recommended)

```python
from graphrag_vectors import (
    create_vector_store,
    IndexSchema,
    VectorStoreConfig,
    VectorStoreType,
)

# Create a vector store using the convenience function
store_config = VectorStoreConfig(
type="lancedb",
db_uri="lance"
)

schema_config = IndexSchema(
index_name="my_index",
vector_size=1536,
)

vector_store = create_vector_store(
    config=store_config,
    index_schema=schema_config,
)

vector_store.connect()
vector_store.create_index()
```

### Using the factory directly

```python
from graphrag_vectors import (
VectorStoreFactory,
vector_store_factory,
VectorStoreType,
IndexSchema,
)

# Create a vector store using the factory
schema_config = IndexSchema(
index_name="my_index",
vector_size=1536,
)

vector_store = vector_store_factory.create(
VectorStoreType.LanceDB,
{
"index_schema": schema_config,
"db_uri": "./lancedb"
}
)

vector_store.connect()
vector_store.create_index()
```

## Supported Vector Stores

- **LanceDB**: Local vector database
- **Azure AI Search**: Azure's managed search service with vector capabilities
- **Azure Cosmos DB**: Azure's NoSQL database with vector search support

## Custom Vector Store

You can register custom vector store implementations:

```python
from graphrag_vectors import (
    IndexSchema,
    VectorStore,
    VectorStoreConfig,
    create_vector_store,
    register_vector_store,
)

class MyCustomVectorStore(VectorStore):
def __init__(self, my_param):
self.my_param = my_param

def connect(self):
# Implementation
pass

def create_index(self):
# Implementation
pass

# ... implement other required methods

# Register your custom implementation
register_vector_store("my_custom_store", MyCustomVectorStore)

# Use your custom vector store
config = VectorStoreConfig(
    type="my_custom_store",
    my_param="something"
)
schema_config = IndexSchema(index_name="my_index")
custom_store = create_vector_store(
    config=config,
    index_schema=schema_config,
)
```

## Configuration

Vector stores are configured using:
- `VectorStoreConfig`: baseline parameters for the store
- `IndexSchema`: Schema configuration for the specific index to create/connect to (index name, field names, vector size)
34 changes: 34 additions & 0 deletions packages/graphrag-vectors/graphrag_vectors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""GraphRAG vector store implementations."""

from graphrag_vectors.index_schema import IndexSchema
from graphrag_vectors.types import TextEmbedder
from graphrag_vectors.vector_store import (
VectorStore,
VectorStoreDocument,
VectorStoreSearchResult,
)
from graphrag_vectors.vector_store_config import VectorStoreConfig
from graphrag_vectors.vector_store_factory import (
VectorStoreFactory,
create_vector_store,
register_vector_store,
vector_store_factory,
)
from graphrag_vectors.vector_store_type import VectorStoreType

__all__ = [
"IndexSchema",
"TextEmbedder",
"VectorStore",
"VectorStoreConfig",
"VectorStoreDocument",
"VectorStoreFactory",
"VectorStoreSearchResult",
"VectorStoreType",
"create_vector_store",
"register_vector_store",
"vector_store_factory",
]
Original file line number Diff line number Diff line change
Expand Up @@ -22,49 +22,59 @@
)
from azure.search.documents.models import VectorizedQuery

from graphrag.data_model.types import TextEmbedder
from graphrag.vector_stores.base import (
BaseVectorStore,
from graphrag_vectors.vector_store import (
VectorStore,
VectorStoreDocument,
VectorStoreSearchResult,
)


class AzureAISearchVectorStore(BaseVectorStore):
class AzureAISearchVectorStore(VectorStore):
"""Azure AI Search vector storage implementation."""

index_client: SearchIndexClient

def connect(self, **kwargs: Any) -> Any:
def __init__(
self,
url: str,
api_key: str | None = None,
audience: str | None = None,
vector_search_profile_name: str = "vectorSearchProfile",
**kwargs: Any,
):
super().__init__(**kwargs)
if not url:
msg = "url must be provided for Azure AI Search."
raise ValueError(msg)
self.url = url
self.api_key = api_key
self.audience = audience
self.vector_search_profile_name = vector_search_profile_name

def connect(self) -> Any:
"""Connect to AI search vector storage."""
url = kwargs["url"]
api_key = kwargs.get("api_key")
audience = kwargs.get("audience")

self.vector_search_profile_name = kwargs.get(
"vector_search_profile_name", "vectorSearchProfile"
audience_arg = (
{"audience": self.audience} if self.audience and not self.api_key else {}
)
self.db_connection = SearchClient(
endpoint=self.url,
index_name=self.index_name,
credential=(
AzureKeyCredential(self.api_key)
if self.api_key
else DefaultAzureCredential()
),
**audience_arg,
)
self.index_client = SearchIndexClient(
endpoint=self.url,
credential=(
AzureKeyCredential(self.api_key)
if self.api_key
else DefaultAzureCredential()
),
**audience_arg,
)

if url:
audience_arg = {"audience": audience} if audience and not api_key else {}
self.db_connection = SearchClient(
endpoint=url,
index_name=self.index_name if self.index_name else "",
credential=(
AzureKeyCredential(api_key) if api_key else DefaultAzureCredential()
),
**audience_arg,
)
self.index_client = SearchIndexClient(
endpoint=url,
credential=(
AzureKeyCredential(api_key) if api_key else DefaultAzureCredential()
),
**audience_arg,
)
else:
not_supported_error = "Azure AI Search expects `url`."
raise ValueError(not_supported_error)

def create_index(self) -> None:
"""Load documents into an Azure AI Search index."""
Expand Down Expand Up @@ -93,7 +103,7 @@ def create_index(self) -> None:
)
# Configure the index
index = SearchIndex(
name=self.index_name if self.index_name else "",
name=self.index_name,
fields=[
SimpleField(
name=self.id_field,
Expand Down Expand Up @@ -154,17 +164,6 @@ def similarity_search_by_vector(
for doc in response
]

def similarity_search_by_text(
self, text: str, text_embedder: TextEmbedder, k: int = 10
) -> list[VectorStoreSearchResult]:
"""Perform a text-based similarity search."""
query_embedding = text_embedder(text)
if query_embedding:
return self.similarity_search_by_vector(
query_embedding=query_embedding, k=k
)
return []

def search_by_id(self, id: str) -> VectorStoreDocument:
"""Search for a document by id."""
response = self.db_connection.get_document(id)
Expand Down
Loading