From f6f33a9ed1a154f4bbd369184dc0f685df795a66 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 7 Nov 2023 13:33:02 -0600 Subject: [PATCH 1/4] add namespaced vector stores to storage context --- llama_index/storage/storage_context.py | 83 ++++++++++++++++++++------ 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/llama_index/storage/storage_context.py b/llama_index/storage/storage_context.py index e798a4842ea31..793944c080fcd 100644 --- a/llama_index/storage/storage_context.py +++ b/llama_index/storage/storage_context.py @@ -1,7 +1,7 @@ import os from dataclasses import dataclass from pathlib import Path -from typing import Optional, Union +from typing import Dict, Optional, Union import fsspec @@ -24,7 +24,11 @@ from llama_index.storage.index_store.types import BaseIndexStore from llama_index.utils import concat_dirs from llama_index.vector_stores.simple import DEFAULT_PERSIST_FNAME as VECTOR_STORE_FNAME -from llama_index.vector_stores.simple import SimpleVectorStore +from llama_index.vector_stores.simple import ( + DEFAULT_VECTOR_STORE, + NAMESPACE_SEP, + SimpleVectorStore, +) from llama_index.vector_stores.types import VectorStore DEFAULT_PERSIST_DIR = "./storage" @@ -45,7 +49,7 @@ class StorageContext: docstore: BaseDocumentStore index_store: BaseIndexStore - vector_store: VectorStore + vector_stores: Dict[str, VectorStore] graph_store: GraphStore @classmethod @@ -54,6 +58,7 @@ def from_defaults( docstore: Optional[BaseDocumentStore] = None, index_store: Optional[BaseIndexStore] = None, vector_store: Optional[VectorStore] = None, + vector_stores: Optional[Dict[str, VectorStore]] = None, graph_store: Optional[GraphStore] = None, persist_dir: Optional[str] = None, fs: Optional[fsspec.AbstractFileSystem] = None, @@ -70,8 +75,12 @@ def from_defaults( if persist_dir is None: docstore = docstore or SimpleDocumentStore() index_store = index_store or SimpleIndexStore() - vector_store = vector_store or SimpleVectorStore() graph_store = graph_store or SimpleGraphStore() + + if vector_store: + vector_stores = {DEFAULT_VECTOR_STORE: vector_store} + else: + vector_stores = vector_stores, SimpleVectorStore() else: docstore = docstore or SimpleDocumentStore.from_persist_dir( persist_dir, fs=fs @@ -79,14 +88,24 @@ def from_defaults( index_store = index_store or SimpleIndexStore.from_persist_dir( persist_dir, fs=fs ) - vector_store = vector_store or SimpleVectorStore.from_persist_dir( - persist_dir, fs=fs - ) graph_store = graph_store or SimpleGraphStore.from_persist_dir( persist_dir, fs=fs ) - return cls(docstore, index_store, vector_store, graph_store) + if vector_store: + vector_stores = {DEFAULT_VECTOR_STORE: vector_store} + else: + vector_stores = ( + vector_stores + or SimpleVectorStore.from_namespaced_persist_dir(persist_dir, fs=fs) + ) + + return cls( + docstore=docstore, + index_store=index_store, + vector_stores=vector_stores, + graph_store=graph_store, + ) def persist( self, @@ -106,39 +125,56 @@ def persist( persist_dir = str(persist_dir) # NOTE: doesn't support Windows here docstore_path = concat_dirs(persist_dir, docstore_fname) index_store_path = concat_dirs(persist_dir, index_store_fname) - vector_store_path = concat_dirs(persist_dir, vector_store_fname) graph_store_path = concat_dirs(persist_dir, graph_store_fname) else: persist_dir = Path(persist_dir) docstore_path = str(persist_dir / docstore_fname) index_store_path = str(persist_dir / index_store_fname) - vector_store_path = str(persist_dir / vector_store_fname) graph_store_path = str(persist_dir / graph_store_fname) self.docstore.persist(persist_path=docstore_path, fs=fs) self.index_store.persist(persist_path=index_store_path, fs=fs) - self.vector_store.persist(persist_path=vector_store_path, fs=fs) self.graph_store.persist(persist_path=graph_store_path, fs=fs) + # save each vector store under it's namespace + for vector_store_name, vector_store in self.vector_stores.items(): + if fs is not None: + vector_store_path = concat_dirs( + persist_dir, + f"{vector_store_name}{NAMESPACE_SEP}{vector_store_fname}", + ) + else: + vector_store_path = str( + persist_dir + / f"{vector_store_name}{NAMESPACE_SEP}{vector_store_fname}" + ) + + vector_store.persist(persist_path=vector_store_path, fs=fs) + def to_dict(self) -> dict: all_simple = ( - isinstance(self.vector_store, SimpleVectorStore) - and isinstance(self.docstore, SimpleDocumentStore) + isinstance(self.docstore, SimpleDocumentStore) and isinstance(self.index_store, SimpleIndexStore) and isinstance(self.graph_store, SimpleGraphStore) + and all( + isinstance(vs, SimpleVectorStore) for vs in self.vector_stores.values() + ) ) if not all_simple: raise ValueError( "to_dict only available when using simple doc/index/vector stores" ) - assert isinstance(self.vector_store, SimpleVectorStore) assert isinstance(self.docstore, SimpleDocumentStore) assert isinstance(self.index_store, SimpleIndexStore) assert isinstance(self.graph_store, SimpleGraphStore) return { - VECTOR_STORE_KEY: self.vector_store.to_dict(), + VECTOR_STORE_KEY: { + key: vector_store.to_dict() + for key, vector_store in self.vector_stores.items() + if isinstance(vector_store, SimpleVectorStore) + }, DOC_STORE_KEY: self.docstore.to_dict(), INDEX_STORE_KEY: self.index_store.to_dict(), GRAPH_STORE_KEY: self.graph_store.to_dict(), @@ -148,12 +184,25 @@ def to_dict(self) -> dict: def from_dict(cls, save_dict: dict) -> "StorageContext": """Create a StorageContext from dict.""" docstore = SimpleDocumentStore.from_dict(save_dict[DOC_STORE_KEY]) - vector_store = SimpleVectorStore.from_dict(save_dict[VECTOR_STORE_KEY]) index_store = SimpleIndexStore.from_dict(save_dict[INDEX_STORE_KEY]) graph_store = SimpleGraphStore.from_dict(save_dict[GRAPH_STORE_KEY]) + + vector_stores = {} + for key, vector_store_dict in save_dict[VECTOR_STORE_KEY].items(): + vector_stores[key] = SimpleVectorStore.from_dict(vector_store_dict) + return cls( docstore=docstore, index_store=index_store, - vector_store=vector_store, + vector_stores=vector_stores, graph_store=graph_store, ) + + @property + def vector_store(self) -> VectorStore: + """Backwrds compatibility for vector_store property.""" + return self.vector_stores[DEFAULT_VECTOR_STORE] + + def add_vector_store(self, vector_store: VectorStore, namespace: str) -> None: + """Add a vector store to the storage context.""" + self.vector_stores[namespace] = vector_store From aa41600fdea0ce3dd289eb7851730130c2dee492 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 7 Nov 2023 13:33:02 -0600 Subject: [PATCH 2/4] add namespaced vector stores to storage context --- llama_index/vector_stores/simple.py | 37 +++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/llama_index/vector_stores/simple.py b/llama_index/vector_stores/simple.py index 7492ace7cd3b6..dd38feb5e1a2f 100644 --- a/llama_index/vector_stores/simple.py +++ b/llama_index/vector_stores/simple.py @@ -37,6 +37,9 @@ MMR_MODE = VectorStoreQueryMode.MMR +NAMESPACE_SEP = "__" +DEFAULT_VECTOR_STORE = "default" + def _build_metadata_filter_fn( metadata_lookup_fn: Callable[[str], Mapping[str, Any]], @@ -107,15 +110,45 @@ def __init__( def from_persist_dir( cls, persist_dir: str = DEFAULT_PERSIST_DIR, + namespace: Optional[str] = None, fs: Optional[fsspec.AbstractFileSystem] = None, ) -> "SimpleVectorStore": """Load from persist dir.""" + if namespace: + persist_fname = f"{namespace}{NAMESPACE_SEP}{DEFAULT_PERSIST_FNAME}" + else: + persist_fname = DEFAULT_PERSIST_FNAME + if fs is not None: - persist_path = concat_dirs(persist_dir, DEFAULT_PERSIST_FNAME) + persist_path = concat_dirs(persist_dir, persist_fname) else: - persist_path = os.path.join(persist_dir, DEFAULT_PERSIST_FNAME) + persist_path = os.path.join(persist_dir, persist_fname) return cls.from_persist_path(persist_path, fs=fs) + @classmethod + def from_namespaced_persist_dir( + cls, + persist_dir: str = DEFAULT_PERSIST_DIR, + fs: Optional[fsspec.AbstractFileSystem] = None, + ) -> Dict[str, "SimpleVectorStore"]: + """Load from namespaced persist dir.""" + vector_stores = {} + for fname in os.listdir(persist_dir): + if fname.endswith(DEFAULT_PERSIST_FNAME): + namespace = fname.split(NAMESPACE_SEP)[0] + + # handle backwards compatibility with stores that were persisted + if namespace == DEFAULT_PERSIST_FNAME: + vector_stores[DEFAULT_VECTOR_STORE] = cls.from_persist_dir( + persist_dir=persist_dir, fs=fs + ) + else: + vector_stores[namespace] = cls.from_persist_dir( + persist_dir=persist_dir, namespace=namespace, fs=fs + ) + + return vector_stores + @property def client(self) -> None: """Get client.""" From 2dac0f7612c694063208f7084d814f68c1cbea5f Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 7 Nov 2023 13:59:47 -0600 Subject: [PATCH 3/4] fix tests --- llama_index/storage/storage_context.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/llama_index/storage/storage_context.py b/llama_index/storage/storage_context.py index 793944c080fcd..43dabcd6353ec 100644 --- a/llama_index/storage/storage_context.py +++ b/llama_index/storage/storage_context.py @@ -80,7 +80,9 @@ def from_defaults( if vector_store: vector_stores = {DEFAULT_VECTOR_STORE: vector_store} else: - vector_stores = vector_stores, SimpleVectorStore() + vector_stores = vector_stores or { + DEFAULT_VECTOR_STORE: SimpleVectorStore() + } else: docstore = docstore or SimpleDocumentStore.from_persist_dir( persist_dir, fs=fs @@ -94,10 +96,11 @@ def from_defaults( if vector_store: vector_stores = {DEFAULT_VECTOR_STORE: vector_store} + elif vector_stores: + vector_stores = vector_stores else: - vector_stores = ( - vector_stores - or SimpleVectorStore.from_namespaced_persist_dir(persist_dir, fs=fs) + vector_stores = SimpleVectorStore.from_namespaced_persist_dir( + persist_dir, fs=fs ) return cls( From 728c56059a33c42743d84745f88b4512a152fa94 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 7 Nov 2023 14:08:51 -0600 Subject: [PATCH 4/4] fix linting --- llama_index/storage/storage_context.py | 6 +++--- llama_index/vector_stores/simple.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_index/storage/storage_context.py b/llama_index/storage/storage_context.py index 43dabcd6353ec..a7cfe4ef7e88d 100644 --- a/llama_index/storage/storage_context.py +++ b/llama_index/storage/storage_context.py @@ -143,12 +143,12 @@ def persist( for vector_store_name, vector_store in self.vector_stores.items(): if fs is not None: vector_store_path = concat_dirs( - persist_dir, + str(persist_dir), f"{vector_store_name}{NAMESPACE_SEP}{vector_store_fname}", ) else: vector_store_path = str( - persist_dir + Path(persist_dir) / f"{vector_store_name}{NAMESPACE_SEP}{vector_store_fname}" ) @@ -190,7 +190,7 @@ def from_dict(cls, save_dict: dict) -> "StorageContext": index_store = SimpleIndexStore.from_dict(save_dict[INDEX_STORE_KEY]) graph_store = SimpleGraphStore.from_dict(save_dict[GRAPH_STORE_KEY]) - vector_stores = {} + vector_stores: Dict[str, VectorStore] = {} for key, vector_store_dict in save_dict[VECTOR_STORE_KEY].items(): vector_stores[key] = SimpleVectorStore.from_dict(vector_store_dict) diff --git a/llama_index/vector_stores/simple.py b/llama_index/vector_stores/simple.py index dd38feb5e1a2f..caec852381494 100644 --- a/llama_index/vector_stores/simple.py +++ b/llama_index/vector_stores/simple.py @@ -130,9 +130,9 @@ def from_namespaced_persist_dir( cls, persist_dir: str = DEFAULT_PERSIST_DIR, fs: Optional[fsspec.AbstractFileSystem] = None, - ) -> Dict[str, "SimpleVectorStore"]: + ) -> Dict[str, VectorStore]: """Load from namespaced persist dir.""" - vector_stores = {} + vector_stores: Dict[str, VectorStore] = {} for fname in os.listdir(persist_dir): if fname.endswith(DEFAULT_PERSIST_FNAME): namespace = fname.split(NAMESPACE_SEP)[0]