diff --git a/build/lib/lyzr/base/vector_store.py b/build/lib/lyzr/base/vector_store.py index 1fca590..feff1a4 100644 --- a/build/lib/lyzr/base/vector_store.py +++ b/build/lib/lyzr/base/vector_store.py @@ -1,5 +1,9 @@ from typing import Optional, Sequence +import os +import uuid +import weaviate +from weaviate.embedded import EmbeddedOptions from llama_index import Document, ServiceContext, VectorStoreIndex, StorageContext from llama_index.node_parser import SimpleNodeParser @@ -13,7 +17,7 @@ def import_vector_store_class(vector_store_class_name: str): class LyzrVectorStoreIndex: @staticmethod def from_defaults( - vector_store_type: str = "LanceDBVectorStore", + vector_store_type: str = "WeaviateVectorStore", documents: Optional[Sequence[Document]] = None, service_context: Optional[ServiceContext] = None, **kwargs @@ -21,22 +25,35 @@ def from_defaults( if documents is None and vector_store_type == "SimpleVectorStore": raise ValueError("documents must be provided for SimpleVectorStore") - vector_store_class = import_vector_store_class(vector_store_type) + VectorStoreClass = import_vector_store_class(vector_store_type) + + if vector_store_type == "WeaviateVectorStore": + weaviate_client = weaviate.Client( + embedded_options=weaviate.embedded.EmbeddedOptions(), + additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}, + ) + kwargs["weaviate_client"] = ( + weaviate_client + if "weaviate_client" not in kwargs + else kwargs["weaviate_client"] + ) + kwargs["index_name"] = ( + f"DB_{uuid.uuid4().hex}" if "index_name" not in kwargs else kwargs["index_name"] + ) + + vector_store = VectorStoreClass(**kwargs) + else: + vector_store = VectorStoreClass(**kwargs) if documents is None: - vector_store = vector_store_class(**kwargs) index = VectorStoreIndex.from_vector_store( vector_store=vector_store, service_context=service_context ) - else: - if vector_store_type == "LanceDBVectorStore": - kwargs["uri"] = "./.lancedb" if "uri" not in kwargs else kwargs["uri"] - kwargs["table_name"] = ( - "vectors" if "table_name" not in kwargs else kwargs["table_name"] - ) - vector_store = vector_store_class(**kwargs) - storage_context = StorageContext.from_defaults(vector_store=vector_store) + return index + + storage_context = StorageContext.from_defaults(vector_store=vector_store) + if documents is not None: index = VectorStoreIndex.from_documents( documents=documents, storage_context=storage_context, diff --git a/build/lib/lyzr/chatqa/chatbot.py b/build/lib/lyzr/chatqa/chatbot.py index ba30750..2caed78 100644 --- a/build/lib/lyzr/chatqa/chatbot.py +++ b/build/lib/lyzr/chatqa/chatbot.py @@ -42,6 +42,7 @@ def pdf_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return pdf_chat_( input_dir=input_dir, @@ -57,6 +58,7 @@ def pdf_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -74,6 +76,7 @@ def docx_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return docx_chat_( input_dir=input_dir, @@ -89,6 +92,7 @@ def docx_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -106,6 +110,7 @@ def txt_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return txt_chat_( input_dir=input_dir, @@ -121,6 +126,7 @@ def txt_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -133,6 +139,7 @@ def webpage_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return webpage_chat_( url=url, @@ -143,6 +150,7 @@ def webpage_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -155,6 +163,7 @@ def website_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return website_chat_( url=url, @@ -165,6 +174,7 @@ def website_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -177,6 +187,7 @@ def youtube_chat( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: return youtube_chat_( urls=urls, @@ -187,4 +198,5 @@ def youtube_chat( vector_store_params=vector_store_params, service_context_params=service_context_params, chat_engine_params=chat_engine_params, + retriever_params=retriever_params, ) diff --git a/build/lib/lyzr/chatqa/qa_bot.py b/build/lib/lyzr/chatqa/qa_bot.py index 14a117a..2b2f145 100644 --- a/build/lib/lyzr/chatqa/qa_bot.py +++ b/build/lib/lyzr/chatqa/qa_bot.py @@ -41,6 +41,7 @@ def pdf_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return pdf_rag( input_dir=input_dir, @@ -56,6 +57,7 @@ def pdf_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -73,6 +75,7 @@ def docx_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return docx_rag( input_dir=input_dir, @@ -88,6 +91,7 @@ def docx_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -105,6 +109,7 @@ def txt_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return txt_rag( input_dir=input_dir, @@ -120,6 +125,7 @@ def txt_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -132,6 +138,7 @@ def webpage_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return webpage_rag( url=url, @@ -142,6 +149,7 @@ def webpage_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -154,6 +162,7 @@ def website_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return website_rag( url=url, @@ -164,6 +173,7 @@ def website_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) @staticmethod @@ -176,6 +186,7 @@ def youtube_qa( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: return youtube_rag( urls=urls, @@ -186,4 +197,5 @@ def youtube_qa( vector_store_params=vector_store_params, service_context_params=service_context_params, query_engine_params=query_engine_params, + retriever_params=retriever_params, ) diff --git a/build/lib/lyzr/utils/chat_utils.py b/build/lib/lyzr/utils/chat_utils.py index 86971b1..50d4e82 100644 --- a/build/lib/lyzr/utils/chat_utils.py +++ b/build/lib/lyzr/utils/chat_utils.py @@ -47,7 +47,7 @@ def pdf_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -106,6 +106,7 @@ def txt_chat_( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: documents = read_txt_as_documents( input_dir=input_dir, @@ -118,7 +119,7 @@ def txt_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -177,6 +178,7 @@ def docx_chat_( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: documents = read_docx_as_documents( input_dir=input_dir, @@ -189,7 +191,7 @@ def docx_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -243,6 +245,7 @@ def webpage_chat_( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: documents = read_webpage_as_documents( url=url, @@ -250,7 +253,7 @@ def webpage_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -304,6 +307,7 @@ def website_chat_( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: documents = read_website_as_documents( url=url, @@ -311,7 +315,7 @@ def website_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -365,6 +369,7 @@ def youtube_chat_( vector_store_params: dict = None, service_context_params: dict = None, chat_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseChatEngine: documents = read_youtube_as_documents( urls=urls, @@ -372,7 +377,7 @@ def youtube_chat_( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) diff --git a/build/lib/lyzr/utils/rag_utils.py b/build/lib/lyzr/utils/rag_utils.py index cd69156..28b59e2 100644 --- a/build/lib/lyzr/utils/rag_utils.py +++ b/build/lib/lyzr/utils/rag_utils.py @@ -45,14 +45,14 @@ def pdf_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) service_context_params = ( {} if service_context_params is None else service_context_params ) - chat_engine_params = {} if chat_engine_params is None else chat_engine_params + query_engine_params = {} if query_engine_params is None else query_engine_params retriever_params = ( {"retriever_type": "QueryFusionRetriever"} @@ -61,6 +61,7 @@ def pdf_rag( ) llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -96,6 +97,7 @@ def txt_rag( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: documents = read_txt_as_documents( input_dir=input_dir, @@ -108,7 +110,7 @@ def txt_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -117,7 +119,14 @@ def txt_rag( ) query_engine_params = {} if query_engine_params is None else query_engine_params + retriever_params = ( + {"retriever_type": "QueryFusionRetriever"} + if retriever_params is None + else retriever_params + ) + llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -130,7 +139,13 @@ def txt_rag( **vector_store_params, documents=documents, service_context=service_context ) - return vector_store_index.as_query_engine(**query_engine_params, similarity_top_k=5) + retriever = LyzrRetriever.from_defaults( + **retriever_params, base_index=vector_store_index + ) + + query_engine = RetrieverQueryEngine.from_args(retriever, query_engine_params) + + return query_engine def docx_rag( @@ -147,6 +162,7 @@ def docx_rag( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: documents = read_docx_as_documents( input_dir=input_dir, @@ -159,7 +175,7 @@ def docx_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -168,7 +184,14 @@ def docx_rag( ) query_engine_params = {} if query_engine_params is None else query_engine_params + retriever_params = ( + {"retriever_type": "QueryFusionRetriever"} + if retriever_params is None + else retriever_params + ) + llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -181,7 +204,13 @@ def docx_rag( **vector_store_params, documents=documents, service_context=service_context ) - return vector_store_index.as_query_engine(**query_engine_params, similarity_top_k=5) + retriever = LyzrRetriever.from_defaults( + **retriever_params, base_index=vector_store_index + ) + + query_engine = RetrieverQueryEngine.from_args(retriever, query_engine_params) + + return query_engine def webpage_rag( @@ -193,6 +222,7 @@ def webpage_rag( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: documents = read_webpage_as_documents( url=url, @@ -200,7 +230,7 @@ def webpage_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -209,7 +239,14 @@ def webpage_rag( ) query_engine_params = {} if query_engine_params is None else query_engine_params + retriever_params = ( + {"retriever_type": "QueryFusionRetriever"} + if retriever_params is None + else retriever_params + ) + llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -222,7 +259,13 @@ def webpage_rag( **vector_store_params, documents=documents, service_context=service_context ) - return vector_store_index.as_query_engine(**query_engine_params, similarity_top_k=5) + retriever = LyzrRetriever.from_defaults( + **retriever_params, base_index=vector_store_index + ) + + query_engine = RetrieverQueryEngine.from_args(retriever, query_engine_params) + + return query_engine def website_rag( @@ -234,6 +277,7 @@ def website_rag( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: documents = read_website_as_documents( url=url, @@ -241,7 +285,7 @@ def website_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -250,7 +294,14 @@ def website_rag( ) query_engine_params = {} if query_engine_params is None else query_engine_params + retriever_params = ( + {"retriever_type": "QueryFusionRetriever"} + if retriever_params is None + else retriever_params + ) + llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -263,7 +314,13 @@ def website_rag( **vector_store_params, documents=documents, service_context=service_context ) - return vector_store_index.as_query_engine(**query_engine_params, similarity_top_k=5) + retriever = LyzrRetriever.from_defaults( + **retriever_params, base_index=vector_store_index + ) + + query_engine = RetrieverQueryEngine.from_args(retriever, query_engine_params) + + return query_engine def youtube_rag( @@ -275,6 +332,7 @@ def youtube_rag( vector_store_params: dict = None, service_context_params: dict = None, query_engine_params: dict = None, + retriever_params: dict = None, ) -> BaseQueryEngine: documents = read_youtube_as_documents( urls=urls, @@ -282,7 +340,7 @@ def youtube_rag( llm_params = {} if llm_params is None else llm_params vector_store_params = ( - {"vector_store_type": "LanceDBVectorStore"} + {"vector_store_type": "WeaviateVectorStore"} if vector_store_params is None else vector_store_params ) @@ -291,7 +349,14 @@ def youtube_rag( ) query_engine_params = {} if query_engine_params is None else query_engine_params + retriever_params = ( + {"retriever_type": "QueryFusionRetriever"} + if retriever_params is None + else retriever_params + ) + llm = LyzrLLMFactory.from_defaults(**llm_params) + service_context = LyzrService.from_defaults( llm=llm, embed_model=embed_model, @@ -304,4 +369,10 @@ def youtube_rag( **vector_store_params, documents=documents, service_context=service_context ) - return vector_store_index.as_query_engine(**query_engine_params, similarity_top_k=5) + retriever = LyzrRetriever.from_defaults( + **retriever_params, base_index=vector_store_index + ) + + query_engine = RetrieverQueryEngine.from_args(retriever, query_engine_params) + + return query_engine diff --git a/dist/lyzr-0.1.21.tar.gz b/dist/lyzr-0.1.21.tar.gz deleted file mode 100644 index ec46895..0000000 Binary files a/dist/lyzr-0.1.21.tar.gz and /dev/null differ diff --git a/dist/lyzr-0.1.21-py3-none-any.whl b/dist/lyzr-0.1.23-py3-none-any.whl similarity index 57% rename from dist/lyzr-0.1.21-py3-none-any.whl rename to dist/lyzr-0.1.23-py3-none-any.whl index fd8b93b..d3b164e 100644 Binary files a/dist/lyzr-0.1.21-py3-none-any.whl and b/dist/lyzr-0.1.23-py3-none-any.whl differ diff --git a/dist/lyzr-0.1.23.tar.gz b/dist/lyzr-0.1.23.tar.gz new file mode 100644 index 0000000..0c8ae50 Binary files /dev/null and b/dist/lyzr-0.1.23.tar.gz differ diff --git a/lyzr.egg-info/PKG-INFO b/lyzr.egg-info/PKG-INFO index dee3865..7f20d6d 100644 --- a/lyzr.egg-info/PKG-INFO +++ b/lyzr.egg-info/PKG-INFO @@ -1,17 +1,25 @@ Metadata-Version: 2.1 Name: lyzr -Version: 0.1.21 -Summary: UNKNOWN -Home-page: UNKNOWN +Version: 0.1.23 +Home-page: Author: lyzr -License: UNKNOWN -Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Requires-Python: >=3.8.1, <3.12 Description-Content-Type: text/markdown License-File: LICENSE.txt +Requires-Dist: asyncio +Requires-Dist: nest_asyncio +Requires-Dist: openai==1.3.4 +Requires-Dist: litellm==1.2.0 +Requires-Dist: llama-index==0.9.4 +Requires-Dist: langchain==0.0.339 +Requires-Dist: python-dotenv>=1.0.0 +Requires-Dist: beautifulsoup4==4.12.2 +Requires-Dist: pandas==2.0.2 +Requires-Dist: matplotlib==3.8.2 +Requires-Dist: weaviate-client==3.25.3 # lyzr @@ -72,5 +80,3 @@ Replace `[version]` with the actual version of the package you have built. ## License `lyzr` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license. - - diff --git a/lyzr.egg-info/SOURCES.txt b/lyzr.egg-info/SOURCES.txt index 8f78ee8..0277951 100644 --- a/lyzr.egg-info/SOURCES.txt +++ b/lyzr.egg-info/SOURCES.txt @@ -14,6 +14,7 @@ lyzr/base/file_utils.py lyzr/base/llm.py lyzr/base/llms.py lyzr/base/prompt.py +lyzr/base/retrievers.py lyzr/base/service.py lyzr/base/vector_store.py lyzr/base/prompts/ai_queries_pt.txt diff --git a/lyzr.egg-info/requires.txt b/lyzr.egg-info/requires.txt index 18685cd..928b7fb 100644 --- a/lyzr.egg-info/requires.txt +++ b/lyzr.egg-info/requires.txt @@ -1,11 +1,11 @@ asyncio -beautifulsoup4==4.12.2 -lancedb==0.3.3 -langchain==0.0.339 -litellm==1.2.0 -llama-index==0.9.4 -matplotlib==3.8.2 nest_asyncio openai==1.3.4 -pandas==2.0.2 +litellm==1.2.0 +llama-index==0.9.4 +langchain==0.0.339 python-dotenv>=1.0.0 +beautifulsoup4==4.12.2 +pandas==2.0.2 +matplotlib==3.8.2 +weaviate-client==3.25.3 diff --git a/lyzr/base/vector_store.py b/lyzr/base/vector_store.py index 6ad72bd..feff1a4 100644 --- a/lyzr/base/vector_store.py +++ b/lyzr/base/vector_store.py @@ -38,7 +38,7 @@ def from_defaults( else kwargs["weaviate_client"] ) kwargs["index_name"] = ( - uuid if "index_name" not in kwargs else kwargs["index_name"] + f"DB_{uuid.uuid4().hex}" if "index_name" not in kwargs else kwargs["index_name"] ) vector_store = VectorStoreClass(**kwargs) diff --git a/setup.py b/setup.py index aae7724..d915297 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="lyzr", - version="0.1.22", + version="0.1.23", author="lyzr", description="", long_description=open("README.md").read(),